...

Source file src/text/scanner/scanner_test.go

Documentation: text/scanner

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package scanner
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"strings"
    12  	"testing"
    13  	"unicode/utf8"
    14  )
    15  
    16  // A StringReader delivers its data one string segment at a time via Read.
    17  type StringReader struct {
    18  	data []string
    19  	step int
    20  }
    21  
    22  func (r *StringReader) Read(p []byte) (n int, err error) {
    23  	if r.step < len(r.data) {
    24  		s := r.data[r.step]
    25  		n = copy(p, s)
    26  		r.step++
    27  	} else {
    28  		err = io.EOF
    29  	}
    30  	return
    31  }
    32  
    33  func readRuneSegments(t *testing.T, segments []string) {
    34  	got := ""
    35  	want := strings.Join(segments, "")
    36  	s := new(Scanner).Init(&StringReader{data: segments})
    37  	for {
    38  		ch := s.Next()
    39  		if ch == EOF {
    40  			break
    41  		}
    42  		got += string(ch)
    43  	}
    44  	if got != want {
    45  		t.Errorf("segments=%v got=%s want=%s", segments, got, want)
    46  	}
    47  }
    48  
    49  var segmentList = [][]string{
    50  	{},
    51  	{""},
    52  	{"日", "本語"},
    53  	{"\u65e5", "\u672c", "\u8a9e"},
    54  	{"\U000065e5", " ", "\U0000672c", "\U00008a9e"},
    55  	{"\xe6", "\x97\xa5\xe6", "\x9c\xac\xe8\xaa\x9e"},
    56  	{"Hello", ", ", "World", "!"},
    57  	{"Hello", ", ", "", "World", "!"},
    58  }
    59  
    60  func TestNext(t *testing.T) {
    61  	for _, s := range segmentList {
    62  		readRuneSegments(t, s)
    63  	}
    64  }
    65  
    66  type token struct {
    67  	tok  rune
    68  	text string
    69  }
    70  
    71  var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
    72  
    73  var tokenList = []token{
    74  	{Comment, "// line comments"},
    75  	{Comment, "//"},
    76  	{Comment, "////"},
    77  	{Comment, "// comment"},
    78  	{Comment, "// /* comment */"},
    79  	{Comment, "// // comment //"},
    80  	{Comment, "//" + f100},
    81  
    82  	{Comment, "// general comments"},
    83  	{Comment, "/**/"},
    84  	{Comment, "/***/"},
    85  	{Comment, "/* comment */"},
    86  	{Comment, "/* // comment */"},
    87  	{Comment, "/* /* comment */"},
    88  	{Comment, "/*\n comment\n*/"},
    89  	{Comment, "/*" + f100 + "*/"},
    90  
    91  	{Comment, "// identifiers"},
    92  	{Ident, "a"},
    93  	{Ident, "a0"},
    94  	{Ident, "foobar"},
    95  	{Ident, "abc123"},
    96  	{Ident, "LGTM"},
    97  	{Ident, "_"},
    98  	{Ident, "_abc123"},
    99  	{Ident, "abc123_"},
   100  	{Ident, "_abc_123_"},
   101  	{Ident, "_äöü"},
   102  	{Ident, "_本"},
   103  	{Ident, "äöü"},
   104  	{Ident, "本"},
   105  	{Ident, "a۰۱۸"},
   106  	{Ident, "foo६४"},
   107  	{Ident, "bar9876"},
   108  	{Ident, f100},
   109  
   110  	{Comment, "// decimal ints"},
   111  	{Int, "0"},
   112  	{Int, "1"},
   113  	{Int, "9"},
   114  	{Int, "42"},
   115  	{Int, "1234567890"},
   116  
   117  	{Comment, "// octal ints"},
   118  	{Int, "00"},
   119  	{Int, "01"},
   120  	{Int, "07"},
   121  	{Int, "042"},
   122  	{Int, "01234567"},
   123  
   124  	{Comment, "// hexadecimal ints"},
   125  	{Int, "0x0"},
   126  	{Int, "0x1"},
   127  	{Int, "0xf"},
   128  	{Int, "0x42"},
   129  	{Int, "0x123456789abcDEF"},
   130  	{Int, "0x" + f100},
   131  	{Int, "0X0"},
   132  	{Int, "0X1"},
   133  	{Int, "0XF"},
   134  	{Int, "0X42"},
   135  	{Int, "0X123456789abcDEF"},
   136  	{Int, "0X" + f100},
   137  
   138  	{Comment, "// floats"},
   139  	{Float, "0."},
   140  	{Float, "1."},
   141  	{Float, "42."},
   142  	{Float, "01234567890."},
   143  	{Float, ".0"},
   144  	{Float, ".1"},
   145  	{Float, ".42"},
   146  	{Float, ".0123456789"},
   147  	{Float, "0.0"},
   148  	{Float, "1.0"},
   149  	{Float, "42.0"},
   150  	{Float, "01234567890.0"},
   151  	{Float, "0e0"},
   152  	{Float, "1e0"},
   153  	{Float, "42e0"},
   154  	{Float, "01234567890e0"},
   155  	{Float, "0E0"},
   156  	{Float, "1E0"},
   157  	{Float, "42E0"},
   158  	{Float, "01234567890E0"},
   159  	{Float, "0e+10"},
   160  	{Float, "1e-10"},
   161  	{Float, "42e+10"},
   162  	{Float, "01234567890e-10"},
   163  	{Float, "0E+10"},
   164  	{Float, "1E-10"},
   165  	{Float, "42E+10"},
   166  	{Float, "01234567890E-10"},
   167  
   168  	{Comment, "// chars"},
   169  	{Char, `' '`},
   170  	{Char, `'a'`},
   171  	{Char, `'本'`},
   172  	{Char, `'\a'`},
   173  	{Char, `'\b'`},
   174  	{Char, `'\f'`},
   175  	{Char, `'\n'`},
   176  	{Char, `'\r'`},
   177  	{Char, `'\t'`},
   178  	{Char, `'\v'`},
   179  	{Char, `'\''`},
   180  	{Char, `'\000'`},
   181  	{Char, `'\777'`},
   182  	{Char, `'\x00'`},
   183  	{Char, `'\xff'`},
   184  	{Char, `'\u0000'`},
   185  	{Char, `'\ufA16'`},
   186  	{Char, `'\U00000000'`},
   187  	{Char, `'\U0000ffAB'`},
   188  
   189  	{Comment, "// strings"},
   190  	{String, `" "`},
   191  	{String, `"a"`},
   192  	{String, `"本"`},
   193  	{String, `"\a"`},
   194  	{String, `"\b"`},
   195  	{String, `"\f"`},
   196  	{String, `"\n"`},
   197  	{String, `"\r"`},
   198  	{String, `"\t"`},
   199  	{String, `"\v"`},
   200  	{String, `"\""`},
   201  	{String, `"\000"`},
   202  	{String, `"\777"`},
   203  	{String, `"\x00"`},
   204  	{String, `"\xff"`},
   205  	{String, `"\u0000"`},
   206  	{String, `"\ufA16"`},
   207  	{String, `"\U00000000"`},
   208  	{String, `"\U0000ffAB"`},
   209  	{String, `"` + f100 + `"`},
   210  
   211  	{Comment, "// raw strings"},
   212  	{RawString, "``"},
   213  	{RawString, "`\\`"},
   214  	{RawString, "`" + "\n\n/* foobar */\n\n" + "`"},
   215  	{RawString, "`" + f100 + "`"},
   216  
   217  	{Comment, "// individual characters"},
   218  	// NUL character is not allowed
   219  	{'\x01', "\x01"},
   220  	{' ' - 1, string(' ' - 1)},
   221  	{'+', "+"},
   222  	{'/', "/"},
   223  	{'.', "."},
   224  	{'~', "~"},
   225  	{'(', "("},
   226  }
   227  
   228  func makeSource(pattern string) *bytes.Buffer {
   229  	var buf bytes.Buffer
   230  	for _, k := range tokenList {
   231  		fmt.Fprintf(&buf, pattern, k.text)
   232  	}
   233  	return &buf
   234  }
   235  
   236  func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
   237  	if got != want {
   238  		t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
   239  	}
   240  	if s.Line != line {
   241  		t.Errorf("line = %d, want %d for %q", s.Line, line, text)
   242  	}
   243  	stext := s.TokenText()
   244  	if stext != text {
   245  		t.Errorf("text = %q, want %q", stext, text)
   246  	} else {
   247  		// check idempotency of TokenText() call
   248  		stext = s.TokenText()
   249  		if stext != text {
   250  			t.Errorf("text = %q, want %q (idempotency check)", stext, text)
   251  		}
   252  	}
   253  }
   254  
   255  func checkTokErr(t *testing.T, s *Scanner, line int, want rune, text string) {
   256  	prevCount := s.ErrorCount
   257  	checkTok(t, s, line, s.Scan(), want, text)
   258  	if s.ErrorCount != prevCount+1 {
   259  		t.Fatalf("want error for %q", text)
   260  	}
   261  }
   262  
   263  func countNewlines(s string) int {
   264  	n := 0
   265  	for _, ch := range s {
   266  		if ch == '\n' {
   267  			n++
   268  		}
   269  	}
   270  	return n
   271  }
   272  
   273  func testScan(t *testing.T, mode uint) {
   274  	s := new(Scanner).Init(makeSource(" \t%s\n"))
   275  	s.Mode = mode
   276  	tok := s.Scan()
   277  	line := 1
   278  	for _, k := range tokenList {
   279  		if mode&SkipComments == 0 || k.tok != Comment {
   280  			checkTok(t, s, line, tok, k.tok, k.text)
   281  			tok = s.Scan()
   282  		}
   283  		line += countNewlines(k.text) + 1 // each token is on a new line
   284  	}
   285  	checkTok(t, s, line, tok, EOF, "")
   286  }
   287  
   288  func TestScan(t *testing.T) {
   289  	testScan(t, GoTokens)
   290  	testScan(t, GoTokens&^SkipComments)
   291  }
   292  
   293  func TestInvalidExponent(t *testing.T) {
   294  	const src = "1.5e 1.5E 1e+ 1e- 1.5z"
   295  	s := new(Scanner).Init(strings.NewReader(src))
   296  	s.Error = func(s *Scanner, msg string) {
   297  		const want = "exponent has no digits"
   298  		if msg != want {
   299  			t.Errorf("%s: got error %q; want %q", s.TokenText(), msg, want)
   300  		}
   301  	}
   302  	checkTokErr(t, s, 1, Float, "1.5e")
   303  	checkTokErr(t, s, 1, Float, "1.5E")
   304  	checkTokErr(t, s, 1, Float, "1e+")
   305  	checkTokErr(t, s, 1, Float, "1e-")
   306  	checkTok(t, s, 1, s.Scan(), Float, "1.5")
   307  	checkTok(t, s, 1, s.Scan(), Ident, "z")
   308  	checkTok(t, s, 1, s.Scan(), EOF, "")
   309  	if s.ErrorCount != 4 {
   310  		t.Errorf("%d errors, want 4", s.ErrorCount)
   311  	}
   312  }
   313  
   314  func TestPosition(t *testing.T) {
   315  	src := makeSource("\t\t\t\t%s\n")
   316  	s := new(Scanner).Init(src)
   317  	s.Mode = GoTokens &^ SkipComments
   318  	s.Scan()
   319  	pos := Position{"", 4, 1, 5}
   320  	for _, k := range tokenList {
   321  		if s.Offset != pos.Offset {
   322  			t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
   323  		}
   324  		if s.Line != pos.Line {
   325  			t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
   326  		}
   327  		if s.Column != pos.Column {
   328  			t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
   329  		}
   330  		pos.Offset += 4 + len(k.text) + 1     // 4 tabs + token bytes + newline
   331  		pos.Line += countNewlines(k.text) + 1 // each token is on a new line
   332  		s.Scan()
   333  	}
   334  	// make sure there were no token-internal errors reported by scanner
   335  	if s.ErrorCount != 0 {
   336  		t.Errorf("%d errors", s.ErrorCount)
   337  	}
   338  }
   339  
   340  func TestScanZeroMode(t *testing.T) {
   341  	src := makeSource("%s\n")
   342  	str := src.String()
   343  	s := new(Scanner).Init(src)
   344  	s.Mode = 0       // don't recognize any token classes
   345  	s.Whitespace = 0 // don't skip any whitespace
   346  	tok := s.Scan()
   347  	for i, ch := range str {
   348  		if tok != ch {
   349  			t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
   350  		}
   351  		tok = s.Scan()
   352  	}
   353  	if tok != EOF {
   354  		t.Fatalf("tok = %s, want EOF", TokenString(tok))
   355  	}
   356  	if s.ErrorCount != 0 {
   357  		t.Errorf("%d errors", s.ErrorCount)
   358  	}
   359  }
   360  
   361  func testScanSelectedMode(t *testing.T, mode uint, class rune) {
   362  	src := makeSource("%s\n")
   363  	s := new(Scanner).Init(src)
   364  	s.Mode = mode
   365  	tok := s.Scan()
   366  	for tok != EOF {
   367  		if tok < 0 && tok != class {
   368  			t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
   369  		}
   370  		tok = s.Scan()
   371  	}
   372  	if s.ErrorCount != 0 {
   373  		t.Errorf("%d errors", s.ErrorCount)
   374  	}
   375  }
   376  
   377  func TestScanSelectedMask(t *testing.T) {
   378  	testScanSelectedMode(t, 0, 0)
   379  	testScanSelectedMode(t, ScanIdents, Ident)
   380  	// Don't test ScanInts and ScanNumbers since some parts of
   381  	// the floats in the source look like (invalid) octal ints
   382  	// and ScanNumbers may return either Int or Float.
   383  	testScanSelectedMode(t, ScanChars, Char)
   384  	testScanSelectedMode(t, ScanStrings, String)
   385  	testScanSelectedMode(t, SkipComments, 0)
   386  	testScanSelectedMode(t, ScanComments, Comment)
   387  }
   388  
   389  func TestScanCustomIdent(t *testing.T) {
   390  	const src = "faab12345 a12b123 a12 3b"
   391  	s := new(Scanner).Init(strings.NewReader(src))
   392  	// ident = ( 'a' | 'b' ) { digit } .
   393  	// digit = '0' .. '3' .
   394  	// with a maximum length of 4
   395  	s.IsIdentRune = func(ch rune, i int) bool {
   396  		return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
   397  	}
   398  	checkTok(t, s, 1, s.Scan(), 'f', "f")
   399  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   400  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   401  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
   402  	checkTok(t, s, 1, s.Scan(), Int, "45")
   403  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
   404  	checkTok(t, s, 1, s.Scan(), Ident, "b123")
   405  	checkTok(t, s, 1, s.Scan(), Ident, "a12")
   406  	checkTok(t, s, 1, s.Scan(), Int, "3")
   407  	checkTok(t, s, 1, s.Scan(), Ident, "b")
   408  	checkTok(t, s, 1, s.Scan(), EOF, "")
   409  }
   410  
   411  func TestScanNext(t *testing.T) {
   412  	const BOM = '\uFEFF'
   413  	BOMs := string(BOM)
   414  	s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
   415  	checkTok(t, s, 1, s.Scan(), Ident, "if") // the first BOM is ignored
   416  	checkTok(t, s, 1, s.Scan(), Ident, "a")
   417  	checkTok(t, s, 1, s.Scan(), '=', "=")
   418  	checkTok(t, s, 0, s.Next(), '=', "")
   419  	checkTok(t, s, 0, s.Next(), ' ', "")
   420  	checkTok(t, s, 0, s.Next(), 'b', "")
   421  	checkTok(t, s, 1, s.Scan(), Ident, "cd")
   422  	checkTok(t, s, 1, s.Scan(), '{', "{")
   423  	checkTok(t, s, 2, s.Scan(), Ident, "a")
   424  	checkTok(t, s, 2, s.Scan(), '+', "+")
   425  	checkTok(t, s, 0, s.Next(), '=', "")
   426  	checkTok(t, s, 2, s.Scan(), Ident, "c")
   427  	checkTok(t, s, 3, s.Scan(), '}', "}")
   428  	checkTok(t, s, 3, s.Scan(), BOM, BOMs)
   429  	checkTok(t, s, 3, s.Scan(), -1, "")
   430  	if s.ErrorCount != 0 {
   431  		t.Errorf("%d errors", s.ErrorCount)
   432  	}
   433  }
   434  
   435  func TestScanWhitespace(t *testing.T) {
   436  	var buf bytes.Buffer
   437  	var ws uint64
   438  	// start at 1, NUL character is not allowed
   439  	for ch := byte(1); ch < ' '; ch++ {
   440  		buf.WriteByte(ch)
   441  		ws |= 1 << ch
   442  	}
   443  	const orig = 'x'
   444  	buf.WriteByte(orig)
   445  
   446  	s := new(Scanner).Init(&buf)
   447  	s.Mode = 0
   448  	s.Whitespace = ws
   449  	tok := s.Scan()
   450  	if tok != orig {
   451  		t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
   452  	}
   453  }
   454  
   455  func testError(t *testing.T, src, pos, msg string, tok rune) {
   456  	s := new(Scanner).Init(strings.NewReader(src))
   457  	errorCalled := false
   458  	s.Error = func(s *Scanner, m string) {
   459  		if !errorCalled {
   460  			// only look at first error
   461  			if p := s.Pos().String(); p != pos {
   462  				t.Errorf("pos = %q, want %q for %q", p, pos, src)
   463  			}
   464  			if m != msg {
   465  				t.Errorf("msg = %q, want %q for %q", m, msg, src)
   466  			}
   467  			errorCalled = true
   468  		}
   469  	}
   470  	tk := s.Scan()
   471  	if tk != tok {
   472  		t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
   473  	}
   474  	if !errorCalled {
   475  		t.Errorf("error handler not called for %q", src)
   476  	}
   477  	if s.ErrorCount == 0 {
   478  		t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
   479  	}
   480  }
   481  
   482  func TestError(t *testing.T) {
   483  	testError(t, "\x00", "<input>:1:1", "invalid character NUL", 0)
   484  	testError(t, "\x80", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
   485  	testError(t, "\xff", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
   486  
   487  	testError(t, "a\x00", "<input>:1:2", "invalid character NUL", Ident)
   488  	testError(t, "ab\x80", "<input>:1:3", "invalid UTF-8 encoding", Ident)
   489  	testError(t, "abc\xff", "<input>:1:4", "invalid UTF-8 encoding", Ident)
   490  
   491  	testError(t, `"a`+"\x00", "<input>:1:3", "invalid character NUL", String)
   492  	testError(t, `"ab`+"\x80", "<input>:1:4", "invalid UTF-8 encoding", String)
   493  	testError(t, `"abc`+"\xff", "<input>:1:5", "invalid UTF-8 encoding", String)
   494  
   495  	testError(t, "`a"+"\x00", "<input>:1:3", "invalid character NUL", RawString)
   496  	testError(t, "`ab"+"\x80", "<input>:1:4", "invalid UTF-8 encoding", RawString)
   497  	testError(t, "`abc"+"\xff", "<input>:1:5", "invalid UTF-8 encoding", RawString)
   498  
   499  	testError(t, `'\"'`, "<input>:1:3", "invalid char escape", Char)
   500  	testError(t, `"\'"`, "<input>:1:3", "invalid char escape", String)
   501  
   502  	testError(t, `01238`, "<input>:1:6", "invalid digit '8' in octal literal", Int)
   503  	testError(t, `01238123`, "<input>:1:9", "invalid digit '8' in octal literal", Int)
   504  	testError(t, `0x`, "<input>:1:3", "hexadecimal literal has no digits", Int)
   505  	testError(t, `0xg`, "<input>:1:3", "hexadecimal literal has no digits", Int)
   506  	testError(t, `'aa'`, "<input>:1:4", "invalid char literal", Char)
   507  	testError(t, `1.5e`, "<input>:1:5", "exponent has no digits", Float)
   508  	testError(t, `1.5E`, "<input>:1:5", "exponent has no digits", Float)
   509  	testError(t, `1.5e+`, "<input>:1:6", "exponent has no digits", Float)
   510  	testError(t, `1.5e-`, "<input>:1:6", "exponent has no digits", Float)
   511  
   512  	testError(t, `'`, "<input>:1:2", "literal not terminated", Char)
   513  	testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char)
   514  	testError(t, `"abc`, "<input>:1:5", "literal not terminated", String)
   515  	testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String)
   516  	testError(t, "`abc\n", "<input>:2:1", "literal not terminated", RawString)
   517  	testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF)
   518  }
   519  
   520  // An errReader returns (0, err) where err is not io.EOF.
   521  type errReader struct{}
   522  
   523  func (errReader) Read(b []byte) (int, error) {
   524  	return 0, io.ErrNoProgress // some error that is not io.EOF
   525  }
   526  
   527  func TestIOError(t *testing.T) {
   528  	s := new(Scanner).Init(errReader{})
   529  	errorCalled := false
   530  	s.Error = func(s *Scanner, msg string) {
   531  		if !errorCalled {
   532  			if want := io.ErrNoProgress.Error(); msg != want {
   533  				t.Errorf("msg = %q, want %q", msg, want)
   534  			}
   535  			errorCalled = true
   536  		}
   537  	}
   538  	tok := s.Scan()
   539  	if tok != EOF {
   540  		t.Errorf("tok = %s, want EOF", TokenString(tok))
   541  	}
   542  	if !errorCalled {
   543  		t.Errorf("error handler not called")
   544  	}
   545  }
   546  
   547  func checkPos(t *testing.T, got, want Position) {
   548  	if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
   549  		t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
   550  			got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
   551  	}
   552  }
   553  
   554  func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   555  	if ch := s.Next(); ch != char {
   556  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   557  	}
   558  	want := Position{Offset: offset, Line: line, Column: column}
   559  	checkPos(t, s.Pos(), want)
   560  }
   561  
   562  func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
   563  	want := Position{Offset: offset, Line: line, Column: column}
   564  	checkPos(t, s.Pos(), want)
   565  	if ch := s.Scan(); ch != char {
   566  		t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
   567  		if string(ch) != s.TokenText() {
   568  			t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
   569  		}
   570  	}
   571  	checkPos(t, s.Position, want)
   572  }
   573  
   574  func TestPos(t *testing.T) {
   575  	// corner case: empty source
   576  	s := new(Scanner).Init(strings.NewReader(""))
   577  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   578  	s.Peek() // peek doesn't affect the position
   579  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   580  
   581  	// corner case: source with only a newline
   582  	s = new(Scanner).Init(strings.NewReader("\n"))
   583  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   584  	checkNextPos(t, s, 1, 2, 1, '\n')
   585  	// after EOF position doesn't change
   586  	for i := 10; i > 0; i-- {
   587  		checkScanPos(t, s, 1, 2, 1, EOF)
   588  	}
   589  	if s.ErrorCount != 0 {
   590  		t.Errorf("%d errors", s.ErrorCount)
   591  	}
   592  
   593  	// corner case: source with only a single character
   594  	s = new(Scanner).Init(strings.NewReader("本"))
   595  	checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
   596  	checkNextPos(t, s, 3, 1, 2, '本')
   597  	// after EOF position doesn't change
   598  	for i := 10; i > 0; i-- {
   599  		checkScanPos(t, s, 3, 1, 2, EOF)
   600  	}
   601  	if s.ErrorCount != 0 {
   602  		t.Errorf("%d errors", s.ErrorCount)
   603  	}
   604  
   605  	// positions after calling Next
   606  	s = new(Scanner).Init(strings.NewReader("  foo६४  \n\n本語\n"))
   607  	checkNextPos(t, s, 1, 1, 2, ' ')
   608  	s.Peek() // peek doesn't affect the position
   609  	checkNextPos(t, s, 2, 1, 3, ' ')
   610  	checkNextPos(t, s, 3, 1, 4, 'f')
   611  	checkNextPos(t, s, 4, 1, 5, 'o')
   612  	checkNextPos(t, s, 5, 1, 6, 'o')
   613  	checkNextPos(t, s, 8, 1, 7, '६')
   614  	checkNextPos(t, s, 11, 1, 8, '४')
   615  	checkNextPos(t, s, 12, 1, 9, ' ')
   616  	checkNextPos(t, s, 13, 1, 10, ' ')
   617  	checkNextPos(t, s, 14, 2, 1, '\n')
   618  	checkNextPos(t, s, 15, 3, 1, '\n')
   619  	checkNextPos(t, s, 18, 3, 2, '本')
   620  	checkNextPos(t, s, 21, 3, 3, '語')
   621  	checkNextPos(t, s, 22, 4, 1, '\n')
   622  	// after EOF position doesn't change
   623  	for i := 10; i > 0; i-- {
   624  		checkScanPos(t, s, 22, 4, 1, EOF)
   625  	}
   626  	if s.ErrorCount != 0 {
   627  		t.Errorf("%d errors", s.ErrorCount)
   628  	}
   629  
   630  	// positions after calling Scan
   631  	s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
   632  	s.Mode = 0
   633  	s.Whitespace = 0
   634  	checkScanPos(t, s, 0, 1, 1, 'a')
   635  	s.Peek() // peek doesn't affect the position
   636  	checkScanPos(t, s, 1, 1, 2, 'b')
   637  	checkScanPos(t, s, 2, 1, 3, 'c')
   638  	checkScanPos(t, s, 3, 1, 4, '\n')
   639  	checkScanPos(t, s, 4, 2, 1, '本')
   640  	checkScanPos(t, s, 7, 2, 2, '語')
   641  	checkScanPos(t, s, 10, 2, 3, '\n')
   642  	checkScanPos(t, s, 11, 3, 1, '\n')
   643  	checkScanPos(t, s, 12, 4, 1, 'x')
   644  	// after EOF position doesn't change
   645  	for i := 10; i > 0; i-- {
   646  		checkScanPos(t, s, 13, 4, 2, EOF)
   647  	}
   648  	if s.ErrorCount != 0 {
   649  		t.Errorf("%d errors", s.ErrorCount)
   650  	}
   651  }
   652  
   653  type countReader int
   654  
   655  func (r *countReader) Read([]byte) (int, error) {
   656  	*r++
   657  	return 0, io.EOF
   658  }
   659  
   660  func TestNextEOFHandling(t *testing.T) {
   661  	var r countReader
   662  
   663  	// corner case: empty source
   664  	s := new(Scanner).Init(&r)
   665  
   666  	tok := s.Next()
   667  	if tok != EOF {
   668  		t.Error("1) EOF not reported")
   669  	}
   670  
   671  	tok = s.Peek()
   672  	if tok != EOF {
   673  		t.Error("2) EOF not reported")
   674  	}
   675  
   676  	if r != 1 {
   677  		t.Errorf("scanner called Read %d times, not once", r)
   678  	}
   679  }
   680  
   681  func TestScanEOFHandling(t *testing.T) {
   682  	var r countReader
   683  
   684  	// corner case: empty source
   685  	s := new(Scanner).Init(&r)
   686  
   687  	tok := s.Scan()
   688  	if tok != EOF {
   689  		t.Error("1) EOF not reported")
   690  	}
   691  
   692  	tok = s.Peek()
   693  	if tok != EOF {
   694  		t.Error("2) EOF not reported")
   695  	}
   696  
   697  	if r != 1 {
   698  		t.Errorf("scanner called Read %d times, not once", r)
   699  	}
   700  }
   701  
   702  func TestIssue29723(t *testing.T) {
   703  	s := new(Scanner).Init(strings.NewReader(`x "`))
   704  	s.Error = func(s *Scanner, _ string) {
   705  		got := s.TokenText() // this call shouldn't panic
   706  		const want = `"`
   707  		if got != want {
   708  			t.Errorf("got %q; want %q", got, want)
   709  		}
   710  	}
   711  	for r := s.Scan(); r != EOF; r = s.Scan() {
   712  	}
   713  }
   714  
   715  func TestNumbers(t *testing.T) {
   716  	for _, test := range []struct {
   717  		tok              rune
   718  		src, tokens, err string
   719  	}{
   720  		// binaries
   721  		{Int, "0b0", "0b0", ""},
   722  		{Int, "0b1010", "0b1010", ""},
   723  		{Int, "0B1110", "0B1110", ""},
   724  
   725  		{Int, "0b", "0b", "binary literal has no digits"},
   726  		{Int, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
   727  		{Int, "0b01a0", "0b01 a0", ""}, // only accept 0-9
   728  
   729  		// binary floats (invalid)
   730  		{Float, "0b.", "0b.", "invalid radix point in binary literal"},
   731  		{Float, "0b.1", "0b.1", "invalid radix point in binary literal"},
   732  		{Float, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
   733  		{Float, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
   734  		{Float, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},
   735  
   736  		// octals
   737  		{Int, "0o0", "0o0", ""},
   738  		{Int, "0o1234", "0o1234", ""},
   739  		{Int, "0O1234", "0O1234", ""},
   740  
   741  		{Int, "0o", "0o", "octal literal has no digits"},
   742  		{Int, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
   743  		{Int, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
   744  		{Int, "0o12a3", "0o12 a3", ""}, // only accept 0-9
   745  
   746  		// octal floats (invalid)
   747  		{Float, "0o.", "0o.", "invalid radix point in octal literal"},
   748  		{Float, "0o.2", "0o.2", "invalid radix point in octal literal"},
   749  		{Float, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
   750  		{Float, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
   751  		{Float, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},
   752  
   753  		// 0-octals
   754  		{Int, "0", "0", ""},
   755  		{Int, "0123", "0123", ""},
   756  
   757  		{Int, "08123", "08123", "invalid digit '8' in octal literal"},
   758  		{Int, "01293", "01293", "invalid digit '9' in octal literal"},
   759  		{Int, "0F.", "0 F .", ""}, // only accept 0-9
   760  		{Int, "0123F.", "0123 F .", ""},
   761  		{Int, "0123456x", "0123456 x", ""},
   762  
   763  		// decimals
   764  		{Int, "1", "1", ""},
   765  		{Int, "1234", "1234", ""},
   766  
   767  		{Int, "1f", "1 f", ""}, // only accept 0-9
   768  
   769  		// decimal floats
   770  		{Float, "0.", "0.", ""},
   771  		{Float, "123.", "123.", ""},
   772  		{Float, "0123.", "0123.", ""},
   773  
   774  		{Float, ".0", ".0", ""},
   775  		{Float, ".123", ".123", ""},
   776  		{Float, ".0123", ".0123", ""},
   777  
   778  		{Float, "0.0", "0.0", ""},
   779  		{Float, "123.123", "123.123", ""},
   780  		{Float, "0123.0123", "0123.0123", ""},
   781  
   782  		{Float, "0e0", "0e0", ""},
   783  		{Float, "123e+0", "123e+0", ""},
   784  		{Float, "0123E-1", "0123E-1", ""},
   785  
   786  		{Float, "0.e+1", "0.e+1", ""},
   787  		{Float, "123.E-10", "123.E-10", ""},
   788  		{Float, "0123.e123", "0123.e123", ""},
   789  
   790  		{Float, ".0e-1", ".0e-1", ""},
   791  		{Float, ".123E+10", ".123E+10", ""},
   792  		{Float, ".0123E123", ".0123E123", ""},
   793  
   794  		{Float, "0.0e1", "0.0e1", ""},
   795  		{Float, "123.123E-10", "123.123E-10", ""},
   796  		{Float, "0123.0123e+456", "0123.0123e+456", ""},
   797  
   798  		{Float, "0e", "0e", "exponent has no digits"},
   799  		{Float, "0E+", "0E+", "exponent has no digits"},
   800  		{Float, "1e+f", "1e+ f", "exponent has no digits"},
   801  		{Float, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
   802  		{Float, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},
   803  
   804  		// hexadecimals
   805  		{Int, "0x0", "0x0", ""},
   806  		{Int, "0x1234", "0x1234", ""},
   807  		{Int, "0xcafef00d", "0xcafef00d", ""},
   808  		{Int, "0XCAFEF00D", "0XCAFEF00D", ""},
   809  
   810  		{Int, "0x", "0x", "hexadecimal literal has no digits"},
   811  		{Int, "0x1g", "0x1 g", ""},
   812  
   813  		// hexadecimal floats
   814  		{Float, "0x0p0", "0x0p0", ""},
   815  		{Float, "0x12efp-123", "0x12efp-123", ""},
   816  		{Float, "0xABCD.p+0", "0xABCD.p+0", ""},
   817  		{Float, "0x.0189P-0", "0x.0189P-0", ""},
   818  		{Float, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},
   819  
   820  		{Float, "0x.", "0x.", "hexadecimal literal has no digits"},
   821  		{Float, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
   822  		{Float, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
   823  		{Float, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
   824  		{Float, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
   825  		{Float, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
   826  		{Float, "0x0p", "0x0p", "exponent has no digits"},
   827  		{Float, "0xeP-", "0xeP-", "exponent has no digits"},
   828  		{Float, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
   829  		{Float, "0x1.2p1a", "0x1.2p1 a", ""},
   830  
   831  		// separators
   832  		{Int, "0b_1000_0001", "0b_1000_0001", ""},
   833  		{Int, "0o_600", "0o_600", ""},
   834  		{Int, "0_466", "0_466", ""},
   835  		{Int, "1_000", "1_000", ""},
   836  		{Float, "1_000.000_1", "1_000.000_1", ""},
   837  		{Int, "0x_f00d", "0x_f00d", ""},
   838  		{Float, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},
   839  
   840  		{Int, "0b__1000", "0b__1000", "'_' must separate successive digits"},
   841  		{Int, "0o60___0", "0o60___0", "'_' must separate successive digits"},
   842  		{Int, "0466_", "0466_", "'_' must separate successive digits"},
   843  		{Float, "1_.", "1_.", "'_' must separate successive digits"},
   844  		{Float, "0._1", "0._1", "'_' must separate successive digits"},
   845  		{Float, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
   846  		{Int, "0x___0", "0x___0", "'_' must separate successive digits"},
   847  		{Float, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
   848  	} {
   849  		s := new(Scanner).Init(strings.NewReader(test.src))
   850  		var err string
   851  		s.Error = func(s *Scanner, msg string) {
   852  			if err == "" {
   853  				err = msg
   854  			}
   855  		}
   856  
   857  		for i, want := range strings.Split(test.tokens, " ") {
   858  			err = ""
   859  			tok := s.Scan()
   860  			lit := s.TokenText()
   861  			if i == 0 {
   862  				if tok != test.tok {
   863  					t.Errorf("%q: got token %s; want %s", test.src, TokenString(tok), TokenString(test.tok))
   864  				}
   865  				if err != test.err {
   866  					t.Errorf("%q: got error %q; want %q", test.src, err, test.err)
   867  				}
   868  			}
   869  			if lit != want {
   870  				t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, TokenString(tok), want)
   871  			}
   872  		}
   873  
   874  		// make sure we read all
   875  		if tok := s.Scan(); tok != EOF {
   876  			t.Errorf("%q: got %s; want EOF", test.src, TokenString(tok))
   877  		}
   878  	}
   879  }
   880  
   881  func TestIssue30320(t *testing.T) {
   882  	for _, test := range []struct {
   883  		in, want string
   884  		mode     uint
   885  	}{
   886  		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
   887  		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
   888  		{"xxx1e0yyy", "1 0", ScanInts},
   889  		{"1_2", "1_2", ScanInts},
   890  		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
   891  		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
   892  	} {
   893  		got := extractInts(test.in, test.mode)
   894  		if got != test.want {
   895  			t.Errorf("%q: got %q; want %q", test.in, got, test.want)
   896  		}
   897  	}
   898  }
   899  
   900  func extractInts(t string, mode uint) (res string) {
   901  	var s Scanner
   902  	s.Init(strings.NewReader(t))
   903  	s.Mode = mode
   904  	for {
   905  		switch tok := s.Scan(); tok {
   906  		case Int, Float:
   907  			if len(res) > 0 {
   908  				res += " "
   909  			}
   910  			res += s.TokenText()
   911  		case EOF:
   912  			return
   913  		}
   914  	}
   915  }
   916  
   917  func TestIssue50909(t *testing.T) {
   918  	var s Scanner
   919  	s.Init(strings.NewReader("hello \n\nworld\n!\n"))
   920  	s.IsIdentRune = func(ch rune, _ int) bool { return ch != '\n' }
   921  
   922  	r := ""
   923  	n := 0
   924  	for s.Scan() != EOF && n < 10 {
   925  		r += s.TokenText()
   926  		n++
   927  	}
   928  
   929  	const R = "hello world!"
   930  	const N = 3
   931  	if r != R || n != N {
   932  		t.Errorf("got %q (n = %d); want %q (n = %d)", r, n, R, N)
   933  	}
   934  }
   935  

View as plain text