111
|
1 // Copyright 2013 The Go Authors. All rights reserved.
|
|
2 // Use of this source code is governed by a BSD-style
|
|
3 // license that can be found in the LICENSE file.
|
|
4
|
|
5 package bufio_test
|
|
6
|
|
7 import (
|
|
8 . "bufio"
|
|
9 "bytes"
|
|
10 "errors"
|
|
11 "io"
|
|
12 "strings"
|
|
13 "testing"
|
|
14 "unicode"
|
|
15 "unicode/utf8"
|
|
16 )
|
|
17
|
|
// smallMaxTokenSize is the token size limit used by these tests. It is kept
// much smaller than a realistic limit so the tests run efficiently.
const smallMaxTokenSize = 256
|
|
19
|
|
20 // Test white space table matches the Unicode definition.
|
|
21 func TestSpace(t *testing.T) {
|
|
22 for r := rune(0); r <= utf8.MaxRune; r++ {
|
|
23 if IsSpace(r) != unicode.IsSpace(r) {
|
|
24 t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r))
|
|
25 }
|
|
26 }
|
|
27 }
|
|
28
|
|
// scanTests lists the inputs shared by the byte and rune scanning tests. It
// covers the empty string, plain ASCII, multi-byte runes, and malformed UTF-8.
var scanTests = []string{
	"",
	"a",
	"¼",
	"☹",
	// UTF-8 error
	"\x81",
	// correctly encoded RuneError
	"\uFFFD",
	"abcdefgh",
	"abc def\n\t\tgh ",
	"abc¼☹\x81\uFFFD日本語\x82abc",
}
|
|
40
|
|
41 func TestScanByte(t *testing.T) {
|
|
42 for n, test := range scanTests {
|
|
43 buf := strings.NewReader(test)
|
|
44 s := NewScanner(buf)
|
|
45 s.Split(ScanBytes)
|
|
46 var i int
|
|
47 for i = 0; s.Scan(); i++ {
|
|
48 if b := s.Bytes(); len(b) != 1 || b[0] != test[i] {
|
|
49 t.Errorf("#%d: %d: expected %q got %q", n, i, test, b)
|
|
50 }
|
|
51 }
|
|
52 if i != len(test) {
|
|
53 t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i)
|
|
54 }
|
|
55 err := s.Err()
|
|
56 if err != nil {
|
|
57 t.Errorf("#%d: %v", n, err)
|
|
58 }
|
|
59 }
|
|
60 }
|
|
61
|
|
62 // Test that the rune splitter returns same sequence of runes (not bytes) as for range string.
|
|
63 func TestScanRune(t *testing.T) {
|
|
64 for n, test := range scanTests {
|
|
65 buf := strings.NewReader(test)
|
|
66 s := NewScanner(buf)
|
|
67 s.Split(ScanRunes)
|
|
68 var i, runeCount int
|
|
69 var expect rune
|
|
70 // Use a string range loop to validate the sequence of runes.
|
|
71 for i, expect = range string(test) {
|
|
72 if !s.Scan() {
|
|
73 break
|
|
74 }
|
|
75 runeCount++
|
|
76 got, _ := utf8.DecodeRune(s.Bytes())
|
|
77 if got != expect {
|
|
78 t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got)
|
|
79 }
|
|
80 }
|
|
81 if s.Scan() {
|
|
82 t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
|
|
83 }
|
|
84 testRuneCount := utf8.RuneCountInString(test)
|
|
85 if runeCount != testRuneCount {
|
|
86 t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount)
|
|
87 }
|
|
88 err := s.Err()
|
|
89 if err != nil {
|
|
90 t.Errorf("#%d: %v", n, err)
|
|
91 }
|
|
92 }
|
|
93 }
|
|
94
|
|
// wordScanTests lists the inputs for the word scanning test: empty and
// space-only strings, single words, and words separated by a variety of
// white space characters.
var wordScanTests = []string{
	"",
	" ",
	"\n",
	"a",
	" a ",
	"abc def",
	" abc def ",
	" abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n",
}
|
|
105
|
|
106 // Test that the word splitter returns the same data as strings.Fields.
|
|
107 func TestScanWords(t *testing.T) {
|
|
108 for n, test := range wordScanTests {
|
|
109 buf := strings.NewReader(test)
|
|
110 s := NewScanner(buf)
|
|
111 s.Split(ScanWords)
|
|
112 words := strings.Fields(test)
|
|
113 var wordCount int
|
|
114 for wordCount = 0; wordCount < len(words); wordCount++ {
|
|
115 if !s.Scan() {
|
|
116 break
|
|
117 }
|
|
118 got := s.Text()
|
|
119 if got != words[wordCount] {
|
|
120 t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got)
|
|
121 }
|
|
122 }
|
|
123 if s.Scan() {
|
|
124 t.Errorf("#%d: scan ran too long, got %q", n, s.Text())
|
|
125 }
|
|
126 if wordCount != len(words) {
|
|
127 t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount)
|
|
128 }
|
|
129 err := s.Err()
|
|
130 if err != nil {
|
|
131 t.Errorf("#%d: %v", n, err)
|
|
132 }
|
|
133 }
|
|
134 }
|
|
135
|
|
// slowReader wraps an io.Reader and hands back at most max bytes per Read
// call. It is used to exercise the incremental reads in Scanner.Scan.
type slowReader struct {
	max int
	buf io.Reader
}

// Read caps the destination slice at sr.max bytes and then delegates to the
// wrapped reader, returning whatever that reader returns.
func (sr *slowReader) Read(p []byte) (n int, err error) {
	limit := len(p)
	if limit > sr.max {
		limit = sr.max
	}
	return sr.buf.Read(p[:limit])
}
|
|
149
|
|
// genLine resets buf and fills it with a predictable but non-trivial line of
// text derived from lineNum. The requested length n counts the terminal
// newline and, on every fifth line, a carriage return before it. When
// addNewline is false the \r and \n are left out.
func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) {
	buf.Reset()
	withCR := lineNum%5 == 0
	if withCR {
		n-- // Leave room for the \r.
	}
	// Emit n-1 payload bytes; the last slot is reserved for the \n.
	for off := lineNum; off < lineNum+n-1; off++ {
		switch c := 'a' + byte(off); c {
		case '\n', '\r':
			// A stray line terminator in the payload would confuse the tests.
			buf.WriteByte('N')
		default:
			buf.WriteByte(c)
		}
	}
	if !addNewline {
		return
	}
	if withCR {
		buf.WriteByte('\r')
	}
	buf.WriteByte('\n')
}
|
|
173
|
|
174 // Test the line splitter, including some carriage returns but no long lines.
|
|
175 func TestScanLongLines(t *testing.T) {
|
|
176 // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
|
|
177 tmp := new(bytes.Buffer)
|
|
178 buf := new(bytes.Buffer)
|
|
179 lineNum := 0
|
|
180 j := 0
|
|
181 for i := 0; i < 2*smallMaxTokenSize; i++ {
|
|
182 genLine(tmp, lineNum, j, true)
|
|
183 if j < smallMaxTokenSize {
|
|
184 j++
|
|
185 } else {
|
|
186 j--
|
|
187 }
|
|
188 buf.Write(tmp.Bytes())
|
|
189 lineNum++
|
|
190 }
|
|
191 s := NewScanner(&slowReader{1, buf})
|
|
192 s.Split(ScanLines)
|
|
193 s.MaxTokenSize(smallMaxTokenSize)
|
|
194 j = 0
|
|
195 for lineNum := 0; s.Scan(); lineNum++ {
|
|
196 genLine(tmp, lineNum, j, false)
|
|
197 if j < smallMaxTokenSize {
|
|
198 j++
|
|
199 } else {
|
|
200 j--
|
|
201 }
|
|
202 line := tmp.String() // We use the string-valued token here, for variety.
|
|
203 if s.Text() != line {
|
|
204 t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line)
|
|
205 }
|
|
206 }
|
|
207 err := s.Err()
|
|
208 if err != nil {
|
|
209 t.Fatal(err)
|
|
210 }
|
|
211 }
|
|
212
|
|
213 // Test that the line splitter errors out on a long line.
|
|
214 func TestScanLineTooLong(t *testing.T) {
|
|
215 const smallMaxTokenSize = 256 // Much smaller for more efficient testing.
|
|
216 // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize.
|
|
217 tmp := new(bytes.Buffer)
|
|
218 buf := new(bytes.Buffer)
|
|
219 lineNum := 0
|
|
220 j := 0
|
|
221 for i := 0; i < 2*smallMaxTokenSize; i++ {
|
|
222 genLine(tmp, lineNum, j, true)
|
|
223 j++
|
|
224 buf.Write(tmp.Bytes())
|
|
225 lineNum++
|
|
226 }
|
|
227 s := NewScanner(&slowReader{3, buf})
|
|
228 s.Split(ScanLines)
|
|
229 s.MaxTokenSize(smallMaxTokenSize)
|
|
230 j = 0
|
|
231 for lineNum := 0; s.Scan(); lineNum++ {
|
|
232 genLine(tmp, lineNum, j, false)
|
|
233 if j < smallMaxTokenSize {
|
|
234 j++
|
|
235 } else {
|
|
236 j--
|
|
237 }
|
|
238 line := tmp.Bytes()
|
|
239 if !bytes.Equal(s.Bytes(), line) {
|
|
240 t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
|
|
241 }
|
|
242 }
|
|
243 err := s.Err()
|
|
244 if err != ErrTooLong {
|
|
245 t.Fatalf("expected ErrTooLong; got %s", err)
|
|
246 }
|
|
247 }
|
|
248
|
|
249 // Test that the line splitter handles a final line without a newline.
|
|
250 func testNoNewline(text string, lines []string, t *testing.T) {
|
|
251 buf := strings.NewReader(text)
|
|
252 s := NewScanner(&slowReader{7, buf})
|
|
253 s.Split(ScanLines)
|
|
254 for lineNum := 0; s.Scan(); lineNum++ {
|
|
255 line := lines[lineNum]
|
|
256 if s.Text() != line {
|
|
257 t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line)
|
|
258 }
|
|
259 }
|
|
260 err := s.Err()
|
|
261 if err != nil {
|
|
262 t.Fatal(err)
|
|
263 }
|
|
264 }
|
|
265
|
|
266 // Test that the line splitter handles a final line without a newline.
|
|
267 func TestScanLineNoNewline(t *testing.T) {
|
|
268 const text = "abcdefghijklmn\nopqrstuvwxyz"
|
|
269 lines := []string{
|
|
270 "abcdefghijklmn",
|
|
271 "opqrstuvwxyz",
|
|
272 }
|
|
273 testNoNewline(text, lines, t)
|
|
274 }
|
|
275
|
|
276 // Test that the line splitter handles a final line with a carriage return but no newline.
|
|
277 func TestScanLineReturnButNoNewline(t *testing.T) {
|
|
278 const text = "abcdefghijklmn\nopqrstuvwxyz\r"
|
|
279 lines := []string{
|
|
280 "abcdefghijklmn",
|
|
281 "opqrstuvwxyz",
|
|
282 }
|
|
283 testNoNewline(text, lines, t)
|
|
284 }
|
|
285
|
|
286 // Test that the line splitter handles a final empty line.
|
|
287 func TestScanLineEmptyFinalLine(t *testing.T) {
|
|
288 const text = "abcdefghijklmn\nopqrstuvwxyz\n\n"
|
|
289 lines := []string{
|
|
290 "abcdefghijklmn",
|
|
291 "opqrstuvwxyz",
|
|
292 "",
|
|
293 }
|
|
294 testNoNewline(text, lines, t)
|
|
295 }
|
|
296
|
|
297 // Test that the line splitter handles a final empty line with a carriage return but no newline.
|
|
298 func TestScanLineEmptyFinalLineWithCR(t *testing.T) {
|
|
299 const text = "abcdefghijklmn\nopqrstuvwxyz\n\r"
|
|
300 lines := []string{
|
|
301 "abcdefghijklmn",
|
|
302 "opqrstuvwxyz",
|
|
303 "",
|
|
304 }
|
|
305 testNoNewline(text, lines, t)
|
|
306 }
|
|
307
|
|
// testError is the sentinel error returned by the custom split functions in
// these tests, so the tests can recognize it when the scanner reports it.
var testError = errors.New("testError")
|
|
309
|
|
310 // Test the correct error is returned when the split function errors out.
|
|
311 func TestSplitError(t *testing.T) {
|
|
312 // Create a split function that delivers a little data, then a predictable error.
|
|
313 numSplits := 0
|
|
314 const okCount = 7
|
|
315 errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
|
316 if atEOF {
|
|
317 panic("didn't get enough data")
|
|
318 }
|
|
319 if numSplits >= okCount {
|
|
320 return 0, nil, testError
|
|
321 }
|
|
322 numSplits++
|
|
323 return 1, data[0:1], nil
|
|
324 }
|
|
325 // Read the data.
|
|
326 const text = "abcdefghijklmnopqrstuvwxyz"
|
|
327 buf := strings.NewReader(text)
|
|
328 s := NewScanner(&slowReader{1, buf})
|
|
329 s.Split(errorSplit)
|
|
330 var i int
|
|
331 for i = 0; s.Scan(); i++ {
|
|
332 if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] {
|
|
333 t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0])
|
|
334 }
|
|
335 }
|
|
336 // Check correct termination location and error.
|
|
337 if i != okCount {
|
|
338 t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i)
|
|
339 }
|
|
340 err := s.Err()
|
|
341 if err != testError {
|
|
342 t.Fatalf("expected %q got %v", testError, err)
|
|
343 }
|
|
344 }
|
|
345
|
|
346 // Test that an EOF is overridden by a user-generated scan error.
|
|
347 func TestErrAtEOF(t *testing.T) {
|
|
348 s := NewScanner(strings.NewReader("1 2 33"))
|
|
349 // This splitter will fail on last entry, after s.err==EOF.
|
|
350 split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
|
|
351 advance, token, err = ScanWords(data, atEOF)
|
|
352 if len(token) > 1 {
|
|
353 if s.ErrOrEOF() != io.EOF {
|
|
354 t.Fatal("not testing EOF")
|
|
355 }
|
|
356 err = testError
|
|
357 }
|
|
358 return
|
|
359 }
|
|
360 s.Split(split)
|
|
361 for s.Scan() {
|
|
362 }
|
|
363 if s.Err() != testError {
|
|
364 t.Fatal("wrong error:", s.Err())
|
|
365 }
|
|
366 }
|
|
367
|
|
// alwaysError is a reader that never delivers data: every Read reports zero
// bytes together with io.ErrUnexpectedEOF. It supports the test for issue 5268.
type alwaysError struct{}

// Read ignores its argument and always fails with io.ErrUnexpectedEOF.
func (alwaysError) Read(_ []byte) (int, error) {
	return 0, io.ErrUnexpectedEOF
}
|
|
374
|
|
375 func TestNonEOFWithEmptyRead(t *testing.T) {
|
|
376 scanner := NewScanner(alwaysError{})
|
|
377 for scanner.Scan() {
|
|
378 t.Fatal("read should fail")
|
|
379 }
|
|
380 err := scanner.Err()
|
|
381 if err != io.ErrUnexpectedEOF {
|
|
382 t.Errorf("unexpected error: %v", err)
|
|
383 }
|
|
384 }
|
|
385
|
|
// endlessZeros is a reader that never makes progress: every Read reports zero
// bytes and no error. It is used to test that Scan finishes when faced with
// endless empty reads.
type endlessZeros struct{}

// Read ignores its argument and always returns zero bytes with a nil error.
func (endlessZeros) Read(_ []byte) (int, error) {
	return 0, nil
}
|
|
392
|
|
393 func TestBadReader(t *testing.T) {
|
|
394 scanner := NewScanner(endlessZeros{})
|
|
395 for scanner.Scan() {
|
|
396 t.Fatal("read should fail")
|
|
397 }
|
|
398 err := scanner.Err()
|
|
399 if err != io.ErrNoProgress {
|
|
400 t.Errorf("unexpected error: %v", err)
|
|
401 }
|
|
402 }
|
|
403
|
|
404 func TestScanWordsExcessiveWhiteSpace(t *testing.T) {
|
|
405 const word = "ipsum"
|
|
406 s := strings.Repeat(" ", 4*smallMaxTokenSize) + word
|
|
407 scanner := NewScanner(strings.NewReader(s))
|
|
408 scanner.MaxTokenSize(smallMaxTokenSize)
|
|
409 scanner.Split(ScanWords)
|
|
410 if !scanner.Scan() {
|
|
411 t.Fatalf("scan failed: %v", scanner.Err())
|
|
412 }
|
|
413 if token := scanner.Text(); token != word {
|
|
414 t.Fatalf("unexpected token: %v", token)
|
|
415 }
|
|
416 }
|
|
417
|
|
418 // Test that empty tokens, including at end of line or end of file, are found by the scanner.
|
|
419 // Issue 8672: Could miss final empty token.
|
|
420
|
|
// commaSplit is a split function that cuts the input at commas. When data
// contains a comma it returns the bytes before the first one as the token and
// advances past the comma. Otherwise it returns all of data as the token
// together with ErrFinalToken. The atEOF argument is not consulted.
func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if comma := bytes.IndexByte(data, ','); comma >= 0 {
		return comma + 1, data[:comma], nil
	}
	return 0, data, ErrFinalToken
}
|
|
429
|
|
430 func testEmptyTokens(t *testing.T, text string, values []string) {
|
|
431 s := NewScanner(strings.NewReader(text))
|
|
432 s.Split(commaSplit)
|
|
433 var i int
|
|
434 for i = 0; s.Scan(); i++ {
|
|
435 if i >= len(values) {
|
|
436 t.Fatalf("got %d fields, expected %d", i+1, len(values))
|
|
437 }
|
|
438 if s.Text() != values[i] {
|
|
439 t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
|
|
440 }
|
|
441 }
|
|
442 if i != len(values) {
|
|
443 t.Fatalf("got %d fields, expected %d", i, len(values))
|
|
444 }
|
|
445 if err := s.Err(); err != nil {
|
|
446 t.Fatal(err)
|
|
447 }
|
|
448 }
|
|
449
|
|
450 func TestEmptyTokens(t *testing.T) {
|
|
451 testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
|
|
452 }
|
|
453
|
|
454 func TestWithNoEmptyTokens(t *testing.T) {
|
|
455 testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
|
|
456 }
|
|
457
|
|
// loopAtEOFSplit is a split function that yields one byte per call while data
// is available. Once data is empty it keeps returning that empty slice as a
// token without advancing and without an error.
func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if len(data) == 0 {
		return 0, data, nil
	}
	return 1, data[:1], nil
}
|
|
464
|
|
465 func TestDontLoopForever(t *testing.T) {
|
|
466 s := NewScanner(strings.NewReader("abc"))
|
|
467 s.Split(loopAtEOFSplit)
|
|
468 // Expect a panic
|
|
469 defer func() {
|
|
470 err := recover()
|
|
471 if err == nil {
|
|
472 t.Fatal("should have panicked")
|
|
473 }
|
|
474 if msg, ok := err.(string); !ok || !strings.Contains(msg, "empty tokens") {
|
|
475 panic(err)
|
|
476 }
|
|
477 }()
|
|
478 for count := 0; s.Scan(); count++ {
|
|
479 if count > 1000 {
|
|
480 t.Fatal("looping")
|
|
481 }
|
|
482 }
|
|
483 if s.Err() != nil {
|
|
484 t.Fatal("after scan:", s.Err())
|
|
485 }
|
|
486 }
|
|
487
|
|
// TestBlankLines checks that scanning an input of 1000 consecutive newlines
// with the default split function terminates without an error.
func TestBlankLines(t *testing.T) {
	input := strings.Repeat("\n", 1000)
	s := NewScanner(strings.NewReader(input))
	lines := 0
	for s.Scan() {
		// Far more tokens than input bytes means the scanner is stuck.
		if lines > 2000 {
			t.Fatal("looping")
		}
		lines++
	}
	if err := s.Err(); err != nil {
		t.Fatal("after scan:", err)
	}
}
|
|
499
|
|
// countdown is a counter of how many more tokens its split method will
// deliver.
type countdown int

// split is a split function that delivers the first byte of data as a token
// and decrements the counter while the counter is positive. Once the counter
// reaches zero it returns no token and does not advance.
func (c *countdown) split(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if *c <= 0 {
		return 0, nil, nil
	}
	*c--
	return 1, data[:1], nil
}
|
|
509
|
|
510 // Check that the looping-at-EOF check doesn't trigger for merely empty tokens.
|
|
511 func TestEmptyLinesOK(t *testing.T) {
|
|
512 c := countdown(10000)
|
|
513 s := NewScanner(strings.NewReader(strings.Repeat("\n", 10000)))
|
|
514 s.Split(c.split)
|
|
515 for s.Scan() {
|
|
516 }
|
|
517 if s.Err() != nil {
|
|
518 t.Fatal("after scan:", s.Err())
|
|
519 }
|
|
520 if c != 0 {
|
|
521 t.Fatalf("stopped with %d left to process", c)
|
|
522 }
|
|
523 }
|
|
524
|
|
// TestHugeBuffer makes sure the scanner can read a token twice as long as
// MaxScanTokenSize when Buffer is given a large enough maximum, starting from
// a small initial buffer. The input is a single line, so exactly one token is
// expected.
func TestHugeBuffer(t *testing.T) {
	text := strings.Repeat("x", 2*MaxScanTokenSize)
	s := NewScanner(strings.NewReader(text + "\n"))
	s.Buffer(make([]byte, 100), 3*MaxScanTokenSize)
	tokens := 0
	for s.Scan() {
		tokens++
		token := s.Text()
		if token != text {
			t.Errorf("scan got incorrect token of length %d", len(token))
		}
	}
	if s.Err() != nil {
		t.Fatal("after scan:", s.Err())
	}
	// Without this check the test would pass even if Scan produced nothing.
	if tokens != 1 {
		t.Fatalf("scan produced %d tokens, expected 1", tokens)
	}
}
|