Skip to content

Commit 5d7a635

Browse files
bitfield and hobti01 committed
Remove arbitrary 64KiB limit on line length
The default bufio.Scanner is limited to 64 * 1024 bytes, returning a 'token too long' error if a line exceeds this size. This commit instead sets the token limit to math.MaxInt, which is the biggest possible slice capacity in Go, so the scan line length is effectively limited only by available memory (fixes #167). Co-authored-by: Tim Hobbs <timothy.hobbs@servicelayers.com>
1 parent 5bcb73e commit 5d7a635

File tree

2 files changed

+110
-16
lines changed

2 files changed

+110
-16
lines changed

script.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"encoding/json"
99
"fmt"
1010
"io"
11+
"math"
1112
"net/http"
1213
"os"
1314
"os/exec"
@@ -338,7 +339,7 @@ func (p *Pipe) Do(req *http.Request) *Pipe {
338339
// concurrently and don't do unnecessary reads on the input.
339340
func (p *Pipe) EachLine(process func(string, *strings.Builder)) *Pipe {
340341
return p.Filter(func(r io.Reader, w io.Writer) error {
341-
scanner := bufio.NewScanner(r)
342+
scanner := newScanner(r)
342343
output := strings.Builder{}
343344
for scanner.Scan() {
344345
process(scanner.Text(), &output)
@@ -413,7 +414,7 @@ func (p *Pipe) ExecForEach(cmdLine string) *Pipe {
413414
return p.WithError(err)
414415
}
415416
return p.Filter(func(r io.Reader, w io.Writer) error {
416-
scanner := bufio.NewScanner(r)
417+
scanner := newScanner(r)
417418
for scanner.Scan() {
418419
cmdLine := strings.Builder{}
419420
err := tpl.Execute(&cmdLine, scanner.Text())
@@ -502,7 +503,7 @@ func (p *Pipe) FilterLine(filter func(string) string) *Pipe {
502503
// handling.
503504
func (p *Pipe) FilterScan(filter func(string, io.Writer)) *Pipe {
504505
return p.Filter(func(r io.Reader, w io.Writer) error {
505-
scanner := bufio.NewScanner(r)
506+
scanner := newScanner(r)
506507
for scanner.Scan() {
507508
filter(scanner.Text(), w)
508509
}
@@ -555,7 +556,7 @@ func (p *Pipe) Freq() *Pipe {
555556
count int
556557
}
557558
return p.Filter(func(r io.Reader, w io.Writer) error {
558-
scanner := bufio.NewScanner(r)
559+
scanner := newScanner(r)
559560
for scanner.Scan() {
560561
freq[scanner.Text()]++
561562
}
@@ -597,7 +598,7 @@ func (p *Pipe) Get(URL string) *Pipe {
597598
// space-separated string, which will always end with a newline.
598599
func (p *Pipe) Join() *Pipe {
599600
return p.Filter(func(r io.Reader, w io.Writer) error {
600-
scanner := bufio.NewScanner(r)
601+
scanner := newScanner(r)
601602
var line string
602603
first := true
603604
for scanner.Scan() {
@@ -659,7 +660,7 @@ func (p *Pipe) Last(n int) *Pipe {
659660
return NewPipe()
660661
}
661662
return p.Filter(func(r io.Reader, w io.Writer) error {
662-
scanner := bufio.NewScanner(r)
663+
scanner := newScanner(r)
663664
input := ring.New(n)
664665
for scanner.Scan() {
665666
input.Value = scanner.Text()
@@ -938,3 +939,9 @@ func (ra ReadAutoCloser) Read(b []byte) (n int, err error) {
938939
}
939940
return n, err
940941
}
942+
943+
// newScanner returns a bufio.Scanner reading from r, configured with a
// 4096-byte initial buffer and a maximum token size of math.MaxInt in place
// of the scanner's default limit.
func newScanner(r io.Reader) *bufio.Scanner {
944+
scanner := bufio.NewScanner(r)
945+
// Raise the token cap so that very long lines do not fail with a
// "token too long" error.
scanner.Buffer(make([]byte, 4096), math.MaxInt)
946+
return scanner
947+
}

script_test.go

Lines changed: 97 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -270,12 +270,11 @@ func TestDoPerformsSuppliedHTTPRequest(t *testing.T) {
270270

271271
func TestEachLine_FiltersInputThroughSuppliedFunction(t *testing.T) {
272272
t.Parallel()
273-
p := script.Echo("Hello\nGoodbye")
274-
q := p.EachLine(func(line string, out *strings.Builder) {
275-
out.WriteString(line + " world\n")
276-
})
277273
want := "Hello world\nGoodbye world\n"
278-
got, err := q.String()
274+
got, err := script.Echo("Hello\nGoodbye").
275+
EachLine(func(line string, out *strings.Builder) {
276+
out.WriteString(line + " world\n")
277+
}).String()
279278
if err != nil {
280279
t.Fatal(err)
281280
}
@@ -284,6 +283,22 @@ func TestEachLine_FiltersInputThroughSuppliedFunction(t *testing.T) {
284283
}
285284
}
286285

286+
// TestEachLine_HandlesLongLines feeds longLine through EachLine and checks
// that no error is returned and that the callback runs exactly twice.
func TestEachLine_HandlesLongLines(t *testing.T) {
287+
t.Parallel()
288+
var got int
289+
_, err := script.Echo(longLine).
290+
EachLine(func(line string, out *strings.Builder) {
291+
// Only count invocations; nothing is written to out.
got++
292+
}).String()
293+
if err != nil {
294+
t.Fatal(err)
295+
}
296+
want := 2
297+
if want != got {
298+
t.Errorf("want %d lines counted, got %d", want, got)
299+
}
300+
}
301+
287302
func TestEchoProducesSuppliedString(t *testing.T) {
288303
t.Parallel()
289304
want := "Hello, world."
@@ -328,6 +343,17 @@ func TestExecForEach_ErrorsOnUnbalancedQuotes(t *testing.T) {
328343
}
329344
}
330345

346+
// TestExecForEach_HandlesLongLines runs the `echo "{{.}}"` command template
// via ExecForEach on longLine and checks that the pipe's output equals
// longLine and that no error is returned.
func TestExecForEach_HandlesLongLines(t *testing.T) {
347+
t.Parallel()
348+
got, err := script.Echo(longLine).ExecForEach(`echo "{{.}}"`).String()
349+
if err != nil {
350+
t.Fatal(err)
351+
}
352+
if longLine != got {
353+
t.Error(cmp.Diff(longLine, got))
354+
}
355+
}
356+
331357
func TestFilterByCopyPassesInputThroughUnchanged(t *testing.T) {
332358
t.Parallel()
333359
p := script.Echo("hello").Filter(func(r io.Reader, w io.Writer) error {
@@ -455,11 +481,29 @@ func TestFilterScan_FiltersInputLineByLine(t *testing.T) {
455481
t.Parallel()
456482
input := "hello\nworld\ngoodbye"
457483
want := "world\n"
458-
got, err := script.Echo(input).FilterScan(func(line string, w io.Writer) {
459-
if strings.HasPrefix(line, "w") {
460-
fmt.Fprintln(w, line)
461-
}
462-
}).String()
484+
got, err := script.Echo(input).
485+
FilterScan(func(line string, w io.Writer) {
486+
if strings.HasPrefix(line, "w") {
487+
fmt.Fprintln(w, line)
488+
}
489+
}).String()
490+
if err != nil {
491+
t.Fatal(err)
492+
}
493+
if want != got {
494+
t.Error(cmp.Diff(want, got))
495+
}
496+
}
497+
498+
func TestFilterScan_HandlesLongLines(t *testing.T) {
499+
t.Parallel()
500+
want := "last line\n"
501+
got, err := script.Echo(longLine).
502+
FilterScan(func(line string, w io.Writer) {
503+
if strings.HasPrefix(line, "last") {
504+
fmt.Fprintln(w, line)
505+
}
506+
}).String()
463507
if err != nil {
464508
t.Fatal(err)
465509
}
@@ -520,6 +564,20 @@ func TestFirstHasNoEffectGivenLessThanNInputLines(t *testing.T) {
520564
}
521565
}
522566

567+
// TestFreqHandlesLongLines runs Freq on longLine and checks that no error is
// returned, that exactly two results come back, and that the first result is
// "1 last line".
func TestFreqHandlesLongLines(t *testing.T) {
568+
t.Parallel()
569+
got, err := script.Echo(longLine).Freq().Slice()
570+
if err != nil {
571+
t.Fatal(err)
572+
}
573+
if len(got) != 2 {
574+
t.Fatalf("want 2 results, got %d: %q", len(got), got)
575+
}
576+
// Only the first entry is checked; the long line's entry is not compared.
if got[0] != "1 last line" {
577+
t.Fatalf("wrong result: %q", got)
578+
}
579+
}
580+
523581
func TestFreqProducesCorrectFrequencyTableForInput(t *testing.T) {
524582
t.Parallel()
525583
input := strings.Join([]string{
@@ -620,6 +678,18 @@ func TestGetUsesPipeContentsAsRequestBody(t *testing.T) {
620678
}
621679
}
622680

681+
// TestJoinHandlesLongLines runs Join on longLine and checks that no error is
// returned and that the output has the same length as longLine.
func TestJoinHandlesLongLines(t *testing.T) {
682+
t.Parallel()
683+
result, err := script.Echo(longLine).Join().String()
684+
if err != nil {
685+
t.Fatal(err)
686+
}
// NOTE(review): only the lengths are compared, not the contents, so this
// test would not catch a same-length but different result.
687+
want := len(longLine)
688+
if want != len(result) {
689+
t.Errorf("want result length %d, got %d", want, len(result))
690+
}
691+
}
692+
623693
func TestJoinJoinsInputLinesIntoSpaceSeparatedString(t *testing.T) {
624694
t.Parallel()
625695
input := "hello\nfrom\nthe\njoin\ntest"
@@ -726,6 +796,18 @@ func TestLastDropsAllButLastNLinesOfInput(t *testing.T) {
726796
}
727797
}
728798

799+
// TestLastHandlesLongLines runs Last(1) on longLine and checks that no error
// is returned and that the output is exactly "last line\n".
func TestLastHandlesLongLines(t *testing.T) {
800+
t.Parallel()
801+
want := "last line\n"
802+
got, err := script.Echo(longLine).Last(1).String()
803+
if err != nil {
804+
t.Fatal(err)
805+
}
806+
if want != got {
807+
t.Error(cmp.Diff(want, got))
808+
}
809+
}
810+
729811
func TestLastHasNoOutputWhenNIs0(t *testing.T) {
730812
t.Parallel()
731813
input := "a\nb\nc\n"
@@ -2123,3 +2205,8 @@ func ExampleSlice() {
21232205
// 2
21242206
// 3
21252207
}
2208+
2209+
// A string containing a line longer than bufio.MaxScanTokenSize, for testing
2210+
// methods that buffer input. We want to make sure they don't return
2211+
// "bufio.Scanner: token too long" errors. The first line is a 16-byte phrase
// repeated 4096 times (65536 bytes before its newline); the second line is
// "last line".
2212+
var longLine = strings.Repeat("super long line ", 4096) + "\nlast line\n"

0 commit comments

Comments
 (0)