Skip to content

Commit 29d43df

Browse files
aclements authored and gopherbot committed
go/build, cmd/go: use ast.ParseDirective for go:embed
Currently, "//go:embed" directives are read by bespoke parsers in go/build and cmd/go/internal/modindex. Replace these bespoke parsers with scanner.Scanner for finding these directives and ast.ParseDirective for parsing them.

It's not clear why we had a bespoke parser just for finding "//go:embed" directives in the first place. We have a bespoke parser for reading imports in order to avoid having to read the entire source file into memory, but if we're parsing embeds, we wind up reading the entire source file into memory anyway. Using scanner.Scanner instead eliminates some truly confusing code.

This also demonstrates that ast.ParseDirective as proposed in #68021 achieves useful API coverage.

Updates #68021.

Change-Id: Ieb68738121dcff605a6a704a8045ddd2ff35df35
Reviewed-on: https://go-review.googlesource.com/c/go/+/704836
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Austin Clements <austin@google.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
1 parent 4e695dd commit 29d43df

File tree

2 files changed

+86
-472
lines changed

2 files changed

+86
-472
lines changed

src/cmd/go/internal/modindex/build_read.go

Lines changed: 43 additions & 236 deletions
Original file line number | Diff line number | Diff line change
@@ -92,37 +92,24 @@ func (r *importReader) readByte() byte {
9292
return c
9393
}
9494

95-
// readByteNoBuf is like readByte but doesn't buffer the byte.
96-
// It exhausts r.buf before reading from r.b.
97-
func (r *importReader) readByteNoBuf() byte {
98-
var c byte
99-
var err error
100-
if len(r.buf) > 0 {
101-
c = r.buf[0]
102-
r.buf = r.buf[1:]
103-
} else {
104-
c, err = r.b.ReadByte()
105-
if err == nil && c == 0 {
106-
err = errNUL
95+
// readRest reads the entire rest of the file into r.buf.
96+
func (r *importReader) readRest() {
97+
for {
98+
if len(r.buf) == cap(r.buf) {
99+
// Grow the buffer
100+
r.buf = append(r.buf, 0)[:len(r.buf)]
107101
}
108-
}
109-
110-
if err != nil {
111-
if err == io.EOF {
112-
r.eof = true
113-
} else if r.err == nil {
114-
r.err = err
102+
n, err := r.b.Read(r.buf[len(r.buf):cap(r.buf)])
103+
r.buf = r.buf[:len(r.buf)+n]
104+
if err != nil {
105+
if err == io.EOF {
106+
r.eof = true
107+
} else if r.err == nil {
108+
r.err = err
109+
}
110+
break
115111
}
116-
return 0
117112
}
118-
r.pos.Offset++
119-
if c == '\n' {
120-
r.pos.Line++
121-
r.pos.Column = 1
122-
} else {
123-
r.pos.Column++
124-
}
125-
return c
126113
}
127114

128115
// peekByte returns the next byte from the input reader but does not advance beyond it.
@@ -185,130 +172,6 @@ func (r *importReader) nextByte(skipSpace bool) byte {
185172
return c
186173
}
187174

188-
var goEmbed = []byte("go:embed")
189-
190-
// findEmbed advances the input reader to the next //go:embed comment.
191-
// It reports whether it found a comment.
192-
// (Otherwise it found an error or EOF.)
193-
func (r *importReader) findEmbed(first bool) bool {
194-
// The import block scan stopped after a non-space character,
195-
// so the reader is not at the start of a line on the first call.
196-
// After that, each //go:embed extraction leaves the reader
197-
// at the end of a line.
198-
startLine := !first
199-
var c byte
200-
for r.err == nil && !r.eof {
201-
c = r.readByteNoBuf()
202-
Reswitch:
203-
switch c {
204-
default:
205-
startLine = false
206-
207-
case '\n':
208-
startLine = true
209-
210-
case ' ', '\t':
211-
// leave startLine alone
212-
213-
case '"':
214-
startLine = false
215-
for r.err == nil {
216-
if r.eof {
217-
r.syntaxError()
218-
}
219-
c = r.readByteNoBuf()
220-
if c == '\\' {
221-
r.readByteNoBuf()
222-
if r.err != nil {
223-
r.syntaxError()
224-
return false
225-
}
226-
continue
227-
}
228-
if c == '"' {
229-
c = r.readByteNoBuf()
230-
goto Reswitch
231-
}
232-
}
233-
goto Reswitch
234-
235-
case '`':
236-
startLine = false
237-
for r.err == nil {
238-
if r.eof {
239-
r.syntaxError()
240-
}
241-
c = r.readByteNoBuf()
242-
if c == '`' {
243-
c = r.readByteNoBuf()
244-
goto Reswitch
245-
}
246-
}
247-
248-
case '\'':
249-
startLine = false
250-
for r.err == nil {
251-
if r.eof {
252-
r.syntaxError()
253-
}
254-
c = r.readByteNoBuf()
255-
if c == '\\' {
256-
r.readByteNoBuf()
257-
if r.err != nil {
258-
r.syntaxError()
259-
return false
260-
}
261-
continue
262-
}
263-
if c == '\'' {
264-
c = r.readByteNoBuf()
265-
goto Reswitch
266-
}
267-
}
268-
269-
case '/':
270-
c = r.readByteNoBuf()
271-
switch c {
272-
default:
273-
startLine = false
274-
goto Reswitch
275-
276-
case '*':
277-
var c1 byte
278-
for (c != '*' || c1 != '/') && r.err == nil {
279-
if r.eof {
280-
r.syntaxError()
281-
}
282-
c, c1 = c1, r.readByteNoBuf()
283-
}
284-
startLine = false
285-
286-
case '/':
287-
if startLine {
288-
// Try to read this as a //go:embed comment.
289-
for i := range goEmbed {
290-
c = r.readByteNoBuf()
291-
if c != goEmbed[i] {
292-
goto SkipSlashSlash
293-
}
294-
}
295-
c = r.readByteNoBuf()
296-
if c == ' ' || c == '\t' {
297-
// Found one!
298-
return true
299-
}
300-
}
301-
SkipSlashSlash:
302-
for c != '\n' && r.err == nil && !r.eof {
303-
c = r.readByteNoBuf()
304-
}
305-
startLine = true
306-
}
307-
}
308-
}
309-
return false
310-
}
311-
312175
// readKeyword reads the given keyword from the input.
313176
// If the keyword is not present, readKeyword records a syntax error.
314177
func (r *importReader) readKeyword(kw string) {
@@ -429,9 +292,7 @@ func readGoInfo(f io.Reader, info *fileInfo) error {
429292
// we are sure we don't change the errors that go/parser returns.
430293
if r.err == errSyntax {
431294
r.err = nil
432-
for r.err == nil && !r.eof {
433-
r.readByte()
434-
}
295+
r.readRest()
435296
info.header = r.buf
436297
}
437298
if r.err != nil {
@@ -504,23 +365,23 @@ func readGoInfo(f io.Reader, info *fileInfo) error {
504365
// (near the package statement or imports), the compiler
505366
// will reject them. They can be (and have already been) ignored.
506367
if hasEmbed {
507-
var line []byte
508-
for first := true; r.findEmbed(first); first = false {
509-
line = line[:0]
510-
pos := r.pos
511-
for {
512-
c := r.readByteNoBuf()
513-
if c == '\n' || r.err != nil || r.eof {
514-
break
515-
}
516-
line = append(line, c)
368+
r.readRest()
369+
fset := token.NewFileSet()
370+
file := fset.AddFile(r.pos.Filename, -1, len(r.buf))
371+
var sc scanner.Scanner
372+
sc.Init(file, r.buf, nil, scanner.ScanComments)
373+
for {
374+
pos, tok, lit := sc.Scan()
375+
if tok == token.EOF {
376+
break
517377
}
518-
// Add args if line is well-formed.
519-
// Ignore badly-formed lines - the compiler will report them when it finds them,
520-
// and we can pretend they are not there to help go list succeed with what it knows.
521-
embs, err := parseGoEmbed(string(line), pos)
522-
if err == nil {
523-
info.embeds = append(info.embeds, embs...)
378+
if tok == token.COMMENT && strings.HasPrefix(lit, "//go:embed") {
379+
// Ignore badly-formed lines - the compiler will report them when it finds them,
380+
// and we can pretend they are not there to help go list succeed with what it knows.
381+
embs, err := parseGoEmbed(fset, pos, lit)
382+
if err == nil {
383+
info.embeds = append(info.embeds, embs...)
384+
}
524385
}
525386
}
526387
}
@@ -542,75 +403,21 @@ func isValidImport(s string) bool {
542403
return s != ""
543404
}
544405

545-
// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
406+
// parseGoEmbed parses a "//go:embed" to extract the glob patterns.
546407
// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
547-
// This is based on a similar function in cmd/compile/internal/gc/noder.go;
548-
// this version calculates position information as well.
549-
func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
550-
trimBytes := func(n int) {
551-
pos.Offset += n
552-
pos.Column += utf8.RuneCountInString(args[:n])
553-
args = args[n:]
408+
// This must match the behavior of cmd/compile/internal/noder.go.
409+
func parseGoEmbed(fset *token.FileSet, pos token.Pos, comment string) ([]fileEmbed, error) {
410+
dir, ok := ast.ParseDirective(pos, comment)
411+
if !ok || dir.Tool != "go" || dir.Name != "embed" {
412+
return nil, nil
554413
}
555-
trimSpace := func() {
556-
trim := strings.TrimLeftFunc(args, unicode.IsSpace)
557-
trimBytes(len(args) - len(trim))
414+
args, err := dir.ParseArgs()
415+
if err != nil {
416+
return nil, err
558417
}
559-
560418
var list []fileEmbed
561-
for trimSpace(); args != ""; trimSpace() {
562-
var path string
563-
pathPos := pos
564-
Switch:
565-
switch args[0] {
566-
default:
567-
i := len(args)
568-
for j, c := range args {
569-
if unicode.IsSpace(c) {
570-
i = j
571-
break
572-
}
573-
}
574-
path = args[:i]
575-
trimBytes(i)
576-
577-
case '`':
578-
var ok bool
579-
path, _, ok = strings.Cut(args[1:], "`")
580-
if !ok {
581-
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
582-
}
583-
trimBytes(1 + len(path) + 1)
584-
585-
case '"':
586-
i := 1
587-
for ; i < len(args); i++ {
588-
if args[i] == '\\' {
589-
i++
590-
continue
591-
}
592-
if args[i] == '"' {
593-
q, err := strconv.Unquote(args[:i+1])
594-
if err != nil {
595-
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
596-
}
597-
path = q
598-
trimBytes(i + 1)
599-
break Switch
600-
}
601-
}
602-
if i >= len(args) {
603-
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
604-
}
605-
}
606-
607-
if args != "" {
608-
r, _ := utf8.DecodeRuneInString(args)
609-
if !unicode.IsSpace(r) {
610-
return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
611-
}
612-
}
613-
list = append(list, fileEmbed{path, pathPos})
419+
for _, arg := range args {
420+
list = append(list, fileEmbed{arg.Arg, fset.Position(arg.Pos)})
614421
}
615422
return list, nil
616423
}

0 commit comments

Comments
 (0)