@@ -7,8 +7,9 @@ import core.Names.SimpleName
77import Scanners ._
88import util .SourceFile
99import JavaTokens ._
10- import scala .annotation .{ switch , tailrec }
10+ import scala .annotation .{switch , tailrec }
1111import util .Chars ._
12+ import PartialFunction .cond
1213
1314object JavaScanners {
1415
@@ -31,23 +32,29 @@ object JavaScanners {
3132 // Get next token ------------------------------------------------------------
3233
3334 def nextToken (): Unit =
34- if ( next.token == EMPTY ) {
35+ if next.token == EMPTY then
3536 lastOffset = lastCharOffset
3637 fetchToken()
37- }
38- else {
39- this copyFrom next
38+ else
39+ this .copyFrom(next)
4040 next.token = EMPTY
41- }
4241
43- def lookaheadToken : Int = {
44- prev copyFrom this
45- nextToken()
42+ def lookaheadToken : Int =
43+ lookAhead()
4644 val t = token
47- next copyFrom this
48- this copyFrom prev
45+ reset()
4946 t
50- }
47+
48+ def lookAhead (): Unit = // copies this scanner's data into `prev`, then advances by one token
49+ prev.copyFrom(this )
50+ nextToken()
51+
52+ def reset (): Unit = // copies the current (looked-ahead) data into `next`, then restores this scanner from `prev`
53+ next.copyFrom(this )
54+ this .copyFrom(prev)
55+
56+ class LookaheadScanner extends JavaScanner (source, startFrom = charOffset - 1 ):
57+ override protected def initialize (): Unit = nextChar()
5158
5259 /** read next token
5360 */
@@ -93,15 +100,23 @@ object JavaScanners {
93100
94101 case '\" ' =>
95102 nextChar()
96- while (ch != '\" ' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU ))
97- getlitch()
98- if (ch == '\" ' ) {
99- token = STRINGLIT
100- setStrVal()
101- nextChar()
102- }
103+ if ch != '\" ' then // "..." non-empty string literal
104+ while ch != '\" ' && (isUnicodeEscape || ch != CR && ch != LF && ch != SU ) do
105+ getlitch()
106+ if ch == '\" ' then
107+ token = STRINGLIT
108+ setStrVal()
109+ nextChar()
110+ else
111+ error(" unclosed string literal" )
103112 else
104- error(" unclosed string literal" )
113+ nextChar()
114+ if ch != '\" ' then // "" empty string literal
115+ token = STRINGLIT
116+ setStrVal()
117+ else
118+ nextChar()
119+ getTextBlock()
105120
106121 case '\' ' =>
107122 nextChar()
@@ -399,46 +414,177 @@ object JavaScanners {
399414
400415 // Literals -----------------------------------------------------------------
401416
402- /** read next character in character or string literal:
417+ /** Read next character in character or string literal.
403418 */
404- protected def getlitch (): Unit =
405- if (ch == '\\ ' ) {
419+ protected def getlitch (): Unit = getlitch(scanOnly = false , inTextBlock = false )
420+
421+ /** Read next character in character or string literal.
422+ *
423+ * @param scanOnly skip emitting errors or adding to the literal buffer
424+ * @param inTextBlock is this for a text block?
425+ */
426+ def getlitch (scanOnly : Boolean , inTextBlock : Boolean ): Unit =
427+ def octal : Char =
428+ val leadch : Char = ch
429+ var oct : Int = digit2int(ch, 8 )
406430 nextChar()
407431 if ('0' <= ch && ch <= '7' ) {
408- val leadch : Char = ch
409- var oct : Int = digit2int(ch, 8 )
432+ oct = oct * 8 + digit2int(ch, 8 )
410433 nextChar()
411- if ('0' <= ch && ch <= '7' ) {
434+ if (leadch <= '3' && '0' <= ch && ch <= '7' ) {
412435 oct = oct * 8 + digit2int(ch, 8 )
413436 nextChar()
414- if (leadch <= '3' && '0' <= ch && ch <= '7' ) {
415- oct = oct * 8 + digit2int(ch, 8 )
416- nextChar()
437+ }
438+ }
439+ oct.asInstanceOf [Char ]
440+ end octal
441+ def greatEscape : Char =
442+ nextChar()
443+ if '0' <= ch && ch <= '7' then octal
444+ else
445+ val x = ch match
446+ case 'b' => '\b '
447+ case 's' => ' '
448+ case 't' => '\t '
449+ case 'n' => '\n '
450+ case 'f' => '\f '
451+ case 'r' => '\r '
452+ case '\" ' => '\" '
453+ case '\' ' => '\' '
454+ case '\\ ' => '\\ '
455+ case CR | LF if inTextBlock =>
456+ if ! scanOnly then nextChar()
457+ 0
458+ case _ =>
459+ if ! scanOnly then error(" invalid escape character" , charOffset - 1 )
460+ ch
461+ if x != 0 then nextChar()
462+ x
463+ end greatEscape
464+
465+ // begin getlitch
466+ val c : Char =
467+ if ch == '\\ ' then greatEscape
468+ else
469+ val res = ch
470+ nextChar()
471+ res
472+ if c != 0 && ! scanOnly then putChar(c)
473+ end getlitch
474+
475+ /** Read a triple-quote delimited text block, starting after the first three double quotes.
476+ */
477+ private def getTextBlock (): Unit = {
478+ // Open delimiter is followed by optional space, then a newline
479+ while (ch == ' ' || ch == '\t ' || ch == FF ) {
480+ nextChar()
481+ }
482+ if (ch != LF && ch != CR ) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
483+ error(" illegal text block open delimiter sequence, missing line terminator" )
484+ return
485+ }
486+ nextChar()
487+
488+ /* Do a lookahead scan over the full text block to:
489+ * - compute common white space prefix
490+ * - find the offset where the text block ends
491+ */
492+ var commonWhiteSpacePrefix = Int .MaxValue
493+ var blockEndOffset = 0
494+ var blockClosed = false
495+ var lineWhiteSpacePrefix = 0
496+ var lineIsOnlyWhitespace = true
497+ val in = LookaheadScanner ()
498+ while (! blockClosed && (in.isUnicodeEscape || in.ch != SU )) { // test the lookahead scanner `in`: only it advances in this loop, so checking the outer `ch` could never detect end of input
499+ if (in.ch == '\" ' ) { // Potential end of the block
500+ in.nextChar()
501+ if (in.ch == '\" ' ) {
502+ in.nextChar()
503+ if (in.ch == '\" ' ) {
504+ blockClosed = true
505+ commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
506+ blockEndOffset = in.charOffset - 2
417507 }
418508 }
419- putChar(oct.asInstanceOf [Char ])
509+
510+ // Not the end of the block - just a single or double " character
511+ if (! blockClosed) {
512+ lineIsOnlyWhitespace = false
513+ }
514+ } else if (in.ch == CR || in.ch == LF ) { // new line in the block
515+ in.nextChar()
516+ if (! lineIsOnlyWhitespace) {
517+ commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
518+ }
519+ lineWhiteSpacePrefix = 0
520+ lineIsOnlyWhitespace = true
521+ } else if (lineIsOnlyWhitespace && Character .isWhitespace(in.ch)) { // extend white space prefix
522+ in.nextChar()
523+ lineWhiteSpacePrefix += 1
524+ } else {
525+ lineIsOnlyWhitespace = false
526+ in.getlitch(scanOnly = true , inTextBlock = true )
420527 }
421- else {
422- ch match {
423- case 'b' => putChar('\b ' )
424- case 't' => putChar('\t ' )
425- case 'n' => putChar('\n ' )
426- case 'f' => putChar('\f ' )
427- case 'r' => putChar('\r ' )
428- case '\" ' => putChar('\" ' )
429- case '\' ' => putChar('\' ' )
430- case '\\ ' => putChar('\\ ' )
431- case _ =>
432- error(" invalid escape character" , charOffset - 1 )
433- putChar(ch)
528+ }
529+
530+ // Bail out if the block never did have an end
531+ if (! blockClosed) {
532+ error(" unclosed text block" )
533+ return
534+ }
535+
536+ // Second pass: construct the literal string value this time
537+ while (charOffset < blockEndOffset) {
538+ // Drop the line's leading whitespace
539+ var remainingPrefix = commonWhiteSpacePrefix
540+ while (remainingPrefix > 0 && ch != CR && ch != LF && charOffset < blockEndOffset) {
541+ nextChar()
542+ remainingPrefix -= 1
543+ }
544+
545+ var trailingWhitespaceLength = 0
546+ var escapedNewline = false // Does the line end with `\`?
547+ while (ch != CR && ch != LF && charOffset < blockEndOffset && ! escapedNewline) {
548+ if (Character .isWhitespace(ch)) {
549+ trailingWhitespaceLength += 1
550+ } else {
551+ trailingWhitespaceLength = 0
434552 }
553+
554+ // Detect if the line is about to end with `\`
555+ if ch == '\\ ' && cond(lookaheadChar()) { case CR | LF => true } then
556+ escapedNewline = true
557+
558+ getlitch(scanOnly = false , inTextBlock = true )
559+ }
560+
561+ // Remove the last N characters from the buffer
562+ def popNChars (n : Int ): Unit =
563+ if n > 0 then
564+ val text = litBuf.toString
565+ litBuf.clear()
566+ val trimmed = text.substring(0 , text.length - (n min text.length))
567+ trimmed.nn.foreach(litBuf.append)
568+
569+ // Drop the line's trailing whitespace
570+ popNChars(trailingWhitespaceLength)
571+
572+ // Normalize line terminators
573+ if ((ch == CR || ch == LF ) && ! escapedNewline) {
435574 nextChar()
575+ putChar('\n ' )
436576 }
437577 }
438- else {
439- putChar(ch)
440- nextChar()
441- }
578+
579+ token = STRINGLIT
580+ setStrVal()
581+
582+ // Trailing """
583+ nextChar()
584+ nextChar()
585+ nextChar()
586+ }
587+ end getTextBlock
442588
443589 /** read fractional part and exponent of floating point number
444590 * if one is present.
@@ -585,8 +731,10 @@ object JavaScanners {
585731 }
586732
587733 /* Initialization: read first char, then first token */
588- nextChar()
589- nextToken()
734+ protected def initialize (): Unit =
735+ nextChar()
736+ nextToken()
737+ initialize()
590738 }
591739
592740 private val (lastKeywordStart, kwArray) = buildKeywordArray(keywords)
0 commit comments