@@ -19,6 +19,16 @@ typedef struct {
1919 bool in_line_continuation ;
2020} Scanner ;
2121
22+ typedef enum {
23+ False ,
24+ True ,
25+ Error ,
26+ } BoolOrErr ;
27+
28+ static BoolOrErr bool_or_err_max (BoolOrErr lhs , BoolOrErr rhs ) {
29+ return lhs >= rhs ? lhs : rhs ;
30+ }
31+
2232// consume current character into current token and advance
2333static inline void advance (TSLexer * lexer ) { lexer -> advance (lexer , false); }
2434
@@ -54,61 +64,63 @@ static bool is_exp_sentinel(char chr) {
5464 }
5565}
5666
57- static bool scan_int (TSLexer * lexer ) {
67+ static BoolOrErr scan_int (TSLexer * lexer ) {
5868 if (!iswdigit (lexer -> lookahead )) {
59- return false ;
69+ return False ;
6070 }
6171 // consume digits
6272 while (iswdigit (lexer -> lookahead )) {
6373 advance (lexer ); // store all digits
6474 }
65-
66- // handle line continuations
75+ lexer -> mark_end (lexer );
76+ // Return an error if a line continuation is found. This scanner cannot
77+ // handle line continuations, particularly in cases like:
78+ //
79+ // ```fortran
80+ // b = 6& ! foo
81+ // &7;
82+ // ```
83+ //
84+ // Here, the scanner would need to return multiple tokens, but tree-sitter
85+ // expects only a single token.
6786 if (lexer -> lookahead == '&' ) {
68- skip (lexer );
69- while (iswspace (lexer -> lookahead )) {
70- skip (lexer );
71- }
72- // second '&' required to continue the literal
73- if (lexer -> lookahead == '&' ) {
74- skip (lexer );
75- // don't return here, as we may have finished literal on first
76- // line but still have second '&'
77- scan_int (lexer );
78- }
87+ return Error ;
7988 }
80-
81- lexer -> mark_end (lexer );
82- return true;
89+ return True ;
8390}
8491
8592/// Scan a number of the forms 1XXX, 1.0XXX, 0.1XXX, 1.XDX, etc.
86- static bool scan_number (TSLexer * lexer ) {
93+ static BoolOrErr scan_number (TSLexer * lexer ) {
8794 lexer -> result_symbol = INTEGER_LITERAL ;
88- bool digits = scan_int (lexer );
95+ BoolOrErr digits = scan_int (lexer );
8996 if (lexer -> lookahead == '.' ) {
9097 advance (lexer );
9198 // exclude decimal if followed by any letter other than d/D and e/E
9299 // if no leading digits are present and a non-digit follows
93100 // the decimal it's a nonmatch.
94- if (digits && !iswalnum (lexer -> lookahead )) {
101+ if (( digits == True ) && !iswalnum (lexer -> lookahead )) {
95102 lexer -> mark_end (lexer ); // add decimal to token
96103 }
97104 lexer -> result_symbol = FLOAT_LITERAL ;
98105 }
99106 // if next char isn't number return since we handle exp
100107 // notation and precision identifiers separately. If there are
101108 // no leading digit it's a nonmatch.
102- digits = scan_int (lexer ) || digits ;
103- if (digits ) {
109+ digits = bool_or_err_max ( scan_int (lexer ), digits ) ;
110+ if (digits == True ) {
104111 // process exp notation
105112 if (is_exp_sentinel (lexer -> lookahead )) {
106113 advance (lexer );
107114 if (lexer -> lookahead == '+' || lexer -> lookahead == '-' ) {
108115 advance (lexer );
109116 }
110- if (!scan_int (lexer )) {
111- return true; // valid number token with junk after it
117+ switch (scan_int (lexer )) {
118+ case False :
119+ return True ; // valid number token with junk after it
120+ case True :
121+ break ;
122+ case Error :
123+ return Error ;
112124 }
113125 lexer -> mark_end (lexer );
114126 lexer -> result_symbol = FLOAT_LITERAL ;
@@ -429,8 +441,13 @@ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
429441 if (valid_symbols [INTEGER_LITERAL ] || valid_symbols [FLOAT_LITERAL ] ||
430442 valid_symbols [BOZ_LITERAL ]) {
431443 // extract out root number from expression
432- if (scan_number (lexer )) {
433- return true;
444+ switch (scan_number (lexer )) {
445+ case False :
446+ break ;
447+ case True :
448+ return true;
449+ case Error :
450+ return false;
434451 }
435452 if (scan_boz (lexer )) {
436453 return true;
0 commit comments