1+ #include "tree_sitter/alloc.h"
12#include "tree_sitter/parser.h"
3+
24#include <wctype.h>
35
46enum TokenType {
57 STRING_CONTENT ,
6- RAW_STRING_LITERAL ,
8+ RAW_STRING_LITERAL_START ,
9+ RAW_STRING_LITERAL_CONTENT ,
10+ RAW_STRING_LITERAL_END ,
711 FLOAT_LITERAL ,
812 BLOCK_OUTER_DOC_MARKER ,
913 BLOCK_INNER_DOC_MARKER ,
@@ -12,15 +16,28 @@ enum TokenType {
1216 ERROR_SENTINEL
1317};
1418
15- void * tree_sitter_rust_external_scanner_create () { return NULL ; }
/**
 * Persistent state of the external scanner.
 *
 * opening_hash_count holds the number of '#' characters counted by
 * scan_raw_string_start for the raw string literal currently being scanned.
 * scan_raw_string_content and scan_raw_string_end read it to recognise and
 * consume the matching closing delimiter. It is a single byte so that it can
 * be serialized as exactly one char.
 */
typedef struct Scanner {
    uint8_t opening_hash_count;
} Scanner;
1622
17- void tree_sitter_rust_external_scanner_destroy ( void * p ) {}
23+ void * tree_sitter_rust_external_scanner_create ( ) { return ts_calloc ( 1 , sizeof ( Scanner )); }
1824
19- void tree_sitter_rust_external_scanner_reset (void * p ) {}
/**
 * Release the scanner state.
 *
 * payload is the pointer handed out by
 * tree_sitter_rust_external_scanner_create; it is passed straight to ts_free.
 */
void tree_sitter_rust_external_scanner_destroy(void *payload) {
    ts_free(payload);
}
2026
21- unsigned tree_sitter_rust_external_scanner_serialize (void * p , char * buffer ) { return 0 ; }
27+ unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
28+ Scanner * scanner = (Scanner * )payload ;
29+ buffer [0 ] = (char )scanner -> opening_hash_count ;
30+ return 1 ;
31+ }
2232
23- void tree_sitter_rust_external_scanner_deserialize (void * p , const char * b , unsigned n ) {}
33+ void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
34+ Scanner * scanner = (Scanner * )payload ;
35+ scanner -> opening_hash_count = 0 ;
36+ if (length == 1 ) {
37+ Scanner * scanner = (Scanner * )payload ;
38+ scanner -> opening_hash_count = buffer [0 ];
39+ }
40+ }
2441
/**
 * Report whether c may appear inside a numeric literal: an underscore
 * separator or a digit as classified by iswdigit.
 */
static inline bool is_num_char(int32_t c) {
    if (c == '_') {
        return true;
    }
    return iswdigit(c) != 0;
}
2643
@@ -45,8 +62,7 @@ static inline bool process_string(TSLexer *lexer) {
4562 return has_content ;
4663}
4764
48- static inline bool process_raw_string (TSLexer * lexer ) {
49- lexer -> result_symbol = RAW_STRING_LITERAL ;
65+ static inline bool scan_raw_string_start (Scanner * scanner , TSLexer * lexer ) {
5066 if (lexer -> lookahead == 'b' || lexer -> lookahead == 'c' ) {
5167 advance (lexer );
5268 }
@@ -55,7 +71,7 @@ static inline bool process_raw_string(TSLexer *lexer) {
5571 }
5672 advance (lexer );
5773
58- unsigned opening_hash_count = 0 ;
74+ uint8_t opening_hash_count = 0 ;
5975 while (lexer -> lookahead == '#' ) {
6076 advance (lexer );
6177 opening_hash_count ++ ;
@@ -65,20 +81,27 @@ static inline bool process_raw_string(TSLexer *lexer) {
6581 return false;
6682 }
6783 advance (lexer );
84+ scanner -> opening_hash_count = opening_hash_count ;
6885
86+ lexer -> result_symbol = RAW_STRING_LITERAL_START ;
87+ return true;
88+ }
89+
90+ static inline bool scan_raw_string_content (Scanner * scanner , TSLexer * lexer ) {
6991 for (;;) {
7092 if (lexer -> eof (lexer )) {
7193 return false;
7294 }
7395 if (lexer -> lookahead == '"' ) {
96+ lexer -> mark_end (lexer );
7497 advance (lexer );
7598 unsigned hash_count = 0 ;
76- while (lexer -> lookahead == '#' && hash_count < opening_hash_count ) {
99+ while (lexer -> lookahead == '#' && hash_count < scanner -> opening_hash_count ) {
77100 advance (lexer );
78101 hash_count ++ ;
79102 }
80- if (hash_count == opening_hash_count ) {
81- lexer -> mark_end ( lexer ) ;
103+ if (hash_count == scanner -> opening_hash_count ) {
104+ lexer -> result_symbol = RAW_STRING_LITERAL_CONTENT ;
82105 return true;
83106 }
84107 } else {
@@ -87,6 +110,15 @@ static inline bool process_raw_string(TSLexer *lexer) {
87110 }
88111}
89112
113+ static inline bool scan_raw_string_end (Scanner * scanner , TSLexer * lexer ) {
114+ advance (lexer );
115+ for (unsigned i = 0 ; i < scanner -> opening_hash_count ; i ++ ) {
116+ advance (lexer );
117+ }
118+ lexer -> result_symbol = RAW_STRING_LITERAL_END ;
119+ return true;
120+ }
121+
90122static inline bool process_float_literal (TSLexer * lexer ) {
91123 lexer -> result_symbol = FLOAT_LITERAL ;
92124
@@ -321,7 +353,10 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
321353 return false;
322354 }
323355
324- if (valid_symbols [BLOCK_COMMENT_CONTENT ] || valid_symbols [BLOCK_INNER_DOC_MARKER ] || valid_symbols [BLOCK_OUTER_DOC_MARKER ]) {
356+ Scanner * scanner = (Scanner * )payload ;
357+
358+ if (valid_symbols [BLOCK_COMMENT_CONTENT ] || valid_symbols [BLOCK_INNER_DOC_MARKER ] ||
359+ valid_symbols [BLOCK_OUTER_DOC_MARKER ]) {
325360 return process_block_comment (lexer , valid_symbols );
326361 }
327362
@@ -337,9 +372,17 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
337372 skip (lexer );
338373 }
339374
340- if (valid_symbols [RAW_STRING_LITERAL ] &&
375+ if (valid_symbols [RAW_STRING_LITERAL_START ] &&
341376 (lexer -> lookahead == 'r' || lexer -> lookahead == 'b' || lexer -> lookahead == 'c' )) {
342- return process_raw_string (lexer );
377+ return scan_raw_string_start (scanner , lexer );
378+ }
379+
380+ if (valid_symbols [RAW_STRING_LITERAL_CONTENT ]) {
381+ return scan_raw_string_content (scanner , lexer );
382+ }
383+
384+ if (valid_symbols [RAW_STRING_LITERAL_END ] && lexer -> lookahead == '"' ) {
385+ return scan_raw_string_end (scanner , lexer );
343386 }
344387
345388 if (valid_symbols [FLOAT_LITERAL ] && iswdigit (lexer -> lookahead )) {
0 commit comments