@@ -13,12 +13,15 @@ enum TokenType {
1313 BLOCK_INNER_DOC_MARKER ,
1414 BLOCK_COMMENT_CONTENT ,
1515 LINE_DOC_CONTENT ,
16- FRONTMATTER ,
16+ FRONTMATTER_START ,
17+ FRONTMATTER_CONTENT ,
18+ FRONTMATTER_END ,
1719 ERROR_SENTINEL
1820};
1921
/*
 * State the external scanner keeps between calls. Both fields are written
 * out by the serialize callback and restored by the deserialize callback.
 */
typedef struct {
    /* NOTE(review): presumably the number of '#' on an opening raw-string
     * delimiter; the code that sets it is outside this view - confirm. */
    uint8_t opening_hash_count;
    /* Number of '-' characters in the opening frontmatter fence, recorded by
     * process_frontmatter_start so the closing fence can be matched. */
    uint8_t frontmatter_dashes;
} Scanner;
2326
2427void * tree_sitter_rust_external_scanner_create () { return ts_calloc (1 , sizeof (Scanner )); }
@@ -28,15 +31,17 @@ void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner
2831unsigned tree_sitter_rust_external_scanner_serialize (void * payload , char * buffer ) {
2932 Scanner * scanner = (Scanner * )payload ;
3033 buffer [0 ] = (char )scanner -> opening_hash_count ;
31- return 1 ;
34+ buffer [1 ] = (char )scanner -> frontmatter_dashes ;
35+ return 2 ;
3236}
3337
3438void tree_sitter_rust_external_scanner_deserialize (void * payload , const char * buffer , unsigned length ) {
3539 Scanner * scanner = (Scanner * )payload ;
3640 scanner -> opening_hash_count = 0 ;
37- if ( length == 1 ) {
38- Scanner * scanner = ( Scanner * ) payload ;
41+ scanner -> frontmatter_dashes = 0 ;
42+ if ( length == 2 ) {
3943 scanner -> opening_hash_count = buffer [0 ];
44+ scanner -> frontmatter_dashes = buffer [1 ];
4045 }
4146}
4247
@@ -332,15 +337,42 @@ static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbo
332337 return false;
333338}
334339
335- static inline bool process_frontmatter (TSLexer * lexer ) {
336- uint8_t opening = 0 ;
340+ static inline bool process_frontmatter_start (TSLexer * lexer , Scanner * scanner ) {
341+ uint8_t amount = 0 ;
337342 while (lexer -> lookahead == '-' ) {
338- opening ++ ;
343+ amount ++ ;
339344 advance (lexer );
340345 }
341346
342- if (opening < 3 ) {
347+ if (amount < 3 ) {
343348 return false;
349+ } else {
350+ scanner -> frontmatter_dashes = amount ;
351+ lexer -> result_symbol = FRONTMATTER_START ;
352+
353+ // parse optional info string after the initial fence
354+ while (lexer -> lookahead != '\n' && !lexer -> eof (lexer )) {
355+ advance (lexer );
356+ }
357+ advance (lexer );
358+
359+ return true;
360+ }
361+ }
362+
363+ static inline bool process_frontmatter (TSLexer * lexer , Scanner * scanner ) {
364+ // Separately parse empty frontmatter: tree-sitter strips all whitespace,
365+ // including newlines, so we can't rely on parsing only after a newline in this case.
366+ lexer -> mark_end (lexer );
367+ uint8_t amount = 0 ;
368+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
369+ amount ++ ;
370+ advance (lexer );
371+ }
372+
373+ if (amount == scanner -> frontmatter_dashes ) {
374+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
375+ return true;
344376 }
345377
346378 for (;;) {
@@ -349,16 +381,17 @@ static inline bool process_frontmatter(TSLexer *lexer) {
349381 }
350382
351383 if (lexer -> lookahead == '\n' ) {
384+ lexer -> mark_end (lexer );
352385 advance (lexer );
353386
354387 uint8_t amount = 0 ;
355- while (lexer -> lookahead == '-' && amount < opening ) {
388+ while (lexer -> lookahead == '-' && amount < scanner -> frontmatter_dashes ) {
356389 amount ++ ;
357390 advance (lexer );
358391 }
359392
360- if (amount == opening ) {
361- lexer -> result_symbol = FRONTMATTER ;
393+ if (amount == scanner -> frontmatter_dashes ) {
394+ lexer -> result_symbol = FRONTMATTER_CONTENT ;
362395 return true;
363396 }
364397 } else {
@@ -367,6 +400,16 @@ static inline bool process_frontmatter(TSLexer *lexer) {
367400 }
368401}
369402
403+ static inline bool process_frontmatter_end (TSLexer * lexer , Scanner * scanner ) {
404+ advance (lexer );
405+ for (unsigned int amount = 0 ; amount < scanner -> frontmatter_dashes ; amount ++ ) {
406+ advance (lexer );
407+ }
408+
409+ lexer -> result_symbol = FRONTMATTER_END ;
410+ return true;
411+ }
412+
370413bool tree_sitter_rust_external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
371414 // The documentation states that if the lexical analysis fails for some reason
372415 // they will mark every state as valid and pass it to the external scanner
@@ -425,8 +468,16 @@ bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const
425468 return process_float_literal (lexer );
426469 }
427470
428- if (valid_symbols [FRONTMATTER ]) {
429- return process_frontmatter (lexer );
471+ if (valid_symbols [FRONTMATTER_START ]) {
472+ return process_frontmatter_start (lexer , scanner );
473+ }
474+
475+ if (valid_symbols [FRONTMATTER_CONTENT ]) {
476+ return process_frontmatter (lexer , scanner );
477+ }
478+
479+ if (valid_symbols [FRONTMATTER_END ]) {
480+ return process_frontmatter_end (lexer , scanner );
430481 }
431482
432483 return false;
0 commit comments