@@ -372,26 +372,87 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
372372 this = MkDoubleQuoteToken ( _, _, _, result )
373373 }
374374
375- Token getNext ( ) {
376- result .getBegin ( ) = this .getEnd ( ) + 1 and
377- result .getReader ( ) = this .getReader ( )
375+ /**
376+ * Gets the next contiguous token after the end of this token.
377+ */
378+ Token getNextAfterEnd ( ) {
379+ // Find the last contained token, then get the next token after that
380+ result = getNextAfterContained ( ) and
381+ // Ensure contiguous tokens
382+ consecutiveToken ( this , result )
378383 }
379384
380- predicate isFirst ( ) {
381- not exists ( Token other |
382- other .getBegin ( ) < this .getBegin ( ) and other .getReader ( ) = this .getReader ( )
385+ bindingset [ t1, t2]
386+ pragma [ inline_late]
387+ private predicate consecutiveToken ( Token t1 , Token t2 ) { t1 .getEnd ( ) + 1 = t2 .getBegin ( ) }
388+
389+ /**
390+ * Gets the next token that occurs after the start of this token.
391+ */
392+ private Token getNext ( ) {
393+ exists ( int pos |
394+ tokenOrdering ( this .getReader ( ) , this , pos ) and
395+ tokenOrdering ( this .getReader ( ) , result , pos + uniqueTokensAtPosition ( this .getReader ( ) , pos ) )
383396 )
384397 }
385398
399+ /**
400+ * Get the last token contained by this token.
401+ */
402+ pragma [ noinline]
403+ private Token getNextAfterContained ( ) {
404+ exists ( Token lastToken |
405+ contains ( lastToken ) and
406+ result = lastToken .getNext ( ) and
407+ not contains ( result )
408+ )
409+ }
410+
411+ predicate isFirst ( ) { tokenOrdering ( this .getReader ( ) , this , 1 ) }
412+
386413 predicate contains ( Token t ) {
387- this .getReader ( ) = t .getReader ( ) and
388- this .getBegin ( ) < t .getBegin ( ) and
389- this .getEnd ( ) > t .getEnd ( )
414+ // base case, every token contains itself
415+ t = this
416+ or
417+ // Recursive case, find an existing token contained by this token, and determine whether the next token is
418+ // contained
419+ exists ( Token prev |
420+ this .contains ( prev ) and
421+ prev .getNext ( ) = t and
422+ // In the case of overlapping ranges, report tokens that begin inside this token as "contained"
423+ this .getEnd ( ) >= t .getBegin ( )
424+ )
425+ }
426+
427+ /**
428+ * The token `t` is completely contained within this outer token.
429+ */
430+ predicate strictContains ( Token t ) {
431+ this .contains ( t ) and t .getBegin ( ) > this .getBegin ( ) and t .getEnd ( ) < this .getEnd ( )
390432 }
391433
392434 stdlib:: Location getLocation ( ) { result = getReader ( ) .getLocation ( ) }
393435 }
394436
437+ /**
438+ * Holds if `t` is ordered at `position` according to the location of the beginning of the token.
439+ *
440+ * Note: `position` is not contiguous as certain strings may be matched by multiple tokens. In this
441+ * case those tokens will all have the same `position`, and the subsequent token will have
442+ * `position + count(tokens_with_current_position)`.
443+ */
444+ private predicate tokenOrdering ( BindingStringReader reader , Token t , int position ) {
445+ t = rank [ position ] ( Token token | token .getReader ( ) = reader | token order by token .getBegin ( ) )
446+ }
447+
448+ /**
449+ * Identify how many tokens are at a given position in the ordering, i.e. have the same beginning and end.
450+ */
451+ private int uniqueTokensAtPosition ( BindingStringReader reader , int position ) {
452+ tokenOrdering ( reader , _, position ) and
453+ result = count ( Token t | tokenOrdering ( reader , t , position ) )
454+ }
455+
395456 private class WhiteSpaceToken extends Token , MkWhiteSpaceToken { }
396457
397458 private class CommaToken extends Token , MkCommaToken { }
@@ -431,10 +492,10 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
431492 private class DotToken extends Token , MkDot { }
432493
433494 private Token getNextSkippingWhitespace ( Token t ) {
434- result = t .getNext ( ) and
495+ result = t .getNextAfterEnd ( ) and
435496 not result instanceof WhiteSpaceToken
436497 or
437- exists ( WhiteSpaceToken ws | ws = t .getNext ( ) | result = getNextSkippingWhitespace ( ws ) )
498+ exists ( WhiteSpaceToken ws | ws = t .getNextAfterEnd ( ) | result = getNextSkippingWhitespace ( ws ) )
438499 }
439500
440501 private newtype TMemberList =
@@ -625,15 +686,15 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
625686 private newtype TValue =
626687 MkNumber ( float n , Token source ) {
627688 exists ( NumberToken t | t .getValue ( ) .toFloat ( ) = n and source = t |
628- not any ( StringToken str ) .contains ( t ) and
629- not any ( NameToken name ) .contains ( t )
689+ not any ( StringToken str ) .strictContains ( t ) and
690+ not any ( NameToken name ) .strictContains ( t )
630691 )
631692 } or
632693 MkString ( string s , Token source ) {
633694 exists ( StringToken t |
634695 t .( Token ) .getValue ( ) = s and
635696 t = source and
636- not any ( NameToken nameToken ) .contains ( t )
697+ not any ( NameToken nameToken ) .strictContains ( t )
637698 )
638699 } or
639700 MkObject ( MemberList members , Token source ) {
@@ -662,27 +723,27 @@ module BindingStringParser<BindingStringReaderSig BindingStringReader> {
662723 } or
663724 MkTrue ( Token source ) {
664725 exists ( TrueToken t |
665- not any ( StringToken str ) .contains ( t ) and
666- not any ( NameToken nameToken ) .contains ( t ) and
726+ not any ( StringToken str ) .strictContains ( t ) and
727+ not any ( NameToken nameToken ) .strictContains ( t ) and
667728 source = t
668729 )
669730 } or
670731 MkFalse ( Token source ) {
671732 exists ( FalseToken t |
672- not any ( StringToken str ) .contains ( t ) and
673- not any ( NameToken nameToken ) .contains ( t ) and
733+ not any ( StringToken str ) .strictContains ( t ) and
734+ not any ( NameToken nameToken ) .strictContains ( t ) and
674735 source = t
675736 )
676737 } or
677738 MkNull ( Token source ) {
678739 exists ( NullToken t |
679- not any ( StringToken str ) .contains ( t ) and
680- not any ( NameToken nameToken ) .contains ( t ) and
740+ not any ( StringToken str ) .strictContains ( t ) and
741+ not any ( NameToken nameToken ) .strictContains ( t ) and
681742 source = t
682743 )
683744 } or
684745 MkName ( Token source ) {
685- exists ( NameToken t | not any ( StringToken str ) .contains ( t ) and source = t )
746+ exists ( NameToken t | not any ( StringToken str ) .strictContains ( t ) and source = t )
686747 } or
687748 MkIdent ( Token source ) {
688749 exists ( IdentToken t | source = t and getNextSkippingWhitespace ( t ) instanceof ColonToken )
0 commit comments