@@ -720,11 +720,13 @@ final class Tokenizer
720720
721721 // Regular expressions for tokenizing
722722
723- private readonly string $ regexBoundaries ;
724- private readonly string $ regexReserved ;
725- private readonly string $ regexReservedNewline ;
726- private readonly string $ regexReservedToplevel ;
727- private readonly string $ regexFunction ;
723+ private readonly string $ nextTokenRegexNumber ;
724+ private readonly string $ nextTokenRegexBoundaryCharacter ;
725+ private readonly string $ nextTokenRegexReservedToplevel ;
726+ private readonly string $ nextTokenRegexReservedNewline ;
727+ private readonly string $ nextTokenRegexReserved ;
728+ private readonly string $ nextTokenRegexFunction ;
729+ private readonly string $ nextTokenRegexNonReserved ;
728730
729731 /**
730732 * Punctuation that can be used as a boundary between other tokens
@@ -770,24 +772,31 @@ public function __construct()
770772 };
771773
772774 // Set up regular expressions
773- $ this -> regexBoundaries = '(?> ' . implode (
775+ $ regexBoundaries = '(?> ' . implode (
774776 '| ' ,
775777 $ this ->quoteRegex ($ this ->boundaries ),
776778 ) . ') ' ;
777- $ this -> regexReserved = '(?> ' . implode (
779+ $ regexReserved = '(?> ' . implode (
778780 '| ' ,
779781 $ this ->quoteRegex ($ sortByLengthFx ($ this ->reserved )),
780782 ) . ') ' ;
781- $ this -> regexReservedToplevel = str_replace (' ' , '\s+ ' , ' (?> ' . implode (
783+ $ regexReservedToplevel = ' (?> ' . str_replace (' ' , '\s+ ' , implode (
782784 '| ' ,
783785 $ this ->quoteRegex ($ sortByLengthFx ($ this ->reservedToplevel )),
784- ) . ') ' ) ;
785- $ this -> regexReservedNewline = str_replace (' ' , '\s+ ' , ' (?> ' . implode (
786+ )) . ') ' ;
787+ $ regexReservedNewline = ' (?> ' . str_replace (' ' , '\s+ ' , implode (
786788 '| ' ,
787789 $ this ->quoteRegex ($ sortByLengthFx ($ this ->reservedNewline )),
788- ) . ') ' );
790+ )) . ') ' ;
791+ $ regexFunction = '(?> ' . implode ('| ' , $ this ->quoteRegex ($ sortByLengthFx ($ this ->functions ))) . ') ' ;
789792
790- $ this ->regexFunction = '(?> ' . implode ('| ' , $ this ->quoteRegex ($ sortByLengthFx ($ this ->functions ))) . ') ' ;
793+ $ this ->nextTokenRegexNumber = '/\G(\d+(\.\d+)?|0x[\da-fA-F]+|0b[01]+)($|\s|" \'`| ' . $ regexBoundaries . ')/ ' ;
794+ $ this ->nextTokenRegexBoundaryCharacter = '/\G( ' . $ regexBoundaries . ')/ ' ;
795+ $ this ->nextTokenRegexReservedToplevel = '/\G( ' . $ regexReservedToplevel . ')($|\s| ' . $ regexBoundaries . ')/ ' ;
796+ $ this ->nextTokenRegexReservedNewline = '/\G( ' . $ regexReservedNewline . ')($|\s| ' . $ regexBoundaries . ')/ ' ;
797+ $ this ->nextTokenRegexReserved = '/\G( ' . $ regexReserved . ')($|\s| ' . $ regexBoundaries . ')/ ' ;
798+ $ this ->nextTokenRegexFunction = '/\G( ' . $ regexFunction . '[(]|\s|[)])/ ' ;
799+ $ this ->nextTokenRegexNonReserved = '/\G(.*?)($|\s|[" \'`]| ' . $ regexBoundaries . ')/ ' ;
791800 }
792801
793802 /**
@@ -883,7 +892,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
883892 $ value = $ firstChar . $ this ->getNextQuotedString ($ string , $ offset + 1 );
884893 } else {
885894 // Non-quoted variable name
886- preg_match ('/\G( ' . $ firstChar . ' [\w.$]+)/ ' , $ string , $ matches , 0 , $ offset );
895+ preg_match ('/\G([@:] [\w.$]+)/ ' , $ string , $ matches , 0 , $ offset );
887896 if ($ matches ) {
888897 $ value = $ matches [1 ];
889898 }
@@ -897,7 +906,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
897906 // Number (decimal, binary, or hex)
898907 if (
899908 preg_match (
900- ' /\G(\d+(\.\d+)?|0x[\da-fA-F]+|0b[01]+)($|\s|" \' `| ' . $ this ->regexBoundaries . ' )/ ' ,
909+ $ this ->nextTokenRegexNumber ,
901910 $ string ,
902911 $ matches ,
903912 0 ,
@@ -908,7 +917,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
908917 }
909918
910919 // Boundary Character (punctuation and symbols)
911- if (preg_match (' /\G( ' . $ this ->regexBoundaries . ' )/ ' , $ string , $ matches , 0 , $ offset )) {
920+ if (preg_match ($ this ->nextTokenRegexBoundaryCharacter , $ string , $ matches , 0 , $ offset )) {
912921 return new Token (Token::TOKEN_TYPE_BOUNDARY , $ matches [1 ]);
913922 }
914923
@@ -918,7 +927,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
918927 // Top Level Reserved Word
919928 if (
920929 preg_match (
921- ' /\G( ' . $ this ->regexReservedToplevel . ' )($|\s| ' . $ this -> regexBoundaries . ' )/ ' ,
930+ $ this ->nextTokenRegexReservedToplevel ,
922931 $ upper ,
923932 $ matches ,
924933 0 ,
@@ -934,7 +943,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
934943 // Newline Reserved Word
935944 if (
936945 preg_match (
937- ' /\G( ' . $ this ->regexReservedNewline . ' )($|\s| ' . $ this -> regexBoundaries . ' )/ ' ,
946+ $ this ->nextTokenRegexReservedNewline ,
938947 $ upper ,
939948 $ matches ,
940949 0 ,
@@ -950,7 +959,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
950959 // Other Reserved Word
951960 if (
952961 preg_match (
953- ' /\G( ' . $ this ->regexReserved . ' )($|\s| ' . $ this -> regexBoundaries . ' )/ ' ,
962+ $ this ->nextTokenRegexReserved ,
954963 $ upper ,
955964 $ matches ,
956965 0 ,
@@ -965,17 +974,16 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
965974 }
966975
967976 // A function must be succeeded by '('
968- // this makes it so "count(" is considered a function, but "count" alone is not
969- // function
970- if (preg_match ('/\G( ' . $ this ->regexFunction . '[(]|\s|[)])/ ' , $ upper , $ matches , 0 , $ offset )) {
977+ // this makes it so "count(" is considered a function, but "count" alone is not a function
978+ if (preg_match ($ this ->nextTokenRegexFunction , $ upper , $ matches , 0 , $ offset )) {
971979 return new Token (
972980 Token::TOKEN_TYPE_RESERVED ,
973981 substr ($ string , $ offset , strlen ($ matches [1 ]) - 1 ),
974982 );
975983 }
976984
977985 // Non reserved word
978- preg_match (' /\G(.*?)($|\s|[" \' `]| ' . $ this ->regexBoundaries . ' )/ ' , $ string , $ matches , 0 , $ offset );
986+ preg_match ($ this ->nextTokenRegexNonReserved , $ string , $ matches , 0 , $ offset );
979987
980988 return new Token (Token::TOKEN_TYPE_WORD , $ matches [1 ]);
981989 }
0 commit comments