Skip to content

Commit 19bb3ab

Browse files
committed
Minimize capturing groups in tokenizer regexes
Reduces runtime by 4%
1 parent 7d3ff7b commit 19bb3ab

File tree

1 file changed

+16
-17
lines changed

1 file changed

+16
-17
lines changed

src/Tokenizer.php

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -790,13 +790,13 @@ public function __construct()
790790
)) . ')';
791791
$regexFunction = '(?>' . implode('|', $this->quoteRegex($sortByLengthFx($this->functions))) . ')';
792792

793-
$this->nextTokenRegexNumber = '/\G(\d+(\.\d+)?|0x[\da-fA-F]+|0b[01]+)($|\s|"\'`|' . $regexBoundaries . ')/';
794-
$this->nextTokenRegexBoundaryCharacter = '/\G(' . $regexBoundaries . ')/';
795-
$this->nextTokenRegexReservedToplevel = '/\G(' . $regexReservedToplevel . ')($|\s|' . $regexBoundaries . ')/';
796-
$this->nextTokenRegexReservedNewline = '/\G(' . $regexReservedNewline . ')($|\s|' . $regexBoundaries . ')/';
797-
$this->nextTokenRegexReserved = '/\G(' . $regexReserved . ')($|\s|' . $regexBoundaries . ')/';
798-
$this->nextTokenRegexFunction = '/\G(' . $regexFunction . '[(]|\s|[)])/';
799-
$this->nextTokenRegexNonReserved = '/\G(.*?)($|\s|["\'`]|' . $regexBoundaries . ')/';
793+
$this->nextTokenRegexNumber = '/\G(?:\d+(?:\.\d+)?|0x[\da-fA-F]+|0b[01]+)(?=$|\s|"\'`|' . $regexBoundaries . ')/';
794+
$this->nextTokenRegexBoundaryCharacter = '/\G' . $regexBoundaries . '/';
795+
$this->nextTokenRegexReservedToplevel = '/\G' . $regexReservedToplevel . '(?=$|\s|' . $regexBoundaries . ')/';
796+
$this->nextTokenRegexReservedNewline = '/\G' . $regexReservedNewline . '(?=$|\s|' . $regexBoundaries . ')/';
797+
$this->nextTokenRegexReserved = '/\G' . $regexReserved . '(?=$|\s|' . $regexBoundaries . ')/';
798+
$this->nextTokenRegexFunction = '/\G' . $regexFunction . '(?=\s*\()/';
799+
$this->nextTokenRegexNonReserved = '/\G.*?(?=$|\s|["\'`]|' . $regexBoundaries . ')/';
800800
}
801801

802802
/**
@@ -838,7 +838,6 @@ public function tokenize(string $string): Cursor
838838
*/
839839
private function createNextToken(string $string, string $upper, int $offset, Token|null $previous = null): Token
840840
{
841-
$matches = [];
842841
// Whitespace
843842
if (preg_match('/\G\s+/', $string, $matches, 0, $offset)) {
844843
return new Token(Token::TOKEN_TYPE_WHITESPACE, $matches[0]);
@@ -892,9 +891,9 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
892891
$value = $firstChar . $this->getNextQuotedString($string, $offset + 1);
893892
} else {
894893
// Non-quoted variable name
895-
preg_match('/\G([@:][\w.$]+)/', $string, $matches, 0, $offset);
894+
preg_match('/\G[@:][\w.$]+/', $string, $matches, 0, $offset);
896895
if ($matches) {
897-
$value = $matches[1];
896+
$value = $matches[0];
898897
}
899898
}
900899

@@ -913,12 +912,12 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
913912
$offset,
914913
)
915914
) {
916-
return new Token(Token::TOKEN_TYPE_NUMBER, $matches[1]);
915+
return new Token(Token::TOKEN_TYPE_NUMBER, $matches[0]);
917916
}
918917

919918
// Boundary Character (punctuation and symbols)
920919
if (preg_match($this->nextTokenRegexBoundaryCharacter, $string, $matches, 0, $offset)) {
921-
return new Token(Token::TOKEN_TYPE_BOUNDARY, $matches[1]);
920+
return new Token(Token::TOKEN_TYPE_BOUNDARY, $matches[0]);
922921
}
923922

924923
// A reserved word cannot be preceded by a '.'
@@ -936,7 +935,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
936935
) {
937936
return new Token(
938937
Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
939-
substr($string, $offset, strlen($matches[1])),
938+
substr($string, $offset, strlen($matches[0])),
940939
);
941940
}
942941

@@ -952,7 +951,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
952951
) {
953952
return new Token(
954953
Token::TOKEN_TYPE_RESERVED_NEWLINE,
955-
substr($string, $offset, strlen($matches[1])),
954+
substr($string, $offset, strlen($matches[0])),
956955
);
957956
}
958957

@@ -968,7 +967,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
968967
) {
969968
return new Token(
970969
Token::TOKEN_TYPE_RESERVED,
971-
substr($string, $offset, strlen($matches[1])),
970+
substr($string, $offset, strlen($matches[0])),
972971
);
973972
}
974973
}
@@ -978,14 +977,14 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
978977
if (preg_match($this->nextTokenRegexFunction, $upper, $matches, 0, $offset)) {
979978
return new Token(
980979
Token::TOKEN_TYPE_RESERVED,
981-
substr($string, $offset, strlen($matches[1]) - 1),
980+
substr($string, $offset, strlen($matches[0])),
982981
);
983982
}
984983

985984
// Non reserved word
986985
preg_match($this->nextTokenRegexNonReserved, $string, $matches, 0, $offset);
987986

988-
return new Token(Token::TOKEN_TYPE_WORD, $matches[1]);
987+
return new Token(Token::TOKEN_TYPE_WORD, $matches[0]);
989988
}
990989

991990
/**

0 commit comments

Comments
 (0)