88 */
99namespace nicoSWD\Rules;
1010
11- /**
12- * Class Tokenizer
13- * @package nicoSWD\Rules
14- */
11+ use SplPriorityQueue;
12+ use stdClass;
13+
1514final class Tokenizer implements TokenizerInterface
1615{
17- /**
18- * @var string
19- */
20- private $tokens = '
21- ~(
22- (?<And>&&)
23- | (?<Or>\|\|)
24- | (?<NotEqualStrict>!==)
25- | (?<NotEqual><>|!=)
26- | (?<EqualStrict>===)
27- | (?<Equal>==)
28- | (?<In>\bin\b)
29- | (?<Bool>\b(?:true|false)\b)
30- | (?<Null>\bnull\b)
31- | (?<Method>\.\s*[a-zA-Z_]\w*\s*\()
32- | (?<Function>[a-zA-Z_]\w*\s*\()
33- | (?<Variable>[a-zA-Z_]\w*)
34- | (?<Float>-?\d+(?:\.\d+))
35- | (?<Integer>-?\d+)
36- | (?<EncapsedString>"[^"]*"|\'[^\']*\')
37- | (?<SmallerEqual><=)
38- | (?<GreaterEqual>>=)
39- | (?<Smaller><)
40- | (?<Greater>>)
41- | (?<OpeningParentheses>\()
42- | (?<ClosingParentheses>\))
43- | (?<OpeningArray>\[)
44- | (?<ClosingArray>\])
45- | (?<Comma>,)
46- | (?<Regex>/[^/\*].*/[igm]{0,3})
47- | (?<Comment>//[^\r\n]*|/\*.*?\*/)
48- | (?<Newline>\r?\n)
49- | (?<Space>\s+)
50- | (?<Unknown>.)
51- )~xAs';
16+ const TOKEN_AND = 'And';
17+ const TOKEN_OR = 'Or';
18+ const TOKEN_NOT_EQUAL_STRICT = 'NotEqualStrict';
19+ const TOKEN_NOT_EQUAL = 'NotEqual';
20+ const TOKEN_EQUAL_STRICT = 'EqualStrict';
21+ const TOKEN_EQUAL = 'Equal';
22+ const TOKEN_IN = 'In';
23+ const TOKEN_BOOL = 'Bool';
24+ const TOKEN_NULL = 'Null';
25+ const TOKEN_METHOD = 'Method';
26+ const TOKEN_FUNCTION = 'Function';
27+ const TOKEN_VARIABLE = 'Variable';
28+ const TOKEN_FLOAT = 'Float';
29+ const TOKEN_INTEGER = 'Integer';
30+ const TOKEN_ENCAPSED_STRING = 'EncapsedString';
31+ const TOKEN_SMALLER_EQUAL = 'SmallerEqual';
32+ const TOKEN_GREATER_EQUAL = 'GreaterEqual';
33+ const TOKEN_SMALLER = 'Smaller';
34+ const TOKEN_GREATER = 'Greater';
35+ const TOKEN_OPENING_PARENTHESIS = 'OpeningParentheses';
36+ const TOKEN_CLOSING_PARENTHESIS = 'ClosingParentheses';
37+ const TOKEN_OPENING_ARRAY = 'OpeningArray';
38+ const TOKEN_CLOSING_ARRAY = 'ClosingArray';
39+ const TOKEN_COMMA = 'Comma';
40+ const TOKEN_REGEX = 'Regex';
41+ const TOKEN_COMMENT = 'Comment';
42+ const TOKEN_NEWLINE = 'Newline';
43+ const TOKEN_SPACE = 'Space';
44+ const TOKEN_UNKNOWN = 'Unknown';
45+
46+ private $internalTokens = [];
47+
48+ private $regex = '';
49+
50+ private $regexRequiresReassambly = false;
51+
/**
 * Registers the built-in token set.
 *
 * Each entry is a [token name, regex fragment, priority] triple that is
 * handed to registerToken() in the order listed here.
 */
public function __construct()
{
    $builtInTokens = [
        [self::TOKEN_AND, '&&', 145],
        [self::TOKEN_OR, '\|\|', 140],
        [self::TOKEN_NOT_EQUAL_STRICT, '!==', 135],
        [self::TOKEN_NOT_EQUAL, '<>|!=', 130],
        [self::TOKEN_EQUAL_STRICT, '===', 125],
        [self::TOKEN_EQUAL, '==', 120],
        [self::TOKEN_IN, '\bin\b', 115],
        [self::TOKEN_BOOL, '\b(?:true|false)\b', 110],
        [self::TOKEN_NULL, '\bnull\b', 105],
        [self::TOKEN_METHOD, '\.\s*[a-zA-Z_]\w*\s*\(', 100],
        [self::TOKEN_FUNCTION, '[a-zA-Z_]\w*\s*\(', 95],
        [self::TOKEN_FLOAT, '-?\d+(?:\.\d+)', 90],
        [self::TOKEN_INTEGER, '-?\d+', 85],
        [self::TOKEN_ENCAPSED_STRING, '"[^"]*"|\'[^\']*\'', 80],
        [self::TOKEN_SMALLER_EQUAL, '<=', 75],
        [self::TOKEN_GREATER_EQUAL, '>=', 70],
        [self::TOKEN_SMALLER, '<', 65],
        [self::TOKEN_GREATER, '>', 60],
        [self::TOKEN_OPENING_PARENTHESIS, '\(', 55],
        [self::TOKEN_CLOSING_PARENTHESIS, '\)', 50],
        [self::TOKEN_OPENING_ARRAY, '\[', 45],
        [self::TOKEN_CLOSING_ARRAY, '\]', 40],
        [self::TOKEN_COMMA, ',', 35],
        [self::TOKEN_REGEX, '/[^/\*].*/[igm]{0,3}', 30],
        [self::TOKEN_COMMENT, '//[^\r\n]*|/\*.*?\*/', 25],
        [self::TOKEN_NEWLINE, '\r?\n', 20],
        [self::TOKEN_SPACE, '\s+', 15],
        [self::TOKEN_VARIABLE, '[a-zA-Z_]\w*', 10],
        [self::TOKEN_UNKNOWN, '.', 5],
    ];

    foreach ($builtInTokens as $definition) {
        list($name, $pattern, $weight) = $definition;
        $this->registerToken($name, $pattern, $weight);
    }
}
5284
5385 /**
54- * @param string $string
55- * @return Stack
86+ * {@inheritdoc}
5687 */
5788 public function tokenize($string)
5889 {
5990 $stack = new Stack();
91+ $regex = $this->getRegex();
6092 $baseNameSpace = __NAMESPACE__ . '\\Tokens\\Token';
6193 $offset = 0;
6294
63- while (preg_match($this->tokens , $string, $matches, 0, $offset)) {
95+ while (preg_match($regex , $string, $matches, 0, $offset)) {
6496 $token = $this->getMatchedToken($matches);
6597 $className = $baseNameSpace . $token;
6698
@@ -77,8 +109,22 @@ public function tokenize($string)
77109 }
78110
79111 /**
80- * @param string[] $matches
81- * @return string
112+ * {@inheritdoc}
113+ */
public function registerToken($class, $regex, $priority = null)
{
    // Without an explicit priority, fall back to whatever getPriority()
    // reports for this token name.
    if ($priority === null) {
        $priority = $this->getPriority($class);
    }

    // Tokens are keyed by name, so registering the same name again
    // replaces the earlier definition.
    $this->internalTokens[$class] = (object) [
        'class'    => $class,
        'regex'    => $regex,
        'priority' => $priority,
    ];

    // Flag checked by getRegex() to decide whether the combined pattern
    // has to be rebuilt.
    $this->regexRequiresReassambly = true;
}
124+
125+ /**
126+ * @param array $matches
127+ * @return int|string
82128 */
83129 private function getMatchedToken(array $matches)
84130 {
@@ -90,4 +136,48 @@ private function getMatchedToken(array $matches)
90136
91137 return 'Unknown';
92138 }
93- }
139+
/**
 * Returns the combined tokenizer pattern, rebuilding it when no pattern
 * has been assembled yet or a token was registered since the last build.
 *
 * Every registered token becomes one named group; the groups are joined
 * as alternatives in the order delivered by getQueue().
 *
 * @return string
 */
private function getRegex()
{
    // A cached pattern is still valid: hand it back unchanged.
    if ($this->regex && !$this->regexRequiresReassambly) {
        return $this->regex;
    }

    $groups = [];
    $queue = $this->getQueue();

    // Drain the queue; extract() removes and returns the current top element.
    while (!$queue->isEmpty()) {
        $token = $queue->extract();
        $groups[] = '(?<' . $token->class . '>' . $token->regex . ')';
    }

    $this->regex = sprintf('~(%s)~As', implode('|', $groups));
    $this->regexRequiresReassambly = false;

    return $this->regex;
}
158+
/**
 * Builds a priority queue holding every registered token, highest
 * priority first.
 *
 * SplPriorityQueue makes no ordering guarantee for elements that share a
 * priority. Each token is therefore queued under a composite
 * [priority, serial] key: the serial decreases with every insertion, so
 * tokens of equal priority are dequeued in the order in which they appear
 * in $this->internalTokens instead of in an unspecified order.
 *
 * @return SplPriorityQueue
 */
private function getQueue()
{
    $queue = new SplPriorityQueue();
    $serial = PHP_INT_MAX;

    foreach ($this->internalTokens as $token) {
        // Arrays of equal length compare element by element, so the
        // serial only matters when the priorities are equal.
        $queue->insert($token, [$token->priority, $serial--]);
    }

    return $queue;
}
172+
/**
 * Looks up the priority of an already registered token.
 *
 * @param string $class Token name used as key in $this->internalTokens
 * @return int Priority of the registered token, or 10 when no token with
 *             that name has been registered
 */
private function getPriority($class)
{
    if (!isset($this->internalTokens[$class])) {
        return 10;
    }

    return $this->internalTokens[$class]->priority;
}
183+ }
0 commit comments