Skip to content

Commit b4f5f2c

Browse files
authored
Merge pull request microsoft#317 from TysonAndre/token_get_all-override
Add a way to override the lexer in a Parser instance
2 parents e813b45 + 6f9d25b commit b4f5f2c

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

src/Parser.php

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,6 +138,21 @@ public function __construct() {
138138
$this->returnTypeDeclarationTokens = \array_merge([TokenKind::VoidReservedWord, TokenKind::NullReservedWord, TokenKind::FalseReservedWord, TokenKind::StaticKeyword], $this->parameterTypeDeclarationTokens);
139139
}
140140

141+
/**
142+
* This method exists so that it can be overridden in subclasses.
143+
* Any override in a subclass must return a token stream that is equivalent to the contents of $fileContents for parsing to work properly.
144+
*
145+
* Possible reasons for applications to override the lexer:
146+
*
147+
* - Imitate the token stream of a newer/older PHP version (e.g. T_FN is only available in PHP 7.4 and newer)
148+
* - Reuse the result of token_get_all to create a Node again.
149+
* - Reuse the result of token_get_all in a different library.
150+
*/
151+
protected function makeLexer(string $fileContents): TokenStreamProviderInterface
152+
{
153+
return TokenStreamProviderFactory::GetTokenStreamProvider($fileContents);
154+
}
155+
141156
/**
142157
* Generates AST from source file contents. Returns an instance of SourceFileNode, which is always the top-most
143158
* Node-type of the tree.
@@ -146,7 +161,7 @@ public function __construct() {
146161
* @return SourceFileNode
147162
*/
148163
public function parseSourceFile(string $fileContents, string $uri = null) : SourceFileNode {
149-
$this->lexer = TokenStreamProviderFactory::GetTokenStreamProvider($fileContents);
164+
$this->lexer = $this->makeLexer($fileContents);
150165

151166
$this->reset();
152167

src/PhpTokenizer.php

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ public static function getTokensArrayFromContent(
7676
$content = $prefix . $content;
7777
}
7878

79-
$tokens = @\token_get_all($content);
79+
$tokens = static::tokenGetAll($content, $parseContext);
8080

8181
$arr = array();
8282
$fullStart = $start = $pos = $initialPos;
@@ -149,6 +149,22 @@ public static function getTokensArrayFromContent(
149149
return $arr;
150150
}
151151

152+
/**
153+
* @param string $content the raw php code
154+
* @param ?int $parseContext can be SourceElements when extracting doc comments.
155+
* Having this available may be useful for subclasses to decide whether or not to post-process results, cache results, etc.
156+
* @return array[]|string[] an array of tokens. When concatenated, these tokens must equal $content.
157+
*
158+
* This exists so that it can be overridden in subclasses, e.g. to cache the result of tokenizing entire files.
159+
* Applications using tolerant-php-parser may often end up needing the token stream for other purposes that are hard to accomplish with the resulting AST,
160+
* such as iterating over T_COMMENT tokens, checking for inline html,
161+
* looking up all tokens (including skipped tokens) on a given line, etc.
162+
*/
163+
protected static function tokenGetAll(string $content, $parseContext): array
164+
{
165+
return @\token_get_all($content);
166+
}
167+
152168
const TOKEN_MAP = [
153169
T_CLASS_C => TokenKind::Name,
154170
T_DIR => TokenKind::Name,

0 commit comments

Comments
 (0)