Skip to content

Commit 1fd9de1

Browse files
authored
Merge pull request microsoft#168 from Microsoft/roblou/fixTemplateStringParse
Parse template strings correctly
2 parents 5549ff2 + b64f8c6 commit 1fd9de1

40 files changed

+1289
-56
lines changed

src/Node/Expression/Variable.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class Variable extends Expression {
2424
public function getName() {
2525
if (
2626
$this->name instanceof Token &&
27-
$name = substr($this->name->getText($this->getFileContents()), 1)
27+
$name = ltrim($this->name->getText($this->getFileContents()), '$')
2828
) {
2929
return $name;
3030
}

src/Parser.php

Lines changed: 80 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -796,8 +796,6 @@ private function isExpressionStartFn() {
796796
return $this->checkToken(TokenKind::BackslashToken);
797797

798798
// literal
799-
case TokenKind::TemplateStringStart:
800-
801799
case TokenKind::DecimalLiteralToken: // TODO merge dec, oct, hex, bin, float -> NumericLiteral
802800
case TokenKind::OctalLiteralToken:
803801
case TokenKind::HexadecimalLiteralToken:
@@ -808,10 +806,7 @@ private function isExpressionStartFn() {
808806
case TokenKind::InvalidBinaryLiteral:
809807
case TokenKind::IntegerLiteralToken:
810808

811-
case TokenKind::StringLiteralToken: // TODO merge unterminated
812-
case TokenKind::UnterminatedStringLiteralToken:
813-
case TokenKind::NoSubstitutionTemplateLiteral:
814-
case TokenKind::UnterminatedNoSubstitutionTemplateLiteral:
809+
case TokenKind::StringLiteralToken:
815810

816811
case TokenKind::SingleQuoteToken:
817812
case TokenKind::DoubleQuoteToken:
@@ -854,29 +849,6 @@ private function isExpressionStartFn() {
854849
};
855850
}
856851

857-
private function parseTemplateString($parentNode) {
858-
$templateNode = new TemplateExpression();
859-
$templateNode->parent = $parentNode;
860-
$templateNode->children = array();
861-
do {
862-
$templateNode->children[] = $this->getCurrentToken();
863-
$this->advanceToken();
864-
$token = $this->getCurrentToken();
865-
866-
if ($token->kind === TokenKind::VariableName) {
867-
$templateNode->children[] = $token;
868-
// $this->advanceToken();
869-
// $token = $this->getCurrentToken();
870-
// TODO figure out how to expose this in TokenStreamProviderInterface
871-
$this->token = $this->lexer->reScanTemplateToken($token);
872-
$token = $this->getCurrentToken();
873-
}
874-
} while ($token->kind === TokenKind::TemplateStringMiddle);
875-
876-
$templateNode->children[] = $this->eat(TokenKind::TemplateStringEnd);
877-
return $templateNode;
878-
}
879-
880852
private function parsePrimaryExpression($parentNode) {
881853
$token = $this->getCurrentToken();
882854
switch ($token->kind) {
@@ -891,10 +863,6 @@ private function parsePrimaryExpression($parentNode) {
891863
case TokenKind::NamespaceKeyword:
892864
return $this->parseQualifiedName($parentNode);
893865

894-
// literal
895-
case TokenKind::TemplateStringStart:
896-
return $this->parseTemplateString($parentNode);
897-
898866
case TokenKind::DecimalLiteralToken: // TODO merge dec, oct, hex, bin, float -> NumericLiteral
899867
case TokenKind::OctalLiteralToken:
900868
case TokenKind::HexadecimalLiteralToken:
@@ -906,10 +874,7 @@ private function parsePrimaryExpression($parentNode) {
906874
case TokenKind::IntegerLiteralToken:
907875
return $this->parseNumericLiteralExpression($parentNode);
908876

909-
case TokenKind::StringLiteralToken: // TODO merge unterminated
910-
case TokenKind::UnterminatedStringLiteralToken:
911-
case TokenKind::NoSubstitutionTemplateLiteral:
912-
case TokenKind::UnterminatedNoSubstitutionTemplateLiteral:
877+
case TokenKind::StringLiteralToken:
913878
return $this->parseStringLiteralExpression($parentNode);
914879

915880
case TokenKind::DoubleQuoteToken:
@@ -1007,14 +972,21 @@ private function parseStringLiteralExpression2($parentNode) {
1007972
case TokenKind::DollarOpenBraceToken:
1008973
case TokenKind::OpenBraceDollarToken:
1009974
$expression->children[] = $this->eat(TokenKind::DollarOpenBraceToken, TokenKind::OpenBraceDollarToken);
1010-
$expression->children[] = $this->parseExpression($expression);
975+
if ($this->getCurrentToken()->kind === TokenKind::StringVarname) {
976+
$expression->children[] = $this->parseSimpleVariable($expression);
977+
} else {
978+
$expression->children[] = $this->parseExpression($expression);
979+
}
1011980
$expression->children[] = $this->eat(TokenKind::CloseBraceToken);
1012981
continue;
1013982
case $startQuoteKind = $expression->startQuote->kind:
1014983
case TokenKind::EndOfFileToken:
1015984
case TokenKind::HeredocEnd:
1016985
$expression->endQuote = $this->eat($startQuoteKind, TokenKind::HeredocEnd);
1017986
return $expression;
987+
case TokenKind::VariableName:
988+
$expression->children[] = $this->parseTemplateStringExpression($expression);
989+
continue;
1018990
default:
1019991
$expression->children[] = $this->getCurrentToken();
1020992
$this->advanceToken();
@@ -1025,6 +997,71 @@ private function parseStringLiteralExpression2($parentNode) {
1025997
return $expression;
1026998
}
1027999

1000+
/**
1001+
* Double-quoted and heredoc strings support a basic set of expression types, described in http://php.net/manual/en/language.types.string.php#language.types.string.parsing
1002+
* Supported: $x, $x->p, $x[0], $x[$y]
1003+
* Not supported: $x->p1->p2, $x[0][1], etc.
1004+
* Because the set of allowed forms is small and finite, they are parsed here directly rather than by reusing the general expression-parsing code.
1005+
*/
1006+
private function parseTemplateStringExpression($parentNode) {
1007+
$token = $this->getCurrentToken();
1008+
if ($token->kind === TokenKind::VariableName) {
1009+
$var = $this->parseSimpleVariable($parentNode);
1010+
$token = $this->getCurrentToken();
1011+
if ($token->kind === TokenKind::OpenBracketToken) {
1012+
return $this->parseTemplateStringSubscriptExpression($var);
1013+
} else if ($token->kind === TokenKind::ArrowToken) {
1014+
return $this->parseTemplateStringMemberAccessExpression($var);
1015+
} else {
1016+
return $var;
1017+
}
1018+
}
1019+
1020+
return null;
1021+
}
1022+
1023+
private function parseTemplateStringSubscriptExpression($postfixExpression) : SubscriptExpression {
1024+
$subscriptExpression = new SubscriptExpression();
1025+
$subscriptExpression->parent = $postfixExpression->parent;
1026+
$postfixExpression->parent = $subscriptExpression;
1027+
1028+
$subscriptExpression->postfixExpression = $postfixExpression;
1029+
$subscriptExpression->openBracketOrBrace = $this->eat(TokenKind::OpenBracketToken); // Only [] syntax is supported, not {}
1030+
$token = $this->getCurrentToken();
1031+
if ($token->kind === TokenKind::VariableName) {
1032+
$subscriptExpression->accessExpression = $this->parseSimpleVariable($subscriptExpression);
1033+
} elseif ($token->kind === TokenKind::IntegerLiteralToken) {
1034+
$subscriptExpression->accessExpression = $this->parseNumericLiteralExpression($subscriptExpression);
1035+
} elseif ($token->kind === TokenKind::Name) {
1036+
$subscriptExpression->accessExpression = $this->parseTemplateStringSubscriptStringLiteral($subscriptExpression);
1037+
} else {
1038+
$subscriptExpression->accessExpression = new MissingToken(TokenKind::Expression, $token->fullStart);
1039+
}
1040+
1041+
$subscriptExpression->closeBracketOrBrace = $this->eat(TokenKind::CloseBracketToken);
1042+
1043+
return $subscriptExpression;
1044+
}
1045+
1046+
private function parseTemplateStringSubscriptStringLiteral($parentNode) : StringLiteral {
1047+
$expression = new StringLiteral();
1048+
$expression->parent = $parentNode;
1049+
$expression->children = $this->eat(TokenKind::Name);
1050+
return $expression;
1051+
}
1052+
1053+
private function parseTemplateStringMemberAccessExpression($expression) : MemberAccessExpression {
1054+
$memberAccessExpression = new MemberAccessExpression();
1055+
$memberAccessExpression->parent = $expression->parent;
1056+
$expression->parent = $memberAccessExpression;
1057+
1058+
$memberAccessExpression->dereferencableExpression = $expression;
1059+
$memberAccessExpression->arrowToken = $this->eat(TokenKind::ArrowToken);
1060+
$memberAccessExpression->memberName = $this->eat(TokenKind::Name);
1061+
1062+
return $memberAccessExpression;
1063+
}
1064+
10281065
private function parseNumericLiteralExpression($parentNode) {
10291066
$numericLiteral = new NumericLiteral();
10301067
$numericLiteral->parent = $parentNode;
@@ -1932,10 +1969,7 @@ private function parseDeclareDirective($parentNode) {
19321969
TokenKind::InvalidOctalLiteralToken,
19331970
TokenKind::InvalidHexadecimalLiteral,
19341971
TokenKind::InvalidBinaryLiteral,
1935-
TokenKind::StringLiteralToken,
1936-
TokenKind::UnterminatedStringLiteralToken,
1937-
TokenKind::NoSubstitutionTemplateLiteral,
1938-
TokenKind::UnterminatedNoSubstitutionTemplateLiteral
1972+
TokenKind::StringLiteralToken
19391973
); // TODO simplify
19401974

19411975
return $declareDirective;
@@ -1959,9 +1993,10 @@ private function parseSimpleVariableFn() {
19591993
$token->kind === TokenKind::OpenBraceToken ?
19601994
$this->parseBracedExpression($variable) :
19611995
$this->parseSimpleVariable($variable);
1962-
} elseif ($token->kind === TokenKind::VariableName) {
1963-
// TODO consider splitting into dollar and name
1964-
$variable->name = $this->eat(TokenKind::VariableName);
1996+
} elseif ($token->kind === TokenKind::VariableName || $token->kind === TokenKind::StringVarname) {
1997+
// TODO consider splitting into dollar and name.
1998+
// StringVarname is the variable name without the leading $, as used inside a template string, e.g. `"${foo}"`.
1999+
$variable->name = $this->eat(TokenKind::VariableName, TokenKind::StringVarname);
19652000
} else {
19662001
$variable->name = new MissingToken(TokenKind::VariableName, $token->fullStart);
19672002
}

src/PhpTokenizer.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,11 +289,13 @@ public static function getTokensArrayFromContent(
289289
T_OBJECT_CAST => TokenKind::ObjectCastToken,
290290
T_STRING_CAST => TokenKind::StringCastToken,
291291
T_UNSET_CAST => TokenKind::UnsetCastToken,
292+
292293
T_START_HEREDOC => TokenKind::HeredocStart,
293294
T_END_HEREDOC => TokenKind::HeredocEnd,
294-
T_STRING_VARNAME => TokenKind::VariableName,
295+
T_STRING_VARNAME => TokenKind::StringVarname,
295296
T_COMMENT => TokenKind::CommentToken,
296-
T_DOC_COMMENT => TokenKind::DocCommentToken
297+
T_DOC_COMMENT => TokenKind::DocCommentToken,
298+
T_NUM_STRING => TokenKind::IntegerLiteralToken
297299
];
298300

299301
const PARSE_CONTEXT_TO_PREFIX = [

src/TokenKind.php

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,14 +156,6 @@ class TokenKind {
156156
const InvalidHexadecimalLiteral = 307;
157157
const InvalidBinaryLiteral = 308;
158158
const StringLiteralToken = 309;
159-
const UnterminatedStringLiteralToken = 310;
160-
161-
const TemplateStringStart = 311;
162-
const TemplateStringMiddle = 312;
163-
const TemplateStringEnd = 313;
164-
const NoSubstitutionTemplateLiteral = 314;
165-
const UnterminatedNoSubstitutionTemplateLiteral = 315;
166-
const UnterminatedTemplateStringEnd = 316;
167159

168160
// RESERVED WORDS
169161
const IntReservedWord = 317;
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?php
2+
"$x"
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[
2+
{
3+
"kind": "ScriptSectionStartTag",
4+
"textLength": 6
5+
},
6+
{
7+
"kind": "DoubleQuoteToken",
8+
"textLength": 1
9+
},
10+
{
11+
"kind": "VariableName",
12+
"textLength": 2
13+
},
14+
{
15+
"kind": "DoubleQuoteToken",
16+
"textLength": 1
17+
},
18+
{
19+
"kind": "EndOfFileToken",
20+
"textLength": 0
21+
}
22+
]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?php
2+
"${x}"
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[
2+
{
3+
"kind": "ScriptSectionStartTag",
4+
"textLength": 6
5+
},
6+
{
7+
"kind": "DoubleQuoteToken",
8+
"textLength": 1
9+
},
10+
{
11+
"kind": "DollarOpenBraceToken",
12+
"textLength": 2
13+
},
14+
{
15+
"kind": "StringVarname",
16+
"textLength": 1
17+
},
18+
{
19+
"kind": "CloseBraceToken",
20+
"textLength": 1
21+
},
22+
{
23+
"kind": "DoubleQuoteToken",
24+
"textLength": 1
25+
},
26+
{
27+
"kind": "EndOfFileToken",
28+
"textLength": 0
29+
}
30+
]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<?php
2+
"${$x}"
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
[
2+
{
3+
"kind": "ScriptSectionStartTag",
4+
"textLength": 6
5+
},
6+
{
7+
"kind": "DoubleQuoteToken",
8+
"textLength": 1
9+
},
10+
{
11+
"kind": "DollarOpenBraceToken",
12+
"textLength": 2
13+
},
14+
{
15+
"kind": "VariableName",
16+
"textLength": 2
17+
},
18+
{
19+
"kind": "CloseBraceToken",
20+
"textLength": 1
21+
},
22+
{
23+
"kind": "DoubleQuoteToken",
24+
"textLength": 1
25+
},
26+
{
27+
"kind": "EndOfFileToken",
28+
"textLength": 0
29+
}
30+
]

0 commit comments

Comments
 (0)