Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4461548

Browse files
Unescape constant strings
1 parent a6c17e2 commit 4461548

File tree

2 files changed

+148
-12
lines changed

2 files changed

+148
-12
lines changed

‎src/Parser/ConstExprParser.php

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,35 @@
44

55
use PHPStan\PhpDocParser\Ast;
66
use PHPStan\PhpDocParser\Lexer\Lexer;
7+
use function chr;
8+
use function hexdec;
9+
use function octdec;
10+
use function preg_replace_callback;
11+
use function str_replace;
712
use function strtolower;
8-
use function trim;
13+
use function substr;
914

1015
class ConstExprParser
1116
{
1217

18+
private const REPLACEMENTS = [
19+
'\\' => '\\',
20+
'n' => "\n",
21+
'r' => "\r",
22+
't' => "\t",
23+
'f' => "\f",
24+
'v' => "\v",
25+
'e' => "\x1B",
26+
];
27+
28+
/** @var bool */
29+
private $unescapeStrings;
30+
31+
/**
 * @param bool $unescapeStrings When true, parse() called with $trimStrings enabled
 *                              resolves escape sequences in quoted strings via
 *                              unescapeString(); when false it only strips the
 *                              surrounding quote characters.
 */
public function __construct(bool $unescapeStrings = false)
32+
{
33+
$this->unescapeStrings = $unescapeStrings;
34+
}
35+
1336
public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\ConstExpr\ConstExprNode
1437
{
1538
if ($tokens->isCurrentTokenType(Lexer::TOKEN_FLOAT)) {
@@ -24,18 +47,14 @@ public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\Con
2447
return new Ast\ConstExpr\ConstExprIntegerNode($value);
2548
}
2649

27-
if ($tokens->isCurrentTokenType(Lexer::TOKEN_SINGLE_QUOTED_STRING)) {
28-
$value = $tokens->currentTokenValue();
29-
if ($trimStrings) {
30-
$value = trim($tokens->currentTokenValue(), "'");
31-
}
32-
$tokens->next();
33-
return new Ast\ConstExpr\ConstExprStringNode($value);
34-
35-
} elseif ($tokens->isCurrentTokenType(Lexer::TOKEN_DOUBLE_QUOTED_STRING)) {
50+
if ($tokens->isCurrentTokenType(Lexer::TOKEN_SINGLE_QUOTED_STRING, Lexer::TOKEN_DOUBLE_QUOTED_STRING)) {
3651
$value = $tokens->currentTokenValue();
3752
if ($trimStrings) {
38-
$value = trim($tokens->currentTokenValue(), '"');
53+
if ($this->unescapeStrings) {
54+
$value = self::unescapeString($value);
55+
} else {
56+
$value = substr($value, 1, -1);
57+
}
3958
}
4059
$tokens->next();
4160
return new Ast\ConstExpr\ConstExprStringNode($value);
@@ -137,4 +156,75 @@ private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprA
137156
return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
138157
}
139158

159+
/**
 * Removes the surrounding quotes from a quoted string token and resolves its escapes.
 *
 * The first character of $string decides the flavour: for a single-quoted
 * string only `\\` and `\'` are unescaped; anything else is treated as a
 * double-quoted string and handed to parseEscapeSequences().
 *
 * @param string $string quoted string including both quote characters
 */
private static function unescapeString(string $string): string
{
    // Drop the opening and closing quote characters.
    $inner = substr($string, 1, -1);

    if ($string[0] !== '\'') {
        return self::parseEscapeSequences($inner, '"');
    }

    $escaped = ['\\\\', '\\\''];
    $plain = ['\\', '\''];

    return str_replace($escaped, $plain, $inner);
}
173+
174+
/**
 * Resolves the backslash escape sequences inside the body of a quoted string.
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
 *
 * Handled sequences: the escaped quote character, `\\`, `\n`, `\r`, `\t`, `\f`,
 * `\v`, `\e`, `\xH`/`\xHH` (hex byte), `\N`..`\NNN` (octal byte) and `\u{H...}`
 * (Unicode code point). Text that does not match one of these is left untouched.
 *
 * @param string $str   string contents without the surrounding quotes
 * @param string $quote quote character whose backslash-escaped form is replaced by the bare quote
 */
private static function parseEscapeSequences(string $str, string $quote): string
{
    // The escaped quote is not part of the pattern below, so resolve it first.
    $str = str_replace('\\' . $quote, $quote, $str);

    return preg_replace_callback(
        '~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
        static function ($matches) {
            $str = $matches[1];

            if (isset(self::REPLACEMENTS[$str])) {
                return self::REPLACEMENTS[$str];
            }
            if ($str[0] === 'x' || $str[0] === 'X') {
                return chr(hexdec(substr($str, 1)));
            }
            if ($str[0] === 'u') {
                $codePoint = hexdec($matches[2]);

                // hexdec() returns a float once the value no longer fits into an
                // integer; passing that to the int-typed codePointToUtf8() would
                // raise a TypeError. Nothing above 0x1FFFFF is encodable anyway,
                // so answer with U+FFFD (replacement character) right here.
                if ($codePoint > 0x1FFFFF) {
                    return "\xef\xbf\xbd";
                }

                return self::codePointToUtf8((int) $codePoint);
            }

            // Remaining alternative of the pattern: one to three octal digits.
            return chr(octdec($str));
        },
        $str
    );
}
201+
202+
/**
 * Encodes a Unicode code point as its UTF-8 byte sequence.
 *
 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
 *
 * UTF-8 is only defined up to U+10FFFF, so negative values and anything above
 * that limit yield the encoded replacement character U+FFFD instead of a byte
 * sequence no conforming decoder would accept.
 *
 * @param int $num code point to encode
 * @return string one to four bytes of UTF-8, or "\xef\xbf\xbd" for an out-of-range code point
 */
private static function codePointToUtf8(int $num): string
{
    // Invalid UTF-8 codepoint escape sequence: Codepoint out of range
    if ($num < 0 || $num > 0x10FFFF) {
        return "\xef\xbf\xbd";
    }

    // 1 byte: 0xxxxxxx
    if ($num <= 0x7F) {
        return chr($num);
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ($num <= 0x7FF) {
        return chr(($num >> 6) + 0xC0)
            . chr(($num & 0x3F) + 0x80);
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($num <= 0xFFFF) {
        return chr(($num >> 12) + 0xE0)
            . chr((($num >> 6) & 0x3F) + 0x80)
            . chr(($num & 0x3F) + 0x80);
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(($num >> 18) + 0xF0)
        . chr((($num >> 12) & 0x3F) + 0x80)
        . chr((($num >> 6) & 0x3F) + 0x80)
        . chr(($num & 0x3F) + 0x80);
}
229+
140230
}

‎tests/PHPStan/Parser/ConstExprParserTest.php

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ protected function setUp(): void
2929
{
3030
parent::setUp();
3131
$this->lexer = new Lexer();
32-
$this->constExprParser = new ConstExprParser();
32+
$this->constExprParser = new ConstExprParser(true);
3333
}
3434

3535

@@ -358,4 +358,50 @@ public function provideFetchNodeParseData(): Iterator
358358
];
359359
}
360360

361+
/**
 * Parses $input with string trimming enabled and checks the resulting node
 * as well as the token the iterator is left on afterwards.
 *
 * @dataProvider provideWithTrimStringsStringNodeParseData
 */
public function testParseWithTrimStrings(string $input, ConstExprNode $expectedExpr, int $nextTokenType = Lexer::TOKEN_END): void
{
    $lexed = $this->lexer->tokenize($input);
    $tokenStream = new TokenIterator($lexed);

    // Second argument switches on $trimStrings.
    $actualExpr = $this->constExprParser->parse($tokenStream, true);

    // Compare the printed form first, then the node objects themselves.
    $this->assertSame((string) $expectedExpr, (string) $actualExpr);
    $this->assertEquals($expectedExpr, $actualExpr);

    // The parser must have advanced to the expected follow-up token.
    $this->assertSame($nextTokenType, $tokenStream->currentTokenType());
}
373+
374+
/**
 * Data sets for testParseWithTrimStrings(): the quoted input and the string
 * node expected after trimming and unescaping.
 */
public function provideWithTrimStringsStringNodeParseData(): Iterator
{
    yield from [
        // plain double-quoted string
        [
            '"foo"',
            new ConstExprStringNode('foo'),
        ],
        // double-quoted string with \n, \" and \r escapes
        [
            '"Foo \\n\\"\\r Bar"',
            new ConstExprStringNode("Foo \n\"\r Bar"),
        ],
        // plain single-quoted string
        [
            '\'bar\'',
            new ConstExprStringNode('bar'),
        ],
        // single-quoted string with an escaped quote
        [
            '\'Foo \\\' Bar\'',
            new ConstExprStringNode('Foo \' Bar'),
        ],
        // Unicode code point escape
        [
            '"\u{1f601}"',
            new ConstExprStringNode("\u{1f601}"),
        ],
        // code point too large: expected to become U+FFFD
        [
            '"\u{ffffffff}"',
            new ConstExprStringNode("\u{fffd}"),
        ],
    ];
}
406+
361407
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /