Skip to content

Commit b4945d8

Browse files
authored
Filter to valid surrogate pairs (#56)
* filter to valid surrogate pairs
* handle out of range
1 parent 47d061a commit b4945d8

File tree

3 files changed

+41
-6
lines changed

3 files changed

+41
-6
lines changed

lexer.js

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,33 @@ function parseCJS (source, name = '@') {
5252
return result;
5353
}
5454

/**
 * Decode a token that may be a quoted JavaScript string literal.
 *
 * If `str` starts with a single or double quote it is evaluated as JavaScript
 * source to resolve escape sequences, and the result is then checked for
 * well-formed UTF-16: every high surrogate (0xD800-0xDBFF) must be followed
 * immediately by a low surrogate (0xDC00-0xDFFF), and no low surrogate may
 * appear on its own. Any other token is returned unchanged.
 *
 * @param {string} str - Raw token text (presumably an export name taken from
 *   the lexed source; confirm against callers).
 * @returns {string|undefined} The decoded string, `str` itself when it is not
 *   quoted, or `undefined` when evaluation throws or the decoded string
 *   contains an unpaired surrogate.
 */
function decode (str) {
  if (str[0] === '"' || str[0] === '\'') {
    try {
      // NOTE(review): indirect eval runs `str` as global code. Only the first
      // character is checked here, so this is safe only if callers pass
      // nothing but complete string-literal tokens - confirm at call sites.
      const decoded = (0, eval)(str);
      // Filter to exclude strings containing unpaired UTF-16 surrogates
      for (let i = 0; i < decoded.length; i++) {
        // The top six bits identify a surrogate code unit and its kind
        const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
        if (surrogatePrefix === 0xD800) {
          // High surrogate: the next code unit must be a low surrogate.
          // Past the end charCodeAt yields NaN, which masks to 0 and fails.
          if ((decoded.charCodeAt(++i) & 0xFC00) !== 0xDC00)
            return;
        }
        else if (surrogatePrefix === 0xDC00) {
          // Low surrogate that was not consumed as the second half of a pair
          return;
        }
        // Anything else, including U+E000-U+FFFF, is an ordinary code unit
      }
      return decoded;
    }
    // Deliberate best-effort: a literal that fails to evaluate yields undefined
    catch {}
  }
  else {
    return str;
  }
}
6684

src/lexer.js

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,25 @@ export function parse (source, name = '@') {
4040
function decode (str) {
4141
if (str[0] === '"' || str[0] === '\'') {
4242
try {
43-
return (0, eval)(str);
43+
const decoded = (0, eval)(str);
44+
// Filter to exclude non-matching UTF-16 surrogate strings
45+
for (let i = 0; i < decoded.length; i++) {
46+
const surrogatePrefix = decoded.charCodeAt(i) & 0xFC00;
47+
if (surrogatePrefix < 0xD800) {
48+
// Not a surrogate
49+
continue;
50+
}
51+
else if (surrogatePrefix === 0xD800) {
52+
// Validate surrogate pair
53+
if ((decoded.charCodeAt(++i) & 0xFC00) !== 0xDC00)
54+
return;
55+
}
56+
else {
57+
// Out-of-range surrogate code (above 0xD800)
58+
return;
59+
}
60+
}
61+
return decoded;
4462
}
4563
catch {}
4664
}

test/_unit.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,6 @@ suite('Lexer', () => {
494494
'ab cd',
495495
'not identifier',
496496
'\u{D83C}\u{DF10}',
497-
'\u{D83C}',
498497
'\'',
499498
'@notidentifier',
500499
'%notidentifier',

0 commit comments

Comments
 (0)