Skip to content

Commit 9bce51e

Browse files
author
Fabrice Bellard
committed
improved JSON parser conformity (chqrlie) (#250)
1 parent aaa9cea commit 9bce51e

File tree

5 files changed

+152
-31
lines changed

5 files changed

+152
-31
lines changed

TODO

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,5 @@ Optimization ideas:
6262
Test262o: 0/11262 errors, 463 excluded
6363
Test262o commit: 7da91bceb9ce7613f87db47ddd1292a2dda58b42 (es5-tests branch)
6464

65-
Result: 60/79202 errors, 1610 excluded, 6738 skipped
65+
Result: 58/79202 errors, 1610 excluded, 6738 skipped
6666
Test262 commit: 27622d764767dcb3778784884022c2c7de5769b8

quickjs.c

Lines changed: 149 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12127,7 +12127,7 @@ static JSValue js_atof(JSContext *ctx, const char *str, const char **pp,
1212712127
case ATOD_TYPE_FLOAT64:
1212812128
{
1212912129
double d;
12130-
d = js_atod(buf,NULL, radix, is_float ? 0 : JS_ATOD_INT_ONLY,
12130+
d = js_atod(buf, NULL, radix, is_float ? 0 : JS_ATOD_INT_ONLY,
1213112131
&atod_mem);
1213212132
/* return int or float64 */
1213312133
val = JS_NewFloat64(ctx, d);
@@ -21031,11 +21031,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
2103121031
goto invalid_char;
2103221032
c = *p;
2103321033
if (c < 0x20) {
21034-
if (!s->cur_func) {
21035-
if (do_throw)
21036-
js_parse_error_pos(s, p, "invalid character in a JSON string");
21037-
goto fail;
21038-
}
2103921034
if (sep == '`') {
2104021035
if (c == '\r') {
2104121036
if (p[1] == '\n')
@@ -21081,8 +21076,6 @@ static __exception int js_parse_string(JSParseState *s, int sep,
2108121076
continue;
2108221077
default:
2108321078
if (c >= '0' && c <= '9') {
21084-
if (!s->cur_func)
21085-
goto invalid_escape; /* JSON case */
2108621079
if (!(s->cur_func->js_mode & JS_MODE_STRICT) && sep != '`')
2108721080
goto parse_escape;
2108821081
if (c == '0' && !(p[1] >= '0' && p[1] <= '9')) {
@@ -21851,6 +21844,150 @@ static JSAtom json_parse_ident(JSParseState *s, const uint8_t **pp, int c)
2185121844
return atom;
2185221845
}
2185321846

21847+
/* Parse the body of a JSON string literal into s->token.
 *
 * s   : parser state; s->buf_end bounds the input, s->ctx is used for the
 *       string buffer, and s->token receives the result.
 * pp  : in/out cursor. On entry *pp points at the first character after the
 *       opening delimiter (the caller skips it before calling). On success
 *       it is updated to point just past the closing delimiter. It is left
 *       untouched on failure.
 * sep : the delimiter character that terminates the string.
 *
 * Accepted escapes: \b \f \n \r \t \\ \/ \uXXXX (exactly four hex digits)
 * and a backslash followed by `sep` itself. Any other escape, any raw
 * character below 0x20, and any invalid UTF-8 sequence is reported as an
 * error.
 *
 * Returns 0 on success (token is TOK_STRING), -1 on failure. The temporary
 * string buffer is released on every failure path.
 */
static int json_parse_string(JSParseState *s, const uint8_t **pp, int sep)
21848+
{
21849+
const uint8_t *p, *p_next;
21850+
int i;
21851+
uint32_t c;
21852+
StringBuffer b_s, *b = &b_s;
21853+
21854+
if (string_buffer_init(s->ctx, b, 32))
21855+
goto fail; /* NOTE(review): string_buffer_free() then runs on a buffer whose init failed; presumably safe, confirm */
21856+
21857+
p = *pp;
21858+
for(;;) {
21859+
if (p >= s->buf_end) {
21860+
goto end_of_input; /* ran out of input before seeing the closing delimiter */
21861+
}
21862+
c = *p++;
21863+
if (c == sep)
21864+
break;
21865+
if (c < 0x20) {
21866+
js_parse_error_pos(s, p - 1, "Bad control character in string literal");
21867+
goto fail;
21868+
}
21869+
if (c == '\\') {
21870+
c = *p++; /* read without a bounds check; presumably relies on a readable terminator byte at buf_end, confirm */
21871+
switch(c) {
21872+
case 'b': c = '\b'; break;
21873+
case 'f': c = '\f'; break;
21874+
case 'n': c = '\n'; break;
21875+
case 'r': c = '\r'; break;
21876+
case 't': c = '\t'; break;
21877+
case '\\': break;
21878+
case '/': break;
21879+
case 'u':
21880+
c = 0;
21881+
for(i = 0; i < 4; i++) { /* accumulate exactly four hex digits; the value is appended as-is, surrogate halves are not combined here */
21882+
int h = from_hex(*p++);
21883+
if (h < 0) {
21884+
js_parse_error_pos(s, p - 1, "Bad Unicode escape");
21885+
goto fail;
21886+
}
21887+
c = (c << 4) | h;
21888+
}
21889+
break;
21890+
default:
21891+
if (c == sep) /* an escaped delimiter stands for itself */
21892+
break;
21893+
if (p > s->buf_end) /* the escape character was read at or beyond buf_end: input is truncated */
21894+
goto end_of_input;
21895+
js_parse_error_pos(s, p - 1, "Bad escaped character");
21896+
goto fail;
21897+
}
21898+
} else
21899+
if (c >= 0x80) {
21900+
c = unicode_from_utf8(p - 1, UTF8_CHAR_LEN_MAX, &p_next); /* re-decode starting at the lead byte just consumed */
21901+
if (c > 0x10FFFF) { /* c is unsigned, so a negative return value would also land here */
21902+
js_parse_error_pos(s, p - 1, "Bad UTF-8 sequence");
21903+
goto fail;
21904+
}
21905+
p = p_next;
21906+
}
21907+
if (string_buffer_putc(b, c))
21908+
goto fail; /* no message is reported here; presumably the helper already raised the error, confirm */
21909+
}
21910+
s->token.val = TOK_STRING;
21911+
s->token.u.str.sep = sep;
21912+
s->token.u.str.str = string_buffer_end(b);
21913+
*pp = p;
21914+
return 0;
21915+
21916+
end_of_input:
21917+
js_parse_error(s, "Unexpected end of JSON input");
21918+
fail:
21919+
string_buffer_free(b);
21920+
return -1;
21921+
}
21922+
21923+
/* Parse a JSON number starting at *pp into s->token.
 *
 * The syntax is validated by hand before conversion: optional sign, integer
 * digits with no leading zero before another digit, optional fraction with
 * at least one digit, optional exponent with at least one digit. When
 * s->ext_json is set, integers with a 0x/0X, 0o/0O or 0b/0B prefix are also
 * accepted. The validated text is then converted with js_atod().
 *
 * s  : parser state; s->ext_json enables the radix prefixes, s->token
 *      receives the result.
 * pp : in/out cursor. Updated to the first character after the number on
 *      success, left untouched on error.
 *
 * Returns 0 on success (token is TOK_NUMBER holding a float64 value). On a
 * syntax error it returns whatever js_parse_error_pos() returns
 * (presumably -1, confirm).
 */
static int json_parse_number(JSParseState *s, const uint8_t **pp)
21924+
{
21925+
const uint8_t *p = *pp;
21926+
const uint8_t *p_start = p;
21927+
int radix;
21928+
double d;
21929+
JSATODTempMem atod_mem;
21930+
21931+
if (*p == '+' || *p == '-') /* NOTE(review): '+' is accepted here regardless of s->ext_json; presumably the caller only dispatches '+' in ext_json mode, confirm */
21932+
p++;
21933+
21934+
if (!is_digit(*p))
21935+
return js_parse_error_pos(s, p, "Unexpected token '%c'", *p_start); /* NOTE(review): the message prints the first character (possibly the sign), not the offending *p; confirm this is intended */
21936+
21937+
if (p[0] == '0') {
21938+
if (s->ext_json) {
21939+
/* also accepts base 16, 8 and 2 prefix for integers */
21940+
radix = 10;
21941+
if (p[1] == 'x' || p[1] == 'X') {
21942+
p += 2;
21943+
radix = 16;
21944+
} else if ((p[1] == 'o' || p[1] == 'O')) {
21945+
p += 2;
21946+
radix = 8;
21947+
} else if ((p[1] == 'b' || p[1] == 'B')) {
21948+
p += 2;
21949+
radix = 2;
21950+
}
21951+
if (radix != 10) {
21952+
/* prefix is present */
21953+
if (to_digit(*p) >= radix) /* require at least one valid digit after the prefix */
21954+
return js_parse_error_pos(s, p, "Unexpected token '%c'", *p);
21955+
d = js_atod((const char *)p_start, (const char **)&p, 0, /* re-parses from the sign; js_atod sets p to the end of the number (radix 0 presumably means auto-detect from the prefix, confirm) */
21956+
JS_ATOD_INT_ONLY | JS_ATOD_ACCEPT_BIN_OCT, &atod_mem);
21957+
goto done;
21958+
}
21959+
}
21960+
if (is_digit(p[1])) /* a leading zero followed by another digit is rejected */
21961+
return js_parse_error_pos(s, p, "Unexpected number");
21962+
}
21963+
21964+
while (is_digit(*p))
21965+
p++;
21966+
21967+
if (*p == '.') {
21968+
p++;
21969+
if (!is_digit(*p)) /* inputs such as "1." are rejected */
21970+
return js_parse_error_pos(s, p, "Unterminated fractional number");
21971+
while (is_digit(*p))
21972+
p++;
21973+
}
21974+
if (*p == 'e' || *p == 'E') {
21975+
p++;
21976+
if (*p == '+' || *p == '-')
21977+
p++;
21978+
if (!is_digit(*p))
21979+
return js_parse_error_pos(s, p, "Exponent part is missing a number");
21980+
while (is_digit(*p))
21981+
p++;
21982+
}
21983+
d = js_atod((const char *)p_start, NULL, 10, 0, &atod_mem); /* no end pointer requested: the end of the token is the p computed by the scan above */
21984+
done:
21985+
s->token.val = TOK_NUMBER;
21986+
s->token.u.num.val = JS_NewFloat64(s->ctx, d);
21987+
*pp = p;
21988+
return 0;
21989+
}
21990+
2185421991
static __exception int json_next_token(JSParseState *s)
2185521992
{
2185621993
const uint8_t *p;
@@ -21882,7 +22019,8 @@ static __exception int json_next_token(JSParseState *s)
2188222019
}
2188322020
/* fall through */
2188422021
case '\"':
21885-
if (js_parse_string(s, c, TRUE, p + 1, &s->token, &p))
22022+
p++;
22023+
if (json_parse_string(s, &p, c))
2188622024
goto fail;
2188722025
break;
2188822026
case '\r': /* accept DOS and MAC newline sequences */
@@ -21999,23 +22137,8 @@ static __exception int json_next_token(JSParseState *s)
2199922137
case '9':
2200022138
/* number */
2200122139
parse_number:
22002-
{
22003-
JSValue ret;
22004-
int flags, radix;
22005-
if (!s->ext_json) {
22006-
flags = 0;
22007-
radix = 10;
22008-
} else {
22009-
flags = ATOD_ACCEPT_BIN_OCT;
22010-
radix = 0;
22011-
}
22012-
ret = js_atof(s->ctx, (const char *)p, (const char **)&p, radix,
22013-
flags);
22014-
if (JS_IsException(ret))
22015-
goto fail;
22016-
s->token.val = TOK_NUMBER;
22017-
s->token.u.num.val = ret;
22018-
}
22140+
if (json_parse_number(s, &p))
22141+
goto fail;
2201922142
break;
2202022143
default:
2202122144
if (c >= 128) {

test262_errors.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ test262/test/staging/sm/Function/function-toString-builtin.js:14: Test262Error:
1212
}' Expected SameValue(«null», «null») to be false
1313
test262/test/staging/sm/Function/implicit-this-in-parameter-expression.js:13: Test262Error: Expected SameValue(«[object Object]», «undefined») to be true
1414
test262/test/staging/sm/Function/invalid-parameter-list.js:35: Error: Assertion failed: expected exception SyntaxError, no exception thrown
15-
test262/test/staging/sm/JSON/parse-number-syntax.js:39: Test262Error: parsing string <1.> threw a non-SyntaxError exception: Test262Error: string <1.> shouldn't have parsed as JSON Expected SameValue(«false», «true») to be true Expected SameValue(«true», «false») to be true
16-
test262/test/staging/sm/JSON/parse-syntax-errors-02.js:51: Test262Error: parsing string <["Illegal backslash escape: \x15"]> threw a non-SyntaxError exception: Test262Error: string <["Illegal backslash escape: \x15"]> shouldn't have parsed as JSON Expected SameValue(«false», «true») to be true Expected SameValue(«true», «false») to be true
1715
test262/test/staging/sm/Math/cbrt-approx.js:26: Error: got 1.39561242508609, expected a number near 1.3956124250860895 (relative error: 2)
1816
test262/test/staging/sm/RegExp/constructor-ordering-2.js:15: Test262Error: Expected SameValue(«false», «true») to be true
1917
test262/test/staging/sm/RegExp/regress-613820-1.js:13: Test262Error: Expected SameValue(«"aaa"», «"aa"») to be true

tests/test_builtin.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -596,7 +596,7 @@ function test_json()
596596
]
597597
]`);
598598

599-
assert_json_error('\n" @\\x"');
599+
assert_json_error('\n" \\@x"');
600600
assert_json_error('\n{ "a": @x }"');
601601
}
602602

tests/test_std.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ function test_ext_json()
134134
"y":true, // also a comment
135135
z2:null, // unquoted property names
136136
"a":[+1,0o10,0xa0,], // plus prefix, octal, hexadecimal
137-
"s":"str",} // trailing comma in objects and arrays
137+
"s":'str',} // trailing comma in objects and arrays, single quoted string
138138
`;
139139
obj = std.parseExtJSON(input);
140140
assert(JSON.stringify(obj), expected);

0 commit comments

Comments
 (0)