Skip to content

Commit 3f06c95

Browse files
authored
Use more functions for explicit surrogate handling (#353)
- add `is_surrogate`, `get_hi_surrogate` and `get_lo_surrogate`
- use surrogate functions instead of hard-coded computations
1 parent 1db884b commit 3f06c95

File tree

2 files changed

+30
-19
lines changed

2 files changed

+30
-19
lines changed

cutils.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,14 +384,29 @@ static inline void dbuf_set_error(DynBuf *s)
384384
int unicode_to_utf8(uint8_t *buf, unsigned int c);
385385
int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp);
386386

387+
static inline BOOL is_surrogate(uint32_t c)
388+
{
389+
return (c >> 11) == (0xD800 >> 11); // 0xD800-0xDFFF
390+
}
391+
387392
static inline BOOL is_hi_surrogate(uint32_t c)
388393
{
389-
return 54 == (c >> 10); // 0xD800-0xDBFF
394+
return (c >> 10) == (0xD800 >> 10); // 0xD800-0xDBFF
390395
}
391396

392397
static inline BOOL is_lo_surrogate(uint32_t c)
393398
{
394-
return 55 == (c >> 10); // 0xDC00-0xDFFF
399+
return (c >> 10) == (0xDC00 >> 10); // 0xDC00-0xDFFF
400+
}
401+
402+
static inline uint32_t get_hi_surrogate(uint32_t c)
403+
{
404+
return (c >> 10) - (0x10000 >> 10) + 0xD800;
405+
}
406+
407+
static inline uint32_t get_lo_surrogate(uint32_t c)
408+
{
409+
return (c & 0x3FF) | 0xDC00;
395410
}
396411

397412
static inline uint32_t from_surrogate(uint32_t hi, uint32_t lo)

quickjs.c

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3664,10 +3664,9 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c)
36643664
{
36653665
if (unlikely(c >= 0x10000)) {
36663666
/* surrogate pair */
3667-
c -= 0x10000;
3668-
if (string_buffer_putc16(s, (c >> 10) + 0xd800))
3667+
if (string_buffer_putc16(s, get_hi_surrogate(c)))
36693668
return -1;
3670-
c = (c & 0x3ff) + 0xdc00;
3669+
c = get_lo_surrogate(c);
36713670
}
36723671
return string_buffer_putc16(s, c);
36733672
}
@@ -3883,9 +3882,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
38833882
} else if (c <= 0x10FFFF) {
38843883
p = p_next;
38853884
/* surrogate pair */
3886-
c -= 0x10000;
3887-
string_buffer_putc16(b, (c >> 10) + 0xd800);
3888-
c = (c & 0x3ff) + 0xdc00;
3885+
string_buffer_putc16(b, get_hi_surrogate(c));
3886+
c = get_lo_surrogate(c);
38893887
} else {
38903888
/* invalid char */
38913889
c = 0xfffd;
@@ -11508,7 +11506,7 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValue val1)
1150811506
goto fail;
1150911507
break;
1151011508
default:
11511-
if (c < 32 || is_hi_surrogate(c) || is_lo_surrogate(c)) {
11509+
if (c < 32 || is_surrogate(c)) {
1151211510
snprintf(buf, sizeof(buf), "\\u%04x", c);
1151311511
if (string_buffer_puts8(b, buf))
1151411512
goto fail;
@@ -19796,8 +19794,7 @@ static __exception int json_next_token(JSParseState *s)
1979619794
js_parse_error(s, "Unexpected token '\\x%02x' in JSON", *p);
1979719795
} else {
1979819796
if (c > 0xFFFF) {
19799-
/* get high surrogate */
19800-
c = (c >> 10) - (0x10000 >> 10) + 0xD800;
19797+
c = get_hi_surrogate(c);
1980119798
}
1980219799
js_parse_error(s, "Unexpected token '\\u%04x' in JSON", c);
1980319800
}
@@ -39555,12 +39552,12 @@ static JSValue js_string_isWellFormed(JSContext *ctx, JSValue this_val,
3955539552

3955639553
for (i = 0, n = p->len; i < n; i++) {
3955739554
c = p->u.str16[i];
39558-
if (c < 0xD800 || c > 0xDFFF)
39555+
if (!is_surrogate(c))
3955939556
continue;
39560-
if (c > 0xDBFF || i+1 == n)
39557+
if (is_lo_surrogate(c) || i + 1 == n)
3956139558
break;
3956239559
c = p->u.str16[++i];
39563-
if (c < 0xDC00 || c > 0xDFFF)
39560+
if (!is_lo_surrogate(c))
3956439561
break;
3956539562
}
3956639563

@@ -39597,14 +39594,14 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValue this_val,
3959739594
p = JS_VALUE_GET_STRING(ret);
3959839595
for (i = 0, n = p->len; i < n; i++) {
3959939596
c = p->u.str16[i];
39600-
if (c < 0xD800 || c > 0xDFFF)
39597+
if (!is_surrogate(c))
3960139598
continue;
39602-
if (c > 0xDBFF || i+1 == n) {
39599+
if (is_lo_surrogate(c) || i + 1 == n) {
3960339600
p->u.str16[i] = 0xFFFD;
3960439601
continue;
3960539602
}
3960639603
c = p->u.str16[++i];
39607-
if (c < 0xDC00 || c > 0xDFFF)
39604+
if (!is_lo_surrogate(c))
3960839605
p->u.str16[--i] = 0xFFFD;
3960939606
}
3961039607

@@ -46865,8 +46862,7 @@ static JSValue js_global_decodeURI(JSContext *ctx, JSValue this_val,
4686546862
}
4686646863
c = (c << 6) | (c1 & 0x3f);
4686746864
}
46868-
if (c < c_min || c > 0x10FFFF ||
46869-
is_hi_surrogate(c) || is_lo_surrogate(c)) {
46865+
if (c < c_min || c > 0x10FFFF || is_surrogate(c)) {
4687046866
js_throw_URIError(ctx, "malformed UTF-8");
4687146867
goto fail;
4687246868
}

0 commit comments

Comments
 (0)