@@ -28,13 +28,6 @@ function expectSyntaxError(text: string) {
2828}
2929
3030describe ( 'Lexer' , ( ) => {
31- it ( 'disallows uncommon control characters' , ( ) => {
32- expectSyntaxError ( '\u0007' ) . to . deep . equal ( {
33- message : 'Syntax Error: Invalid character: U+0007.' ,
34- locations : [ { line : 1 , column : 1 } ] ,
35- } ) ;
36- } ) ;
37-
3831 it ( 'ignores BOM header' , ( ) => {
3932 expect ( lexOne ( '\uFEFF foo' ) ) . to . contain ( {
4033 kind : TokenKind . NAME ,
@@ -269,12 +262,98 @@ describe('Lexer', () => {
269262 value : 'slashes \\ /' ,
270263 } ) ;
271264
265+ expect ( lexOne ( '"unescaped unicode outside BMP \u{1f600}"' ) ) . to . contain ( {
266+ kind : TokenKind . STRING ,
267+ start : 0 ,
268+ end : 34 ,
269+ value : 'unescaped unicode outside BMP \u{1f600}' ,
270+ } ) ;
271+
272+ expect (
273+ lexOne ( '"unescaped maximal unicode outside BMP \u{10ffff}"' ) ,
274+ ) . to . contain ( {
275+ kind : TokenKind . STRING ,
276+ start : 0 ,
277+ end : 42 ,
278+ value : 'unescaped maximal unicode outside BMP \u{10ffff}' ,
279+ } ) ;
280+
272281 expect ( lexOne ( '"unicode \\u1234\\u5678\\u90AB\\uCDEF"' ) ) . to . contain ( {
273282 kind : TokenKind . STRING ,
274283 start : 0 ,
275284 end : 34 ,
276285 value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
277286 } ) ;
287+
288+ expect ( lexOne ( '"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"' ) ) . to . contain (
289+ {
290+ kind : TokenKind . STRING ,
291+ start : 0 ,
292+ end : 42 ,
293+ value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
294+ } ,
295+ ) ;
296+
297+ expect (
298+ lexOne ( '"string with unicode escape outside BMP \\u{1F600}"' ) ,
299+ ) . to . contain ( {
300+ kind : TokenKind . STRING ,
301+ start : 0 ,
302+ end : 50 ,
303+ value : 'string with unicode escape outside BMP \u{1f600}' ,
304+ } ) ;
305+
306+ expect ( lexOne ( '"string with minimal unicode escape \\u{0}"' ) ) . to . contain ( {
307+ kind : TokenKind . STRING ,
308+ start : 0 ,
309+ end : 42 ,
310+ value : 'string with minimal unicode escape \u{0}' ,
311+ } ) ;
312+
313+ expect (
314+ lexOne ( '"string with maximal unicode escape \\u{10FFFF}"' ) ,
315+ ) . to . contain ( {
316+ kind : TokenKind . STRING ,
317+ start : 0 ,
318+ end : 47 ,
319+ value : 'string with maximal unicode escape \u{10FFFF}' ,
320+ } ) ;
321+
322+ expect (
323+ lexOne ( '"string with maximal minimal unicode escape \\u{00000000}"' ) ,
324+ ) . to . contain ( {
325+ kind : TokenKind . STRING ,
326+ start : 0 ,
327+ end : 57 ,
328+ value : 'string with maximal minimal unicode escape \u{0}' ,
329+ } ) ;
330+
331+ expect (
332+ lexOne ( '"string with unicode surrogate pair escape \\uD83D\\uDE00"' ) ,
333+ ) . to . contain ( {
334+ kind : TokenKind . STRING ,
335+ start : 0 ,
336+ end : 56 ,
337+ value : 'string with unicode surrogate pair escape \u{1f600}' ,
338+ } ) ;
339+
340+ expect (
341+ lexOne ( '"string with minimal surrogate pair escape \\uD800\\uDC00"' ) ,
342+ ) . to . contain ( {
343+ kind : TokenKind . STRING ,
344+ start : 0 ,
345+ end : 56 ,
346+ value : 'string with minimal surrogate pair escape \u{10000}' ,
347+ } ) ;
348+
349+ expect (
350+ lexOne ( '"string with maximal surrogate pair escape \\uDBFF\\uDFFF"' ) ,
351+ ) . to . contain ( {
352+ kind : TokenKind . STRING ,
353+ start : 0 ,
354+ end : 56 ,
355+ value : 'string with maximal surrogate pair escape \u{10FFFF}' ,
356+ } ) ;
278357 } ) ;
279358
280359 it ( 'lex reports useful string errors' , ( ) => {
@@ -304,16 +383,19 @@ describe('Lexer', () => {
304383 locations : [ { line : 1 , column : 1 } ] ,
305384 } ) ;
306385
307- expectSyntaxError ( '"contains unescaped \u0007 control char"' ) . to . deep . equal (
308- {
309- message : 'Syntax Error: Invalid character within String: U+0007.' ,
310- locations : [ { line : 1 , column : 21 } ] ,
311- } ,
312- ) ;
386+ expectSyntaxError ( '"bad surrogate \uDEAD"' ) . to . deep . equal ( {
387+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
388+ locations : [ { line : 1 , column : 16 } ] ,
389+ } ) ;
390+
391+ expectSyntaxError ( '"bad high surrogate pair \uDEAD\uDEAD"' ) . to . deep . equal ( {
392+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
393+ locations : [ { line : 1 , column : 26 } ] ,
394+ } ) ;
313395
314- expectSyntaxError ( '"null-byte is not \u0000 end of file "' ) . to . deep . equal ( {
315- message : 'Syntax Error: Invalid character within String: U+0000 .' ,
316- locations : [ { line : 1 , column : 19 } ] ,
396+ expectSyntaxError ( '"bad low surrogate pair \uD800\uD800 "' ) . to . deep . equal ( {
397+ message : 'Syntax Error: Invalid character within String: U+D800 .' ,
398+ locations : [ { line : 1 , column : 25 } ] ,
317399 } ) ;
318400
319401 expectSyntaxError ( '"multi\nline"' ) . to . deep . equal ( {
@@ -360,6 +442,93 @@ describe('Lexer', () => {
360442 message : 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".' ,
361443 locations : [ { line : 1 , column : 6 } ] ,
362444 } ) ;
445+
446+ expectSyntaxError ( '"bad \\u{} esc"' ) . to . deep . equal ( {
447+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".' ,
448+ locations : [ { line : 1 , column : 6 } ] ,
449+ } ) ;
450+
451+ expectSyntaxError ( '"bad \\u{FXXX} esc"' ) . to . deep . equal ( {
452+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FX".' ,
453+ locations : [ { line : 1 , column : 6 } ] ,
454+ } ) ;
455+
456+ expectSyntaxError ( '"bad \\u{FFFF esc"' ) . to . deep . equal ( {
457+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF ".' ,
458+ locations : [ { line : 1 , column : 6 } ] ,
459+ } ) ;
460+
461+ expectSyntaxError ( '"bad \\u{FFFF"' ) . to . deep . equal ( {
462+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF"".' ,
463+ locations : [ { line : 1 , column : 6 } ] ,
464+ } ) ;
465+
466+ expectSyntaxError ( '"too high \\u{110000} esc"' ) . to . deep . equal ( {
467+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{110000}".' ,
468+ locations : [ { line : 1 , column : 11 } ] ,
469+ } ) ;
470+
471+ expectSyntaxError ( '"way too high \\u{12345678} esc"' ) . to . deep . equal ( {
472+ message :
473+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{12345678}".' ,
474+ locations : [ { line : 1 , column : 15 } ] ,
475+ } ) ;
476+
477+ expectSyntaxError ( '"too long \\u{000000000} esc"' ) . to . deep . equal ( {
478+ message :
479+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{000000000".' ,
480+ locations : [ { line : 1 , column : 11 } ] ,
481+ } ) ;
482+
483+ expectSyntaxError ( '"bad surrogate \\uDEAD esc"' ) . to . deep . equal ( {
484+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
485+ locations : [ { line : 1 , column : 16 } ] ,
486+ } ) ;
487+
488+ expectSyntaxError ( '"bad surrogate \\u{DEAD} esc"' ) . to . deep . equal ( {
489+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{DEAD}".' ,
490+ locations : [ { line : 1 , column : 16 } ] ,
491+ } ) ;
492+
493+ expectSyntaxError (
494+ '"cannot use braces for surrogate pair \\u{D83D}\\u{DE00} esc"' ,
495+ ) . to . deep . equal ( {
496+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{D83D}".' ,
497+ locations : [ { line : 1 , column : 39 } ] ,
498+ } ) ;
499+
500+ expectSyntaxError (
501+ '"bad high surrogate pair \\uDEAD\\uDEAD esc"' ,
502+ ) . to . deep . equal ( {
503+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
504+ locations : [ { line : 1 , column : 26 } ] ,
505+ } ) ;
506+
507+ expectSyntaxError (
508+ '"bad low surrogate pair \\uD800\\uD800 esc"' ,
509+ ) . to . deep . equal ( {
510+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD800".' ,
511+ locations : [ { line : 1 , column : 25 } ] ,
512+ } ) ;
513+
514+ expectSyntaxError (
515+ '"cannot escape half a pair \uD83D\\uDE00 esc"' ,
516+ ) . to . deep . equal ( {
517+ message : 'Syntax Error: Invalid character within String: U+D83D.' ,
518+ locations : [ { line : 1 , column : 28 } ] ,
519+ } ) ;
520+
521+ expectSyntaxError (
522+ '"cannot escape half a pair \\uD83D\uDE00 esc"' ,
523+ ) . to . deep . equal ( {
524+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
525+ locations : [ { line : 1 , column : 28 } ] ,
526+ } ) ;
527+
528+ expectSyntaxError ( '"bad \\uD83D\\not an escape"' ) . to . deep . equal ( {
529+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
530+ locations : [ { line : 1 , column : 6 } ] ,
531+ } ) ;
363532 } ) ;
364533
365534 it ( 'lexes block strings' , ( ) => {
@@ -419,6 +588,13 @@ describe('Lexer', () => {
419588 value : 'unescaped \\n\\r\\b\\t\\f\\u1234' ,
420589 } ) ;
421590
591+ expect ( lexOne ( '"""unescaped unicode outside BMP \u{1f600}"""' ) ) . to . contain ( {
592+ kind : TokenKind . BLOCK_STRING ,
593+ start : 0 ,
594+ end : 38 ,
595+ value : 'unescaped unicode outside BMP \u{1f600}' ,
596+ } ) ;
597+
422598 expect ( lexOne ( '"""slashes \\\\ \\/"""' ) ) . to . contain ( {
423599 kind : TokenKind . BLOCK_STRING ,
424600 start : 0 ,
@@ -491,18 +667,9 @@ describe('Lexer', () => {
491667 locations : [ { line : 1 , column : 16 } ] ,
492668 } ) ;
493669
494- expectSyntaxError (
495- '"""contains unescaped \u0007 control char"""' ,
496- ) . to . deep . equal ( {
497- message : 'Syntax Error: Invalid character within String: U+0007.' ,
498- locations : [ { line : 1 , column : 23 } ] ,
499- } ) ;
500-
501- expectSyntaxError (
502- '"""null-byte is not \u0000 end of file"""' ,
503- ) . to . deep . equal ( {
504- message : 'Syntax Error: Invalid character within String: U+0000.' ,
505- locations : [ { line : 1 , column : 21 } ] ,
670+ expectSyntaxError ( '"""contains invalid surrogate \uDEAD"""' ) . to . deep . equal ( {
671+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
672+ locations : [ { line : 1 , column : 31 } ] ,
506673 } ) ;
507674 } ) ;
508675
@@ -842,6 +1009,16 @@ describe('Lexer', () => {
8421009 locations : [ { line : 1 , column : 1 } ] ,
8431010 } ) ;
8441011
1012+ expectSyntaxError ( '\x00' ) . to . deep . equal ( {
1013+ message : 'Syntax Error: Unexpected character: U+0000.' ,
1014+ locations : [ { line : 1 , column : 1 } ] ,
1015+ } ) ;
1016+
1017+ expectSyntaxError ( '\b' ) . to . deep . equal ( {
1018+ message : 'Syntax Error: Unexpected character: U+0008.' ,
1019+ locations : [ { line : 1 , column : 1 } ] ,
1020+ } ) ;
1021+
8451022 expectSyntaxError ( '\u00AA' ) . to . deep . equal ( {
8461023 message : 'Syntax Error: Unexpected character: U+00AA.' ,
8471024 locations : [ { line : 1 , column : 1 } ] ,
@@ -856,6 +1033,16 @@ describe('Lexer', () => {
8561033 message : 'Syntax Error: Unexpected character: U+203B.' ,
8571034 locations : [ { line : 1 , column : 1 } ] ,
8581035 } ) ;
1036+
1037+ expectSyntaxError ( '\u{1f600}' ) . to . deep . equal ( {
1038+ message : 'Syntax Error: Unexpected character: U+1F600.' ,
1039+ locations : [ { line : 1 , column : 1 } ] ,
1040+ } ) ;
1041+
1042+ expectSyntaxError ( '\uDEAD' ) . to . deep . equal ( {
1043+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1044+ locations : [ { line : 1 , column : 1 } ] ,
1045+ } ) ;
8591046 } ) ;
8601047
8611048 it ( 'lex reports useful information for dashes in names' , ( ) => {
@@ -936,9 +1123,15 @@ describe('Lexer', () => {
9361123 end : 9 ,
9371124 value : ' Comment' ,
9381125 } ) ;
939- expectSyntaxError ( '# \u0007' ) . to . deep . equal ( {
940- message : 'Syntax Error: Invalid character: U+0007.' ,
941- locations : [ { line : 1 , column : 3 } ] ,
1126+ expect ( lexOne ( '# Comment \u{1f600}' ) . prev ) . to . contain ( {
1127+ kind : TokenKind . COMMENT ,
1128+ start : 0 ,
1129+ end : 12 ,
1130+ value : ' Comment \u{1f600}' ,
1131+ } ) ;
1132+ expectSyntaxError ( '# Invalid surrogate \uDEAD' ) . to . deep . equal ( {
1133+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1134+ locations : [ { line : 1 , column : 21 } ] ,
9421135 } ) ;
9431136 } ) ;
9441137} ) ;
0 commit comments