diff --git a/_packages/api/test/api.test.ts b/_packages/api/test/api.test.ts
index a01b029fac..1a097cf1b3 100644
--- a/_packages/api/test/api.test.ts
+++ b/_packages/api/test/api.test.ts
@@ -8,6 +8,7 @@ import {
     cast,
     isImportDeclaration,
     isNamedImports,
+    isStringLiteral,
     isTemplateHead,
     isTemplateMiddle,
     isTemplateTail,
@@ -113,6 +114,37 @@ describe("SourceFile", () => {
     });
 });
 
+// A string literal written as a raw astral character and one written as a
+// \uXXXX surrogate-pair escape must both expose the same `text` through the API.
+test("unicode escapes", () => {
+    const srcFiles = {
+        "/src/1.ts": `"😃"`,
+        "/src/2.ts": `"\\ud83d\\ude03"`, // this is "😃"
+    };
+
+    const api = spawnAPI({
+        "/tsconfig.json": "{}",
+        ...srcFiles,
+    });
+    const project = api.loadProject("/tsconfig.json");
+
+    for (const file of Object.keys(srcFiles)) {
+        const sourceFile = project.getSourceFile(file);
+        assert.ok(sourceFile);
+
+        // Count the literals so the test cannot pass vacuously when none is visited.
+        let stringLiteralCount = 0;
+        sourceFile.forEachChild(function visit(node) {
+            if (isStringLiteral(node)) {
+                stringLiteralCount++;
+                assert.equal(node.text, "😃");
+            }
+            node.forEachChild(visit);
+        });
+        assert.ok(stringLiteralCount > 0, `expected a string literal in ${file}`);
+    }
+});
+
 test("Object equality", () => {
     const api = spawnAPI();
     const project = api.loadProject("/tsconfig.json");
diff --git a/internal/api/encoder/encoder_test.go b/internal/api/encoder/encoder_test.go
index 4d1be79d36..232c0fec37 100644
--- a/internal/api/encoder/encoder_test.go
+++ b/internal/api/encoder/encoder_test.go
@@ -35,6 +35,28 @@ func TestEncodeSourceFile(t *testing.T) {
 	})
 }
 
+// TestEncodeSourceFileWithUnicodeEscapes baselines the encoding of string
+// literals that use \uXXXX escapes: a raw "😃" (a), a surrogate pair written
+// as two escapes (b), a lone low surrogate followed by a pair (c), and a
+// lone high surrogate followed by a pair (d).
+func TestEncodeSourceFileWithUnicodeEscapes(t *testing.T) {
+	t.Parallel()
+	sourceFile := parser.ParseSourceFile(ast.SourceFileParseOptions{
+		FileName: "/test.ts",
+		Path:     "/test.ts",
+	}, `let a = "😃"; let b = "\ud83d\ude03"; let c = "\udc00\ud83d\ude03"; let d = "\ud83d\ud83d\ude03"`, core.ScriptKindTS)
+	t.Run("baseline", func(t *testing.T) {
+		t.Parallel()
+		buf, err := encoder.EncodeSourceFile(sourceFile, "")
+		assert.NilError(t, err)
+
+		str := formatEncodedSourceFile(buf)
+		baseline.Run(t, "encodeSourceFileWithUnicodeEscapes.txt", str, baseline.Options{
+			Subfolder: "api",
+		})
+	})
+}
+
 func BenchmarkEncodeSourceFile(b *testing.B) {
 	repo.SkipIfNoTypeScriptSubmodule(b)
 	filePath := filepath.Join(repo.TypeScriptSubmodulePath, "src/compiler/checker.ts")
diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
index 1ee64b62f1..72ae4f7ca6 100644
--- a/internal/scanner/scanner.go
+++ b/internal/scanner/scanner.go
@@ -1629,6 +1629,19 @@ func (s *Scanner) scanEscapeSequence(flags EscapeSequenceScanningFlags) string {
 		codePoint := s.scanUnicodeEscape(flags&EscapeSequenceScanningFlagsReportInvalidEscapeErrors != 0)
 		if codePoint < 0 {
 			return s.text[start:s.pos]
+		} else if codePointIsHighSurrogate(codePoint) && s.char() == '\\' && s.charAt(1) == 'u' {
+			// A high surrogate directly followed by another \u escape may be the
+			// first half of a UTF-16 surrogate pair; scan ahead and, if the next
+			// escape is a low surrogate, return the single combined code point.
+			savedPos := s.pos
+			// NOTE(review): the look-ahead reuses the caller's error-reporting flag,
+			// so a malformed second escape may be reported here and again when it is
+			// rescanned after the rewind below - confirm whether it should pass false.
+			nextCodePoint := s.scanUnicodeEscape(flags&EscapeSequenceScanningFlagsReportInvalidEscapeErrors != 0)
+			if codePointIsLowSurrogate(nextCodePoint) {
+				return string(surrogatePairToCodepoint(codePoint, nextCodePoint))
+			}
+			s.pos = savedPos // restore position because we do not consume nextCodePoint
 		}
 		return string(codePoint)
 	case 'x':
diff --git a/internal/scanner/utilities.go b/internal/scanner/utilities.go
index 06ba7cf666..06b29a1e2a 100644
--- a/internal/scanner/utilities.go
+++ b/internal/scanner/utilities.go
@@ -8,6 +8,35 @@ import (
 	"github.com/microsoft/typescript-go/internal/core"
 )
 
+// UTF-16 surrogate boundaries, used to recognize \uXXXX escapes that encode
+// one half of a surrogate pair.
+const (
+	surr1    = 0xd800  // first high (leading) surrogate
+	surr2    = 0xdc00  // first low (trailing) surrogate; one past the last high surrogate
+	surr3    = 0xe000  // one past the last low surrogate
+	surrSelf = 0x10000 // offset added when combining a pair; first code point above the BMP
+)
+
+// codePointIsHighSurrogate reports whether r is a UTF-16 high (leading)
+// surrogate, i.e. lies in the half-open range [0xD800, 0xDC00).
+func codePointIsHighSurrogate(r rune) bool {
+	return surr1 <= r && r < surr2
+}
+
+// codePointIsLowSurrogate reports whether r is a UTF-16 low (trailing)
+// surrogate, i.e. lies in the half-open range [0xDC00, 0xE000).
+func codePointIsLowSurrogate(r rune) bool {
+	return surr2 <= r && r < surr3
+}
+
+// surrogatePairToCodepoint combines the high surrogate r1 and the low
+// surrogate r2 into the code point they encode. It does not validate its
+// arguments; callers are expected to check them with
+// codePointIsHighSurrogate and codePointIsLowSurrogate first.
+func surrogatePairToCodepoint(r1, r2 rune) rune {
+	return ((r1-surr1)<<10 | (r2 - surr2)) + surrSelf
+}
+
 func tokenIsIdentifierOrKeyword(token ast.Kind) bool {
 	return token >= ast.KindIdentifier
 }
diff --git a/testdata/baselines/reference/api/encodeSourceFileWithUnicodeEscapes.txt b/testdata/baselines/reference/api/encodeSourceFileWithUnicodeEscapes.txt
new file mode 100644
index 0000000000..93102c786a
--- /dev/null
+++ b/testdata/baselines/reference/api/encodeSourceFileWithUnicodeEscapes.txt
@@ -0,0 +1,27 @@
+KindSourceFile [0, 98), i=1, next=0 + NodeList [0, 98), i=2, next=27 + KindVariableStatement [0, 15), i=3, next=9 + KindVariableDeclarationList [0, 14), i=4, next=0 + NodeList [3, 14), i=5, next=0 + KindVariableDeclaration [3, 14), i=6, next=0 + KindIdentifier "a" [3, 5), i=7, next=8 + KindStringLiteral "😃" [7, 14), i=8, next=0 + KindVariableStatement [15, 39), i=9, next=15 + KindVariableDeclarationList [15, 38), i=10, next=0 + NodeList [19, 38), i=11, next=0 + KindVariableDeclaration [19, 38), i=12, next=0 + KindIdentifier "b" [19, 21), i=13, next=14 + KindStringLiteral "😃" [23, 38), i=14, next=0 + KindVariableStatement [39, 69), i=15, next=21 + KindVariableDeclarationList [39, 68), i=16, next=0 + NodeList [43, 68), i=17, next=0 + KindVariableDeclaration [43, 68), i=18, next=0 + KindIdentifier "c" [43, 45), i=19, next=20 + KindStringLiteral "�😃" [47, 68), i=20, next=0 + KindVariableStatement [69, 98), i=21, next=0 + KindVariableDeclarationList [69, 98), i=22, next=0 + NodeList [73, 98), i=23, next=0 + KindVariableDeclaration [73, 98), i=24, next=0 + KindIdentifier "d" [73, 75), i=25, next=26 + KindStringLiteral "�😃" [77, 98), i=26, next=0 + KindEndOfFile [98, 98), i=27, next=0