Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions _packages/api/test/api.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
cast,
isImportDeclaration,
isNamedImports,
isStringLiteral,
isTemplateHead,
isTemplateMiddle,
isTemplateTail,
Expand Down Expand Up @@ -113,6 +114,31 @@ describe("SourceFile", () => {
});
});

test("unicode escapes", () => {
    // Both files must yield the same string literal text: the first contains
    // the emoji directly, the second spells it as an escaped UTF-16
    // surrogate pair.
    const srcFiles = {
        "/src/1.ts": `"😃"`,
        "/src/2.ts": `"\\ud83d\\ude03"`, // this is "😃"
    };

    const api = spawnAPI({
        "/tsconfig.json": "{}",
        ...srcFiles,
    });
    const project = api.loadProject("/tsconfig.json");

    for (const file of Object.keys(srcFiles)) {
        const sourceFile = project.getSourceFile(file);
        assert.ok(sourceFile);

        // Count the literals that were checked so the test cannot pass
        // vacuously when the traversal never reaches a string literal.
        let literalCount = 0;
        sourceFile.forEachChild(function visit(node) {
            if (isStringLiteral(node)) {
                literalCount++;
                assert.equal(node.text, "😃");
            }
            node.forEachChild(visit);
        });
        assert.ok(literalCount > 0, `expected a string literal in ${file}`);
    }
});

test("Object equality", () => {
const api = spawnAPI();
const project = api.loadProject("/tsconfig.json");
Expand Down
18 changes: 18 additions & 0 deletions internal/api/encoder/encoder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,24 @@ func TestEncodeSourceFile(t *testing.T) {
})
}

func TestEncodeSourceFileWithUnicodeEscapes(t *testing.T) {
t.Parallel()
sourceFile := parser.ParseSourceFile(ast.SourceFileParseOptions{
FileName: "/test.ts",
Path: "/test.ts",
}, `let a = "😃"; let b = "\ud83d\ude03"; let c = "\udc00\ud83d\ude03"; let d = "\ud83d\ud83d\ude03"`, core.ScriptKindTS)
t.Run("baseline", func(t *testing.T) {
t.Parallel()
buf, err := encoder.EncodeSourceFile(sourceFile, "")
assert.NilError(t, err)

str := formatEncodedSourceFile(buf)
baseline.Run(t, "encodeSourceFileWithUnicodeEscapes.txt", str, baseline.Options{
Subfolder: "api",
})
})
}

func BenchmarkEncodeSourceFile(b *testing.B) {
repo.SkipIfNoTypeScriptSubmodule(b)
filePath := filepath.Join(repo.TypeScriptSubmodulePath, "src/compiler/checker.ts")
Expand Down
7 changes: 7 additions & 0 deletions internal/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1629,6 +1629,13 @@ func (s *Scanner) scanEscapeSequence(flags EscapeSequenceScanningFlags) string {
codePoint := s.scanUnicodeEscape(flags&EscapeSequenceScanningFlagsReportInvalidEscapeErrors != 0)
if codePoint < 0 {
return s.text[start:s.pos]
} else if codePointIsHighSurrogate(codePoint) && s.char() == '\\' && s.charAt(1) == 'u' {
savedPos := s.pos
nextCodePoint := s.scanUnicodeEscape(flags&EscapeSequenceScanningFlagsReportInvalidEscapeErrors != 0)
if codePointIsLowSurrogate(nextCodePoint) {
Copy link
Member

@DanielRosenwasser DanielRosenwasser Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I need to see what happens when a user has provided a single high surrogate, a low surrogate, or mismatched surrogates.

Maybe we need a compiler test for this in tests/cases/compiler or tests/cases/conformance with the following:

// @declaration: true

// high-low surrogate pair - the "correct" case
export const highLow = "\ud83d\ude03" as const;

// high surrogate
export const high = "\ud83d" as const;

// low surrogate
export const low = "\ude03" as const;

// two high surrogates
export const highHigh = "\ud83d\ud83d" as const;

// two low surrogates
export const lowLow = "\ude03\ude03" as const;

// swapped expected order of surrogates
export const lowHigh = "\ude03\ud83d" as const;

I think you are currently doing the right thing by only consuming when you have a correct pair, but I'm guessing that we're missing coverage here, so might as well add the test now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I think I have done some very similar stuff in #2026, which (unfortunately) duplicates some of this scanning code in another package and fixes the bugs.

return string(surrogatePairToCodepoint(codePoint, nextCodePoint))
}
s.pos = savedPos // restore position because we do not consume nextCodePoint
}
return string(codePoint)
case 'x':
Expand Down
19 changes: 19 additions & 0 deletions internal/scanner/utilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,25 @@ import (
"github.com/microsoft/typescript-go/internal/core"
)

// Boundaries of the UTF-16 surrogate code point ranges.
const (
	surr1    = 0xd800  // first high surrogate
	surr2    = 0xdc00  // first low surrogate (one past the last high surrogate)
	surr3    = 0xe000  // one past the last low surrogate
	surrSelf = 0x10000 // offset added when combining a surrogate pair
)

// codePointIsHighSurrogate reports whether r lies in [surr1, surr2).
func codePointIsHighSurrogate(r rune) bool {
	return r >= surr1 && r < surr2
}

// codePointIsLowSurrogate reports whether r lies in [surr2, surr3).
func codePointIsLowSurrogate(r rune) bool {
	if r < surr2 {
		return false
	}
	return r < surr3
}

// surrogatePairToCodepoint combines the high surrogate r1 and the low
// surrogate r2 into a single code point. It does not validate its arguments;
// callers are expected to check them with codePointIsHighSurrogate and
// codePointIsLowSurrogate first.
func surrogatePairToCodepoint(r1, r2 rune) rune {
	highBits := (r1 - surr1) << 10
	lowBits := r2 - surr2
	return surrSelf + (highBits | lowBits)
}

// tokenIsIdentifierOrKeyword reports whether token is at or after
// ast.KindIdentifier in the ast.Kind ordering.
// NOTE(review): presumably identifiers and keywords are exactly the kinds
// ordered from KindIdentifier onward; confirm against the ast.Kind declaration.
func tokenIsIdentifierOrKeyword(token ast.Kind) bool {
	return ast.KindIdentifier <= token
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
KindSourceFile [0, 98), i=1, next=0
NodeList [0, 98), i=2, next=27
KindVariableStatement [0, 15), i=3, next=9
KindVariableDeclarationList [0, 14), i=4, next=0
NodeList [3, 14), i=5, next=0
KindVariableDeclaration [3, 14), i=6, next=0
KindIdentifier "a" [3, 5), i=7, next=8
KindStringLiteral "😃" [7, 14), i=8, next=0
KindVariableStatement [15, 39), i=9, next=15
KindVariableDeclarationList [15, 38), i=10, next=0
NodeList [19, 38), i=11, next=0
KindVariableDeclaration [19, 38), i=12, next=0
KindIdentifier "b" [19, 21), i=13, next=14
KindStringLiteral "😃" [23, 38), i=14, next=0
KindVariableStatement [39, 69), i=15, next=21
KindVariableDeclarationList [39, 68), i=16, next=0
NodeList [43, 68), i=17, next=0
KindVariableDeclaration [43, 68), i=18, next=0
KindIdentifier "c" [43, 45), i=19, next=20
KindStringLiteral "�😃" [47, 68), i=20, next=0
KindVariableStatement [69, 98), i=21, next=0
KindVariableDeclarationList [69, 98), i=22, next=0
NodeList [73, 98), i=23, next=0
KindVariableDeclaration [73, 98), i=24, next=0
KindIdentifier "d" [73, 75), i=25, next=26
KindStringLiteral "�😃" [77, 98), i=26, next=0
KindEndOfFile [98, 98), i=27, next=0