Error on replacement character only in top-level scanning (#58227)

jakebailey · web-flow · commit 0dd12da1659f · 2024-04-17T11:34:00.000-07:00
diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -1768,18 +1768,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
 
             const ch = codePointAt(text, pos);
             if (pos === 0) {
-                // If a file isn't valid text at all, it will usually be apparent
-                // in the first few characters because UTF-8 decode will fail and produce U+FFFD.
-                // If that happens, just issue one error and refuse to try to scan further;
-                // this is likely a binary file that cannot be parsed.
-                //
-                // It's safe to slice the text; U+FFFD can only be produced by an invalid decode,
-                // so even if we cut a surrogate pair in half, they wouldn't be U+FFFD.
-                if (text.slice(0, 256).includes("\uFFFD")) {
-                    error(Diagnostics.File_appears_to_be_binary);
-                    pos = end;
-                    return token = SyntaxKind.NonTextFileMarkerTrivia;
-                }
                 // Special handling for shebang
                 if (ch === CharacterCodes.hash && isShebangTrivia(text, pos)) {
                     pos = scanShebangTrivia(text, pos);
@@ -2242,6 +2230,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         error(Diagnostics.Invalid_character, pos++, charSize(ch));
                     }
                     return token = SyntaxKind.PrivateIdentifier;
+                case CharacterCodes.replacementCharacter:
+                    error(Diagnostics.File_appears_to_be_binary, 0, 0);
+                    pos = end;
+                    return token = SyntaxKind.NonTextFileMarkerTrivia;
                 default:
                     const identifierKind = scanIdentifier(ch, languageVersion);
                     if (identifierKind) {
diff --git a/src/compiler/types.ts b/src/compiler/types.ts
@@ -7612,6 +7612,9 @@ export const enum CharacterCodes {
     mathematicalSpace = 0x205F,
     ogham = 0x1680,
 
+    // Unicode replacement character (U+FFFD), emitted by a decoder in place of an invalid byte sequence
+    replacementCharacter = 0xFFFD,
+
     _ = 0x5F,
     $ = 0x24,
 
diff --git a/tests/baselines/reference/TransportStream.errors.txt b/tests/baselines/reference/TransportStream.errors.txt
@@ -1,7 +1,19 @@
 TransportStream.ts(1,1): error TS1490: File appears to be binary.
+TransportStream.ts(1,1): error TS1434: Unexpected keyword or identifier.
+TransportStream.ts(1,1): error TS2304: Cannot find name 'G'.
+TransportStream.ts(1,3): error TS1127: Invalid character.
+TransportStream.ts(1,4): error TS1128: Declaration or statement expected.
 
 
-==== TransportStream.ts (1 errors) ====
+==== TransportStream.ts (5 errors) ====
     G@�~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~G@�G@�
     
-!!! error TS1490: File appears to be binary.
+!!! error TS1490: File appears to be binary.
+    ~
+!!! error TS1434: Unexpected keyword or identifier.
+    ~
+!!! error TS2304: Cannot find name 'G'.
+      ~
+!!! error TS1127: Invalid character.
+       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+!!! error TS1128: Declaration or statement expected.
diff --git a/tests/baselines/reference/TransportStream.js b/tests/baselines/reference/TransportStream.js
@@ -4,3 +4,4 @@
 G@�~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~G@�G@�
 
 //// [TransportStream.js]
+G;
diff --git a/tests/baselines/reference/TransportStream.types b/tests/baselines/reference/TransportStream.types
@@ -1,5 +1,9 @@
 //// [tests/cases/compiler/TransportStream.ts] ////
 
 === TransportStream.ts ===
-
 G@�~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~G@�G@�
+>G : any
+>  : ^^^
+> : any
+> : ^^^
+
diff --git a/tests/baselines/reference/parseReplacementCharacter.js b/tests/baselines/reference/parseReplacementCharacter.js
@@ -0,0 +1,16 @@
+//// [tests/cases/compiler/parseReplacementCharacter.ts] ////
+
+//// [parseReplacementCharacter.ts]
+"oops �� oops";
+'oops �� oops';
+`oops �� oops`;
+`${"oops �� oops"}`;
+// oops �� oops
+/* oops �� oops */
+/** oops �� oops */
+
+//// [parseReplacementCharacter.js]
+"oops �� oops";
+'oops �� oops';
+"oops \uFFFD\uFFFD oops";
+"".concat("oops �� oops");
diff --git a/tests/baselines/reference/parseReplacementCharacter.symbols b/tests/baselines/reference/parseReplacementCharacter.symbols
@@ -0,0 +1,11 @@
+//// [tests/cases/compiler/parseReplacementCharacter.ts] ////
+
+=== parseReplacementCharacter.ts ===
+
+"oops �� oops";
+'oops �� oops';
+`oops �� oops`;
+`${"oops �� oops"}`;
+// oops �� oops
+/* oops �� oops */
+/** oops �� oops */
diff --git a/tests/baselines/reference/parseReplacementCharacter.types b/tests/baselines/reference/parseReplacementCharacter.types
@@ -0,0 +1,24 @@
+//// [tests/cases/compiler/parseReplacementCharacter.ts] ////
+
+=== parseReplacementCharacter.ts ===
+"oops �� oops";
+>"oops �� oops" : "oops �� oops"
+>               : ^^^^^^^^^^^^^^
+
+'oops �� oops';
+>'oops �� oops' : "oops �� oops"
+>               : ^^^^^^^^^^^^^^
+
+`oops �� oops`;
+>`oops �� oops` : "oops �� oops"
+>               : ^^^^^^^^^^^^^^
+
+`${"oops �� oops"}`;
+>`${"oops �� oops"}` : "oops �� oops"
+>                    : ^^^^^^^^^^^^^^
+>"oops �� oops" : "oops �� oops"
+>               : ^^^^^^^^^^^^^^
+
+// oops �� oops
+/* oops �� oops */
+/** oops �� oops */
diff --git a/tests/cases/compiler/parseReplacementCharacter.ts b/tests/cases/compiler/parseReplacementCharacter.ts
@@ -0,0 +1,7 @@
+"oops �� oops";
+'oops �� oops';
+`oops �� oops`;
+`${"oops �� oops"}`;
+// oops �� oops
+/* oops �� oops */
+/** oops �� oops */

Original file line number	Diff line number	Diff line change
`@@ -4,3 +4,4 @@`
`4`	`4`	`G@�G@�G@�`
`5`	`5`
`6`	`6`	`//// [TransportStream.js]`
	`7`	`+G;`
-Original file line number
+Diff line change
@@ -1,5 +1,9 @@
 //// [tests/cases/compiler/TransportStream.ts] ////
 === TransportStream.ts ===
+-
 G@�G@�G@�
 +>G : any
 +>  : ^^^
 +> : any
 +> : ^^^
++