Skip to content

Commit 6460989

Browse files
fix(NODE-6124): utf8 validation is insufficiently strict (#680)
1 parent d6b15f8 commit 6460989

File tree

12 files changed

+213
-119
lines changed

12 files changed

+213
-119
lines changed
Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import MagicString from 'magic-string';
22

3-
const REQUIRE_POLYFILLS =
4-
`const { TextEncoder, TextDecoder } = require('../vendor/text-encoding');
3+
const REQUIRE_WEB_UTILS_POLYFILLS =
4+
`const { TextEncoder } = require('../vendor/text-encoding');
55
const { encode: btoa, decode: atob } = require('../vendor/base64');\n`
66

7+
const REQUIRE_PARSE_UTF8_POLYFILLS =
8+
`const { TextDecoder } = require('../vendor/text-encoding');\n`;
9+
710
export class RequireVendor {
811
/**
912
* Take the compiled source code input; types are expected to already have been removed.
@@ -14,17 +17,24 @@ export class RequireVendor {
1417
* @returns {{ code: string; map: import('magic-string').SourceMap }}
1518
*/
1619
transform(code, id) {
17-
if (!id.includes('web_byte_utils')) {
18-
return;
19-
}
20+
if (id.includes('parse_utf8')) {
21+
// MagicString lets us edit the source code and still generate an accurate source map
22+
const magicString = new MagicString(code);
23+
magicString.prepend(REQUIRE_PARSE_UTF8_POLYFILLS);
2024

21-
// MagicString lets us edit the source code and still generate an accurate source map
22-
const magicString = new MagicString(code);
23-
magicString.prepend(REQUIRE_POLYFILLS);
25+
return {
26+
code: magicString.toString(),
27+
map: magicString.generateMap({ hires: true })
28+
};
29+
} else if (id.includes('web_byte_utils')) {
30+
// MagicString lets us edit the source code and still generate an accurate source map
31+
const magicString = new MagicString(code);
32+
magicString.prepend(REQUIRE_WEB_UTILS_POLYFILLS);
2433

25-
return {
26-
code: magicString.toString(),
27-
map: magicString.generateMap({ hires: true })
28-
};
34+
return {
35+
code: magicString.toString(),
36+
map: magicString.generateMap({ hires: true })
37+
};
38+
}
2939
}
3040
}

src/binary.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@ export class Binary extends BSONValue {
224224
if (encoding === 'hex') return ByteUtils.toHex(this.buffer.subarray(0, this.position));
225225
if (encoding === 'base64') return ByteUtils.toBase64(this.buffer.subarray(0, this.position));
226226
if (encoding === 'utf8' || encoding === 'utf-8')
227-
return ByteUtils.toUTF8(this.buffer, 0, this.position);
228-
return ByteUtils.toUTF8(this.buffer, 0, this.position);
227+
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
228+
return ByteUtils.toUTF8(this.buffer, 0, this.position, false);
229229
}
230230

231231
/** @internal */

src/parse_utf8.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import { BSONError } from './error';
2+
/** Minimal structural type for the `TextDecoder` instances this module uses. */
type TextDecoder = {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;
  decode(input?: Uint8Array): string;
};
/** Constructor shape of the global `TextDecoder`; only the 'utf8' label is used here. */
type TextDecoderConstructor = {
  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
};

// parse utf8 globals
declare const TextDecoder: TextDecoderConstructor;
// Both decoders are created lazily on first use and then reused by every later call.
let TextDecoderFatal: TextDecoder;
let TextDecoderNonFatal: TextDecoder;

/**
 * Decodes the bytes of `buffer` in the range `start` (inclusive) to `end` (exclusive)
 * as a UTF-8 string.
 *
 * @param buffer - The bytes to decode
 * @param start - The index of the first byte to decode
 * @param end - The index just past the last byte to decode
 * @param fatal - When true, a decoder created with `fatal: true` is used and any decoding
 *   failure is reported as a BSONError; when false, a decoder created with `fatal: false` is used
 * @returns The decoded string
 * @throws BSONError if `fatal` is true and the bytes cannot be decoded
 */
export function parseUtf8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
  if (fatal) {
    TextDecoderFatal ??= new TextDecoder('utf8', { fatal: true });
    try {
      return TextDecoderFatal.decode(buffer.subarray(start, end));
    } catch (cause) {
      // Keep the decoder's own error attached so the root failure is not lost.
      // NOTE(review): assumes BSONError accepts an `{ cause }` options argument — confirm in ./error
      throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
    }
  }
  TextDecoderNonFatal ??= new TextDecoder('utf8', { fatal: false });
  return TextDecoderNonFatal.decode(buffer.subarray(start, end));
}

src/parser/deserializer.ts

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import { BSONRegExp } from '../regexp';
1515
import { BSONSymbol } from '../symbol';
1616
import { Timestamp } from '../timestamp';
1717
import { BSONDataView, ByteUtils } from '../utils/byte_utils';
18-
import { validateUtf8 } from '../validate_utf8';
1918

2019
/** @public */
2120
export interface DeserializeOptions {
@@ -236,7 +235,7 @@ function deserializeObject(
236235
if (i >= buffer.byteLength) throw new BSONError('Bad BSON Document: illegal CString');
237236

238237
// Represents the key
239-
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i);
238+
const name = isArray ? arrayIndex++ : ByteUtils.toUTF8(buffer, index, i, false);
240239

241240
// shouldValidateKey is true if the key should be validated, false otherwise
242241
let shouldValidateKey = true;
@@ -266,7 +265,7 @@ function deserializeObject(
266265
) {
267266
throw new BSONError('bad string length in bson');
268267
}
269-
value = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
268+
value = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
270269
index = index + stringSize;
271270
} else if (elementType === constants.BSON_DATA_OID) {
272271
const oid = ByteUtils.allocate(12);
@@ -476,7 +475,7 @@ function deserializeObject(
476475
// If are at the end of the buffer there is a problem with the document
477476
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
478477
// Return the C string
479-
const source = ByteUtils.toUTF8(buffer, index, i);
478+
const source = ByteUtils.toUTF8(buffer, index, i, false);
480479
// Create the regexp
481480
index = i + 1;
482481

@@ -489,7 +488,7 @@ function deserializeObject(
489488
// If are at the end of the buffer there is a problem with the document
490489
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
491490
// Return the C string
492-
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
491+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
493492
index = i + 1;
494493

495494
// For each option add the corresponding one for javascript
@@ -521,7 +520,7 @@ function deserializeObject(
521520
// If are at the end of the buffer there is a problem with the document
522521
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
523522
// Return the C string
524-
const source = ByteUtils.toUTF8(buffer, index, i);
523+
const source = ByteUtils.toUTF8(buffer, index, i, false);
525524
index = i + 1;
526525

527526
// Get the start search index
@@ -533,7 +532,7 @@ function deserializeObject(
533532
// If are at the end of the buffer there is a problem with the document
534533
if (i >= buffer.length) throw new BSONError('Bad BSON Document: illegal CString');
535534
// Return the C string
536-
const regExpOptions = ByteUtils.toUTF8(buffer, index, i);
535+
const regExpOptions = ByteUtils.toUTF8(buffer, index, i, false);
537536
index = i + 1;
538537

539538
// Set the object
@@ -551,7 +550,7 @@ function deserializeObject(
551550
) {
552551
throw new BSONError('bad string length in bson');
553552
}
554-
const symbol = getValidatedString(buffer, index, index + stringSize - 1, shouldValidateKey);
553+
const symbol = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
555554
value = promoteValues ? symbol : new BSONSymbol(symbol);
556555
index = index + stringSize;
557556
} else if (elementType === constants.BSON_DATA_TIMESTAMP) {
@@ -587,7 +586,7 @@ function deserializeObject(
587586
) {
588587
throw new BSONError('bad string length in bson');
589588
}
590-
const functionString = getValidatedString(
589+
const functionString = ByteUtils.toUTF8(
591590
buffer,
592591
index,
593592
index + stringSize - 1,
@@ -626,7 +625,7 @@ function deserializeObject(
626625
}
627626

628627
// Javascript function
629-
const functionString = getValidatedString(
628+
const functionString = ByteUtils.toUTF8(
630629
buffer,
631630
index,
632631
index + stringSize - 1,
@@ -673,12 +672,7 @@ function deserializeObject(
673672
)
674673
throw new BSONError('bad string length in bson');
675674
// Namespace
676-
if (validation != null && validation.utf8) {
677-
if (!validateUtf8(buffer, index, index + stringSize - 1)) {
678-
throw new BSONError('Invalid UTF-8 string in BSON document');
679-
}
680-
}
681-
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1);
675+
const namespace = ByteUtils.toUTF8(buffer, index, index + stringSize - 1, shouldValidateKey);
682676
// Update parse index position
683677
index = index + stringSize;
684678

@@ -728,24 +722,3 @@ function deserializeObject(
728722

729723
return object;
730724
}
731-
732-
function getValidatedString(
733-
buffer: Uint8Array,
734-
start: number,
735-
end: number,
736-
shouldValidateUtf8: boolean
737-
) {
738-
const value = ByteUtils.toUTF8(buffer, start, end);
739-
// if utf8 validation is on, do the check
740-
if (shouldValidateUtf8) {
741-
for (let i = 0; i < value.length; i++) {
742-
if (value.charCodeAt(i) === 0xfffd) {
743-
if (!validateUtf8(buffer, start, end)) {
744-
throw new BSONError('Invalid UTF-8 string in BSON document');
745-
}
746-
break;
747-
}
748-
}
749-
}
750-
return value;
751-
}

src/utils/byte_utils.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export type ByteUtils = {
2626
/** Create a Uint8Array containing utf8 code units from a string */
2727
fromUTF8: (text: string) => Uint8Array;
2828
/** Create a string from utf8 code units */
29-
toUTF8: (buffer: Uint8Array, start: number, end: number) => string;
29+
toUTF8: (buffer: Uint8Array, start: number, end: number, fatal: boolean) => string;
3030
/** Get the utf8 code unit count from a string if it were to be transformed to utf8 */
3131
utf8ByteLength: (input: string) => number;
3232
/** Encode UTF8 bytes generated from `source` string into `destination` at byteOffset. Returns the number of bytes encoded. */

src/utils/node_byte_utils.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { BSONError } from '../error';
2+
import { parseUtf8 } from '../parse_utf8';
23

34
type NodeJsEncoding = 'base64' | 'hex' | 'utf8' | 'binary';
45
type NodeJsBuffer = ArrayBufferView &
@@ -125,8 +126,16 @@ export const nodeJsByteUtils = {
125126
return Buffer.from(text, 'utf8');
126127
},
127128

128-
toUTF8(buffer: Uint8Array, start: number, end: number): string {
129-
return nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
/**
 * Create a string from the utf8 code units of `buffer` between `start` and `end`.
 *
 * The string is produced by the local Buffer type's `toString('utf8', start, end)`.
 * When `fatal` is true and the result contains U+FFFD, the same byte range is passed
 * once to `parseUtf8` in fatal mode; the return value of that call is discarded.
 *
 * @param buffer - The bytes to decode
 * @param start - The start index handed to `toString` / `parseUtf8`
 * @param end - The end index handed to `toString` / `parseUtf8`
 * @param fatal - Whether the strict re-check described above is performed
 * @returns The string produced by `toString`
 */
toUTF8(buffer: Uint8Array, start: number, end: number, fatal: boolean): string {
  const value = nodeJsByteUtils.toLocalBufferType(buffer).toString('utf8', start, end);
  // U+FFFD may come from invalid bytes or may be a legitimately encoded character,
  // so its presence only triggers the strict re-check. One pass over the range is
  // enough no matter how many U+FFFD characters the string contains.
  if (fatal && value.includes('\ufffd')) {
    parseUtf8(buffer, start, end, true);
  }
  return value;
},
131140

132141
utf8ByteLength(input: string): number {

src/utils/web_byte_utils.ts

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
import { BSONError } from '../error';
2-
3-
type TextDecoder = {
4-
readonly encoding: string;
5-
readonly fatal: boolean;
6-
readonly ignoreBOM: boolean;
7-
decode(input?: Uint8Array): string;
8-
};
9-
type TextDecoderConstructor = {
10-
new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
11-
};
2+
import { parseUtf8 } from '../parse_utf8';
123

134
type TextEncoder = {
145
readonly encoding: string;
@@ -19,7 +10,6 @@ type TextEncoderConstructor = {
1910
};
2011

2112
// Web global
22-
declare const TextDecoder: TextDecoderConstructor;
2313
declare const TextEncoder: TextEncoderConstructor;
2414
declare const atob: (base64: string) => string;
2515
declare const btoa: (binary: string) => string;
@@ -172,8 +162,8 @@ export const webByteUtils = {
172162
return new TextEncoder().encode(text);
173163
},
174164

175-
toUTF8(uint8array: Uint8Array, start: number, end: number): string {
176-
return new TextDecoder('utf8', { fatal: false }).decode(uint8array.slice(start, end));
/**
 * Create a string from utf8 code units by handing the byte range to `parseUtf8`.
 *
 * @param uint8array - The bytes to decode
 * @param start - The start index forwarded to `parseUtf8`
 * @param end - The end index forwarded to `parseUtf8`
 * @param fatal - Forwarded to `parseUtf8` unchanged
 * @returns Whatever `parseUtf8` returns for the given range
 */
toUTF8(uint8array: Uint8Array, start: number, end: number, fatal: boolean): string {
  const decoded = parseUtf8(uint8array, start, end, fatal);
  return decoded;
},
178168

179169
utf8ByteLength(input: string): number {

src/validate_utf8.ts

Lines changed: 0 additions & 47 deletions
This file was deleted.

test/node/byte_utils.test.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
88
import * as sinon from 'sinon';
99
import { loadCJSModuleBSON, loadReactNativeCJSModuleBSON, loadESModuleBSON } from '../load_bson';
1010
import * as crypto from 'node:crypto';
11+
import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases';
1112

1213
type ByteUtilTest<K extends keyof ByteUtils> = {
1314
name: string;
@@ -400,20 +401,32 @@ const fromUTF8Tests: ByteUtilTest<'fromUTF8'>[] = [
400401
const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
401402
{
402403
name: 'should create utf8 string from buffer input',
403-
inputs: [Buffer.from('abc\u{1f913}', 'utf8')],
404+
inputs: [Buffer.from('abc\u{1f913}', 'utf8'), 0, 7, false],
404405
expectation({ output, error }) {
405406
expect(error).to.be.null;
406407
expect(output).to.deep.equal(Buffer.from('abc\u{1f913}', 'utf8').toString('utf8'));
407408
}
408409
},
409410
{
410411
name: 'should return empty string for empty buffer input',
411-
inputs: [Buffer.alloc(0)],
412+
inputs: [Buffer.alloc(0), 0, 0, false],
412413
expectation({ output, error }) {
413414
expect(error).to.be.null;
414415
expect(output).to.be.a('string').with.lengthOf(0);
415416
}
416-
}
417+
},
418+
...utf8WebPlatformSpecTests.map(t => ({
419+
name: t.name,
420+
inputs: [Uint8Array.from(t.input), 0, t.input.length, true] as [
421+
buffer: Uint8Array,
422+
start: number,
423+
end: number,
424+
fatal: boolean
425+
],
426+
expectation({ error }) {
427+
expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
428+
}
429+
}))
417430
];
418431
const utf8ByteLengthTests: ByteUtilTest<'utf8ByteLength'>[] = [
419432
{

0 commit comments

Comments
 (0)