Skip to content

Commit 40bc600

Browse files
authored
test(core/protocols): add shape serde perf baselines (#7479)
* test(core/protocols): add shape serde perf baselines * test(core/protocols): add experimental single-pass json serializer
1 parent 065860a commit 40bc600

File tree

8 files changed

+476
-39
lines changed

8 files changed

+476
-39
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { CborCodec } from "@smithy/core/cbor";
2+
import { describe, test as it } from "vitest";
3+
4+
import { createNestingWidget, nestingWidget } from "../test-schema.spec";
5+
6+
/**
 * Informational timing runs for the CBOR shape serializer and deserializer.
 *
 * Neither test asserts anything: each one logs how long a single
 * (de)serialization took for widgets created with doubling size arguments,
 * so a human can compare the numbers against the recorded "par" values.
 * 30_000 is passed as the third argument to describe (presumably the timeout in ms).
 */
describe("performance baseline indicator", () => {
  const cbor = new CborCodec();
  const serializer = cbor.createSerializer();
  const deserializer = cbor.createDeserializer();

  it("should serialize objects", () => {
    const timings: string[] = [];
    const widgets = [];

    // warmup: build every fixture and run it through the serializer once, untimed.
    for (let exponent = 0; exponent < 13; ++exponent) {
      const widget = createNestingWidget(2 ** exponent);
      widgets.push(widget);
      serializer.write(nestingWidget, widget);
      serializer.flush();
    }

    // measured pass: one write + flush per fixture.
    for (const widget of widgets) {
      const start = performance.now();
      serializer.write(nestingWidget, widget);
      const bytes = serializer.flush();
      const elapsed = performance.now() - start;

      timings.push(`${elapsed} (byte length = ${bytes.byteLength}, ${bytes.byteLength / 1024 / elapsed} kb/ms)`);
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 23ms for up to 381014 bytes of CBOR. Up to 15 kb/ms. (kuhe's computer)
     */
    console.log("CborShapeSerializer performance timings", timings);
  });

  it("should deserialize bytes", async () => {
    const timings: string[] = [];
    const payloads = [];

    // warmup: serialize each fixture to CBOR bytes and read it back once, untimed.
    for (let exponent = 0; exponent < 12; ++exponent) {
      const widget = createNestingWidget(2 ** exponent);
      serializer.write(nestingWidget, widget);
      const encoded = serializer.flush();
      payloads.push(encoded);
      await deserializer.read(nestingWidget, encoded);
    }

    // measured pass: one read per encoded payload.
    for (const encoded of payloads) {
      const start = performance.now();
      await deserializer.read(nestingWidget, encoded);
      const elapsed = performance.now() - start;

      timings.push(`${elapsed} (byte length = ${encoded.byteLength}, ${encoded.byteLength / 1024 / elapsed} kb/ms)`);
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 9ms for up to 190550 bytes of CBOR. Up to 23 kb/ms. (kuhe's computer)
     */
    console.log("CborShapeDeserializer performance timings", timings);
  });
}, 30_000);

packages/core/src/submodules/protocols/json/JsonShapeDeserializer.spec.ts

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ import { NumericValue } from "@smithy/core/serde";
22
import type { TimestampEpochSecondsSchema } from "@smithy/types";
33
import { describe, expect, test as it } from "vitest";
44

5-
import { widget } from "../test-schema.spec";
5+
import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
66
import { JsonShapeDeserializer } from "./JsonShapeDeserializer";
7+
import { JsonShapeSerializer } from "./JsonShapeSerializer";
78

89
describe(JsonShapeDeserializer.name, () => {
910
let contextSourceAvailable = false;
@@ -153,4 +154,45 @@ describe(JsonShapeDeserializer.name, () => {
153154
expect(await deserializer.read(widget, JSON.stringify({ scalar: "-Infinity" }))).toEqual({ scalar: -Infinity });
154155
expect(await deserializer.read(widget, JSON.stringify({ scalar: "NaN" }))).toEqual({ scalar: NaN });
155156
});
157+
158+
/**
 * Informational timing run for JSON deserialization.
 *
 * A JsonShapeSerializer is used only to produce the JSON fixtures; the
 * `deserializer` under test comes from the enclosing describe block.
 * Nothing is asserted, the timings are only logged.
 */
describe("performance baseline indicator", () => {
  const fixtureSerializer = new JsonShapeSerializer({
    jsonName: true,
    timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
  });
  fixtureSerializer.setSerdeContext({
    base64Encoder: (input: Uint8Array) => Buffer.from(input).toString("base64"),
  } as any);

  it("should deserialize JSON strings", async () => {
    const timings: string[] = [];
    const documents = [];

    // warmup: create each JSON document and read it back once, untimed.
    for (let exponent = 0; exponent < 12; ++exponent) {
      const widget = createNestingWidget(2 ** exponent);
      fixtureSerializer.write(nestingWidget, widget);
      const json = fixtureSerializer.flush();
      documents.push(json);
      await deserializer.read(nestingWidget, json);
    }

    // measured pass: one read per JSON document.
    for (const json of documents) {
      const start = performance.now();
      await deserializer.read(nestingWidget, json);
      const elapsed = performance.now() - start;

      timings.push(`${elapsed} (JSON length = ${json.length}, ${json.length / 1024 / elapsed} kb/ms)`);
    }

    /**
     * No assertion here.
     * In the initial dual-pass implementation,
     * par time is 0 to 25ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
     */
    console.log("JsonShapeDeserializer performance timings", timings);
  });
}, 30_000);
156198
});

packages/core/src/submodules/protocols/json/JsonShapeSerializer.spec.ts

Lines changed: 57 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,75 @@ import { NumericValue } from "@smithy/core/serde";
22
import type { TimestampEpochSecondsSchema } from "@smithy/types";
33
import { describe, expect, test as it } from "vitest";
44

5-
import { widget } from "../test-schema.spec";
5+
import { createNestingWidget, nestingWidget, widget } from "../test-schema.spec";
6+
import { SinglePassJsonShapeSerializer } from "./experimental/SinglePassJsonShapeSerializer";
67
import { JsonShapeSerializer } from "./JsonShapeSerializer";
78

89
describe(JsonShapeSerializer.name, () => {
9-
it("serializes data to JSON", async () => {
10-
const serializer = new JsonShapeSerializer({
11-
jsonName: true,
12-
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
13-
});
14-
serializer.setSerdeContext({
15-
base64Encoder: (input: Uint8Array) => {
16-
return Buffer.from(input).toString("base64");
17-
},
18-
} as any);
10+
const serializer1 = new JsonShapeSerializer({
11+
jsonName: true,
12+
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
13+
});
14+
15+
const serializer2 = new SinglePassJsonShapeSerializer({
16+
jsonName: true,
17+
timestampFormat: { default: 7 satisfies TimestampEpochSecondsSchema, useTrait: true },
18+
});
1919

20+
it("serializes data to JSON", async () => {
2021
const data = {
2122
timestamp: new Date(0),
2223
bigint: 10000000000000000000000054321n,
2324
bigdecimal: new NumericValue("0.10000000000000000000000054321", "bigDecimal"),
2425
blob: new Uint8Array([0, 0, 0, 1]),
2526
};
26-
serializer.write(widget, data);
27-
const serialization = serializer.flush();
27+
serializer1.write(widget, data);
28+
const serialization = serializer1.flush();
2829
expect(serialization).toEqual(
2930
`{"blob":"AAAAAQ==","timestamp":0,"bigint":10000000000000000000000054321,"bigdecimal":0.10000000000000000000000054321}`
3031
);
3132
});
33+
34+
/**
 * Informational timing runs for JSON serialization, executed once per
 * serializer implementation (`serializer1` and `serializer2` from the
 * enclosing describe block). Nothing is asserted, the timings are only logged.
 */
describe("performance baseline indicator", () => {
  for (const serializer of [serializer1, serializer2]) {
    const implementation = serializer.constructor.name;

    // The implementation name is part of the title so the two registered
    // tests can be told apart in reports and selected individually.
    it(`should serialize objects (${implementation})`, () => {
      const timings: string[] = [];
      const widgets = [];

      // warmup: each fixture is written four times before a single flush.
      for (let exponent = 0; exponent < 12; ++exponent) {
        const widget = createNestingWidget(2 ** exponent);
        widgets.push(widget);
        for (let repeat = 0; repeat < 4; ++repeat) {
          serializer.write(nestingWidget, widget);
        }
        serializer.flush();
      }

      // measured pass: one write + flush per fixture.
      for (const widget of widgets) {
        const start = performance.now();
        serializer.write(nestingWidget, widget);
        const json = serializer.flush();
        const elapsed = performance.now() - start;

        timings.push(`${elapsed} (JSON length = ${json.length}, ${json.length / 1024 / elapsed} kb/ms)`);
      }

      /**
       * No assertion here.
       * In the initial dual-pass implementation,
       * par time is 0 to 30ms for up to 288899 chars of JSON. Up to 11 kb/ms. (kuhe's computer)
       *
       * In the single-pass implementation using string buildup,
       * par time is 0 to 51ms for up to 288899 chars of JSON. Up to 13 kb/ms. (kuhe's computer)
       */
      console.log(`${implementation} performance timings`, timings);
    });
  }
}, 30_000);
3276
});

packages/core/src/submodules/protocols/json/JsonShapeSerializer.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,7 @@ export class JsonShapeSerializer extends SerdeContextConfig implements ShapeSeri
102102
if (ns === this.rootSchema) {
103103
return value;
104104
}
105-
if (!this.serdeContext?.base64Encoder) {
106-
return toBase64(value);
107-
}
108-
return this.serdeContext?.base64Encoder(value);
105+
return (this.serdeContext?.base64Encoder ?? toBase64)(value);
109106
}
110107

111108
if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import { determineTimestampFormat } from "@smithy/core/protocols";
2+
import { NormalizedSchema } from "@smithy/core/schema";
3+
import { dateToUtcString, generateIdempotencyToken, LazyJsonString, NumericValue } from "@smithy/core/serde";
4+
import type {
5+
Schema,
6+
ShapeSerializer,
7+
TimestampDateTimeSchema,
8+
TimestampEpochSecondsSchema,
9+
TimestampHttpDateSchema,
10+
} from "@smithy/types";
11+
import { toBase64 } from "@smithy/util-base64";
12+
13+
import { SerdeContextConfig } from "../../ConfigurableSerdeContext";
14+
import type { JsonSettings } from "../JsonCodec";
15+
16+
/**
 * Experimental serializer that builds the JSON text directly, in a single pass
 * over the input, by string concatenation instead of first building an
 * intermediate object and handing it to JSON.stringify.
 *
 * Author's measurement note: it is not faster than the dual-pass approach
 * (which ends with native JSON.stringify) by a margin that would justify
 * using it. It appears barely faster for some mid-range object sizes and
 * slower for large ones.
 *
 * @internal
 */
export class SinglePassJsonShapeSerializer extends SerdeContextConfig implements ShapeSerializer<string> {
  /**
   * Matches any string that cannot be placed between double quotes verbatim:
   * it contains a quote, a backslash, a control character or a surrogate code unit.
   */
  // eslint-disable-next-line no-control-regex
  private static readonly NEEDS_ESCAPING = /["\\\u0000-\u001f\ud800-\udfff]/;

  /** JSON text produced by the most recent write call. */
  private buffer = "";
  private rootSchema: NormalizedSchema | undefined;

  public constructor(public readonly settings: JsonSettings) {
    super();
  }

  /**
   * Renders text as a JSON string literal.
   * Strings without special characters take the cheap template path;
   * everything else is escaped by JSON.stringify.
   */
  private static quote(text: string): string {
    return SinglePassJsonShapeSerializer.NEEDS_ESCAPING.test(text) ? JSON.stringify(text) : `"${text}"`;
  }

  /**
   * Serializes value according to schema and keeps the resulting JSON text
   * until flush is called. A later write replaces the earlier result.
   */
  public write(schema: Schema, value: unknown): void {
    this.rootSchema = NormalizedSchema.of(schema);
    this.buffer = this.writeObject(this.rootSchema, value);
  }

  /**
   * Same as write, and additionally appends a `__type` entry holding the
   * schema's name when the serialized result is a JSON object.
   *
   * The entry is appended as the last member. If the value already produced
   * a `__type` key, the appended entry is a duplicate key that comes last.
   *
   * @internal
   */
  public writeDiscriminatedDocument(schema: Schema, value: unknown): void {
    this.write(schema, value);
    const typeName = NormalizedSchema.of(schema).getName(true);
    // The buffer is JSON text, so "is an object" means "starts with an opening brace".
    if (typeof typeName === "string" && this.buffer.startsWith("{")) {
      const typeEntry = `"__type":${SinglePassJsonShapeSerializer.quote(typeName)}`;
      const members = this.buffer.slice(1, -1);
      this.buffer = members.length > 0 ? `{${members},${typeEntry}}` : `{${typeEntry}}`;
    }
  }

  /**
   * @returns the JSON text produced by the most recent write call.
   */
  public flush(): string {
    this.rootSchema = undefined;

    return this.buffer;
  }

  /**
   * Serializes a container value (list, structure, map or document).
   *
   * @returns the JSON text, or an empty string when value is null/undefined
   * or when value and schema do not form a supported container combination.
   */
  private writeObject(schema: Schema, value: unknown): string {
    if (value == undefined) {
      return "";
    }

    const ns = NormalizedSchema.of(schema);
    const sparse = !!ns.getMergedTraits().sparse;
    const quote = SinglePassJsonShapeSerializer.quote;

    let members = "";
    // Empty before the first emitted member, "," afterwards.
    let separator = "";

    if (Array.isArray(value) && (ns.isDocumentSchema() || ns.isListSchema())) {
      const itemSchema = ns.getValueSchema();
      for (let i = 0; i < value.length; ++i) {
        const item = value[i];
        // null/undefined items are kept only for sparse collections.
        if (item != null || sparse) {
          members += separator + this.writeValue(itemSchema, item);
          separator = ",";
        }
      }
      return `[${members}]`;
    }

    if (ns.isStructSchema()) {
      const source = value as Record<string, unknown>;
      for (const [name, member] of ns.structIterator()) {
        const item = source[name];
        // Absent members are skipped, except idempotency tokens, which writeValue may generate.
        if (item == null && !member.isIdempotencyToken()) {
          continue;
        }
        const targetKey = this.settings.jsonName ? member.getMergedTraits().jsonName ?? name : name;
        members += `${separator}${quote(targetKey)}:${this.writeValue(member, item)}`;
        separator = ",";
      }
      return `{${members}}`;
    }

    if (ns.isMapSchema() || ns.isDocumentSchema()) {
      // Entries are serialized with the container's value schema, like list items above.
      const entrySchema = ns.getValueSchema();
      for (const [key, entry] of Object.entries(value)) {
        if (entry != null || sparse) {
          members += `${separator}${quote(key)}:${this.writeValue(entrySchema, entry)}`;
          separator = ",";
        }
      }
      return `{${members}}`;
    }

    return "";
  }

  /**
   * Serializes a single value: scalars are rendered here, containers are
   * delegated to writeObject.
   */
  private writeValue(schema: Schema, value: unknown): string {
    const ns = NormalizedSchema.of(schema);
    const quote = SinglePassJsonShapeSerializer.quote;

    if (
      (ns.isBlobSchema() && (value instanceof Uint8Array || typeof value === "string")) ||
      (ns.isDocumentSchema() && value instanceof Uint8Array)
    ) {
      return quote((this.serdeContext?.base64Encoder ?? toBase64)(value));
    }

    if ((ns.isTimestampSchema() || ns.isDocumentSchema()) && value instanceof Date) {
      const format = determineTimestampFormat(ns, this.settings);
      switch (format) {
        case 5 satisfies TimestampDateTimeSchema:
          return quote(value.toISOString().replace(".000Z", "Z"));
        case 6 satisfies TimestampHttpDateSchema:
          return quote(dateToUtcString(value));
        case 7 satisfies TimestampEpochSecondsSchema:
          return String(value.getTime() / 1000);
        default:
          console.warn("Missing timestamp format, using epoch seconds", value);
          return String(value.getTime() / 1000);
      }
    }

    if (ns.isNumericSchema() && typeof value === "number") {
      // Non-finite numbers under a numeric schema are sent as the strings "Infinity", "-Infinity", "NaN".
      if (Math.abs(value) === Infinity || isNaN(value)) {
        return quote(String(value));
      }
    }

    if (ns.isStringSchema()) {
      if (typeof value === "undefined" && ns.isIdempotencyToken()) {
        return quote(generateIdempotencyToken());
      }

      if (typeof value === "string") {
        const mediaType = ns.getMergedTraits().mediaType;

        if (mediaType) {
          const isJson = mediaType === "application/json" || mediaType.endsWith("+json");
          if (isJson) {
            // The embedded JSON document is itself a string value and must be escaped.
            return quote(LazyJsonString.from(value).toString());
          }
        }
      }
    }

    if (value instanceof NumericValue) {
      // ns can be BigDecimal or Document.
      return value.string;
    }

    if (value !== null && typeof value === "object") {
      return this.writeObject(ns, value);
    }

    if (typeof value === "string") {
      return quote(value);
    }

    // JSON has no representation for undefined or non-finite numbers;
    // JSON.stringify renders such values as null inside containers, so do the same.
    if (value == null || (typeof value === "number" && !Number.isFinite(value))) {
      return "null";
    }

    return String(value);
  }
}

0 commit comments

Comments
 (0)