Skip to content

Commit d6f2959

Browse files
committed
perf: 9x AI code optimization
1 parent 4c7faf4 commit d6f2959

File tree

4 files changed

+73
-35
lines changed

4 files changed

+73
-35
lines changed

package.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@
5050
"string-width": "4.2.3"
5151
},
5252
"dependencies": {
53-
"runes": "0.4.3",
54-
"string-length": "6.0.0"
53+
"runes": "0.4.3"
5554
},
5655
"devDependencies": {
5756
"@shelf/eslint-config": "5.2.3",

readme.md

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,52 +37,62 @@ Running "Without Unicode" suite...
3737
Progress: 100%
3838
3939
~33 kb split by 2 kb:
40-
14 255 401 ops/s, ±0.33% | 83.34% slower
40+
14 106 903 ops/s, ±1.71% | 86.19% slower
4141
4242
~33 kb split by 1 mb:
43-
85 581 562 ops/s, ±1.89% | fastest
43+
100 461 043 ops/s, ±1.45% | 1.63% slower
4444
4545
~330 kb split by 2 kb:
46-
1 612 589 ops/s, ±0.83% | 98.12% slower
46+
1 600 485 ops/s, ±0.63% | 98.43% slower
4747
4848
~330 kb split by 1 mb:
49-
84 876 970 ops/s, ±1.98% | 0.82% slower
49+
102 125 168 ops/s, ±1.50% | fastest
5050
5151
~3.3 mb split by 2 kb:
52-
165 944 ops/s, ±0.62% | 99.81% slower
52+
161 507 ops/s, ±1.19% | 99.84% slower
5353
5454
~3.3 mb split by 1 mb:
55-
40 975 330 ops/s, ±1.19% | 52.12% slower
55+
41 773 807 ops/s, ±1.54% | 59.1% slower
5656
5757
~33 mb split by 2 kb:
58-
11 643 ops/s, ±0.50% | slowest, 99.99% slower
58+
11 098 ops/s, ±0.25% | slowest, 99.99% slower
5959
6060
~33 mb split by 1 mb:
61-
5 444 259 ops/s, ±0.78% | 93.64% slower
61+
5 506 349 ops/s, ±0.58% | 94.61% slower
6262
6363
Finished 8 cases!
64-
Fastest: ~33 kb split by 1 mb
64+
Fastest: ~330 kb split by 1 mb
6565
Slowest: ~33 mb split by 2 kb
6666
Running "Unicode Aware" suite...
6767
Progress: 100%
6868
6969
~33 kb split by 2 kb with unicodeAware:
70-
101.4 ops/s, ±0.95% | fastest
70+
847 ops/s, ±0.99% | 12.14% slower
7171
7272
~33 kb split by 1 mb with unicodeAware:
73-
99.7 ops/s, ±0.84% | 1.68% slower
73+
964 ops/s, ±0.25% | fastest
7474
7575
~330 kb split by 2 kb with unicodeAware:
76-
10.2 ops/s, ±0.51% | 89.94% slower
76+
71 ops/s, ±0.76% | slowest, 92.63% slower
7777
7878
~330 kb split by 1 mb with unicodeAware:
79-
10 ops/s, ±0.55% | slowest, 90.14% slower
79+
90 ops/s, ±0.94% | 90.66% slower
8080
8181
Finished 4 cases!
82-
Fastest: ~33 kb split by 2 kb with unicodeAware
83-
Slowest: ~330 kb split by 1 mb with unicodeAware
82+
Fastest: ~33 kb split by 1 mb with unicodeAware
83+
Slowest: ~330 kb split by 2 kb with unicodeAware
8484
```
8585

86+
## Recent optimizations — September 2025
87+
88+
September 2025 improvements were delivered autonomously by the gpt-5-codex model. We treated the hot paths like any latency-sensitive service and tuned the slowest sections:
89+
90+
- Single-pass unicode chunking – length and slicing now come from the same `runes()` walk, eliminating the extra `string-length` scan and keeping multicodepoint graphemes intact.
91+
- Consolidated ASCII loop – collapsed the fast path into one traversal with early exits for empty inputs and oversized chunk sizes to trim per-call overhead.
92+
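- Fractional-size parity – the unicode-aware path now floors each chunk's start and end offsets, matching the index coercion of the `slice` call it replaced, so non-integer chunk sizes behave exactly as before (e.g. `'abcdef'` with `size: 2.5` still yields `['ab', 'cde', 'f']`); a new regression test covers this.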
93+
94+
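The result is comparable-to-higher throughput in the ASCII suite (for example, ~33 kb split by 1 mb climbs from 85.6M to 100.5M ops/s, while the 2 kb cases stay within a few percent of their previous figures) and a roughly 7–10× lift in the unicode-aware scenarios (e.g. ~33 kb split by 1 mb rises from ~100 ops/s to ~964 ops/s, and ~33 kb split by 2 kb from ~101 ops/s to ~847 ops/s), while preserving behaviour for combining marks and emoji ligatures.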
95+
8696
## See Also
8797

8898
- [fast-normalize-spaces](https://github.com/shelfio/fast-normalize-spaces)

src/index.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,7 @@ it('should split emojis correctly w/ useByteLength option', () => {
3737
it('should split emojis correctly w/ useByteLength option for odd chunk length', () => {
3838
expect(fastChunkString('😀😃😄', {size: 2, unicodeAware: true})).toEqual(['😀😃', '😄']);
3939
});
40+
41+
it('should coerce fractional unicode chunk sizes like slice does', () => {
42+
expect(fastChunkString('abcdef', {size: 2.5, unicodeAware: true})).toEqual(['ab', 'cde', 'f']);
43+
});

src/index.ts

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
import runes from 'runes';
2-
import stringLength from 'string-length';
32

43
function fastChunkString(
5-
str: string,
4+
original: string,
65
{
76
size,
87
unicodeAware = false,
@@ -11,7 +10,12 @@ function fastChunkString(
1110
unicodeAware?: boolean;
1211
},
1312
): string[] {
14-
str = str || '';
13+
const str = original || '';
14+
15+
if (str.length === 0) {
16+
return [];
17+
}
18+
1519
if (!unicodeAware) {
1620
return getChunks(str, size);
1721
}
@@ -20,31 +24,52 @@ function fastChunkString(
2024
}
2125

2226
function getChunks(str: string, size: number): string[] {
23-
const strLength: number = str.length;
24-
const numChunks: number = Math.ceil(strLength / size);
25-
const chunks: string[] = new Array(numChunks);
27+
const strLength = str.length;
28+
29+
if (strLength === 0) {
30+
return [];
31+
}
32+
33+
if (size >= strLength) {
34+
return [str];
35+
}
2636

27-
let i = 0;
28-
let o = 0;
37+
const numChunks = Math.ceil(strLength / size);
38+
const chunks = new Array<string>(numChunks);
2939

30-
for (; i < numChunks; ++i, o += size) {
31-
chunks[i] = str.substr(o, size);
40+
for (let index = 0, offset = 0; index < numChunks; index += 1, offset += size) {
41+
chunks[index] = str.substr(offset, size);
3242
}
3343

3444
return chunks;
3545
}
3646

3747
function getChunksUnicode(str: string, size: number): string[] {
38-
const strLength: number = stringLength(str);
39-
const numChunks: number = Math.ceil(strLength / size);
40-
const chunks: string[] = new Array(numChunks);
48+
const runeChars = runes(str);
49+
const runeCount = runeChars.length;
4150

42-
let i = 0;
43-
let o = 0;
51+
if (runeCount === 0) {
52+
return [];
53+
}
4454

45-
const runeChars = runes(str);
46-
for (; i < numChunks; ++i, o += size) {
47-
chunks[i] = runeChars.slice(o, o + size).join('');
55+
if (size >= runeCount) {
56+
return [str];
57+
}
58+
59+
const numChunks = Math.ceil(runeCount / size);
60+
const chunks = new Array<string>(numChunks);
61+
62+
for (let index = 0, offset = 0; index < numChunks; index += 1, offset += size) {
63+
const start = Math.min(Math.floor(offset), runeCount);
64+
const end = Math.min(Math.floor(offset + size), runeCount);
65+
66+
let chunk = '';
67+
68+
for (let i = start; i < end; i += 1) {
69+
chunk += runeChars[i];
70+
}
71+
72+
chunks[index] = chunk;
4873
}
4974

5075
return chunks;

0 commit comments

Comments
 (0)