Skip to content

Commit 2e1ed11

Browse files
xet chunker is created for each file instead of reused (#1738)
We need to create a fresh Chunker object for each file we process, so that the last bytes of the previous file do not affect the hash used to generate chunk boundaries. This gets rid of the `try {} finally {chunker.free()}` at the end of file processing, which causes a lot of code to be de-indented; the changes are otherwise pretty minimal. Co-authored-by: coyotte508 <coyotte508@protonmail.com>
1 parent 1aea2b9 commit 2e1ed11

File tree

1 file changed

+16
-16
lines changed

1 file changed

+16
-16
lines changed

packages/hub/src/utils/createXorbs.ts

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ export async function* createXorbs(
112112
let xorbId = 0;
113113

114114
await chunkModule.init();
115-
const chunker = new chunkModule.Chunker(TARGET_CHUNK_SIZE);
116115
const chunkCache = new ChunkCache();
117116
let xorb = new CurrentXorbInfo();
118117

@@ -147,8 +146,9 @@ export async function* createXorbs(
147146

148147
const remoteXorbHashes: string[] = [""]; // starts at index 1 (to simplify implem a bit)
149148

150-
try {
151-
for await (const fileSource of fileSources) {
149+
for await (const fileSource of fileSources) {
150+
const chunker = new chunkModule.Chunker(TARGET_CHUNK_SIZE);
151+
try {
152152
xorb.fileSize[fileSource.path] = fileSource.content.size;
153153

154154
// Load dedup info for the first chunk of the file, if it's potentially modified by the splice
@@ -342,22 +342,22 @@ export async function* createXorbs(
342342
dedupRatio,
343343
representation: fileRepresentation,
344344
});
345+
} finally {
346+
chunker.free();
347+
// ^ is this really needed ?
345348
}
349+
}
346350

347-
if (xorb.offset > 0) {
348-
yield xorb.event(chunkModule.compute_xorb_hash.bind(chunkModule));
349-
}
351+
if (xorb.offset > 0) {
352+
yield xorb.event(chunkModule.compute_xorb_hash.bind(chunkModule));
353+
}
350354

351-
for (const event of pendingFileEvents) {
352-
event.representation = event.representation.map((rep) => ({
353-
...rep,
354-
xorbId: (rep.xorbId as number) >= 0 ? rep.xorbId : remoteXorbHashes[-rep.xorbId],
355-
}));
356-
yield event;
357-
}
358-
} finally {
359-
chunker.free();
360-
// ^ is this really needed ?
355+
for (const event of pendingFileEvents) {
356+
event.representation = event.representation.map((rep) => ({
357+
...rep,
358+
xorbId: (rep.xorbId as number) >= 0 ? rep.xorbId : remoteXorbHashes[-rep.xorbId],
359+
}));
360+
yield event;
361361
}
362362
}
363363

0 commit comments

Comments
 (0)