@@ -2,14 +2,16 @@ import { uploadShards } from "../src/utils/uploadShards.js";
22import { sha256 } from "../src/utils/sha256.js" ;
33import { parseArgs } from "node:util" ;
44import { tmpdir } from "node:os" ;
5- import { join } from "node:path" ;
5+ import path , { join } from "node:path" ;
66import { writeFile , readFile , stat , mkdir } from "node:fs/promises" ;
77import type { RepoId } from "../src/types/public.js" ;
88import { toRepoId } from "../src/utils/toRepoId.js" ;
99import type { CommitOperation } from "../src/index.js" ;
1010import { commitIter , downloadFile } from "../src/index.js" ;
1111import { SplicedBlob } from "../src/utils/SplicedBlob.js" ;
1212import { pathToFileURL } from "node:url" ;
13+ import { existsSync } from "node:fs" ;
14+ import { FileBlob } from "../src/utils/FileBlob.js" ;
1315
1416/**
1517 * This script downloads the files from openai-community/gpt2 and simulates an upload to a xet repo.
@@ -37,6 +39,11 @@ const FILES_TO_DOWNLOAD = [
3739 filename : "64.tflite" ,
3840 sha256 : "cfcd510b239d90b71ee87d4e57a5a8c2d55b2a941e5d9fe5852298268ddbe61b" ,
3941 } ,
42+ {
43+ url : "https://huggingface.co/openai-community/gpt2/resolve/main/model.safetensors?download=true" ,
44+ filename : "model.safetensors" ,
45+ sha256 : "248dfc3911869ec493c76e65bf2fcf7f615828b0254c12b473182f0f81d3a707" ,
46+ } ,
4047] ;
4148
4249const FILES_TO_EDIT = [
@@ -84,8 +91,7 @@ async function* createFileSource(files: Array<{ filepath: string; filename: stri
8491} > {
8592 for ( const file of files ) {
8693 console . log ( `Processing ${ file . filename } ...` ) ;
87- const buffer = await readFile ( file . filepath ) ;
88- let blob = new Blob ( [ buffer ] ) ;
94+ let blob : Blob = await FileBlob . create ( file . filepath ) ;
8995
9096 if ( file . filename . endsWith ( ".edited" ) ) {
9197 const edits = FILES_TO_EDIT . find ( ( f ) => f . filename === file . filename ) ?. edits ;
@@ -110,7 +116,7 @@ async function* createFileSource(files: Array<{ filepath: string; filename: stri
110116 const sha256ToCheck =
111117 FILES_TO_DOWNLOAD . find ( ( f ) => f . filename === file . filename ) ?. sha256 ||
112118 FILES_TO_EDIT . find ( ( f ) => f . filename === file . filename ) ?. sha256 ;
113- if ( sha256Hash !== sha256ToCheck ) {
119+ if ( sha256ToCheck !== undefined && sha256Hash !== sha256ToCheck ) {
114120 throw new Error ( `SHA256 mismatch for ${ file . filename } : ${ sha256Hash } !== ${ sha256ToCheck } ` ) ;
115121 }
116122
@@ -214,6 +220,10 @@ async function main() {
214220 short : "c" ,
215221 default : false ,
216222 } ,
223+ localFilePath : {
224+ type : "string" ,
225+ short : "f" ,
226+ } ,
217227 write : {
218228 type : "boolean" ,
219229 short : "w" ,
@@ -250,6 +260,13 @@ async function main() {
250260 files . push ( { filepath, filename : fileInfo . filename } ) ;
251261 }
252262
263+ if ( args . localFilePath ) {
264+ if ( ! existsSync ( args . localFilePath ) ) {
265+ throw new Error ( `Local file ${ args . localFilePath } does not exist` ) ;
266+ }
267+ files . push ( { filepath : args . localFilePath , filename : path . basename ( args . localFilePath ) } ) ;
268+ }
269+
253270 // Parse repo
254271 const repoName = args . repo ;
255272
@@ -279,7 +296,20 @@ async function main() {
279296 // Process files through uploadShards
280297 const fileSource = createFileSource ( files ) ;
281298
282- for await ( const event of uploadShards ( fileSource , uploadParams ) ) {
299+ const fileProgress : Record < string , number > = { } ;
300+
301+ for await ( const event of uploadShards ( fileSource , {
302+ ...uploadParams ,
303+ yieldCallback : ( event ) => {
304+ if ( ! fileProgress [ event . path ] ) {
305+ fileProgress [ event . path ] = event . progress ;
306+ }
307+ if ( event . progress < fileProgress [ event . path ] ) {
308+ throw new Error ( `Progress for ${ event . path } went down from ${ fileProgress [ event . path ] } to ${ event . progress } ` ) ;
309+ }
310+ fileProgress [ event . path ] = event . progress ;
311+ } ,
312+ } ) ) {
283313 switch ( event . event ) {
284314 case "file" : {
285315 console . log ( `\n📁 Processed file: ${ event . path } ` ) ;
@@ -303,6 +333,14 @@ async function main() {
303333 case "fileProgress" : {
304334 const progress = ( event . progress * 100 ) . toFixed ( 1 ) ;
305335 console . log ( ` 📈 Progress for ${ event . path } : ${ progress } %` ) ;
336+
337+ if ( ! fileProgress [ event . path ] ) {
338+ fileProgress [ event . path ] = event . progress ;
339+ }
340+ if ( event . progress < fileProgress [ event . path ] ) {
341+ throw new Error ( `Progress for ${ event . path } went down from ${ fileProgress [ event . path ] } to ${ event . progress } ` ) ;
342+ }
343+ fileProgress [ event . path ] = event . progress ;
306344 break ;
307345 }
308346 }
0 commit comments