@@ -12,6 +12,7 @@ import { fixedSize } from 'ipfs-unixfs-importer/chunker'
 import { balanced, type FileLayout, flat, trickle } from 'ipfs-unixfs-importer/layout'
 import all from 'it-all'
 import randomBytes from 'it-buffer-stream'
+import drain from 'it-drain'
 import first from 'it-first'
 import last from 'it-last'
 import toBuffer from 'it-to-buffer'
@@ -20,6 +21,7 @@ import * as raw from 'multiformats/codecs/raw'
 import { identity } from 'multiformats/hashes/identity'
 import { sha256 } from 'multiformats/hashes/sha2'
 import { Readable } from 'readable-stream'
+import Sinon from 'sinon'
 import { concat as uint8ArrayConcat } from 'uint8arrays/concat'
 import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
 import { toString as uint8ArrayToString } from 'uint8arrays/to-string'
@@ -1343,4 +1345,228 @@ describe('exporter', () => {
       dataSizeInBytes *= 10
     }
   })
+
+  it('should allow control of block read concurrency for files', async () => {
+    // create a multi-layered DAG of a manageable size
+    const imported = await first(importer([{
+      path: '1.2MiB.txt',
+      content: asAsyncIterable(smallFile)
+    }], block, {
+      rawLeaves: true,
+      chunker: fixedSize({ chunkSize: 50 }),
+      layout: balanced({ maxChildrenPerNode: 2 })
+    }))
+
+    if (imported == null) {
+      throw new Error('Nothing imported')
+    }
+
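+    // with a 50 byte chunk size and at most two children per node, the file
+    // should import as a balanced DAG: a root with two intermediate nodes
+    // over four raw leaf blocks (verified by the assertions below)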
+    const node = dagPb.decode(await block.get(imported.cid))
+    expect(node.Links).to.have.lengthOf(2, 'imported node had too many children')
+
+    const child1 = dagPb.decode(await block.get(node.Links[0].Hash))
+    expect(child1.Links).to.have.lengthOf(2, 'layer 1 node had too many children')
+
+    const child2 = dagPb.decode(await block.get(node.Links[1].Hash))
+    expect(child2.Links).to.have.lengthOf(2, 'layer 1 node had too many children')
+
+    // should be raw nodes
+    expect(child1.Links[0].Hash.code).to.equal(raw.code, 'layer 2 node had wrong codec')
+    expect(child1.Links[1].Hash.code).to.equal(raw.code, 'layer 2 node had wrong codec')
+    expect(child2.Links[0].Hash.code).to.equal(raw.code, 'layer 2 node had wrong codec')
+    expect(child2.Links[1].Hash.code).to.equal(raw.code, 'layer 2 node had wrong codec')
+
+    // export file
+    const file = await exporter(imported.cid, block)
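+    // resolving the entry reads the root block, so the spy installed below
+    // only sees the child block reads triggered by consuming content()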
+
+    // export file data with default settings
+    const blockReadSpy = Sinon.spy(block, 'get')
+    const contentWithDefaultBlockConcurrency = await toBuffer(file.content())
+
+    // blocks should be loaded in default order - a whole level of sibling nodes at a time
+    expect(blockReadSpy.getCalls().map(call => call.args[0].toString())).to.deep.equal([
+      node.Links[0].Hash.toString(),
+      node.Links[1].Hash.toString(),
+      child1.Links[0].Hash.toString(),
+      child1.Links[1].Hash.toString(),
+      child2.Links[0].Hash.toString(),
+      child2.Links[1].Hash.toString()
+    ])
+
+    // export file data overriding read concurrency
+    blockReadSpy.resetHistory()
+    const contentWithSmallBlockConcurrency = await toBuffer(file.content({
+      blockReadConcurrency: 1
+    }))
+
+    // blocks should be loaded in traversal order
+    expect(blockReadSpy.getCalls().map(call => call.args[0].toString())).to.deep.equal([
+      node.Links[0].Hash.toString(),
+      child1.Links[0].Hash.toString(),
+      child1.Links[1].Hash.toString(),
+      node.Links[1].Hash.toString(),
+      child2.Links[0].Hash.toString(),
+      child2.Links[1].Hash.toString()
+    ])
+
+    // ensure exported bytes are the same
+    expect(contentWithDefaultBlockConcurrency).to.equalBytes(contentWithSmallBlockConcurrency)
+  })
+
+  it('should allow control of block read concurrency for directories', async () => {
+    const entries = 1024
+
+    // create a largeish directory
+    const imported = await last(importer((async function * () {
+      for (let i = 0; i < entries; i++) {
+        yield {
+          path: `file-${i}.txt`,
+          content: Uint8Array.from([i])
+        }
+      }
+    })(), block, {
+      wrapWithDirectory: true
+    }))
+
+    if (imported == null) {
+      throw new Error('Nothing imported')
+    }
+
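+    // with the default shard split threshold the directory should stay a
+    // single flat dag-pb node with one link per entry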
+    const node = dagPb.decode(await block.get(imported.cid))
+    expect(node.Links).to.have.lengthOf(entries, 'imported node had too many children')
+
+    for (const link of node.Links) {
+      // should be raw nodes
+      expect(link.Hash.code).to.equal(raw.code, 'child node had wrong codec')
+    }
+
+    // export directory
+    const directory = await exporter(imported.cid, block)
+
+    // patch block.get so we can record the order and overlap of block reads
+    const originalGet = block.get.bind(block)
+
+    const expectedInvocations: string[] = []
+
+    for (const link of node.Links) {
+      expectedInvocations.push(`${link.Hash.toString()}-start`)
+      expectedInvocations.push(`${link.Hash.toString()}-end`)
+    }
+
+    const actualInvocations: string[] = []
+
+    block.get = async (cid) => {
+      actualInvocations.push(`${cid.toString()}-start`)
+
+      // introduce a small delay - if running in parallel actualInvocations will
+      // be:
+      // `foo-start`, `bar-start`, `baz-start`, `foo-end`, `bar-end`, `baz-end`
+      // if in series it will be:
+      // `foo-start`, `foo-end`, `bar-start`, `bar-end`, `baz-start`, `baz-end`
+      await delay(1)
+
+      actualInvocations.push(`${cid.toString()}-end`)
+
+      return originalGet(cid)
+    }
+
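+    // the spy wraps the patched get, so the recorded call order lines up
+    // with the start/end markers pushed by the patched implementation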
+    const blockReadSpy = Sinon.spy(block, 'get')
+    await drain(directory.content({
+      blockReadConcurrency: 1
+    }))
+
+    // with a block read concurrency of 1, blocks should be loaded one at a
+    // time in traversal order
+    expect(blockReadSpy.getCalls().map(call => call.args[0].toString())).to.deep.equal(
+      node.Links.map(link => link.Hash.toString())
+    )
+
+    expect(actualInvocations).to.deep.equal(expectedInvocations)
+  })
+
+  it('should allow control of block read concurrency for HAMT sharded directories', async () => {
+    const entries = 1024
+
+    // create a sharded directory
+    const imported = await last(importer((async function * () {
+      for (let i = 0; i < entries; i++) {
+        yield {
+          path: `file-${i}.txt`,
+          content: Uint8Array.from([i])
+        }
+      }
+    })(), block, {
+      wrapWithDirectory: true,
+      shardSplitThresholdBytes: 10
+    }))
+
+    if (imported == null) {
+      throw new Error('Nothing imported')
+    }
+
+    const node = dagPb.decode(await block.get(imported.cid))
+    const data = UnixFS.unmarshal(node.Data ?? new Uint8Array(0))
+    expect(data.type).to.equal('hamt-sharded-directory')
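+    // shardSplitThresholdBytes: 10 forces sharding even for a handful of
+    // tiny entries, so the directory imports as a HAMT of nested sub-shards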
+
+    // traverse the shard, collect all the CIDs
+    async function collectCIDs (node: PBNode): Promise<CID[]> {
+      const children: CID[] = []
+
+      for (const link of node.Links) {
+        children.push(link.Hash)
+
+        if (link.Hash.code === dagPb.code) {
+          const buf = await block.get(link.Hash)
+          const childNode = dagPb.decode(buf)
+
+          children.push(...(await collectCIDs(childNode)))
+        }
+      }
+
+      return children
+    }
+
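+    // collectCIDs visits links depth-first, parent before children - the
+    // order in which a serialised export should read them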
+    const children: CID[] = await collectCIDs(node)
+
+    // export directory
+    const directory = await exporter(imported.cid, block)
+
+    // patch block.get so we can record the order and overlap of block reads
+    const originalGet = block.get.bind(block)
+
+    const expectedInvocations: string[] = []
+
+    for (const cid of children) {
+      expectedInvocations.push(`${cid.toString()}-start`)
+      expectedInvocations.push(`${cid.toString()}-end`)
+    }
+
+    const actualInvocations: string[] = []
+
+    block.get = async (cid) => {
+      actualInvocations.push(`${cid.toString()}-start`)
+
+      // introduce a small delay - if running in parallel actualInvocations will
+      // be:
+      // `foo-start`, `bar-start`, `baz-start`, `foo-end`, `bar-end`, `baz-end`
+      // if in series it will be:
+      // `foo-start`, `foo-end`, `bar-start`, `bar-end`, `baz-start`, `baz-end`
+      await delay(1)
+
+      actualInvocations.push(`${cid.toString()}-end`)
+
+      return originalGet(cid)
+    }
+
+    const blockReadSpy = Sinon.spy(block, 'get')
+    await drain(directory.content({
+      blockReadConcurrency: 1
+    }))
+
+    // with a block read concurrency of 1, blocks should be loaded one at a
+    // time in traversal order
+    expect(blockReadSpy.getCalls().map(call => call.args[0].toString())).to.deep.equal(
+      children.map(cid => cid.toString())
+    )
+
+    expect(actualInvocations).to.deep.equal(expectedInvocations)
+  })
 })