Skip to content

Commit 443e285

Browse files
SgtPookiCopilotrvagg
authored
feat: data-set CLI uses filecoin-pin/core/data-set (#211)
* feat: add core/data-set methods * Update src/core/data-set/types.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/core/data-set/get-data-set-pieces.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Update src/core/data-set/get-data-set-pieces.ts Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * feat: data-set CLI uses filecoin-pin/core/data-set * fix: remove batchSize from core/data-set * fix: add createdWithFilecoinPin to DataSetSummary also adds some jsdoc comments explaining that metadata does exist * fix: call sp-registry for specific data-set providers * test: cleanup core/data-set unit tests * fix: cleanup pieceSize calculations and types * chore: remove leaf count info from data-set * fix: listDataSets only gets providerInfo if asked * fix: major data-set overhaul * refactor: use only one dataSet display function * fix: re-arrange provider info * Update display.ts Co-authored-by: Rod Vagg <rod@vagg.org> * Update display.ts Co-authored-by: Rod Vagg <rod@vagg.org> * Update display.ts Co-authored-by: Rod Vagg <rod@vagg.org> * Update src/data-set/display.ts Co-authored-by: Rod Vagg <rod@vagg.org> * fix: get data-set details with show <id> * chore: fix lint * chore: dont output unnecessary details see https://github.com/filecoin-project/filecoin-pin/pull/211\#discussion_r2502272156 * chore: only display commission if not zero * fix: metadata values wrapped in double-quotes * fix: ls --all shows data-sets not created by filecoin-pin --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Rod Vagg <rod@vagg.org>
1 parent 6d81508 commit 443e285

File tree

13 files changed

+577
-629
lines changed

13 files changed

+577
-629
lines changed

src/commands/data-set.ts

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,47 @@
11
import { Command } from 'commander'
2-
import { runDataSetCommand } from '../data-set/run.js'
3-
import type { DataSetCommandOptions } from '../data-set/types.js'
4-
import { addAuthOptions } from '../utils/cli-options.js'
2+
import { runDataSetDetailsCommand, runDataSetListCommand } from '../data-set/run.js'
3+
import type { DataSetCommandOptions, DataSetListCommandOptions } from '../data-set/types.js'
4+
import { addAuthOptions, addProviderOptions } from '../utils/cli-options.js'
55

6-
export const dataSetCommand = new Command('data-set')
7-
.description('Inspect data sets managed through Filecoin Onchain Cloud')
8-
.argument('[dataSetId]', 'Optional data set ID to inspect')
9-
.option('--ls', 'List all data sets for the configured account')
10-
.action(async (dataSetId: string | undefined, options) => {
6+
export const dataSetCommand = new Command('data-set').description(
7+
'Inspect data sets managed through Filecoin Onchain Cloud'
8+
)
9+
10+
/**
 * `data-set show <dataSetId>` subcommand.
 *
 * Validates the positional data set ID, then delegates to
 * runDataSetDetailsCommand with the numeric ID and the parsed options.
 * On any failure the error message is printed and the process exits with code 1.
 */
export const dataSetShowCommand = new Command('show')
11+
.description('Display detailed information about a data set')
12+
.argument('<dataSetId>', 'ID of the data set to display')
13+
.action(async (dataSetId: string, options) => {
1114
try {
1215
const commandOptions: DataSetCommandOptions = {
1316
...options,
14-
ls: options.ls,
17+
}
18+
const dataSetIdNumber = Number.parseInt(dataSetId, 10)
19+
// Number.parseInt tolerates trailing garbage ("12abc" parses as 12) and negative
// values, so require an all-digit string that fits in a safe integer.
if (!/^\d+$/.test(dataSetId) || !Number.isSafeInteger(dataSetIdNumber)) {
20+
throw new Error('Invalid data set ID')
1521
}
1622

17-
await runDataSetCommand(dataSetId, commandOptions)
23+
await runDataSetDetailsCommand(dataSetIdNumber, commandOptions)
1824
} catch (error) {
1925
console.error('Data set command failed:', error instanceof Error ? error.message : error)
2026
process.exit(1)
2127
}
2228
})
29+
// NOTE(review): presumably registers the shared auth flags on this subcommand — confirm in ../utils/cli-options.js
addAuthOptions(dataSetShowCommand)
30+
31+
/**
 * `data-set list` subcommand (alias `ls`).
 *
 * Passes the parsed options straight to runDataSetListCommand. The `--all`
 * flag defaults to false. On any failure the error message is printed and
 * the process exits with code 1.
 */
export const dataSetListCommand = new Command('list')
32+
.alias('ls')
33+
.description('List all data sets for the configured account')
34+
.option('--all', 'Show all data sets, not just the ones created with filecoin-pin', false)
35+
.action(async (options: DataSetListCommandOptions) => {
36+
try {
37+
await runDataSetListCommand(options)
38+
} catch (error) {
39+
console.error('Data set list command failed:', error instanceof Error ? error.message : error)
40+
process.exit(1)
41+
}
42+
})
43+
// NOTE(review): presumably register the shared auth and provider flags on this subcommand — confirm in ../utils/cli-options.js
addAuthOptions(dataSetListCommand)
44+
addProviderOptions(dataSetListCommand)
2345

24-
addAuthOptions(dataSetCommand)
46+
dataSetCommand.addCommand(dataSetShowCommand)
47+
dataSetCommand.addCommand(dataSetListCommand)

src/core/data-set/get-data-set-pieces.ts

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
import { METADATA_KEYS, type StorageContext, type Synapse, WarmStorageService } from '@filoz/synapse-sdk'
10+
import { getSizeFromPieceCID } from '@filoz/synapse-sdk/piece'
1011
import { isStorageContextWithDataSetId } from './type-guards.js'
1112
import type {
1213
DataSetPiecesResult,
@@ -62,9 +63,18 @@ export async function getDataSetPieces(
6263
try {
6364
const getPiecesOptions = { ...(signal && { signal }) }
6465
for await (const piece of storageContext.getPieces(getPiecesOptions)) {
65-
const pieceInfo: PieceInfo = {
66-
pieceId: piece.pieceId,
67-
pieceCid: piece.pieceCid.toString(),
66+
const pieceId = piece.pieceId
67+
const pieceCid = piece.pieceCid
68+
const pieceInfo: PieceInfo = { pieceId, pieceCid: pieceCid.toString() }
69+
70+
// Calculate piece size from CID
71+
try {
72+
pieceInfo.size = getSizeFromPieceCID(pieceCid)
73+
} catch (error) {
74+
logger?.warn(
75+
{ pieceId: piece.pieceId, pieceCid: piece.pieceCid.toString(), error },
76+
'Failed to calculate piece size from CID'
77+
)
6878
}
6979

7080
pieces.push(pieceInfo)
@@ -80,11 +90,20 @@ export async function getDataSetPieces(
8090
await enrichPiecesWithMetadata(synapse, storageContext, pieces, warnings, logger)
8191
}
8292

83-
return {
93+
// Calculate total size from pieces that have sizes
94+
const piecesWithSizes = pieces.filter((p): p is PieceInfo & { size: number } => p.size != null)
95+
96+
const result: DataSetPiecesResult = {
8497
pieces,
8598
dataSetId: storageContext.dataSetId,
8699
warnings,
87100
}
101+
102+
if (piecesWithSizes.length > 0) {
103+
result.totalSizeBytes = piecesWithSizes.reduce((sum, piece) => sum + BigInt(piece.size), 0n)
104+
}
105+
106+
return result
88107
}
89108

90109
/**
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import type { Synapse } from '@filoz/synapse-sdk'
2+
import { getDataSetPieces } from './get-data-set-pieces.js'
3+
import { listDataSets } from './list-data-sets.js'
4+
import type { DataSetSummary, ListDataSetsOptions } from './types.js'
5+
6+
/**
 * Fetch one data set summary and enrich it with its pieces.
 *
 * Looks the data set up through listDataSets, filtering on
 * pdpVerifierDataSetId and forcing withProviderDetails on. It then creates a
 * storage context for the match and reads its pieces (with metadata) through
 * getDataSetPieces.
 *
 * Because `options` is spread first, any caller-supplied `withProviderDetails`
 * or `filter` is overridden by the values set here.
 *
 * @param synapse - Synapse instance used for the lookup and to create the storage context
 * @param dataSetId - ID compared against each data set's pdpVerifierDataSetId
 * @param options - Forwarded to listDataSets; its logger is also used here and passed to getDataSetPieces
 * @returns The matching summary with `pieces` added, plus `totalSizeBytes` when getDataSetPieces reported one
 * @throws Error when no data set matches dataSetId
 */
export async function getDetailedDataSet(
7+
synapse: Synapse,
8+
dataSetId: number,
9+
options?: ListDataSetsOptions
10+
): Promise<DataSetSummary> {
11+
const logger = options?.logger
12+
const dataSets = await listDataSets(synapse, {
13+
...options,
14+
withProviderDetails: true,
15+
filter: (dataSet) => dataSet.pdpVerifierDataSetId === dataSetId,
16+
})
17+
18+
const dataSet = dataSets[0]
19+
// The explicit null check also narrows dataSet to a defined value for the code below.
if (dataSets.length === 0 || dataSet == null) {
20+
logger?.error({ dataSetId }, `Data set ${dataSetId} not found`)
21+
throw new Error(`Data set ${dataSetId} not found`)
22+
}
23+
24+
const storageContext = await synapse.storage.createContext({
25+
dataSetId: dataSet.dataSetId,
26+
})
27+
28+
const piecesResult = await getDataSetPieces(synapse, storageContext, {
29+
includeMetadata: true,
30+
logger,
31+
})
32+
33+
const result: DataSetSummary = {
34+
...dataSet,
35+
pieces: piecesResult.pieces,
36+
}
37+
38+
// totalSizeBytes is optional on both types; copy it only when a total was computed.
if (piecesResult.totalSizeBytes != null) {
39+
result.totalSizeBytes = piecesResult.totalSizeBytes
40+
}
41+
42+
return result
43+
}

src/core/data-set/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,6 @@
1515
*/
1616

1717
export * from './get-data-set-pieces.js'
18+
export * from './get-detailed-data-set.js'
1819
export * from './list-data-sets.js'
1920
export * from './types.js'

src/core/data-set/list-data-sets.ts

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
import type { ProviderInfo, Synapse } from '@filoz/synapse-sdk'
10+
import { WarmStorageService } from '@filoz/synapse-sdk'
1011
import { SPRegistryService } from '@filoz/synapse-sdk/sp-registry'
1112
import { DEFAULT_DATA_SET_METADATA } from '../synapse/constants.js'
1213
import type { DataSetSummary, ListDataSetsOptions } from './types.js'
@@ -34,27 +35,33 @@ import type { DataSetSummary, ListDataSetsOptions } from './types.js'
3435
export async function listDataSets(synapse: Synapse, options?: ListDataSetsOptions): Promise<DataSetSummary[]> {
3536
const logger = options?.logger
3637
const address = options?.address ?? (await synapse.getClient().getAddress())
38+
const withProviderDetails = options?.withProviderDetails ?? false
39+
const filter = options?.filter
3740

3841
// Step 1: Find data sets
3942
const dataSets = await synapse.storage.findDataSets(address)
4043

44+
const filteredDataSets = filter ? dataSets.filter(filter) : dataSets
45+
4146
// Step 2: Collect unique provider IDs from data sets
42-
const uniqueProviderIds = Array.from(new Set(dataSets.map((ds) => ds.providerId)))
47+
const uniqueProviderIds = withProviderDetails ? Array.from(new Set(filteredDataSets.map((ds) => ds.providerId))) : []
4348

4449
// Step 3: Fetch provider info for the specific provider IDs using sp-registry
4550
let providerMap: Map<number, ProviderInfo> = new Map()
4651
if (uniqueProviderIds.length > 0) {
4752
try {
48-
const spRegistry = new SPRegistryService(synapse.getProvider(), synapse.getNetwork())
53+
const warmStorageService = await WarmStorageService.create(synapse.getProvider(), synapse.getWarmStorageAddress())
54+
const serviceProviderRegistryAddress = await warmStorageService.getServiceProviderRegistryAddress()
55+
const spRegistry = new SPRegistryService(synapse.getProvider(), serviceProviderRegistryAddress)
4956
const providers = await spRegistry.getProviders(uniqueProviderIds)
50-
providerMap = new Map(providers.map((provider) => [provider.id, provider] as const))
57+
providerMap = new Map(providers.map((provider) => [provider.id, provider]))
5158
} catch (error) {
5259
logger?.warn({ error }, 'Failed to fetch provider info from sp-registry for provider enrichment')
5360
}
5461
}
5562

5663
// Map SDK datasets to our summary format (spread all fields, add dataSetId alias, provider, and filecoin-pin creation flag)
57-
return dataSets.map((ds) => {
64+
return filteredDataSets.map((ds) => {
5865
// Check if this dataset was created by filecoin-pin by looking for our DEFAULT_DATA_SET_METADATA fields
5966
const createdWithFilecoinPin = Object.entries(DEFAULT_DATA_SET_METADATA).every(
6067
([key, value]) => ds.metadata[key] === value

src/core/data-set/types.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ export interface DataSetPiecesResult {
3535
pieces: PieceInfo[]
3636
/** Dataset ID these pieces belong to */
3737
dataSetId: number
38+
/** Total size of all pieces in bytes (sum of individual piece sizes) */
39+
totalSizeBytes?: bigint
3840
/** Non-fatal warnings encountered during retrieval */
3941
warnings?: Warning[]
4042
}
@@ -62,6 +64,7 @@ export interface Warning {
6264
* - Provider enrichment (optional provider field)
6365
* - Dataset metadata (inherited from EnhancedDataSetInfo.metadata - key-value pairs from WarmStorage)
6466
* - Filecoin-pin creation flag (indicates if created by filecoin-pin)
67+
* - Optional detailed information (pieces and total size in bytes, populated when fetching a detailed data set)
6568
*
6669
* The dataSetId alias makes pdpVerifierDataSetId more discoverable.
6770
*/
@@ -70,6 +73,10 @@ export interface DataSetSummary extends EnhancedDataSetInfo {
7073
dataSetId: number
7174
/** Provider information (enriched from the SP registry when provider details are requested; undefined if not fetched or unavailable) */
7275
provider: ProviderInfo | undefined
76+
/** Total size in bytes (optional, calculated from piece sizes) */
77+
totalSizeBytes?: bigint
78+
/** Pieces in the dataset (optional, populated when fetching detailed info) */
79+
pieces?: PieceInfo[]
7380
/** Indicates if this dataset was created by filecoin-pin (has WITH_IPFS_INDEXING and source='filecoin-pin' metadata) */
7481
createdWithFilecoinPin: boolean
7582
}
@@ -81,7 +88,24 @@ export interface ListDataSetsOptions {
8188
/** Address to list datasets for (defaults to synapse client address) */
8289
address?: string
8390
/** Logger instance for debugging (optional) */
84-
logger?: Logger
91+
logger?: Logger | undefined
92+
/**
93+
* Whether to get the provider details from the SP registry
94+
*
95+
* @default false
96+
*/
97+
withProviderDetails?: boolean
98+
99+
/**
100+
* Filter function to apply to the data sets before additional processing
101+
*
102+
* Note: The filter receives raw EnhancedDataSetInfo objects from the SDK
103+
* (with pdpVerifierDataSetId field) before transformation to DataSetSummary
104+
*
105+
* @param dataSet - Raw dataset from SDK storage.findDataSets()
106+
* @returns true to include the dataset, false to exclude it
107+
*/
108+
filter?: undefined | ((dataSet: EnhancedDataSetInfo) => boolean)
85109
}
86110

87111
/**
@@ -93,7 +117,7 @@ export interface GetDataSetPiecesOptions {
93117
/** Abort signal for cancellation */
94118
signal?: AbortSignal
95119
/** Logger instance for debugging (optional) */
96-
logger?: Logger
120+
logger?: Logger | undefined
97121
}
98122

99123
export type StorageContextWithDataSetId = StorageContext & { dataSetId: number }

0 commit comments

Comments
 (0)