Skip to content

Commit 8e80cbf

Browse files
Feat/v2 commp cid arbitrary size (#6)
* chore: go mod tidy * feat: switch tests to crypto/rand to make the linter happy * ci: add github actions * feat: add v2 piece cid and v1/v2 piece cid converters * feat: change v2 piece multihashes to enable arbitrarily sized data * update draft to put the padding before the height * change test fixtures to match those in the frc * rename functions for ease * fix lint issues * fix go version in CI * undo broken test * use newer CI * unexport test functions * more func renames * replace unpadded with payload * use multicodec --------- Co-authored-by: LexLuthr <lexluthr@curiostorage.org>
1 parent 3d22d68 commit 8e80cbf

File tree

6 files changed

+525
-53
lines changed

6 files changed

+525
-53
lines changed

.github/workflows/go-check.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ concurrency:
1515

1616
jobs:
1717
go-check:
18-
uses: ipdxco/unified-github-workflows/.github/workflows/go-check.yml@v1.0
18+
uses: ipdxco/unified-github-workflows/.github/workflows/go-check.yml@v1.0

.github/workflows/go-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ jobs:
1717
go-test:
1818
uses: ipdxco/unified-github-workflows/.github/workflows/go-test.yml@v1.0
1919
secrets:
20-
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
20+
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

commcid.go

Lines changed: 202 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ package commcid
55
import (
66
"errors"
77
"fmt"
8+
"math/bits"
89

910
"github.com/ipfs/go-cid"
11+
"github.com/multiformats/go-multicodec"
1012
"github.com/multiformats/go-multihash"
1113
"github.com/multiformats/go-varint"
1214
"golang.org/x/xerrors"
@@ -37,36 +39,39 @@ var (
3739
// by adding:
3840
// - the given filecoin codec type
3941
// - the given filecoin hash type
40-
func CommitmentToCID(mc FilMultiCodec, mh FilMultiHash, commX []byte) (cid.Cid, error) {
42+
// Deprecated: Use the alternatives like ReplicaCommitmentV1ToCID, DataCommitmentV1ToCID or DataCommitmentToPieceCidv2
43+
func CommitmentToCID(mc multicodec.Code, mh multicodec.Code, commX []byte) (cid.Cid, error) {
4144
if err := validateFilecoinCidSegments(mc, mh, commX); err != nil {
4245
return cid.Undef, err
4346
}
4447

4548
mhBuf := make(
4649
[]byte,
47-
(varint.UvarintSize(uint64(mh)) + varint.UvarintSize(uint64(len(commX))) + len(commX)),
50+
varint.UvarintSize(uint64(mh))+varint.UvarintSize(uint64(len(commX)))+len(commX),
4851
)
4952

5053
pos := varint.PutUvarint(mhBuf, uint64(mh))
5154
pos += varint.PutUvarint(mhBuf[pos:], uint64(len(commX)))
5255
copy(mhBuf[pos:], commX)
5356

54-
return cid.NewCidV1(uint64(mc), multihash.Multihash(mhBuf)), nil
57+
return cid.NewCidV1(uint64(mc), mhBuf), nil
5558
}
5659

5760
// CIDToCommitment extracts the raw commitment bytes, the FilMultiCodec and
5861
// FilMultiHash from a CID, after validating that the codec and hash type are
5962
// consistent
60-
func CIDToCommitment(c cid.Cid) (FilMultiCodec, FilMultiHash, []byte, error) {
61-
decoded, err := multihash.Decode([]byte(c.Hash()))
63+
//
64+
// Deprecated: Use the alternatives like CIDToReplicaCommitmentV1, CIDToDataCommitmentV1 or PieceCidV2ToDataCommitment
65+
func CIDToCommitment(c cid.Cid) (multicodec.Code, multicodec.Code, []byte, error) {
66+
decoded, err := multihash.Decode(c.Hash())
6267
if err != nil {
63-
return FILCODEC_UNDEFINED, FILMULTIHASH_UNDEFINED, nil, xerrors.Errorf("Error decoding data commitment hash: %w", err)
68+
return 0, 0, nil, xerrors.Errorf("Error decoding data commitment hash: %w", err)
6469
}
6570

66-
filCodec := FilMultiCodec(c.Type())
67-
filMh := FilMultiHash(decoded.Code)
71+
filCodec := multicodec.Code(c.Type())
72+
filMh := multicodec.Code(decoded.Code)
6873
if err := validateFilecoinCidSegments(filCodec, filMh, decoded.Digest); err != nil {
69-
return FILCODEC_UNDEFINED, FILMULTIHASH_UNDEFINED, nil, err
74+
return 0, 0, nil, err
7075
}
7176

7277
return filCodec, filMh, decoded.Digest, nil
@@ -76,23 +81,172 @@ func CIDToCommitment(c cid.Cid) (FilMultiCodec, FilMultiHash, []byte, error) {
7681
// by adding:
7782
// - codec: cid.FilCommitmentUnsealed
7883
// - hash type: multihash.SHA2_256_TRUNC254_PADDED
84+
//
85+
// Deprecated: This function should be avoided when possible and DataCommitmentToPieceCidv2 preferred
7986
func DataCommitmentV1ToCID(commD []byte) (cid.Cid, error) {
80-
return CommitmentToCID(cid.FilCommitmentUnsealed, multihash.SHA2_256_TRUNC254_PADDED, commD)
87+
return CommitmentToCID(multicodec.FilCommitmentUnsealed, multicodec.Sha2_256Trunc254Padded, commD)
88+
}
89+
90+
// fr32PaddedSizeToV1TreeHeight calculates the height of the piece tree given the size in bytes of data that has
// already been FR32 padded. Leaves are 32 bytes, so a tree of height h covers 32<<h bytes. Because pieces are only
// defined on binary trees, a size that is not a power of 2 is rounded up to the next one under the assumption that
// the rest of the tree will be padded out (e.g. with zeros). Sizes of 32 bytes or less yield a height of 0.
func fr32PaddedSizeToV1TreeHeight(size uint64) uint8 {
	if size <= 32 {
		return 0
	}

	// For size > 1, bits.Len64(size-1) is ceil(log2(size)); subtracting log2(32) == 5 converts bytes to leaves.
	return uint8(bits.Len64(size-1) - 5)
}

// payloadSizeToFr32Size returns the number of bytes the payload occupies once FR32 padding has been applied, i.e.
// ceil(size * 128 / 127). It returns an error if size*128 does not fit in a uint64.
func payloadSizeToFr32Size(size uint64) (uint64, error) {
	// A wrapped product is not guaranteed to be smaller than the input, so inspect the high word of the full
	// 128-bit product rather than comparing the truncated result against size.
	hi, scaled := bits.Mul64(size, 128)
	if hi != 0 {
		return 0, fmt.Errorf("unsupported size: too big")
	}

	fr32Size := scaled / 127
	// If the FR32 padding doesn't fill an exact number of bytes add up to 1 more byte of zeros to round it out
	if scaled%127 != 0 {
		fr32Size++
	}
	return fr32Size, nil
}

// payloadsizeToV1TreeHeight calculates the height of the piece tree given the size in bytes of the data that's meant
// to be encoded in the tree before any FR32 padding is applied. Because pieces are only defined on binary trees of
// FR32 encoded data, if the size is not a power of 2 after the FR32 padding is applied it will be rounded up to the
// next one under the assumption that the rest of the tree will be padded out (e.g. with zeros).
//
// An error is returned if size is too large for its FR32 padded size to be computed in a uint64.
func payloadsizeToV1TreeHeight(size uint64) (uint8, error) {
	paddedSize, err := payloadSizeToFr32Size(size)
	if err != nil {
		return 0, err
	}

	return fr32PaddedSizeToV1TreeHeight(paddedSize), nil
}

// PayloadSizeToV1TreeHeightAndPadding calculates the height of the piece tree given the size in bytes of the data
// that's meant to be encoded in the tree before any FR32 padding is applied. Because pieces are only defined on
// binary trees of FR32 encoded data, if the size is not a power of 2 after the FR32 padding is applied it will be
// rounded up to the next one under the assumption that the rest of the tree will be padded out (e.g. with zeros).
//
// The number of padding bytes that must be appended to the payload (before FR32 padding) to fill the tree is
// returned alongside the tree height. An error is returned if dataSize is too large for its FR32 padded size to be
// computed in a uint64.
func PayloadSizeToV1TreeHeightAndPadding(dataSize uint64) (uint8, uint64, error) {
	fr32DataSize, err := payloadSizeToFr32Size(dataSize)
	if err != nil {
		return 0, 0, err
	}

	treeHeight := fr32PaddedSizeToV1TreeHeight(fr32DataSize)
	paddedFr32DataSize := uint64(32) << treeHeight

	// Payload capacity of the tree: floor(paddedFr32DataSize * 127 / 128), written so that it neither overflows for
	// large trees nor truncates to zero for trees smaller than 128 bytes (which would make the subtraction below
	// underflow). This matches the capacity PieceCidV2ToDataCommitment derives from the tree height.
	paddedDataSize := paddedFr32DataSize - (paddedFr32DataSize+127)/128
	padding := paddedDataSize - dataSize

	return treeHeight, padding, nil
}
150+
151+
// DataCommitmentToPieceCidv2 converts a raw data commitment and the height of the commitment tree
152+
// (i.e. log_2(padded data size in bytes) - 5, because 2^5 is 32 bytes which is the leaf node size) to a CID
153+
// by adding:
154+
// - codec: cid.Raw
155+
// - hash type: multihash.SHA2_256_TRUNC254_PADDED_BINARY_TREE
156+
//
157+
// The helpers payloadsizeToV1TreeHeight and Fr32PaddedSizeToV1TreeHeight may help in computing tree height
158+
func DataCommitmentToPieceCidv2(commD []byte, PayloadSize uint64) (cid.Cid, error) {
159+
if len(commD) != 32 {
160+
return cid.Undef, fmt.Errorf("commitments must be 32 bytes long")
161+
}
162+
163+
if PayloadSize < 127 {
164+
return cid.Undef, fmt.Errorf("payloadsize data size must be at least 127, but was %d", PayloadSize)
165+
}
166+
167+
height, padding, err := PayloadSizeToV1TreeHeightAndPadding(PayloadSize)
168+
if err != nil {
169+
return cid.Undef, err
170+
}
171+
172+
if padding > varint.MaxValueUvarint63 {
173+
return cid.Undef, fmt.Errorf("padded data size must be less than 2^63-1, but was %d", padding)
174+
}
175+
176+
mh := multicodec.Fr32Sha256Trunc254Padbintree
177+
paddingSize := varint.UvarintSize(padding)
178+
digestSize := len(commD) + 1 + paddingSize
179+
180+
mhBuf := make(
181+
[]byte,
182+
varint.UvarintSize(uint64(mh))+varint.UvarintSize(uint64(digestSize))+digestSize,
183+
)
184+
185+
pos := varint.PutUvarint(mhBuf, uint64(mh))
186+
pos += varint.PutUvarint(mhBuf[pos:], uint64(digestSize))
187+
pos += varint.PutUvarint(mhBuf[pos:], padding)
188+
mhBuf[pos] = height
189+
pos++
190+
copy(mhBuf[pos:], commD)
191+
192+
return cid.NewCidV1(uint64(cid.Raw), mhBuf), nil
81193
}
82194

83195
// CIDToDataCommitmentV1 extracts the raw data commitment from a CID
84196
// after checking for the correct codec and hash types.
197+
//
198+
// Deprecated: This function should be avoided when possible and PieceCidV2ToDataCommitment preferred
85199
func CIDToDataCommitmentV1(c cid.Cid) ([]byte, error) {
86200
codec, _, commD, err := CIDToCommitment(c)
87201
if err != nil {
88202
return nil, err
89203
}
90-
if codec != cid.FilCommitmentUnsealed {
204+
if codec != multicodec.FilCommitmentUnsealed {
91205
return nil, ErrIncorrectCodec
92206
}
93207
return commD, nil
94208
}
95209

210+
// PieceCidV2ToDataCommitment extracts the raw data commitment and payloadsize data size from the CID
211+
func PieceCidV2ToDataCommitment(c cid.Cid) ([]byte, uint64, error) {
212+
decoded, err := multihash.Decode(c.Hash())
213+
if err != nil {
214+
return nil, 0, xerrors.Errorf("Error decoding data commitment hash: %w", err)
215+
}
216+
217+
if decoded.Code != uint64(multicodec.Fr32Sha256Trunc254Padbintree) {
218+
return nil, 0, ErrIncorrectHash
219+
}
220+
221+
if decoded.Length < 34 {
222+
return nil, 0, xerrors.Errorf("expected multihash digest to be at least 34 bytes, but was %d bytes", decoded.Length)
223+
}
224+
225+
paddingSize, paddingSizeVarintLen, err := varint.FromUvarint(decoded.Digest)
226+
if err != nil {
227+
return nil, 0, xerrors.Errorf("error decoding padding size: %w", err)
228+
}
229+
230+
if expectedDigestSize := 33 + paddingSizeVarintLen; decoded.Length != expectedDigestSize {
231+
return nil, 0, xerrors.Errorf("expected multihash digest to be %d bytes, but was %d bytes", expectedDigestSize, decoded.Length)
232+
}
233+
234+
treeHeight := decoded.Digest[paddingSizeVarintLen]
235+
236+
paddedFr32TreeSize := uint64(32) << treeHeight
237+
paddedTreeSize := paddedFr32TreeSize * 127 / 128
238+
halfPaddedTreeSize := paddedTreeSize >> 1
239+
240+
if paddingSize >= halfPaddedTreeSize {
241+
return nil, 0, xerrors.Errorf("size of padding (%d) must be less than half the size of the padded data (%d)", paddingSize, halfPaddedTreeSize)
242+
}
243+
244+
payloadsize := paddedTreeSize - paddingSize
245+
246+
commitmentHash := decoded.Digest[1+paddingSizeVarintLen:]
247+
return commitmentHash, payloadsize, nil
248+
}
249+
96250
// ReplicaCommitmentV1ToCID converts a raw data commitment to a CID
97251
// by adding:
98252
// - codec: cid.FilCommitmentSealed
@@ -108,23 +262,23 @@ func CIDToReplicaCommitmentV1(c cid.Cid) ([]byte, error) {
108262
if err != nil {
109263
return nil, err
110264
}
111-
if codec != cid.FilCommitmentSealed {
265+
if codec != multicodec.FilCommitmentSealed {
112266
return nil, ErrIncorrectCodec
113267
}
114268
return commR, nil
115269
}
116270

117271
// validateFilecoinCidSegments returns an error if the provided CID parts
118272
// conflict with each other.
119-
func validateFilecoinCidSegments(mc FilMultiCodec, mh FilMultiHash, commX []byte) error {
273+
func validateFilecoinCidSegments(mc multicodec.Code, mh multicodec.Code, commX []byte) error {
120274

121275
switch mc {
122-
case cid.FilCommitmentUnsealed:
123-
if mh != multihash.SHA2_256_TRUNC254_PADDED {
276+
case multicodec.FilCommitmentUnsealed:
277+
if mh != multicodec.Sha2_256Trunc254Padded {
124278
return ErrIncorrectHash
125279
}
126-
case cid.FilCommitmentSealed:
127-
if mh != multihash.POSEIDON_BLS12_381_A1_FC1 {
280+
case multicodec.FilCommitmentSealed:
281+
if mh != multicodec.PoseidonBls12_381A2Fc1 {
128282
return ErrIncorrectHash
129283
}
130284
default: // neither of the codecs above: we are not in Fil territory
@@ -138,12 +292,43 @@ func validateFilecoinCidSegments(mc FilMultiCodec, mh FilMultiHash, commX []byte
138292
return nil
139293
}
140294

295+
// PieceCidV2FromV1 takes a v1 piece CID and the CommP tree height and produces a
296+
// piece multihash CID
297+
//
298+
// The helpers payloadsizeToV1TreeHeight and Fr32PaddedSizeToV1TreeHeight may help in computing tree height
299+
func PieceCidV2FromV1(v1PieceCid cid.Cid, payloadsize uint64) (cid.Cid, error) {
300+
hashDigest, err := CIDToDataCommitmentV1(v1PieceCid)
301+
if err != nil {
302+
return cid.Undef, xerrors.Errorf("Error decoding piece CID v1: %w", err)
303+
}
304+
305+
return DataCommitmentToPieceCidv2(hashDigest, payloadsize)
306+
}
307+
308+
// PieceCidV1FromV2 takes a piece multihash CID and produces a v1 piece CID along with the payloadsize
309+
func PieceCidV1FromV2(pcidV2 cid.Cid) (cid.Cid, uint64, error) {
310+
digest, payloadsize, err := PieceCidV2ToDataCommitment(pcidV2)
311+
if err != nil {
312+
return cid.Undef, 0, xerrors.Errorf("Error decoding data piece CID v2: %w", err)
313+
}
314+
315+
c, err := DataCommitmentV1ToCID(digest)
316+
if err != nil {
317+
return cid.Undef, 0, xerrors.Errorf("Could not create piece CID v1: %w", err)
318+
}
319+
return c, payloadsize, nil
320+
}
321+
141322
// PieceCommitmentV1ToCID converts a commP to a CID
142323
// -- it is just a helper function that is equivalent to
143324
// DataCommitmentV1ToCID.
325+
//
326+
// Deprecated: This function should be avoided when possible and DataCommitmentToPieceCidv2 preferred
144327
var PieceCommitmentV1ToCID = DataCommitmentV1ToCID
145328

146329
// CIDToPieceCommitmentV1 converts a CID to a commP
147330
// -- it is just a helper function that is equivalent to
148331
// CIDToDataCommitmentV1.
332+
//
333+
// Deprecated: This function should be avoided when possible and PieceCidV2ToDataCommitment preferred
149334
var CIDToPieceCommitmentV1 = CIDToDataCommitmentV1

0 commit comments

Comments
 (0)