Skip to content

Commit e41cb45

Browse files
alexlarsson authored and allisonkarlitskaya committed
splitstream: Rework file format
This changes the splitstream format a bit, with the goal of allowing splitstreams to support ostree files as well (see containers#144).

The primary differences are:
* The header is not compressed.
* All referenced fs-verity objects are stored in the header, including external chunks, mapped splitstreams and (a new feature) references that are not used in chunks.
* The mapping table is separate from the reference table (and generally smaller), and indexes into it.
* There is a magic value to detect the file format.
* There is a magic content type to detect the type wrapped in the stream.
* We store a tag for which ObjectID format is used.
* The total size of the stream is stored in the header.

The ability to reference file objects in the repo even if they are not part of the splitstream "content" will be useful for the ostree support to reference file content objects.

This change also allows more efficient GC enumeration, because we don't have to parse the entire splitstream to find the referenced objects.

Signed-off-by: Alexander Larsson <alexl@redhat.com>
1 parent 0f63603 commit e41cb45

File tree

10 files changed

+336
-132
lines changed

10 files changed

+336
-132
lines changed

crates/cfsctl/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ async fn main() -> Result<()> {
194194
}
195195
}
196196
Command::Cat { name } => {
197-
repo.merge_splitstream(&name, None, &mut std::io::stdout())?;
197+
repo.merge_splitstream(&name, None, None, &mut std::io::stdout())?;
198198
}
199199
Command::ImportImage { reference } => {
200200
let image_id = repo.import_image(&reference, &mut std::io::stdin())?;

crates/composefs-http/src/lib.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ use sha2::{Digest, Sha256};
1919
use tokio::task::JoinSet;
2020

2121
use composefs::{
22-
fsverity::FsVerityHashValue,
23-
repository::Repository,
24-
splitstream::{DigestMapEntry, SplitStreamReader},
22+
fsverity::FsVerityHashValue, repository::Repository, splitstream::SplitStreamReader,
2523
util::Sha256Digest,
2624
};
2725

@@ -67,7 +65,7 @@ impl<ObjectID: FsVerityHashValue> Downloader<ObjectID> {
6765
}
6866

6967
fn open_splitstream(&self, id: &ObjectID) -> Result<SplitStreamReader<File, ObjectID>> {
70-
SplitStreamReader::new(File::from(self.repo.open_object(id)?))
68+
SplitStreamReader::new(File::from(self.repo.open_object(id)?), None)
7169
}
7270

7371
fn read_object(&self, id: &ObjectID) -> Result<Vec<u8>> {
@@ -113,7 +111,7 @@ impl<ObjectID: FsVerityHashValue> Downloader<ObjectID> {
113111

114112
// this part is fast: it only touches the header
115113
let mut reader = self.open_splitstream(&id)?;
116-
for DigestMapEntry { verity, body } in &reader.refs.map {
114+
for (body, verity) in reader.iter_mappings() {
117115
match splitstreams.insert(verity.clone(), Some(*body)) {
118116
// This is the (normal) case if we encounter a splitstream we didn't see yet...
119117
None => {

crates/composefs-oci/src/image.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use composefs::{
1919
tree::{Directory, FileSystem, Inode, Leaf},
2020
};
2121

22+
use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE};
2223
use crate::tar::{TarEntry, TarItem};
2324

2425
/// Processes a single tar entry and adds it to the filesystem.
@@ -91,14 +92,19 @@ pub fn create_filesystem<ObjectID: FsVerityHashValue>(
9192
) -> Result<FileSystem<ObjectID>> {
9293
let mut filesystem = FileSystem::default();
9394

94-
let mut config_stream = repo.open_stream(config_name, config_verity)?;
95+
let mut config_stream =
96+
repo.open_stream(config_name, config_verity, Some(OCI_CONFIG_CONTENT_TYPE))?;
9597
let config = ImageConfiguration::from_reader(&mut config_stream)?;
9698

9799
for diff_id in config.rootfs().diff_ids() {
98100
let layer_sha256 = super::sha256_from_digest(diff_id)?;
99101
let layer_verity = config_stream.lookup(&layer_sha256)?;
100102

101-
let mut layer_stream = repo.open_stream(&hex::encode(layer_sha256), Some(layer_verity))?;
103+
let mut layer_stream = repo.open_stream(
104+
&hex::encode(layer_sha256),
105+
Some(layer_verity),
106+
Some(TAR_LAYER_CONTENT_TYPE),
107+
)?;
102108
while let Some(entry) = crate::tar::get_entry(&mut layer_stream)? {
103109
process_entry(&mut filesystem, entry)?;
104110
}

crates/composefs-oci/src/lib.rs

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use composefs::{
2828
util::{parse_sha256, Sha256Digest},
2929
};
3030

31+
use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE};
3132
use crate::tar::get_entry;
3233

3334
type ContentAndVerity<ObjectID> = (Sha256Digest, ObjectID);
@@ -58,7 +59,12 @@ pub fn import_layer<ObjectID: FsVerityHashValue>(
5859
name: Option<&str>,
5960
tar_stream: &mut impl Read,
6061
) -> Result<ObjectID> {
61-
repo.ensure_stream(sha256, |writer| tar::split(tar_stream, writer), name)
62+
repo.ensure_stream(
63+
sha256,
64+
TAR_LAYER_CONTENT_TYPE,
65+
|writer| tar::split(tar_stream, writer),
66+
name,
67+
)
6268
}
6369

6470
/// Lists the contents of a container layer stored in the repository.
@@ -69,7 +75,7 @@ pub fn ls_layer<ObjectID: FsVerityHashValue>(
6975
repo: &Repository<ObjectID>,
7076
name: &str,
7177
) -> Result<()> {
72-
let mut split_stream = repo.open_stream(name, None)?;
78+
let mut split_stream = repo.open_stream(name, None, Some(TAR_LAYER_CONTENT_TYPE))?;
7379

7480
while let Some(entry) = get_entry(&mut split_stream)? {
7581
println!("{entry}");
@@ -115,9 +121,9 @@ pub fn open_config<ObjectID: FsVerityHashValue>(
115121
.with_context(|| format!("Object {name} is unknown to us"))?
116122
}
117123
};
118-
let mut stream = repo.open_stream(name, Some(id))?;
124+
let mut stream = repo.open_stream(name, Some(id), Some(OCI_CONFIG_CONTENT_TYPE))?;
119125
let config = ImageConfiguration::from_reader(&mut stream)?;
120-
Ok((config, stream.refs))
126+
Ok((config, stream.get_mappings()))
121127
}
122128

123129
fn hash(bytes: &[u8]) -> Sha256Digest {
@@ -147,7 +153,7 @@ pub fn open_config_shallow<ObjectID: FsVerityHashValue>(
147153
// we need to manually check the content digest
148154
let expected_hash = parse_sha256(name)
149155
.context("Containers must be referred to by sha256 if verity is missing")?;
150-
let mut stream = repo.open_stream(name, None)?;
156+
let mut stream = repo.open_stream(name, None, Some(OCI_CONFIG_CONTENT_TYPE))?;
151157
let mut raw_config = vec![];
152158
stream.read_to_end(&mut raw_config)?;
153159
ensure!(hash(&raw_config) == expected_hash, "Data integrity issue");
@@ -170,7 +176,8 @@ pub fn write_config<ObjectID: FsVerityHashValue>(
170176
let json = config.to_string()?;
171177
let json_bytes = json.as_bytes();
172178
let sha256 = hash(json_bytes);
173-
let mut stream = repo.create_stream(Some(sha256), Some(refs));
179+
let mut stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE, Some(sha256));
180+
stream.add_sha256_mappings(refs);
174181
stream.write_inline(json_bytes);
175182
let id = repo.write_stream(stream, None)?;
176183
Ok((sha256, id))
@@ -262,7 +269,7 @@ mod test {
262269
let id = import_layer(&repo, &layer_id, Some("name"), &mut layer.as_slice()).unwrap();
263270

264271
let mut dump = String::new();
265-
let mut split_stream = repo.open_stream("refs/name", Some(&id)).unwrap();
272+
let mut split_stream = repo.open_stream("refs/name", Some(&id), None).unwrap();
266273
while let Some(entry) = tar::get_entry(&mut split_stream).unwrap() {
267274
writeln!(dump, "{entry}").unwrap();
268275
}

crates/composefs-oci/src/skopeo.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@ use oci_spec::image::{Descriptor, ImageConfiguration, ImageManifest, MediaType};
2020
use rustix::process::geteuid;
2121
use tokio::{io::AsyncReadExt, sync::Semaphore};
2222

23-
use composefs::{
24-
fsverity::FsVerityHashValue, repository::Repository, splitstream::DigestMap, util::Sha256Digest,
25-
};
23+
use composefs::{fsverity::FsVerityHashValue, repository::Repository, util::Sha256Digest};
2624

2725
use crate::{sha256_from_descriptor, sha256_from_digest, tar::split_async, ContentAndVerity};
2826

27+
// These are randomly generated UUID-like content types
28+
pub const TAR_LAYER_CONTENT_TYPE: u64 = 0x2a037edfcae1ffea;
29+
pub const OCI_CONFIG_CONTENT_TYPE: u64 = 0x44218c839727a80b;
30+
2931
struct ImageOp<ObjectID: FsVerityHashValue> {
3032
repo: Arc<Repository<ObjectID>>,
3133
proxy: ImageProxy,
@@ -105,7 +107,9 @@ impl<ObjectID: FsVerityHashValue> ImageOp<ObjectID> {
105107
self.progress
106108
.println(format!("Fetching layer {}", hex::encode(layer_sha256)))?;
107109

108-
let mut splitstream = self.repo.create_stream(Some(layer_sha256), None);
110+
let mut splitstream = self
111+
.repo
112+
.create_stream(TAR_LAYER_CONTENT_TYPE, Some(layer_sha256));
109113
match descriptor.media_type() {
110114
MediaType::ImageLayer => {
111115
split_async(progress, &mut splitstream).await?;
@@ -182,15 +186,15 @@ impl<ObjectID: FsVerityHashValue> ImageOp<ObjectID> {
182186
entries.push((layer_sha256, future));
183187
}
184188

189+
let mut splitstream = self
190+
.repo
191+
.create_stream(OCI_CONFIG_CONTENT_TYPE, Some(config_sha256));
192+
185193
// Collect the results.
186-
let mut config_maps = DigestMap::new();
187194
for (layer_sha256, future) in entries {
188-
config_maps.insert(&layer_sha256, &future.await??);
195+
splitstream.add_sha256_mapping(&layer_sha256, &future.await??);
189196
}
190197

191-
let mut splitstream = self
192-
.repo
193-
.create_stream(Some(config_sha256), Some(config_maps));
194198
splitstream.write_inline(&raw_config);
195199
let config_id = self.repo.write_stream(splitstream, None)?;
196200

crates/composefs-oci/src/tar.rs

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,10 @@ pub fn get_entry<R: Read, ObjectID: FsVerityHashValue>(
320320
}
321321

322322
#[cfg(test)]
323+
323324
mod tests {
325+
use crate::TAR_LAYER_CONTENT_TYPE;
326+
324327
use super::*;
325328
use composefs::{
326329
fsverity::Sha256HashValue, generic_tree::LeafContent, repository::Repository,
@@ -377,13 +380,15 @@ mod tests {
377380
fn read_all_via_splitstream(tar_data: Vec<u8>) -> Result<Vec<TarEntry<Sha256HashValue>>> {
378381
let mut tar_cursor = Cursor::new(tar_data);
379382
let repo = create_test_repository()?;
380-
let mut writer = repo.create_stream(None, None);
383+
let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE, None);
381384

382385
split(&mut tar_cursor, &mut writer)?;
383386
let object_id = writer.done()?;
384387

385-
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
386-
SplitStreamReader::new(repo.open_object(&object_id)?.into())?;
388+
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> = SplitStreamReader::new(
389+
repo.open_object(&object_id)?.into(),
390+
Some(TAR_LAYER_CONTENT_TYPE),
391+
)?;
387392

388393
let mut entries = Vec::new();
389394
while let Some(entry) = get_entry(&mut reader)? {
@@ -402,13 +407,16 @@ mod tests {
402407

403408
let mut tar_cursor = Cursor::new(tar_data);
404409
let repo = create_test_repository().unwrap();
405-
let mut writer = repo.create_stream(None, None);
410+
let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE, None);
406411

407412
split(&mut tar_cursor, &mut writer).unwrap();
408413
let object_id = writer.done().unwrap();
409414

410-
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
411-
SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
415+
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> = SplitStreamReader::new(
416+
repo.open_object(&object_id).unwrap().into(),
417+
Some(TAR_LAYER_CONTENT_TYPE),
418+
)
419+
.unwrap();
412420
assert!(get_entry(&mut reader).unwrap().is_none());
413421
}
414422

@@ -428,13 +436,16 @@ mod tests {
428436

429437
let mut tar_cursor = Cursor::new(tar_data);
430438
let repo = create_test_repository().unwrap();
431-
let mut writer = repo.create_stream(None, None);
439+
let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE, None);
432440

433441
split(&mut tar_cursor, &mut writer).unwrap();
434442
let object_id = writer.done().unwrap();
435443

436-
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
437-
SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
444+
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> = SplitStreamReader::new(
445+
repo.open_object(&object_id).unwrap().into(),
446+
Some(TAR_LAYER_CONTENT_TYPE),
447+
)
448+
.unwrap();
438449

439450
// Should have exactly one entry
440451
let entry = get_entry(&mut reader)
@@ -483,13 +494,16 @@ mod tests {
483494

484495
let mut tar_cursor = Cursor::new(tar_data);
485496
let repo = create_test_repository().unwrap();
486-
let mut writer = repo.create_stream(None, None);
497+
let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE, None);
487498

488499
split(&mut tar_cursor, &mut writer).unwrap();
489500
let object_id = writer.done().unwrap();
490501

491-
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
492-
SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
502+
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> = SplitStreamReader::new(
503+
repo.open_object(&object_id).unwrap().into(),
504+
Some(TAR_LAYER_CONTENT_TYPE),
505+
)
506+
.unwrap();
493507
let mut entries = Vec::new();
494508

495509
while let Some(entry) = get_entry(&mut reader).unwrap() {
@@ -547,13 +561,16 @@ mod tests {
547561
// Split the tar
548562
let mut tar_cursor = Cursor::new(original_tar.clone());
549563
let repo = create_test_repository().unwrap();
550-
let mut writer = repo.create_stream(None, None);
564+
let mut writer = repo.create_stream(TAR_LAYER_CONTENT_TYPE, None);
551565
split(&mut tar_cursor, &mut writer).unwrap();
552566
let object_id = writer.done().unwrap();
553567

554568
// Read back entries and compare with original headers
555-
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> =
556-
SplitStreamReader::new(repo.open_object(&object_id).unwrap().into()).unwrap();
569+
let mut reader: SplitStreamReader<std::fs::File, Sha256HashValue> = SplitStreamReader::new(
570+
repo.open_object(&object_id).unwrap().into(),
571+
Some(TAR_LAYER_CONTENT_TYPE),
572+
)
573+
.unwrap();
557574
let mut entries = Vec::new();
558575

559576
while let Some(entry) = get_entry(&mut reader).unwrap() {

crates/composefs/src/fsverity/hashvalue.rs

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use core::{fmt, hash::Hash};
88

99
use hex::FromHexError;
1010
use sha2::{digest::FixedOutputReset, digest::Output, Digest, Sha256, Sha512};
11+
use std::cmp::Ord;
1112
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned};
1213

1314
/// Trait for fs-verity hash value types supporting SHA-256 and SHA-512.
@@ -22,6 +23,7 @@ where
2223
Self: Hash + Eq,
2324
Self: fmt::Debug,
2425
Self: Send + Sync + Unpin + 'static,
26+
Self: PartialOrd + Ord,
2527
{
2628
/// The underlying hash digest algorithm type.
2729
type Digest: Digest + FixedOutputReset + fmt::Debug;
@@ -160,7 +162,19 @@ impl fmt::Debug for Sha512HashValue {
160162
/// A SHA-256 hash value for fs-verity operations.
161163
///
162164
/// This is a 32-byte hash value using the SHA-256 algorithm.
163-
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)]
165+
#[derive(
166+
Clone,
167+
Eq,
168+
FromBytes,
169+
Hash,
170+
Immutable,
171+
IntoBytes,
172+
KnownLayout,
173+
PartialEq,
174+
Unaligned,
175+
PartialOrd,
176+
Ord,
177+
)]
164178
#[repr(C)]
165179
pub struct Sha256HashValue([u8; 32]);
166180

@@ -180,7 +194,19 @@ impl FsVerityHashValue for Sha256HashValue {
180194
/// A SHA-512 hash value for fs-verity operations.
181195
///
182196
/// This is a 64-byte hash value using the SHA-512 algorithm.
183-
#[derive(Clone, Eq, FromBytes, Hash, Immutable, IntoBytes, KnownLayout, PartialEq, Unaligned)]
197+
#[derive(
198+
Clone,
199+
Eq,
200+
FromBytes,
201+
Hash,
202+
Immutable,
203+
IntoBytes,
204+
KnownLayout,
205+
PartialEq,
206+
Unaligned,
207+
PartialOrd,
208+
Ord,
209+
)]
184210
#[repr(C)]
185211
pub struct Sha512HashValue([u8; 64]);
186212

0 commit comments

Comments
 (0)