From 9c99b177ae0f4b60bd2c3a8ff03f16b9c251c1f9 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 3 Sep 2025 10:54:37 +0200 Subject: [PATCH 1/6] Feature gate redb based store. --- Cargo.toml | 7 ++++--- src/api/blobs/reader.rs | 1 + src/api/remote.rs | 1 + src/hash.rs | 6 +++--- src/lib.rs | 1 + src/store/mod.rs | 1 + src/store/util.rs | 1 + 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bcd5f42d0..2c6d8754a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ quinn = { package = "iroh-quinn", version = "0.14.0" } n0-future = "0.2.0" n0-snafu = "0.2.0" range-collections = { version = "0.4.6", features = ["serde"] } -redb = { version = "=2.4" } smallvec = { version = "1", features = ["serde", "const_new"] } snafu = "0.8.5" tokio = { version = "1.43.0", features = ["full"] } @@ -41,9 +40,10 @@ iroh = "0.91.1" self_cell = "1.1.0" genawaiter = { version = "0.99.1", features = ["futures03"] } iroh-base = "0.91.1" -reflink-copy = "0.1.24" irpc = { version = "0.7.0", features = ["rpc", "quinn_endpoint_setup", "spans", "stream", "derive"], default-features = false } iroh-metrics = { version = "0.35" } +redb = { version = "=2.4", optional = true } +reflink-copy = { version = "0.1.24", optional = true } [dev-dependencies] clap = { version = "4.5.31", features = ["derive"] } @@ -64,7 +64,8 @@ iroh = { version = "0.91.1", features = ["discovery-local-network"]} [features] hide-proto-docs = [] metrics = [] -default = ["hide-proto-docs"] +default = ["hide-proto-docs", "fs-store"] +fs-store = ["dep:redb", "dep:reflink-copy"] [patch.crates-io] iroh = { git = "https://github.com/n0-computer/iroh", branch = "main" } diff --git a/src/api/blobs/reader.rs b/src/api/blobs/reader.rs index 9e337dae1..5077c2632 100644 --- a/src/api/blobs/reader.rs +++ b/src/api/blobs/reader.rs @@ -214,6 +214,7 @@ impl tokio::io::AsyncSeek for BlobReader { } #[cfg(test)] +#[cfg(feature = "fs-store")] mod tests { use bao_tree::ChunkRanges; use 
testresult::TestResult; diff --git a/src/api/remote.rs b/src/api/remote.rs index 623200900..5eb64c24b 100644 --- a/src/api/remote.rs +++ b/src/api/remote.rs @@ -1061,6 +1061,7 @@ where } #[cfg(test)] +#[cfg(feature = "fs-store")] mod tests { use bao_tree::{ChunkNum, ChunkRanges}; use testresult::TestResult; diff --git a/src/hash.rs b/src/hash.rs index 006f4a9d8..22fe333d4 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -283,7 +283,7 @@ impl From for HashAndFormat { } } -// #[cfg(feature = "redb")] +#[cfg(feature = "fs-store")] mod redb_support { use postcard::experimental::max_size::MaxSize; use redb::{Key as RedbKey, Value as RedbValue}; @@ -493,7 +493,7 @@ mod tests { assert_eq_hex!(serialized, expected); } - // #[cfg(feature = "redb")] + #[cfg(feature = "fs-store")] #[test] fn hash_redb() { use redb::Value as RedbValue; @@ -518,7 +518,7 @@ mod tests { assert_eq_hex!(serialized, expected); } - // #[cfg(feature = "redb")] + #[cfg(feature = "fs-store")] #[test] fn hash_and_format_redb() { use redb::Value as RedbValue; diff --git a/src/lib.rs b/src/lib.rs index 521ba4f7f..cc9a35f78 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,6 +46,7 @@ pub mod test; pub mod util; #[cfg(test)] +#[cfg(feature = "fs-store")] mod tests; pub use protocol::ALPN; diff --git a/src/store/mod.rs b/src/store/mod.rs index 3e1a3748f..df20aff63 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -5,6 +5,7 @@ //! for when you want to efficiently share more than the available memory and //! have access to a writeable filesystem. 
use bao_tree::BlockSize; +#[cfg(feature = "fs-store")] pub mod fs; pub mod mem; pub mod readonly_mem; diff --git a/src/store/util.rs b/src/store/util.rs index 240ad233f..1b44cf945 100644 --- a/src/store/util.rs +++ b/src/store/util.rs @@ -138,6 +138,7 @@ pub(crate) fn get_limited_slice(bytes: &Bytes, offset: u64, len: usize) -> Bytes bytes.slice(limited_range(offset, len, bytes.len())) } +#[cfg(feature = "fs-store")] mod redb_support { use bytes::Bytes; use redb::{Key as RedbKey, Value as RedbValue}; From b701107fb20d45078fbd26e5d4d7744adea81c31 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 3 Sep 2025 11:07:36 +0200 Subject: [PATCH 2/6] more feature gating ugliness --- src/api/downloader.rs | 1 + src/store/util.rs | 4 +++- src/util.rs | 1 + tests/blobs.rs | 1 + tests/tags.rs | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/api/downloader.rs b/src/api/downloader.rs index a2abbd7ea..ffdfd2782 100644 --- a/src/api/downloader.rs +++ b/src/api/downloader.rs @@ -524,6 +524,7 @@ impl ContentDiscovery for Shuffled { } #[cfg(test)] +#[cfg(feature = "fs-store")] mod tests { use std::ops::Deref; diff --git a/src/store/util.rs b/src/store/util.rs index 1b44cf945..31c1de4d0 100644 --- a/src/store/util.rs +++ b/src/store/util.rs @@ -12,10 +12,12 @@ use bao_tree::{blake3, io::mixed::EncodedItem}; use bytes::Bytes; use derive_more::{From, Into}; +#[cfg(feature = "fs-store")] mod mem_or_file; +#[cfg(feature = "fs-store")] +pub use mem_or_file::{FixedSize, MemOrFile}; mod sparse_mem_file; use irpc::channel::mpsc; -pub use mem_or_file::{FixedSize, MemOrFile}; use range_collections::{range_set::RangeSetEntry, RangeSetRef}; use ref_cast::RefCast; use serde::{de::DeserializeOwned, Deserialize, Serialize}; diff --git a/src/util.rs b/src/util.rs index 3fdaacbca..954a89354 100644 --- a/src/util.rs +++ b/src/util.rs @@ -329,6 +329,7 @@ pub(crate) mod outboard_with_progress { } #[cfg(test)] + #[cfg(feature = "fs-store")] mod tests { use bao_tree::{ blake3, 
diff --git a/tests/blobs.rs b/tests/blobs.rs index dcb8118dc..92ba46f7c 100644 --- a/tests/blobs.rs +++ b/tests/blobs.rs @@ -1,3 +1,4 @@ +#![cfg(feature = "fs-store")] use std::{ net::{Ipv4Addr, SocketAddr, SocketAddrV4}, ops::Deref, diff --git a/tests/tags.rs b/tests/tags.rs index 3864bc545..5fe929488 100644 --- a/tests/tags.rs +++ b/tests/tags.rs @@ -1,3 +1,4 @@ +#![cfg(feature = "fs-store")] use std::{ net::{Ipv4Addr, SocketAddr, SocketAddrV4}, ops::Deref, From 7208efadb40d981ad85c3512a54b6eae414345b2 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 3 Sep 2025 11:34:32 +0200 Subject: [PATCH 3/6] Restructure some stuff to reduce feature flags --- src/api/blobs.rs | 4 + src/api/proto.rs | 1 + src/api/proto/bitfield.rs | 1 + src/store/fs.rs | 7 +- src/store/fs/bao_file.rs | 3 +- src/store/fs/meta.rs | 7 +- src/store/fs/meta/tables.rs | 3 +- src/store/mem.rs | 3 +- src/store/mod.rs | 2 - src/store/util.rs | 372 +++++++++++++++++++----------------- src/util.rs | 2 +- src/util/channel.rs | 1 + 12 files changed, 213 insertions(+), 193 deletions(-) diff --git a/src/api/blobs.rs b/src/api/blobs.rs index d0b948598..3cab4b198 100644 --- a/src/api/blobs.rs +++ b/src/api/blobs.rs @@ -144,6 +144,8 @@ impl Blobs { /// clears the protections before. /// /// Users should rely only on garbage collection for blob deletion. + + #[allow(dead_code)] pub(crate) async fn delete_with_opts(&self, options: DeleteOptions) -> RequestResult<()> { trace!("{options:?}"); self.client.rpc(options).await??; @@ -151,6 +153,7 @@ impl Blobs { } /// See [`Self::delete_with_opts`]. 
+ #[allow(dead_code)] pub(crate) async fn delete( &self, hashes: impl IntoIterator>, @@ -510,6 +513,7 @@ impl Blobs { } } + #[allow(dead_code)] pub(crate) async fn clear_protected(&self) -> RequestResult<()> { let msg = ClearProtectedRequest; self.client.rpc(msg).await??; diff --git a/src/api/proto.rs b/src/api/proto.rs index 8b3780bd7..502215edd 100644 --- a/src/api/proto.rs +++ b/src/api/proto.rs @@ -40,6 +40,7 @@ pub use bitfield::Bitfield; use crate::{store::util::Tag, util::temp_tag::TempTag, BlobFormat, Hash, HashAndFormat}; +#[allow(dead_code)] pub(crate) trait HashSpecific { fn hash(&self) -> Hash; diff --git a/src/api/proto/bitfield.rs b/src/api/proto/bitfield.rs index d3ccca66b..2e1144b10 100644 --- a/src/api/proto/bitfield.rs +++ b/src/api/proto/bitfield.rs @@ -70,6 +70,7 @@ impl<'de> Deserialize<'de> for Bitfield { } impl Bitfield { + #[cfg(feature = "fs-store")] pub(crate) fn new_unchecked(ranges: ChunkRanges, size: u64) -> Self { Self { ranges, size } } diff --git a/src/store/fs.rs b/src/store/fs.rs index 2eb21b312..645b4548f 100644 --- a/src/store/fs.rs +++ b/src/store/fs.rs @@ -121,12 +121,13 @@ use crate::{ util::entity_manager::{self, ActiveEntityState}, }, util::{BaoTreeSender, FixedSize, MemOrFile, ValueOrPoisioned}, - Hash, IROH_BLOCK_SIZE, + IROH_BLOCK_SIZE, }, util::{ channel::oneshot, temp_tag::{TagDrop, TempTag, TempTagScope, TempTags}, }, + Hash, }; mod bao_file; use bao_file::BaoFileHandle; @@ -142,7 +143,7 @@ use options::Options; use tracing::Instrument; mod gc; -use super::HashAndFormat; +use crate::HashAndFormat; use crate::api::{ self, blobs::{AddProgressItem, ExportMode, ExportProgressItem}, @@ -1477,7 +1478,7 @@ pub mod tests { api::blobs::Bitfield, store::{ util::{read_checksummed, SliceInfoExt, Tag}, - HashAndFormat, IROH_BLOCK_SIZE, + IROH_BLOCK_SIZE, }, }; diff --git a/src/store/fs/bao_file.rs b/src/store/fs/bao_file.rs index 63d2402c3..3b09f8daf 100644 --- a/src/store/fs/bao_file.rs +++ b/src/store/fs/bao_file.rs @@ -35,8 
+35,9 @@ use crate::{ read_checksummed_and_truncate, write_checksummed, FixedSize, MemOrFile, PartialMemStorage, DD, }, - Hash, IROH_BLOCK_SIZE, + IROH_BLOCK_SIZE, }, + Hash, }; /// Storage for complete blobs. There is no longer any uncertainty about the diff --git a/src/store/fs/meta.rs b/src/store/fs/meta.rs index 21fbd9ed4..ebe162b26 100644 --- a/src/store/fs/meta.rs +++ b/src/store/fs/meta.rs @@ -26,9 +26,8 @@ use crate::{ CreateTagRequest, DeleteBlobsMsg, DeleteTagsRequest, ListBlobsMsg, ListRequest, ListTagsRequest, RenameTagRequest, SetTagRequest, ShutdownMsg, SyncDbMsg, }, - tags::TagInfo, - }, - util::channel::oneshot, + tags::TagInfo, Tag, + }, util::channel::oneshot, Hash }; mod proto; pub use proto::*; @@ -43,7 +42,7 @@ use super::{ util::PeekableReceiver, BaoFilePart, }; -use crate::store::{util::Tag, Hash, IROH_BLOCK_SIZE}; +use crate::store::IROH_BLOCK_SIZE; /// Error type for message handler functions of the redb actor. /// diff --git a/src/store/fs/meta/tables.rs b/src/store/fs/meta/tables.rs index a983a275a..ac15e9ccd 100644 --- a/src/store/fs/meta/tables.rs +++ b/src/store/fs/meta/tables.rs @@ -2,7 +2,8 @@ use redb::{ReadableTable, TableDefinition, TableError}; use super::EntryState; -use crate::store::{fs::delete_set::FileTransaction, util::Tag, Hash, HashAndFormat}; +use crate::store::{fs::delete_set::FileTransaction}; +use crate::{Hash, HashAndFormat, api::Tag}; pub(super) const BLOBS_TABLE: TableDefinition = TableDefinition::new("blobs-0"); diff --git a/src/store/mem.rs b/src/store/mem.rs index 8a2a227b7..5fd4be918 100644 --- a/src/store/mem.rs +++ b/src/store/mem.rs @@ -59,8 +59,9 @@ use crate::{ protocol::ChunkRangesExt, store::{ util::{SizeInfo, SparseMemFile, Tag}, - HashAndFormat, IROH_BLOCK_SIZE, + IROH_BLOCK_SIZE, }, + HashAndFormat, util::temp_tag::{TagDrop, TempTagScope, TempTags}, BlobFormat, Hash, }; diff --git a/src/store/mod.rs b/src/store/mod.rs index df20aff63..4fdb30606 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ 
-12,7 +12,5 @@ pub mod readonly_mem; mod test; pub(crate) mod util; -use crate::hash::{Hash, HashAndFormat}; - /// Block size used by iroh, 2^4*1024 = 16KiB pub const IROH_BLOCK_SIZE: BlockSize = BlockSize::from_chunk_log(4); diff --git a/src/store/util.rs b/src/store/util.rs index 31c1de4d0..fe0c94683 100644 --- a/src/store/util.rs +++ b/src/store/util.rs @@ -1,26 +1,14 @@ -use std::{ - borrow::Borrow, - fmt, - fs::{File, OpenOptions}, - io::{self, Read, Write}, - path::Path, - time::SystemTime, -}; - -use arrayvec::ArrayString; -use bao_tree::{blake3, io::mixed::EncodedItem}; +use std::{borrow::Borrow, fmt, time::SystemTime}; + +use bao_tree::io::mixed::EncodedItem; use bytes::Bytes; use derive_more::{From, Into}; -#[cfg(feature = "fs-store")] -mod mem_or_file; -#[cfg(feature = "fs-store")] -pub use mem_or_file::{FixedSize, MemOrFile}; mod sparse_mem_file; use irpc::channel::mpsc; use range_collections::{range_set::RangeSetEntry, RangeSetRef}; use ref_cast::RefCast; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde::{Deserialize, Serialize}; pub use sparse_mem_file::SparseMemFile; pub mod observer; mod size_info; @@ -28,6 +16,11 @@ pub use size_info::SizeInfo; mod partial_mem_storage; pub use partial_mem_storage::PartialMemStorage; +#[cfg(feature = "fs-store")] +mod mem_or_file; +#[cfg(feature = "fs-store")] +pub use mem_or_file::{FixedSize, MemOrFile}; + /// A named, persistent tag. 
#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, PartialOrd, Ord, From, Into)] pub struct Tag(pub Bytes); @@ -140,49 +133,6 @@ pub(crate) fn get_limited_slice(bytes: &Bytes, offset: u64, len: usize) -> Bytes bytes.slice(limited_range(offset, len, bytes.len())) } -#[cfg(feature = "fs-store")] -mod redb_support { - use bytes::Bytes; - use redb::{Key as RedbKey, Value as RedbValue}; - - use super::Tag; - - impl RedbValue for Tag { - type SelfType<'a> = Self; - - type AsBytes<'a> = bytes::Bytes; - - fn fixed_width() -> Option { - None - } - - fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> - where - Self: 'a, - { - Self(Bytes::copy_from_slice(data)) - } - - fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> - where - Self: 'a, - Self: 'b, - { - value.0.clone() - } - - fn type_name() -> redb::TypeName { - redb::TypeName::new("Tag") - } - } - - impl RedbKey for Tag { - fn compare(data1: &[u8], data2: &[u8]) -> std::cmp::Ordering { - data1.cmp(data2) - } - } -} - pub trait RangeSetExt { fn upper_bound(&self) -> Option; } @@ -201,161 +151,223 @@ impl RangeSetExt for RangeSetRef { } } -pub fn write_checksummed, T: Serialize>(path: P, data: &T) -> io::Result<()> { - // Build Vec with space for hash - let mut buffer = Vec::with_capacity(32 + 128); - buffer.extend_from_slice(&[0u8; 32]); +#[cfg(feature = "fs-store")] +mod fs { + use std::{ + fmt, fs::{File, OpenOptions}, io::{self, Read, Write}, path::Path + }; - // Serialize directly into buffer - postcard::to_io(data, &mut buffer).map_err(io::Error::other)?; + use arrayvec::ArrayString; + use bao_tree::blake3; + use serde::{de::DeserializeOwned, Serialize}; - // Compute hash over data (skip first 32 bytes) - let data_slice = &buffer[32..]; - let hash = blake3::hash(data_slice); - buffer[..32].copy_from_slice(hash.as_bytes()); + mod redb_support { + use bytes::Bytes; + use redb::{Key as RedbKey, Value as RedbValue}; - // Write all at once - let mut file = File::create(&path)?; - 
file.write_all(&buffer)?; - file.sync_all()?; + use super::super::Tag; - Ok(()) -} + impl RedbValue for Tag { + type SelfType<'a> = Self; -pub fn read_checksummed_and_truncate(path: impl AsRef) -> io::Result { - let path = path.as_ref(); - let mut file = OpenOptions::new() - .read(true) - .write(true) - .truncate(false) - .open(path)?; - let mut buffer = Vec::new(); - file.read_to_end(&mut buffer)?; - file.set_len(0)?; - file.sync_all()?; - - if buffer.is_empty() { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "File marked dirty", - )); - } + type AsBytes<'a> = bytes::Bytes; - if buffer.len() < 32 { - return Err(io::Error::new(io::ErrorKind::InvalidData, "File too short")); - } + fn fixed_width() -> Option { + None + } - let stored_hash = &buffer[..32]; - let data = &buffer[32..]; + fn from_bytes<'a>(data: &'a [u8]) -> Self::SelfType<'a> + where + Self: 'a, + { + Self(Bytes::copy_from_slice(data)) + } + + fn as_bytes<'a, 'b: 'a>(value: &'a Self::SelfType<'b>) -> Self::AsBytes<'a> + where + Self: 'a, + Self: 'b, + { + value.0.clone() + } + + fn type_name() -> redb::TypeName { + redb::TypeName::new("Tag") + } + } - let computed_hash = blake3::hash(data); - if computed_hash.as_bytes() != stored_hash { - return Err(io::Error::new(io::ErrorKind::InvalidData, "Hash mismatch")); + impl RedbKey for Tag { + fn compare(data1: &[u8], data2: &[u8]) -> std::cmp::Ordering { + data1.cmp(data2) + } + } } - let deserialized = postcard::from_bytes(data).map_err(io::Error::other)?; + pub fn write_checksummed, T: Serialize>(path: P, data: &T) -> io::Result<()> { + // Build Vec with space for hash + let mut buffer = Vec::with_capacity(32 + 128); + buffer.extend_from_slice(&[0u8; 32]); - Ok(deserialized) -} + // Serialize directly into buffer + postcard::to_io(data, &mut buffer).map_err(io::Error::other)?; -#[cfg(test)] -pub fn read_checksummed(path: impl AsRef) -> io::Result { - use tracing::info; - - let path = path.as_ref(); - let mut file = File::open(path)?; - let 
mut buffer = Vec::new(); - file.read_to_end(&mut buffer)?; - info!("{} {}", path.display(), hex::encode(&buffer)); - - if buffer.is_empty() { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - "File marked dirty", - )); - } + // Compute hash over data (skip first 32 bytes) + let data_slice = &buffer[32..]; + let hash = blake3::hash(data_slice); + buffer[..32].copy_from_slice(hash.as_bytes()); - if buffer.len() < 32 { - return Err(io::Error::new(io::ErrorKind::InvalidData, "File too short")); + // Write all at once + let mut file = File::create(&path)?; + file.write_all(&buffer)?; + file.sync_all()?; + + Ok(()) } - let stored_hash = &buffer[..32]; - let data = &buffer[32..]; + pub fn read_checksummed_and_truncate( + path: impl AsRef, + ) -> io::Result { + let path = path.as_ref(); + let mut file = OpenOptions::new() + .read(true) + .write(true) + .truncate(false) + .open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + file.set_len(0)?; + file.sync_all()?; + + if buffer.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File marked dirty", + )); + } - let computed_hash = blake3::hash(data); - if computed_hash.as_bytes() != stored_hash { - return Err(io::Error::new(io::ErrorKind::InvalidData, "Hash mismatch")); - } + if buffer.len() < 32 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "File too short")); + } - let deserialized = postcard::from_bytes(data).map_err(io::Error::other)?; + let stored_hash = &buffer[..32]; + let data = &buffer[32..]; - Ok(deserialized) -} + let computed_hash = blake3::hash(data); + if computed_hash.as_bytes() != stored_hash { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Hash mismatch")); + } -/// Helper trait for bytes for debugging -pub trait SliceInfoExt: AsRef<[u8]> { - // get the addr of the actual data, to check if data was copied - fn addr(&self) -> usize; + let deserialized = postcard::from_bytes(data).map_err(io::Error::other)?; - // a short symbol 
string for the address - fn addr_short(&self) -> ArrayString<12> { - let addr = self.addr().to_le_bytes(); - symbol_string(&addr) + Ok(deserialized) } - #[allow(dead_code)] - fn hash_short(&self) -> ArrayString<10> { - crate::Hash::new(self.as_ref()).fmt_short() - } -} + #[cfg(test)] + pub fn read_checksummed(path: impl AsRef) -> io::Result { + use std::{fs::File, io::Read}; + + use bao_tree::blake3; + use tracing::info; + + let path = path.as_ref(); + let mut file = File::open(path)?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer)?; + info!("{} {}", path.display(), hex::encode(&buffer)); + + if buffer.is_empty() { + use std::io; + + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "File marked dirty", + )); + } + + if buffer.len() < 32 { + return Err(io::Error::new(io::ErrorKind::InvalidData, "File too short")); + } -impl> SliceInfoExt for T { - fn addr(&self) -> usize { - self.as_ref() as *const [u8] as *const u8 as usize + let stored_hash = &buffer[..32]; + let data = &buffer[32..]; + + let computed_hash = blake3::hash(data); + if computed_hash.as_bytes() != stored_hash { + return Err(io::Error::new(io::ErrorKind::InvalidData, "Hash mismatch")); + } + + let deserialized = postcard::from_bytes(data).map_err(io::Error::other)?; + + Ok(deserialized) } - fn hash_short(&self) -> ArrayString<10> { - crate::Hash::new(self.as_ref()).fmt_short() + /// Helper trait for bytes for debugging + pub trait SliceInfoExt: AsRef<[u8]> { + // get the addr of the actual data, to check if data was copied + fn addr(&self) -> usize; + + // a short symbol string for the address + fn addr_short(&self) -> ArrayString<12> { + let addr = self.addr().to_le_bytes(); + symbol_string(&addr) + } + + #[allow(dead_code)] + fn hash_short(&self) -> ArrayString<10> { + crate::Hash::new(self.as_ref()).fmt_short() + } } -} -pub fn symbol_string(data: &[u8]) -> ArrayString<12> { - const SYMBOLS: &[char] = &[ - '😀', '😂', '😍', '😎', '😢', '😡', '😱', '😴', '🤓', '🤔', '🤗', '🤢', '🤡', 
'🤖', '👽', - '👾', '👻', '💀', '💩', '♥', '💥', '💦', '💨', '💫', '💬', '💭', '💰', '💳', '💼', '📈', - '📉', '📍', '📢', '📦', '📱', '📷', '📺', '🎃', '🎄', '🎉', '🎋', '🎍', '🎒', '🎓', '🎖', - '🎤', '🎧', '🎮', '🎰', '🎲', '🎳', '🎴', '🎵', '🎷', '🎸', '🎹', '🎺', '🎻', '🎼', '🏀', - '🏁', '🏆', '🏈', - ]; - const BASE: usize = SYMBOLS.len(); // 64 - - // Hash the input with BLAKE3 - let hash = blake3::hash(data); - let bytes = hash.as_bytes(); // 32-byte hash - - // Create an ArrayString with capacity 12 (bytes) - let mut result = ArrayString::<12>::new(); - - // Fill with 3 symbols - for byte in bytes.iter().take(3) { - let byte = *byte as usize; - let index = byte % BASE; - result.push(SYMBOLS[index]); // Each char can be up to 4 bytes + impl> SliceInfoExt for T { + fn addr(&self) -> usize { + self.as_ref() as *const [u8] as *const u8 as usize + } + + fn hash_short(&self) -> ArrayString<10> { + crate::Hash::new(self.as_ref()).fmt_short() + } } - result -} + pub fn symbol_string(data: &[u8]) -> ArrayString<12> { + const SYMBOLS: &[char] = &[ + '😀', '😂', '😍', '😎', '😢', '😡', '😱', '😴', '🤓', '🤔', '🤗', '🤢', '🤡', '🤖', + '👽', '👾', '👻', '💀', '💩', '♥', '💥', '💦', '💨', '💫', '💬', '💭', '💰', '💳', + '💼', '📈', '📉', '📍', '📢', '📦', '📱', '📷', '📺', '🎃', '🎄', '🎉', '🎋', '🎍', + '🎒', '🎓', '🎖', '🎤', '🎧', '🎮', '🎰', '🎲', '🎳', '🎴', '🎵', '🎷', '🎸', '🎹', + '🎺', '🎻', '🎼', '🏀', '🏁', '🏆', '🏈', + ]; + const BASE: usize = SYMBOLS.len(); // 63 + + // Hash the input with BLAKE3 + let hash = blake3::hash(data); + let bytes = hash.as_bytes(); // 32-byte hash + + // Create an ArrayString with capacity 12 (bytes) + let mut result = ArrayString::<12>::new(); + + // Fill with 3 symbols + for byte in bytes.iter().take(3) { + let byte = *byte as usize; + let index = byte % BASE; + result.push(SYMBOLS[index]); // Each char can be up to 4 bytes + } -pub struct ValueOrPoisioned(pub Option); + result + } -impl fmt::Debug for ValueOrPoisioned { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.0 { - Some(x) => x.fmt(f), - None => 
f.debug_tuple("Poisoned").finish(), + pub struct ValueOrPoisioned(pub Option); + + impl fmt::Debug for ValueOrPoisioned { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.0 { + Some(x) => x.fmt(f), + None => f.debug_tuple("Poisoned").finish(), + } } } } +#[cfg(feature = "fs-store")] +pub use fs::*; /// Given a prefix, increment it lexographically. /// diff --git a/src/util.rs b/src/util.rs index 954a89354..59e366d81 100644 --- a/src/util.rs +++ b/src/util.rs @@ -214,6 +214,7 @@ pub(crate) mod serde { } } +#[cfg(feature = "fs-store")] pub(crate) mod outboard_with_progress { use std::io::{self, BufReader, Read}; @@ -329,7 +330,6 @@ pub(crate) mod outboard_with_progress { } #[cfg(test)] - #[cfg(feature = "fs-store")] mod tests { use bao_tree::{ blake3, diff --git a/src/util/channel.rs b/src/util/channel.rs index 248b0fb4f..dc8ad1d85 100644 --- a/src/util/channel.rs +++ b/src/util/channel.rs @@ -1,3 +1,4 @@ +#[cfg(feature = "fs-store")] pub mod oneshot { use std::{ future::Future, From a89ec2cbca4010d5c9dc5fa52dd433bc40efba28 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 3 Sep 2025 11:36:46 +0200 Subject: [PATCH 4/6] fmt --- src/api/blobs.rs | 1 - src/store/fs.rs | 12 +++++++----- src/store/fs/meta.rs | 7 +++++-- src/store/fs/meta/tables.rs | 3 +-- src/store/mem.rs | 3 +-- src/store/util.rs | 5 ++++- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/api/blobs.rs b/src/api/blobs.rs index 3cab4b198..63e44b141 100644 --- a/src/api/blobs.rs +++ b/src/api/blobs.rs @@ -144,7 +144,6 @@ impl Blobs { /// clears the protections before. /// /// Users should rely only on garbage collection for blob deletion. 
- #[allow(dead_code)] pub(crate) async fn delete_with_opts(&self, options: DeleteOptions) -> RequestResult<()> { trace!("{options:?}"); diff --git a/src/store/fs.rs b/src/store/fs.rs index 645b4548f..b64244a31 100644 --- a/src/store/fs.rs +++ b/src/store/fs.rs @@ -143,11 +143,13 @@ use options::Options; use tracing::Instrument; mod gc; -use crate::HashAndFormat; -use crate::api::{ - self, - blobs::{AddProgressItem, ExportMode, ExportProgressItem}, - Store, +use crate::{ + api::{ + self, + blobs::{AddProgressItem, ExportMode, ExportProgressItem}, + Store, + }, + HashAndFormat, }; /// Create a 16 byte unique ID. diff --git a/src/store/fs/meta.rs b/src/store/fs/meta.rs index ebe162b26..d71f15c20 100644 --- a/src/store/fs/meta.rs +++ b/src/store/fs/meta.rs @@ -26,8 +26,11 @@ use crate::{ CreateTagRequest, DeleteBlobsMsg, DeleteTagsRequest, ListBlobsMsg, ListRequest, ListTagsRequest, RenameTagRequest, SetTagRequest, ShutdownMsg, SyncDbMsg, }, - tags::TagInfo, Tag, - }, util::channel::oneshot, Hash + tags::TagInfo, + Tag, + }, + util::channel::oneshot, + Hash, }; mod proto; pub use proto::*; diff --git a/src/store/fs/meta/tables.rs b/src/store/fs/meta/tables.rs index ac15e9ccd..3695832eb 100644 --- a/src/store/fs/meta/tables.rs +++ b/src/store/fs/meta/tables.rs @@ -2,8 +2,7 @@ use redb::{ReadableTable, TableDefinition, TableError}; use super::EntryState; -use crate::store::{fs::delete_set::FileTransaction}; -use crate::{Hash, HashAndFormat, api::Tag}; +use crate::{api::Tag, store::fs::delete_set::FileTransaction, Hash, HashAndFormat}; pub(super) const BLOBS_TABLE: TableDefinition = TableDefinition::new("blobs-0"); diff --git a/src/store/mem.rs b/src/store/mem.rs index 5fd4be918..eccd1416b 100644 --- a/src/store/mem.rs +++ b/src/store/mem.rs @@ -61,9 +61,8 @@ use crate::{ util::{SizeInfo, SparseMemFile, Tag}, IROH_BLOCK_SIZE, }, - HashAndFormat, util::temp_tag::{TagDrop, TempTagScope, TempTags}, - BlobFormat, Hash, + BlobFormat, Hash, HashAndFormat, }; #[derive(Debug, 
Default)] diff --git a/src/store/util.rs b/src/store/util.rs index fe0c94683..7bc3a3227 100644 --- a/src/store/util.rs +++ b/src/store/util.rs @@ -154,7 +154,10 @@ impl RangeSetExt for RangeSetRef { #[cfg(feature = "fs-store")] mod fs { use std::{ - fmt, fs::{File, OpenOptions}, io::{self, Read, Write}, path::Path + fmt, + fs::{File, OpenOptions}, + io::{self, Read, Write}, + path::Path, }; use arrayvec::ArrayString; From 2a1d783a057075d08b37784d2e147bc0654d1380 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 3 Sep 2025 11:56:46 +0200 Subject: [PATCH 5/6] deny --- Cargo.lock | 48 +++++++++++++----------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4068354f7..988d7955a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2094,11 +2094,11 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "matchers" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" dependencies = [ - "regex-automata 0.1.10", + "regex-automata", ] [[package]] @@ -2385,12 +2385,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.46.0" +version = "0.50.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" dependencies = [ - "overload", - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -2467,12 +2466,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "parking" version = "2.2.1" @@ -2907,7 +2900,7 @@ dependencies = [ "rand 0.9.2", "rand_chacha 0.9.0", "rand_xorshift", - "regex-syntax 0.8.5", + "regex-syntax", "rusty-fork", "tempfile", "unarray", @@ -3151,17 +3144,8 @@ checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.9", - "regex-syntax 0.8.5", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" -dependencies = [ - "regex-syntax 0.6.29", + "regex-automata", + "regex-syntax", ] [[package]] @@ -3172,7 +3156,7 @@ checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.5", + "regex-syntax", ] [[package]] @@ -3181,12 +3165,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" -[[package]] -name = "regex-syntax" -version = "0.6.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" - [[package]] name = "regex-syntax" version = "0.8.5" @@ -4245,14 +4223,14 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.19" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" dependencies = [ "matchers", "nu-ansi-term", "once_cell", - "regex", + "regex-automata", "sharded-slab", "smallvec", "thread_local", From 1e83a0805ca42526a86ee5ad8f4ff6854ecfd323 Mon Sep 17 00:00:00 2001 From: Ruediger 
Klaehn Date: Thu, 11 Sep 2025 14:49:40 +0300 Subject: [PATCH 6/6] Add docs about the features, and replace dead_code with feature gating --- src/api/blobs.rs | 4 ++-- src/lib.rs | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/api/blobs.rs b/src/api/blobs.rs index 63e44b141..cbd27bbac 100644 --- a/src/api/blobs.rs +++ b/src/api/blobs.rs @@ -144,7 +144,7 @@ impl Blobs { /// clears the protections before. /// /// Users should rely only on garbage collection for blob deletion. - #[allow(dead_code)] + #[cfg(feature = "fs-store")] pub(crate) async fn delete_with_opts(&self, options: DeleteOptions) -> RequestResult<()> { trace!("{options:?}"); self.client.rpc(options).await??; @@ -152,7 +152,7 @@ impl Blobs { } /// See [`Self::delete_with_opts`]. - #[allow(dead_code)] + #[cfg(feature = "fs-store")] pub(crate) async fn delete( &self, hashes: impl IntoIterator>, diff --git a/src/lib.rs b/src/lib.rs index cc9a35f78..dddacd854 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,6 +24,11 @@ //! The [downloader](api::downloader) module provides a component to download blobs from //! multiple sources and store them in a store. //! +//! # Features +//! +//! - `fs-store`: Enables the filesystem-based store implementation. Enabled by default. It pulls in a few additional dependencies, such as `redb` and `reflink-copy`. +//! - `metrics`: Enables Prometheus metrics for stores and the protocol. +//! //! [BLAKE3]: https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf //! [iroh]: https://docs.rs/iroh mod hash;