From 30b8ac55ef937d7769fef50d41cbe4f8fd241847 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 17 Sep 2025 10:07:12 +0300 Subject: [PATCH 1/3] Fix double external path export bug We don't want to move the file from an external location, but copy if external! --- src/store/fs.rs | 67 +++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/src/store/fs.rs b/src/store/fs.rs index b64244a31..c62c647cb 100644 --- a/src/store/fs.rs +++ b/src/store/fs.rs @@ -1239,18 +1239,21 @@ async fn export_path_impl( } }; trace!("exporting {} to {}", cmd.hash.to_hex(), target.display()); - let data = match data_location { - DataLocation::Inline(data) => MemOrFile::Mem(data), - DataLocation::Owned(size) => { - MemOrFile::File((ctx.options().path.data_path(&cmd.hash), size)) - } - DataLocation::External(paths, size) => MemOrFile::File(( - paths - .into_iter() - .next() - .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, "no external data path"))?, - size, - )), + let (data, external) = match data_location { + DataLocation::Inline(data) => (MemOrFile::Mem(data), false), + DataLocation::Owned(size) => ( + MemOrFile::File((ctx.options().path.data_path(&cmd.hash), size)), + false, + ), + DataLocation::External(paths, size) => ( + MemOrFile::File(( + paths.into_iter().next().ok_or_else(|| { + io::Error::new(io::ErrorKind::NotFound, "no external data path") + })?, + size, + )), + true, + ), }; let size = match &data { MemOrFile::Mem(data) => data.len() as u64, @@ -1274,24 +1277,34 @@ async fn export_path_impl( ); } ExportMode::TryReference => { - match std::fs::rename(&source_path, &target) { - Ok(()) => {} - Err(cause) => { - const ERR_CROSS: i32 = 18; - if cause.raw_os_error() == Some(ERR_CROSS) { - let source = fs::File::open(&source_path)?; - let mut target = fs::File::create(&target)?; - copy_with_progress(&source, size, &mut target, tx).await?; - } else { - return Err(cause.into()); + if external { + let res = + reflink_or_copy_with_progress(&source_path, &target, size, tx).await?; + trace!( + "exported {} also to {}, {res:?}", + source_path.display(), + target.display() + ); + } else { + match std::fs::rename(&source_path, &target) { + Ok(()) => {} + Err(cause) => { + const ERR_CROSS: i32 = 18; + if cause.raw_os_error() == Some(ERR_CROSS) { + let source = fs::File::open(&source_path)?; + let mut target = fs::File::create(&target)?; + copy_with_progress(&source, size, &mut target, tx).await?; + } else { + return Err(cause.into()); + } } } + ctx.set(EntryState::Complete { + data_location: DataLocation::External(vec![target], size), + outboard_location, + }) + .await?; } - ctx.set(EntryState::Complete { - data_location: DataLocation::External(vec![target], size), - outboard_location, - }) - .await?; } }, } From cac182e092e034be0a9c4545d780872f06cbdb0e Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 17 Sep 2025 10:17:04 +0300 Subject: [PATCH 2/3] fix: properly handle the case where a file is already external. We don't want to move, but to copy in that case. We also update the db, but limit the max number of external paths to MAX_EXTERNAL_PATHS This fixes a regression from blobs 0.35 regarding ExportMode::TryReference --- src/store/fs.rs | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/store/fs.rs b/src/store/fs.rs index c62c647cb..0e03308a5 100644 --- a/src/store/fs.rs +++ b/src/store/fs.rs @@ -152,6 +152,9 @@ use crate::{ HashAndFormat, }; +/// Maximum number of external paths we track per blob. +const MAX_EXTERNAL_PATHS: usize = 8; + /// Create a 16 byte unique ID. fn new_uuid() -> [u8; 16] { use rand::RngCore; @@ -1239,20 +1242,20 @@ async fn export_path_impl( } }; trace!("exporting {} to {}", cmd.hash.to_hex(), target.display()); - let (data, external) = match data_location { - DataLocation::Inline(data) => (MemOrFile::Mem(data), false), + let (data, mut external) = match data_location { + DataLocation::Inline(data) => (MemOrFile::Mem(data), vec![]), DataLocation::Owned(size) => ( MemOrFile::File((ctx.options().path.data_path(&cmd.hash), size)), - false, + vec![], ), DataLocation::External(paths, size) => ( MemOrFile::File(( - paths.into_iter().next().ok_or_else(|| { + paths.iter().cloned().next().ok_or_else(|| { io::Error::new(io::ErrorKind::NotFound, "no external data path") })?, size, )), - true, + paths, ), }; let size = match &data { @@ -1277,7 +1280,9 @@ async fn export_path_impl( ); } ExportMode::TryReference => { - if external { + if !external.is_empty() { + // the file already exists externally, so we need to copy it. + // if the OS supports reflink, we might as well use that. let res = reflink_or_copy_with_progress(&source_path, &target, size, tx).await?; trace!( @@ -1285,26 +1290,33 @@ async fn export_path_impl( source_path.display(), target.display() ); + external.push(target); + external.sort(); + external.dedup(); + external.truncate(MAX_EXTERNAL_PATHS); } else { + // the file was previously owned, so we can just move it. + // if that fails with ERR_CROSS, we fall back to copy. match std::fs::rename(&source_path, &target) { Ok(()) => {} Err(cause) => { const ERR_CROSS: i32 = 18; if cause.raw_os_error() == Some(ERR_CROSS) { - let source = fs::File::open(&source_path)?; - let mut target = fs::File::create(&target)?; - copy_with_progress(&source, size, &mut target, tx).await?; + reflink_or_copy_with_progress(&source_path, &target, size, tx) + .await?; + // todo: delete file at source_path } else { return Err(cause.into()); } } } - ctx.set(EntryState::Complete { - data_location: DataLocation::External(vec![target], size), - outboard_location, - }) - .await?; - } + external.push(target); + }; + ctx.set(EntryState::Complete { + data_location: DataLocation::External(external, size), + outboard_location, + }) + .await?; } }, } From 530f86360d581b31afd8a828fc9486082fb1ac12 Mon Sep 17 00:00:00 2001 From: Ruediger Klaehn Date: Wed, 17 Sep 2025 10:53:29 +0300 Subject: [PATCH 3/3] Add comment about file deletion! also clippy --- src/store/fs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/store/fs.rs b/src/store/fs.rs index 0e03308a5..e7f16387e 100644 --- a/src/store/fs.rs +++ b/src/store/fs.rs @@ -1250,7 +1250,7 @@ async fn export_path_impl( ), DataLocation::External(paths, size) => ( MemOrFile::File(( - paths.iter().cloned().next().ok_or_else(|| { + paths.first().cloned().ok_or_else(|| { io::Error::new(io::ErrorKind::NotFound, "no external data path") })?, size, @@ -1304,7 +1304,6 @@ async fn export_path_impl( if cause.raw_os_error() == Some(ERR_CROSS) { reflink_or_copy_with_progress(&source_path, &target, size, tx) .await?; - // todo: delete file at source_path } else { return Err(cause.into()); } @@ -1312,6 +1311,7 @@ async fn export_path_impl( } external.push(target); }; + // setting the new entry state will also take care of deleting the owned data file! ctx.set(EntryState::Complete { data_location: DataLocation::External(external, size), outboard_location,