Skip to content

Commit 11e8eec

Browse files
Merge pull request #118 from theseus-rs/refactor-extract-directories
refactor!: refactor archive extract directories
2 parents ffb94f6 + 6d5ec7d commit 11e8eec

File tree

16 files changed

+396
-245
lines changed

16 files changed

+396
-245
lines changed

Cargo.lock

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/zonky/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ use postgresql_embedded::{PostgreSQL, Result, Settings};
99
async fn main() -> Result<()> {
1010
let settings = Settings {
1111
releases_url: zonky::URL.to_string(),
12-
version: VersionReq::parse("=16.2.0")?,
12+
version: VersionReq::parse("=16.3.0")?,
1313
..Default::default()
1414
};
1515
let mut postgresql = PostgreSQL::new(settings);

postgresql_archive/Cargo.toml

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ version.workspace = true
1212
[dependencies]
1313
anyhow = { workspace = true }
1414
async-trait = { workspace = true }
15-
flate2 = { workspace = true, optional = true }
15+
flate2 = { workspace = true }
1616
hex = { workspace = true }
1717
http = { workspace = true }
1818
human_bytes = { workspace = true, default-features = false }
@@ -29,15 +29,15 @@ serde = { workspace = true, features = ["derive"] }
2929
serde_json = { workspace = true, optional = true }
3030
sha1 = { workspace = true, optional = true }
3131
sha2 = { workspace = true, optional = true }
32-
tar = { workspace = true, optional = true }
32+
tar = { workspace = true }
3333
target-triple = { workspace = true, optional = true }
3434
tempfile = { workspace = true }
3535
thiserror = { workspace = true }
3636
tokio = { workspace = true, features = ["full"], optional = true }
3737
tracing = { workspace = true, features = ["log"] }
3838
url = { workspace = true }
39-
xz2 = { workspace = true, optional = true }
40-
zip = { workspace = true, optional = true }
39+
xz2 = { workspace = true }
40+
zip = { workspace = true }
4141

4242
[dev-dependencies]
4343
criterion = { workspace = true }
@@ -66,17 +66,11 @@ rustls-tls = ["reqwest/rustls-tls-native-roots"]
6666
sha1 = ["dep:sha1"]
6767
sha2 = ["dep:sha2"]
6868
theseus = [
69-
"dep:flate2",
70-
"dep:tar",
7169
"dep:target-triple",
7270
"github",
7371
"sha2",
7472
]
7573
zonky = [
76-
"dep:flate2",
77-
"dep:tar",
78-
"dep:xz2",
79-
"dep:zip",
8074
"maven",
8175
]
8276

postgresql_archive/src/archive.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
use crate::error::Result;
55
use crate::{extractor, repository};
6+
use regex::Regex;
67
use semver::{Version, VersionReq};
78
use std::path::{Path, PathBuf};
89
use tracing::instrument;
@@ -43,7 +44,9 @@ pub async fn get_archive(url: &str, version_req: &VersionReq) -> Result<(Version
4344
#[instrument(skip(bytes))]
4445
pub async fn extract(url: &str, bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
4546
let extractor_fn = extractor::registry::get(url)?;
46-
extractor_fn(bytes, out_dir)
47+
let mut extract_directories = extractor::ExtractDirectories::default();
48+
extract_directories.add_mapping(Regex::new(".*")?, out_dir.to_path_buf());
49+
extractor_fn(bytes, extract_directories)
4750
}
4851

4952
#[cfg(test)]

postgresql_archive/src/configuration/theseus/extractor.rs

Lines changed: 10 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,11 @@
1+
use crate::extractor::{tar_gz_extract, ExtractDirectories};
12
use crate::Error::Unexpected;
23
use crate::Result;
3-
use flate2::bufread::GzDecoder;
4-
use human_bytes::human_bytes;
5-
use num_format::{Locale, ToFormattedString};
6-
use std::fs::{create_dir_all, remove_dir_all, remove_file, rename, File};
7-
use std::io::{copy, BufReader, Cursor};
4+
use regex::Regex;
5+
use std::fs::{create_dir_all, remove_dir_all, remove_file, rename};
86
use std::path::{Path, PathBuf};
97
use std::thread::sleep;
108
use std::time::Duration;
11-
use tar::Archive;
129
use tracing::{debug, instrument, warn};
1310

1411
/// Extracts the compressed tar `bytes` to the [out_dir](Path).
@@ -17,18 +14,14 @@ use tracing::{debug, instrument, warn};
1714
/// Returns an error if the extraction fails.
1815
#[allow(clippy::cast_precision_loss)]
1916
#[instrument(skip(bytes))]
20-
pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
21-
let mut files = Vec::new();
22-
let input = BufReader::new(Cursor::new(bytes));
23-
let decoder = GzDecoder::new(input);
24-
let mut archive = Archive::new(decoder);
25-
let mut extracted_bytes = 0;
17+
pub fn extract(bytes: &Vec<u8>, extract_directories: ExtractDirectories) -> Result<Vec<PathBuf>> {
18+
let out_dir = extract_directories.get_path(".")?;
2619

2720
let parent_dir = if let Some(parent) = out_dir.parent() {
2821
parent
2922
} else {
3023
debug!("No parent directory for {}", out_dir.to_string_lossy());
31-
out_dir
24+
out_dir.as_path()
3225
};
3326

3427
create_dir_all(parent_dir)?;
@@ -42,55 +35,14 @@ pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
4235
out_dir.to_string_lossy()
4336
);
4437
remove_file(&lock_file)?;
45-
return Ok(files);
38+
return Ok(Vec::new());
4639
}
4740

4841
let extract_dir = tempfile::tempdir_in(parent_dir)?.into_path();
4942
debug!("Extracting archive to {}", extract_dir.to_string_lossy());
50-
51-
for archive_entry in archive.entries()? {
52-
let mut entry = archive_entry?;
53-
let entry_header = entry.header();
54-
let entry_type = entry_header.entry_type();
55-
let entry_size = entry_header.size()?;
56-
#[cfg(unix)]
57-
let file_mode = entry_header.mode()?;
58-
59-
let entry_header_path = entry_header.path()?.to_path_buf();
60-
let prefix = match entry_header_path.components().next() {
61-
Some(component) => component.as_os_str().to_str().unwrap_or_default(),
62-
None => {
63-
return Err(Unexpected(
64-
"Failed to get file header path prefix".to_string(),
65-
));
66-
}
67-
};
68-
let stripped_entry_header_path = entry_header_path.strip_prefix(prefix)?.to_path_buf();
69-
let mut entry_name = extract_dir.clone();
70-
entry_name.push(stripped_entry_header_path);
71-
72-
if entry_type.is_dir() || entry_name.is_dir() {
73-
create_dir_all(&entry_name)?;
74-
} else if entry_type.is_file() {
75-
let mut output_file = File::create(&entry_name)?;
76-
copy(&mut entry, &mut output_file)?;
77-
extracted_bytes += entry_size;
78-
79-
#[cfg(unix)]
80-
{
81-
use std::os::unix::fs::PermissionsExt;
82-
output_file.set_permissions(std::fs::Permissions::from_mode(file_mode))?;
83-
}
84-
files.push(entry_name);
85-
} else if entry_type.is_symlink() {
86-
#[cfg(unix)]
87-
if let Some(symlink_target) = entry.link_name()? {
88-
let symlink_path = entry_name.clone();
89-
std::os::unix::fs::symlink(symlink_target.as_ref(), symlink_path)?;
90-
files.push(entry_name);
91-
}
92-
}
93-
}
43+
let mut archive_extract_directories = ExtractDirectories::default();
44+
archive_extract_directories.add_mapping(Regex::new(".*")?, extract_dir.clone());
45+
let files = tar_gz_extract(bytes, archive_extract_directories)?;
9446

9547
if out_dir.exists() {
9648
debug!(
@@ -113,13 +65,6 @@ pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
11365
remove_file(lock_file)?;
11466
}
11567

116-
let number_of_files = files.len();
117-
debug!(
118-
"Extracting {} files totalling {}",
119-
number_of_files.to_formatted_string(&Locale::en),
120-
human_bytes(extracted_bytes as f64)
121-
);
122-
12368
Ok(files)
12469
}
12570

postgresql_archive/src/configuration/zonky/extractor.rs

Lines changed: 11 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,13 @@
1+
use crate::extractor::{tar_xz_extract, ExtractDirectories};
12
use crate::Error::Unexpected;
23
use crate::Result;
3-
use human_bytes::human_bytes;
4-
use num_format::{Locale, ToFormattedString};
5-
use std::fs::{create_dir_all, remove_dir_all, remove_file, rename, File};
6-
use std::io::{copy, BufReader, Cursor};
4+
use regex::Regex;
5+
use std::fs::{create_dir_all, remove_dir_all, remove_file, rename};
6+
use std::io::Cursor;
77
use std::path::{Path, PathBuf};
88
use std::thread::sleep;
99
use std::time::Duration;
10-
use tar::Archive;
1110
use tracing::{debug, instrument, warn};
12-
use xz2::bufread::XzDecoder;
1311
use zip::ZipArchive;
1412

1513
/// Extracts the compressed tar `bytes` to the [out_dir](Path).
@@ -19,13 +17,13 @@ use zip::ZipArchive;
1917
#[allow(clippy::case_sensitive_file_extension_comparisons)]
2018
#[allow(clippy::cast_precision_loss)]
2119
#[instrument(skip(bytes))]
22-
pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
23-
let mut files = Vec::new();
20+
pub fn extract(bytes: &Vec<u8>, extract_directories: ExtractDirectories) -> Result<Vec<PathBuf>> {
21+
let out_dir = extract_directories.get_path(".")?;
2422
let parent_dir = if let Some(parent) = out_dir.parent() {
2523
parent
2624
} else {
2725
debug!("No parent directory for {}", out_dir.to_string_lossy());
28-
out_dir
26+
out_dir.as_path()
2927
};
3028

3129
create_dir_all(parent_dir)?;
@@ -39,7 +37,7 @@ pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
3937
out_dir.to_string_lossy()
4038
);
4139
remove_file(&lock_file)?;
42-
return Ok(files);
40+
return Ok(Vec::new());
4341
}
4442

4543
let extract_dir = tempfile::tempdir_in(parent_dir)?.into_path();
@@ -64,51 +62,9 @@ pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
6462
return Err(Unexpected("Failed to find archive file".to_string()));
6563
}
6664

67-
let input = BufReader::new(Cursor::new(archive_bytes));
68-
let decoder = XzDecoder::new(input);
69-
let mut archive = Archive::new(decoder);
70-
let mut extracted_bytes = 0;
71-
72-
for archive_entry in archive.entries()? {
73-
let mut entry = archive_entry?;
74-
let entry_header = entry.header();
75-
let entry_type = entry_header.entry_type();
76-
let entry_size = entry_header.size()?;
77-
#[cfg(unix)]
78-
let file_mode = entry_header.mode()?;
79-
80-
let entry_header_path = entry_header.path()?.to_path_buf();
81-
let mut entry_name = extract_dir.clone();
82-
entry_name.push(entry_header_path);
83-
84-
if let Some(parent) = entry_name.parent() {
85-
if !parent.exists() {
86-
create_dir_all(parent)?;
87-
}
88-
}
89-
90-
if entry_type.is_dir() || entry_name.is_dir() {
91-
create_dir_all(&entry_name)?;
92-
} else if entry_type.is_file() {
93-
let mut output_file = File::create(&entry_name)?;
94-
copy(&mut entry, &mut output_file)?;
95-
extracted_bytes += entry_size;
96-
97-
#[cfg(unix)]
98-
{
99-
use std::os::unix::fs::PermissionsExt;
100-
output_file.set_permissions(std::fs::Permissions::from_mode(file_mode))?;
101-
}
102-
files.push(entry_name);
103-
} else if entry_type.is_symlink() {
104-
#[cfg(unix)]
105-
if let Some(symlink_target) = entry.link_name()? {
106-
let symlink_path = entry_name.clone();
107-
std::os::unix::fs::symlink(symlink_target.as_ref(), symlink_path)?;
108-
files.push(entry_name);
109-
}
110-
}
111-
}
65+
let mut archive_extract_directories = ExtractDirectories::default();
66+
archive_extract_directories.add_mapping(Regex::new(".*")?, extract_dir.clone());
67+
let files = tar_xz_extract(&archive_bytes, archive_extract_directories)?;
11268

11369
if out_dir.exists() {
11470
debug!(
@@ -131,13 +87,6 @@ pub fn extract(bytes: &Vec<u8>, out_dir: &Path) -> Result<Vec<PathBuf>> {
13187
remove_file(lock_file)?;
13288
}
13389

134-
let number_of_files = files.len();
135-
debug!(
136-
"Extracting {} files totalling {}",
137-
number_of_files.to_formatted_string(&Locale::en),
138-
human_bytes(extracted_bytes as f64)
139-
);
140-
14190
Ok(files)
14291
}
14392

postgresql_archive/src/extractor/mod.rs

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,10 @@
1+
mod model;
12
pub mod registry;
3+
mod tar_gz_extractor;
4+
mod tar_xz_extractor;
5+
mod zip_extractor;
6+
7+
pub use model::ExtractDirectories;
8+
pub use tar_gz_extractor::extract as tar_gz_extract;
9+
pub use tar_xz_extractor::extract as tar_xz_extract;
10+
pub use zip_extractor::extract as zip_extract;

0 commit comments

Comments
 (0)