Skip to content

Commit 855b164

Browse files
committed
set & handle etags/if-none-match for first rustdoc assets
1 parent 4b62ba6 commit 855b164

File tree

10 files changed

+363
-104
lines changed

10 files changed

+363
-104
lines changed

clippy.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,8 @@ reason = """
1414
[[disallowed-types]]
1515
path = "semver::Version"
1616
reason = "use our own custom db::types::version::Version so you can use it with sqlx"
17+
18+
[[disallowed-types]]
19+
path = "axum_extra::headers::IfNoneMatch"
20+
reason = "use our own custom web::headers::IfNoneMatch for sane behaviour with missing headers"
21+

src/storage/database.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{BlobUpload, FileRange, StorageMetrics, StreamingBlob};
2-
use crate::{InstanceMetrics, db::Pool, error::Result};
2+
use crate::{InstanceMetrics, db::Pool, error::Result, web::headers::compute_etag};
33
use chrono::{DateTime, Utc};
44
use futures_util::stream::{Stream, TryStreamExt};
55
use sqlx::Acquire;
@@ -123,13 +123,16 @@ impl DatabaseBackend {
123123
});
124124
let content = result.content.unwrap_or_default();
125125
let content_len = content.len();
126+
127+
let etag = compute_etag(&content);
126128
Ok(StreamingBlob {
127129
path: result.path,
128130
mime: result
129131
.mime
130132
.parse()
131133
.unwrap_or(mime::APPLICATION_OCTET_STREAM),
132134
date_updated: result.date_updated,
135+
etag: Some(etag),
133136
content: Box::new(io::Cursor::new(content)),
134137
content_length: content_len,
135138
compression,

src/storage/mod.rs

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use crate::{
2222
utils::spawn_blocking,
2323
};
2424
use anyhow::anyhow;
25+
use axum_extra::headers;
2526
use chrono::{DateTime, Utc};
2627
use dashmap::DashMap;
2728
use fn_error_context::context;
@@ -57,7 +58,7 @@ type FileRange = RangeInclusive<u64>;
5758
pub(crate) struct PathNotFoundError;
5859

5960
/// represents a blob to be uploaded to storage.
60-
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
61+
#[derive(Clone, Debug, PartialEq, Eq)]
6162
pub(crate) struct BlobUpload {
6263
pub(crate) path: String,
6364
pub(crate) mime: Mime,
@@ -76,11 +77,12 @@ impl From<Blob> for BlobUpload {
7677
}
7778
}
7879

79-
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
80+
#[derive(Clone, Debug, PartialEq, Eq)]
8081
pub(crate) struct Blob {
8182
pub(crate) path: String,
8283
pub(crate) mime: Mime,
8384
pub(crate) date_updated: DateTime<Utc>,
85+
pub(crate) etag: Option<headers::ETag>,
8486
pub(crate) content: Vec<u8>,
8587
pub(crate) compression: Option<CompressionAlgorithm>,
8688
}
@@ -89,6 +91,7 @@ pub(crate) struct StreamingBlob {
8991
pub(crate) path: String,
9092
pub(crate) mime: Mime,
9193
pub(crate) date_updated: DateTime<Utc>,
94+
pub(crate) etag: Option<headers::ETag>,
9295
pub(crate) compression: Option<CompressionAlgorithm>,
9396
pub(crate) content_length: usize,
9497
pub(crate) content: Box<dyn AsyncBufRead + Unpin + Send>,
@@ -100,6 +103,7 @@ impl std::fmt::Debug for StreamingBlob {
100103
.field("path", &self.path)
101104
.field("mime", &self.mime)
102105
.field("date_updated", &self.date_updated)
106+
.field("etag", &self.etag)
103107
.field("compression", &self.compression)
104108
.finish()
105109
}
@@ -134,6 +138,7 @@ impl StreamingBlob {
134138
);
135139

136140
self.compression = None;
141+
// not touching the etag, it should represent the original content
137142
Ok(self)
138143
}
139144

@@ -148,12 +153,27 @@ impl StreamingBlob {
148153
path: self.path,
149154
mime: self.mime,
150155
date_updated: self.date_updated,
156+
etag: self.etag, // downloading doesn't change the etag
151157
content: content.into_inner(),
152158
compression: self.compression,
153159
})
154160
}
155161
}
156162

163+
impl From<Blob> for StreamingBlob {
164+
fn from(value: Blob) -> Self {
165+
Self {
166+
path: value.path,
167+
mime: value.mime,
168+
date_updated: value.date_updated,
169+
etag: value.etag,
170+
compression: value.compression,
171+
content_length: value.content.len(),
172+
content: Box::new(io::Cursor::new(value.content)),
173+
}
174+
}
175+
}
176+
157177
pub fn get_file_list<P: AsRef<Path>>(path: P) -> Box<dyn Iterator<Item = Result<PathBuf>>> {
158178
let path = path.as_ref().to_path_buf();
159179
if path.is_file() {
@@ -583,6 +603,7 @@ impl AsyncStorage {
583603
path: format!("{archive_path}/{path}"),
584604
mime: detect_mime(path),
585605
date_updated: stream.date_updated,
606+
etag: stream.etag,
586607
content: stream.content,
587608
content_length: stream.content_length,
588609
compression: None,
@@ -756,7 +777,6 @@ impl AsyncStorage {
756777
mime,
757778
content,
758779
compression: Some(alg),
759-
// this field is ignored by the backend
760780
});
761781
}
762782
Ok((blobs, file_paths))
@@ -1135,7 +1155,7 @@ pub(crate) fn source_archive_path(name: &str, version: &Version) -> String {
11351155
#[cfg(test)]
11361156
mod test {
11371157
use super::*;
1138-
use crate::test::TestEnvironment;
1158+
use crate::{test::TestEnvironment, web::headers::compute_etag};
11391159
use std::env;
11401160
use test_case::test_case;
11411161

@@ -1151,6 +1171,7 @@ mod test {
11511171
mime: mime::APPLICATION_OCTET_STREAM,
11521172
date_updated: Utc::now(),
11531173
compression: alg,
1174+
etag: Some(compute_etag(&content)),
11541175
content_length: content.len(),
11551176
content: Box::new(io::Cursor::new(content)),
11561177
}
@@ -1292,6 +1313,7 @@ mod test {
12921313
path: "some_path.db".into(),
12931314
mime: mime::APPLICATION_OCTET_STREAM,
12941315
date_updated: Utc::now(),
1316+
etag: None,
12951317
compression: Some(alg),
12961318
content_length: compressed_index_content.len(),
12971319
content: Box::new(io::Cursor::new(compressed_index_content)),
@@ -1494,9 +1516,8 @@ mod test {
14941516
/// This is the preferred way to test whether backends work.
14951517
#[cfg(test)]
14961518
mod backend_tests {
1497-
use crate::test::TestEnvironment;
1498-
14991519
use super::*;
1520+
use crate::{test::TestEnvironment, web::headers::compute_etag};
15001521

15011522
fn get_file_info(files: &[FileEntry], path: impl AsRef<Path>) -> Option<&FileEntry> {
15021523
let path = path.as_ref();
@@ -1560,6 +1581,9 @@ mod backend_tests {
15601581
let found = storage.get(path, usize::MAX)?;
15611582
assert_eq!(blob.mime, found.mime);
15621583
assert_eq!(blob.content, found.content);
1584+
// while our db backend just does MD5,
1585+
// it seems like minio does it too :)
1586+
assert_eq!(found.etag, Some(compute_etag(&blob.content)));
15631587

15641588
// default visibility is private
15651589
assert!(!storage.get_public_access(path)?);
@@ -1593,20 +1617,26 @@ mod backend_tests {
15931617
content: b"test content\n".to_vec(),
15941618
};
15951619

1620+
let full_etag = compute_etag(&blob.content);
1621+
15961622
storage.store_blobs(vec![blob.clone()])?;
15971623

1598-
assert_eq!(
1599-
blob.content[0..=4],
1600-
storage
1601-
.get_range("foo/bar.txt", usize::MAX, 0..=4, None)?
1602-
.content
1603-
);
1604-
assert_eq!(
1605-
blob.content[5..=12],
1606-
storage
1607-
.get_range("foo/bar.txt", usize::MAX, 5..=12, None)?
1608-
.content
1609-
);
1624+
let mut etags = Vec::new();
1625+
1626+
for range in [0..=4, 5..=12] {
1627+
let partial_blob = storage.get_range("foo/bar.txt", usize::MAX, range.clone(), None)?;
1628+
let range = (*range.start() as usize)..=(*range.end() as usize);
1629+
assert_eq!(blob.content[range], partial_blob.content);
1630+
1631+
etags.push(partial_blob.etag.unwrap());
1632+
}
1633+
if let [etag1, etag2] = &etags[..] {
1634+
assert_ne!(etag1, etag2);
1635+
assert_ne!(etag1, &full_etag);
1636+
assert_ne!(etag2, &full_etag);
1637+
} else {
1638+
panic!("expected two etags");
1639+
}
16101640

16111641
for path in &["bar.txt", "baz.txt", "foo/baz.txt"] {
16121642
assert!(

src/storage/s3.rs

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use super::{BlobUpload, FileRange, StorageMetrics, StreamingBlob};
2-
use crate::{Config, InstanceMetrics};
2+
use crate::{Config, InstanceMetrics, web::headers::compute_etag};
33
use anyhow::{Context as _, Error};
44
use async_stream::try_stream;
55
use aws_config::BehaviorVersion;
@@ -10,14 +10,15 @@ use aws_sdk_s3::{
1010
types::{Delete, ObjectIdentifier, Tag, Tagging},
1111
};
1212
use aws_smithy_types_convert::date_time::DateTimeExt;
13+
use axum_extra::headers;
1314
use chrono::Utc;
1415
use futures_util::{
1516
future::TryFutureExt,
1617
pin_mut,
1718
stream::{FuturesUnordered, Stream, StreamExt},
1819
};
1920
use std::sync::Arc;
20-
use tracing::{error, warn};
21+
use tracing::{error, instrument, warn};
2122

2223
const PUBLIC_ACCESS_TAG: &str = "static-cloudfront-access";
2324
const PUBLIC_ACCESS_VALUE: &str = "allow";
@@ -180,6 +181,7 @@ impl S3Backend {
180181
.map(|_| ())
181182
}
182183

184+
#[instrument(skip(self))]
183185
pub(super) async fn get_stream(
184186
&self,
185187
path: &str,
@@ -190,7 +192,11 @@ impl S3Backend {
190192
.get_object()
191193
.bucket(&self.bucket)
192194
.key(path)
193-
.set_range(range.map(|r| format!("bytes={}-{}", r.start(), r.end())))
195+
.set_range(
196+
range
197+
.as_ref()
198+
.map(|r| format!("bytes={}-{}", r.start(), r.end())),
199+
)
194200
.send()
195201
.await
196202
.convert_errors()?;
@@ -204,6 +210,41 @@ impl S3Backend {
204210

205211
let compression = res.content_encoding.as_ref().and_then(|s| s.parse().ok());
206212

213+
let etag = if let Some(s3_etag) = res.e_tag
214+
&& !s3_etag.is_empty()
215+
{
216+
if let Some(range) = range {
217+
// we can generate a unique etag for a range of the remote object too,
218+
// by just concatenating the original etag with the range start and end.
219+
//
220+
// About edge cases:
221+
// When the etag of the full archive changes after a rebuild,
222+
// all derived etags for files inside the archive will also change.
223+
//
224+
// This could lead to _changed_ ETags, where the single file inside the archive
225+
// is actually the same.
226+
//
227+
// AWS's implementation (and minio's) is to just use an MD5 hash of the file as
228+
// ETag
229+
Some(compute_etag(format!(
230+
"{}-{}-{}",
231+
s3_etag,
232+
range.start(),
233+
range.end()
234+
)))
235+
} else {
236+
match s3_etag.parse::<headers::ETag>() {
237+
Ok(etag) => Some(etag),
238+
Err(err) => {
239+
error!(?err, s3_etag, "Failed to parse ETag from S3");
240+
None
241+
}
242+
}
243+
}
244+
} else {
245+
None
246+
};
247+
207248
Ok(StreamingBlob {
208249
path: path.into(),
209250
mime: res
@@ -213,6 +254,7 @@ impl S3Backend {
213254
.parse()
214255
.unwrap_or(mime::APPLICATION_OCTET_STREAM),
215256
date_updated,
257+
etag,
216258
content_length: res
217259
.content_length
218260
.and_then(|length| length.try_into().ok())

0 commit comments

Comments
 (0)