Skip to content

Commit 7d2507e

Browse files
mdbook-slide-evaluator: ignore redirect pages to speed up the evaluation when there are many redirects
1 parent 1d7c916 commit 7d2507e

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

mdbook-slide-evaluator/src/main.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ struct Args {
6161
violations_only: bool,
6262
/// directory of the book that is evaluated
6363
source_dir: PathBuf,
64+
/// ignore HTML pages that redirect to canonical pages
65+
#[arg(long, default_value_t = false)]
66+
ignore_redirects: bool,
6467
}
6568

6669
#[tokio::main]
@@ -71,7 +74,8 @@ async fn main() -> anyhow::Result<()> {
7174
let args = Args::parse();
7275

7376
// gather information about the book from the filesystem
74-
let book = Book::from_html_slides(args.source_dir.clone())?;
77+
let book =
78+
Book::from_html_slides(args.source_dir.clone(), args.ignore_redirects)?;
7579

7680
// create a new webclient that is used by the evaluator
7781
let webclient: fantoccini::Client =

mdbook-slide-evaluator/src/slides.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::fs::File;
16+
use std::io::Read;
1517
use std::path::{Path, PathBuf};
1618
use std::sync::Arc;
1719

20+
use anyhow::Ok;
1821
use log::debug;
1922

2023
/// a slide is a page in the book
@@ -33,17 +36,26 @@ pub struct Book {
3336

3437
impl Book {
3538
/// create a book from all html files in the source_dir
36-
pub fn from_html_slides(source_dir: PathBuf) -> anyhow::Result<Book> {
39+
pub fn from_html_slides(
40+
source_dir: PathBuf,
41+
ignore_redirects: bool,
42+
) -> anyhow::Result<Book> {
3743
let mut slides = vec![];
3844
let files = glob::glob(&format!(
3945
"{}/**/*.html",
4046
source_dir.to_str().expect("invalid path")
4147
))?;
4248
for file in files {
43-
let slide = Slide { filename: file?.into() };
49+
let file = file?;
50+
if ignore_redirects && file_is_redirect(&file)? {
51+
debug!("slide {file:?} is a redirect page");
52+
continue;
53+
}
54+
let slide = Slide { filename: file.into() };
4455
debug!("add {:?}", slide);
4556
slides.push(slide);
4657
}
58+
debug!("processing {} slides", slides.len());
4759
Ok(Book { _source_dir: source_dir, slides })
4860
}
4961

@@ -52,3 +64,20 @@ impl Book {
5264
&self.slides
5365
}
5466
}
67+
68+
/// Leading bytes of a redirect page; `file_is_redirect` compares the start
/// of each HTML file against this text byte-for-byte.
///
/// NOTE(review): because the comparison is exact, every character inside
/// this literal (including line breaks and any indentation) must match the
/// generated redirect pages -- confirm against real mdbook output.
const HTML_REDIRECT_PAGE: &str = r#"<!DOCTYPE html>
69+
<html lang="en">
70+
<head>
71+
<meta charset="utf-8">
72+
<title>Redirecting...</title>"#;
73+
74+
/// check if the file is starting with the mdbook redirect page.
75+
/// This method is optimized to not read the entire file but only the start
76+
fn file_is_redirect(filename: &PathBuf) -> anyhow::Result<bool> {
77+
let mut file = File::open(filename)?;
78+
// create a buffer with the exact length of the text that is checked
79+
let mut file_start_buffer = [0u8; HTML_REDIRECT_PAGE.len()];
80+
// read only the part that is relevant
81+
file.read_exact(&mut file_start_buffer)?;
82+
Ok(file_start_buffer.eq(HTML_REDIRECT_PAGE.as_bytes()))
83+
}

0 commit comments

Comments
 (0)