Skip to content

Commit fa185ea

Browse files
committed
Merge remote-tracking branch 'origin/main' into bikeshed-experiment
2 parents 1f6b200 + 0af3f35 commit fa185ea

15 files changed

+797
-84
lines changed

Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM rust:1.88-slim as builder
1+
FROM rust:1.90-slim AS builder
22
WORKDIR /whatwg/html-build
33
COPY Cargo.lock Cargo.toml ./
44
COPY src ./src/
@@ -13,11 +13,11 @@ COPY --from=builder /usr/local/cargo/bin/html-build /bin/html-build
1313

1414
COPY --from=ghcr.io/whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi
1515

16-
ENV PIPX_HOME /opt/pipx
17-
ENV PIPX_BIN_DIR /usr/bin
16+
ENV PIPX_HOME=/opt/pipx
17+
ENV PIPX_BIN_DIR=/usr/bin
1818
RUN pipx install bs-highlighter
1919

2020
COPY . /whatwg/html-build/
2121

22-
ENV SKIP_BUILD_UPDATE_CHECK true
22+
ENV SKIP_BUILD_UPDATE_CHECK=true
2323
ENTRYPOINT ["bash", "/whatwg/html-build/build.sh"]

build.sh

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ function confirmRepo {
400400
echo
401401

402402
local build_yn
403-
read -r -e -p "Y or N? " yn
403+
read -r -e -p "Y or N? " build_yn
404404
if [[ $build_yn == "y" || $build_yn == "Y" ]]; then
405405
return
406406
else
@@ -538,8 +538,8 @@ function doServerBuild {
538538
# server getting confused about their absence.) demos/ needs to be sent in full for inlining.
539539
local zip_args=(
540540
--recurse-paths "$HTML_TEMP/$input_zip" . \
541-
--include ./source ./404.html ./link-fixup.js ./html-dfn.js ./styles.css \
542-
./fonts/ ./images/ ./dev/ ./demos/\*
541+
--include ./source ./404.html "./*.js" ./styles.css \
542+
./fonts/ ./images/ ./dev/ "./demos/*"
543543
)
544544
$QUIET && zip_args+=( --quiet )
545545
(cd "$HTML_SOURCE" && zip "${zip_args[@]}")
@@ -662,14 +662,7 @@ function processSource {
662662
$QUIET || echo "Pre-processing the source..."
663663
cp -p entities/out/entities.inc "$HTML_CACHE"
664664
cp -p entities/out/entities-dtd.url "$HTML_CACHE"
665-
if hash html-build 2>/dev/null; then
666-
html-build <"$HTML_SOURCE/$source_location" >"$HTML_TEMP/source-whatwg-complete"
667-
else
668-
local cargo_args=( --release )
669-
$VERBOSE && cargo_args+=( --verbose )
670-
$QUIET && cargo_args+=( --quiet )
671-
cargo run "${cargo_args[@]}" <"$HTML_SOURCE/$source_location" >"$HTML_TEMP/source-whatwg-complete"
672-
fi
665+
runRustTools <"$HTML_SOURCE/$source_location" >"$HTML_TEMP/source-whatwg-complete"
673666

674667
if [[ $USE_BIKESHED == "true" ]]; then
675668
clearDir "$HTML_TEMP/bikeshed-output"
@@ -711,7 +704,7 @@ function processSource {
711704
if [[ $USE_BIKESHED == "true" ]]; then
712705
mv "$HTML_TEMP/bikeshed-output/index.html" "$HTML_OUTPUT/index.html"
713706
else
714-
mv "$HTML_TEMP/wattsi-output/index-html" "$HTML_OUTPUT/index.html"
707+
runRustTools --singlepage-post <"$HTML_TEMP/wattsi-output/index-html" >"$HTML_OUTPUT/index.html"
715708
fi
716709

717710
if [[ $SINGLE_PAGE_ONLY == "false" ]]; then
@@ -738,8 +731,7 @@ function processSource {
738731
Disallow: /commit-snapshots/
739732
Disallow: /review-drafts/" > "$HTML_OUTPUT/robots.txt"
740733
cp -p "$HTML_SOURCE/404.html" "$HTML_OUTPUT"
741-
cp -p "$HTML_SOURCE/link-fixup.js" "$HTML_OUTPUT"
742-
cp -p "$HTML_SOURCE/html-dfn.js" "$HTML_OUTPUT"
734+
cp -p "$HTML_SOURCE/"*.js "$HTML_OUTPUT"
743735
cp -p "$HTML_SOURCE/styles.css" "$HTML_OUTPUT"
744736
cp -pR "$HTML_SOURCE/fonts" "$HTML_OUTPUT"
745737
cp -pR "$HTML_SOURCE/images" "$HTML_OUTPUT"
@@ -774,6 +766,22 @@ function checkWattsi {
774766
fi
775767
}
776768

769+
# Invokes the Rust-based build tools. An html-build binary found in $PATH is used directly;
# when there is none, the tools are compiled and started through `cargo run`.
# - Arguments: forwarded unchanged to the tools
# - Output: whatever the tools output
function runRustTools {
  # Fast path: a prebuilt binary is available.
  if hash html-build 2>/dev/null; then
    html-build "$@"
    return
  fi

  # Fallback: let cargo build (release profile) and run the tools, honoring the verbosity flags.
  local cargo_args=( --release )
  if $VERBOSE; then
    cargo_args+=( --verbose )
  fi
  if $QUIET; then
    cargo_args+=( --quiet )
  fi
  # Everything after "--" goes to the tools rather than to cargo itself.
  cargo run "${cargo_args[@]}" -- "$@"
}
784+
777785
# Runs Wattsi on the given file, either locally or using the web service
778786
# - Arguments:
779787
# - $1: the file to run Wattsi on

ci-build/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This Dockerfile is just used to run on Travis CI in an environment that can easily and repeatedly
22
# install our build dependencies.
3-
FROM rust:1.88-slim as builder
3+
FROM rust:1.90-slim AS builder
44
WORKDIR /whatwg/html-build
55
COPY Cargo.lock Cargo.toml ./
66
COPY src ./src/
@@ -27,8 +27,8 @@ COPY --from=builder /usr/local/cargo/bin/html-build /bin/html-build
2727

2828
COPY --from=ghcr.io/whatwg/wattsi:latest /whatwg/wattsi/bin/wattsi /bin/wattsi
2929

30-
ENV PIPX_HOME /opt/pipx
31-
ENV PIPX_BIN_DIR /usr/bin
30+
ENV PIPX_HOME=/opt/pipx
31+
ENV PIPX_BIN_DIR=/usr/bin
3232
RUN pipx install bs-highlighter
3333

3434
# The DockerHub container for the validator only contains the server version, so we get the .jar

src/anchor_permanence.rs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
//! Postprocess step for ensuring anchor permanence: see
2+
//! https://whatwg.org/working-mode#anchors.
3+
//!
4+
//! Scans for the `<script type="text/required-ids">` element, which lists
5+
//! (whitespace-separated) IDs that must appear somewhere in the document.
6+
//! After verifying that all listed IDs are present, removes the script element.
7+
8+
use crate::dom_utils::NodeHandleExt;
9+
use html5ever::{QualName, local_name, ns};
10+
use markup5ever_rcdom::Handle;
11+
use std::collections::HashSet;
12+
13+
pub struct Processor {
14+
required_ids: HashSet<String>,
15+
script_node: Option<Handle>,
16+
}
17+
18+
impl Processor {
19+
pub fn new() -> Self {
20+
Self {
21+
required_ids: HashSet::new(),
22+
script_node: None,
23+
}
24+
}
25+
26+
pub fn visit(&mut self, node: &Handle) {
27+
// Capture and parse the <script type="text/required-ids"> element exactly once.
28+
if node.is_html_element(&local_name!("script")) {
29+
const TYPE: QualName = QualName {
30+
prefix: None,
31+
ns: ns!(),
32+
local: local_name!("type"),
33+
};
34+
if node.get_attribute(&TYPE).as_deref() == Some("text/required-ids") {
35+
assert!(
36+
self.script_node.is_none(),
37+
"multiple required-ids scripts encountered"
38+
);
39+
self.script_node = Some(node.clone());
40+
// Gather all text within the script and split on any ASCII whitespace.
41+
let content = node.text_content();
42+
for id_token in content.split_ascii_whitespace() {
43+
if !id_token.is_empty() {
44+
self.required_ids.insert(id_token.to_string());
45+
}
46+
}
47+
}
48+
}
49+
50+
// For elements with an id attribute, mark the ID as seen.
51+
if self.required_ids.is_empty() {
52+
return;
53+
}
54+
const ID_QN: QualName = QualName {
55+
prefix: None,
56+
ns: ns!(),
57+
local: local_name!("id"),
58+
};
59+
if let Some(id) = node.get_attribute(&ID_QN) {
60+
self.required_ids.remove(id.as_ref());
61+
}
62+
}
63+
64+
pub fn apply(self) -> std::io::Result<()> {
65+
if !self.required_ids.is_empty() {
66+
let mut missing: Vec<_> = self.required_ids.into_iter().collect();
67+
missing.sort();
68+
return Err(std::io::Error::new(
69+
std::io::ErrorKind::InvalidData,
70+
format!(
71+
"Missing required IDs for anchor permanence: {}",
72+
missing.join(", ")
73+
),
74+
));
75+
}
76+
77+
// Remove the script element (if present) after verification.
78+
if let Some(script) = self.script_node {
79+
script.remove();
80+
}
81+
Ok(())
82+
}
83+
}
84+
85+
// Unit tests for the anchor-permanence processor. Each test parses an HTML fixture, passes
// every node to Processor::visit via dom_utils::scan_dom, and then checks the result.
#[cfg(test)]
86+
mod tests {
87+
use super::*;
88+
use crate::dom_utils;
89+
use crate::parser::{parse_document_async, tests::serialize_for_test};
90+
use std::io;
91+
92+
// All three listed IDs exist in the body, so apply() succeeds and the serialized output no
// longer mentions the required-ids script.
#[tokio::test]
93+
async fn removes_script_from_head() -> io::Result<()> {
94+
let parsed = parse_document_async(r#"<!DOCTYPE html>
95+
<html><head><script type="text/required-ids">a b c</script></head><body><div id="a"></div><p id="b"></p><section id="c"></section></body></html>
96+
"#.as_bytes()).await?;
97+
let document = parsed.document().clone();
98+
let mut processor = Processor::new();
99+
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
100+
processor.apply().unwrap();
101+
let serialized = serialize_for_test(&[document]);
102+
assert!(!serialized.contains("text/required-ids"));
103+
Ok(())
104+
}
105+
106+
// Without a required-ids script, apply() succeeds and the serialized document is the same
// before and after processing.
#[tokio::test]
107+
async fn no_script_present_noop() -> io::Result<()> {
108+
let parsed = parse_document_async(
109+
r#"<!DOCTYPE html>
110+
<html><head></head><body></body></html>
111+
"#
112+
.as_bytes(),
113+
)
114+
.await?;
115+
let document = parsed.document().clone();
116+
// Snapshot taken before the processor runs, for comparison afterwards.
let before = serialize_for_test(&[document.clone()]);
117+
let mut processor = Processor::new();
118+
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
119+
processor.apply().unwrap();
120+
assert_eq!(before, serialize_for_test(&[document]));
121+
Ok(())
122+
}
123+
124+
// IDs separated by newlines as well as spaces must each be picked up as a separate token;
// apply() would fail with a missing-ID error otherwise.
#[tokio::test]
125+
async fn whitespace_splitting() -> io::Result<()> {
126+
// Includes indentation, multiple spaces, and newlines in the script content.
127+
let parsed = parse_document_async(r#"<!DOCTYPE html><html><head><script type="text/required-ids">
128+
foo bar
129+
baz
130+
qux
131+
</script></head><body><div id="foo"></div><div id="bar"></div><div id="baz"></div><div id="qux"></div></body></html>
132+
"#.as_bytes()).await?;
133+
let document = parsed.document().clone();
134+
let mut processor = Processor::new();
135+
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
136+
processor.apply().unwrap();
137+
let serialized = serialize_for_test(&[document]);
138+
assert!(!serialized.contains("text/required-ids"));
139+
Ok(())
140+
}
141+
142+
// Only `foo` exists in the body; apply() must fail and name the missing IDs in sorted order.
#[tokio::test]
143+
async fn errors_on_missing_ids() -> io::Result<()> {
144+
let parsed = parse_document_async(r#"<!DOCTYPE html>
145+
<html><head><script type="text/required-ids">foo bar baz</script></head><body><div id="foo"></div></body></html>
146+
"#.as_bytes()).await?;
147+
let document = parsed.document().clone();
148+
let mut processor = Processor::new();
149+
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
150+
let err = processor.apply().expect_err("expected missing IDs error");
151+
assert!(
152+
err.to_string()
153+
.contains("Missing required IDs for anchor permanence: bar, baz")
154+
);
155+
Ok(())
156+
}
157+
158+
// A second required-ids script trips the assertion in visit() during the scan, so apply() is
// never called here.
#[tokio::test]
159+
#[should_panic(expected = "multiple required-ids scripts encountered")]
160+
async fn panics_on_multiple_required_ids_scripts() {
161+
let parsed = parse_document_async(r#"<!DOCTYPE html><html><head>
162+
<script type="text/required-ids">a b</script>
163+
<script type="text/required-ids">c d</script>
164+
</head><body><div id="a"></div><div id="b"></div><div id="c"></div><div id="d"></div></body></html>"#.as_bytes()).await.unwrap();
165+
let document = parsed.document().clone();
166+
let mut processor = Processor::new();
167+
dom_utils::scan_dom(&document, &mut |h| processor.visit(h));
168+
}
169+
}

src/annotate_attributes.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ mod tests {
311311
// before and after the attributes table, to demonstrate that this is
312312
// not sensitive to which order they occur in (i.e., these could be
313313
// reordered in the HTML spec).
314-
let document = parse_document_async(
314+
let parsed = parse_document_async(
315315
r#"
316316
<!DOCTYPE html>
317317
<h3>The a element</h3>
@@ -333,6 +333,7 @@ mod tests {
333333
<dd><code data-x="attr-area-href">href</code>
334334
</dl>
335335
"#.trim().as_bytes()).await?;
336+
let document = parsed.document().clone();
336337
let mut proc = Processor::new();
337338
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
338339
proc.apply().await?;
@@ -368,7 +369,7 @@ mod tests {
368369
async fn test_variant() -> io::Result<()> {
369370
// This checks that <!-- variant --> and <!-- or: --> work correctly.
370371
// i.e., the variant description is used where requested
371-
let document = parse_document_async(
372+
let parsed = parse_document_async(
372373
r#"
373374
<!DOCTYPE html>
374375
<h3>The a element</h3>
@@ -386,6 +387,7 @@ mod tests {
386387
<dd><code data-x="attr-area-href">href</code><!-- variant -->
387388
</dl>
388389
"#.trim().as_bytes()).await?;
390+
let document = parsed.document().clone();
389391
let mut proc = Processor::new();
390392
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
391393
proc.apply().await?;
@@ -415,7 +417,7 @@ mod tests {
415417
#[tokio::test]
416418
async fn test_special_semantics() -> io::Result<()> {
417419
// Checks that the special rules for using : instead of an em dash work.
418-
let document = parse_document_async(
420+
let parsed = parse_document_async(
419421
r#"
420422
<!DOCTYPE html>
421423
<h3>The a element</h3>
@@ -428,6 +430,7 @@ mod tests {
428430
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Anchor name
429431
</tbody></table>
430432
"#.trim().as_bytes()).await?;
433+
let document = parsed.document().clone();
431434
let mut proc = Processor::new();
432435
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
433436
proc.apply().await?;
@@ -451,7 +454,7 @@ mod tests {
451454
#[tokio::test]
452455
async fn test_special_semantics_multiple() -> io::Result<()> {
453456
// Checks that the special rules for joining any special semantics with a ; work.
454-
let document = parse_document_async(
457+
let parsed = parse_document_async(
455458
r#"
456459
<!DOCTYPE html>
457460
<h3>The a element</h3>
@@ -465,6 +468,7 @@ mod tests {
465468
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Name of the anchor
466469
</tbody></table>
467470
"#.trim().as_bytes()).await?;
471+
let document = parsed.document().clone();
468472
let mut proc = Processor::new();
469473
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
470474
proc.apply().await?;
@@ -490,7 +494,7 @@ mod tests {
490494
async fn test_identical_links() -> io::Result<()> {
491495
// This checks the same identifier can be linked multiple times without
492496
// repeating the description.
493-
let document = parse_document_async(
497+
let parsed = parse_document_async(
494498
r#"
495499
<!DOCTYPE html>
496500
<h3>The img element</h3>
@@ -508,6 +512,7 @@ mod tests {
508512
<tr><th><code data-x>width</code><td><code data-x="attr-dim-width">img</code>; <code data-x="attr-dim-width">video</code><td>Horizontal dimension
509513
</tbody></table>
510514
"#.trim().as_bytes()).await?;
515+
let document = parsed.document().clone();
511516
let mut proc = Processor::new();
512517
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
513518
proc.apply().await?;

0 commit comments

Comments
 (0)