Skip to content

Commit 6b9bd8d

Browse files
committed
Add initial variable processing
This supports the work in whatwg/html#11392 with a new processor found in variables.rs, documented there. This required updating the parser to store line numbers for each element, which changed a lot of test call sites.
1 parent e3d362b commit 6b9bd8d

File tree

11 files changed

+534
-41
lines changed

11 files changed

+534
-41
lines changed

src/annotate_attributes.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ mod tests {
311311
// before and after the attributes table, to demonstrate that this is
312312
// not sensitive to which order they occur in (i.e., these could be
313313
// reordered in the HTML spec).
314-
let document = parse_document_async(
314+
let parsed = parse_document_async(
315315
r#"
316316
<!DOCTYPE html>
317317
<h3>The a element</h3>
@@ -333,6 +333,7 @@ mod tests {
333333
<dd><code data-x="attr-area-href">href</code>
334334
</dl>
335335
"#.trim().as_bytes()).await?;
336+
let document = parsed.document().clone();
336337
let mut proc = Processor::new();
337338
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
338339
proc.apply().await?;
@@ -368,7 +369,7 @@ mod tests {
368369
async fn test_variant() -> io::Result<()> {
369370
// This checks that <!-- variant --> and <!-- or: --> work correctly.
370371
// i.e., the variant description is used where requested
371-
let document = parse_document_async(
372+
let parsed = parse_document_async(
372373
r#"
373374
<!DOCTYPE html>
374375
<h3>The a element</h3>
@@ -386,6 +387,7 @@ mod tests {
386387
<dd><code data-x="attr-area-href">href</code><!-- variant -->
387388
</dl>
388389
"#.trim().as_bytes()).await?;
390+
let document = parsed.document().clone();
389391
let mut proc = Processor::new();
390392
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
391393
proc.apply().await?;
@@ -415,7 +417,7 @@ mod tests {
415417
#[tokio::test]
416418
async fn test_special_semantics() -> io::Result<()> {
417419
// Checks that the special rules for using : instead of an em dash work.
418-
let document = parse_document_async(
420+
let parsed = parse_document_async(
419421
r#"
420422
<!DOCTYPE html>
421423
<h3>The a element</h3>
@@ -428,6 +430,7 @@ mod tests {
428430
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Anchor name
429431
</tbody></table>
430432
"#.trim().as_bytes()).await?;
433+
let document = parsed.document().clone();
431434
let mut proc = Processor::new();
432435
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
433436
proc.apply().await?;
@@ -451,7 +454,7 @@ mod tests {
451454
#[tokio::test]
452455
async fn test_special_semantics_multiple() -> io::Result<()> {
453456
// Checks that the special rules for joining any special semantics with a ; work.
454-
let document = parse_document_async(
457+
let parsed = parse_document_async(
455458
r#"
456459
<!DOCTYPE html>
457460
<h3>The a element</h3>
@@ -465,6 +468,7 @@ mod tests {
465468
<tr><th><code data-x>name</code><td><code data-x="attr-a-name">a</code><td>Name of the anchor
466469
</tbody></table>
467470
"#.trim().as_bytes()).await?;
471+
let document = parsed.document().clone();
468472
let mut proc = Processor::new();
469473
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
470474
proc.apply().await?;
@@ -490,7 +494,7 @@ mod tests {
490494
async fn test_identical_links() -> io::Result<()> {
491495
// This checks the same identifier can be linked multiple times without
492496
// repeating the description.
493-
let document = parse_document_async(
497+
let parsed = parse_document_async(
494498
r#"
495499
<!DOCTYPE html>
496500
<h3>The img element</h3>
@@ -508,6 +512,7 @@ mod tests {
508512
<tr><th><code data-x>width</code><td><code data-x="attr-dim-width">img</code>; <code data-x="attr-dim-width">video</code><td>Horizontal dimension
509513
</tbody></table>
510514
"#.trim().as_bytes()).await?;
515+
let document = parsed.document().clone();
511516
let mut proc = Processor::new();
512517
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
513518
proc.apply().await?;

src/boilerplate.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,11 @@ mod tests {
166166
"<tr><td>en<td>English",
167167
)
168168
.await?;
169-
let document = parse_document_async(
169+
let parsed = parse_document_async(
170170
"<!DOCTYPE html><table><!--BOILERPLATE languages--></table>".as_bytes(),
171171
)
172172
.await?;
173+
let document = parsed.document().clone();
173174
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
174175
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
175176
proc.apply().await?;
@@ -188,10 +189,11 @@ mod tests {
188189
"data:text/html,Hello, world!",
189190
)
190191
.await?;
191-
let document = parse_document_async(
192+
let parsed = parse_document_async(
192193
"<!DOCTYPE html><a href=\"<!--BOILERPLATE data.url-->\">hello</a>".as_bytes(),
193194
)
194195
.await?;
196+
let document = parsed.document().clone();
195197
let mut proc = Processor::new(boilerplate_dir.path(), Path::new("."));
196198
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
197199
proc.apply().await?;
@@ -208,9 +210,10 @@ mod tests {
208210
tokio::fs::write(example_dir.path().join("ex1"), "first").await?;
209211
tokio::fs::write(example_dir.path().join("ex2"), "second").await?;
210212
tokio::fs::write(example_dir.path().join("ignored"), "bad").await?;
211-
let document =
213+
let parsed =
212214
parse_document_async("<!DOCTYPE html><pre>EXAMPLE ex1</pre><pre><code class=html>\nEXAMPLE ex2 </code></pre><p>EXAMPLE ignored</p>".as_bytes())
213215
.await?;
216+
let document = parsed.document().clone();
214217
let mut proc = Processor::new(Path::new("."), example_dir.path());
215218
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
216219
proc.apply().await?;
@@ -229,7 +232,8 @@ mod tests {
229232
"<!DOCTYPE html><body><pre>EXAMPLE ../foo</pre>",
230233
];
231234
for example in bad_path_examples {
232-
let document = parse_document_async(example.as_bytes()).await?;
235+
let parsed = parse_document_async(example.as_bytes()).await?;
236+
let document = parsed.document().clone();
233237
let mut proc = Processor::new(Path::new("."), Path::new("."));
234238
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
235239
let result = proc.apply().await;

src/dom_utils.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ pub trait NodeHandleExt {
2828
/// Sets an attribute on the element. Must be an element.
2929
fn set_attribute(&self, name: &QualName, value: StrTendril);
3030

31+
/// Removes the attribute with the given name from the element; does nothing if it is absent. Panics if the node is not an element.
32+
fn remove_attribute(&self, name: &QualName);
33+
3134
/// Returns true if the node is an element.
3235
fn is_element(&self) -> bool;
3336

@@ -220,6 +223,16 @@ impl NodeHandleExt for Handle {
220223
}
221224
}
222225

226+
fn remove_attribute(&self, name: &QualName) {
227+
let mut attrs = match self.data {
228+
NodeData::Element { ref attrs, .. } => attrs.borrow_mut(),
229+
_ => panic!("not an element"),
230+
};
231+
if let Some(i) = attrs.iter().position(|a| &a.name == name) {
232+
attrs.remove(i);
233+
}
234+
}
235+
223236
fn is_element(&self) -> bool {
224237
matches!(&self.data, NodeData::Element { .. })
225238
}

src/interface_index.rs

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ mod tests {
186186

187187
#[tokio::test]
188188
async fn test_two_interfaces_in_one_block() -> io::Result<()> {
189-
let document = parse_document_async(
189+
let parsed = parse_document_async(
190190
r#"
191191
<!DOCTYPE html>
192192
<pre><code class=idl>
@@ -199,6 +199,7 @@ INSERT INTERFACES HERE
199199
.as_bytes(),
200200
)
201201
.await?;
202+
let document = parsed.document().clone();
202203
let mut proc = Processor::new();
203204
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
204205
proc.apply()?;
@@ -216,7 +217,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
216217

217218
#[tokio::test]
218219
async fn test_two_interfaces_in_separate_blocks() -> io::Result<()> {
219-
let document = parse_document_async(
220+
let parsed = parse_document_async(
220221
r#"
221222
<!DOCTYPE html>
222223
<pre><code class=idl>
@@ -231,6 +232,7 @@ INSERT INTERFACES HERE
231232
.as_bytes(),
232233
)
233234
.await?;
235+
let document = parsed.document().clone();
234236
let mut proc = Processor::new();
235237
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
236238
proc.apply()?;
@@ -250,7 +252,7 @@ interface <dfn interface="">HTMLBlinkElement</dfn> { ... }
250252

251253
#[tokio::test]
252254
async fn interface_with_partial() -> io::Result<()> {
253-
let document = parse_document_async(
255+
let parsed = parse_document_async(
254256
r#"
255257
<!DOCTYPE html>
256258
<pre><code class=idl>
@@ -265,6 +267,7 @@ INSERT INTERFACES HERE
265267
.as_bytes(),
266268
)
267269
.await?;
270+
let document = parsed.document().clone();
268271
let mut proc = Processor::new();
269272
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
270273
proc.apply()?;
@@ -284,7 +287,7 @@ partial interface <span id="HTMLMarqueeElement-partial">HTMLMarqueeElement</span
284287

285288
#[tokio::test]
286289
async fn interface_with_two_partials() -> io::Result<()> {
287-
let document = parse_document_async(
290+
let parsed = parse_document_async(
288291
r#"
289292
<!DOCTYPE html>
290293
<pre><code class=idl>
@@ -298,6 +301,7 @@ INSERT INTERFACES HERE
298301
.as_bytes(),
299302
)
300303
.await?;
304+
let document = parsed.document().clone();
301305
let mut proc = Processor::new();
302306
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
303307
proc.apply()?;
@@ -316,7 +320,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
316320

317321
#[tokio::test]
318322
async fn only_partials() -> io::Result<()> {
319-
let document = parse_document_async(
323+
let parsed = parse_document_async(
320324
r#"
321325
<!DOCTYPE html>
322326
<pre><code class=idl>
@@ -329,6 +333,7 @@ INSERT INTERFACES HERE
329333
.as_bytes(),
330334
)
331335
.await?;
336+
let document = parsed.document().clone();
332337
let mut proc = Processor::new();
333338
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
334339
proc.apply()?;
@@ -346,7 +351,7 @@ partial interface <span id="HTMLMarqueeElement-partial-2">HTMLMarqueeElement</sp
346351

347352
#[tokio::test]
348353
async fn marker_before() -> io::Result<()> {
349-
let document = parse_document_async(
354+
let parsed = parse_document_async(
350355
r#"
351356
<!DOCTYPE html>
352357
INSERT INTERFACES HERE
@@ -358,6 +363,7 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
358363
.as_bytes(),
359364
)
360365
.await?;
366+
let document = parsed.document().clone();
361367
let mut proc = Processor::new();
362368
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
363369
proc.apply()?;
@@ -376,7 +382,8 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
376382

377383
#[tokio::test]
378384
async fn no_marker() -> io::Result<()> {
379-
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
385+
let parsed = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
386+
let document = parsed.document().clone();
380387
let mut proc = Processor::new();
381388
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
382389
let result = proc.apply();
@@ -386,11 +393,12 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
386393

387394
#[tokio::test]
388395
async fn duplicate_marker() -> io::Result<()> {
389-
let document = parse_document_async(
396+
let parsed = parse_document_async(
390397
"<!DOCTYPE html><div>INSERT INTERFACES HERE</div><div>INSERT INTERFACES HERE</div>"
391398
.as_bytes(),
392399
)
393400
.await?;
401+
let document = parsed.document().clone();
394402
let mut proc = Processor::new();
395403
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
396404
let result = proc.apply();
@@ -400,7 +408,7 @@ interface <dfn interface="">HTMLMarqueeElement</dfn> { ... }
400408

401409
#[tokio::test]
402410
async fn duplicate_dfn() -> io::Result<()> {
403-
let document = parse_document_async(
411+
let parsed = parse_document_async(
404412
r#"
405413
<!DOCTYPE html>
406414
<pre><code class=idl>
@@ -411,6 +419,7 @@ interface <dfn interface>HTMLMarqueeElement</dfn> { ... }
411419
.as_bytes(),
412420
)
413421
.await?;
422+
let document = parsed.document().clone();
414423
let mut proc = Processor::new();
415424
dom_utils::scan_dom(&document, &mut |h| proc.visit(h));
416425
let result = proc.apply();

src/main.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ mod rcdom_with_line_numbers;
1818
mod represents;
1919
mod self_link;
2020
mod tag_omission;
21+
mod variables;
2122

2223
#[tokio::main]
2324
async fn main() -> io::Result<()> {
@@ -40,19 +41,22 @@ async fn run() -> io::Result<()> {
4041
// Because parsing can jump around the tree a little, it's most reasonable
4142
// to just parse the whole document before doing any processing. Even for
4243
// the HTML standard, this doesn't take too long.
43-
let document = parser::parse_document_async(tokio::io::stdin()).await?;
44+
let parsed = parser::parse_document_async(tokio::io::stdin()).await?;
45+
let document = parsed.document().clone();
4446

4547
let mut boilerplate = boilerplate::Processor::new(cache_dir.clone(), source_dir.join("demos"));
4648
let mut represents = represents::Processor::new();
4749
let mut annotate_attributes = annotate_attributes::Processor::new();
4850
let mut tag_omission = tag_omission::Processor::new();
4951
let mut interface_index = interface_index::Processor::new();
5052
let mut self_link = self_link::Processor::new();
53+
let mut variables = variables::Processor::new(&parsed);
5154

5255
// We do exactly one pass to identify the changes that need to be made.
5356
dom_utils::scan_dom(&document, &mut |h| {
5457
boilerplate.visit(h);
5558
represents.visit(h);
59+
variables.visit(h);
5660
annotate_attributes.visit(h);
5761
tag_omission.visit(h);
5862
interface_index.visit(h);
@@ -64,6 +68,7 @@ async fn run() -> io::Result<()> {
6468
// conflicts between them.
6569
boilerplate.apply().await?;
6670
represents.apply()?;
71+
variables.apply()?;
6772
annotate_attributes.apply().await?;
6873
tag_omission.apply()?;
6974
interface_index.apply()?;

src/parser.rs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ pub async fn parse_fragment_async<R: AsyncRead + Unpin>(
6565
Ok(new_children)
6666
}
6767

68-
pub async fn parse_document_async<R: AsyncRead + Unpin>(r: R) -> io::Result<Handle> {
68+
pub async fn parse_document_async<R: AsyncRead + Unpin>(r: R) -> io::Result<RcDomWithLineNumbers> {
6969
let parser = driver::parse_document(RcDomWithLineNumbers::default(), create_error_opts());
7070
let dom = parse_internal_async(parser, r).await?;
7171
dom.create_error_from_parse_errors()?;
7272

73-
Ok(dom.document().clone())
73+
Ok(dom)
7474
}
7575

7676
fn create_error_opts() -> ParseOpts {
@@ -120,7 +120,8 @@ pub(crate) mod tests {
120120
// we're in. This is important because of the special rules
121121
// surrounding, e.g., tables. If you change this to use the body as context,
122122
// no element at all is emitted.
123-
let document = parse_document_async("<!DOCTYPE html><table></table>".as_bytes()).await?;
123+
let parsed = parse_document_async("<!DOCTYPE html><table></table>".as_bytes()).await?;
124+
let document = parsed.document().clone();
124125
let body = document.children.borrow()[1].children.borrow()[1].clone();
125126
assert!(body.is_html_element(&local_name!("body")));
126127
let table = body.children.borrow()[0].clone();
@@ -176,7 +177,8 @@ pub(crate) mod tests {
176177

177178
#[tokio::test]
178179
async fn test_fragment_error_line_number() -> io::Result<()> {
179-
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
180+
let parsed = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
181+
let document = parsed.document().clone();
180182
let body = document.children.borrow()[1].children.borrow()[1].clone();
181183
assert!(body.is_html_element(&local_name!("body")));
182184
let result = parse_fragment_async(
@@ -194,7 +196,8 @@ pub(crate) mod tests {
194196

195197
#[tokio::test]
196198
async fn test_fragment_error_exact() -> io::Result<()> {
197-
let document = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
199+
let parsed = parse_document_async("<!DOCTYPE html>".as_bytes()).await?;
200+
let document = parsed.document().clone();
198201
let body = document.children.borrow()[1].children.borrow()[1].clone();
199202
assert!(body.is_html_element(&local_name!("body")));
200203
let result = parse_fragment_async("&asdf;".as_bytes(), &body).await;

0 commit comments

Comments
 (0)