@@ -19,7 +19,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, CowStr, Event, LinkType, Tag, Tag
1919use std:: borrow:: Cow ;
2020use std:: collections:: { HashMap , HashSet } ;
2121use std:: ops:: Deref ;
22- use tracing:: { error , trace, warn} ;
22+ use tracing:: { trace, warn} ;
2323
2424/// Helper to create a [`QualName`].
2525macro_rules! attr_qual_name {
@@ -307,6 +307,8 @@ where
307307 match event {
308308 Event :: Start ( tag) => self . start_tag ( tag) ,
309309 Event :: End ( tag) => {
310+ // TODO: This should validate that the event stack is
311+ // properly synchronized with the tag stack.
310312 self . pop ( ) ;
311313 match tag {
312314 TagEnd :: TableHead => {
@@ -378,6 +380,7 @@ where
378380 }
379381 }
380382 }
383+ self . finish_stack ( ) ;
381384 self . collect_footnote_defs ( ) ;
382385 }
383386
@@ -606,40 +609,10 @@ where
606609 trace ! ( "html token={token:?}" ) ;
607610 match token {
608611 Token :: DoctypeToken ( _) => { }
609- Token :: TagToken ( tag) => {
610- match tag. kind {
611- TagKind :: StartTag => {
612- let is_closed = is_void_element ( & tag. name ) || tag. self_closing ;
613- is_raw = matches ! ( & * tag. name, "script" | "style" ) ;
614- let name = QualName :: new ( None , html5ever:: ns!( html) , tag. name ) ;
615- let attrs = tag
616- . attrs
617- . into_iter ( )
618- . map ( |attr| ( attr. name , attr. value ) )
619- . collect ( ) ;
620- let mut el = Element {
621- name,
622- attrs,
623- self_closing : tag. self_closing ,
624- was_raw : true ,
625- } ;
626- fix_html_link ( & mut el) ;
627- self . push ( Node :: Element ( el) ) ;
628- if is_closed {
629- // No end element.
630- self . pop ( ) ;
631- }
632- }
633- TagKind :: EndTag => {
634- is_raw = false ;
635- if self . is_html_tag_matching ( & tag. name ) {
636- self . pop ( ) ;
637- }
638- // else the stack is corrupt. I'm not really sure
639- // what to do here...
640- }
641- }
642- }
612+ Token :: TagToken ( tag) => match tag. kind {
613+ TagKind :: StartTag => self . start_html_tag ( tag, & mut is_raw) ,
614+ TagKind :: EndTag => self . end_html_tag ( tag, & mut is_raw) ,
615+ } ,
643616 Token :: CommentToken ( comment) => {
644617 self . append ( Node :: Comment ( comment) ) ;
645618 }
@@ -664,22 +637,59 @@ where
664637 }
665638 }
666639
640+ /// Adds an open HTML tag.
641+ fn start_html_tag ( & mut self , tag : html5ever:: tokenizer:: Tag , is_raw : & mut bool ) {
642+ let is_closed = is_void_element ( & tag. name ) || tag. self_closing ;
643+ * is_raw = matches ! ( & * tag. name, "script" | "style" ) ;
644+ let name = QualName :: new ( None , html5ever:: ns!( html) , tag. name ) ;
645+ let attrs = tag
646+ . attrs
647+ . into_iter ( )
648+ . map ( |attr| ( attr. name , attr. value ) )
649+ . collect ( ) ;
650+ let mut el = Element {
651+ name,
652+ attrs,
653+ self_closing : tag. self_closing ,
654+ was_raw : true ,
655+ } ;
656+ fix_html_link ( & mut el) ;
657+ self . push ( Node :: Element ( el) ) ;
658+ if is_closed {
659+ // No end element.
660+ self . pop ( ) ;
661+ }
662+ }
663+
664+ /// Closes the given HTML tag.
665+ fn end_html_tag ( & mut self , tag : html5ever:: tokenizer:: Tag , is_raw : & mut bool ) {
666+ * is_raw = false ;
667+ if self . is_html_tag_matching ( & tag. name ) {
668+ self . pop ( ) ;
669+ } else {
670+ // The proper thing to do here is to recover. However, the HTML
671+ // parsing algorithm for that is quite complex. See
672+ // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
673+ // and the adoption agency algorithm.
674+ warn ! (
675+ "unexpected HTML end tag `</{}>` found in `{}`\n \
676+ Check that the HTML tags are properly balanced.",
677+ tag. name,
678+ self . options. path. display( )
679+ ) ;
680+ }
681+ }
682+
667683 /// This is used to verify HTML parsing keeps the stack of tags in sync.
668684 fn is_html_tag_matching ( & self , name : & str ) -> bool {
669685 let current = self . tree . get ( self . current_node ) . unwrap ( ) . value ( ) ;
670686 if let Node :: Element ( el) = current
671687 && el. name ( ) == name
672688 {
673- return true ;
689+ true
690+ } else {
691+ false
674692 }
675- error ! (
676- "internal error: HTML tag stack out of sync.\n
677- path: `{}`\n \
678- current={current:?}\n \
679- pop name: {name}",
680- self . options. path. display( )
681- ) ;
682- false
683693 }
684694
685695 /// Eats all pulldown-cmark events until the next `End` matching the
@@ -736,6 +746,40 @@ where
736746 output
737747 }
738748
749+ /// Deals with any unclosed elements on the stack.
750+ fn finish_stack ( & mut self ) {
751+ while let Some ( node_id) = self . tag_stack . pop ( ) {
752+ let node = self . tree . get ( node_id) . unwrap ( ) . value ( ) ;
753+ match node {
754+ Node :: Fragment => { }
755+ Node :: Element ( el) => {
756+ if el. was_raw {
757+ warn ! (
758+ "unclosed HTML tag `<{}>` found in `{}`" ,
759+ el. name. local,
760+ self . options. path. display( )
761+ ) ;
762+ } else {
763+ panic ! (
764+ "internal error: expected empty tag stack.\n
765+ path: `{}`\n \
766+ element={el:?}",
767+ self . options. path. display( )
768+ ) ;
769+ }
770+ }
771+ node => {
772+ panic ! (
773+ "internal error: expected empty tag stack.\n
774+ path: `{}`\n \
775+ node={node:?}",
776+ self . options. path. display( )
777+ ) ;
778+ }
779+ }
780+ }
781+ }
782+
739783 /// Appends a new footnote reference.
740784 fn footnote_reference ( & mut self , name : CowStr < ' event > ) {
741785 let len = self . footnote_numbers . len ( ) + 1 ;
0 commit comments