@@ -28,7 +28,7 @@ pub(super) struct CharRef {
2828pub ( super ) enum Status {
2929 Stuck ,
3030 Progress ,
31- Done ,
31+ Done ( CharRef ) ,
3232}
3333
3434#[ derive( Debug ) ]
@@ -43,7 +43,6 @@ enum State {
4343
4444pub ( super ) struct CharRefTokenizer {
4545 state : State ,
46- result : Option < CharRef > ,
4746 is_consumed_in_attribute : bool ,
4847
4948 num : u32 ,
@@ -56,12 +55,18 @@ pub(super) struct CharRefTokenizer {
5655 name_len : usize ,
5756}
5857
58+ impl CharRef {
59+ const EMPTY : CharRef = CharRef {
60+ chars : [ '\0' , '\0' ] ,
61+ num_chars : 0 ,
62+ } ;
63+ }
64+
5965impl CharRefTokenizer {
6066 pub ( super ) fn new ( is_consumed_in_attribute : bool ) -> CharRefTokenizer {
6167 CharRefTokenizer {
6268 is_consumed_in_attribute,
6369 state : State :: Begin ,
64- result : None ,
6570 num : 0 ,
6671 num_too_big : false ,
6772 seen_digit : false ,
@@ -72,12 +77,6 @@ impl CharRefTokenizer {
7277 }
7378 }
7479
75- // A CharRefTokenizer can only tokenize one character reference,
76- // so this method consumes the tokenizer.
77- pub ( super ) fn get_result ( self ) -> CharRef {
78- self . result . expect ( "get_result called before done" )
79- }
80-
8180 fn name_buf ( & self ) -> & StrTendril {
8281 self . name_buf_opt
8382 . as_ref ( )
@@ -90,20 +89,11 @@ impl CharRefTokenizer {
9089 . expect ( "name_buf missing in named character reference" )
9190 }
9291
93- fn finish_none ( & mut self ) -> Status {
94- self . result = Some ( CharRef {
95- chars : [ '\0' , '\0' ] ,
96- num_chars : 0 ,
97- } ) ;
98- Status :: Done
99- }
100-
10192 fn finish_one ( & mut self , c : char ) -> Status {
102- self . result = Some ( CharRef {
93+ Status :: Done ( CharRef {
10394 chars : [ c, '\0' ] ,
10495 num_chars : 1 ,
105- } ) ;
106- Status :: Done
96+ } )
10797 }
10898}
10999
@@ -113,10 +103,6 @@ impl CharRefTokenizer {
113103 tokenizer : & Tokenizer < Sink > ,
114104 input : & BufferQueue ,
115105 ) -> Status {
116- if self . result . is_some ( ) {
117- return Status :: Done ;
118- }
119-
120106 debug ! ( "char ref tokenizer stepping in state {:?}" , self . state) ;
121107 match self . state {
122108 State :: Begin => self . do_begin ( tokenizer, input) ,
@@ -144,7 +130,7 @@ impl CharRefTokenizer {
144130 self . state = State :: Octothorpe ;
145131 Status :: Progress
146132 } ,
147- Some ( _) => self . finish_none ( ) ,
133+ Some ( _) => Status :: Done ( CharRef :: EMPTY ) ,
148134 None => Status :: Stuck ,
149135 }
150136 }
@@ -228,7 +214,7 @@ impl CharRefTokenizer {
228214
229215 input. push_front ( unconsume) ;
230216 tokenizer. emit_error ( Borrowed ( "Numeric character reference without digits" ) ) ;
231- self . finish_none ( )
217+ Status :: Done ( CharRef :: EMPTY )
232218 }
233219
234220 fn finish_numeric < Sink : TokenSink > ( & mut self , tokenizer : & Tokenizer < Sink > ) -> Status {
@@ -331,7 +317,7 @@ impl CharRefTokenizer {
331317 _ => ( ) ,
332318 }
333319 self . unconsume_name ( input) ;
334- self . finish_none ( )
320+ Status :: Done ( CharRef :: EMPTY )
335321 } ,
336322
337323 Some ( ( c1, c2) ) => {
@@ -379,15 +365,14 @@ impl CharRefTokenizer {
379365
380366 if unconsume_all {
381367 self . unconsume_name ( input) ;
382- self . finish_none ( )
368+ Status :: Done ( CharRef :: EMPTY )
383369 } else {
384370 input. push_front ( StrTendril :: from_slice ( & self . name_buf ( ) [ name_len..] ) ) ;
385371 tokenizer. ignore_lf . set ( false ) ;
386- self . result = Some ( CharRef {
372+ Status :: Done ( CharRef {
387373 chars : [ from_u32 ( c1) . unwrap ( ) , from_u32 ( c2) . unwrap ( ) ] ,
388374 num_chars : if c2 == 0 { 1 } else { 2 } ,
389- } ) ;
390- Status :: Done
375+ } )
391376 }
392377 } ,
393378 }
@@ -411,34 +396,42 @@ impl CharRefTokenizer {
411396 _ => ( ) ,
412397 }
413398 self . unconsume_name ( input) ;
414- self . finish_none ( )
399+ Status :: Done ( CharRef :: EMPTY )
415400 }
416401
417402 pub ( super ) fn end_of_file < Sink : TokenSink > (
418403 & mut self ,
419404 tokenizer : & Tokenizer < Sink > ,
420405 input : & BufferQueue ,
421- ) {
422- while self . result . is_none ( ) {
423- match self . state {
424- State :: Begin => drop ( self . finish_none ( ) ) ,
425- State :: Numeric ( _) if !self . seen_digit => {
426- self . unconsume_numeric ( tokenizer, input) ;
427- } ,
406+ ) -> CharRef {
407+ loop {
408+ let status = match self . state {
409+ State :: Begin => Status :: Done ( CharRef :: EMPTY ) ,
410+ State :: Numeric ( _) if !self . seen_digit => self . unconsume_numeric ( tokenizer, input) ,
428411 State :: Numeric ( _) | State :: NumericSemicolon => {
429412 tokenizer. emit_error ( Borrowed ( "EOF in numeric character reference" ) ) ;
430- self . finish_numeric ( tokenizer) ;
413+ self . finish_numeric ( tokenizer)
431414 } ,
432- State :: Named => drop ( self . finish_named ( tokenizer, input, None ) ) ,
415+ State :: Named => self . finish_named ( tokenizer, input, None ) ,
433416 State :: BogusName => {
434417 self . unconsume_name ( input) ;
435- self . finish_none ( ) ;
418+ Status :: Done ( CharRef :: EMPTY )
436419 } ,
437420 State :: Octothorpe => {
438421 input. push_front ( StrTendril :: from_slice ( "#" ) ) ;
439422 tokenizer. emit_error ( Borrowed ( "EOF after '#' in character reference" ) ) ;
440- self . finish_none ( ) ;
423+ Status :: Done ( CharRef :: EMPTY )
424+ } ,
425+ } ;
426+
427+ match status {
428+ Status :: Done ( char_ref) => {
429+ return char_ref;
430+ } ,
431+ Status :: Stuck => {
432+ return CharRef :: EMPTY ;
441433 } ,
434+ Status :: Progress => { } ,
442435 }
443436 }
444437 }
0 commit comments