@@ -5,10 +5,10 @@ extern crate html5ever;
55use std:: fs;
66use std:: path:: PathBuf ;
77
8- use criterion:: Criterion ;
8+ use criterion:: { BatchSize , Criterion } ;
99
10- use html5ever:: tendril:: * ;
1110use html5ever:: tokenizer:: { BufferQueue , Token , TokenSink , TokenSinkResult , Tokenizer } ;
11+ use html5ever:: { tendril:: * , TokenizerResult } ;
1212
1313struct Sink ;
1414
@@ -51,19 +51,25 @@ fn run_bench(c: &mut Criterion, name: &str) {
5151
5252 let test_name = format ! ( "html tokenizing {name}" ) ;
5353
54+ // Construct a buffer queue to feed to the tokenizer
55+ let buffer_queue = BufferQueue :: default ( ) ;
56+ for buf in input. into_iter ( ) {
57+ buffer_queue. push_back ( buf) ;
58+ }
59+
5460 c. bench_function ( & test_name, move |b| {
55- b. iter ( || {
56- let tok = Tokenizer :: new ( Sink , Default :: default ( ) ) ;
57- let buffer = BufferQueue :: default ( ) ;
58- // We are doing clone inside the bench function, this is not ideal, but possibly
59- // necessary since our iterator consumes the underlying buffer.
60- for buf in input. clone ( ) . into_iter ( ) {
61- buffer . push_back ( buf ) ;
62- let _ = tok . feed ( & buffer ) ;
63- }
64- let _ = tok . feed ( & buffer ) ;
65- tok . end ( ) ;
66- } )
61+ b. iter_batched (
62+ || buffer_queue . clone ( ) ,
63+ |buffer_queue| {
64+ let tok = Tokenizer :: new ( Sink , Default :: default ( ) ) ;
65+
66+ // Tokenize the entire input: `feed` may return before the queue is drained (e.g. at a <script> element), so keep calling it until it reports `Done`
67+ while tok . feed ( & buffer_queue ) != TokenizerResult :: Done { }
68+
69+ tok . end ( ) ;
70+ } ,
71+ BatchSize :: SmallInput ,
72+ )
6773 } ) ;
6874}
6975
0 commit comments