@@ -6,11 +6,11 @@ use flatbuffers::WIPOffset;
66
77use crate :: filters:: fb_builder:: EngineFlatBuilder ;
88use crate :: filters:: network:: { FilterTokens , NetworkFilter } ;
9+ use crate :: filters:: token_selector:: TokenSelector ;
910
1011use crate :: filters:: network:: NetworkFilterMaskHelper ;
1112use crate :: flatbuffers:: containers:: flat_multimap:: FlatMultiMapBuilder ;
1213use crate :: flatbuffers:: containers:: flat_serialize:: { FlatBuilder , FlatSerialize , WIPFlatVec } ;
13- use crate :: network_filter_list:: token_histogram;
1414use crate :: optimizer;
1515use crate :: utils:: { to_short_hash, Hash , ShortHash } ;
1616
@@ -133,21 +133,10 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
133133
134134 let mut optimizable = HashMap :: < ShortHash , Vec < NetworkFilter > > :: new ( ) ;
135135
136- // Compute tokens for all filters
137- let filter_tokens: Vec < _ > = rule_list
138- . filters
139- . into_iter ( )
140- . map ( |filter| {
141- let tokens = filter. get_tokens_optimized ( ) ;
142- ( filter, tokens)
143- } )
144- . collect ( ) ;
145-
146- // compute the tokens' frequency histogram
147- let ( total_number_of_tokens, tokens_histogram) = token_histogram ( & filter_tokens) ;
136+ let mut token_frequencies = TokenSelector :: new ( rule_list. filters . len ( ) ) ;
148137
149138 {
150- for ( network_filter, multi_tokens ) in filter_tokens . into_iter ( ) {
139+ for network_filter in rule_list . filters {
151140 let flat_filter = if !rule_list. optimize
152141 || !optimizer:: is_filter_optimizable_by_patterns ( & network_filter)
153142 {
@@ -156,46 +145,34 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
156145 None
157146 } ;
158147
159- let mut store_filter = |token : ShortHash | {
148+ let mut store_filter = |token : Hash | {
149+ let short_token = to_short_hash ( token) ;
160150 if let Some ( flat_filter) = flat_filter {
161- filter_map_builder. insert ( token , flat_filter) ;
151+ filter_map_builder. insert ( short_token , flat_filter) ;
162152 } else {
163153 optimizable
164- . entry ( token )
154+ . entry ( short_token )
165155 . or_default ( )
166156 . push ( network_filter. clone ( ) ) ;
167157 }
168158 } ;
169159
160+ let multi_tokens = network_filter. get_tokens_optimized ( ) ;
170161 match multi_tokens {
171162 FilterTokens :: Empty => {
172- // No tokens, skip this filter
163+ // No tokens, add to fallback bucket (token 0)
164+ store_filter ( 0 ) ;
173165 }
174166 FilterTokens :: OptDomains ( opt_domains) => {
175167 // For OptDomains, each domain is treated as a separate token group
176168 for & token in opt_domains. iter ( ) {
177- store_filter ( to_short_hash ( token) ) ;
169+ store_filter ( token) ;
170+ token_frequencies. record_usage ( token) ;
178171 }
179172 }
180173 FilterTokens :: Other ( tokens) => {
181- // For Other tokens, find the best token from the group
182- let mut best_token: ShortHash = 0 ;
183- let mut min_count = total_number_of_tokens + 1 ;
184- for & token in tokens. iter ( ) {
185- let token = to_short_hash ( token) ;
186- match tokens_histogram. get ( & token) {
187- None => {
188- min_count = 0 ;
189- best_token = token
190- }
191- Some ( & count) if count < min_count => {
192- min_count = count;
193- best_token = token
194- }
195- _ => { }
196- }
197- }
198-
174+ let best_token = token_frequencies. select_least_used_token ( & tokens) ;
175+ token_frequencies. record_usage ( best_token) ;
199176 store_filter ( best_token) ;
200177 }
201178 }
0 commit comments