Skip to content

Commit f4be889

Browse files
authored
Optimize network filter get tokens (#540)
* optimize NetworkFilter::get_tokens()
* Fix some review issues
* Return get_tokens() for API compatibility
1 parent 671ee95 commit f4be889

File tree

3 files changed

+77
-38
lines changed

3 files changed

+77
-38
lines changed

src/filters/fb_network_builder.rs

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::collections::{HashMap, HashSet};
55
use flatbuffers::WIPOffset;
66

77
use crate::filters::fb_builder::EngineFlatBuilder;
8-
use crate::filters::network::NetworkFilter;
8+
use crate::filters::network::{FilterTokens, NetworkFilter};
99

1010
use crate::filters::network::NetworkFilterMaskHelper;
1111
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
@@ -138,7 +138,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
138138
.filters
139139
.into_iter()
140140
.map(|filter| {
141-
let tokens = filter.get_tokens();
141+
let tokens = filter.get_tokens_optimized();
142142
(filter, tokens)
143143
})
144144
.collect();
@@ -156,33 +156,49 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
156156
None
157157
};
158158

159-
for tokens in multi_tokens {
160-
let mut best_token: ShortHash = 0;
161-
let mut min_count = total_number_of_tokens + 1;
162-
for token in tokens {
163-
let token = to_short_hash(token);
164-
match tokens_histogram.get(&token) {
165-
None => {
166-
min_count = 0;
167-
best_token = token
168-
}
169-
Some(&count) if count < min_count => {
170-
min_count = count;
171-
best_token = token
172-
}
173-
_ => {}
174-
}
175-
}
176-
159+
let mut store_filter = |token: ShortHash| {
177160
if let Some(flat_filter) = flat_filter {
178-
filter_map.entry(best_token).or_default().push(flat_filter);
161+
filter_map.entry(token).or_default().push(flat_filter);
179162
} else {
180163
optimizable
181-
.entry(best_token)
164+
.entry(token)
182165
.or_default()
183166
.push(network_filter.clone());
184167
}
185-
} // tokens
168+
};
169+
170+
match multi_tokens {
171+
FilterTokens::Empty => {
172+
// No tokens, skip this filter
173+
}
174+
FilterTokens::OptDomains(opt_domains) => {
175+
// For OptDomains, each domain is treated as a separate token group
176+
for &token in opt_domains.iter() {
177+
store_filter(to_short_hash(token));
178+
}
179+
}
180+
FilterTokens::Other(tokens) => {
181+
// For Other tokens, find the best token from the group
182+
let mut best_token: ShortHash = 0;
183+
let mut min_count = total_number_of_tokens + 1;
184+
for &token in tokens.iter() {
185+
let token = to_short_hash(token);
186+
match tokens_histogram.get(&token) {
187+
None => {
188+
min_count = 0;
189+
best_token = token
190+
}
191+
Some(&count) if count < min_count => {
192+
min_count = count;
193+
best_token = token
194+
}
195+
_ => {}
196+
}
197+
}
198+
199+
store_filter(best_token);
200+
}
201+
}
186202
}
187203
}
188204

src/filters/network.rs

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,12 @@ pub enum FilterPart {
311311
AnyOf(Vec<String>),
312312
}
313313

314+
pub enum FilterTokens {
315+
Empty,
316+
OptDomains(Vec<Hash>),
317+
Other(Vec<Hash>),
318+
}
319+
314320
pub struct FilterPartIterator<'a> {
315321
filter_part: &'a FilterPart,
316322
index: usize,
@@ -876,7 +882,18 @@ impl NetworkFilter {
876882
)
877883
}
878884

885+
#[deprecated(since = "0.11.1", note = "use get_tokens_optimized instead")]
879886
pub fn get_tokens(&self) -> Vec<Vec<Hash>> {
887+
match self.get_tokens_optimized() {
888+
FilterTokens::OptDomains(domains) => {
889+
domains.into_iter().map(|domain| vec![domain]).collect()
890+
}
891+
FilterTokens::Other(tokens) => vec![tokens],
892+
FilterTokens::Empty => vec![],
893+
}
894+
}
895+
896+
pub fn get_tokens_optimized(&self) -> FilterTokens {
880897
let mut tokens: Vec<Hash> = Vec::with_capacity(TOKENS_BUFFER_SIZE);
881898

882899
// If there is only one domain and no domain negation, we also use this
@@ -930,21 +947,24 @@ impl NetworkFilter {
930947
// If we got no tokens for the filter/hostname part, then we will dispatch
931948
// this filter in multiple buckets based on the domains option.
932949
if tokens.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none() {
933-
self.opt_domains
934-
.as_ref()
935-
.unwrap_or(&vec![])
936-
.iter()
937-
.map(|&d| vec![d])
938-
.collect()
950+
if let Some(opt_domains) = self.opt_domains.as_ref() {
951+
if !opt_domains.is_empty() {
952+
return FilterTokens::OptDomains(opt_domains.clone());
953+
}
954+
}
955+
FilterTokens::Empty
939956
} else {
940957
// Add optional token for protocol
941958
if self.for_http() && !self.for_https() {
942959
tokens.push(utils::fast_hash("http"));
943960
} else if self.for_https() && !self.for_http() {
944961
tokens.push(utils::fast_hash("https"));
945962
}
946-
tokens.shrink_to_fit();
947-
vec![tokens]
963+
964+
// Remake a vector to drop extra capacity.
965+
let mut t = Vec::with_capacity(tokens.len());
966+
t.extend(tokens);
967+
FilterTokens::Other(t)
948968
}
949969
}
950970
}

src/network_filter_list.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ use crate::filters::fb_network::FlatNetworkFilter;
88
use crate::filters::filter_data_context::FilterDataContext;
99
use crate::filters::flatbuffer_generated::fb;
1010
use crate::filters::network::{
11-
NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable,
11+
FilterTokens, NetworkFilter, NetworkFilterMask, NetworkFilterMaskHelper, NetworkMatchable,
1212
};
1313
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapView;
1414
use crate::flatbuffers::unsafe_tools::fb_vector_to_slice;
1515
use crate::regex_manager::RegexManager;
1616
use crate::request::Request;
17-
use crate::utils::{fast_hash, to_short_hash, Hash, ShortHash};
17+
use crate::utils::{fast_hash, to_short_hash, ShortHash};
1818

1919
/// Holds relevant information from a single matching network filter rule as a result of querying a
2020
/// [NetworkFilterList] for a given request.
@@ -155,16 +155,19 @@ impl NetworkFilterList<'_> {
155155
}
156156

157157
pub(crate) fn token_histogram<T>(
158-
filter_tokens: &[(T, Vec<Vec<Hash>>)],
158+
filter_tokens: &[(T, FilterTokens)],
159159
) -> (u32, HashMap<ShortHash, u32>) {
160160
let mut tokens_histogram: HashMap<ShortHash, u32> = HashMap::new();
161161
let mut number_of_tokens = 0;
162162
for (_, tokens) in filter_tokens.iter() {
163-
for tg in tokens {
164-
for t in tg {
165-
*tokens_histogram.entry(to_short_hash(*t)).or_insert(0) += 1;
166-
number_of_tokens += 1;
163+
match tokens {
164+
FilterTokens::Other(tokens) | FilterTokens::OptDomains(tokens) => {
165+
for t in tokens {
166+
*tokens_histogram.entry(to_short_hash(*t)).or_insert(0) += 1;
167+
number_of_tokens += 1;
168+
}
167169
}
170+
FilterTokens::Empty => {}
168171
}
169172
}
170173

0 commit comments

Comments
 (0)