Commit a48509d

Change get_tokens_optimized()
1 parent 6d24c8b commit a48509d

5 files changed (+36, -24 lines)

src/filters/fb_network_builder.rs

Lines changed: 3 additions & 1 deletion

```diff
@@ -7,6 +7,7 @@ use flatbuffers::WIPOffset;
 use crate::filters::fb_builder::EngineFlatBuilder;
 use crate::filters::network::{FilterTokens, NetworkFilter};
 use crate::filters::token_selector::TokenSelector;
+use crate::utils::TokensBuffer;
 
 use crate::filters::network::NetworkFilterMaskHelper;
 use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
@@ -134,6 +135,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
         let mut optimizable = HashMap::<ShortHash, Vec<NetworkFilter>>::new();
 
         let mut token_frequencies = TokenSelector::new(rule_list.filters.len());
+        let mut tokens_buffer = TokensBuffer::default();
 
         {
             for network_filter in rule_list.filters {
@@ -157,7 +159,7 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for NetworkFilterListBuilder {
                     }
                 };
 
-                let multi_tokens = network_filter.get_tokens_optimized();
+                let multi_tokens = network_filter.get_tokens_optimized(&mut tokens_buffer);
                 match multi_tokens {
                     FilterTokens::Empty => {
                         // No tokens, add to fallback bucket (token 0)
```
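
The buffer is created once, outside the per-filter loop, and each call to `get_tokens_optimized()` clears and refills it, so tokenization no longer allocates per filter. A minimal self-contained sketch of the borrow pattern (the name `tokens_of` and the `Vec`-backed buffer are illustrative stand-ins, not crate code):

```rust
// Toy model of the reuse pattern above: `tokens_of` clears and refills a
// caller-owned buffer and returns a slice tied to that buffer's lifetime.
type Hash = u64;

fn tokens_of<'a>(input: &str, buf: &'a mut Vec<Hash>) -> &'a [Hash] {
    buf.clear();
    buf.extend(input.split('.').map(|part| part.len() as Hash)); // stand-in tokenizer
    buf.as_slice()
}

fn main() {
    let mut buf: Vec<Hash> = Vec::new(); // created once, reused for every rule
    for rule in ["some.primewire", "example.com"] {
        // The slice from the previous iteration is dead by this point, so
        // re-borrowing `buf` mutably here satisfies the borrow checker.
        let tokens = tokens_of(rule, &mut buf);
        println!("{rule}: {tokens:?}");
    }
}
```

The same reasoning lets the builder keep a single `TokensBuffer` for the whole rule list: each iteration's `FilterTokens` is consumed by the `match` before the next filter re-borrows the buffer.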

src/filters/network.rs

Lines changed: 20 additions & 19 deletions

```diff
@@ -310,10 +310,10 @@ pub enum FilterPart {
 }
 
 #[derive(Debug, PartialEq)]
-pub enum FilterTokens {
+pub enum FilterTokens<'a> {
     Empty,
-    OptDomains(Vec<Hash>),
-    Other(Vec<Hash>),
+    OptDomains(&'a [Hash]),
+    Other(&'a [Hash]),
 }
 
 pub struct FilterPartIterator<'a> {
@@ -883,17 +883,18 @@ impl NetworkFilter {
 
     #[deprecated(since = "0.11.1", note = "use get_tokens_optimized instead")]
     pub fn get_tokens(&self) -> Vec<Vec<Hash>> {
-        match self.get_tokens_optimized() {
+        let mut tokens_buffer = TokensBuffer::default();
+        match self.get_tokens_optimized(&mut tokens_buffer) {
             FilterTokens::OptDomains(domains) => {
-                domains.into_iter().map(|domain| vec![domain]).collect()
+                domains.into_iter().map(|domain| vec![*domain]).collect()
             }
-            FilterTokens::Other(tokens) => vec![tokens],
+            FilterTokens::Other(tokens) => vec![tokens.to_vec()],
             FilterTokens::Empty => vec![],
         }
     }
 
-    pub fn get_tokens_optimized(&self) -> FilterTokens {
-        let mut tokens = TokensBuffer::default();
+    pub fn get_tokens_optimized<'a>(&'a self, tokens_buffer: &'a mut TokensBuffer) -> FilterTokens<'a> {
+        tokens_buffer.clear();
 
         // If there is only one domain and no domain negation, we also use this
         // domain as a token.
@@ -903,7 +904,7 @@ impl NetworkFilter {
         {
             if let Some(domains) = self.opt_domains.as_ref() {
                 if let Some(domain) = domains.first() {
-                    tokens.push(*domain);
+                    tokens_buffer.push(*domain);
                 }
             }
         }
@@ -916,7 +917,7 @@ impl NetworkFilter {
                         (self.is_plain() || self.is_regex()) && !self.is_right_anchor();
                     let skip_first_token = self.is_right_anchor();
 
-                    utils::tokenize_filter_to(f, skip_first_token, skip_last_token, &mut tokens);
+                    utils::tokenize_filter_to(f, skip_first_token, skip_last_token, tokens_buffer);
                 }
             }
             FilterPart::AnyOf(_) => (), // across AnyOf set of filters no single token is guaranteed to match to a request
@@ -926,42 +927,42 @@ impl NetworkFilter {
         // Append tokens from hostname, if any
         if !self.mask.contains(NetworkFilterMask::IS_HOSTNAME_REGEX) {
             if let Some(hostname) = self.hostname.as_ref() {
-                utils::tokenize_to(hostname, &mut tokens);
+                utils::tokenize_to(hostname, tokens_buffer);
             }
         } else if let Some(hostname) = self.hostname.as_ref() {
             // Find last dot to tokenize the prefix
             let last_dot_pos = hostname.rfind('.');
             if let Some(last_dot_pos) = last_dot_pos {
-                utils::tokenize_to(&hostname[..last_dot_pos], &mut tokens);
+                utils::tokenize_to(&hostname[..last_dot_pos], tokens_buffer);
             }
         }
 
-        if tokens.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
+        if tokens_buffer.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {
             if let Some(removeparam) = &self.modifier_option {
                 if VALID_PARAM.is_match(removeparam) {
-                    utils::tokenize_to(&removeparam.to_ascii_lowercase(), &mut tokens);
+                    utils::tokenize_to(&removeparam.to_ascii_lowercase(), tokens_buffer);
                 }
             }
         }
 
         // If we got no tokens for the filter/hostname part, then we will dispatch
         // this filter in multiple buckets based on the domains option.
-        if tokens.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none() {
+        if tokens_buffer.is_empty() && self.opt_domains.is_some() && self.opt_not_domains.is_none() {
             if let Some(opt_domains) = self.opt_domains.as_ref() {
                 if !opt_domains.is_empty() {
-                    return FilterTokens::OptDomains(opt_domains.clone());
+                    return FilterTokens::OptDomains(opt_domains);
                 }
             }
             FilterTokens::Empty
         } else {
             // Add optional token for protocol
             if self.for_http() && !self.for_https() {
-                tokens.push(utils::fast_hash("http"));
+                tokens_buffer.push(utils::fast_hash("http"));
             } else if self.for_https() && !self.for_http() {
-                tokens.push(utils::fast_hash("https"));
+                tokens_buffer.push(utils::fast_hash("https"));
             }
 
-            FilterTokens::Other(tokens.into_vec())
+            FilterTokens::Other(tokens_buffer.as_slice())
         }
     }
 }
```
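
With the new signature the caller owns the scratch buffer and receives borrowed slices, so the `opt_domains.clone()` and `into_vec()` heap copies disappear from the hot path. A hypothetical crate-internal consumer (a sketch, not code from this commit):

```rust
use crate::filters::network::{FilterTokens, NetworkFilter};
use crate::utils::{Hash, TokensBuffer};

// Illustrative helper: copies the borrowed tokens out so they can outlive
// the buffer. Callers that only need a transient view can skip `to_vec()`.
fn owned_tokens(filter: &NetworkFilter) -> Vec<Hash> {
    let mut tokens_buffer = TokensBuffer::default();
    match filter.get_tokens_optimized(&mut tokens_buffer) {
        // Borrowed straight from the filter's opt_domains list.
        FilterTokens::OptDomains(domains) => domains.to_vec(),
        // Borrowed from tokens_buffer; invalid once the buffer is reused.
        FilterTokens::Other(tokens) => tokens.to_vec(),
        FilterTokens::Empty => Vec::new(),
    }
}
```

Callers that do need owned data opt into the copy explicitly, exactly as the deprecated `get_tokens()` wrapper above now does with `to_vec()`.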

src/flatbuffers/unsafe_tools.rs

Lines changed: 9 additions & 1 deletion

```diff
@@ -101,7 +101,7 @@ impl VerifiedFlatbufferMemory {
 
 /// A simple stack-allocated vector.
 /// It is used to avoid allocations when the vector is small.
-pub(crate) struct StackVector<T, const MAX_SIZE: usize> {
+pub struct StackVector<T, const MAX_SIZE: usize> {
     data: [MaybeUninit<T>; MAX_SIZE],
     size: usize,
 }
@@ -133,6 +133,14 @@ impl<T, const MAX_SIZE: usize> StackVector<T, MAX_SIZE> {
         self.size == 0
     }
 
+    pub fn clear(&mut self) {
+        self.size = 0;
+    }
+
+    pub fn as_slice(&self) -> &[T] {
+        unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const T, self.size) }
+    }
+
     pub fn into_vec(self) -> Vec<T> {
         let mut v = Vec::with_capacity(self.size);
         for i in 0..self.size {
```
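
`as_slice` is sound because `StackVector` maintains the invariant that the first `size` slots of `data` are initialized; `clear` only resets the length, which is fine for `Copy` element types like the hashes `TokensBuffer` stores. A self-contained sketch of that invariant (simplified: it panics on overflow and skips the `Drop` handling a general-purpose container would need):

```rust
use std::mem::MaybeUninit;

// Simplified model of StackVector: `size` counts how many leading slots of
// `data` hold initialized values.
struct MiniStack<T, const N: usize> {
    data: [MaybeUninit<T>; N],
    size: usize, // invariant: data[0..size] are initialized
}

impl<T, const N: usize> MiniStack<T, N> {
    fn new() -> Self {
        // An array of MaybeUninit requires no initialization itself.
        Self { data: unsafe { MaybeUninit::uninit().assume_init() }, size: 0 }
    }

    fn push(&mut self, value: T) {
        assert!(self.size < N, "capacity exceeded");
        self.data[self.size].write(value);
        self.size += 1;
    }

    fn clear(&mut self) {
        // Resetting the length is enough for Copy elements; stale slots are
        // simply overwritten by later pushes. (Non-Copy elements would need
        // to be dropped here.)
        self.size = 0;
    }

    fn as_slice(&self) -> &[T] {
        // SAFETY: per the invariant, the first `size` elements are initialized.
        unsafe { std::slice::from_raw_parts(self.data.as_ptr() as *const T, self.size) }
    }
}

fn main() {
    let mut v: MiniStack<u64, 4> = MiniStack::new();
    v.push(1);
    v.push(2);
    assert_eq!(v.as_slice(), &[1, 2]);
    v.clear();
    assert!(v.as_slice().is_empty());
}
```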

src/utils.rs

Lines changed: 1 addition & 1 deletion

```diff
@@ -29,7 +29,7 @@ fn is_allowed_filter(ch: char) -> bool {
     ch.is_alphanumeric() || ch == '%'
 }
 
-pub(crate) type TokensBuffer = StackVector<Hash, 200>;
+pub type TokensBuffer = StackVector<Hash, 200>;
 
 fn fast_tokenizer_no_regex(
     pattern: &str,
```

tests/unit/filters/network.rs

Lines changed: 3 additions & 2 deletions

```diff
@@ -1191,9 +1191,10 @@ mod parse_tests {
     fn test_simple_pattern_tokenization() {
         let rule = "||some.primewire.c*/sw$script,1p";
         let filter = NetworkFilter::parse(rule, true, ParseOptions::default()).unwrap();
+        let mut tokens_buffer = utils::TokensBuffer::default();
         assert_eq!(
-            filter.get_tokens_optimized(),
-            FilterTokens::Other(vec![
+            filter.get_tokens_optimized(&mut tokens_buffer),
+            FilterTokens::Other(&[
                 utils::fast_hash("some"),
                 utils::fast_hash("primewire")
             ])
```
