Skip to content

Commit 04a095e

Browse files
authored
tokenize some regexp patterns (#554)
1 parent 33c03d9 commit 04a095e

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

src/filters/network.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ pub enum FilterPart {
311311
AnyOf(Vec<String>),
312312
}
313313

314+
#[derive(Debug, PartialEq)]
314315
pub enum FilterTokens {
315316
Empty,
316317
OptDomains(Vec<Hash>),
@@ -929,6 +930,12 @@ impl NetworkFilter {
929930
if let Some(hostname) = self.hostname.as_ref() {
930931
utils::tokenize_to(hostname, &mut tokens);
931932
}
933+
} else if let Some(hostname) = self.hostname.as_ref() {
934+
// Tokenize only the part of the hostname before its last dot; with no dot, add no tokens
935+
let last_dot_pos = hostname.rfind('.');
936+
if let Some(last_dot_pos) = last_dot_pos {
937+
utils::tokenize_to(&hostname[..last_dot_pos], &mut tokens);
938+
}
932939
}
933940

934941
if tokens.is_empty() && self.mask.contains(NetworkFilterMask::IS_REMOVEPARAM) {

tests/unit/engine.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,9 +237,9 @@ mod tests {
237237
);
238238
}
239239
let expected_hash: u64 = if cfg!(feature = "css-validation") {
240-
9439492009815519037
240+
15545091389304905433
241241
} else {
242-
14803842039735157685
242+
543362704487480180
243243
};
244244

245245
assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}");

tests/unit/filters/network.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,4 +1186,17 @@ mod parse_tests {
11861186
defaults.opt_domains = Some(vec![utils::fast_hash("auth.wi-fi.ru")]);
11871187
assert_eq!(defaults, NetworkFilterBreakdown::from(&filter));
11881188
}
1189+
1190+
#[test]
1191+
fn test_simple_pattern_tokenization() {
1192+
let rule = "||some.primewire.c*/sw$script,1p";
1193+
let filter = NetworkFilter::parse(rule, true, ParseOptions::default()).unwrap();
1194+
assert_eq!(
1195+
filter.get_tokens_optimized(),
1196+
FilterTokens::Other(vec![
1197+
utils::fast_hash("some"),
1198+
utils::fast_hash("primewire")
1199+
])
1200+
);
1201+
}
11891202
}

0 commit comments

Comments
 (0)