Skip to content

Commit 013f82a

Browse files
authored
Merge pull request #536 from brave/optimize-ruleset-building
Optimize ruleset building
2 parents 7091433 + 6d3fef0 commit 013f82a

File tree

6 files changed: +107 -48 lines changed

src/cosmetic_filter_cache_builder.rs

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::cosmetic_filter_cache::ProceduralOrActionFilter;
66
use crate::cosmetic_filter_utils::SpecificFilterType;
77
use crate::cosmetic_filter_utils::{encode_script_with_permission, key_from_selector};
88
use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterMask, CosmeticFilterOperator};
9+
use crate::filters::fb_builder::{EngineFlatBuilder, ShareableString};
910
use crate::filters::flatbuffer_generated::fb;
1011
use crate::flatbuffers::containers::flat_map::FlatMapBuilder;
1112
use crate::flatbuffers::containers::flat_multimap::FlatMultiMapBuilder;
@@ -27,18 +28,18 @@ use flatbuffers::WIPOffset;
2728
/// See HostnameSpecificRules declaration for more details.
2829
#[derive(Default)]
2930
struct HostnameRule {
30-
unhide: Vec<String>,
31-
uninject_script: Vec<String>,
32-
procedural_action: Vec<String>,
33-
procedural_action_exception: Vec<String>,
31+
unhide: Vec<ShareableString>,
32+
uninject_script: Vec<ShareableString>,
33+
procedural_action: Vec<ShareableString>,
34+
procedural_action_exception: Vec<ShareableString>,
3435
}
3536

36-
impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for HostnameRule {
37+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for HostnameRule {
3738
type Output = WIPOffset<fb::HostnameSpecificRules<'a>>;
3839

3940
fn serialize(
4041
value: Self,
41-
builder: &mut B,
42+
builder: &mut EngineFlatBuilder<'a>,
4243
) -> flatbuffers::WIPOffset<fb::HostnameSpecificRules<'a>> {
4344
let unhide = serialize_vec_opt(value.unhide, builder);
4445
let uninject_script = serialize_vec_opt(value.uninject_script, builder);
@@ -69,29 +70,29 @@ pub(crate) struct CosmeticFilterCacheBuilder {
6970
complex_class_rules: HashMapBuilder<String, StringVector>,
7071
complex_id_rules: HashMapBuilder<String, StringVector>,
7172

72-
hostname_hide: FlatMultiMapBuilder<Hash, String>,
73-
hostname_inject_script: FlatMultiMapBuilder<Hash, String>,
73+
hostname_hide: FlatMultiMapBuilder<Hash, ShareableString>,
74+
hostname_inject_script: FlatMultiMapBuilder<Hash, ShareableString>,
7475

7576
specific_rules: HashMap<Hash, HostnameRule>,
7677
}
7778

7879
impl CosmeticFilterCacheBuilder {
79-
pub fn from_rules(rules: Vec<CosmeticFilter>) -> Self {
80+
pub fn from_rules(rules: Vec<CosmeticFilter>, builder: &mut EngineFlatBuilder) -> Self {
8081
let mut self_ = Self::default();
8182

8283
for rule in rules {
83-
self_.add_filter(rule)
84+
self_.add_filter(rule, builder);
8485
}
8586

8687
self_
8788
}
8889

89-
pub fn add_filter(&mut self, rule: CosmeticFilter) {
90+
pub fn add_filter(&mut self, rule: CosmeticFilter, builder: &mut EngineFlatBuilder) {
9091
if rule.has_hostname_constraint() {
9192
if let Some(generic_rule) = rule.hidden_generic_rule() {
9293
self.add_generic_filter(generic_rule);
9394
}
94-
self.store_hostname_rule(rule);
95+
self.store_hostname_rule(rule, builder);
9596
} else {
9697
self.add_generic_filter(rule);
9798
}
@@ -139,7 +140,7 @@ impl CosmeticFilterCacheBuilder {
139140
}
140141
}
141142

142-
fn store_hostname_rule(&mut self, rule: CosmeticFilter) {
143+
fn store_hostname_rule(&mut self, rule: CosmeticFilter, builder: &mut EngineFlatBuilder) {
143144
use SpecificFilterType::*;
144145

145146
let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE);
@@ -171,45 +172,74 @@ impl CosmeticFilterCacheBuilder {
171172
.chain(rule.hostnames.unwrap_or_default())
172173
.chain(rule.entities.unwrap_or_default());
173174

174-
tokens_to_insert.for_each(|t| self.store_hostname_filter(&t, kind.clone()));
175+
self.store_hostname_filter(tokens_to_insert, &kind, builder);
175176

177+
let negated = kind.negated();
176178
let tokens_to_insert_negated = std::iter::empty()
177179
.chain(rule.not_hostnames.unwrap_or_default())
178180
.chain(rule.not_entities.unwrap_or_default());
179181

180-
let negated = kind.negated();
181-
182-
tokens_to_insert_negated.for_each(|t| self.store_hostname_filter(&t, negated.clone()));
182+
self.store_hostname_filter(tokens_to_insert_negated, &negated, builder);
183183
}
184184

185-
fn store_hostname_filter(&mut self, token: &Hash, kind: SpecificFilterType) {
185+
fn store_hostname_filter(
186+
&mut self,
187+
tokens: impl IntoIterator<Item = Hash>,
188+
kind: &SpecificFilterType,
189+
builder: &mut EngineFlatBuilder,
190+
) {
186191
use SpecificFilterType::*;
187192

188193
match kind {
189194
// Handle hide and inject_script at top level for better deduplication
190195
Hide(s) => {
191-
self.hostname_hide.insert(*token, s);
196+
let mut shareable_string = None;
197+
for token in tokens {
198+
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
199+
self.hostname_hide.insert(token, s.clone());
200+
}
192201
}
193202
InjectScript((s, permission)) => {
194-
let encoded_script = encode_script_with_permission(s, permission);
195-
self.hostname_inject_script.insert(*token, encoded_script);
203+
let mut shareable_string = None;
204+
for token in tokens {
205+
let s = shareable_string.get_or_insert_with(|| {
206+
builder.add_shareable_string(&encode_script_with_permission(s, permission))
207+
});
208+
self.hostname_inject_script.insert(token, s.clone());
209+
}
196210
}
197211
// Handle remaining types through HostnameRule
198212
Unhide(s) => {
199-
let entry = self.specific_rules.entry(*token).or_default();
200-
entry.unhide.push(s);
213+
let mut shareable_string = None;
214+
for token in tokens {
215+
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
216+
let entry = self.specific_rules.entry(token).or_default();
217+
entry.unhide.push(s.clone());
218+
}
201219
}
202220
UninjectScript((s, _)) => {
203-
let entry = self.specific_rules.entry(*token).or_default();
204-
entry.uninject_script.push(s);
221+
let mut shareable_string = None;
222+
for token in tokens {
223+
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
224+
let entry = self.specific_rules.entry(token).or_default();
225+
entry.uninject_script.push(s.clone());
226+
}
205227
}
206228
ProceduralOrAction(s) => {
207-
let entry = self.specific_rules.entry(*token).or_default();
208-
entry.procedural_action.push(s);
229+
let mut shareable_string = None;
230+
for token in tokens {
231+
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
232+
let entry = self.specific_rules.entry(token).or_default();
233+
entry.procedural_action.push(s.clone());
234+
}
209235
}
210236
ProceduralOrActionException(s) => {
211-
let entry = self.specific_rules.entry(*token).or_default();
212-
entry.procedural_action_exception.push(s);
237+
let mut shareable_string = None;
238+
for token in tokens {
239+
let s = shareable_string.get_or_insert_with(|| builder.add_shareable_string(s));
240+
let entry = self.specific_rules.entry(token).or_default();
241+
entry.procedural_action_exception.push(s.clone());
242+
}
213243
}
214244
}
215245
}
@@ -227,10 +257,13 @@ impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for StringVector {
227257
}
228258
}
229259

230-
impl<'a, B: FlatBuilder<'a>> FlatSerialize<'a, B> for CosmeticFilterCacheBuilder {
260+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for CosmeticFilterCacheBuilder {
231261
type Output = WIPOffset<fb::CosmeticFilters<'a>>;
232262

233-
fn serialize(value: Self, builder: &mut B) -> WIPOffset<fb::CosmeticFilters<'a>> {
263+
fn serialize(
264+
value: Self,
265+
builder: &mut EngineFlatBuilder<'a>,
266+
) -> WIPOffset<fb::CosmeticFilters<'a>> {
234267
let complex_class_rules = HashMapBuilder::finish(value.complex_class_rules, builder);
235268
let complex_id_rules = HashMapBuilder::finish(value.complex_id_rules, builder);
236269

src/cosmetic_filter_utils.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,17 +86,16 @@ impl SpecificFilterType {
8686

8787
/// Encodes permission bits in the last 2 ascii chars of a script string
8888
/// Returns the script with permission appended
89-
pub(crate) fn encode_script_with_permission(
90-
mut script: String,
91-
permission: PermissionMask,
92-
) -> String {
89+
pub(crate) fn encode_script_with_permission(script: &str, permission: &PermissionMask) -> String {
9390
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
9491
let high = (permission.to_bits() >> 4) as usize;
9592
let low = (permission.to_bits() & 0x0f) as usize;
9693

97-
script.push(HEX_CHARS[high] as char);
98-
script.push(HEX_CHARS[low] as char);
99-
script
94+
let mut encoded_script = String::with_capacity(script.len() + 2);
95+
encoded_script.push_str(script);
96+
encoded_script.push(HEX_CHARS[high] as char);
97+
encoded_script.push(HEX_CHARS[low] as char);
98+
encoded_script
10099
}
101100

102101
/// Decodes permission bits from the last 2 ascii chars of a script string
@@ -133,7 +132,7 @@ mod tests {
133132
let script = "console.log('测试 🚀 emoji')".to_string();
134133
let permission = PermissionMask::from_bits(permission);
135134

136-
let encoded = encode_script_with_permission(script.clone(), permission);
135+
let encoded = encode_script_with_permission(&script, &permission);
137136
let (decoded_permission, decoded_script) = decode_script_with_permission(&encoded);
138137

139138
assert_eq!(decoded_permission.to_bits(), permission.to_bits());

src/data_format/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf];
1717

1818
/// The version of the data format.
1919
/// If the data format version is incremented, the data is considered as incompatible.
20-
const ADBLOCK_RUST_DAT_VERSION: u8 = 2;
20+
const ADBLOCK_RUST_DAT_VERSION: u8 = 3;
2121

2222
/// The total length of the header prefix (magic + version + seahash)
2323
const HEADER_PREFIX_LENGTH: usize = 4 + 1 + 8;

src/engine.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ fn make_flatbuffer(
328328
let mut builder = EngineFlatBuilder::default();
329329
let network_rules_builder = NetworkRulesBuilder::from_rules(network_filters, optimize);
330330
let network_rules = FlatSerialize::serialize(network_rules_builder, &mut builder);
331-
let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters);
331+
let cosmetic_rules = CosmeticFilterCacheBuilder::from_rules(cosmetic_filters, &mut builder);
332332
let cosmetic_rules = FlatSerialize::serialize(cosmetic_rules, &mut builder);
333333
builder.finish(network_rules, cosmetic_rules)
334334
}

src/filters/fb_builder.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,24 @@ use std::collections::HashMap;
55
use flatbuffers::WIPOffset;
66

77
use crate::filters::fb_network_builder::NetworkFilterListBuilder;
8-
use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, WIPFlatVec};
8+
use crate::flatbuffers::containers::flat_serialize::{FlatBuilder, FlatSerialize, WIPFlatVec};
99
use crate::flatbuffers::unsafe_tools::VerifiedFlatbufferMemory;
1010
use crate::utils::Hash;
1111

1212
use super::flat::fb;
1313

14+
#[derive(Clone, Default)]
15+
pub(crate) struct ShareableString {
16+
index: Option<usize>,
17+
}
18+
1419
#[derive(Default)]
1520
pub(crate) struct EngineFlatBuilder<'a> {
1621
fb_builder: flatbuffers::FlatBufferBuilder<'a>,
1722
unique_domains_hashes: Vec<Hash>,
1823
unique_domains_hashes_map: HashMap<Hash, u32>,
24+
shared_strings: Vec<WIPOffset<&'a str>>,
25+
shared_strings_original: Vec<String>,
1926
}
2027

2128
impl<'a> EngineFlatBuilder<'a> {
@@ -29,6 +36,15 @@ impl<'a> EngineFlatBuilder<'a> {
2936
index
3037
}
3138

39+
pub fn add_shareable_string(&mut self, s: &str) -> ShareableString {
40+
let wip_offset = self.fb_builder.create_string(s);
41+
self.shared_strings.push(wip_offset);
42+
self.shared_strings_original.push(s.to_string());
43+
ShareableString {
44+
index: Some(self.shared_strings.len() - 1),
45+
}
46+
}
47+
3248
pub fn finish(
3349
&mut self,
3450
network_rules: WIPFlatVec<'a, NetworkFilterListBuilder, EngineFlatBuilder<'a>>,
@@ -58,3 +74,14 @@ impl<'a> FlatBuilder<'a> for EngineFlatBuilder<'a> {
5874
&mut self.fb_builder
5975
}
6076
}
77+
78+
impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for ShareableString {
79+
type Output = WIPOffset<&'a str>;
80+
fn serialize(value: Self, builder: &mut EngineFlatBuilder<'a>) -> Self::Output {
81+
if let Some(index) = value.index {
82+
builder.shared_strings[index]
83+
} else {
84+
builder.raw_builder().create_shared_string("")
85+
}
86+
}
87+
}

tests/unit/engine.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ mod tests {
183183
fn deserialization_generate_simple() {
184184
let mut engine = Engine::from_rules(["ad-banner"], Default::default());
185185
let data = engine.serialize().to_vec();
186-
const EXPECTED_HASH: u64 = 884296823183764168;
186+
const EXPECTED_HASH: u64 = 10945714988765761881;
187187
assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}");
188188
engine.deserialize(&data).unwrap();
189189
}
@@ -193,7 +193,7 @@ mod tests {
193193
let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default());
194194
engine.use_tags(&["abc"]);
195195
let data = engine.serialize().to_vec();
196-
const EXPECTED_HASH: u64 = 7887643884738497753;
196+
const EXPECTED_HASH: u64 = 4608037684406751718;
197197
assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}");
198198
engine.deserialize(&data).unwrap();
199199
}
@@ -221,8 +221,8 @@ mod tests {
221221
#[cfg(feature = "debug-info")]
222222
{
223223
let debug_info = engine.get_debug_info();
224-
let low_bound = 9_500_000;
225-
let high_bound = 10_000_000;
224+
let low_bound = 8_000_000;
225+
let high_bound = 8_500_000;
226226
assert!(
227227
debug_info.flatbuffer_size >= low_bound,
228228
"Expected size >= {} bytes, got {}",
@@ -237,9 +237,9 @@ mod tests {
237237
);
238238
}
239239
let expected_hash: u64 = if cfg!(feature = "css-validation") {
240-
18094146314477408965
240+
9439492009815519037
241241
} else {
242-
8215024964158872824
242+
14803842039735157685
243243
};
244244

245245
assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}");

0 commit comments

Comments (0)