Skip to content

Commit c4f7e16

Browse files
authored
perf: replace source data struct (#194)
1 parent c5b3502 commit c4f7e16

File tree

3 files changed

+55
-98
lines changed

3 files changed

+55
-98
lines changed

Cargo.lock

Lines changed: 2 additions & 12 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ serde_json = "1.0.133"
3535
dyn-clone = "1.0.17"
3636
rustc-hash = "2.1.0"
3737
memchr = "2.7.4"
38-
itertools = "0.13"
3938

4039

4140
codspeed-criterion-compat = { version = "2.7.2", default-features = false, optional = true }

src/replace_source.rs

Lines changed: 53 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,8 @@ use std::{
22
borrow::Cow,
33
cell::RefCell,
44
hash::{Hash, Hasher},
5-
sync::{
6-
atomic::{AtomicBool, Ordering},
7-
Mutex,
8-
},
95
};
106

11-
use itertools::Itertools;
127
use rustc_hash::FxHashMap as HashMap;
138

149
use crate::{
@@ -44,9 +39,6 @@ use crate::{
4439
pub struct ReplaceSource {
4540
inner: BoxSource,
4641
replacements: Vec<Replacement>,
47-
sorted_index: Mutex<Vec<usize>>,
48-
/// Whether `replacements` is sorted.
49-
is_sorted: AtomicBool,
5042
}
5143

5244
/// Enforce replacement order when two replacements have the same start and end
@@ -68,23 +60,23 @@ struct Replacement {
6860
content: String,
6961
name: Option<String>,
7062
enforce: ReplacementEnforce,
63+
insertion_order: u32,
7164
}
7265

73-
impl Replacement {
74-
pub fn new(
75-
start: u32,
76-
end: u32,
77-
content: String,
78-
name: Option<String>,
79-
enforce: ReplacementEnforce,
80-
) -> Self {
81-
Self {
82-
start,
83-
end,
84-
content,
85-
name,
86-
enforce,
87-
}
66+
impl Ord for Replacement {
67+
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
68+
(self.start, self.end, self.enforce, self.insertion_order).cmp(&(
69+
other.start,
70+
other.end,
71+
other.enforce,
72+
other.insertion_order,
73+
))
74+
}
75+
}
76+
77+
impl PartialOrd for Replacement {
78+
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
79+
Some(self.cmp(other))
8880
}
8981
}
9082

@@ -94,36 +86,8 @@ impl ReplaceSource {
9486
Self {
9587
inner: source.boxed(),
9688
replacements: Vec::new(),
97-
sorted_index: Mutex::new(Vec::new()),
98-
is_sorted: AtomicBool::new(true),
9989
}
10090
}
101-
102-
fn sort_replacement(&self) {
103-
if self.is_sorted.load(Ordering::SeqCst) {
104-
return;
105-
}
106-
let sorted_index = self
107-
.replacements
108-
.iter()
109-
.enumerate()
110-
.sorted_by(|(_, a), (_, b)| {
111-
(a.start, a.end, a.enforce).cmp(&(b.start, b.end, b.enforce))
112-
})
113-
.map(|replacement| replacement.0)
114-
.collect::<Vec<_>>();
115-
*self.sorted_index.lock().unwrap() = sorted_index;
116-
self.is_sorted.store(true, Ordering::SeqCst)
117-
}
118-
119-
fn sorted_replacement(&self) -> Vec<&Replacement> {
120-
self.sort_replacement();
121-
let sorted_index = self.sorted_index.lock().unwrap();
122-
sorted_index
123-
.iter()
124-
.map(|idx| &self.replacements[*idx])
125-
.collect()
126-
}
12791
}
12892

12993
impl ReplaceSource {
@@ -151,14 +115,13 @@ impl ReplaceSource {
151115
content: &str,
152116
name: Option<&str>,
153117
) {
154-
self.replacements.push(Replacement::new(
118+
self.replace_with_enforce(
155119
start,
156120
end,
157-
content.into(),
158-
name.map(|s| s.into()),
121+
content,
122+
name,
159123
ReplacementEnforce::Normal,
160-
));
161-
self.is_sorted.store(false, Ordering::SeqCst);
124+
);
162125
}
163126

164127
/// Create a replacement with content at `[start, end)`, with ReplacementEnforce.
@@ -170,14 +133,30 @@ impl ReplaceSource {
170133
name: Option<&str>,
171134
enforce: ReplacementEnforce,
172135
) {
173-
self.replacements.push(Replacement::new(
136+
let replacement = Replacement {
174137
start,
175138
end,
176-
content.into(),
177-
name.map(|s| s.into()),
139+
content: content.into(),
140+
name: name.map(|s| s.into()),
178141
enforce,
179-
));
180-
self.is_sorted.store(false, Ordering::SeqCst);
142+
insertion_order: self.replacements.len() as u32,
143+
};
144+
145+
if let Some(last) = self.replacements.last() {
146+
let cmp = replacement.cmp(last);
147+
if cmp == std::cmp::Ordering::Greater || cmp == std::cmp::Ordering::Equal
148+
{
149+
self.replacements.push(replacement);
150+
} else {
151+
let insert_at = self
152+
.replacements
153+
.binary_search_by(|other| other.cmp(&replacement))
154+
.unwrap_or_else(|e| e);
155+
self.replacements.insert(insert_at, replacement);
156+
}
157+
} else {
158+
self.replacements.push(replacement);
159+
}
181160
}
182161
}
183162

@@ -187,18 +166,13 @@ impl Source for ReplaceSource {
187166

188167
// mut_string_push_str is faster than vec join
189168
// concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs
190-
let replacements = self.sorted_replacement();
191-
if replacements.is_empty() {
169+
if self.replacements.is_empty() {
192170
return SourceValue::String(inner_source_code);
193171
}
194-
let max_len = replacements
195-
.iter()
196-
.map(|replacement| replacement.content.len())
197-
.sum::<usize>()
198-
+ inner_source_code.len();
199-
let mut source_code = String::with_capacity(max_len);
172+
let capacity = self.size();
173+
let mut source_code = String::with_capacity(capacity);
200174
let mut inner_pos = 0;
201-
for replacement in replacements.iter() {
175+
for replacement in &self.replacements {
202176
if inner_pos < replacement.start {
203177
let end_pos = (replacement.start as usize).min(inner_source_code.len());
204178
source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]);
@@ -223,13 +197,12 @@ impl Source for ReplaceSource {
223197

224198
// mut_string_push_str is faster than vec join
225199
// concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs
226-
let replacements = self.sorted_replacement();
227-
if replacements.is_empty() {
200+
if self.replacements.is_empty() {
228201
return inner_source_code;
229202
}
230203
let mut source_code = Rope::new();
231204
let mut inner_pos = 0;
232-
for replacement in replacements.iter() {
205+
for replacement in self.replacements.iter() {
233206
if inner_pos < replacement.start {
234207
let end_pos = (replacement.start as usize).min(inner_source_code.len());
235208
let slice = inner_source_code.byte_slice(inner_pos as usize..end_pos);
@@ -256,17 +229,16 @@ impl Source for ReplaceSource {
256229

257230
fn size(&self) -> usize {
258231
let inner_source_size = self.inner.size();
259-
let replacements = self.sorted_replacement();
260232

261-
if replacements.is_empty() {
233+
if self.replacements.is_empty() {
262234
return inner_source_size;
263235
}
264236

265237
// Simulate the replacement process to calculate accurate size
266238
let mut size = inner_source_size;
267239
let mut inner_pos = 0u32;
268240

269-
for replacement in replacements.iter() {
241+
for replacement in self.replacements.iter() {
270242
// Add original content before replacement
271243
if inner_pos < replacement.start {
272244
// This content is already counted in inner_source_size, so no change needed
@@ -316,7 +288,7 @@ impl std::fmt::Debug for ReplaceSource {
316288
writeln!(f, "{indent_str} let mut source = ReplaceSource::new(")?;
317289
writeln!(f, "{:indent$?}", &self.inner, indent = indent + 4)?;
318290
writeln!(f, "{indent_str} );")?;
319-
for repl in self.sorted_replacement() {
291+
for repl in self.replacements.iter() {
320292
match repl.enforce {
321293
ReplacementEnforce::Pre => {
322294
writeln!(
@@ -375,7 +347,7 @@ impl StreamChunks for ReplaceSource {
375347
on_name: crate::helpers::OnName<'_, 'a>,
376348
) -> crate::helpers::GeneratedInfo {
377349
let on_name = RefCell::new(on_name);
378-
let repls = &self.sorted_replacement();
350+
let repls = &self.replacements;
379351
let mut pos: u32 = 0;
380352
let mut i: usize = 0;
381353
let mut replacement_end: Option<u32> = None;
@@ -546,9 +518,7 @@ impl StreamChunks for ReplaceSource {
546518
// Insert replacement content split into chunks by lines
547519
#[allow(unsafe_code)]
548520
// SAFETY: The safety of this operation relies on the fact that the `ReplaceSource` type will not delete the `replacements` during its entire lifetime.
549-
let repl = unsafe {
550-
std::mem::transmute::<&Replacement, &'a Replacement>(repls[i])
551-
};
521+
let repl = &repls[i];
552522

553523
let lines =
554524
split_into_lines(&repl.content.as_str()).collect::<Vec<_>>();
@@ -789,16 +759,14 @@ impl Clone for ReplaceSource {
789759
Self {
790760
inner: self.inner.clone(),
791761
replacements: self.replacements.clone(),
792-
sorted_index: Mutex::new(self.sorted_index.lock().unwrap().clone()),
793-
is_sorted: AtomicBool::new(self.is_sorted.load(Ordering::SeqCst)),
794762
}
795763
}
796764
}
797765

798766
impl Hash for ReplaceSource {
799767
fn hash<H: Hasher>(&self, state: &mut H) {
800768
"ReplaceSource".hash(state);
801-
for repl in self.sorted_replacement() {
769+
for repl in &self.replacements {
802770
repl.hash(state);
803771
}
804772
self.inner.hash(state);
@@ -1217,7 +1185,7 @@ return <div>{data.foo}</div>
12171185
assert_eq!(source.map(&MapOptions::default()), None);
12181186
let mut hasher = twox_hash::XxHash64::default();
12191187
source.hash(&mut hasher);
1220-
assert_eq!(format!("{:x}", hasher.finish()), "899cecd4bd020d47");
1188+
assert_eq!(format!("{:x}", hasher.finish()), "15e48cdf294935ab");
12211189
}
12221190

12231191
#[test]

0 commit comments

Comments
 (0)