Skip to content

Commit 851fa2b

Browse files
committed
perf: add rope
1 parent bc23974 commit 851fa2b

File tree

8 files changed

+192
-73
lines changed

8 files changed

+192
-73
lines changed

src/cached_source.rs

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use crate::{
2121
struct CachedData {
2222
hash: OnceLock<u64>,
2323
size: OnceLock<usize>,
24+
source: OnceLock<Vec<&'static str>>,
2425
columns_map: OnceLock<Option<SourceMap>>,
2526
line_only_map: OnceLock<Option<SourceMap>>,
2627
}
@@ -83,7 +84,20 @@ impl CachedSource {
8384

8485
impl Source for CachedSource {
8586
fn source(&self) -> SourceValue {
86-
self.inner.source()
87+
let rope = self.cache.source.get_or_init(|| {
88+
#[allow(unsafe_code)]
89+
// SAFETY: CachedSource guarantees that the underlying source outlives the cache,
90+
// so transmuting Vec<&str> to Vec<&'static str> is safe in this context.
91+
// This allows us to store string slices in the cache without additional allocations.
92+
unsafe {
93+
std::mem::transmute::<Vec<&str>, Vec<&'static str>>(self.rope())
94+
}
95+
});
96+
SourceValue::String(Cow::Owned(rope.join("")))
97+
}
98+
99+
fn rope(&self) -> Vec<&str> {
100+
self.inner.rope()
87101
}
88102

89103
fn buffer(&self) -> Cow<[u8]> {
@@ -114,10 +128,6 @@ impl Source for CachedSource {
114128
}
115129
}
116130

117-
fn write_to_string(&self, string: &mut String) {
118-
self.inner.write_to_string(string);
119-
}
120-
121131
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
122132
self.inner.to_writer(writer)
123133
}

src/concat_source.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -167,11 +167,19 @@ impl Source for ConcatSource {
167167
if children.len() == 1 {
168168
children[0].source()
169169
} else {
170-
// Use write_to_string to avoid multiple heap allocations that would occur
171-
// when concatenating nested ConcatSource instances directly
172-
let mut string = String::with_capacity(self.size());
173-
self.write_to_string(&mut string);
174-
SourceValue::String(Cow::Owned(string))
170+
SourceValue::String(Cow::Owned(self.rope().join("")))
171+
}
172+
}
173+
174+
fn rope(&self) -> Vec<&str> {
175+
let children = self.optimized_children();
176+
if children.len() == 1 {
177+
children[0].rope()
178+
} else {
179+
children
180+
.iter()
181+
.flat_map(|child| child.rope())
182+
.collect::<Vec<_>>()
175183
}
176184
}
177185

@@ -206,12 +214,6 @@ impl Source for ConcatSource {
206214
result
207215
}
208216

209-
fn write_to_string(&self, string: &mut String) {
210-
for child in self.optimized_children() {
211-
child.write_to_string(string);
212-
}
213-
}
214-
215217
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
216218
for child in self.optimized_children() {
217219
child.to_writer(writer)?;

src/original_source.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ impl Source for OriginalSource {
5656
SourceValue::String(Cow::Borrowed(&self.value))
5757
}
5858

59+
fn rope(&self) -> Vec<&str> {
60+
vec![self.value.as_ref()]
61+
}
62+
5963
fn buffer(&self) -> Cow<[u8]> {
6064
Cow::Borrowed(self.value.as_bytes())
6165
}
@@ -73,10 +77,6 @@ impl Source for OriginalSource {
7377
get_map(object_pool, chunks.as_ref(), options)
7478
}
7579

76-
fn write_to_string(&self, string: &mut String) {
77-
string.push_str(self.value.as_ref());
78-
}
79-
8080
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
8181
writer.write_all(self.value.as_bytes())
8282
}

src/raw_source.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ impl Source for RawStringSource {
6464
SourceValue::String(Cow::Borrowed(&self.0))
6565
}
6666

67+
fn rope(&self) -> Vec<&str> {
68+
vec![self.0.as_ref()]
69+
}
70+
6771
fn buffer(&self) -> Cow<[u8]> {
6872
Cow::Borrowed(self.0.as_bytes())
6973
}
@@ -76,10 +80,6 @@ impl Source for RawStringSource {
7680
None
7781
}
7882

79-
fn write_to_string(&self, string: &mut String) {
80-
string.push_str(self.0.as_ref());
81-
}
82-
8383
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
8484
writer.write_all(self.0.as_bytes())
8585
}
@@ -210,6 +210,10 @@ impl Source for RawBufferSource {
210210
SourceValue::Buffer(Cow::Borrowed(&self.value))
211211
}
212212

213+
fn rope(&self) -> Vec<&str> {
214+
vec![self.get_or_init_value_as_string()]
215+
}
216+
213217
fn buffer(&self) -> Cow<[u8]> {
214218
Cow::Borrowed(&self.value)
215219
}
@@ -222,10 +226,6 @@ impl Source for RawBufferSource {
222226
None
223227
}
224228

225-
fn write_to_string(&self, string: &mut String) {
226-
string.push_str(self.get_or_init_value_as_string());
227-
}
228-
229229
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
230230
writer.write_all(&self.value)
231231
}

src/replace_source.rs

Lines changed: 137 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -161,34 +161,150 @@ impl ReplaceSource {
161161

162162
impl Source for ReplaceSource {
163163
fn source(&self) -> SourceValue {
164-
let inner_source_code = self.inner.source().into_string_lossy();
164+
let rope = self.rope();
165+
if rope.len() == 1 {
166+
SourceValue::String(Cow::Borrowed(rope[0]))
167+
} else {
168+
SourceValue::String(Cow::Owned(rope.join("")))
169+
}
170+
}
171+
172+
fn rope(&self) -> Vec<&str> {
173+
let inner_source_code = self.inner.rope();
165174

166-
// mut_string_push_str is faster than vec join
167-
// concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs
168175
if self.replacements.is_empty() {
169-
return SourceValue::String(inner_source_code);
176+
return inner_source_code;
170177
}
171-
let capacity = self.size();
172-
let mut source_code = String::with_capacity(capacity);
173-
let mut inner_pos = 0;
174-
for replacement in &self.replacements {
175-
if inner_pos < replacement.start {
176-
let end_pos = (replacement.start as usize).min(inner_source_code.len());
177-
source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]);
178+
179+
let mut result = Vec::new();
180+
let mut pos: u32 = 0;
181+
let mut chunk_index = 0;
182+
let mut chunk_pos = 0; // Position within current chunk
183+
let mut replacement_index = 0;
184+
185+
// Record the starting offset of each chunk within the concatenated source
186+
let mut chunk_start_positions = Vec::new();
187+
let mut total_pos = 0;
188+
for chunk in &inner_source_code {
189+
chunk_start_positions.push(total_pos);
190+
total_pos += chunk.len() as u32;
191+
}
192+
193+
while replacement_index < self.replacements.len()
194+
|| chunk_index < inner_source_code.len()
195+
{
196+
let next_replacement = self.replacements.get(replacement_index);
197+
198+
// Process chunks until we hit a replacement or finish
199+
while chunk_index < inner_source_code.len() {
200+
let chunk = inner_source_code[chunk_index];
201+
let chunk_start = chunk_start_positions[chunk_index];
202+
let chunk_end = chunk_start + chunk.len() as u32;
203+
204+
// Check if there's a replacement that starts within this chunk
205+
if let Some(replacement) = next_replacement {
206+
if replacement.start >= chunk_start && replacement.start < chunk_end {
207+
// Replacement starts within this chunk
208+
let offset_in_chunk = (replacement.start - chunk_start) as usize;
209+
210+
// Add the part of chunk before replacement
211+
if offset_in_chunk > chunk_pos {
212+
result.push(&chunk[chunk_pos..offset_in_chunk]);
213+
}
214+
215+
// Add replacement content
216+
result.push(&replacement.content);
217+
218+
// Update positions
219+
pos = replacement.end;
220+
replacement_index += 1;
221+
222+
// Find where to continue after replacement
223+
let mut found_continue_pos = false;
224+
for (idx, &chunk_start_pos) in
225+
chunk_start_positions.iter().enumerate()
226+
{
227+
let chunk_end_pos =
228+
chunk_start_pos + inner_source_code[idx].len() as u32;
229+
230+
if pos >= chunk_start_pos && pos < chunk_end_pos {
231+
// Continue from within this chunk
232+
chunk_index = idx;
233+
chunk_pos = (pos - chunk_start_pos) as usize;
234+
found_continue_pos = true;
235+
break;
236+
} else if pos <= chunk_start_pos {
237+
// Continue from the start of this chunk
238+
chunk_index = idx;
239+
chunk_pos = 0;
240+
found_continue_pos = true;
241+
break;
242+
}
243+
}
244+
245+
if !found_continue_pos {
246+
// Replacement goes beyond all chunks
247+
chunk_index = inner_source_code.len();
248+
}
249+
250+
break;
251+
} else if replacement.start < chunk_start {
252+
// Replacement starts before this chunk
253+
result.push(&replacement.content);
254+
replacement_index += 1;
255+
256+
// Skip chunks that are replaced
257+
pos = replacement.end;
258+
while chunk_index < inner_source_code.len() {
259+
let current_chunk_start = chunk_start_positions[chunk_index];
260+
let current_chunk_end = current_chunk_start
261+
+ inner_source_code[chunk_index].len() as u32;
262+
263+
if pos <= current_chunk_start {
264+
// Start from beginning of this chunk
265+
chunk_pos = 0;
266+
break;
267+
} else if pos < current_chunk_end {
268+
// Start from middle of this chunk
269+
chunk_pos = (pos - current_chunk_start) as usize;
270+
break;
271+
} else {
272+
// Skip this entire chunk
273+
chunk_index += 1;
274+
}
275+
}
276+
break;
277+
}
278+
}
279+
280+
// No replacement affecting this chunk, add the remaining part
281+
if chunk_pos == 0
282+
&& (next_replacement.is_none()
283+
|| next_replacement.unwrap().start > chunk_end)
284+
{
285+
// Add entire chunk
286+
result.push(chunk);
287+
} else if chunk_pos < chunk.len() {
288+
// Add remaining part of chunk
289+
result.push(&chunk[chunk_pos..]);
290+
}
291+
292+
chunk_index += 1;
293+
chunk_pos = 0;
294+
pos = chunk_end;
178295
}
179-
source_code.push_str(&replacement.content);
180-
#[allow(clippy::manual_clamp)]
181-
{
182-
inner_pos = inner_pos
183-
.max(replacement.end)
184-
.min(inner_source_code.len() as u32);
296+
297+
// Handle remaining replacements that are beyond all chunks
298+
while replacement_index < self.replacements.len() {
299+
let replacement = &self.replacements[replacement_index];
300+
if replacement.start >= pos {
301+
result.push(&replacement.content);
302+
}
303+
replacement_index += 1;
185304
}
186305
}
187-
source_code.push_str(
188-
&inner_source_code[inner_pos as usize..inner_source_code.len()],
189-
);
190306

191-
SourceValue::String(Cow::Owned(source_code))
307+
result
192308
}
193309

194310
fn buffer(&self) -> Cow<[u8]> {
@@ -248,10 +364,6 @@ impl Source for ReplaceSource {
248364
get_map(&ObjectPool::default(), chunks.as_ref(), options)
249365
}
250366

251-
fn write_to_string(&self, string: &mut String) {
252-
string.push_str(&self.source().into_string_lossy());
253-
}
254-
255367
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
256368
writer.write_all(self.source().as_bytes())
257369
}

src/source.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ pub trait Source:
114114
/// Get the source code.
115115
fn source(&self) -> SourceValue;
116116

117+
/// Return a lightweight "rope" view of the source as borrowed string slices.
118+
fn rope(&self) -> Vec<&str>;
119+
117120
/// Get the source buffer.
118121
fn buffer(&self) -> Cow<[u8]>;
119122

@@ -132,14 +135,6 @@ pub trait Source:
132135
self.dyn_hash(state);
133136
}
134137

135-
/// Appends the source content to the provided string buffer.
136-
///
137-
/// This method efficiently writes the source content directly into an existing
138-
/// string buffer, avoiding additional memory allocations when the buffer has
139-
/// sufficient capacity. This is particularly useful for concatenating multiple
140-
/// sources or building larger strings incrementally.
141-
fn write_to_string(&self, string: &mut String);
142-
143138
/// Writes the source into a writer, preferably a `std::io::BufWriter<std::io::Write>`.
144139
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()>;
145140
}
@@ -149,6 +144,10 @@ impl Source for BoxSource {
149144
self.as_ref().source()
150145
}
151146

147+
fn rope(&self) -> Vec<&str> {
148+
self.as_ref().rope()
149+
}
150+
152151
fn buffer(&self) -> Cow<[u8]> {
153152
self.as_ref().buffer()
154153
}
@@ -165,10 +164,6 @@ impl Source for BoxSource {
165164
self.as_ref().map(object_pool, options)
166165
}
167166

168-
fn write_to_string(&self, string: &mut String) {
169-
self.as_ref().write_to_string(string)
170-
}
171-
172167
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
173168
self.as_ref().to_writer(writer)
174169
}

src/source_map_source.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ impl Source for SourceMapSource {
9494
SourceValue::String(Cow::Borrowed(&self.value))
9595
}
9696

97+
fn rope(&self) -> Vec<&str> {
98+
vec![self.value.as_ref()]
99+
}
100+
97101
fn buffer(&self) -> Cow<[u8]> {
98102
Cow::Borrowed(self.value.as_bytes())
99103
}
@@ -114,10 +118,6 @@ impl Source for SourceMapSource {
114118
get_map(object_pool, chunks.as_ref(), options)
115119
}
116120

117-
fn write_to_string(&self, string: &mut String) {
118-
string.push_str(self.value.as_ref());
119-
}
120-
121121
fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> {
122122
writer.write_all(self.value.as_bytes())
123123
}

0 commit comments

Comments
 (0)