From 746949b08a35bec3d951b84738982254d5cf8f86 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 16:55:55 +0800 Subject: [PATCH 01/24] fix: replace size compute --- src/replace_source.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/replace_source.rs b/src/replace_source.rs index 2a196541..159b8b27 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -211,6 +211,10 @@ impl Source for ReplaceSource { if inner_pos < replacement.start { // This content is already counted in inner_source_size, so no change needed } + if replacement.start as usize >= inner_source_size { + size += replacement.content.len(); + continue; + } // Handle the replacement itself let original_length = replacement @@ -1421,6 +1425,7 @@ return
{data.foo}
None, ReplacementEnforce::Post, ); + source.replace(10000000, 20000000, "// end line", None); assert_eq!(source.size(), source.source().into_string_lossy().len()); } From 045e2c02682c38bb42a69663bfd07e66a75c193e Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 17:00:59 +0800 Subject: [PATCH 02/24] perf: WithUtf16 --- src/helpers.rs | 5 ++--- src/with_utf16.rs | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/helpers.rs b/src/helpers.rs index 8cbd2c13..cd45bfb7 100644 --- a/src/helpers.rs +++ b/src/helpers.rs @@ -460,10 +460,9 @@ fn stream_chunks_of_source_map_full<'a>( on_source: OnSource<'_, 'a>, on_name: OnName<'_, 'a>, ) -> GeneratedInfo { - let a = split_into_lines(source); - let lines: Vec> = a + let lines = split_into_lines(source) .map(|line| WithUtf16::new(object_pool, line)) - .collect::>(); + .collect::>>(); if lines.is_empty() { return GeneratedInfo { diff --git a/src/with_utf16.rs b/src/with_utf16.rs index f874de4e..8ffc6511 100644 --- a/src/with_utf16.rs +++ b/src/with_utf16.rs @@ -7,7 +7,7 @@ pub struct WithUtf16<'object_pool, 'text> { /// line is a string reference pub line: &'text str, /// the byte position of each `char` in `line` string slice . - pub utf16_byte_indices: OnceCell>, + pub utf16_byte_indices: OnceCell>>, object_pool: &'object_pool ObjectPool, } @@ -21,8 +21,13 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> { } /// substring::SubString with cache - pub fn substring(&self, start_index: usize, end_index: usize) -> &'text str { - if end_index <= start_index { + #[allow(unsafe_code)] + pub fn substring( + &self, + start_utf16_index: usize, + end_utf16_index: usize, + ) -> &'text str { + if end_utf16_index <= start_utf16_index { return ""; } @@ -38,14 +43,30 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> { _ => unreachable!(), } } - vec + if vec.len() == self.line.len() { + // Optimization: UTF-16 length equals UTF-8 length, indicating no surrogate pairs. + // Return None to release the vector back to the object pool for better memory efficiency. + None + } else { + Some(vec) + } }); - let str_len = self.line.len(); - let start = *utf16_byte_indices.get(start_index).unwrap_or(&str_len); - let end = *utf16_byte_indices.get(end_index).unwrap_or(&str_len); + let utf8_len = self.line.len(); + + let Some(utf16_byte_indices) = utf16_byte_indices else { + let start_utf16_index = start_utf16_index.min(utf8_len); + let end_utf16_index = end_utf16_index.min(utf8_len); + return unsafe { + self.line.get_unchecked(start_utf16_index..end_utf16_index) + }; + }; + + let start = *utf16_byte_indices + .get(start_utf16_index) + .unwrap_or(&utf8_len); + let end = *utf16_byte_indices.get(end_utf16_index).unwrap_or(&utf8_len); - #[allow(unsafe_code)] unsafe { // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee // that the indices obtained from it will always be within the bounds of `self` and they From bc23974f8f99b85e532df4e63e3a3716e2d2874c Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 17:33:59 +0800 Subject: [PATCH 03/24] perf: WithUtf16 by bytes --- src/with_utf16.rs | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/with_utf16.rs b/src/with_utf16.rs index 8ffc6511..4a2e108b 100644 --- a/src/with_utf16.rs +++ b/src/with_utf16.rs @@ -33,16 +33,31 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> { let utf16_byte_indices = self.utf16_byte_indices.get_or_init(|| { let mut vec = self.object_pool.pull(self.line.len()); - for (byte_index, ch) in self.line.char_indices() { - match ch.len_utf16() { - 1 => vec.push(byte_index), - 2 => { - vec.push(byte_index); - vec.push(byte_index); - } - _ => unreachable!(), + + let bytes = self.line.as_bytes(); + let mut byte_pos = 0; + while byte_pos < bytes.len() { + let byte = unsafe { *bytes.get_unchecked(byte_pos) }; + if byte < 0x80 { + // ASCII: 1 byte = 1 UTF-16 unit + vec.push(byte_pos); + byte_pos += 1; + } else if byte < 0xE0 { + // 2-byte UTF-8 = 1 UTF-16 unit + vec.push(byte_pos); + byte_pos += 2; + } else if byte < 0xF0 { + // 3-byte UTF-8 = 1 UTF-16 unit + vec.push(byte_pos); + byte_pos += 3; + } else { + // 4-byte UTF-8 = 2 UTF-16 units (surrogate pair) + vec.push(byte_pos); + vec.push(byte_pos); + byte_pos += 4; } } + if vec.len() == self.line.len() { // Optimization: UTF-16 length equals UTF-8 length, indicating no surrogate pairs. // Return None to release the vector back to the object pool for better memory efficiency. From 851fa2bc459887c3a705912cd50cb462c7cd2b88 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 21:26:17 +0800 Subject: [PATCH 04/24] perf: add rope --- src/cached_source.rs | 20 +++-- src/concat_source.rs | 24 +++--- src/original_source.rs | 8 +- src/raw_source.rs | 16 ++-- src/replace_source.rs | 162 +++++++++++++++++++++++++++++++++------ src/source.rs | 19 ++--- src/source_map_source.rs | 8 +- tests/compat_source.rs | 8 +- 8 files changed, 192 insertions(+), 73 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index c7906445..da737ccb 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -21,6 +21,7 @@ use crate::{ struct CachedData { hash: OnceLock, size: OnceLock, + source: OnceLock>, columns_map: OnceLock>, line_only_map: OnceLock>, } @@ -83,7 +84,20 @@ impl CachedSource { impl Source for CachedSource { fn source(&self) -> SourceValue { - self.inner.source() + let rope = self.cache.source.get_or_init(|| { + #[allow(unsafe_code)] + // SAFETY: CachedSource guarantees that the underlying source outlives the cache, + // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. + // This allows us to store string slices in the cache without additional allocations. + unsafe { + std::mem::transmute::, Vec<&'static str>>(self.rope()) + } + }); + SourceValue::String(Cow::Owned(rope.join(""))) + } + + fn rope(&self) -> Vec<&str> { + self.inner.rope() } fn buffer(&self) -> Cow<[u8]> { @@ -114,10 +128,6 @@ impl Source for CachedSource { } } - fn write_to_string(&self, string: &mut String) { - self.inner.write_to_string(string); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.inner.to_writer(writer) } diff --git a/src/concat_source.rs b/src/concat_source.rs index bb1eb7c0..2e33b222 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -167,11 +167,19 @@ impl Source for ConcatSource { if children.len() == 1 { children[0].source() } else { - // Use to_writer to avoid multiple heap allocations that would occur - // when concatenating nested ConcatSource instances directly - let mut string = String::with_capacity(self.size()); - self.write_to_string(&mut string); - SourceValue::String(Cow::Owned(string)) + SourceValue::String(Cow::Owned(self.rope().join(""))) + } + } + + fn rope(&self) -> Vec<&str> { + let children = self.optimized_children(); + if children.len() == 1 { + children[0].rope() + } else { + children + .iter() + .flat_map(|child| child.rope()) + .collect::>() } } @@ -206,12 +214,6 @@ impl Source for ConcatSource { result } - fn write_to_string(&self, string: &mut String) { - for child in self.optimized_children() { - child.write_to_string(string); - } - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { for child in self.optimized_children() { child.to_writer(writer)?; diff --git a/src/original_source.rs b/src/original_source.rs index 3824fc37..bf4ec766 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -56,6 +56,10 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } + fn rope(&self) -> Vec<&str> { + vec![self.value.as_ref()] + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.value.as_bytes()) } @@ -73,10 +77,6 @@ impl Source for OriginalSource { get_map(object_pool, chunks.as_ref(), options) } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.value.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/src/raw_source.rs b/src/raw_source.rs index 3bc5abfd..af3337a6 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -64,6 +64,10 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } + fn rope(&self) -> Vec<&str> { + vec![self.0.as_ref()] + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.0.as_bytes()) } @@ -76,10 +80,6 @@ impl Source for RawStringSource { None } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.0.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } @@ -210,6 +210,10 @@ impl Source for RawBufferSource { SourceValue::Buffer(Cow::Borrowed(&self.value)) } + fn rope(&self) -> Vec<&str> { + vec![self.get_or_init_value_as_string()] + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(&self.value) } @@ -222,10 +226,6 @@ impl Source for RawBufferSource { None } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.get_or_init_value_as_string()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(&self.value) } diff --git a/src/replace_source.rs b/src/replace_source.rs index 159b8b27..4200ce59 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -161,34 +161,150 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - let inner_source_code = self.inner.source().into_string_lossy(); + let rope = self.rope(); + if rope.len() == 1 { + SourceValue::String(Cow::Borrowed(rope[0])) + } else { + SourceValue::String(Cow::Owned(rope.join(""))) + } + } + + fn rope(&self) -> Vec<&str> { + let inner_source_code = self.inner.rope(); - // mut_string_push_str is faster that vec join - // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs if self.replacements.is_empty() { - return SourceValue::String(inner_source_code); + return inner_source_code; } - let capacity = self.size(); - let mut source_code = String::with_capacity(capacity); - let mut inner_pos = 0; - for replacement in &self.replacements { - if inner_pos < replacement.start { - let end_pos = (replacement.start as usize).min(inner_source_code.len()); - source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]); + + let mut result = Vec::new(); + let mut pos: u32 = 0; + let mut chunk_index = 0; + let mut chunk_pos = 0; // Position within current chunk + let mut replacement_index = 0; + + // Calculate total length to determine positions + let mut chunk_start_positions = Vec::new(); + let mut total_pos = 0; + for chunk in &inner_source_code { + chunk_start_positions.push(total_pos); + total_pos += chunk.len() as u32; + } + + while replacement_index < self.replacements.len() + || chunk_index < inner_source_code.len() + { + let next_replacement = self.replacements.get(replacement_index); + + // Process chunks until we hit a replacement or finish + while chunk_index < inner_source_code.len() { + let chunk = inner_source_code[chunk_index]; + let chunk_start = chunk_start_positions[chunk_index]; + let chunk_end = chunk_start + chunk.len() as u32; + + // Check if there's a replacement that starts within this chunk + if let Some(replacement) = next_replacement { + if replacement.start >= chunk_start && replacement.start < chunk_end { + // Replacement starts within this chunk + let offset_in_chunk = (replacement.start - chunk_start) as usize; + + // Add the part of chunk before replacement + if offset_in_chunk > chunk_pos { + result.push(&chunk[chunk_pos..offset_in_chunk]); + } + + // Add replacement content + result.push(&replacement.content); + + // Update positions + pos = replacement.end; + replacement_index += 1; + + // Find where to continue after replacement + let mut found_continue_pos = false; + for (idx, &chunk_start_pos) in + chunk_start_positions.iter().enumerate() + { + let chunk_end_pos = + chunk_start_pos + inner_source_code[idx].len() as u32; + + if pos >= chunk_start_pos && pos < chunk_end_pos { + // Continue from within this chunk + chunk_index = idx; + chunk_pos = (pos - chunk_start_pos) as usize; + found_continue_pos = true; + break; + } else if pos <= chunk_start_pos { + // Continue from the start of this chunk + chunk_index = idx; + chunk_pos = 0; + found_continue_pos = true; + break; + } + } + + if !found_continue_pos { + // Replacement goes beyond all chunks + chunk_index = inner_source_code.len(); + } + + break; + } else if replacement.start < chunk_start { + // Replacement starts before this chunk + result.push(&replacement.content); + replacement_index += 1; + + // Skip chunks that are replaced + pos = replacement.end; + while chunk_index < inner_source_code.len() { + let current_chunk_start = chunk_start_positions[chunk_index]; + let current_chunk_end = current_chunk_start + + inner_source_code[chunk_index].len() as u32; + + if pos <= current_chunk_start { + // Start from beginning of this chunk + chunk_pos = 0; + break; + } else if pos < current_chunk_end { + // Start from middle of this chunk + chunk_pos = (pos - current_chunk_start) as usize; + break; + } else { + // Skip this entire chunk + chunk_index += 1; + } + } + break; + } + } + + // No replacement affecting this chunk, add the remaining part + if chunk_pos == 0 + && (next_replacement.is_none() + || next_replacement.unwrap().start > chunk_end) + { + // Add entire chunk + result.push(chunk); + } else if chunk_pos < chunk.len() { + // Add remaining part of chunk + result.push(&chunk[chunk_pos..]); + } + + chunk_index += 1; + chunk_pos = 0; + pos = chunk_end; } - source_code.push_str(&replacement.content); - #[allow(clippy::manual_clamp)] - { - inner_pos = inner_pos - .max(replacement.end) - .min(inner_source_code.len() as u32); + + // Handle remaining replacements that are beyond all chunks + while replacement_index < self.replacements.len() { + let replacement = &self.replacements[replacement_index]; + if replacement.start >= pos { + result.push(&replacement.content); + } + replacement_index += 1; } } - source_code.push_str( - &inner_source_code[inner_pos as usize..inner_source_code.len()], - ); - SourceValue::String(Cow::Owned(source_code)) + result } fn buffer(&self) -> Cow<[u8]> { @@ -248,10 +364,6 @@ impl Source for ReplaceSource { get_map(&ObjectPool::default(), chunks.as_ref(), options) } - fn write_to_string(&self, string: &mut String) { - string.push_str(&self.source().into_string_lossy()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.source().as_bytes()) } diff --git a/src/source.rs b/src/source.rs index c4ab26ff..d380b6cd 100644 --- a/src/source.rs +++ b/src/source.rs @@ -114,6 +114,9 @@ pub trait Source: /// Get the source code. fn source(&self) -> SourceValue; + /// Return a lightweight "rope" view of the source as borrowed string slices. + fn rope(&self) -> Vec<&str>; + /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -132,14 +135,6 @@ pub trait Source: self.dyn_hash(state); } - /// Appends the source content to the provided string buffer. - /// - /// This method efficiently writes the source content directly into an existing - /// string buffer, avoiding additional memory allocations when the buffer has - /// sufficient capacity. This is particularly useful for concatenating multiple - /// sources or building larger strings incrementally. - fn write_to_string(&self, string: &mut String); - /// Writes the source into a writer, preferably a `std::io::BufWriter`. fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()>; } @@ -149,6 +144,10 @@ impl Source for BoxSource { self.as_ref().source() } + fn rope(&self) -> Vec<&str> { + self.as_ref().rope() + } + fn buffer(&self) -> Cow<[u8]> { self.as_ref().buffer() } @@ -165,10 +164,6 @@ impl Source for BoxSource { self.as_ref().map(object_pool, options) } - fn write_to_string(&self, string: &mut String) { - self.as_ref().write_to_string(string) - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.as_ref().to_writer(writer) } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 5982eabd..a074499b 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -94,6 +94,10 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } + fn rope(&self) -> Vec<&str> { + vec![self.value.as_ref()] + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.value.as_bytes()) } @@ -114,10 +118,6 @@ impl Source for SourceMapSource { get_map(object_pool, chunks.as_ref(), options) } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.value.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/tests/compat_source.rs b/tests/compat_source.rs index 8da41a8a..fe2d3500 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -19,6 +19,10 @@ impl Source for CompatSource { SourceValue::String(Cow::Borrowed(self.0)) } + fn rope(&self) -> Vec<&str> { + vec![self.0] + } + fn buffer(&self) -> Cow<[u8]> { Cow::Borrowed(self.0.as_bytes()) } @@ -35,10 +39,6 @@ impl Source for CompatSource { self.1.clone() } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.0.as_ref()) - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } From 4b7e1153cc08f36ef1d2442b7900ac3f0c1ed183 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 22:25:36 +0800 Subject: [PATCH 05/24] refactor rope in ReplaceSource --- src/replace_source.rs | 210 +++++++++++++++++------------------------- 1 file changed, 86 insertions(+), 124 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index 4200ce59..92cb078b 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -170,141 +170,100 @@ impl Source for ReplaceSource { } fn rope(&self) -> Vec<&str> { - let inner_source_code = self.inner.rope(); - - if self.replacements.is_empty() { - return inner_source_code; - } - - let mut result = Vec::new(); - let mut pos: u32 = 0; - let mut chunk_index = 0; - let mut chunk_pos = 0; // Position within current chunk - let mut replacement_index = 0; - - // Calculate total length to determine positions - let mut chunk_start_positions = Vec::new(); - let mut total_pos = 0; - for chunk in &inner_source_code { - chunk_start_positions.push(total_pos); - total_pos += chunk.len() as u32; - } - - while replacement_index < self.replacements.len() - || chunk_index < inner_source_code.len() - { - let next_replacement = self.replacements.get(replacement_index); - - // Process chunks until we hit a replacement or finish - while chunk_index < inner_source_code.len() { - let chunk = inner_source_code[chunk_index]; - let chunk_start = chunk_start_positions[chunk_index]; - let chunk_end = chunk_start + chunk.len() as u32; - - // Check if there's a replacement that starts within this chunk - if let Some(replacement) = next_replacement { - if replacement.start >= chunk_start && replacement.start < chunk_end { - // Replacement starts within this chunk - let offset_in_chunk = (replacement.start - chunk_start) as usize; - - // Add the part of chunk before replacement - if offset_in_chunk > chunk_pos { - result.push(&chunk[chunk_pos..offset_in_chunk]); - } - - // Add replacement content - result.push(&replacement.content); + let inner_rope = self.inner.rope(); + let mut rope = + Vec::with_capacity(inner_rope.len() + self.replacements.len() * 2); + + let mut pos: usize = 0; + let mut replacement_idx: usize = 0; + let mut replacement_end: Option = None; + let mut next_replacement: Option = (replacement_idx + < self.replacements.len()) + .then(|| self.replacements[replacement_idx].start as usize); + + 'chunk_loop: for chunk in self.inner.rope() { + let mut chunk_pos = 0; + let end_pos = pos + chunk.len(); + + // Skip over when it has been replaced + if let Some(replacement_end) = + replacement_end.filter(|replacement_end| *replacement_end > pos) + { + // Skip over the whole chunk + if replacement_end >= end_pos { + pos = end_pos; + continue; + } + // Partially skip over chunk + chunk_pos = replacement_end - pos; + pos += chunk_pos; + } - // Update positions - pos = replacement.end; - replacement_index += 1; + // Is a replacement in the chunk? + while let Some(next_replacement_pos) = next_replacement + .filter(|next_replacement_pos| *next_replacement_pos < end_pos) + { + if next_replacement_pos > pos { + // Emit chunk until replacement + let offset = next_replacement_pos - pos; + let chunk_slice = &chunk[chunk_pos..(chunk_pos + offset)]; + rope.push(chunk_slice); + chunk_pos += offset; + pos = next_replacement_pos; + } + // Insert replacement content split into chunks by lines + let replacement = &self.replacements[replacement_idx]; + rope.push(&replacement.content); - // Find where to continue after replacement - let mut found_continue_pos = false; - for (idx, &chunk_start_pos) in - chunk_start_positions.iter().enumerate() - { - let chunk_end_pos = - chunk_start_pos + inner_source_code[idx].len() as u32; - - if pos >= chunk_start_pos && pos < chunk_end_pos { - // Continue from within this chunk - chunk_index = idx; - chunk_pos = (pos - chunk_start_pos) as usize; - found_continue_pos = true; - break; - } else if pos <= chunk_start_pos { - // Continue from the start of this chunk - chunk_index = idx; - chunk_pos = 0; - found_continue_pos = true; - break; - } - } + // Remove replaced content by settings this variable + replacement_end = if let Some(replacement_end) = replacement_end { + Some(replacement_end.max(replacement.end as usize)) + } else { + Some(replacement.end as usize) + }; - if !found_continue_pos { - // Replacement goes beyond all chunks - chunk_index = inner_source_code.len(); - } + // Move to next replacement + replacement_idx += 1; + next_replacement = if replacement_idx < self.replacements.len() { + Some(self.replacements[replacement_idx].start as usize) + } else { + None + }; - break; - } else if replacement.start < chunk_start { - // Replacement starts before this chunk - result.push(&replacement.content); - replacement_index += 1; - - // Skip chunks that are replaced - pos = replacement.end; - while chunk_index < inner_source_code.len() { - let current_chunk_start = chunk_start_positions[chunk_index]; - let current_chunk_end = current_chunk_start - + inner_source_code[chunk_index].len() as u32; - - if pos <= current_chunk_start { - // Start from beginning of this chunk - chunk_pos = 0; - break; - } else if pos < current_chunk_end { - // Start from middle of this chunk - chunk_pos = (pos - current_chunk_start) as usize; - break; - } else { - // Skip this entire chunk - chunk_index += 1; - } - } - break; + // Skip over when it has been replaced + let offset = chunk.len() as i64 - end_pos as i64 + + replacement_end.unwrap() as i64 + - chunk_pos as i64; + if offset > 0 { + // Skip over whole chunk + if replacement_end + .is_some_and(|replacement_end| replacement_end >= end_pos) + { + pos = end_pos; + continue 'chunk_loop; } - } - // No replacement affecting this chunk, add the remaining part - if chunk_pos == 0 - && (next_replacement.is_none() - || next_replacement.unwrap().start > chunk_end) - { - // Add entire chunk - result.push(chunk); - } else if chunk_pos < chunk.len() { - // Add remaining part of chunk - result.push(&chunk[chunk_pos..]); + // Partially skip over chunk + chunk_pos += offset as usize; + pos += offset as usize; } - - chunk_index += 1; - chunk_pos = 0; - pos = chunk_end; } - // Handle remaining replacements that are beyond all chunks - while replacement_index < self.replacements.len() { - let replacement = &self.replacements[replacement_index]; - if replacement.start >= pos { - result.push(&replacement.content); - } - replacement_index += 1; + // Emit remaining chunk + if chunk_pos < chunk.len() { + rope.push(&chunk[chunk_pos..]); } + pos = end_pos; } - result + // Handle remaining replacements one by one + while replacement_idx < self.replacements.len() { + let content = &self.replacements[replacement_idx].content; + rope.push(content); + replacement_idx += 1; + } + + rope } fn buffer(&self) -> Cow<[u8]> { @@ -365,7 +324,10 @@ impl Source for ReplaceSource { } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { - writer.write_all(self.source().as_bytes()) + for text in self.rope() { + writer.write_all(text.as_bytes())?; + } + Ok(()) } } From 23ac55366df7a811f76bf714e168bccd605abf6d Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 22:36:55 +0800 Subject: [PATCH 06/24] perf: write_to_string --- src/cached_source.rs | 4 ++++ src/concat_source.rs | 12 +++++++++++- src/original_source.rs | 4 ++++ src/raw_source.rs | 8 ++++++++ src/replace_source.rs | 8 +++++++- src/source.rs | 12 ++++++++++++ src/source_map_source.rs | 4 ++++ tests/compat_source.rs | 4 ++++ 8 files changed, 54 insertions(+), 2 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index da737ccb..c0544fef 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -128,6 +128,10 @@ impl Source for CachedSource { } } + fn write_to_string(&self, string: &mut String) { + self.inner.write_to_string(string); + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.inner.to_writer(writer) } diff --git a/src/concat_source.rs b/src/concat_source.rs index 2e33b222..dffb3e1d 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -167,7 +167,11 @@ impl Source for ConcatSource { if children.len() == 1 { children[0].source() } else { - SourceValue::String(Cow::Owned(self.rope().join(""))) + // Use to_writer to avoid multiple heap allocations that would occur + // when concatenating nested ConcatSource instances directly + let mut string = String::with_capacity(self.size()); + self.write_to_string(&mut string); + SourceValue::String(Cow::Owned(string)) } } @@ -214,6 +218,12 @@ impl Source for ConcatSource { result } + fn write_to_string(&self, string: &mut String) { + for child in self.optimized_children() { + child.write_to_string(string); + } + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { for child in self.optimized_children() { child.to_writer(writer)?; diff --git a/src/original_source.rs b/src/original_source.rs index bf4ec766..83e522a1 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -77,6 +77,10 @@ impl Source for OriginalSource { get_map(object_pool, chunks.as_ref(), options) } + fn write_to_string(&self, string: &mut String) { + string.push_str(self.value.as_ref()); + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/src/raw_source.rs b/src/raw_source.rs index af3337a6..44c146fb 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -80,6 +80,10 @@ impl Source for RawStringSource { None } + fn write_to_string(&self, string: &mut String) { + string.push_str(self.0.as_ref()); + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } @@ -226,6 +230,10 @@ impl Source for RawBufferSource { None } + fn write_to_string(&self, string: &mut String) { + string.push_str(self.get_or_init_value_as_string()); + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(&self.value) } diff --git a/src/replace_source.rs b/src/replace_source.rs index 92cb078b..3df319f1 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -181,7 +181,7 @@ impl Source for ReplaceSource { < self.replacements.len()) .then(|| self.replacements[replacement_idx].start as usize); - 'chunk_loop: for chunk in self.inner.rope() { + 'chunk_loop: for chunk in inner_rope { let mut chunk_pos = 0; let end_pos = pos + chunk.len(); @@ -323,6 +323,12 @@ impl Source for ReplaceSource { get_map(&ObjectPool::default(), chunks.as_ref(), options) } + fn write_to_string(&self, string: &mut String) { + for chunk in self.rope() { + string.push_str(chunk); + } + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { for text in self.rope() { writer.write_all(text.as_bytes())?; diff --git a/src/source.rs b/src/source.rs index d380b6cd..3e67189f 100644 --- a/src/source.rs +++ b/src/source.rs @@ -135,6 +135,14 @@ pub trait Source: self.dyn_hash(state); } + /// Appends the source content to the provided string buffer. + /// + /// This method efficiently writes the source content directly into an existing + /// string buffer, avoiding additional memory allocations when the buffer has + /// sufficient capacity. This is particularly useful for concatenating multiple + /// sources or building larger strings incrementally. + fn write_to_string(&self, string: &mut String); + /// Writes the source into a writer, preferably a `std::io::BufWriter`. fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()>; } @@ -164,6 +172,10 @@ impl Source for BoxSource { self.as_ref().map(object_pool, options) } + fn write_to_string(&self, string: &mut String) { + self.as_ref().write_to_string(string) + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.as_ref().to_writer(writer) } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index a074499b..72a4b3be 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -118,6 +118,10 @@ impl Source for SourceMapSource { get_map(object_pool, chunks.as_ref(), options) } + fn write_to_string(&self, string: &mut String) { + string.push_str(self.value.as_ref()); + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/tests/compat_source.rs b/tests/compat_source.rs index fe2d3500..e4b0b84c 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -39,6 +39,10 @@ impl Source for CompatSource { self.1.clone() } + fn write_to_string(&self, string: &mut String) { + string.push_str(self.0.as_ref()) + } + fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } From fec461ed7e4b14a16fadeb756dcadf845ba65f00 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Fri, 7 Nov 2025 23:22:20 +0800 Subject: [PATCH 07/24] fix: codspeed --- .github/workflows/Bench.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Bench.yaml b/.github/workflows/Bench.yaml index ef6ef1d2..da30b16d 100644 --- a/.github/workflows/Bench.yaml +++ b/.github/workflows/Bench.yaml @@ -29,7 +29,7 @@ jobs: - name: Install codspeed uses: taiki-e/install-action@v2 with: - tool: cargo-codspeed + tool: cargo-codspeed@4.0.5 - name: Build Benchmark run: cargo codspeed build --features codspeed From ef71210dbf620ea79e5027c843c39dc79c0edf5d Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 00:30:58 +0800 Subject: [PATCH 08/24] perf: get_unchecked --- src/cached_source.rs | 6 +++++- src/replace_source.rs | 18 ++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index c0544fef..c115dd2c 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -93,7 +93,11 @@ impl Source for CachedSource { std::mem::transmute::, Vec<&'static str>>(self.rope()) } }); - SourceValue::String(Cow::Owned(rope.join(""))) + let mut string = String::with_capacity(self.size()); + for segment in rope { + string.push_str(segment); + } + SourceValue::String(Cow::Owned(string)) } fn rope(&self) -> Vec<&str> { diff --git a/src/replace_source.rs b/src/replace_source.rs index 3df319f1..b5fa1f6e 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -169,9 +169,10 @@ impl Source for ReplaceSource { } } + #[allow(unsafe_code)] fn rope(&self) -> Vec<&str> { let inner_rope = self.inner.rope(); - let mut rope = + let mut rope: Vec<&str> = Vec::with_capacity(inner_rope.len() + self.replacements.len() * 2); let mut pos: usize = 0; @@ -206,13 +207,15 @@ impl Source for ReplaceSource { if next_replacement_pos > pos { // Emit chunk until replacement let offset = next_replacement_pos - pos; - let chunk_slice = &chunk[chunk_pos..(chunk_pos + offset)]; + let chunk_slice = + unsafe { &chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; rope.push(chunk_slice); chunk_pos += offset; pos = next_replacement_pos; } // Insert replacement content split into chunks by lines - let replacement = &self.replacements[replacement_idx]; + let replacement = + unsafe { &self.replacements.get_unchecked(replacement_idx) }; rope.push(&replacement.content); // Remove replaced content by settings this variable @@ -225,7 +228,9 @@ impl Source for ReplaceSource { // Move to next replacement replacement_idx += 1; next_replacement = if replacement_idx < self.replacements.len() { - Some(self.replacements[replacement_idx].start as usize) + Some(unsafe { + self.replacements.get_unchecked(replacement_idx).start as usize + }) } else { None }; @@ -251,14 +256,15 @@ impl Source for ReplaceSource { // Emit remaining chunk if chunk_pos < chunk.len() { - rope.push(&chunk[chunk_pos..]); + rope.push(unsafe { &chunk.get_unchecked(chunk_pos..) }); } pos = end_pos; } // Handle remaining replacements one by one while replacement_idx < self.replacements.len() { - let content = &self.replacements[replacement_idx].content; + let content = + unsafe { &self.replacements.get_unchecked(replacement_idx).content }; rope.push(content); replacement_idx += 1; } From d1e19a4c50057ba404ffea5a965d6758d0bf8758 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 00:46:22 +0800 Subject: [PATCH 09/24] perf: ReplaceSource source --- src/replace_source.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index b5fa1f6e..5a52fe55 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -165,7 +165,11 @@ impl Source for ReplaceSource { if rope.len() == 1 { SourceValue::String(Cow::Borrowed(rope[0])) } else { - SourceValue::String(Cow::Owned(rope.join(""))) + let mut string = String::with_capacity(rope.iter().map(|c| c.len()).sum()); + for chunk in rope { + string.push_str(chunk); + } + SourceValue::String(Cow::Owned(string)) } } From c90f574eafa1c47cfa5d8d492b017970ef9838f3 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 01:03:45 +0800 Subject: [PATCH 10/24] rope add len --- src/cached_source.rs | 14 +++++++------- src/concat_source.rs | 14 +++++++++----- src/original_source.rs | 4 ++-- src/raw_source.rs | 9 +++++---- src/replace_source.rs | 40 +++++++++++++++++++++++----------------- src/source.rs | 4 ++-- src/source_map_source.rs | 4 ++-- 7 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index c115dd2c..fa4bce97 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -21,7 +21,7 @@ use crate::{ struct CachedData { hash: OnceLock, size: OnceLock, - source: OnceLock>, + source: OnceLock<(Vec<&'static str>, usize)>, columns_map: OnceLock>, line_only_map: OnceLock>, } @@ -84,23 +84,23 @@ impl CachedSource { impl Source for CachedSource { fn source(&self) -> SourceValue { - let rope = self.cache.source.get_or_init(|| { + let (chunks, len) = self.cache.source.get_or_init(|| { #[allow(unsafe_code)] // SAFETY: CachedSource guarantees that the underlying source outlives the cache, // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. // This allows us to store string slices in the cache without additional allocations. unsafe { - std::mem::transmute::, Vec<&'static str>>(self.rope()) + std::mem::transmute::<(Vec<&str>, usize), (Vec<&'static str>, usize)>(self.rope()) } }); - let mut string = String::with_capacity(self.size()); - for segment in rope { - string.push_str(segment); + let mut string = String::with_capacity(*len); + for chunk in chunks { + string.push_str(chunk); } SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Vec<&str> { + fn rope(&self) -> (Vec<&str>, usize) { self.inner.rope() } diff --git a/src/concat_source.rs b/src/concat_source.rs index dffb3e1d..d1b5f211 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -175,15 +175,19 @@ impl Source for ConcatSource { } } - fn rope(&self) -> Vec<&str> { + fn rope(&self) -> (Vec<&str>, usize) { let children = self.optimized_children(); if children.len() == 1 { children[0].rope() } else { - children - .iter() - .flat_map(|child| child.rope()) - .collect::>() + let mut merged_chunks = vec![]; + let mut merged_len = 0; + for child in children { + let (chunks, len) = child.rope(); + merged_chunks.extend(chunks); + merged_len += len; + } + (merged_chunks, merged_len) } } diff --git a/src/original_source.rs b/src/original_source.rs index 83e522a1..7c6904fd 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -56,8 +56,8 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Vec<&str> { - vec![self.value.as_ref()] + fn rope(&self) -> (Vec<&str>, usize) { + (vec![self.value.as_ref()], self.value.len()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/raw_source.rs b/src/raw_source.rs index 44c146fb..fcdf3a30 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -64,8 +64,8 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } - fn rope(&self) -> Vec<&str> { - vec![self.0.as_ref()] + fn rope(&self) -> (Vec<&str>, usize) { + (vec![self.0.as_ref()], self.0.len()) } fn buffer(&self) -> Cow<[u8]> { @@ -214,8 +214,9 @@ impl Source for RawBufferSource { SourceValue::Buffer(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Vec<&str> { - vec![self.get_or_init_value_as_string()] + fn rope(&self) -> (Vec<&str>, usize) { + let s = self.get_or_init_value_as_string(); + (vec![s], s.len()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/replace_source.rs b/src/replace_source.rs index 5a52fe55..d55e47f0 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -161,12 +161,12 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - let rope = self.rope(); - if rope.len() == 1 { - SourceValue::String(Cow::Borrowed(rope[0])) + let (chunks, len) = self.rope(); + if chunks.len() == 1 { + SourceValue::String(Cow::Borrowed(chunks[0])) } else { - let mut string = String::with_capacity(rope.iter().map(|c| c.len()).sum()); - for chunk in rope { + let mut string = String::with_capacity(len); + for chunk in chunks { string.push_str(chunk); } SourceValue::String(Cow::Owned(string)) @@ -174,10 +174,11 @@ impl Source for ReplaceSource { } #[allow(unsafe_code)] - fn rope(&self) -> Vec<&str> { - let inner_rope = self.inner.rope(); - let mut rope: Vec<&str> = - Vec::with_capacity(inner_rope.len() + self.replacements.len() * 2); + fn rope(&self) -> (Vec<&str>, usize) { + let (inner_chunks, _) = self.inner.rope(); + let mut chunks: Vec<&str> = + Vec::with_capacity(inner_chunks.len() + self.replacements.len() * 2); + let mut len: usize = 0; let mut pos: usize = 0; let mut replacement_idx: usize = 0; @@ -186,7 +187,7 @@ impl Source for ReplaceSource { < self.replacements.len()) .then(|| self.replacements[replacement_idx].start as usize); - 'chunk_loop: for chunk in inner_rope { + 'chunk_loop: for chunk in inner_chunks { let mut chunk_pos = 0; let end_pos = pos + chunk.len(); @@ -213,14 +214,16 @@ impl Source for ReplaceSource { let offset = next_replacement_pos - pos; let chunk_slice = unsafe { &chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; - rope.push(chunk_slice); + chunks.push(chunk_slice); + len += chunk_slice.len(); chunk_pos += offset; pos = next_replacement_pos; } // Insert replacement content split into chunks by lines let replacement = unsafe { &self.replacements.get_unchecked(replacement_idx) }; - rope.push(&replacement.content); + chunks.push(&replacement.content); + len += replacement.content.len(); // Remove replaced content by settings this variable replacement_end = if let Some(replacement_end) = replacement_end { @@ -260,7 +263,9 @@ impl Source for ReplaceSource { // Emit remaining chunk if chunk_pos < chunk.len() { - rope.push(unsafe { &chunk.get_unchecked(chunk_pos..) }); + let chunk = unsafe { &chunk.get_unchecked(chunk_pos..) }; + chunks.push(chunk); + len += chunk.len(); } pos = end_pos; } @@ -269,11 +274,12 @@ impl Source for ReplaceSource { while replacement_idx < self.replacements.len() { let content = unsafe { &self.replacements.get_unchecked(replacement_idx).content }; - rope.push(content); + chunks.push(content); + len += content.len(); replacement_idx += 1; } - rope + (chunks, len) } fn buffer(&self) -> Cow<[u8]> { @@ -334,13 +340,13 @@ impl Source for ReplaceSource { } fn write_to_string(&self, string: &mut String) { - for chunk in self.rope() { + for chunk in self.rope().0 { string.push_str(chunk); } } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { - for text in self.rope() { + for text in self.rope().0 { writer.write_all(text.as_bytes())?; } Ok(()) diff --git a/src/source.rs b/src/source.rs index 3e67189f..363662aa 100644 --- a/src/source.rs +++ b/src/source.rs @@ -115,7 +115,7 @@ pub trait Source: fn source(&self) -> SourceValue; /// Return a lightweight "rope" view of the source as borrowed string slices. - fn rope(&self) -> Vec<&str>; + fn rope(&self) -> (Vec<&str>, usize); /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -152,7 +152,7 @@ impl Source for BoxSource { self.as_ref().source() } - fn rope(&self) -> Vec<&str> { + fn rope(&self) -> (Vec<&str>, usize) { self.as_ref().rope() } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 72a4b3be..d8ec96f5 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -94,8 +94,8 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Vec<&str> { - vec![self.value.as_ref()] + fn rope(&self) -> (Vec<&str>, usize) { + (vec![self.value.as_ref()], self.value.len()) } fn buffer(&self) -> Cow<[u8]> { From 04b5c66f071c6746de4bc5b78a5037eae77cf98a Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 01:18:09 +0800 Subject: [PATCH 11/24] perf: ReplaceSource source --- src/cached_source.rs | 6 ++- src/replace_source.rs | 109 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 104 insertions(+), 11 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index fa4bce97..832742d0 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -90,12 +90,14 @@ impl Source for CachedSource { // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. // This allows us to store string slices in the cache without additional allocations. unsafe { - std::mem::transmute::<(Vec<&str>, usize), (Vec<&'static str>, usize)>(self.rope()) + std::mem::transmute::<(Vec<&str>, usize), (Vec<&'static str>, usize)>( + self.rope(), + ) } }); let mut string = String::with_capacity(*len); for chunk in chunks { - string.push_str(chunk); + string.push_str(chunk); } SourceValue::String(Cow::Owned(string)) } diff --git a/src/replace_source.rs b/src/replace_source.rs index d55e47f0..f7c11747 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -161,14 +161,11 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - let (chunks, len) = self.rope(); - if chunks.len() == 1 { - SourceValue::String(Cow::Borrowed(chunks[0])) + if self.replacements.len() == 0 { + self.inner.source() } else { - let mut string = String::with_capacity(len); - for chunk in chunks { - string.push_str(chunk); - } + let mut string = String::with_capacity(self.size()); + self.write_to_string(&mut string); SourceValue::String(Cow::Owned(string)) } } @@ -339,9 +336,103 @@ impl Source for ReplaceSource { get_map(&ObjectPool::default(), chunks.as_ref(), options) } + #[allow(unsafe_code)] fn write_to_string(&self, string: &mut String) { - for chunk in self.rope().0 { - string.push_str(chunk); + let (inner_chunks, _) = self.inner.rope(); + + let mut pos: usize = 0; + let mut replacement_idx: usize = 0; + let mut replacement_end: Option = None; + let mut next_replacement: Option = (replacement_idx + < self.replacements.len()) + .then(|| self.replacements[replacement_idx].start as usize); + + 'chunk_loop: for chunk in inner_chunks { + let mut chunk_pos = 0; + let end_pos = pos + chunk.len(); + + // Skip over when it has been replaced + if let Some(replacement_end) = + replacement_end.filter(|replacement_end| *replacement_end > pos) + { + // Skip over the whole chunk + if replacement_end >= end_pos { + pos = end_pos; + continue; + } + // Partially skip over chunk + chunk_pos = replacement_end - pos; + pos += chunk_pos; + } + + // Is a replacement in the chunk? + while let Some(next_replacement_pos) = next_replacement + .filter(|next_replacement_pos| *next_replacement_pos < end_pos) + { + if next_replacement_pos > pos { + // Emit chunk until replacement + let offset = next_replacement_pos - pos; + let chunk_slice = + unsafe { &chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; + string.push_str(chunk_slice); + chunk_pos += offset; + pos = next_replacement_pos; + } + // Insert replacement content split into chunks by lines + let replacement = + unsafe { &self.replacements.get_unchecked(replacement_idx) }; + string.push_str(&replacement.content); + + // Remove replaced content by settings this variable + replacement_end = if let Some(replacement_end) = replacement_end { + Some(replacement_end.max(replacement.end as usize)) + } else { + Some(replacement.end as usize) + }; + + // Move to next replacement + replacement_idx += 1; + next_replacement = if replacement_idx < self.replacements.len() { + Some(unsafe { + self.replacements.get_unchecked(replacement_idx).start as usize + }) + } else { + None + }; + + // Skip over when it has been replaced + let offset = chunk.len() as i64 - end_pos as i64 + + replacement_end.unwrap() as i64 + - chunk_pos as i64; + if offset > 0 { + // Skip over whole chunk + if replacement_end + .is_some_and(|replacement_end| replacement_end >= end_pos) + { + pos = end_pos; + continue 'chunk_loop; + } + + // Partially skip over chunk + chunk_pos += offset as usize; + pos += offset as usize; + } + } + + // Emit remaining chunk + if chunk_pos < chunk.len() { + let chunk = unsafe { &chunk.get_unchecked(chunk_pos..) }; + string.push_str(chunk); + } + pos = end_pos; + } + + // Handle remaining replacements one by one + while replacement_idx < self.replacements.len() { + let content = + unsafe { &self.replacements.get_unchecked(replacement_idx).content }; + string.push_str(content); + replacement_idx += 1; } } From 34505cb719a9bcc51319d2cdaee5717ee9298374 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 15:07:29 +0800 Subject: [PATCH 12/24] perf: rope iter --- src/cached_source.rs | 12 +- src/concat_source.rs | 11 +- src/original_source.rs | 4 +- src/raw_source.rs | 9 +- src/replace_source.rs | 330 +++++++++++++++++++++++++-------------- src/source.rs | 4 +- src/source_map_source.rs | 4 +- tests/compat_source.rs | 4 +- 8 files changed, 236 insertions(+), 142 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 832742d0..a5ad643d 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -21,7 +21,7 @@ use crate::{ struct CachedData { hash: OnceLock, size: OnceLock, - source: OnceLock<(Vec<&'static str>, usize)>, + source: OnceLock>, columns_map: OnceLock>, line_only_map: OnceLock>, } @@ -84,25 +84,25 @@ impl CachedSource { impl Source for CachedSource { fn source(&self) -> SourceValue { - let (chunks, len) = self.cache.source.get_or_init(|| { + let chunks = self.cache.source.get_or_init(|| { #[allow(unsafe_code)] // SAFETY: CachedSource guarantees that the underlying source outlives the cache, // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. // This allows us to store string slices in the cache without additional allocations. unsafe { - std::mem::transmute::<(Vec<&str>, usize), (Vec<&'static str>, usize)>( - self.rope(), + std::mem::transmute::, Vec<&'static str>>( + self.rope().collect(), ) } }); - let mut string = String::with_capacity(*len); + let mut string = String::with_capacity(self.size()); for chunk in chunks { string.push_str(chunk); } SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> (Vec<&str>, usize) { + fn rope(&self) -> Box + '_> { self.inner.rope() } diff --git a/src/concat_source.rs b/src/concat_source.rs index d1b5f211..3920a706 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -175,19 +175,12 @@ impl Source for ConcatSource { } } - fn rope(&self) -> (Vec<&str>, usize) { + fn rope(&self) -> Box + '_> { let children = self.optimized_children(); if children.len() == 1 { children[0].rope() } else { - let mut merged_chunks = vec![]; - let mut merged_len = 0; - for child in children { - let (chunks, len) = child.rope(); - merged_chunks.extend(chunks); - merged_len += len; - } - (merged_chunks, merged_len) + Box::new(children.iter().flat_map(|child| child.rope())) } } diff --git a/src/original_source.rs b/src/original_source.rs index 7c6904fd..a7969d6f 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -56,8 +56,8 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> (Vec<&str>, usize) { - (vec![self.value.as_ref()], self.value.len()) + fn rope(&self) -> Box + '_> { + Box::new(std::iter::once(self.value.as_ref())) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/raw_source.rs b/src/raw_source.rs index fcdf3a30..f5f30330 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -64,8 +64,8 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } - fn rope(&self) -> (Vec<&str>, usize) { - (vec![self.0.as_ref()], self.0.len()) + fn rope(&self) -> Box + '_> { + Box::new(std::iter::once(self.0.as_ref())) } fn buffer(&self) -> Cow<[u8]> { @@ -214,9 +214,8 @@ impl Source for RawBufferSource { SourceValue::Buffer(Cow::Borrowed(&self.value)) } - fn rope(&self) -> (Vec<&str>, usize) { - let s = self.get_or_init_value_as_string(); - (vec![s], s.len()) + fn rope(&self) -> Box + '_> { + Box::new(std::iter::once(self.get_or_init_value_as_string())) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/replace_source.rs b/src/replace_source.rs index f7c11747..5cc80102 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -161,122 +161,24 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - if self.replacements.len() == 0 { - self.inner.source() - } else { - let mut string = String::with_capacity(self.size()); - self.write_to_string(&mut string); - SourceValue::String(Cow::Owned(string)) + if self.replacements.is_empty() { + return self.inner.source(); } - } - - #[allow(unsafe_code)] - fn rope(&self) -> (Vec<&str>, usize) { - let (inner_chunks, _) = self.inner.rope(); - let mut chunks: Vec<&str> = - Vec::with_capacity(inner_chunks.len() + self.replacements.len() * 2); - let mut len: usize = 0; - - let mut pos: usize = 0; - let mut replacement_idx: usize = 0; - let mut replacement_end: Option = None; - let mut next_replacement: Option = (replacement_idx - < self.replacements.len()) - .then(|| self.replacements[replacement_idx].start as usize); - - 'chunk_loop: for chunk in inner_chunks { - let mut chunk_pos = 0; - let end_pos = pos + chunk.len(); - - // Skip over when it has been replaced - if let Some(replacement_end) = - replacement_end.filter(|replacement_end| *replacement_end > pos) - { - // Skip over the whole chunk - if replacement_end >= end_pos { - pos = end_pos; - continue; - } - // Partially skip over chunk - chunk_pos = replacement_end - pos; - pos += chunk_pos; - } - - // Is a replacement in the chunk? - while let Some(next_replacement_pos) = next_replacement - .filter(|next_replacement_pos| *next_replacement_pos < end_pos) - { - if next_replacement_pos > pos { - // Emit chunk until replacement - let offset = next_replacement_pos - pos; - let chunk_slice = - unsafe { &chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; - chunks.push(chunk_slice); - len += chunk_slice.len(); - chunk_pos += offset; - pos = next_replacement_pos; - } - // Insert replacement content split into chunks by lines - let replacement = - unsafe { &self.replacements.get_unchecked(replacement_idx) }; - chunks.push(&replacement.content); - len += replacement.content.len(); - - // Remove replaced content by settings this variable - replacement_end = if let Some(replacement_end) = replacement_end { - Some(replacement_end.max(replacement.end as usize)) - } else { - Some(replacement.end as usize) - }; - - // Move to next replacement - replacement_idx += 1; - next_replacement = if replacement_idx < self.replacements.len() { - Some(unsafe { - self.replacements.get_unchecked(replacement_idx).start as usize - }) - } else { - None - }; - - // Skip over when it has been replaced - let offset = chunk.len() as i64 - end_pos as i64 - + replacement_end.unwrap() as i64 - - chunk_pos as i64; - if offset > 0 { - // Skip over whole chunk - if replacement_end - .is_some_and(|replacement_end| replacement_end >= end_pos) - { - pos = end_pos; - continue 'chunk_loop; - } - - // Partially skip over chunk - chunk_pos += offset as usize; - pos += offset as usize; - } - } - - // Emit remaining chunk - if chunk_pos < chunk.len() { - let chunk = unsafe { &chunk.get_unchecked(chunk_pos..) }; - chunks.push(chunk); - len += chunk.len(); - } - pos = end_pos; + let mut string = String::with_capacity(self.size()); + for chunk in self.rope() { + string.push_str(chunk); } + SourceValue::String(Cow::Owned(string)) + } - // Handle remaining replacements one by one - while replacement_idx < self.replacements.len() { - let content = - unsafe { &self.replacements.get_unchecked(replacement_idx).content }; - chunks.push(content); - len += content.len(); - replacement_idx += 1; + fn rope(&self) -> Box + '_> { + if self.replacements.is_empty() { + return self.inner.rope(); } - - (chunks, len) + Box::new(ReplaceSourceRopeIterator::new( + self.inner.rope(), + &self.replacements, + )) } fn buffer(&self) -> Cow<[u8]> { @@ -338,7 +240,7 @@ impl Source for ReplaceSource { #[allow(unsafe_code)] fn write_to_string(&self, string: &mut String) { - let (inner_chunks, _) = self.inner.rope(); + let inner_chunks = self.inner.rope(); let mut pos: usize = 0; let mut replacement_idx: usize = 0; @@ -437,7 +339,7 @@ impl Source for ReplaceSource { } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { - for text in self.rope().0 { + for text in self.rope() { writer.write_all(text.as_bytes())?; } Ok(()) @@ -1004,6 +906,206 @@ impl PartialEq for ReplaceSource { impl Eq for ReplaceSource {} +enum IteratorState { + ProcessingChunk, + EmittingReplacement, + FinalReplacements, + Done, +} + +/// Iterator for ReplaceSource rope that applies replacements on the fly +pub struct ReplaceSourceRopeIterator<'a> { + inner_chunks: Box + 'a>, + replacements: &'a [Replacement], + pos: usize, + replacement_idx: usize, + replacement_end: Option, + next_replacement: Option, + current_chunk: Option<&'a str>, + current_chunk_start: usize, + current_chunk_pos: usize, + state: IteratorState, +} + +impl<'a> ReplaceSourceRopeIterator<'a> { + fn new( + inner_chunks: Box + 'a>, + replacements: &'a [Replacement], + ) -> Self { + let mut iter = Self { + inner_chunks, + replacements, + pos: 0, + replacement_idx: 0, + replacement_end: None, + next_replacement: replacements.first().map(|r| r.start as usize), + current_chunk: None, + current_chunk_start: 0, + current_chunk_pos: 0, + state: IteratorState::ProcessingChunk, + }; + + iter.load_next_chunk(); + iter + } + + fn load_next_chunk(&mut self) { + self.current_chunk = self.inner_chunks.next(); + self.current_chunk_pos = 0; + if self.current_chunk.is_some() { + self.current_chunk_start = self.pos; + } else { + self.state = IteratorState::FinalReplacements; + } + } + + fn skip_replaced_content(&mut self) -> bool { + if let Some(replacement_end) = self.replacement_end { + if replacement_end > self.pos { + let chunk_end = + self.current_chunk_start + self.current_chunk.unwrap().len(); + + // Skip the entire chunk + if replacement_end >= chunk_end { + self.pos = chunk_end; + self.load_next_chunk(); + return true; + } + + // Partially skip the chunk + let skip_len = replacement_end - self.pos; + self.current_chunk_pos += skip_len; + self.pos += skip_len; + } + } + false + } + + fn process_current_chunk(&mut self) -> Option<&'a str> { + let chunk = self.current_chunk?; + let chunk_end = self.current_chunk_start + chunk.len(); + + // Check if there are replacements in the current chunk + while let Some(next_repl_pos) = + self.next_replacement.filter(|&pos| pos < chunk_end) + { + if next_repl_pos > self.pos { + // Return content before replacement + let offset = next_repl_pos - self.pos; + let result = + &chunk[self.current_chunk_pos..self.current_chunk_pos + offset]; + self.current_chunk_pos += offset; + self.pos = next_repl_pos; + return Some(result); + } + + // Prepare to return replacement content + self.state = IteratorState::EmittingReplacement; + return None; + } + + // Return remaining content of the chunk + if self.current_chunk_pos < chunk.len() { + let result = &chunk[self.current_chunk_pos..]; + self.pos = chunk_end; + self.load_next_chunk(); + Some(result) + } else { + self.load_next_chunk(); + None + } + } + + fn emit_replacement(&mut self) -> Option<&'a str> { + let replacement = &self.replacements[self.replacement_idx]; + let content = &replacement.content; + + // Update replacement state + self.replacement_end = Some( + self + .replacement_end + .map_or(replacement.end as usize, |end| { + end.max(replacement.end as usize) + }), + ); + + self.replacement_idx += 1; + self.next_replacement = self + .replacements + .get(self.replacement_idx) + .map(|r| r.start as usize); + + // Check if we need to skip replaced content + if let Some(replacement_end) = self.replacement_end { + if replacement_end > self.pos { + self.pos = replacement_end; + + // If current chunk needs to be skipped, reload it + if let Some(chunk) = self.current_chunk { + let chunk_end = self.current_chunk_start + chunk.len(); + if replacement_end >= chunk_end { + self.load_next_chunk(); + } else { + self.current_chunk_pos = replacement_end - self.current_chunk_start; + } + } + } + } + + self.state = IteratorState::ProcessingChunk; + Some(content) + } + + fn emit_final_replacements(&mut self) -> Option<&'a str> { + if self.replacement_idx < self.replacements.len() { + let content = &self.replacements[self.replacement_idx].content; + self.replacement_idx += 1; + Some(content) + } else { + self.state = IteratorState::Done; + None + } + } +} + +impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + loop { + match self.state { + IteratorState::ProcessingChunk => { + if self.skip_replaced_content() { + continue; + } + + if let Some(result) = self.process_current_chunk() { + return Some(result); + } + + if self.current_chunk.is_none() { + self.state = IteratorState::FinalReplacements; + } + } + + IteratorState::EmittingReplacement => { + if let Some(result) = self.emit_replacement() { + return Some(result); + } + } + + IteratorState::FinalReplacements => { + if let Some(result) = self.emit_final_replacements() { + return Some(result); + } + } + + IteratorState::Done => return None, + } + } + } +} + #[cfg(test)] mod tests { use rustc_hash::FxHasher; diff --git a/src/source.rs b/src/source.rs index 363662aa..7adc262e 100644 --- a/src/source.rs +++ b/src/source.rs @@ -115,7 +115,7 @@ pub trait Source: fn source(&self) -> SourceValue; /// Return a lightweight "rope" view of the source as borrowed string slices. - fn rope(&self) -> (Vec<&str>, usize); + fn rope(&self) -> Box + '_>; /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -152,7 +152,7 @@ impl Source for BoxSource { self.as_ref().source() } - fn rope(&self) -> (Vec<&str>, usize) { + fn rope(&self) -> Box + '_> { self.as_ref().rope() } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index d8ec96f5..5506c0ee 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -94,8 +94,8 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> (Vec<&str>, usize) { - (vec![self.value.as_ref()], self.value.len()) + fn rope(&self) -> Box + '_> { + Box::new(std::iter::once(self.value.as_ref())) } fn buffer(&self) -> Cow<[u8]> { diff --git a/tests/compat_source.rs b/tests/compat_source.rs index e4b0b84c..3aec141f 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -19,8 +19,8 @@ impl Source for CompatSource { SourceValue::String(Cow::Borrowed(self.0)) } - fn rope(&self) -> Vec<&str> { - vec![self.0] + fn rope(&self) -> Box + '_> { + Box::new(std::iter::once(self.0)) } fn buffer(&self) -> Cow<[u8]> { From 118f6eaf6834a10664cab9862526d5f837abb330 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 15:20:18 +0800 Subject: [PATCH 13/24] perf: ReplaceSourceRopeIterator --- src/replace_source.rs | 243 +++++++++++++++++------------------------- 1 file changed, 96 insertions(+), 147 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index 5cc80102..30f469ec 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -906,13 +906,6 @@ impl PartialEq for ReplaceSource { impl Eq for ReplaceSource {} -enum IteratorState { - ProcessingChunk, - EmittingReplacement, - FinalReplacements, - Done, -} - /// Iterator for ReplaceSource rope that applies replacements on the fly pub struct ReplaceSourceRopeIterator<'a> { inner_chunks: Box + 'a>, @@ -924,7 +917,6 @@ pub struct ReplaceSourceRopeIterator<'a> { current_chunk: Option<&'a str>, current_chunk_start: usize, current_chunk_pos: usize, - state: IteratorState, } impl<'a> ReplaceSourceRopeIterator<'a> { @@ -932,7 +924,7 @@ impl<'a> ReplaceSourceRopeIterator<'a> { inner_chunks: Box + 'a>, replacements: &'a [Replacement], ) -> Self { - let mut iter = Self { + Self { inner_chunks, replacements, pos: 0, @@ -942,166 +934,123 @@ impl<'a> ReplaceSourceRopeIterator<'a> { current_chunk: None, current_chunk_start: 0, current_chunk_pos: 0, - state: IteratorState::ProcessingChunk, - }; - - iter.load_next_chunk(); - iter - } - - fn load_next_chunk(&mut self) { - self.current_chunk = self.inner_chunks.next(); - self.current_chunk_pos = 0; - if self.current_chunk.is_some() { - self.current_chunk_start = self.pos; - } else { - self.state = IteratorState::FinalReplacements; } } +} - fn skip_replaced_content(&mut self) -> bool { - if let Some(replacement_end) = self.replacement_end { - if replacement_end > self.pos { - let chunk_end = - self.current_chunk_start + self.current_chunk.unwrap().len(); +impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { + type Item = &'a str; - // Skip the entire chunk - if replacement_end >= chunk_end { - self.pos = chunk_end; - self.load_next_chunk(); - return true; + #[allow(unsafe_code)] + fn next(&mut self) -> Option { + loop { + // Load next chunk (if needed) + if self.current_chunk.is_none() { + self.current_chunk = self.inner_chunks.next(); + self.current_chunk_pos = 0; + if self.current_chunk.is_some() { + self.current_chunk_start = self.pos; + } else { + // No more chunks, handle remaining replacements + return if self.replacement_idx < self.replacements.len() { + let content = unsafe { + &self + .replacements + .get_unchecked(self.replacement_idx) + .content + }; + self.replacement_idx += 1; + Some(content) + } else { + None + }; } - - // Partially skip the chunk - let skip_len = replacement_end - self.pos; - self.current_chunk_pos += skip_len; - self.pos += skip_len; - } - } - false - } - - fn process_current_chunk(&mut self) -> Option<&'a str> { - let chunk = self.current_chunk?; - let chunk_end = self.current_chunk_start + chunk.len(); - - // Check if there are replacements in the current chunk - while let Some(next_repl_pos) = - self.next_replacement.filter(|&pos| pos < chunk_end) - { - if next_repl_pos > self.pos { - // Return content before replacement - let offset = next_repl_pos - self.pos; - let result = - &chunk[self.current_chunk_pos..self.current_chunk_pos + offset]; - self.current_chunk_pos += offset; - self.pos = next_repl_pos; - return Some(result); } - // Prepare to return replacement content - self.state = IteratorState::EmittingReplacement; - return None; - } - - // Return remaining content of the chunk - if self.current_chunk_pos < chunk.len() { - let result = &chunk[self.current_chunk_pos..]; - self.pos = chunk_end; - self.load_next_chunk(); - Some(result) - } else { - self.load_next_chunk(); - None - } - } - - fn emit_replacement(&mut self) -> Option<&'a str> { - let replacement = &self.replacements[self.replacement_idx]; - let content = &replacement.content; - - // Update replacement state - self.replacement_end = Some( - self - .replacement_end - .map_or(replacement.end as usize, |end| { - end.max(replacement.end as usize) - }), - ); - - self.replacement_idx += 1; - self.next_replacement = self - .replacements - .get(self.replacement_idx) - .map(|r| r.start as usize); + let chunk = self.current_chunk.unwrap(); + let chunk_end = self.current_chunk_start + chunk.len(); - // Check if we need to skip replaced content - if let Some(replacement_end) = self.replacement_end { - if replacement_end > self.pos { - self.pos = replacement_end; - - // If current chunk needs to be skipped, reload it - if let Some(chunk) = self.current_chunk { - let chunk_end = self.current_chunk_start + chunk.len(); + // Skip replaced content + if let Some(replacement_end) = self.replacement_end { + if replacement_end > self.pos { if replacement_end >= chunk_end { - self.load_next_chunk(); + // Skip entire chunk + self.pos = chunk_end; + self.current_chunk = None; + continue; } else { - self.current_chunk_pos = replacement_end - self.current_chunk_start; + // Partially skip chunk + let skip_len = replacement_end - self.pos; + self.current_chunk_pos += skip_len; + self.pos += skip_len; } } } - } - - self.state = IteratorState::ProcessingChunk; - Some(content) - } - - fn emit_final_replacements(&mut self) -> Option<&'a str> { - if self.replacement_idx < self.replacements.len() { - let content = &self.replacements[self.replacement_idx].content; - self.replacement_idx += 1; - Some(content) - } else { - self.state = IteratorState::Done; - None - } - } -} - -impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { - type Item = &'a str; - fn next(&mut self) -> Option { - loop { - match self.state { - IteratorState::ProcessingChunk => { - if self.skip_replaced_content() { - continue; - } + // Check if there are replacements in the current chunk + if let Some(next_repl_pos) = + self.next_replacement.filter(|&pos| pos < chunk_end) + { + if next_repl_pos > self.pos { + // Return content before replacement + let offset = next_repl_pos - self.pos; + let result = unsafe { + chunk.get_unchecked( + self.current_chunk_pos..self.current_chunk_pos + offset, + ) + }; + self.current_chunk_pos += offset; + self.pos = next_repl_pos; + return Some(result); + } - if let Some(result) = self.process_current_chunk() { - return Some(result); - } + // Process replacement + let replacement = + unsafe { self.replacements.get_unchecked(self.replacement_idx) }; + let content = &replacement.content; - if self.current_chunk.is_none() { - self.state = IteratorState::FinalReplacements; - } - } + // Move to next replacement + self.replacement_end = Some( + self + .replacement_end + .map_or(replacement.end as usize, |end| { + end.max(replacement.end as usize) + }), + ); - IteratorState::EmittingReplacement => { - if let Some(result) = self.emit_replacement() { - return Some(result); + // Update position (skip replaced content) + self.replacement_idx += 1; + self.next_replacement = self + .replacements + .get(self.replacement_idx) + .map(|r| r.start as usize); + + // Update position (skip replaced content) + if let Some(replacement_end) = self.replacement_end { + if replacement_end > self.pos { + self.pos = replacement_end; + // If current chunk needs to be skipped, reset it + if replacement_end >= chunk_end { + self.current_chunk = None; + } else { + self.current_chunk_pos = + replacement_end - self.current_chunk_start; + } } } - IteratorState::FinalReplacements => { - if let Some(result) = self.emit_final_replacements() { - return Some(result); - } - } + return Some(content); + } - IteratorState::Done => return None, + // Return remaining chunk content + if self.current_chunk_pos < chunk.len() { + let result = unsafe { chunk.get_unchecked(self.current_chunk_pos..) }; + self.pos = chunk_end; + self.current_chunk = None; + return Some(result); } + + self.current_chunk = None; } } } From 0fe731b1c10bfeb4971ce46e748ff175371bb8d0 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 15:38:56 +0800 Subject: [PATCH 14/24] perf: cached soruce stream --- src/cached_source.rs | 19 +++++++++---------- src/replace_source.rs | 8 ++++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index a5ad643d..27db8a03 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -1,6 +1,5 @@ use std::{ borrow::Cow, - cell::OnceCell, hash::{Hash, Hasher}, sync::{Arc, OnceLock}, }; @@ -95,6 +94,9 @@ impl Source for CachedSource { ) } }); + if chunks.len() == 1 { + return SourceValue::String(Cow::Borrowed(chunks[0])); + } let mut string = String::with_capacity(self.size()); for chunk in chunks { string.push_str(chunk); @@ -146,17 +148,17 @@ impl Source for CachedSource { struct CachedSourceChunks<'source> { chunks: Box, cache: Arc, - inner: &'source dyn Source, - source: OnceCell>, + source: Cow<'source, str>, } impl<'a> CachedSourceChunks<'a> { fn new(cache_source: &'a CachedSource) -> Self { + let source = cache_source.source().into_string_lossy(); + Self { chunks: cache_source.inner.stream_chunks(), cache: cache_source.cache.clone(), - inner: &cache_source.inner, - source: OnceCell::new(), + source, } } } @@ -177,14 +179,11 @@ impl Chunks for CachedSourceChunks<'_> { }; match cell.get() { Some(map) => { - let source = self - .source - .get_or_init(|| self.inner.source().into_string_lossy()); if let Some(map) = map { stream_chunks_of_source_map( options, object_pool, - source.as_ref(), + self.source.as_ref(), map, on_chunk, on_source, @@ -192,7 +191,7 @@ impl Chunks for CachedSourceChunks<'_> { ) } else { stream_chunks_of_raw_source( - source.as_ref(), + self.source.as_ref(), options, on_chunk, on_source, diff --git a/src/replace_source.rs b/src/replace_source.rs index 30f469ec..ad290c49 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -1053,6 +1053,14 @@ impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { self.current_chunk = None; } } + + fn size_hint(&self) -> (usize, Option) { + let (lower, upper) = self.inner_chunks.size_hint(); + ( + lower + self.replacements.len(), + upper.map(|size| size + self.replacements.len() * 2), + ) + } } #[cfg(test)] From b41064f2f57738e4b240b7401e117b5409fcb305 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 15:55:50 +0800 Subject: [PATCH 15/24] rm write_to_string --- src/cached_source.rs | 4 -- src/concat_source.rs | 12 ++--- src/original_source.rs | 4 -- src/raw_source.rs | 8 ---- src/replace_source.rs | 100 --------------------------------------- src/source.rs | 12 ----- src/source_map_source.rs | 4 -- tests/compat_source.rs | 4 -- 8 files changed, 3 insertions(+), 145 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 27db8a03..18f878f9 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -136,10 +136,6 @@ impl Source for CachedSource { } } - fn write_to_string(&self, string: &mut String) { - self.inner.write_to_string(string); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.inner.to_writer(writer) } diff --git a/src/concat_source.rs b/src/concat_source.rs index 3920a706..df5f32f8 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -167,10 +167,10 @@ impl Source for ConcatSource { if children.len() == 1 { children[0].source() } else { - // Use to_writer to avoid multiple heap allocations that would occur - // when concatenating nested ConcatSource instances directly let mut string = String::with_capacity(self.size()); - self.write_to_string(&mut string); + for chunk in self.rope() { + string.push_str(chunk); + } SourceValue::String(Cow::Owned(string)) } } @@ -215,12 +215,6 @@ impl Source for ConcatSource { result } - fn write_to_string(&self, string: &mut String) { - for child in self.optimized_children() { - child.write_to_string(string); - } - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { for child in self.optimized_children() { child.to_writer(writer)?; diff --git a/src/original_source.rs b/src/original_source.rs index a7969d6f..3eef561a 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -77,10 +77,6 @@ impl Source for OriginalSource { get_map(object_pool, chunks.as_ref(), options) } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.value.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/src/raw_source.rs b/src/raw_source.rs index f5f30330..3c0bd9dc 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -80,10 +80,6 @@ impl Source for RawStringSource { None } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.0.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } @@ -230,10 +226,6 @@ impl Source for RawBufferSource { None } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.get_or_init_value_as_string()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(&self.value) } diff --git a/src/replace_source.rs b/src/replace_source.rs index ad290c49..b4746fcc 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -238,106 +238,6 @@ impl Source for ReplaceSource { get_map(&ObjectPool::default(), chunks.as_ref(), options) } - #[allow(unsafe_code)] - fn write_to_string(&self, string: &mut String) { - let inner_chunks = self.inner.rope(); - - let mut pos: usize = 0; - let mut replacement_idx: usize = 0; - let mut replacement_end: Option = None; - let mut next_replacement: Option = (replacement_idx - < self.replacements.len()) - .then(|| self.replacements[replacement_idx].start as usize); - - 'chunk_loop: for chunk in inner_chunks { - let mut chunk_pos = 0; - let end_pos = pos + chunk.len(); - - // Skip over when it has been replaced - if let Some(replacement_end) = - replacement_end.filter(|replacement_end| *replacement_end > pos) - { - // Skip over the whole chunk - if replacement_end >= end_pos { - pos = end_pos; - continue; - } - // Partially skip over chunk - chunk_pos = replacement_end - pos; - pos += chunk_pos; - } - - // Is a replacement in the chunk? - while let Some(next_replacement_pos) = next_replacement - .filter(|next_replacement_pos| *next_replacement_pos < end_pos) - { - if next_replacement_pos > pos { - // Emit chunk until replacement - let offset = next_replacement_pos - pos; - let chunk_slice = - unsafe { &chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; - string.push_str(chunk_slice); - chunk_pos += offset; - pos = next_replacement_pos; - } - // Insert replacement content split into chunks by lines - let replacement = - unsafe { &self.replacements.get_unchecked(replacement_idx) }; - string.push_str(&replacement.content); - - // Remove replaced content by settings this variable - replacement_end = if let Some(replacement_end) = replacement_end { - Some(replacement_end.max(replacement.end as usize)) - } else { - Some(replacement.end as usize) - }; - - // Move to next replacement - replacement_idx += 1; - next_replacement = if replacement_idx < self.replacements.len() { - Some(unsafe { - self.replacements.get_unchecked(replacement_idx).start as usize - }) - } else { - None - }; - - // Skip over when it has been replaced - let offset = chunk.len() as i64 - end_pos as i64 - + replacement_end.unwrap() as i64 - - chunk_pos as i64; - if offset > 0 { - // Skip over whole chunk - if replacement_end - .is_some_and(|replacement_end| replacement_end >= end_pos) - { - pos = end_pos; - continue 'chunk_loop; - } - - // Partially skip over chunk - chunk_pos += offset as usize; - pos += offset as usize; - } - } - - // Emit remaining chunk - if chunk_pos < chunk.len() { - let chunk = unsafe { &chunk.get_unchecked(chunk_pos..) }; - string.push_str(chunk); - } - pos = end_pos; - } - - // Handle remaining replacements one by one - while replacement_idx < self.replacements.len() { - let content = - unsafe { &self.replacements.get_unchecked(replacement_idx).content }; - string.push_str(content); - replacement_idx += 1; - } - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { for text in self.rope() { writer.write_all(text.as_bytes())?; diff --git a/src/source.rs b/src/source.rs index 7adc262e..e7961d57 100644 --- a/src/source.rs +++ b/src/source.rs @@ -135,14 +135,6 @@ pub trait Source: self.dyn_hash(state); } - /// Appends the source content to the provided string buffer. - /// - /// This method efficiently writes the source content directly into an existing - /// string buffer, avoiding additional memory allocations when the buffer has - /// sufficient capacity. This is particularly useful for concatenating multiple - /// sources or building larger strings incrementally. - fn write_to_string(&self, string: &mut String); - /// Writes the source into a writer, preferably a `std::io::BufWriter`. fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()>; } @@ -172,10 +164,6 @@ impl Source for BoxSource { self.as_ref().map(object_pool, options) } - fn write_to_string(&self, string: &mut String) { - self.as_ref().write_to_string(string) - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.as_ref().to_writer(writer) } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 5506c0ee..a654e382 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -118,10 +118,6 @@ impl Source for SourceMapSource { get_map(object_pool, chunks.as_ref(), options) } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.value.as_ref()); - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.value.as_bytes()) } diff --git a/tests/compat_source.rs b/tests/compat_source.rs index 3aec141f..c643c05c 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -39,10 +39,6 @@ impl Source for CompatSource { self.1.clone() } - fn write_to_string(&self, string: &mut String) { - string.push_str(self.0.as_ref()) - } - fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { writer.write_all(self.0.as_bytes()) } From 9d2263cb751e7fd0d083cfd608dc58ea2e5d83c4 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 16:06:57 +0800 Subject: [PATCH 16/24] perf: cached source rope --- src/cached_source.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 18f878f9..79a1531c 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -20,7 +20,7 @@ use crate::{ struct CachedData { hash: OnceLock, size: OnceLock, - source: OnceLock>, + chunks: OnceLock>, columns_map: OnceLock>, line_only_map: OnceLock>, } @@ -79,11 +79,9 @@ impl CachedSource { cache: Arc::new(CachedData::default()), } } -} -impl Source for CachedSource { - fn source(&self) -> SourceValue { - let chunks = self.cache.source.get_or_init(|| { + fn get_or_init_chunks(&self) -> &[&str] { + self.cache.chunks.get_or_init(|| { #[allow(unsafe_code)] // SAFETY: CachedSource guarantees that the underlying source outlives the cache, // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. @@ -93,7 +91,13 @@ impl Source for CachedSource { self.rope().collect(), ) } - }); + }) + } +} + +impl Source for CachedSource { + fn source(&self) -> SourceValue { + let chunks = self.get_or_init_chunks(); if chunks.len() == 1 { return SourceValue::String(Cow::Borrowed(chunks[0])); } @@ -105,7 +109,8 @@ impl Source for CachedSource { } fn rope(&self) -> Box + '_> { - self.inner.rope() + let chunks = self.get_or_init_chunks(); + Box::new(chunks.iter().cloned()) } fn buffer(&self) -> Cow<[u8]> { @@ -113,6 +118,9 @@ impl Source for CachedSource { } fn size(&self) -> usize { + if let Some(chunks) = self.cache.chunks.get() { + return chunks.iter().map(|chunk| chunk.len()).sum(); + } *self.cache.size.get_or_init(|| self.inner.size()) } From 62de8436b22814e78ecc0f179796160997d681ac Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 16:29:11 +0800 Subject: [PATCH 17/24] perf: Rope enum --- .../benchmark_repetitive_react_components.rs | 6 +-- src/cached_source.rs | 18 ++++---- src/concat_source.rs | 30 ++++++++++---- src/lib.rs | 2 +- src/original_source.rs | 6 +-- src/raw_source.rs | 10 ++--- src/replace_source.rs | 41 ++++++++++++------- src/source.rs | 17 +++++++- src/source_map_source.rs | 6 +-- tests/compat_source.rs | 8 ++-- 10 files changed, 90 insertions(+), 54 deletions(-) diff --git a/benches/benchmark_repetitive_react_components.rs b/benches/benchmark_repetitive_react_components.rs index 7f571551..602282ee 100644 --- a/benches/benchmark_repetitive_react_components.rs +++ b/benches/benchmark_repetitive_react_components.rs @@ -9,9 +9,9 @@ pub use criterion::*; pub use codspeed_criterion_compat::*; use rspack_sources::{ - BoxSource, CachedSource, ConcatSource, MapOptions, ObjectPool, - OriginalSource, RawStringSource, ReplaceSource, ReplacementEnforce, Source, - SourceExt, SourceMap, SourceMapSource, SourceMapSourceOptions, + BoxSource, ConcatSource, MapOptions, ObjectPool, OriginalSource, + RawStringSource, ReplaceSource, ReplacementEnforce, Source, SourceExt, + SourceMap, SourceMapSource, SourceMapSourceOptions, }; static REPETITIVE_1K_REACT_COMPONENTS_SOURCE: LazyLock = diff --git a/src/cached_source.rs b/src/cached_source.rs index 79a1531c..19b52ff6 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -13,7 +13,7 @@ use crate::{ }, object_pool::ObjectPool, source::SourceValue, - BoxSource, MapOptions, Source, SourceExt, SourceMap, + BoxSource, MapOptions, Rope, Source, SourceExt, SourceMap, }; #[derive(Default)] @@ -82,14 +82,17 @@ impl CachedSource { fn get_or_init_chunks(&self) -> &[&str] { self.cache.chunks.get_or_init(|| { + let rope = self.inner.rope(); + let chunks = match rope { + Rope::Light(s) => vec![s], + Rope::Full(iter) => iter.collect::>(), + }; #[allow(unsafe_code)] // SAFETY: CachedSource guarantees that the underlying source outlives the cache, // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. // This allows us to store string slices in the cache without additional allocations. unsafe { - std::mem::transmute::, Vec<&'static str>>( - self.rope().collect(), - ) + std::mem::transmute::, Vec<&'static str>>(chunks) } }) } @@ -98,9 +101,6 @@ impl CachedSource { impl Source for CachedSource { fn source(&self) -> SourceValue { let chunks = self.get_or_init_chunks(); - if chunks.len() == 1 { - return SourceValue::String(Cow::Borrowed(chunks[0])); - } let mut string = String::with_capacity(self.size()); for chunk in chunks { string.push_str(chunk); @@ -108,9 +108,9 @@ impl Source for CachedSource { SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Box + '_> { + fn rope(&self) -> Rope { let chunks = self.get_or_init_chunks(); - Box::new(chunks.iter().cloned()) + Rope::Full(Box::new(chunks.iter().cloned())) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/concat_source.rs b/src/concat_source.rs index df5f32f8..822d3228 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -12,7 +12,7 @@ use crate::{ linear_map::LinearMap, object_pool::ObjectPool, source::{Mapping, OriginalLocation}, - BoxSource, MapOptions, RawStringSource, Source, SourceExt, SourceMap, + BoxSource, MapOptions, RawStringSource, Rope, Source, SourceExt, SourceMap, SourceValue, }; @@ -165,22 +165,34 @@ impl Source for ConcatSource { fn source(&self) -> SourceValue { let children = self.optimized_children(); if children.len() == 1 { - children[0].source() - } else { - let mut string = String::with_capacity(self.size()); - for chunk in self.rope() { - string.push_str(chunk); + return children[0].source(); + } + + let mut string = String::with_capacity(self.size()); + for child in children { + match child.rope() { + Rope::Light(s) => string.push_str(s), + Rope::Full(chunks) => { + for chunk in chunks { + string.push_str(chunk); + } + } } - SourceValue::String(Cow::Owned(string)) } + SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Box + '_> { + fn rope(&self) -> Rope { let children = self.optimized_children(); if children.len() == 1 { children[0].rope() } else { - Box::new(children.iter().flat_map(|child| child.rope())) + Rope::Full(Box::new(children.iter().flat_map( + |child| match child.rope() { + Rope::Light(s) => Box::new(std::iter::once(s)), + Rope::Full(iter) => iter, + }, + ))) } } diff --git a/src/lib.rs b/src/lib.rs index c5dca46a..b3736484 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ pub use original_source::OriginalSource; pub use raw_source::{RawBufferSource, RawStringSource}; pub use replace_source::{ReplaceSource, ReplacementEnforce}; pub use source::{ - BoxSource, MapOptions, Mapping, OriginalLocation, Source, SourceExt, + BoxSource, MapOptions, Mapping, OriginalLocation, Rope, Source, SourceExt, SourceMap, SourceValue, }; pub use source_map_source::{ diff --git a/src/original_source.rs b/src/original_source.rs index 3eef561a..2a4d80e5 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -11,7 +11,7 @@ use crate::{ }, object_pool::ObjectPool, source::{Mapping, OriginalLocation}, - MapOptions, Source, SourceMap, SourceValue, + MapOptions, Rope, Source, SourceMap, SourceValue, }; /// Represents source code, it will create source map for the source code, @@ -56,8 +56,8 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Box + '_> { - Box::new(std::iter::once(self.value.as_ref())) + fn rope(&self) -> Rope { + Rope::Light(self.value.as_ref()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/raw_source.rs b/src/raw_source.rs index 3c0bd9dc..5bebb9ba 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -10,7 +10,7 @@ use crate::{ GeneratedInfo, StreamChunks, }, object_pool::ObjectPool, - MapOptions, Source, SourceMap, SourceValue, + MapOptions, Rope, Source, SourceMap, SourceValue, }; /// A string variant of [RawStringSource]. @@ -64,8 +64,8 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } - fn rope(&self) -> Box + '_> { - Box::new(std::iter::once(self.0.as_ref())) + fn rope(&self) -> Rope { + Rope::Light(self.0.as_ref()) } fn buffer(&self) -> Cow<[u8]> { @@ -210,8 +210,8 @@ impl Source for RawBufferSource { SourceValue::Buffer(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Box + '_> { - Box::new(std::iter::once(self.get_or_init_value_as_string())) + fn rope(&self) -> Rope { + Rope::Light(self.get_or_init_value_as_string()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/replace_source.rs b/src/replace_source.rs index b4746fcc..ff172555 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -12,8 +12,8 @@ use crate::{ linear_map::LinearMap, object_pool::ObjectPool, source_content_lines::SourceContentLines, - BoxSource, MapOptions, Mapping, OriginalLocation, OriginalSource, Source, - SourceExt, SourceMap, SourceValue, + BoxSource, MapOptions, Mapping, OriginalLocation, OriginalSource, Rope, + Source, SourceExt, SourceMap, SourceValue, }; /// Decorates a Source with replacements and insertions of source code, @@ -161,24 +161,30 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - if self.replacements.is_empty() { - return self.inner.source(); - } - let mut string = String::with_capacity(self.size()); - for chunk in self.rope() { - string.push_str(chunk); + match self.rope() { + Rope::Light(s) => SourceValue::String(Cow::Borrowed(s)), + Rope::Full(iter) => { + let mut string = String::with_capacity(self.size()); + for chunk in iter { + string.push_str(chunk); + } + SourceValue::String(Cow::Owned(string)) + } } - SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Box + '_> { + fn rope(&self) -> Rope { if self.replacements.is_empty() { return self.inner.rope(); } - Box::new(ReplaceSourceRopeIterator::new( - self.inner.rope(), + let inner_chunks = match self.inner.rope() { + Rope::Light(s) => Box::new(std::iter::once(s)), + Rope::Full(iter) => iter, + }; + Rope::Full(Box::new(ReplaceSourceRopeIterator::new( + inner_chunks, &self.replacements, - )) + ))) } fn buffer(&self) -> Cow<[u8]> { @@ -239,8 +245,13 @@ impl Source for ReplaceSource { } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { - for text in self.rope() { - writer.write_all(text.as_bytes())?; + match self.rope() { + Rope::Light(s) => writer.write_all(s.as_bytes())?, + Rope::Full(iter) => { + for chunk in iter { + writer.write_all(chunk.as_bytes())? + } + } } Ok(()) } diff --git a/src/source.rs b/src/source.rs index e7961d57..55c31905 100644 --- a/src/source.rs +++ b/src/source.rs @@ -107,6 +107,19 @@ impl<'a> SourceValue<'a> { } } +/// A lightweight representation of source content as string slices. +/// +/// `Rope` provides an efficient way to represent source content that may consist +/// of either a single string slice or multiple string segments. This abstraction +/// is particularly useful for build tools and bundlers that need to process +/// and manipulate source code without unnecessary string allocations. +pub enum Rope<'a> { + /// A single borrowed string slice representing contiguous source content. + Light(&'a str), + /// An iterator over multiple string slices representing segmented source content. + Full(Box + 'a>), +} + /// [Source] abstraction, [webpack-sources docs](https://github.com/webpack/webpack-sources/#source). pub trait Source: StreamChunks + DynHash + AsAny + DynEq + DynClone + fmt::Debug + Sync + Send @@ -115,7 +128,7 @@ pub trait Source: fn source(&self) -> SourceValue; /// Return a lightweight "rope" view of the source as borrowed string slices. - fn rope(&self) -> Box + '_>; + fn rope(&self) -> Rope; /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -144,7 +157,7 @@ impl Source for BoxSource { self.as_ref().source() } - fn rope(&self) -> Box + '_> { + fn rope(&self) -> Rope { self.as_ref().rope() } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index a654e382..549d9e69 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -10,7 +10,7 @@ use crate::{ Chunks, StreamChunks, }, object_pool::ObjectPool, - MapOptions, Source, SourceMap, SourceValue, + MapOptions, Rope, Source, SourceMap, SourceValue, }; /// Options for [SourceMapSource::new]. @@ -94,8 +94,8 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Box + '_> { - Box::new(std::iter::once(self.value.as_ref())) + fn rope(&self) -> Rope { + Rope::Light(&self.value) } fn buffer(&self) -> Cow<[u8]> { diff --git a/tests/compat_source.rs b/tests/compat_source.rs index c643c05c..e783e9f3 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -7,8 +7,8 @@ use rspack_sources::stream_chunks::{ StreamChunks, }; use rspack_sources::{ - ConcatSource, MapOptions, ObjectPool, RawStringSource, Source, SourceExt, - SourceMap, SourceValue, + ConcatSource, MapOptions, ObjectPool, RawStringSource, Rope, Source, + SourceExt, SourceMap, SourceValue, }; #[derive(Debug, Eq)] @@ -19,8 +19,8 @@ impl Source for CompatSource { SourceValue::String(Cow::Borrowed(self.0)) } - fn rope(&self) -> Box + '_> { - Box::new(std::iter::once(self.0)) + fn rope(&self) -> Rope { + Rope::Light(self.0) } fn buffer(&self) -> Cow<[u8]> { From 9eb87d671c0d4cacfa5b8adbbd4e13c01096008e Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 16:42:22 +0800 Subject: [PATCH 18/24] perf: inline next --- src/replace_source.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replace_source.rs b/src/replace_source.rs index ff172555..19c128e7 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -853,6 +853,7 @@ impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { type Item = &'a str; #[allow(unsafe_code)] + #[inline] fn next(&mut self) -> Option { loop { // Load next chunk (if needed) From 6e46ac9092cc89eedadf5f297585100830d42259 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 17:32:25 +0800 Subject: [PATCH 19/24] refactor rope --- src/cached_source.rs | 15 +- src/concat_source.rs | 32 ++--- src/lib.rs | 2 +- src/original_source.rs | 6 +- src/raw_source.rs | 10 +- src/replace_source.rs | 295 ++++++++++++++------------------------- src/source.rs | 19 +-- src/source_map_source.rs | 6 +- tests/compat_source.rs | 8 +- 9 files changed, 144 insertions(+), 249 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 19b52ff6..2fdc8d10 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -13,7 +13,7 @@ use crate::{ }, object_pool::ObjectPool, source::SourceValue, - BoxSource, MapOptions, Rope, Source, SourceExt, SourceMap, + BoxSource, MapOptions, Source, SourceExt, SourceMap, }; #[derive(Default)] @@ -82,11 +82,10 @@ impl CachedSource { fn get_or_init_chunks(&self) -> &[&str] { self.cache.chunks.get_or_init(|| { - let rope = self.inner.rope(); - let chunks = match rope { - Rope::Light(s) => vec![s], - Rope::Full(iter) => iter.collect::>(), - }; + let mut chunks = Vec::new(); + self.inner.rope(&mut |chunk| { + chunks.push(chunk); + }); #[allow(unsafe_code)] // SAFETY: CachedSource guarantees that the underlying source outlives the cache, // so transmuting Vec<&str> to Vec<&'static str> is safe in this context. @@ -108,9 +107,9 @@ impl Source for CachedSource { SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Rope { + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let chunks = self.get_or_init_chunks(); - Rope::Full(Box::new(chunks.iter().cloned())) + chunks.iter().for_each(|chunk| on_chunk(chunk)); } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/concat_source.rs b/src/concat_source.rs index 822d3228..2805bc61 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -12,7 +12,7 @@ use crate::{ linear_map::LinearMap, object_pool::ObjectPool, source::{Mapping, OriginalLocation}, - BoxSource, MapOptions, RawStringSource, Rope, Source, SourceExt, SourceMap, + BoxSource, MapOptions, RawStringSource, Source, SourceExt, SourceMap, SourceValue, }; @@ -169,31 +169,19 @@ impl Source for ConcatSource { } let mut string = String::with_capacity(self.size()); - for child in children { - match child.rope() { - Rope::Light(s) => string.push_str(s), - Rope::Full(chunks) => { - for chunk in chunks { - string.push_str(chunk); - } - } - } - } + children.iter().for_each(|child| { + child.rope(&mut |chunk| { + string.push_str(chunk); + }); + }); SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Rope { + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let children = self.optimized_children(); - if children.len() == 1 { - children[0].rope() - } else { - Rope::Full(Box::new(children.iter().flat_map( - |child| match child.rope() { - Rope::Light(s) => Box::new(std::iter::once(s)), - Rope::Full(iter) => iter, - }, - ))) - } + children.iter().for_each(|child| { + child.rope(on_chunk); + }); } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/lib.rs b/src/lib.rs index b3736484..c5dca46a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,7 @@ pub use original_source::OriginalSource; pub use raw_source::{RawBufferSource, RawStringSource}; pub use replace_source::{ReplaceSource, ReplacementEnforce}; pub use source::{ - BoxSource, MapOptions, Mapping, OriginalLocation, Rope, Source, SourceExt, + BoxSource, MapOptions, Mapping, OriginalLocation, Source, SourceExt, SourceMap, SourceValue, }; pub use source_map_source::{ diff --git a/src/original_source.rs b/src/original_source.rs index 2a4d80e5..5aebdef5 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -11,7 +11,7 @@ use crate::{ }, object_pool::ObjectPool, source::{Mapping, OriginalLocation}, - MapOptions, Rope, Source, SourceMap, SourceValue, + MapOptions, Source, SourceMap, SourceValue, }; /// Represents source code, it will create source map for the source code, @@ -56,8 +56,8 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Rope { - Rope::Light(self.value.as_ref()) + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + on_chunk(self.value.as_ref()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/raw_source.rs b/src/raw_source.rs index 5bebb9ba..3b82f69f 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -10,7 +10,7 @@ use crate::{ GeneratedInfo, StreamChunks, }, object_pool::ObjectPool, - MapOptions, Rope, Source, SourceMap, SourceValue, + MapOptions, Source, SourceMap, SourceValue, }; /// A string variant of [RawStringSource]. @@ -64,8 +64,8 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } - fn rope(&self) -> Rope { - Rope::Light(self.0.as_ref()) + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + on_chunk(self.0.as_ref()) } fn buffer(&self) -> Cow<[u8]> { @@ -210,8 +210,8 @@ impl Source for RawBufferSource { SourceValue::Buffer(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Rope { - Rope::Light(self.get_or_init_value_as_string()) + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + on_chunk(self.get_or_init_value_as_string()) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/replace_source.rs b/src/replace_source.rs index 19c128e7..a54e6a8f 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -12,8 +12,8 @@ use crate::{ linear_map::LinearMap, object_pool::ObjectPool, source_content_lines::SourceContentLines, - BoxSource, MapOptions, Mapping, OriginalLocation, OriginalSource, Rope, - Source, SourceExt, SourceMap, SourceValue, + BoxSource, MapOptions, Mapping, OriginalLocation, OriginalSource, Source, + SourceExt, SourceMap, SourceValue, }; /// Decorates a Source with replacements and insertions of source code, @@ -161,30 +161,110 @@ impl ReplaceSource { impl Source for ReplaceSource { fn source(&self) -> SourceValue { - match self.rope() { - Rope::Light(s) => SourceValue::String(Cow::Borrowed(s)), - Rope::Full(iter) => { - let mut string = String::with_capacity(self.size()); - for chunk in iter { - string.push_str(chunk); - } - SourceValue::String(Cow::Owned(string)) - } + if self.replacements.is_empty() { + return self.inner.source(); } + + let mut string = String::with_capacity(self.size()); + self.rope(&mut |chunk| { + string.push_str(chunk); + }); + SourceValue::String(Cow::Owned(string)) } - fn rope(&self) -> Rope { + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { if self.replacements.is_empty() { - return self.inner.rope(); + return self.inner.rope(on_chunk); + } + + let mut pos: usize = 0; + let mut replacement_idx: usize = 0; + let mut replacement_end: Option = None; + let mut next_replacement: Option = (replacement_idx + < self.replacements.len()) + .then(|| self.replacements[replacement_idx].start as usize); + + self.inner.rope(&mut |chunk| { + let mut chunk_pos = 0; + let end_pos = pos + chunk.len(); + + // Skip over when it has been replaced + if let Some(replacement_end) = + replacement_end.filter(|replacement_end| *replacement_end > pos) + { + // Skip over the whole chunk + if replacement_end >= end_pos { + pos = end_pos; + return; + } + // Partially skip over chunk + chunk_pos = replacement_end - pos; + pos += chunk_pos; + } + + // Is a replacement in the chunk? + while let Some(next_replacement_pos) = next_replacement + .filter(|next_replacement_pos| *next_replacement_pos < end_pos) + { + if next_replacement_pos > pos { + // Emit chunk until replacement + let offset = next_replacement_pos - pos; + let chunk_slice = &chunk[chunk_pos..(chunk_pos + offset)]; + on_chunk(chunk_slice); + chunk_pos += offset; + pos = next_replacement_pos; + } + // Insert replacement content split into chunks by lines + let replacement = &self.replacements[replacement_idx]; + on_chunk(&replacement.content); + + // Remove replaced content by settings this variable + replacement_end = if let Some(replacement_end) = replacement_end { + Some(replacement_end.max(replacement.end as usize)) + } else { + Some(replacement.end as usize) + }; + + // Move to next replacement + replacement_idx += 1; + next_replacement = if replacement_idx < self.replacements.len() { + Some(self.replacements[replacement_idx].start as usize) + } else { + None + }; + + // Skip over when it has been replaced + let offset = chunk.len() as i64 - end_pos as i64 + + replacement_end.unwrap() as i64 + - chunk_pos as i64; + if offset > 0 { + // Skip over whole chunk + if replacement_end + .is_some_and(|replacement_end| replacement_end >= end_pos) + { + pos = end_pos; + return; + } + + // Partially skip over chunk + chunk_pos += offset as usize; + pos += offset as usize; + } + } + + // Emit remaining chunk + if chunk_pos < chunk.len() { + on_chunk(&chunk[chunk_pos..]); + } + pos = end_pos; + }); + + // Handle remaining replacements one by one + while replacement_idx < self.replacements.len() { + let content = &self.replacements[replacement_idx].content; + on_chunk(content); + replacement_idx += 1; } - let inner_chunks = match self.inner.rope() { - Rope::Light(s) => Box::new(std::iter::once(s)), - Rope::Full(iter) => iter, - }; - Rope::Full(Box::new(ReplaceSourceRopeIterator::new( - inner_chunks, - &self.replacements, - ))) } fn buffer(&self) -> Cow<[u8]> { @@ -245,15 +325,14 @@ impl Source for ReplaceSource { } fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { - match self.rope() { - Rope::Light(s) => writer.write_all(s.as_bytes())?, - Rope::Full(iter) => { - for chunk in iter { - writer.write_all(chunk.as_bytes())? - } + let mut result = Ok(()); + self.rope(&mut |chunk| { + if result.is_err() { + return; } - } - Ok(()) + result = writer.write_all(chunk.as_bytes()); + }); + result } } @@ -817,164 +896,6 @@ impl PartialEq for ReplaceSource { impl Eq for ReplaceSource {} -/// Iterator for ReplaceSource rope that applies replacements on the fly -pub struct ReplaceSourceRopeIterator<'a> { - inner_chunks: Box + 'a>, - replacements: &'a [Replacement], - pos: usize, - replacement_idx: usize, - replacement_end: Option, - next_replacement: Option, - current_chunk: Option<&'a str>, - current_chunk_start: usize, - current_chunk_pos: usize, -} - -impl<'a> ReplaceSourceRopeIterator<'a> { - fn new( - inner_chunks: Box + 'a>, - replacements: &'a [Replacement], - ) -> Self { - Self { - inner_chunks, - replacements, - pos: 0, - replacement_idx: 0, - replacement_end: None, - next_replacement: replacements.first().map(|r| r.start as usize), - current_chunk: None, - current_chunk_start: 0, - current_chunk_pos: 0, - } - } -} - -impl<'a> Iterator for ReplaceSourceRopeIterator<'a> { - type Item = &'a str; - - #[allow(unsafe_code)] - #[inline] - fn next(&mut self) -> Option { - loop { - // Load next chunk (if needed) - if self.current_chunk.is_none() { - self.current_chunk = self.inner_chunks.next(); - self.current_chunk_pos = 0; - if self.current_chunk.is_some() { - self.current_chunk_start = self.pos; - } else { - // No more chunks, handle remaining replacements - return if self.replacement_idx < self.replacements.len() { - let content = unsafe { - &self - .replacements - .get_unchecked(self.replacement_idx) - .content - }; - self.replacement_idx += 1; - Some(content) - } else { - None - }; - } - } - - let chunk = self.current_chunk.unwrap(); - let chunk_end = self.current_chunk_start + chunk.len(); - - // Skip replaced content - if let Some(replacement_end) = self.replacement_end { - if replacement_end > self.pos { - if replacement_end >= chunk_end { - // Skip entire chunk - self.pos = chunk_end; - self.current_chunk = None; - continue; - } else { - // Partially skip chunk - let skip_len = replacement_end - self.pos; - self.current_chunk_pos += skip_len; - self.pos += skip_len; - } - } - } - - // Check if there are replacements in the current chunk - if let Some(next_repl_pos) = - self.next_replacement.filter(|&pos| pos < chunk_end) - { - if next_repl_pos > self.pos { - // Return content before replacement - let offset = next_repl_pos - self.pos; - let result = unsafe { - chunk.get_unchecked( - self.current_chunk_pos..self.current_chunk_pos + offset, - ) - }; - self.current_chunk_pos += offset; - self.pos = next_repl_pos; - return Some(result); - } - - // Process replacement - let replacement = - unsafe { self.replacements.get_unchecked(self.replacement_idx) }; - let content = &replacement.content; - - // Move to next replacement - self.replacement_end = Some( - self - .replacement_end - .map_or(replacement.end as usize, |end| { - end.max(replacement.end as usize) - }), - ); - - // Update position (skip replaced content) - self.replacement_idx += 1; - self.next_replacement = self - .replacements - .get(self.replacement_idx) - .map(|r| r.start as usize); - - // Update position (skip replaced content) - if let Some(replacement_end) = self.replacement_end { - if replacement_end > self.pos { - self.pos = replacement_end; - // If current chunk needs to be skipped, reset it - if replacement_end >= chunk_end { - self.current_chunk = None; - } else { - self.current_chunk_pos = - replacement_end - self.current_chunk_start; - } - } - } - - return Some(content); - } - - // Return remaining chunk content - if self.current_chunk_pos < chunk.len() { - let result = unsafe { chunk.get_unchecked(self.current_chunk_pos..) }; - self.pos = chunk_end; - self.current_chunk = None; - return Some(result); - } - - self.current_chunk = None; - } - } - - fn size_hint(&self) -> (usize, Option) { - let (lower, upper) = self.inner_chunks.size_hint(); - ( - lower + self.replacements.len(), - upper.map(|size| size + self.replacements.len() * 2), - ) - } -} - #[cfg(test)] mod tests { use rustc_hash::FxHasher; diff --git a/src/source.rs b/src/source.rs index 55c31905..36b642bb 100644 --- a/src/source.rs +++ b/src/source.rs @@ -107,19 +107,6 @@ impl<'a> SourceValue<'a> { } } -/// A lightweight representation of source content as string slices. -/// -/// `Rope` provides an efficient way to represent source content that may consist -/// of either a single string slice or multiple string segments. This abstraction -/// is particularly useful for build tools and bundlers that need to process -/// and manipulate source code without unnecessary string allocations. -pub enum Rope<'a> { - /// A single borrowed string slice representing contiguous source content. - Light(&'a str), - /// An iterator over multiple string slices representing segmented source content. - Full(Box + 'a>), -} - /// [Source] abstraction, [webpack-sources docs](https://github.com/webpack/webpack-sources/#source). pub trait Source: StreamChunks + DynHash + AsAny + DynEq + DynClone + fmt::Debug + Sync + Send @@ -128,7 +115,7 @@ pub trait Source: fn source(&self) -> SourceValue; /// Return a lightweight "rope" view of the source as borrowed string slices. - fn rope(&self) -> Rope; + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)); /// Get the source buffer. fn buffer(&self) -> Cow<[u8]>; @@ -157,8 +144,8 @@ impl Source for BoxSource { self.as_ref().source() } - fn rope(&self) -> Rope { - self.as_ref().rope() + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + self.as_ref().rope(on_chunk) } fn buffer(&self) -> Cow<[u8]> { diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 549d9e69..0be2723a 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -10,7 +10,7 @@ use crate::{ Chunks, StreamChunks, }, object_pool::ObjectPool, - MapOptions, Rope, Source, SourceMap, SourceValue, + MapOptions, Source, SourceMap, SourceValue, }; /// Options for [SourceMapSource::new]. @@ -94,8 +94,8 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } - fn rope(&self) -> Rope { - Rope::Light(&self.value) + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + on_chunk(&self.value) } fn buffer(&self) -> Cow<[u8]> { diff --git a/tests/compat_source.rs b/tests/compat_source.rs index e783e9f3..8048de76 100644 --- a/tests/compat_source.rs +++ b/tests/compat_source.rs @@ -7,8 +7,8 @@ use rspack_sources::stream_chunks::{ StreamChunks, }; use rspack_sources::{ - ConcatSource, MapOptions, ObjectPool, RawStringSource, Rope, Source, - SourceExt, SourceMap, SourceValue, + ConcatSource, MapOptions, ObjectPool, RawStringSource, Source, SourceExt, + SourceMap, SourceValue, }; #[derive(Debug, Eq)] @@ -19,8 +19,8 @@ impl Source for CompatSource { SourceValue::String(Cow::Borrowed(self.0)) } - fn rope(&self) -> Rope { - Rope::Light(self.0) + fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { + on_chunk(self.0) } fn buffer(&self) -> Cow<[u8]> { From f03f3e1428d6977c99294d1cf5f4828e60726991 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 17:36:48 +0800 Subject: [PATCH 20/24] inline OriginalSource rope --- src/cached_source.rs | 1 + src/concat_source.rs | 1 + src/original_source.rs | 1 + src/raw_source.rs | 1 + src/source.rs | 6 ++++++ src/source_map_source.rs | 1 + 6 files changed, 11 insertions(+) diff --git a/src/cached_source.rs b/src/cached_source.rs index 2fdc8d10..75e74b07 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -107,6 +107,7 @@ impl Source for CachedSource { SourceValue::String(Cow::Owned(string)) } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let chunks = self.get_or_init_chunks(); chunks.iter().for_each(|chunk| on_chunk(chunk)); diff --git a/src/concat_source.rs b/src/concat_source.rs index 2805bc61..1d2e8aa7 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -177,6 +177,7 @@ impl Source for ConcatSource { SourceValue::String(Cow::Owned(string)) } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let children = self.optimized_children(); children.iter().for_each(|child| { diff --git a/src/original_source.rs b/src/original_source.rs index 5aebdef5..f86121d0 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -56,6 +56,7 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(self.value.as_ref()) } diff --git a/src/raw_source.rs b/src/raw_source.rs index 3b82f69f..9d2b6a2b 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -64,6 +64,7 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(self.0.as_ref()) } diff --git a/src/source.rs b/src/source.rs index 36b642bb..00391196 100644 --- a/src/source.rs +++ b/src/source.rs @@ -140,22 +140,27 @@ pub trait Source: } impl Source for BoxSource { + #[inline] fn source(&self) -> SourceValue { self.as_ref().source() } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { self.as_ref().rope(on_chunk) } + #[inline] fn buffer(&self) -> Cow<[u8]> { self.as_ref().buffer() } + #[inline] fn size(&self) -> usize { self.as_ref().size() } + #[inline] fn map( &self, object_pool: &ObjectPool, @@ -164,6 +169,7 @@ impl Source for BoxSource { self.as_ref().map(object_pool, options) } + #[inline] fn to_writer(&self, writer: &mut dyn std::io::Write) -> std::io::Result<()> { self.as_ref().to_writer(writer) } diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 0be2723a..669150b8 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -94,6 +94,7 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } + #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(&self.value) } From 9b5beabc75edb15bbc99017590b105668d3e25a4 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 17:47:42 +0800 Subject: [PATCH 21/24] perf: unsafe get_unchecked --- src/replace_source.rs | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index a54e6a8f..3f89932d 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -172,6 +172,8 @@ impl Source for ReplaceSource { SourceValue::String(Cow::Owned(string)) } + #[inline] + #[allow(unsafe_code)] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { if self.replacements.is_empty() { return self.inner.rope(on_chunk); @@ -180,9 +182,10 @@ impl Source for ReplaceSource { let mut pos: usize = 0; let mut replacement_idx: usize = 0; let mut replacement_end: Option = None; - let mut next_replacement: Option = (replacement_idx - < self.replacements.len()) - .then(|| self.replacements[replacement_idx].start as usize); + let mut next_replacement: Option = self + .replacements + .get(replacement_idx) + .map(|repl| repl.start as usize); self.inner.rope(&mut |chunk| { let mut chunk_pos = 0; @@ -209,13 +212,15 @@ impl Source for ReplaceSource { if next_replacement_pos > pos { // Emit chunk until replacement let offset = next_replacement_pos - pos; - let chunk_slice = &chunk[chunk_pos..(chunk_pos + offset)]; + let chunk_slice = + unsafe { chunk.get_unchecked(chunk_pos..(chunk_pos + offset)) }; on_chunk(chunk_slice); chunk_pos += offset; pos = next_replacement_pos; } // Insert replacement content split into chunks by lines - let replacement = &self.replacements[replacement_idx]; + let replacement = + unsafe { self.replacements.get_unchecked(replacement_idx) }; on_chunk(&replacement.content); // Remove replaced content by settings this variable @@ -227,11 +232,10 @@ impl Source for ReplaceSource { // Move to next replacement replacement_idx += 1; - next_replacement = if replacement_idx < self.replacements.len() { - Some(self.replacements[replacement_idx].start as usize) - } else { - None - }; + next_replacement = self + .replacements + .get(replacement_idx) + .map(|repl| repl.start as usize); // Skip over when it has been replaced let offset = chunk.len() as i64 - end_pos as i64 @@ -254,14 +258,16 @@ impl Source for ReplaceSource { // Emit remaining chunk if chunk_pos < chunk.len() { - on_chunk(&chunk[chunk_pos..]); + on_chunk(unsafe { chunk.get_unchecked(chunk_pos..) }); } pos = end_pos; }); // Handle remaining replacements one by one while replacement_idx < self.replacements.len() { - let content = &self.replacements[replacement_idx].content; + let replacement = + unsafe { self.replacements.get_unchecked(replacement_idx) }; + let content = &replacement.content; on_chunk(content); replacement_idx += 1; } From 0f8a33f472c2640bc82778fe3e0b850e9d34f583 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 19:32:17 +0800 Subject: [PATCH 22/24] remove rope inline --- src/cached_source.rs | 1 - src/concat_source.rs | 1 - src/original_source.rs | 1 - src/raw_source.rs | 1 - src/replace_source.rs | 1 - src/source_map_source.rs | 1 - 6 files changed, 6 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 75e74b07..2fdc8d10 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -107,7 +107,6 @@ impl Source for CachedSource { SourceValue::String(Cow::Owned(string)) } - #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let chunks = self.get_or_init_chunks(); chunks.iter().for_each(|chunk| on_chunk(chunk)); diff --git a/src/concat_source.rs b/src/concat_source.rs index 1d2e8aa7..2805bc61 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -177,7 +177,6 @@ impl Source for ConcatSource { SourceValue::String(Cow::Owned(string)) } - #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { let children = self.optimized_children(); children.iter().for_each(|child| { diff --git a/src/original_source.rs b/src/original_source.rs index f86121d0..5aebdef5 100644 --- a/src/original_source.rs +++ b/src/original_source.rs @@ -56,7 +56,6 @@ impl Source for OriginalSource { SourceValue::String(Cow::Borrowed(&self.value)) } - #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(self.value.as_ref()) } diff --git a/src/raw_source.rs b/src/raw_source.rs index 9d2b6a2b..3b82f69f 100644 --- a/src/raw_source.rs +++ b/src/raw_source.rs @@ -64,7 +64,6 @@ impl Source for RawStringSource { SourceValue::String(Cow::Borrowed(&self.0)) } - #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(self.0.as_ref()) } diff --git a/src/replace_source.rs b/src/replace_source.rs index 3f89932d..2ba91436 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -172,7 +172,6 @@ impl Source for ReplaceSource { SourceValue::String(Cow::Owned(string)) } - #[inline] #[allow(unsafe_code)] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { if self.replacements.is_empty() { diff --git a/src/source_map_source.rs b/src/source_map_source.rs index 669150b8..0be2723a 100644 --- a/src/source_map_source.rs +++ b/src/source_map_source.rs @@ -94,7 +94,6 @@ impl Source for SourceMapSource { SourceValue::String(Cow::Borrowed(&self.value)) } - #[inline] fn rope<'a>(&'a self, on_chunk: &mut dyn FnMut(&'a str)) { on_chunk(&self.value) } From 4b4866c334bb9c3775db9e3b1afb684184286fb8 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sat, 8 Nov 2025 21:29:11 +0800 Subject: [PATCH 23/24] perf: substring --- src/concat_source.rs | 7 ++++--- src/with_utf16.rs | 14 +++++--------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/concat_source.rs b/src/concat_source.rs index 2805bc61..e3a34db5 100644 --- a/src/concat_source.rs +++ b/src/concat_source.rs @@ -169,10 +169,11 @@ impl Source for ConcatSource { } let mut string = String::with_capacity(self.size()); + let mut on_chunk = |chunk| { + string.push_str(chunk); + }; children.iter().for_each(|child| { - child.rope(&mut |chunk| { - string.push_str(chunk); - }); + child.rope(&mut on_chunk); }); SourceValue::String(Cow::Owned(string)) } diff --git a/src/with_utf16.rs b/src/with_utf16.rs index 4a2e108b..74c58319 100644 --- a/src/with_utf16.rs +++ b/src/with_utf16.rs @@ -32,8 +32,11 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> { } let utf16_byte_indices = self.utf16_byte_indices.get_or_init(|| { - let mut vec = self.object_pool.pull(self.line.len()); + if self.line.is_ascii() { + return None; + } + let mut vec = self.object_pool.pull(self.line.len()); let bytes = self.line.as_bytes(); let mut byte_pos = 0; while byte_pos < bytes.len() { @@ -57,14 +60,7 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> { byte_pos += 4; } } - - if vec.len() == self.line.len() { - // Optimization: UTF-16 length equals UTF-8 length, indicating no surrogate pairs. - // Return None to release the vector back to the object pool for better memory efficiency. - None - } else { - Some(vec) - } + Some(vec) }); let utf8_len = self.line.len(); From b9d8cee231743c4842e5588f80b1a9bba1c18d41 Mon Sep 17 00:00:00 2001 From: Cong-Cong Date: Sun, 9 Nov 2025 11:55:09 +0800 Subject: [PATCH 24/24] fix: cached source size --- src/cached_source.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cached_source.rs b/src/cached_source.rs index 2fdc8d10..36a2e0e2 100644 --- a/src/cached_source.rs +++ b/src/cached_source.rs @@ -117,10 +117,12 @@ impl Source for CachedSource { } fn size(&self) -> usize { - if let Some(chunks) = self.cache.chunks.get() { - return chunks.iter().map(|chunk| chunk.len()).sum(); - } - *self.cache.size.get_or_init(|| self.inner.size()) + *self.cache.size.get_or_init(|| { + if let Some(chunks) = self.cache.chunks.get() { + return chunks.iter().fold(0, |acc, chunk| acc + chunk.len()); + } + self.inner.size() + }) } fn map(