Skip to content

Commit 045e2c0

Browse files
committed
perf: WithUtf16
1 parent 746949b commit 045e2c0

File tree

2 files changed

+31
-11
lines changed

2 files changed

+31
-11
lines changed

src/helpers.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,9 @@ fn stream_chunks_of_source_map_full<'a>(
460460
on_source: OnSource<'_, 'a>,
461461
on_name: OnName<'_, 'a>,
462462
) -> GeneratedInfo {
463-
let a = split_into_lines(source);
464-
let lines: Vec<WithUtf16<'a, 'a>> = a
463+
let lines = split_into_lines(source)
465464
.map(|line| WithUtf16::new(object_pool, line))
466-
.collect::<Vec<_>>();
465+
.collect::<Vec<WithUtf16<'a, 'a>>>();
467466

468467
if lines.is_empty() {
469468
return GeneratedInfo {

src/with_utf16.rs

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ pub struct WithUtf16<'object_pool, 'text> {
77
/// line is a string reference
88
pub line: &'text str,
99
/// The byte position of each `char` in the `line` string slice.
10-
pub utf16_byte_indices: OnceCell<Pooled<'object_pool>>,
10+
pub utf16_byte_indices: OnceCell<Option<Pooled<'object_pool>>>,
1111
object_pool: &'object_pool ObjectPool,
1212
}
1313

@@ -21,8 +21,13 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> {
2121
}
2222

2323
/// substring::SubString with cache
24-
pub fn substring(&self, start_index: usize, end_index: usize) -> &'text str {
25-
if end_index <= start_index {
24+
#[allow(unsafe_code)]
25+
pub fn substring(
26+
&self,
27+
start_utf16_index: usize,
28+
end_utf16_index: usize,
29+
) -> &'text str {
30+
if end_utf16_index <= start_utf16_index {
2631
return "";
2732
}
2833

@@ -38,14 +43,30 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> {
3843
_ => unreachable!(),
3944
}
4045
}
41-
vec
46+
if vec.len() == self.line.len() {
47+
// Optimization: the UTF-16 length equals the UTF-8 byte length, so the line is pure ASCII and every UTF-16 index is already the byte offset.
48+
// Return None to release the vector back to the object pool for better memory efficiency.
49+
None
50+
} else {
51+
Some(vec)
52+
}
4253
});
4354

44-
let str_len = self.line.len();
45-
let start = *utf16_byte_indices.get(start_index).unwrap_or(&str_len);
46-
let end = *utf16_byte_indices.get(end_index).unwrap_or(&str_len);
55+
let utf8_len = self.line.len();
56+
57+
let Some(utf16_byte_indices) = utf16_byte_indices else {
58+
let start_utf16_index = start_utf16_index.min(utf8_len);
59+
let end_utf16_index = end_utf16_index.min(utf8_len);
60+
return unsafe {
61+
self.line.get_unchecked(start_utf16_index..end_utf16_index)
62+
};
63+
};
64+
65+
let start = *utf16_byte_indices
66+
.get(start_utf16_index)
67+
.unwrap_or(&utf8_len);
68+
let end = *utf16_byte_indices.get(end_utf16_index).unwrap_or(&utf8_len);
4769

48-
#[allow(unsafe_code)]
4970
unsafe {
5071
// SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
5172
// that the indices obtained from it will always be within the bounds of `self` and they

0 commit comments

Comments
 (0)