@@ -7,7 +7,7 @@ pub struct WithUtf16<'object_pool, 'text> {
77 /// line is a string reference
88 pub line : & ' text str ,
99 /// the byte position of each `char` in `line` string slice .
10- pub utf16_byte_indices : OnceCell < Pooled < ' object_pool > > ,
10+ pub utf16_byte_indices : OnceCell < Option < Pooled < ' object_pool > > > ,
1111 object_pool : & ' object_pool ObjectPool ,
1212}
1313
@@ -21,8 +21,13 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> {
2121 }
2222
2323 /// substring::SubString with cache
24- pub fn substring ( & self , start_index : usize , end_index : usize ) -> & ' text str {
25- if end_index <= start_index {
24+ #[ allow( unsafe_code) ]
25+ pub fn substring (
26+ & self ,
27+ start_utf16_index : usize ,
28+ end_utf16_index : usize ,
29+ ) -> & ' text str {
30+ if end_utf16_index <= start_utf16_index {
2631 return "" ;
2732 }
2833
@@ -38,14 +43,30 @@ impl<'object_pool, 'text> WithUtf16<'object_pool, 'text> {
3843 _ => unreachable ! ( ) ,
3944 }
4045 }
41- vec
46+ if vec. len ( ) == self . line . len ( ) {
47+ // Optimization: the UTF-16 length equals the UTF-8 byte length only when `line` is pure ASCII, so every UTF-16 index is already a valid byte offset.
48+ // Return None to release the vector back to the object pool for better memory efficiency.
49+ None
50+ } else {
51+ Some ( vec)
52+ }
4253 } ) ;
4354
44- let str_len = self . line . len ( ) ;
45- let start = * utf16_byte_indices. get ( start_index) . unwrap_or ( & str_len) ;
46- let end = * utf16_byte_indices. get ( end_index) . unwrap_or ( & str_len) ;
55+ let utf8_len = self . line . len ( ) ;
56+
57+ let Some ( utf16_byte_indices) = utf16_byte_indices else {
58+ let start_utf16_index = start_utf16_index. min ( utf8_len) ;
59+ let end_utf16_index = end_utf16_index. min ( utf8_len) ;
60+ return unsafe {
61+ self . line . get_unchecked ( start_utf16_index..end_utf16_index)
62+ } ;
63+ } ;
64+
65+ let start = * utf16_byte_indices
66+ . get ( start_utf16_index)
67+ . unwrap_or ( & utf8_len) ;
68+ let end = * utf16_byte_indices. get ( end_utf16_index) . unwrap_or ( & utf8_len) ;
4769
48- #[ allow( unsafe_code) ]
4970 unsafe {
5071 // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee
5172 // that the indices obtained from it will always be within the bounds of `self` and they
0 commit comments