@@ -91,18 +91,11 @@ pub trait Chunks {
9191/// [StreamChunks] abstraction, see [webpack-sources source.streamChunks](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13).
9292pub trait StreamChunks {
9393 /// [StreamChunks] abstraction
94- fn stream_chunks < ' a > (
95- & ' a self ,
96- // object_pool: &'a ObjectPool,
97- // options: &MapOptions,
98- // on_chunk: OnChunk<'_, 'a>,
99- // on_source: OnSource<'_, 'a>,
100- // on_name: OnName<'_, 'a>,
101- ) -> Box < dyn Chunks + ' a > ;
94+ fn stream_chunks < ' a > ( & ' a self ) -> Box < dyn Chunks + ' a > ;
10295}
10396
10497/// [OnChunk] abstraction, see [webpack-sources onChunk](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13).
105- pub type OnChunk < ' a , ' b > = & ' a mut dyn FnMut ( Option < Cow < ' b , str > > , Mapping ) ;
98+ pub type OnChunk < ' a , ' b > = & ' a mut dyn FnMut ( Option < & ' b str > , Mapping ) ;
10699
107100/// [OnSource] abstraction, see [webpack-sources onSource](https://github.com/webpack/webpack-sources/blob/9f98066311d53a153fdc7c633422a1d086528027/lib/helpers/streamChunks.js#L13).
108101///
@@ -160,57 +153,94 @@ pub fn encode_mappings(mappings: impl Iterator<Item = Mapping>) -> String {
160153 encoder. drain ( )
161154}
162155
/// A piece of source text produced by [`PotentialTokens`], together with its
/// length measured in UTF-16 code units.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Token<'a> {
  /// The token text, borrowed from the string being split.
  pub text: &'a str,
  /// Number of UTF-16 code units that `text` occupies.
  pub utf16_len: usize,
}

/// Iterator returned by [`split_into_potential_tokens`].
///
/// It walks the input once, byte by byte, and yields consecutive,
/// non-overlapping [`Token`]s.
pub struct PotentialTokens<'a> {
  /// Byte view of `text`; must always be `text.as_bytes()`.
  bytes: &'a [u8],
  /// The string being split; every yielded token is a sub-slice of it.
  text: &'a str,
  /// Byte offset of the next unread byte.
  index: usize,
  /// UTF-16 length accumulated for the token currently being scanned.
  /// It is reset to zero every time a token is yielded.
  utf16_len: usize,
}

impl<'a> Iterator for PotentialTokens<'a> {
  type Item = Token<'a>;

  /// Yields the next token, or `None` once the whole input has been consumed.
  ///
  /// A token is a (possibly empty) run of bytes other than `\n`, `;`, `{`
  /// and `}`, followed by any run of `;`, space, `{`, `}`, `\r`, `\t`, and
  /// then at most one `\n`.
  // `unsafe` is used only for the final unchecked slice; see the SAFETY note.
  #[allow(unsafe_code)]
  fn next(&mut self) -> Option<Self::Item> {
    let len = self.bytes.len();
    if self.index >= len {
      return None;
    }
    let start = self.index;

    // Token body: step over whole UTF-8 sequences until a delimiter or the
    // end of input. The lead byte alone tells us how many bytes the sequence
    // spans and how many UTF-16 code units it encodes to.
    while self.index < len {
      let (width, units) = match self.bytes[self.index] {
        b'\n' | b';' | b'{' | b'}' => break,
        // ASCII: 1 byte -> 1 UTF-16 code unit.
        0x00..=0x7F => (1, 1),
        // 2-byte sequence -> 1 UTF-16 code unit.
        0x80..=0xDF => (2, 1),
        // 3-byte sequence -> 1 UTF-16 code unit.
        0xE0..=0xEF => (3, 1),
        // 4-byte sequence -> surrogate pair, 2 UTF-16 code units.
        _ => (4, 2),
      };
      self.index += width;
      self.utf16_len += units;
    }

    // Trailing punctuation/whitespace, closed by at most one newline. All of
    // these are ASCII, so each is one byte and one UTF-16 code unit.
    while self.index < len {
      match self.bytes[self.index] {
        b';' | b' ' | b'{' | b'}' | b'\r' | b'\t' => {
          self.index += 1;
          self.utf16_len += 1;
        }
        b'\n' => {
          self.index += 1;
          self.utf16_len += 1;
          break;
        }
        _ => break,
      }
    }

    debug_assert!(
      self.text.is_char_boundary(start) && self.text.is_char_boundary(self.index)
    );
    // SAFETY: `bytes` is `text.as_bytes()` and `text` is a `&str`, hence valid
    // UTF-8. Scanning begins at offset 0 and only ever advances by one ASCII
    // byte or by the full width announced by a UTF-8 lead byte, so `start`
    // and `self.index` both lie on char boundaries and
    // `start <= self.index <= text.len()`.
    let text = unsafe { self.text.get_unchecked(start..self.index) };

    // Hand out the accumulated length and reset it for the next token on
    // every path, including the one that reaches the end of the input.
    Some(Token {
      text,
      utf16_len: std::mem::take(&mut self.utf16_len),
    })
  }
}

/// Splits `text` into potential tokens, each carrying its UTF-16 length.
///
/// The split follows this regular expression:
///
/// ```text
/// /[^\n;{}]+[;{} \r\t]*\n?|[;{} \r\t]+\n?|\n/g
/// ```
///
/// Concatenating the `text` of all yielded tokens gives back the input.
pub fn split_into_potential_tokens(text: &str) -> PotentialTokens<'_> {
  PotentialTokens {
    bytes: text.as_bytes(),
    text,
    index: 0,
    utf16_len: 0,
  }
}
216246
@@ -283,7 +313,7 @@ pub fn stream_chunks_of_raw_source<'a>(
283313 let mut last_line = None ;
284314 for l in split_into_lines ( source) {
285315 on_chunk (
286- Some ( Cow :: Borrowed ( l ) ) ,
316+ Some ( l ) ,
287317 Mapping {
288318 generated_line : line,
289319 generated_column : 0 ,
@@ -487,7 +517,7 @@ fn stream_chunks_of_source_map_full<'a>(
487517 }
488518 if !chunk. is_empty ( ) {
489519 on_chunk (
490- Some ( Cow :: Borrowed ( chunk) ) ,
520+ Some ( chunk) ,
491521 Mapping {
492522 generated_line : mapping_line,
493523 generated_column : mapping_column,
@@ -504,7 +534,7 @@ fn stream_chunks_of_source_map_full<'a>(
504534 let chunk = lines[ ( current_generated_line - 1 ) as usize ]
505535 . substring ( current_generated_column as usize , usize:: MAX ) ;
506536 on_chunk (
507- Some ( Cow :: Borrowed ( chunk) ) ,
537+ Some ( chunk) ,
508538 Mapping {
509539 generated_line : current_generated_line,
510540 generated_column : current_generated_column,
@@ -519,7 +549,7 @@ fn stream_chunks_of_source_map_full<'a>(
519549 if current_generated_line as usize <= lines. len ( ) {
520550 let chunk = & lines[ ( current_generated_line as usize ) - 1 ] . line ;
521551 on_chunk (
522- Some ( Cow :: Borrowed ( chunk) ) ,
552+ Some ( chunk) ,
523553 Mapping {
524554 generated_line : current_generated_line,
525555 generated_column : 0 ,
@@ -536,7 +566,7 @@ fn stream_chunks_of_source_map_full<'a>(
536566 mapping. generated_column as usize ,
537567 ) ;
538568 on_chunk (
539- Some ( Cow :: Borrowed ( chunk) ) ,
569+ Some ( chunk) ,
540570 Mapping {
541571 generated_line : current_generated_line,
542572 generated_column : current_generated_column,
@@ -648,7 +678,7 @@ fn stream_chunks_of_source_map_lines_full<'a>(
648678 if current_generated_line as usize <= lines. len ( ) {
649679 let chunk = & lines[ current_generated_line as usize - 1 ] ;
650680 on_chunk (
651- Some ( Cow :: Borrowed ( chunk) ) ,
681+ Some ( chunk) ,
652682 Mapping {
653683 generated_line : current_generated_line,
654684 generated_column : 0 ,
@@ -666,7 +696,7 @@ fn stream_chunks_of_source_map_lines_full<'a>(
666696 let chunk = & lines[ current_generated_line as usize - 1 ] ;
667697 mapping. generated_column = 0 ;
668698 original. name_index = None ;
669- on_chunk ( Some ( Cow :: Borrowed ( chunk) ) , mapping) ;
699+ on_chunk ( Some ( chunk) , mapping) ;
670700 current_generated_line += 1 ;
671701 }
672702 } ;
@@ -676,7 +706,7 @@ fn stream_chunks_of_source_map_lines_full<'a>(
676706 while current_generated_line as usize <= lines. len ( ) {
677707 let chunk = & lines[ current_generated_line as usize - 1 ] ;
678708 on_chunk (
679- Some ( Cow :: Borrowed ( chunk) ) ,
709+ Some ( chunk) ,
680710 Mapping {
681711 generated_line : current_generated_line,
682712 generated_column : 0 ,
@@ -706,7 +736,7 @@ fn stream_chunks_of_source_map_lines_full<'a>(
/// Per-line data: a list of numeric mapping values plus the borrowed text
/// chunks that go with them.
// NOTE(review): the layout of `mappings_data` and its pairing with `chunks`
// are not visible from this definition — confirm against the code that fills it.
#[derive(Debug)]
struct SourceMapLineData<'a> {
  /// Numeric mapping values.
  pub mappings_data: Vec<i64>,
  /// Text chunks, borrowed for `'a`.
  pub chunks: Vec<&'a str>,
}
711741
712742type InnerSourceIndexValueMapping < ' a > =
@@ -1257,11 +1287,13 @@ mod tests {
12571287 use std:: sync:: LazyLock ;
12581288
12591289 use super :: {
1260- stream_chunks_of_source_map_final , stream_chunks_of_source_map_full ,
1261- stream_chunks_of_source_map_lines_final,
1290+ split_into_potential_tokens , stream_chunks_of_source_map_final ,
1291+ stream_chunks_of_source_map_full , stream_chunks_of_source_map_lines_final,
12621292 stream_chunks_of_source_map_lines_full, GeneratedInfo ,
12631293 } ;
1264- use crate :: { Mapping , ObjectPool , OriginalLocation , SourceMap } ;
1294+ use crate :: {
1295+ helpers:: Token , Mapping , ObjectPool , OriginalLocation , SourceMap ,
1296+ } ;
12651297
12661298 const UTF16_SOURCE : & ' static str = "var i18n = JSON.parse('{\" 魑魅魍魉\" :{\" en-US\" :\" Evil spirits\" ,\" zh-CN\" :\" 魑魅魍魉\" }}');\n var __webpack_exports___ = i18n[\" 魑魅魍魉\" ];\n export { __webpack_exports___ as 魑魅魍魉 };" ;
12671299
@@ -1291,22 +1323,22 @@ mod tests {
12911323 assert_eq ! (
12921324 chunks,
12931325 vec![
1294- ( "var " . into ( ) , Mapping { generated_line: 1 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 0 , name_index: None } ) } ) ,
1295- ( "i18n = " . into ( ) , Mapping { generated_line: 1 , generated_column: 4 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 4 , name_index: Some ( 0 ) } ) } ) ,
1296- ( "JSON." . into ( ) , Mapping { generated_line: 1 , generated_column: 11 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 11 , name_index: Some ( 1 ) } ) } ) ,
1297- ( "parse" . into ( ) , Mapping { generated_line: 1 , generated_column: 16 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 16 , name_index: None } ) } ) ,
1298- ( "(" . into ( ) , Mapping { generated_line: 1 , generated_column: 21 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 21 , name_index: None } ) } ) ,
1299- ( "'{\" 魑魅魍魉\" :{\" en-US\" :\" Evil spirits\" ,\" zh-CN\" :\" 魑魅魍魉\" }}');\n " . into ( ) , Mapping { generated_line: 1 , generated_column: 22 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 22 , name_index: None } ) } ) ,
1300- ( "var " . into ( ) , Mapping { generated_line: 2 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 0 , name_index: None } ) } ) ,
1301- ( "__webpack_exports___ = " . into ( ) , Mapping { generated_line: 2 , generated_column: 4 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 4 , name_index: Some ( 2 ) } ) } ) ,
1302- ( "i18n" . into ( ) , Mapping { generated_line: 2 , generated_column: 27 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 27 , name_index: Some ( 0 ) } ) } ) ,
1303- ( "[" . into ( ) , Mapping { generated_line: 2 , generated_column: 31 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 31 , name_index: None } ) } ) ,
1304- ( "\" 魑魅魍魉\" ]" . into ( ) , Mapping { generated_line: 2 , generated_column: 32 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 32 , name_index: None } ) } ) ,
1305- ( ";\n " . into ( ) , Mapping { generated_line: 2 , generated_column: 39 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 39 , name_index: None } ) } ) ,
1306- ( "export { " . into ( ) , Mapping { generated_line: 3 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 0 , name_index: None } ) } ) ,
1307- ( "__webpack_exports___ as " . into ( ) , Mapping { generated_line: 3 , generated_column: 9 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 9 , name_index: Some ( 2 ) } ) } ) ,
1308- ( "魑魅魍魉" . into ( ) , Mapping { generated_line: 3 , generated_column: 33 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 33 , name_index: Some ( 3 ) } ) } ) ,
1309- ( " };" . into ( ) , Mapping { generated_line: 3 , generated_column: 37 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 37 , name_index: None } ) } )
1326+ ( "var " , Mapping { generated_line: 1 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 0 , name_index: None } ) } ) ,
1327+ ( "i18n = " , Mapping { generated_line: 1 , generated_column: 4 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 4 , name_index: Some ( 0 ) } ) } ) ,
1328+ ( "JSON." , Mapping { generated_line: 1 , generated_column: 11 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 11 , name_index: Some ( 1 ) } ) } ) ,
1329+ ( "parse" , Mapping { generated_line: 1 , generated_column: 16 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 16 , name_index: None } ) } ) ,
1330+ ( "(" , Mapping { generated_line: 1 , generated_column: 21 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 21 , name_index: None } ) } ) ,
1331+ ( "'{\" 魑魅魍魉\" :{\" en-US\" :\" Evil spirits\" ,\" zh-CN\" :\" 魑魅魍魉\" }}');\n " , Mapping { generated_line: 1 , generated_column: 22 , original: Some ( OriginalLocation { source_index: 0 , original_line: 1 , original_column: 22 , name_index: None } ) } ) ,
1332+ ( "var " , Mapping { generated_line: 2 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 0 , name_index: None } ) } ) ,
1333+ ( "__webpack_exports___ = " , Mapping { generated_line: 2 , generated_column: 4 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 4 , name_index: Some ( 2 ) } ) } ) ,
1334+ ( "i18n" , Mapping { generated_line: 2 , generated_column: 27 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 27 , name_index: Some ( 0 ) } ) } ) ,
1335+ ( "[" , Mapping { generated_line: 2 , generated_column: 31 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 31 , name_index: None } ) } ) ,
1336+ ( "\" 魑魅魍魉\" ]" , Mapping { generated_line: 2 , generated_column: 32 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 32 , name_index: None } ) } ) ,
1337+ ( ";\n " , Mapping { generated_line: 2 , generated_column: 39 , original: Some ( OriginalLocation { source_index: 0 , original_line: 2 , original_column: 39 , name_index: None } ) } ) ,
1338+ ( "export { " , Mapping { generated_line: 3 , generated_column: 0 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 0 , name_index: None } ) } ) ,
1339+ ( "__webpack_exports___ as " , Mapping { generated_line: 3 , generated_column: 9 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 9 , name_index: Some ( 2 ) } ) } ) ,
1340+ ( "魑魅魍魉" , Mapping { generated_line: 3 , generated_column: 33 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 33 , name_index: Some ( 3 ) } ) } ) ,
1341+ ( " };" , Mapping { generated_line: 3 , generated_column: 37 , original: Some ( OriginalLocation { source_index: 0 , original_line: 3 , original_column: 37 , name_index: None } ) } )
13101342 ]
13111343 ) ;
13121344
@@ -1384,4 +1416,42 @@ mod tests {
13841416 }
13851417 )
13861418 }
1419+
1420+ #[ test]
1421+ fn test_split_into_potential_tokens ( ) {
1422+ let tokens = split_into_potential_tokens ( "var i18n = JSON.parse('{\" 魑魅魍魉\" :{\" en-US\" :\" Evil spirits\" ,\" zh-CN\" :\" 魑魅魍魉\" }}');\n var __webpack_exports___ = i18n[\" 魑魅魍魉\" ];\n export { __webpack_exports___ as 魑魅魍魉 };" ) . collect :: < Vec < _ > > ( ) ;
1423+ assert_eq ! (
1424+ tokens,
1425+ vec![
1426+ Token {
1427+ text: "var i18n = JSON.parse('{" ,
1428+ utf16_len: 24 ,
1429+ } ,
1430+ Token {
1431+ text: "\" 魑魅魍魉\" :{" ,
1432+ utf16_len: 8 ,
1433+ } ,
1434+ Token {
1435+ text: "\" en-US\" :\" Evil spirits\" ,\" zh-CN\" :\" 魑魅魍魉\" }}" ,
1436+ utf16_len: 39 ,
1437+ } ,
1438+ Token {
1439+ text: "');\n " ,
1440+ utf16_len: 4 ,
1441+ } ,
1442+ Token {
1443+ text: "var __webpack_exports___ = i18n[\" 魑魅魍魉\" ];\n " ,
1444+ utf16_len: 41 ,
1445+ } ,
1446+ Token {
1447+ text: "export { " ,
1448+ utf16_len: 9 ,
1449+ } ,
1450+ Token {
1451+ text: "__webpack_exports___ as 魑魅魍魉 };" ,
1452+ utf16_len: 31 ,
1453+ } ,
1454+ ]
1455+ ) ;
1456+ }
13871457}
0 commit comments