Skip to content

Commit b1eb535

Browse files
authored
fix: replace source for multi byte chars (#198)
1 parent 4310b76 commit b1eb535

File tree

6 files changed

+236
-41
lines changed

6 files changed

+236
-41
lines changed

src/helpers.rs

Lines changed: 158 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::{
1515
object_pool::ObjectPool,
1616
source::{Mapping, OriginalLocation},
1717
source_content_lines::SourceContentLines,
18-
with_indices::WithIndices,
18+
with_utf16::WithUtf16,
1919
MapOptions, Rope, SourceMap,
2020
};
2121

@@ -259,7 +259,7 @@ where
259259
last_line = line;
260260
}
261261

262-
(line_count.max(1), last_line.len())
262+
(line_count.max(1), last_line.utf16_len())
263263
};
264264
GeneratedInfo {
265265
generated_line: generated_line as u32,
@@ -443,7 +443,7 @@ where
443443
{
444444
let lines = split_into_lines(&source);
445445
let line_with_indices_list = lines
446-
.map(|line| WithIndices::new(object_pool, line))
446+
.map(|line| WithUtf16::new(object_pool, line))
447447
.collect::<Vec<_>>();
448448

449449
if line_with_indices_list.is_empty() {
@@ -470,8 +470,11 @@ where
470470
} else {
471471
line_with_indices_list.len()
472472
} as u32;
473-
let final_column: u32 =
474-
if last_new_line { 0 } else { last_line.len() } as u32;
473+
let final_column: u32 = if last_new_line {
474+
0
475+
} else {
476+
last_line.utf16_len()
477+
} as u32;
475478
let mut current_generated_line: u32 = 1;
476479
let mut current_generated_column: u32 = 0;
477480
let mut mapping_active = false;
@@ -713,7 +716,11 @@ where
713716
} else {
714717
lines.len()
715718
} as u32;
716-
let final_column = if last_new_line { 0 } else { last_line.len() } as u32;
719+
let final_column = if last_new_line {
720+
0
721+
} else {
722+
last_line.utf16_len()
723+
} as u32;
717724
GeneratedInfo {
718725
generated_line: final_line,
719726
generated_column: final_column,
@@ -1300,6 +1307,9 @@ pub trait SourceText<'a>: Default + Clone + ToString {
13001307
/// Returns the length of the text in bytes.
13011308
fn len(&self) -> usize;
13021309

1310+
/// Returns the length of the text in UTF-16 code units.
1311+
fn utf16_len(&self) -> usize;
1312+
13031313
/// Converts this text into a Rope.
13041314
fn into_rope(self) -> Rope<'a>
13051315
where
@@ -1349,6 +1359,10 @@ impl<'a> SourceText<'a> for Rope<'a> {
13491359
fn get_byte(&self, byte_index: usize) -> Option<u8> {
13501360
self.get_byte(byte_index)
13511361
}
1362+
1363+
fn utf16_len(&self) -> usize {
1364+
self.utf16_len()
1365+
}
13521366
}
13531367

13541368
impl<'a> SourceText<'a> for &'a str {
@@ -1391,4 +1405,142 @@ impl<'a> SourceText<'a> for &'a str {
13911405
fn get_byte(&self, byte_index: usize) -> Option<u8> {
13921406
self.as_bytes().get(byte_index).copied()
13931407
}
1408+
1409+
fn utf16_len(&self) -> usize {
1410+
self.encode_utf16().count()
1411+
}
1412+
}
1413+
1414+
#[cfg(test)]
1415+
mod tests {
1416+
use std::sync::LazyLock;
1417+
1418+
use super::{
1419+
stream_chunks_of_source_map_final, stream_chunks_of_source_map_full,
1420+
stream_chunks_of_source_map_lines_final,
1421+
stream_chunks_of_source_map_lines_full, GeneratedInfo,
1422+
};
1423+
use crate::{Mapping, ObjectPool, OriginalLocation, SourceMap};
1424+
1425+
const UTF16_SOURCE: &'static str = "var i18n = JSON.parse('{\"魑魅魍魉\":{\"en-US\":\"Evil spirits\",\"zh-CN\":\"魑魅魍魉\"}}');\nvar __webpack_exports___ = i18n[\"魑魅魍魉\"];\nexport { __webpack_exports___ as 魑魅魍魉 };";
1426+
1427+
const UTF16_SOURCE_MAP: LazyLock<SourceMap> = LazyLock::new(|| {
1428+
SourceMap::from_json("{\"version\":3,\"sources\":[\"i18.js\"],\"sourcesContent\":[\"var i18n = JSON.parse('{\\\"魑魅魍魉\\\":{\\\"en-US\\\":\\\"Evil spirits\\\",\\\"zh-CN\\\":\\\"魑魅魍魉\\\"}}');\\nvar __webpack_exports___ = i18n[\\\"魑魅魍魉\\\"];\\nexport { __webpack_exports___ as 魑魅魍魉 };\\n\"],\"names\":[\"i18n\",\"JSON\",\"__webpack_exports___\",\"魑魅魍魉\"],\"mappings\":\"AAAA,IAAIA,OAAOC,KAAK,KAAK,CAAC;AACtB,IAAIC,uBAAuBF,IAAI,CAAC,OAAO;AACvC,SAASE,wBAAwBC,IAAI,GAAG\"}").unwrap()
1429+
});
1430+
1431+
#[test]
1432+
fn test_stream_chunks_of_source_map_full_handles_multi_unit_utf16() {
1433+
let source = UTF16_SOURCE;
1434+
let source_map = &*UTF16_SOURCE_MAP;
1435+
let object_pool = ObjectPool::default();
1436+
1437+
let mut chunks = vec![];
1438+
1439+
let generated_info = stream_chunks_of_source_map_full(
1440+
&object_pool,
1441+
source,
1442+
source_map,
1443+
&mut |chunk, mapping| {
1444+
chunks.push((chunk.unwrap(), mapping));
1445+
},
1446+
&mut |_i, _source, _source_content| {},
1447+
&mut |_i, _name| {},
1448+
);
1449+
1450+
assert_eq!(
1451+
chunks,
1452+
vec![
1453+
("var ".into(), Mapping { generated_line: 1, generated_column: 0, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 0, name_index: None }) }),
1454+
("i18n = ".into(), Mapping { generated_line: 1, generated_column: 4, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 4, name_index: Some(0) }) }),
1455+
("JSON.".into(), Mapping { generated_line: 1, generated_column: 11, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 11, name_index: Some(1) }) }),
1456+
("parse".into(), Mapping { generated_line: 1, generated_column: 16, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 16, name_index: None }) }),
1457+
("(".into(), Mapping { generated_line: 1, generated_column: 21, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 21, name_index: None }) }),
1458+
("'{\"魑魅魍魉\":{\"en-US\":\"Evil spirits\",\"zh-CN\":\"魑魅魍魉\"}}');\n".into(), Mapping { generated_line: 1, generated_column: 22, original: Some(OriginalLocation { source_index: 0, original_line: 1, original_column: 22, name_index: None }) }),
1459+
("var ".into(), Mapping { generated_line: 2, generated_column: 0, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 0, name_index: None }) }),
1460+
("__webpack_exports___ = ".into(), Mapping { generated_line: 2, generated_column: 4, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 4, name_index: Some(2) }) }),
1461+
("i18n".into(), Mapping { generated_line: 2, generated_column: 27, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 27, name_index: Some(0) }) }),
1462+
("[".into(), Mapping { generated_line: 2, generated_column: 31, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 31, name_index: None }) }),
1463+
("\"魑魅魍魉\"]".into(), Mapping { generated_line: 2, generated_column: 32, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 32, name_index: None }) }),
1464+
(";\n".into(), Mapping { generated_line: 2, generated_column: 39, original: Some(OriginalLocation { source_index: 0, original_line: 2, original_column: 39, name_index: None }) }),
1465+
("export { ".into(), Mapping { generated_line: 3, generated_column: 0, original: Some(OriginalLocation { source_index: 0, original_line: 3, original_column: 0, name_index: None }) }),
1466+
("__webpack_exports___ as ".into(), Mapping { generated_line: 3, generated_column: 9, original: Some(OriginalLocation { source_index: 0, original_line: 3, original_column: 9, name_index: Some(2) }) }),
1467+
("魑魅魍魉".into(), Mapping { generated_line: 3, generated_column: 33, original: Some(OriginalLocation { source_index: 0, original_line: 3, original_column: 33, name_index: Some(3) }) }),
1468+
(" };".into(), Mapping { generated_line: 3, generated_column: 37, original: Some(OriginalLocation { source_index: 0, original_line: 3, original_column: 37, name_index: None }) })
1469+
]
1470+
);
1471+
1472+
assert_eq!(
1473+
generated_info,
1474+
GeneratedInfo {
1475+
generated_line: 3,
1476+
generated_column: 40
1477+
}
1478+
)
1479+
}
1480+
1481+
#[test]
1482+
fn test_stream_chunks_of_source_map_final_handles_multi_unit_utf16() {
1483+
let source = UTF16_SOURCE;
1484+
let source_map = &*UTF16_SOURCE_MAP;
1485+
1486+
let generated_info = stream_chunks_of_source_map_final(
1487+
source,
1488+
source_map,
1489+
&mut |_chunk, _mapping| {},
1490+
&mut |_i, _source, _source_content| {},
1491+
&mut |_i, _name| {},
1492+
);
1493+
1494+
assert_eq!(
1495+
generated_info,
1496+
GeneratedInfo {
1497+
generated_line: 3,
1498+
generated_column: 40
1499+
}
1500+
)
1501+
}
1502+
1503+
#[test]
1504+
fn test_stream_chunks_of_source_map_lines_final_handles_multi_unit_utf16() {
1505+
let source = UTF16_SOURCE;
1506+
let source_map = &*UTF16_SOURCE_MAP;
1507+
1508+
let generated_info = stream_chunks_of_source_map_lines_final(
1509+
source,
1510+
source_map,
1511+
&mut |_chunk, _mapping| {},
1512+
&mut |_i, _source, _source_content| {},
1513+
&mut |_i, _name| {},
1514+
);
1515+
1516+
assert_eq!(
1517+
generated_info,
1518+
GeneratedInfo {
1519+
generated_line: 3,
1520+
generated_column: 40
1521+
}
1522+
)
1523+
}
1524+
1525+
#[test]
1526+
fn test_stream_chunks_of_source_map_lines_full_handles_multi_unit_utf16() {
1527+
let source = UTF16_SOURCE;
1528+
let source_map = &*UTF16_SOURCE_MAP;
1529+
1530+
let generated_info = stream_chunks_of_source_map_lines_full(
1531+
source,
1532+
source_map,
1533+
&mut |_chunk, _mapping| {},
1534+
&mut |_i, _source, _source_content| {},
1535+
&mut |_i, _name| {},
1536+
);
1537+
1538+
assert_eq!(
1539+
generated_info,
1540+
GeneratedInfo {
1541+
generated_line: 3,
1542+
generated_column: 40
1543+
}
1544+
)
1545+
}
13941546
}

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ mod rope;
1515
mod source;
1616
mod source_content_lines;
1717
mod source_map_source;
18-
mod with_indices;
18+
mod with_utf16;
1919

2020
pub use cached_source::CachedSource;
2121
pub use concat_source::ConcatSource;

src/replace_source.rs

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -443,9 +443,9 @@ impl StreamChunks for ReplaceSource {
443443
generated_column_offset += mapping.generated_column as i64;
444444
}
445445
} else if generated_column_offset_line == line {
446-
generated_column_offset -= chunk.len() as i64;
446+
generated_column_offset -= chunk.utf16_len() as i64;
447447
} else {
448-
generated_column_offset = -(chunk.len() as i64);
448+
generated_column_offset = -(chunk.utf16_len() as i64);
449449
generated_column_offset_line = line;
450450
}
451451
pos = end_pos;
@@ -572,9 +572,9 @@ impl StreamChunks for ReplaceSource {
572572

573573
if m == lines.len() - 1 && !content_line.ends_with('\n') {
574574
if generated_column_offset_line == line {
575-
generated_column_offset += content_line.len() as i64;
575+
generated_column_offset += content_line.utf16_len() as i64;
576576
} else {
577-
generated_column_offset = content_line.len() as i64;
577+
generated_column_offset = content_line.utf16_len() as i64;
578578
generated_column_offset_line = line;
579579
}
580580
} else {
@@ -618,9 +618,10 @@ impl StreamChunks for ReplaceSource {
618618
}
619619
} else if generated_column_offset_line == line {
620620
generated_column_offset -=
621-
chunk.len() as i64 - chunk_pos as i64;
621+
chunk.utf16_len() as i64 - chunk_pos as i64;
622622
} else {
623-
generated_column_offset = chunk_pos as i64 - chunk.len() as i64;
623+
generated_column_offset =
624+
chunk_pos as i64 - chunk.utf16_len() as i64;
624625
generated_column_offset_line = line;
625626
}
626627
pos = end_pos;
@@ -737,9 +738,9 @@ impl StreamChunks for ReplaceSource {
737738

738739
if m == matches.len() - 1 && !content_line.ends_with('\n') {
739740
if generated_column_offset_line == line {
740-
generated_column_offset += content_line.len() as i64;
741+
generated_column_offset += content_line.utf16_len() as i64;
741742
} else {
742-
generated_column_offset = content_line.len() as i64;
743+
generated_column_offset = content_line.utf16_len() as i64;
743744
generated_column_offset_line = line;
744745
}
745746
} else {
@@ -801,7 +802,7 @@ mod tests {
801802

802803
use crate::{
803804
source_map_source::WithoutOriginalOptions, OriginalSource, RawStringSource,
804-
ReplacementEnforce, SourceExt, SourceMapSource,
805+
ReplacementEnforce, SourceExt, SourceMapSource, SourceMapSourceOptions,
805806
};
806807

807808
use super::*;
@@ -1427,4 +1428,30 @@ return <div>{data.foo}</div>
14271428
source2.hash(&mut hasher2);
14281429
assert_eq!(hasher1.finish(), hasher2.finish());
14291430
}
1431+
1432+
#[test]
1433+
fn test_replace_source_with_multi_unit_utf16() {
1434+
let mut source = ReplaceSource::new(
1435+
SourceMapSource::new(SourceMapSourceOptions {
1436+
value: "var i18n = JSON.parse('{\"魑魅魍魉\":{\"en-US\":\"Evil spirits\",\"zh-CN\":\"魑魅魍魉\"}}');\nvar __webpack_exports___ = i18n[\"魑魅魍魉\"];\nexport { __webpack_exports___ as 魑魅魍魉 };\n",
1437+
name: "main.js",
1438+
source_map: SourceMap::from_json("{\"version\":3,\"sources\":[\"i18n.js\"],\"sourcesContent\":[\"var i18n = JSON.parse('{\\\"魑魅魍魉\\\":{\\\"en-US\\\":\\\"Evil spirits\\\",\\\"zh-CN\\\":\\\"魑魅魍魉\\\"}}');\\nvar __webpack_exports___ = i18n[\\\"魑魅魍魉\\\"];\\nexport { __webpack_exports___ as 魑魅魍魉 };\\n\"],\"names\":[\"i18n\",\"JSON\",\"__webpack_exports___\",\"魑魅魍魉\"],\"mappings\":\"AAAA,IAAIA,OAAOC,KAAK,KAAK,CAAC;AACtB,IAAIC,uBAAuBF,IAAI,CAAC,OAAO;AACvC,SAASE,wBAAwBC,IAAI,GAAG\"}").unwrap(),
1439+
original_source: None,
1440+
inner_source_map: None,
1441+
remove_original_source: false,
1442+
}).boxed()
1443+
);
1444+
source.replace(140, 188, "", None);
1445+
1446+
assert_eq!(source.source().into_string_lossy(), "var i18n = JSON.parse('{\"魑魅魍魉\":{\"en-US\":\"Evil spirits\",\"zh-CN\":\"魑魅魍魉\"}}');\nvar __webpack_exports___ = i18n[\"魑魅魍魉\"];\n\n");
1447+
assert_eq!(source.map(&ObjectPool::default(), &MapOptions::default()).unwrap(), SourceMap::from_json(
1448+
r#"{
1449+
"version": 3,
1450+
"sources": ["i18n.js"],
1451+
"mappings": "AAAA,IAAIA,OAAOC,KAAK,KAAK,CAAC;AACtB,IAAIC,uBAAuBF,IAAI,CAAC,OAAO;AACC",
1452+
"names": ["i18n", "JSON", "__webpack_exports___", "魑魅魍魉"],
1453+
"sourcesContent": ["var i18n = JSON.parse('{\"魑魅魍魉\":{\"en-US\":\"Evil spirits\",\"zh-CN\":\"魑魅魍魉\"}}');\nvar __webpack_exports___ = i18n[\"魑魅魍魉\"];\nexport { __webpack_exports___ as 魑魅魍魉 };\n"]
1454+
}"#
1455+
).unwrap());
1456+
}
14301457
}

src/rope.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,16 @@ impl<'a> Rope<'a> {
134134
}
135135
}
136136

137+
/// Returns the length of the rope in UTF-16 code units.
138+
pub fn utf16_len(&self) -> usize {
139+
match &self.repr {
140+
Repr::Light(s) => s.encode_utf16().count(),
141+
Repr::Full(data) => {
142+
data.iter().map(|(s, _)| s.encode_utf16().count()).sum()
143+
}
144+
}
145+
}
146+
137147
/// Returns an iterator over the characters and their byte positions.
138148
pub fn char_indices(&self) -> CharIndices<'_> {
139149
match &self.repr {

src/source_content_lines.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
use std::sync::Arc;
22

33
use crate::{
4-
helpers::split_into_lines, object_pool::ObjectPool, with_indices::WithIndices,
4+
helpers::split_into_lines, object_pool::ObjectPool, with_utf16::WithUtf16,
55
};
66

77
pub struct SourceContentLines<'object_pool> {
88
text: Arc<str>,
99
// Self-referential data structure: lines borrow from the text.
10-
lines: Vec<WithIndices<'object_pool, 'static, &'static str>>,
10+
lines: Vec<WithUtf16<'object_pool, 'static, &'static str>>,
1111
}
1212

1313
impl<'object_pool> SourceContentLines<'object_pool> {
@@ -18,15 +18,12 @@ impl<'object_pool> SourceContentLines<'object_pool> {
1818
let text_ref =
1919
unsafe { std::mem::transmute::<&str, &'static str>(text.as_ref()) };
2020
let lines = split_into_lines::<&str>(&text_ref)
21-
.map(|line| WithIndices::new(object_pool, line))
21+
.map(|line| WithUtf16::new(object_pool, line))
2222
.collect::<Vec<_>>();
2323
Self { text, lines }
2424
}
2525

26-
pub fn get(
27-
&self,
28-
line: usize,
29-
) -> Option<&WithIndices<'object_pool, '_, &str>> {
26+
pub fn get(&self, line: usize) -> Option<&WithUtf16<'object_pool, '_, &str>> {
3027
let _ = &self.text;
3128
self.lines.get(line)
3229
}

0 commit comments

Comments
 (0)