Skip to content

Commit 0f94e5e

Browse files
committed
Correctly handle escaped strings when parsing json strings. Fixes #31
1 parent 238c8fe commit 0f94e5e

File tree

1 file changed

+53
-4
lines changed

1 file changed

+53
-4
lines changed

src/de/mod.rs

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,10 +175,37 @@ impl<'a> Deserializer<'a> {
175175
loop {
176176
match self.peek() {
177177
Some(b'"') => {
178-
let end = self.index;
179-
self.eat_char();
180-
return str::from_utf8(&self.slice[start..end])
181-
.map_err(|_| Error::InvalidUnicodeCodePoint);
178+
// Counts the number of backslashes in front of the current index.
179+
//
180+
// "some string with \\\" included."
181+
// ^^^^^
182+
// |||||
183+
// loop run: 4321|
184+
// |
185+
// `index`
186+
//
187+
// We only reach this branch after the opening " of the string has been consumed, so a non-backslash
188+
// byte (that opening quote) always precedes `index`; the backward scan stops there at the latest.
189+
let leading_backslashes = |index: usize| -> usize {
190+
let mut count = 0;
191+
loop {
192+
if self.slice[index - count - 1] == b'\\' {
193+
count += 1;
194+
} else {
195+
return count;
196+
}
197+
}
198+
};
199+
200+
let is_escaped = leading_backslashes(self.index) % 2 == 1;
201+
if is_escaped {
202+
self.eat_char(); // just continue
203+
} else {
204+
let end = self.index;
205+
self.eat_char();
206+
return str::from_utf8(&self.slice[start..end])
207+
.map_err(|_| Error::InvalidUnicodeCodePoint);
208+
}
182209
}
183210
Some(_) => self.eat_char(),
184211
None => return Err(Error::EofWhileParsingString),
@@ -745,6 +772,28 @@ mod tests {
745772
#[test]
746773
fn str() {
747774
assert_eq!(crate::from_str(r#" "hello" "#), Ok("hello"));
775+
assert_eq!(crate::from_str(r#" "" "#), Ok(""));
776+
assert_eq!(crate::from_str(r#" " " "#), Ok(" "));
777+
assert_eq!(crate::from_str(r#" "👏" "#), Ok("👏"));
778+
779+
// no unescaping is done (documented as a known issue in lib.rs)
780+
assert_eq!(crate::from_str(r#" "hel\tlo" "#), Ok("hel\\tlo"));
781+
assert_eq!(crate::from_str(r#" "hello \\" "#), Ok("hello \\\\"));
782+
783+
// escaped " in the string content
784+
assert_eq!(crate::from_str(r#" "foo\"bar" "#), Ok(r#"foo\"bar"#));
785+
assert_eq!(crate::from_str(r#" "foo\\\"bar" "#), Ok(r#"foo\\\"bar"#));
786+
assert_eq!(crate::from_str(r#" "foo\"\"bar" "#), Ok(r#"foo\"\"bar"#));
787+
assert_eq!(crate::from_str(r#" "\"bar" "#), Ok(r#"\"bar"#));
788+
assert_eq!(crate::from_str(r#" "foo\"" "#), Ok(r#"foo\""#));
789+
assert_eq!(crate::from_str(r#" "\"" "#), Ok(r#"\""#));
790+
791+
// non-escaped " preceded by backslashes
792+
assert_eq!(crate::from_str(r#" "foo bar\\" "#), Ok(r#"foo bar\\"#));
793+
assert_eq!(crate::from_str(r#" "foo bar\\\\" "#), Ok(r#"foo bar\\\\"#));
794+
assert_eq!(crate::from_str(r#" "foo bar\\\\\\" "#), Ok(r#"foo bar\\\\\\"#));
795+
assert_eq!(crate::from_str(r#" "foo bar\\\\\\\\" "#), Ok(r#"foo bar\\\\\\\\"#));
796+
assert_eq!(crate::from_str(r#" "\\" "#), Ok(r#"\\"#));
748797
}
749798

750799
#[test]

0 commit comments

Comments
 (0)