|
| 1 | +use quick_xml::events::Event; |
| 2 | +use quick_xml::Reader; |
| 3 | + |
#[cfg(feature = "encoding")]
mod decode {
    use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
    use quick_xml::encoding::*;
    use std::borrow::Cow;

    // Fixture documents embedded at compile time. The tests below expect each of
    // them to decode to exactly `UTF8_TEXT` once the byte order mark is removed.
    static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16be.xml");
    static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf16le.xml");
    static UTF8_TEXT_WITH_BOM: &[u8] = include_bytes!("./documents/utf8.xml");

    // Reference document used as the expected decoding result; `as_bytes()` of it
    // also serves as the "no BOM" input.
    static UTF8_TEXT: &str = r#"<?xml version="1.0"?>
<project name="project-name">
</project>
"#;

    /// `decode_with_bom_removal` must yield the reference text for input without a
    /// BOM as well as for the UTF-8, UTF-16BE and UTF-16LE fixtures.
    #[test]
    fn test_removes_bom() {
        let expected: Cow<'static, str> = Cow::Borrowed(UTF8_TEXT);

        // No BOM
        let plain = decode_with_bom_removal(UTF8_TEXT.as_bytes()).unwrap();
        assert_eq!(plain, expected);

        // BOM
        let utf8 = decode_with_bom_removal(UTF8_TEXT_WITH_BOM).unwrap();
        assert_eq!(utf8, expected);

        let utf16be = decode_with_bom_removal(UTF16BE_TEXT_WITH_BOM).unwrap();
        assert_eq!(utf16be, expected);

        let utf16le = decode_with_bom_removal(UTF16LE_TEXT_WITH_BOM).unwrap();
        assert_eq!(utf16le, expected);
    }

    /// `detect_encoding` must report the matching `encoding_rs` encoding for each
    /// input, including the reference text that carries no BOM.
    #[test]
    fn test_detect_encoding() {
        // No BOM
        let without_bom = detect_encoding(UTF8_TEXT.as_bytes());
        assert_eq!(without_bom, Some(UTF_8));

        // BOM
        let utf8 = detect_encoding(UTF8_TEXT_WITH_BOM);
        assert_eq!(utf8, Some(UTF_8));

        let utf16be = detect_encoding(UTF16BE_TEXT_WITH_BOM);
        assert_eq!(utf16be, Some(UTF_16BE));

        let utf16le = detect_encoding(UTF16LE_TEXT_WITH_BOM);
        assert_eq!(utf16le, Some(UTF_16LE));
    }
}
| 51 | + |
/// Reads the embedded `documents/opennews_all.rss` fixture event by event and
/// checks that every text event can be unescaped.
///
/// NOTE(review): the test name implies the fixture is KOI8-R encoded; confirm
/// against the fixture's XML declaration.
///
/// # Panics
///
/// Panics (failing the test) if the reader reports an error or if a text event
/// cannot be unescaped.
#[test]
#[cfg(feature = "encoding")]
fn test_koi8_r_encoding() {
    let src = include_bytes!("documents/opennews_all.rss").as_ref();
    let mut buf = vec![];
    let mut r = Reader::from_reader(src);
    r.trim_text(true).expand_empty_elements(false);
    loop {
        match r.read_event_into(&mut buf) {
            Ok(Event::Text(e)) => {
                e.unescape().unwrap();
            }
            Ok(Event::Eof) => break,
            // A reader error must fail the test; silently ignoring it would hide
            // the failure and gives no guarantee that `Eof` is ever reached.
            Err(e) => panic!("Error at position {}: {:?}", r.buffer_position(), e),
            // All other event kinds are irrelevant to this test.
            Ok(_) => (),
        }
        // The event borrowed from `buf` is gone here, so the buffer can be reused
        // instead of accumulating the whole document.
        buf.clear();
    }
}
| 69 | + |
/// Feeds a fixed malformed byte sequence to the reader and drains events until
/// end of input or the first error.
///
/// There are no assertions: the test passes as long as reading terminates
/// without panicking. (Presumably a regression case from fuzzing, going by the
/// name; confirm against the issue tracker.)
#[test]
#[cfg(feature = "encoding")]
fn fuzz_53() {
    let data: &[u8] = b"\xe9\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00\x00\n(\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\
\x00<>\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00<<\x00\x00\x00";
    let mut reader = Reader::from_reader(std::io::Cursor::new(data));
    let mut buf = Vec::new();
    loop {
        // Both end of input and a reader error end the run; the event itself is
        // dropped at the end of this statement, releasing its borrow of `buf`.
        let finished = matches!(
            reader.read_event_into(&mut buf),
            Ok(Event::Eof) | Err(_)
        );
        if finished {
            break;
        }
        buf.clear();
    }
}
0 commit comments