|
| 1 | +// Copyright 2014 The html5ever Project Developers. See the |
| 2 | +// COPYRIGHT file at the top-level directory of this distribution. |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | +// option. This file may not be copied, modified, or distributed |
| 8 | +// except according to those terms. |
| 9 | + |
/// Builds a `SmallCharSet` from one or more whitespace-separated
/// expressions, e.g. `small_char_set!('&' '\0')`.
///
/// Each expression is cast to `usize` and used as a bit index into the
/// 64-bit mask, so every value must be less than 64; a larger value
/// overflows the shift.
///
/// The expansion names the struct through `$crate` so that the macro
/// resolves both inside the defining crate and in downstream crates,
/// regardless of the name the crate is linked under.
#[macro_export]
macro_rules! small_char_set ( ($($e:expr)+) => (
    $crate::SmallCharSet {
        bits: $( (1 << ($e as usize)) )|+
    }
));
| 16 | + |
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// The set is stored as a 64-bit mask, which makes it a cheap
/// plain-data value: it can be copied, compared, hashed and printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SmallCharSet {
    /// Bit `i` is set exactly when the character with scalar value `i`
    /// is a member of the set.
    pub bits: u64,
}
| 22 | + |
| 23 | +impl SmallCharSet { |
| 24 | + #[inline] |
| 25 | + fn contains(&self, n: u8) -> bool { |
| 26 | + 0 != (self.bits & (1 << (n as usize))) |
| 27 | + } |
| 28 | + |
| 29 | + /// Count the number of bytes of characters at the beginning |
| 30 | + /// of `buf` which are not in the set. |
| 31 | + /// See `tokenizer::buffer_queue::pop_except_from`. |
| 32 | + pub fn nonmember_prefix_len(&self, buf: &str) -> u32 { |
| 33 | + let mut n = 0; |
| 34 | + for b in buf.bytes() { |
| 35 | + if b >= 64 || !self.contains(b) { |
| 36 | + n += 1; |
| 37 | + } else { |
| 38 | + break; |
| 39 | + } |
| 40 | + } |
| 41 | + n |
| 42 | + } |
| 43 | +} |
| 44 | + |
#[cfg(test)]
mod test {
    use std::iter::repeat;

    /// Places each member character after 0..48 filler bytes and before
    /// 0..48 more, and checks that the reported prefix length equals the
    /// number of leading filler bytes.
    #[test]
    fn nonmember_prefix() {
        let members = ['&', '\0'];
        for &member in members.iter() {
            for prefix_len in 0..48u32 {
                for suffix_len in 0..48u32 {
                    let input: String = repeat('x')
                        .take(prefix_len as usize)
                        .chain(Some(member))
                        .chain(repeat('x').take(suffix_len as usize))
                        .collect();
                    let set = small_char_set!('&' '\0');

                    assert_eq!(prefix_len, set.nonmember_prefix_len(&input));
                }
            }
        }
    }
}
0 commit comments