Skip to content

Commit df3dd43

Browse files
committed
Move buffer_queue and smallcharset to markup5ever
1 parent 19c89ff commit df3dd43

File tree

9 files changed

+80
-234
lines changed

9 files changed

+80
-234
lines changed

html5ever/src/driver.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99

1010
//! High-level interface to the parser.
1111
12-
use tokenizer::buffer_queue::BufferQueue;
1312
use tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult};
13+
use markup5ever::util::buffer_queue::BufferQueue;
1414
use tree_builder::{TreeBuilderOpts, TreeBuilder, TreeSink};
1515

1616
use std::borrow::Cow;

html5ever/src/tokenizer/char_ref/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// except according to those terms.
99

1010
use super::{Tokenizer, TokenSink};
11-
use super::buffer_queue::BufferQueue;
11+
use markup5ever::util::buffer_queue::BufferQueue;
1212

1313
use util::str::{is_ascii_alnum};
1414

html5ever/src/tokenizer/mod.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ use self::states::{DoctypeIdKind, Public, System};
2121

2222
use self::char_ref::{CharRef, CharRefTokenizer};
2323

24-
use self::buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet};
25-
2624
use util::str::lower_ascii_letter;
2725

2826
use std::ascii::AsciiExt;
@@ -33,11 +31,10 @@ use std::collections::BTreeMap;
3331

3432
use {LocalName, QualName};
3533
use tendril::StrTendril;
36-
use markup5ever::SmallCharSet;
34+
use markup5ever::{SmallCharSet};
3735
use markup5ever::interface::{Attribute};
3836
pub use markup5ever::util::buffer_queue::{BufferQueue, SetResult, FromSet, NotFromSet};
3937

40-
pub mod buffer_queue;
4138
pub mod states;
4239
mod interface;
4340
mod char_ref;
@@ -1418,7 +1415,7 @@ mod test {
14181415
use super::interface::{CharacterTokens, NullCharacterToken, EOFToken, ParseError};
14191416
use super::interface::{TagKind, StartTag, EndTag, Tag};
14201417

1421-
use super::buffer_queue::{BufferQueue};
1418+
use markup5ever::util::buffer_queue::{BufferQueue};
14221419
use std::mem::replace;
14231420

14241421
use {LocalName};

html5ever/tests/tokenizer.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
// except according to those terms.
99

1010
extern crate rustc_serialize;
11-
extern crate tendril;
1211
extern crate test;
1312
extern crate html5ever;
1413

@@ -31,14 +30,15 @@ use std::collections::BTreeMap;
3130
use std::borrow::Cow::Borrowed;
3231

3332
use html5ever::{LocalName, QualName};
34-
use html5ever::tokenizer::{Doctype, Attribute, StartTag, EndTag, Tag};
33+
use html5ever::tokenizer::{Doctype, StartTag, EndTag, Tag};
3534
use html5ever::tokenizer::{Token, DoctypeToken, TagToken, CommentToken};
3635
use html5ever::tokenizer::{CharacterTokens, NullCharacterToken, EOFToken, ParseError};
3736
use html5ever::tokenizer::{TokenSink, Tokenizer, TokenizerOpts, TokenSinkResult};
38-
use html5ever::tokenizer::buffer_queue::BufferQueue;
37+
use html5ever::tokenizer::{BufferQueue};
3938
use html5ever::tokenizer::states::{Plaintext, RawData, Rcdata, Rawtext};
39+
use html5ever::tendril::*;
40+
use html5ever::{Attribute};
4041

41-
use tendril::{StrTendril, SliceExt};
4242

4343
// Return all ways of splitting the string into at most n
4444
// possibly-empty pieces.

markup5ever/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ include!(concat!(env!("OUT_DIR"), "/generated.rs"));
3737

3838
pub mod data;
3939
pub mod interface;
40+
pub mod util {
41+
pub mod buffer_queue;
42+
pub mod smallcharset;
4043
}
4144

4245
pub use interface::{QualName, Attribute};
46+
pub use util::smallcharset::SmallCharSet;

html5ever/src/tokenizer/buffer_queue.rs renamed to markup5ever/util/buffer_queue.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
// except according to those terms.
99

1010

11+
use std::ascii::AsciiExt;
1112
use std::collections::VecDeque;
1213

1314
use tendril::StrTendril;
14-
use markup5ever::SmallCharSet;
1515

1616
pub use self::SetResult::{FromSet, NotFromSet};
17+
use util::smallcharset::SmallCharSet;
1718

1819
/// Result from `pop_except_from`.
1920
#[derive(PartialEq, Eq, Debug)]

markup5ever/util/smallcharset.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Copyright 2014 The html5ever Project Developers. See the
2+
// COPYRIGHT file at the top-level directory of this distribution.
3+
//
4+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7+
// option. This file may not be copied, modified, or distributed
8+
// except according to those terms.
9+
10+
/// Builds a `SmallCharSet` from one or more whitespace-separated
/// expressions, each of which is cast to `usize` and used as a bit index.
///
/// Every expression must evaluate to a value below 64, since the set is
/// backed by a single `u64`.
///
/// The expansion names the type through `$crate` so that it resolves both
/// inside this crate (e.g. in its own tests) and in downstream crates,
/// regardless of the name under which the crate was imported.
#[macro_export]
macro_rules! small_char_set ( ($($e:expr)+) => (
    $crate::SmallCharSet {
        bits: $( (1 << ($e as usize)) )|+
    }
));
16+
17+
/// Represents a set of "small characters", those with Unicode scalar
/// values less than 64.
///
/// The set is a plain 64-bit mask, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SmallCharSet {
    /// Membership mask: bit `i` is set when the character with scalar
    /// value `i` belongs to the set.
    pub bits: u64,
}
22+
23+
impl SmallCharSet {
24+
#[inline]
25+
fn contains(&self, n: u8) -> bool {
26+
0 != (self.bits & (1 << (n as usize)))
27+
}
28+
29+
/// Count the number of bytes of characters at the beginning
30+
/// of `buf` which are not in the set.
31+
/// See `tokenizer::buffer_queue::pop_except_from`.
32+
pub fn nonmember_prefix_len(&self, buf: &str) -> u32 {
33+
let mut n = 0;
34+
for b in buf.bytes() {
35+
if b >= 64 || !self.contains(b) {
36+
n += 1;
37+
} else {
38+
break;
39+
}
40+
}
41+
n
42+
}
43+
}
44+
45+
#[cfg(test)]
mod test {
    use std::iter::repeat;

    /// For each member character, surrounds it with every combination of
    /// 0..48 leading and trailing filler bytes and checks that the reported
    /// prefix length equals the number of leading filler bytes.
    #[test]
    fn nonmember_prefix() {
        // The set does not depend on the loop variables, so build it once.
        let set = small_char_set!('&' '\0');

        for &member in ['&', '\0'].iter() {
            for lead in 0 .. 48u32 {
                for trail in 0 .. 48u32 {
                    let mut input: String = repeat('x').take(lead as usize).collect();
                    input.push(member);
                    input.extend(repeat('x').take(trail as usize));

                    assert_eq!(lead, set.nonmember_prefix_len(&input));
                }
            }
        }
    }
}

xml5ever/src/tokenizer/buffer_queue.rs

Lines changed: 0 additions & 219 deletions
This file was deleted.

0 commit comments

Comments
 (0)