|
2 | 2 | // Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist |
3 | 3 | // Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist |
4 | 4 | // Cased : 403 bytes, 4526 codepoints in 157 ranges (U+0000AA - U+01F18A) using skiplist |
5 | | -// Cc : 7 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using skiplist |
| 5 | +// Cc : 0 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using match |
6 | 6 | // Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist |
7 | 7 | // Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset |
8 | 8 | // N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist |
9 | 9 | // Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset |
10 | | -// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading |
| 10 | +// White_Space : 0 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using match |
11 | 11 | // to_lower : 11484 bytes |
12 | 12 | // to_upper : 13432 bytes |
13 | | -// Total : 31420 bytes |
| 13 | +// Total : 31157 bytes |
14 | 14 |
|
15 | 15 | #[inline(always)] |
16 | 16 | const fn bitset_search< |
@@ -393,33 +393,13 @@ pub mod cased { |
393 | 393 |
|
394 | 394 | #[rustfmt::skip] |
395 | 395 | pub mod cc { |
396 | | - use super::ShortOffsetRunHeader; |
397 | | - |
398 | | - static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [ |
399 | | - ShortOffsetRunHeader::new(0, 1114272), |
400 | | - ]; |
401 | | - static OFFSETS: [u8; 3] = [ |
402 | | - 128, 32, 0, |
403 | | - ]; |
404 | 396 | #[inline] |
405 | | - pub fn lookup(c: char) -> bool { |
| 397 | + pub const fn lookup(c: char) -> bool { |
406 | 398 | debug_assert!(!c.is_ascii()); |
407 | | - (c as u32) >= 0x80 && lookup_slow(c) |
408 | | - } |
409 | | - |
410 | | - #[inline(never)] |
411 | | - fn lookup_slow(c: char) -> bool { |
412 | | - const { |
413 | | - assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); |
414 | | - let mut i = 0; |
415 | | - while i < SHORT_OFFSET_RUNS.len() { |
416 | | - assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); |
417 | | - i += 1; |
418 | | - } |
| 399 | + match c as u32 { |
| 400 | + 0x80..=0x9f => true, |
| 401 | + _ => false, |
419 | 402 | } |
420 | | - // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` |
421 | | - // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. |
422 | | - unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } |
423 | 403 | } |
424 | 404 | } |
425 | 405 |
|
@@ -763,25 +743,18 @@ pub mod uppercase { |
763 | 743 |
|
764 | 744 | #[rustfmt::skip] |
765 | 745 | pub mod white_space { |
766 | | - static WHITESPACE_MAP: [u8; 256] = [ |
767 | | - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
768 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
769 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
770 | | - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
771 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
772 | | - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
773 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
774 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
775 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, |
776 | | - ]; |
777 | 746 | #[inline] |
778 | 747 | pub const fn lookup(c: char) -> bool { |
779 | 748 | debug_assert!(!c.is_ascii()); |
780 | | - match c as u32 >> 8 { |
781 | | - 0 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, |
782 | | - 22 => c as u32 == 0x1680, |
783 | | - 32 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, |
784 | | - 48 => c as u32 == 0x3000, |
| 749 | + match c as u32 { |
| 750 | + 0x85 => true, |
| 751 | + 0xa0 => true, |
| 752 | + 0x1680 => true, |
| 753 | + 0x2000..=0x200a => true, |
| 754 | + 0x2028..=0x2029 => true, |
| 755 | + 0x202f => true, |
| 756 | + 0x205f => true, |
| 757 | + 0x3000 => true, |
785 | 758 | _ => false, |
786 | 759 | } |
787 | 760 | } |
|
0 commit comments