|
1 | 1 | ///! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually! |
2 | 2 | // Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist |
3 | 3 | // Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist |
4 | | -// Cc : 7 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using skiplist |
| 4 | +// Cc : 0 bytes, 32 codepoints in 1 ranges (U+000080 - U+0000A0) using match |
5 | 5 | // Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist |
6 | 6 | // Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset |
7 | | -// Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist |
| 7 | +// Lt : 0 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using match |
8 | 8 | // N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist |
9 | 9 | // Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset |
10 | | -// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading |
| 10 | +// White_Space : 0 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using match |
11 | 11 | // to_lower : 11484 bytes |
12 | 12 | // to_upper : 13432 bytes |
13 | | -// Total : 31050 bytes |
| 13 | +// Total : 30754 bytes |
14 | 14 |
|
15 | 15 | #[inline(always)] |
16 | 16 | const fn bitset_search< |
@@ -340,33 +340,13 @@ pub mod case_ignorable { |
340 | 340 |
|
341 | 341 | #[rustfmt::skip] |
342 | 342 | pub mod cc { |
343 | | - use super::ShortOffsetRunHeader; |
344 | | - |
345 | | - static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 1] = [ |
346 | | - ShortOffsetRunHeader::new(0, 1114272), |
347 | | - ]; |
348 | | - static OFFSETS: [u8; 3] = [ |
349 | | - 128, 32, 0, |
350 | | - ]; |
351 | 343 | #[inline] |
352 | | - pub fn lookup(c: char) -> bool { |
| 344 | + pub const fn lookup(c: char) -> bool { |
353 | 345 | debug_assert!(!c.is_ascii()); |
354 | | - (c as u32) >= 0x80 && lookup_slow(c) |
355 | | - } |
356 | | - |
357 | | - #[inline(never)] |
358 | | - fn lookup_slow(c: char) -> bool { |
359 | | - const { |
360 | | - assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); |
361 | | - let mut i = 0; |
362 | | - while i < SHORT_OFFSET_RUNS.len() { |
363 | | - assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); |
364 | | - i += 1; |
365 | | - } |
| 346 | + match c as u32 { |
| 347 | + 0x80..=0x9f => true, |
| 348 | + _ => false, |
366 | 349 | } |
367 | | - // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` |
368 | | - // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. |
369 | | - unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } |
370 | 350 | } |
371 | 351 | } |
372 | 352 |
|
@@ -554,34 +534,22 @@ pub mod lowercase { |
554 | 534 |
|
555 | 535 | #[rustfmt::skip] |
556 | 536 | pub mod lt { |
557 | | - use super::ShortOffsetRunHeader; |
558 | | - |
559 | | - static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 3] = [ |
560 | | - ShortOffsetRunHeader::new(0, 453), ShortOffsetRunHeader::new(1, 8072), |
561 | | - ShortOffsetRunHeader::new(9, 1122301), |
562 | | - ]; |
563 | | - static OFFSETS: [u8; 21] = [ |
564 | | - 0, 1, 2, 1, 2, 1, 38, 1, 0, 8, 8, 8, 8, 8, 12, 1, 15, 1, 47, 1, 0, |
565 | | - ]; |
566 | 537 | #[inline] |
567 | | - pub fn lookup(c: char) -> bool { |
| 538 | + pub const fn lookup(c: char) -> bool { |
568 | 539 | debug_assert!(!c.is_ascii()); |
569 | | - (c as u32) >= 0x1c5 && lookup_slow(c) |
570 | | - } |
571 | | - |
572 | | - #[inline(never)] |
573 | | - fn lookup_slow(c: char) -> bool { |
574 | | - const { |
575 | | - assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32); |
576 | | - let mut i = 0; |
577 | | - while i < SHORT_OFFSET_RUNS.len() { |
578 | | - assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len()); |
579 | | - i += 1; |
580 | | - } |
| 540 | + match c as u32 { |
| 541 | + 0x1c5 => true, |
| 542 | + 0x1c8 => true, |
| 543 | + 0x1cb => true, |
| 544 | + 0x1f2 => true, |
| 545 | + 0x1f88..=0x1f8f => true, |
| 546 | + 0x1f98..=0x1f9f => true, |
| 547 | + 0x1fa8..=0x1faf => true, |
| 548 | + 0x1fbc => true, |
| 549 | + 0x1fcc => true, |
| 550 | + 0x1ffc => true, |
| 551 | + _ => false, |
581 | 552 | } |
582 | | - // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX` |
583 | | - // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`. |
584 | | - unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) } |
585 | 553 | } |
586 | 554 | } |
587 | 555 |
|
@@ -743,25 +711,18 @@ pub mod uppercase { |
743 | 711 |
|
744 | 712 | #[rustfmt::skip] |
745 | 713 | pub mod white_space { |
746 | | - static WHITESPACE_MAP: [u8; 256] = [ |
747 | | - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
748 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
749 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
750 | | - 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
751 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
752 | | - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
753 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
754 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
755 | | - 0, 0, 0, 0, 0, 0, 0, 0, 0, |
756 | | - ]; |
757 | 714 | #[inline] |
758 | 715 | pub const fn lookup(c: char) -> bool { |
759 | 716 | debug_assert!(!c.is_ascii()); |
760 | | - match c as u32 >> 8 { |
761 | | - 0 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0, |
762 | | - 22 => c as u32 == 0x1680, |
763 | | - 32 => WHITESPACE_MAP[c as usize & 0xff] & 2 != 0, |
764 | | - 48 => c as u32 == 0x3000, |
| 717 | + match c as u32 { |
| 718 | + 0x85 => true, |
| 719 | + 0xa0 => true, |
| 720 | + 0x1680 => true, |
| 721 | + 0x2000..=0x200a => true, |
| 722 | + 0x2028..=0x2029 => true, |
| 723 | + 0x202f => true, |
| 724 | + 0x205f => true, |
| 725 | + 0x3000 => true, |
765 | 726 | _ => false, |
766 | 727 | } |
767 | 728 | } |
|
0 commit comments