|
1 | | -#![allow(dead_code)] |
| 1 | +use crate::enc::floatX; |
2 | 2 |
|
3 | | -static kMinUTF8Ratio: super::util::floatX = 0.75 as super::util::floatX; |
4 | | - |
/// Decodes a single symbol from the front of `input`, examining at most `size` bytes.
///
/// Returns `(bytes_consumed, symbol)`:
///
/// * For an accepted 1- to 4-byte sequence, `bytes_consumed` is the sequence length and
///   `symbol` is the decoded value.
/// * Otherwise exactly one byte is consumed and `symbol` is `0x11_0000 | input[0]`, i.e. a
///   value at or above `0x11_0000` that callers can test for.
///
/// A NUL byte is not accepted as a one-byte symbol; it is reported through the fallback path.
/// Two-, three- and four-byte sequences are rejected when the decoded value would have fit in
/// a shorter form, and four-byte values above `0x10_ffff` are rejected.
///
/// `size` is clamped to `input.len()`, so a multi-byte lead byte that sits at the very end of
/// the slice is reported through the fallback path instead of indexing out of bounds.
///
/// # Panics
///
/// Panics if `input` is empty.
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
    // The caller-supplied budget may exceed what the slice really holds; never read past it.
    let size = size.min(input.len());
    let lead = input[0];

    // One byte: 0xxxxxxx, excluding NUL (which falls through to the fallback below).
    if (lead & 0x80) == 0 && lead > 0 {
        return (1, i32::from(lead));
    }

    // Two bytes: 110xxxxx 10xxxxxx.
    if size > 1 && (lead & 0xe0) == 0xc0 && (input[1] & 0xc0) == 0x80 {
        let symbol = ((i32::from(lead) & 0x1f) << 6) | (i32::from(input[1]) & 0x3f);
        // Reject values that would have fit in a single byte.
        if symbol > 0x7f {
            return (2, symbol);
        }
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
    if size > 2
        && (lead & 0xf0) == 0xe0
        && (input[1] & 0xc0) == 0x80
        && (input[2] & 0xc0) == 0x80
    {
        let symbol = ((i32::from(lead) & 0x0f) << 12)
            | ((i32::from(input[1]) & 0x3f) << 6)
            | (i32::from(input[2]) & 0x3f);
        // Reject values that would have fit in two bytes.
        if symbol > 0x7ff {
            return (3, symbol);
        }
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    if size > 3
        && (lead & 0xf8) == 0xf0
        && (input[1] & 0xc0) == 0x80
        && (input[2] & 0xc0) == 0x80
        && (input[3] & 0xc0) == 0x80
    {
        let symbol = ((i32::from(lead) & 0x07) << 18)
            | ((i32::from(input[1]) & 0x3f) << 12)
            | ((i32::from(input[2]) & 0x3f) << 6)
            | (i32::from(input[3]) & 0x3f);
        // Reject values that would have fit in three bytes or exceed the upper limit.
        if symbol > 0xffff && symbol <= 0x10_ffff {
            return (4, symbol);
        }
    }

    // Not an accepted sequence: consume one byte and tag it with a value >= 0x11_0000.
    (1, 0x11_0000 | i32::from(lead))
}
50 | 44 |
|
| 45 | +#[deprecated(note = "Use is_mostly_utf8 instead")] |
51 | 46 | pub fn BrotliIsMostlyUTF8( |
52 | 47 | data: &[u8], |
53 | 48 | pos: usize, |
54 | 49 | mask: usize, |
55 | 50 | length: usize, |
56 | | - min_fraction: super::util::floatX, |
| 51 | + min_fraction: floatX, |
57 | 52 | ) -> i32 { |
58 | | - let mut size_utf8: usize = 0usize; |
59 | | - let mut i: usize = 0usize; |
| 53 | + is_mostly_utf8(data, pos, mask, length, min_fraction).into() |
| 54 | +} |
| 55 | + |
| 56 | +pub(crate) fn is_mostly_utf8( |
| 57 | + data: &[u8], |
| 58 | + pos: usize, |
| 59 | + mask: usize, |
| 60 | + length: usize, |
| 61 | + min_fraction: floatX, |
| 62 | +) -> bool { |
| 63 | + let mut size_utf8: usize = 0; |
| 64 | + let mut i: usize = 0; |
60 | 65 | while i < length { |
61 | | - let mut symbol: i32 = 0; |
62 | | - let bytes_read: usize = BrotliParseAsUTF8( |
63 | | - &mut symbol, |
64 | | - &data[(pos.wrapping_add(i) & mask)..], |
65 | | - length.wrapping_sub(i), |
66 | | - ); |
| 66 | + let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i); |
67 | 67 | i = i.wrapping_add(bytes_read); |
68 | | - if symbol < 0x110000i32 { |
| 68 | + if symbol < 0x11_0000 { |
69 | 69 | size_utf8 = size_utf8.wrapping_add(bytes_read); |
70 | 70 | } |
71 | 71 | } |
72 | | - if size_utf8 as (super::util::floatX) > min_fraction * length as (super::util::floatX) { |
73 | | - 1i32 |
74 | | - } else { |
75 | | - 0i32 |
76 | | - } |
| 72 | + size_utf8 as floatX > min_fraction * length as floatX |
77 | 73 | } |
0 commit comments