Skip to content

Commit 859f722

Browse files
authored
Merge pull request #210 from nyurik/utf8-rework
Rework utf8, bool return
2 parents 50df0ed + 4e778a1 commit 859f722

File tree

3 files changed

+74
-89
lines changed

3 files changed

+74
-89
lines changed

src/enc/encode.rs

Lines changed: 24 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -37,9 +37,10 @@ use super::metablock::{
3737
};
3838
pub use super::parameters::BrotliEncoderParameter;
3939
use super::static_dict::{kNumDistanceCacheEntries, BrotliGetDictionary};
40-
use super::utf8_util::BrotliIsMostlyUTF8;
4140
use super::util::Log2FloorNonZero;
41+
use crate::enc::floatX;
4242
use crate::enc::input_pair::InputReferenceMut;
43+
use crate::enc::utf8_util::is_mostly_utf8;
4344

4445
//fn BrotliCreateHqZopfliBackwardReferences(m: &mut [MemoryManager],
4546
// dictionary: &[BrotliDictionary],
@@ -1293,46 +1294,35 @@ fn InitOrStitchToPreviousBlock<Alloc: alloc::Allocator<u16> + alloc::Allocator<u
12931294
handle.StitchToPreviousBlock(input_size, position, data, mask);
12941295
}
12951296

1296-
fn ShouldCompress(
1297+
fn should_compress(
12971298
data: &[u8],
12981299
mask: usize,
12991300
last_flush_pos: u64,
13001301
bytes: usize,
13011302
num_literals: usize,
13021303
num_commands: usize,
1303-
) -> i32 {
1304-
if num_commands < (bytes >> 8).wrapping_add(2)
1305-
&& num_literals as (super::util::floatX)
1306-
> 0.99 as super::util::floatX * bytes as (super::util::floatX)
1307-
{
1304+
) -> bool {
1305+
const K_SAMPLE_RATE: u32 = 13;
1306+
const K_MIN_ENTROPY: floatX = 7.92;
1307+
1308+
if num_commands < (bytes >> 8) + 2 && num_literals as floatX > 0.99 * bytes as floatX {
13081309
let mut literal_histo = [0u32; 256];
1309-
static kSampleRate: u32 = 13u32;
1310-
static kMinEntropy: super::util::floatX = 7.92 as super::util::floatX;
1311-
let bit_cost_threshold: super::util::floatX =
1312-
bytes as (super::util::floatX) * kMinEntropy / kSampleRate as (super::util::floatX);
1313-
let t: usize = bytes
1314-
.wrapping_add(kSampleRate as usize)
1310+
let bit_cost_threshold = bytes as floatX * K_MIN_ENTROPY / K_SAMPLE_RATE as floatX;
1311+
let t = bytes
1312+
.wrapping_add(K_SAMPLE_RATE as usize)
13151313
.wrapping_sub(1)
1316-
.wrapping_div(kSampleRate as usize);
1317-
let mut pos: u32 = last_flush_pos as u32;
1318-
let mut i: usize;
1319-
i = 0usize;
1320-
while i < t {
1321-
{
1322-
{
1323-
let _rhs = 1;
1324-
let _lhs = &mut literal_histo[data[(pos as usize & mask)] as usize];
1325-
*_lhs = (*_lhs).wrapping_add(_rhs as u32);
1326-
}
1327-
pos = pos.wrapping_add(kSampleRate);
1328-
}
1329-
i = i.wrapping_add(1);
1330-
}
1331-
if BitsEntropy(&literal_histo[..], 256usize) > bit_cost_threshold {
1332-
return 0i32;
1314+
.wrapping_div(K_SAMPLE_RATE as usize);
1315+
let mut pos = last_flush_pos as u32;
1316+
for _ in 0..t {
1317+
let value = &mut literal_histo[data[pos as usize & mask] as usize];
1318+
*value = value.wrapping_add(1);
1319+
pos = pos.wrapping_add(K_SAMPLE_RATE);
1320+
}
1321+
if BitsEntropy(&literal_histo[..], 256) > bit_cost_threshold {
1322+
return false;
13331323
}
13341324
}
1335-
1i32
1325+
true
13361326
}
13371327

13381328
/* Chooses the literal context mode for a metablock */
@@ -1352,7 +1342,7 @@ fn ChooseContextMode(
13521342
BrotliEncoderMode::BROTLI_FORCE_SIGNED_PRIOR => return ContextType::CONTEXT_SIGNED,
13531343
_ => {}
13541344
}
1355-
if (params.quality >= 10 && BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio) == 0) {
1345+
if (params.quality >= 10 && !is_mostly_utf8(data, pos, mask, length, kMinUTF8Ratio)) {
13561346
return ContextType::CONTEXT_SIGNED;
13571347
}
13581348
ContextType::CONTEXT_UTF8
@@ -1986,15 +1976,14 @@ fn WriteMetaBlockInternal<Alloc: BrotliAlloc, Cb>(
19861976
*storage_ix = storage_ix.wrapping_add(7u32 as usize) & !7u32 as usize;
19871977
return;
19881978
}
1989-
if ShouldCompress(
1979+
if !should_compress(
19901980
data,
19911981
mask,
19921982
last_flush_pos,
19931983
bytes,
19941984
num_literals,
19951985
num_commands,
1996-
) == 0
1997-
{
1986+
) {
19981987
dist_cache[..4].clone_from_slice(&saved_dist_cache[..4]);
19991988
store_uncompressed_meta_block(
20001989
alloc,

src/enc/literal_cost.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@
22

33
use core::cmp::min;
44

5-
use super::utf8_util::BrotliIsMostlyUTF8;
65
use super::util::FastLog2f64;
6+
use crate::enc::utf8_util::is_mostly_utf8;
77

88
static kMinUTF8Ratio: super::util::floatX = 0.75 as super::util::floatX;
99

@@ -184,7 +184,7 @@ pub fn BrotliEstimateBitCostsForLiterals(
184184
data: &[u8],
185185
cost: &mut [super::util::floatX],
186186
) {
187-
if BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio) != 0 {
187+
if is_mostly_utf8(data, pos, mask, len, kMinUTF8Ratio) {
188188
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
189189
} else {
190190
let mut histogram: [usize; 256] = [0; 256];

src/enc/utf8_util.rs

Lines changed: 48 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,77 +1,73 @@
1-
#![allow(dead_code)]
1+
use crate::enc::floatX;
22

3-
static kMinUTF8Ratio: super::util::floatX = 0.75 as super::util::floatX;
4-
5-
fn BrotliParseAsUTF8(symbol: &mut i32, input: &[u8], size: usize) -> usize {
6-
if input[0] & 0x80 == 0 {
7-
*symbol = input[0] as i32;
8-
if *symbol > 0i32 {
9-
return 1usize;
3+
fn parse_as_utf8(input: &[u8], size: usize) -> (usize, i32) {
4+
if (input[0] & 0x80) == 0 {
5+
if input[0] > 0 {
6+
return (1, i32::from(input[0]));
107
}
118
}
12-
if size > 1u32 as usize
13-
&& (input[0] as i32 & 0xe0i32 == 0xc0i32)
14-
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
15-
{
16-
*symbol = (input[0] as i32 & 0x1fi32) << 6 | input[1] as i32 & 0x3fi32;
17-
if *symbol > 0x7fi32 {
18-
return 2usize;
9+
if size > 1 && (input[0] & 0xe0) == 0xc0 && (input[1] & 0xc0) == 0x80 {
10+
let symbol = (input[0] as i32 & 0x1f) << 6 | input[1] as i32 & 0x3f;
11+
if symbol > 0x7f {
12+
return (2, symbol);
1913
}
2014
}
21-
if size > 2u32 as usize
22-
&& (input[0] as i32 & 0xf0i32 == 0xe0i32)
23-
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
24-
&& (input[2] as i32 & 0xc0i32 == 0x80i32)
15+
if size > 2
16+
&& (input[0] & 0xf0) == 0xe0
17+
&& (input[1] & 0xc0) == 0x80
18+
&& (input[2] & 0xc0) == 0x80
2519
{
26-
*symbol = (input[0] as i32 & 0xfi32) << 12
27-
| (input[1] as i32 & 0x3fi32) << 6
28-
| input[2] as i32 & 0x3fi32;
29-
if *symbol > 0x7ffi32 {
30-
return 3usize;
20+
let symbol = (i32::from(input[0]) & 0x0f) << 12
21+
| (i32::from(input[1]) & 0x3f) << 6
22+
| i32::from(input[2]) & 0x3f;
23+
if symbol > 0x7ff {
24+
return (3, symbol);
3125
}
3226
}
33-
if size > 3u32 as usize
34-
&& (input[0] as i32 & 0xf8i32 == 0xf0i32)
35-
&& (input[1] as i32 & 0xc0i32 == 0x80i32)
36-
&& (input[2] as i32 & 0xc0i32 == 0x80i32)
37-
&& (input[3] as i32 & 0xc0i32 == 0x80i32)
27+
if size > 3
28+
&& (input[0] & 0xf8) == 0xf0
29+
&& (input[1] & 0xc0) == 0x80
30+
&& (input[2] & 0xc0) == 0x80
31+
&& (input[3] & 0xc0) == 0x80
3832
{
39-
*symbol = (input[0] as i32 & 0x7i32) << 18
40-
| (input[1] as i32 & 0x3fi32) << 12
41-
| (input[2] as i32 & 0x3fi32) << 6
42-
| input[3] as i32 & 0x3fi32;
43-
if *symbol > 0xffffi32 && (*symbol <= 0x10ffffi32) {
44-
return 4usize;
33+
let symbol = (i32::from(input[0]) & 0x07) << 18
34+
| (i32::from(input[1]) & 0x3f) << 12
35+
| (i32::from(input[2]) & 0x3f) << 6
36+
| i32::from(input[3]) & 0x3f;
37+
if symbol > 0xffff && symbol <= 0x10_ffff {
38+
return (4, symbol);
4539
}
4640
}
47-
*symbol = 0x110000i32 | input[0] as i32;
48-
1usize
41+
42+
(1, 0x11_0000 | i32::from(input[0]))
4943
}
5044

45+
#[deprecated(note = "Use is_mostly_utf8 instead")]
5146
pub fn BrotliIsMostlyUTF8(
5247
data: &[u8],
5348
pos: usize,
5449
mask: usize,
5550
length: usize,
56-
min_fraction: super::util::floatX,
51+
min_fraction: floatX,
5752
) -> i32 {
58-
let mut size_utf8: usize = 0usize;
59-
let mut i: usize = 0usize;
53+
is_mostly_utf8(data, pos, mask, length, min_fraction).into()
54+
}
55+
56+
pub(crate) fn is_mostly_utf8(
57+
data: &[u8],
58+
pos: usize,
59+
mask: usize,
60+
length: usize,
61+
min_fraction: floatX,
62+
) -> bool {
63+
let mut size_utf8: usize = 0;
64+
let mut i: usize = 0;
6065
while i < length {
61-
let mut symbol: i32 = 0;
62-
let bytes_read: usize = BrotliParseAsUTF8(
63-
&mut symbol,
64-
&data[(pos.wrapping_add(i) & mask)..],
65-
length.wrapping_sub(i),
66-
);
66+
let (bytes_read, symbol) = parse_as_utf8(&data[(pos.wrapping_add(i) & mask)..], length - i);
6767
i = i.wrapping_add(bytes_read);
68-
if symbol < 0x110000i32 {
68+
if symbol < 0x11_0000 {
6969
size_utf8 = size_utf8.wrapping_add(bytes_read);
7070
}
7171
}
72-
if size_utf8 as (super::util::floatX) > min_fraction * length as (super::util::floatX) {
73-
1i32
74-
} else {
75-
0i32
76-
}
72+
size_utf8 as floatX > min_fraction * length as floatX
7773
}

0 commit comments

Comments
 (0)