From 1c21d2f29ce357b66b61414d9f946d28a83111a7 Mon Sep 17 00:00:00 2001 From: Ryan Castellucci Date: Tue, 2 Dec 2025 22:04:11 +0000 Subject: [PATCH] Add support for bare streams and byte alignment for stream concatenation This commit introduces new functionality to facilitate simpler stream concatenation, allowing Brotli streams to be combined using standard tools like `cat`. * Add `-bare` (headerless/trailerless) and `-bytealign` (byte-boundary padding) CLI options and encoder parameters. * Implement logic to handle parameter dependencies (e.g., `-bare` implies `-bytealign`). * Update C, Go, and Python bindings with `BROTLI_PARAM_BARE_STREAM` and `BROTLI_PARAM_BYTE_ALIGN`. * Add comprehensive tests for various concatenation scenarios in `src/bin/test_broccoli.rs`. * Update README.md with detailed documentation on simple vs. efficient stream concatenation. * Fix lifetime warnings in prior/stride evaluation. --- README.md | 99 +++++++++++-- c/arg.h | 15 ++ c/brotli/encode.h | 6 +- c/go/brotli/brotli.go | 10 ++ c/go/brotli/brotli/encode.h | 6 +- c/py/brotli.py | 4 + src/bin/brotli.rs | 23 ++- src/bin/test_broccoli.rs | 222 +++++++++++++++++++++++++++++ src/enc/backward_references/mod.rs | 8 +- src/enc/brotli_bit_stream.rs | 43 +++++- src/enc/encode.rs | 62 ++++++-- src/enc/parameters.rs | 4 +- src/enc/prior_eval.rs | 2 +- src/enc/stride_eval.rs | 2 +- 14 files changed, 472 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 2e660649..f9bc9f42 100644 --- a/README.md +++ b/README.md @@ -210,14 +210,97 @@ The code also allows a wider range of options, including forcing the prediction (eg UTF8 vs signed vs MSB vs LSB) and changing the weight of the literal cost from 540 to other values. -Additionally the CATABLE and APPENDABLE options are exposed and allow concatenation of files -created in this manner. 
+## Stream Concatenation -Specifically CATABLE files can be concatenated in any order using the catbrotli tool -and APPENDABLE files can be the first file in a sequence of catable files... -eg you can combine -appendable.br catable1.br catable2.br catable3.br +Brotli supports creating streams that can be concatenated together, useful for streaming +scenarios where you want to compress chunks independently but decompress as a single stream. -or simply -catable0.br catable1.br catable2.br catable3.br +### Simple Concatenation (Fast) +Use `-bare -appendable` for the first file and `-bare -catable` for subsequent files. +These can be combined using plain byte concatenation without special tools, with a +finalization byte (`0x03`) added at the end: + +```bash +# Create the base file with header but no trailer (must specify window size) +brotli -c -bare -appendable -w22 input1.txt > base.br + +# Create bare-catable streams (no header, no trailer, same window size!) +brotli -c -bare -catable -w22 input2.txt > part2.br +brotli -c -bare -catable -w22 input3.txt > part3.br + +# Simple concatenation with finalization byte +# Note: printf '\x03' adds the required final byte +(cat base.br part2.br part3.br; printf '\x03') > combined.br + +# Decompress normally +brotli -d combined.br -o output.txt +``` + +**Advantages:** +- Instant concatenation (no processing) +- No special tools required +- Bare streams can be appended in any order + +**Requirements:** +- All files must use the same window size (`-w22` recommended) +- First file: `-bare -appendable` (has header, no trailer) +- Subsequent files: `-bare -catable` (no header, no trailer, no dictionary refs) +- A final `0x03` byte must be appended to complete the stream + +### Efficient Concatenation (Size-optimized) + +Use the `catbrotli` tool with `-catable` and `-appendable` flags for better compression +at the cost of processing time: + +```bash +# Create files for catbrotli tool +brotli -c -appendable input1.txt > 
appendable.br +brotli -c -catable input2.txt > catable1.br +brotli -c -catable input3.txt > catable2.br + +# Concatenate using catbrotli tool +catbrotli appendable.br catable1.br catable2.br > combined.br +``` + +**Tradeoff:** `catbrotli` produces smaller output but requires CPU time to process the +streams intelligently. Use this when size matters more than concatenation speed. + +### Technical Reference: Stream Parameter Interactions + +**Stream Types and Their Parameters:** + +| Stream Type | bare_stream | byte_align | appendable | catable | use_dictionary | Description | +|-------------|-------------|------------|------------|---------|----------------|-------------| +| Standard | false | false | false | false | true | Normal brotli stream with header and trailer | +| First (simple concat) | true | true | true | false | true | Has header, no trailer - for simple `cat` concatenation | +| Subsequent (simple concat) | true | true | true | true | false | No header, no trailer, no dict refs - append to first | +| Appendable (catbrotli) | false | varies | true | false | true | For use with `catbrotli` tool | +| Catable (catbrotli) | false | varies | true | true | false | For use with `catbrotli` tool | + +**Important Notes:** +- **Parameter dependencies are applied automatically** by the library in both CLI and API usage +- The library's `SanitizeParams` function ensures: + - `catable = true` → automatically sets `appendable = true` and `use_dictionary = false` + - `bare_stream = true` → automatically sets `byte_align = true` + - `!appendable` (and `bare_stream` not set) → automatically sets `byte_align = false` +- When using `set_parameter()`, dependencies are applied immediately +- When setting fields directly (e.g., `params.catable = true`), dependencies are applied during compression initialization +- **No manual fixups needed** - the library handles all parameter dependencies +- The `use_dictionary = false` for catable streams prevents references to bytes before the chunk boundary +-
Simple concatenation requires a final `0x03` byte to complete the stream +- All concatenated streams must use the same window size + +**Example API Usage:** +```rust +// First file: -bare -appendable equivalent +params.bare_stream = true; // Sets bare_stream=true, byte_align=true (automatic) +params.appendable = true; // Sets appendable=true + +// Subsequent files: -bare -catable equivalent +params.bare_stream = true; // Sets bare_stream=true, byte_align=true (automatic) +params.catable = true; // Sets catable=true, appendable=true, use_dictionary=false (automatic) + +// All parameter dependencies are handled automatically by the library. +// No manual fixups required - just set the primary flags you want. +``` diff --git a/c/arg.h b/c/arg.h index 19bac930..2de71ec6 100644 --- a/c/arg.h +++ b/c/arg.h @@ -70,6 +70,21 @@ size_t set_options(BrotliEncoderParameter *out_encoder_param_keys, out_encoder_param_values[ret] = 1; ret += 1; } + if (strstr(argv[i], "-appendable") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_APPENDABLE; + out_encoder_param_values[ret] = 1; + ret += 1; + } + if (strstr(argv[i], "-bytealign") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_BYTE_ALIGN; + out_encoder_param_values[ret] = 1; + ret += 1; + } + if (strstr(argv[i], "-bare") == argv[i]) { + out_encoder_param_keys[ret] = BROTLI_PARAM_BARE_STREAM; + out_encoder_param_values[ret] = 1; + ret += 1; + } } return ret; } diff --git a/c/brotli/encode.h b/c/brotli/encode.h index 8c2a0b6f..f00b5440 100644 --- a/c/brotli/encode.h +++ b/c/brotli/encode.h @@ -224,7 +224,11 @@ typedef enum BrotliEncoderParameter { BROTLI_PARAM_AVOID_DISTANCE_PREFIX_SEARCH = 166, BROTLI_PARAM_CATABLE = 167, BROTLI_PARAM_APPENDABLE = 168, - BROTLI_PARAM_MAGIC_NUMBER = 169 + BROTLI_PARAM_MAGIC_NUMBER = 169, + BROTLI_PARAM_NO_DICTIONARY = 170, + BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, + BROTLI_PARAM_BARE_STREAM = 173 } BrotliEncoderParameter; /** diff --git 
a/c/go/brotli/brotli.go b/c/go/brotli/brotli.go index ce24370e..66090ae2 100644 --- a/c/go/brotli/brotli.go +++ b/c/go/brotli/brotli.go @@ -60,6 +60,8 @@ type CompressionOptions struct { Catable bool Appendable bool Magic bool + ByteAlign bool + BareStream bool Mode int LgWin byte LgBlock byte @@ -281,6 +283,14 @@ func makeCompressionOptionsSlices(options CompressionOptions, qualityParams = append(qualityParams, C.BROTLI_PARAM_MAGIC_NUMBER) values = append(values, 1) } + if options.ByteAlign { + qualityParams = append(qualityParams, C.BROTLI_PARAM_BYTE_ALIGN) + values = append(values, 1) + } + if options.BareStream { + qualityParams = append(qualityParams, C.BROTLI_PARAM_BARE_STREAM) + values = append(values, 1) + } if options.Mode != 0 { qualityParams = append(qualityParams, C.BROTLI_PARAM_MODE) values = append(values, C.uint32_t(options.Mode)) diff --git a/c/go/brotli/brotli/encode.h b/c/go/brotli/brotli/encode.h index 3e968975..985df347 100644 --- a/c/go/brotli/brotli/encode.h +++ b/c/go/brotli/brotli/encode.h @@ -224,7 +224,11 @@ typedef enum BrotliEncoderParameter { BROTLI_PARAM_AVOID_DISTANCE_PREFIX_SEARCH = 166, BROTLI_PARAM_CATABLE = 167, BROTLI_PARAM_APPENDABLE = 168, - BROTLI_PARAM_MAGIC_NUMBER = 169 + BROTLI_PARAM_MAGIC_NUMBER = 169, + BROTLI_PARAM_NO_DICTIONARY = 170, + BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, + BROTLI_PARAM_BARE_STREAM = 173, } BrotliEncoderParameter; /** diff --git a/c/py/brotli.py b/c/py/brotli.py index 80af1221..0999be77 100644 --- a/c/py/brotli.py +++ b/c/py/brotli.py @@ -313,6 +313,10 @@ def BrotliParseHeader(raw_data): BROTLI_PARAM_CATABLE = 167 BROTLI_PARAM_APPENDABLE = 168 BROTLI_PARAM_MAGIC_NUMBER = 169 +BROTLI_PARAM_NO_DICTIONARY = 170 +BROTLI_PARAM_FAVOR_EFFICIENCY = 171 +BROTLI_PARAM_BYTE_ALIGN = 172 +BROTLI_PARAM_BARE_STREAM = 173 #simple test binary def main(args): diff --git a/src/bin/brotli.rs b/src/bin/brotli.rs index bb2e5986..f3f7cf44 100644 --- a/src/bin/brotli.rs +++ b/src/bin/brotli.rs 
@@ -553,15 +553,20 @@ fn main() { } if (argument == "-catable" || argument == "--catable") && !double_dash { params.catable = true; - params.use_dictionary = false; - params.appendable = true; continue; } if (argument == "-nothreadpool" || argument == "--nothreadpool") && !double_dash { use_work_pool = false; continue; } - + if (argument == "-bytealign" || argument == "--bytealign") && !double_dash { + params.byte_align = true; + continue; + } + if (argument == "-bare" || argument == "--bare") && !double_dash { + params.bare_stream = true; + continue; + } if (argument == "-appendable" || argument == "--appendable") && !double_dash { params.appendable = true; continue; @@ -824,7 +829,7 @@ fn main() { continue; } if argument == "-h" || argument == "-help" || argument == "--help" && !double_dash { - println_stderr!("Decompression:\nbrotli [input_file] [output_file]\nCompression:brotli -c -q9.5 -w22 [input_file] [output_file]\nQuality may be one of -q9.5 -q9.5x -q9.5y or -q[0-11] for standard brotli settings.\nOptional size hint -s to direct better compression\n\nThe -i parameter produces a cross human readdable IR representation of the file.\nThis can be ingested by other compressors.\nIR-specific options include:\n-findprior\n-speed="); + println_stderr!("Decompression:\nbrotli [input_file] [output_file]\nCompression:brotli -c -q9.5 -w22 [input_file] [output_file]\nQuality may be one of -q9.5 -q9.5x -q9.5y or -q[0-11] for standard brotli settings.\nOptional size hint -s to direct better compression\n\nStream concatenation options:\n-catable Create stream that can be concatenated with other catable streams\n-appendable Create stream that can have catable streams appended to it\n-bytealign Align output to byte boundaries (requires -catable or -appendable)\n-bare Output bare stream without wrapper (requires -catable or -appendable)\n\nThe -i parameter produces a cross human readdable IR representation of the file.\nThis can be ingested by other compressors.\nIR-specific 
options include:\n-findprior\n-speed="); return; } if filenames[0].is_empty() { @@ -837,7 +842,15 @@ fn main() { } panic!("Unknown Argument {:}", argument); } - if !filenames[0].is_empty() { + if params.bare_stream && !params.appendable { + println_stderr!("bare streams only supported when catable or appendable!"); + return; + } + if params.byte_align && !params.appendable { + println_stderr!("byte aligned streams only supported when catable or appendable!"); + return; + } + if filenames[0] != "" { let mut input = match File::open(Path::new(&filenames[0])) { Err(why) => panic!("couldn't open {:}\n{:}", filenames[0], why), Ok(file) => file, diff --git a/src/bin/test_broccoli.rs b/src/bin/test_broccoli.rs index 6a781b7c..82845e4e 100644 --- a/src/bin/test_broccoli.rs +++ b/src/bin/test_broccoli.rs @@ -408,3 +408,225 @@ fn test_concat() { concat_many_subsets(&mut files[..], &mut ufiles[..], None); concat_many_subsets(&mut files[..], &mut ufiles[..], Some(28)); // FIXME: make this 28 } + +// Helper function for simple byte concatenation +fn byte_concat_decompress( + files: &mut [UnlimitedBuffer], + brotli_files: &mut [UnlimitedBuffer], +) { + // Simple byte concatenation with proper finalization: + // 1. First file is -bare -appendable (header, no trailer) + // 2. Subsequent files are -bare -catable (no header, no trailer) + // 3. 
Add final byte (0x03) at the end + let mut concatenated = UnlimitedBuffer::new(&[]); + + // All files: add as-is + for brotli_file in brotli_files.iter_mut() { + concatenated.data.extend_from_slice(brotli_file.data()); + brotli_file.reset_read(); + } + // Add finalization byte + concatenated.data.push(0x03); + + concatenated.reset_read(); // Reset read offset before decompression + let mut decompressed = UnlimitedBuffer::new(&[]); + match super::decompress(&mut concatenated, &mut decompressed, 65536, Rebox::default()) { + Ok(_) => {} + Err(e) => panic!("Error decompressing concatenated stream: {:?}", e), + } + + // Verify output matches original files in order + let mut offset = 0; + for file in files { + assert_eq!( + &decompressed.data()[offset..offset + file.data().len()], + file.data(), + "Decompressed content doesn't match original" + ); + offset += file.data().len(); + } + assert_eq!(offset, decompressed.data().len(), "Decompressed size mismatch"); +} + +#[test] +fn test_bytealign_appendable_with_bare() { + // Test: appendable + bytealign base file with bare streams + let mut files = [ + UnlimitedBuffer::new(ALICE), + UnlimitedBuffer::new(UKKONOOA), + UnlimitedBuffer::new(QUICKFOX), + ]; + let mut brotli_files = [ + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + ]; + + // First file: bare + appendable (header, no trailer) + let mut params_base = BrotliEncoderParams::default(); + params_base.bare_stream = true; + params_base.byte_align = true; // implied by -bare + params_base.appendable = true; + params_base.lgwin = 22; + super::compress(&mut files[0], &mut brotli_files[0], 4096, &params_base, &[], 1).unwrap(); + files[0].reset_read(); + + // Subsequent files: bare streams (no header) + for i in 1..files.len() { + let mut params_bare = BrotliEncoderParams::default(); + params_bare.bare_stream = true; + params_bare.byte_align = true; // implied by -bare + params_bare.catable = true; + params_bare.use_dictionary = false; //
implied by -catable + params_bare.appendable = true; // implied by -catable + params_bare.lgwin = 22; + super::compress(&mut files[i], &mut brotli_files[i], 4096, &params_bare, &[], 1).unwrap(); + files[i].reset_read(); + } + + // Test simple byte concatenation + byte_concat_decompress(&mut files[..], &mut brotli_files[..]); +} + +#[test] +fn test_bare_any_order() { + // Test: bare streams can be appended in any order + let mut files = [ + UnlimitedBuffer::new(ALICE), + UnlimitedBuffer::new(UKKONOOA), + UnlimitedBuffer::new(QUICKFOX), + ]; + let mut brotli_files = [ + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + ]; + + // Base file + let mut params_base = BrotliEncoderParams::default(); + params_base.bare_stream = true; + params_base.byte_align = true; + params_base.appendable = true; + params_base.lgwin = 22; + super::compress(&mut files[0], &mut brotli_files[0], 4096, &params_base, &[], 1).unwrap(); + files[0].reset_read(); + + // Create bare streams + for i in 1..files.len() { + let mut params_bare = BrotliEncoderParams::default(); + params_bare.bare_stream = true; + params_bare.byte_align = true; + params_bare.catable = true; + params_bare.use_dictionary = false; + params_bare.appendable = true; + params_bare.lgwin = 22; + super::compress(&mut files[i], &mut brotli_files[i], 4096, &params_bare, &[], 1).unwrap(); + files[i].reset_read(); + } + + // Test original order + byte_concat_decompress(&mut files[..], &mut brotli_files[..]); + + // Test reordered bare streams (base always first, swap the other two) + let mut files_reordered = [ + UnlimitedBuffer::new(files[0].data()), + UnlimitedBuffer::new(files[2].data()), + UnlimitedBuffer::new(files[1].data()), + ]; + for file in files_reordered.iter_mut() { + file.reset_read(); + } + + let mut brotli_reordered = [ + UnlimitedBuffer::new(brotli_files[0].data()), + UnlimitedBuffer::new(brotli_files[2].data()), + UnlimitedBuffer::new(brotli_files[1].data()), + ]; + for brotli_file in
brotli_reordered.iter_mut() { + brotli_file.reset_read(); + } + + byte_concat_decompress(&mut files_reordered[..], &mut brotli_reordered[..]); +} + +#[test] +fn test_bytealign_with_empty() { + // Test: bytealign with empty files + let mut files = [ + UnlimitedBuffer::new(ALICE), + UnlimitedBuffer::new(EMPTY), + UnlimitedBuffer::new(QUICKFOX), + ]; + let mut brotli_files = [ + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + ]; + + // First file: appendable + bytealign + let mut params_base = BrotliEncoderParams::default(); + params_base.bare_stream = true; + params_base.byte_align = true; + params_base.appendable = true; + params_base.lgwin = 22; + super::compress(&mut files[0], &mut brotli_files[0], 4096, &params_base, &[], 1).unwrap(); + files[0].reset_read(); + + // Remaining files: bare + for i in 1..files.len() { + let mut params_bare = BrotliEncoderParams::default(); + params_bare.bare_stream = true; + params_bare.byte_align = true; + params_bare.catable = true; + params_bare.use_dictionary = false; + params_bare.appendable = true; + params_bare.lgwin = 22; + super::compress(&mut files[i], &mut brotli_files[i], 4096, &params_bare, &[], 1).unwrap(); + files[i].reset_read(); + } + + byte_concat_decompress(&mut files[..], &mut brotli_files[..]); +} + +#[test] +fn test_bytealign_various_data() { + // Test: bytealign with various data types + let mut files = [ + UnlimitedBuffer::new(RANDOM10K), + UnlimitedBuffer::new(RANDOMTHENUNICODE), + UnlimitedBuffer::new(ASYOULIKE), + UnlimitedBuffer::new(BACKWARD65536), + ]; + let mut brotli_files = [ + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + UnlimitedBuffer::new(&[]), + ]; + + // First file: appendable + bytealign + let mut params_base = BrotliEncoderParams::default(); + params_base.bare_stream = true; + params_base.byte_align = true; + params_base.appendable = true; + params_base.lgwin = 22; + light_debug_test(&mut params_base); +
super::compress(&mut files[0], &mut brotli_files[0], 4096, &params_base, &[], 1).unwrap(); + files[0].reset_read(); + + // Remaining files: bare + for i in 1..files.len() { + let mut params_bare = BrotliEncoderParams::default(); + params_bare.bare_stream = true; + params_bare.byte_align = true; + params_bare.catable = true; + params_bare.use_dictionary = false; + params_bare.appendable = true; + params_bare.lgwin = 22; + light_debug_test(&mut params_bare); + super::compress(&mut files[i], &mut brotli_files[i], 4096, &params_bare, &[], 1).unwrap(); + files[i].reset_read(); + } + + byte_concat_decompress(&mut files[..], &mut brotli_files[..]); +} diff --git a/src/enc/backward_references/mod.rs b/src/enc/backward_references/mod.rs index a163f8ef..c6bf4b06 100644 --- a/src/enc/backward_references/mod.rs +++ b/src/enc/backward_references/mod.rs @@ -102,6 +102,12 @@ pub struct BrotliEncoderParams { pub large_window: bool, /// avoid search for the best ndirect vs npostfix parameters for distance pub avoid_distance_prefix_search: bool, + /// inserts an extra empty metadata block before the final empty metablock in + /// catable/appendable mode so concatenation tools can just remove the last byte + pub byte_align: bool, + /// do not emit an empty last block at end of data - if catable, this + /// will also suppress the stream header + pub bare_stream: bool, /// construct brotli in such a way that it may be concatenated with another brotli file using appropriate bit ops pub catable: bool, /// can use the dictionary (default yes unless catable is set) @@ -435,7 +441,7 @@ impl + SliceWrapper + BasicHashComputer> AnyHasher &data[cur_ix_masked..], max_length, ); - + if unbroken_len != 0 { let len = fix_unbroken_len(unbroken_len, prev_ix, cur_ix_masked, ring_buffer_break); let score: u64 = BackwardReferenceScore(len, backward, opts); diff --git a/src/enc/brotli_bit_stream.rs b/src/enc/brotli_bit_stream.rs index 7624b4ec..4b7d9aed 100644 --- a/src/enc/brotli_bit_stream.rs +++
b/src/enc/brotli_bit_stream.rs @@ -2837,12 +2837,51 @@ pub fn BrotliStoreSyncMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { JumpToByteBoundary(storage_ix, storage); } +pub fn BrotliWritePaddingMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { + if *storage_ix & 7 != 0 { + BrotliWriteBits(6, 6, storage_ix, storage); + JumpToByteBoundary(storage_ix, storage); + } +} + pub fn BrotliWriteEmptyLastMetaBlock(storage_ix: &mut usize, storage: &mut [u8]) { - BrotliWriteBits(1u8, 1u64, storage_ix, storage); - BrotliWriteBits(1u8, 1u64, storage_ix, storage); + BrotliWriteBits(1, 1, storage_ix, storage); + BrotliWriteBits(1, 1, storage_ix, storage); JumpToByteBoundary(storage_ix, storage); } +pub fn BrotliWriteRawMetadataMetaBlock( + storage_ix: &mut usize, + storage: &mut [u8], + len: usize, + data: &mut [u8], +) { + BrotliWriteBits(1, 0, storage_ix, storage); // not last + BrotliWriteBits(2, 3, storage_ix, storage); // MNIBBLES = 0 (pattern 1,1) + BrotliWriteBits(1, 0, storage_ix, storage); // reserved + if len > 16777215 { + panic!("metadata too large"); + } else if len > 65535 { + BrotliWriteBits(2, 3, storage_ix, storage); + BrotliWriteBits(8, ((len >> 16) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8, ((len >> 8) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8, (len & 255) as u64, storage_ix, storage); + } else if len > 255 { + BrotliWriteBits(2, 2, storage_ix, storage); + BrotliWriteBits(8, ((len >> 8) & 255) as u64, storage_ix, storage); + BrotliWriteBits(8, (len & 255) as u64, storage_ix, storage); + } else if len == 0 { + BrotliWriteBits(2, 0, storage_ix, storage); + } else { + BrotliWriteBits(2, 1, storage_ix, storage); + BrotliWriteBits(8, (len & 255) as u64, storage_ix, storage); + } + JumpToByteBoundary(storage_ix, storage); + for index in 0..len { + BrotliWriteBits(8, data[index] as u64, storage_ix, storage); + } +} + const MAX_SIZE_ENCODING: usize = 10; fn encode_base_128(mut value: u64) -> (usize, [u8; MAX_SIZE_ENCODING]) { 
diff --git a/src/enc/encode.rs b/src/enc/encode.rs index 7ac3196e..58479db8 100644 --- a/src/enc/encode.rs +++ b/src/enc/encode.rs @@ -13,7 +13,8 @@ use super::backward_references::{ use super::bit_cost::{shannon_entropy, BitsEntropy}; use super::brotli_bit_stream::{ store_meta_block, store_meta_block_fast, store_meta_block_trivial, - store_uncompressed_meta_block, BrotliWriteEmptyLastMetaBlock, BrotliWriteMetadataMetaBlock, + store_uncompressed_meta_block, BrotliWriteEmptyLastMetaBlock, + BrotliWritePaddingMetaBlock, BrotliWriteMetadataMetaBlock, MetaBlockSplit, RecoderState, }; use super::combined_alloc::BrotliAlloc; @@ -273,6 +274,13 @@ pub fn set_parameter( BROTLI_PARAM_APPENDABLE => params.appendable = value != 0, BROTLI_PARAM_MAGIC_NUMBER => params.magic_number = value != 0, BROTLI_PARAM_FAVOR_EFFICIENCY => params.favor_cpu_efficiency = value != 0, + BROTLI_PARAM_BYTE_ALIGN => params.byte_align = value != 0, + BROTLI_PARAM_BARE_STREAM => { + params.bare_stream = value != 0; + if !params.byte_align { + params.byte_align = value != 0; + } + } _ => return false, } true @@ -331,6 +339,8 @@ pub fn BrotliEncoderInitParams() -> BrotliEncoderParams { cdf_adaptation_detection: 0, prior_bitmask_detection: 0, literal_adaptation: [(0, 0); 4], + byte_align: false, + bare_stream: false, catable: false, use_dictionary: true, appendable: false, @@ -549,6 +559,12 @@ pub fn SanitizeParams(params: &mut BrotliEncoderParams) { } if params.catable { params.appendable = true; + params.use_dictionary = false; + } + if params.bare_stream { + params.byte_align = true; + } else if !params.appendable { + params.byte_align = false; } } @@ -658,12 +674,14 @@ impl BrotliEncoderStateStruct { if self.params.quality == 0i32 || self.params.quality == 1i32 { lgwin = max(lgwin, 18i32); } - EncodeWindowBits( - lgwin, - self.params.large_window, - &mut self.last_bytes_, - &mut self.last_bytes_bits_, - ); + if !(self.params.catable && self.params.bare_stream) { + EncodeWindowBits( + lgwin, + 
self.params.large_window, + &mut self.last_bytes_, + &mut self.last_bytes_bits_, + ); + } } if self.params.quality == 0i32 { InitCommandPrefixCodes( @@ -1908,6 +1926,19 @@ fn DecideOverLiteralContextModeling( ); } } +fn WriteEmptyLastBlocksInternal( + params: &BrotliEncoderParams, + storage_ix: &mut usize, + storage: &mut [u8], +) { + // insert empty block for byte alignment if required + if params.byte_align { + BrotliWritePaddingMetaBlock(storage_ix, storage); + } + if !params.bare_stream { + BrotliWriteEmptyLastMetaBlock(storage_ix, storage) + } +} fn WriteMetaBlockInternal( alloc: &mut Alloc, data: &[u8], @@ -1940,7 +1971,7 @@ fn WriteMetaBlockInternal( ), { let actual_is_last = is_last; - if params.appendable { + if params.appendable || params.byte_align { is_last = false; } else { assert!(!params.catable); // Sanitize Params senforces this constraint @@ -1950,8 +1981,7 @@ fn WriteMetaBlockInternal( let literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode); let mut block_params = params.clone(); if bytes == 0usize { - BrotliWriteBits(2usize, 3, storage_ix, storage); - *storage_ix = storage_ix.wrapping_add(7u32 as usize) & !7u32 as usize; + WriteEmptyLastBlocksInternal(params, storage_ix, storage); return; } if !should_compress( @@ -1978,7 +2008,7 @@ fn WriteMetaBlockInternal( cb, ); if actual_is_last != is_last { - BrotliWriteEmptyLastMetaBlock(storage_ix, storage) + WriteEmptyLastBlocksInternal(params, storage_ix, storage); } return; } @@ -2133,7 +2163,7 @@ fn WriteMetaBlockInternal( ); } if actual_is_last != is_last { - BrotliWriteEmptyLastMetaBlock(storage_ix, storage) + WriteEmptyLastBlocksInternal(params, storage_ix, storage); } } @@ -2242,6 +2272,14 @@ impl BrotliEncoderStateStruct { *out_size = catable_header_size; self.is_first_mb = IsFirst::HeaderWritten; } + // fixup for empty stream - note: catable is always appendable + if bytes == 0 + && self.params.byte_align + && self.params.appendable + && !self.params.catable + { + 
BrotliWritePaddingMetaBlock(&mut storage_ix, self.storage_.slice_mut()); + } } if let IsFirst::BothCatableBytesWritten = self.is_first_mb { // nothing to do here, move along diff --git a/src/enc/parameters.rs b/src/enc/parameters.rs index 9f4fd921..45a362b3 100644 --- a/src/enc/parameters.rs +++ b/src/enc/parameters.rs @@ -29,6 +29,8 @@ pub enum BrotliEncoderParameter { BROTLI_PARAM_MAGIC_NUMBER = 169, BROTLI_PARAM_NO_DICTIONARY = 170, BROTLI_PARAM_FAVOR_EFFICIENCY = 171, + BROTLI_PARAM_BYTE_ALIGN = 172, + BROTLI_PARAM_BARE_STREAM = 173, UNUSED7 = 7, UNUSED8 = 8, UNUSED9 = 9, @@ -172,8 +174,6 @@ pub enum BrotliEncoderParameter { UNUSED147 = 147, UNUSED148 = 148, UNUSED149 = 149, - UNUSED172 = 172, - UNUSED173 = 173, UNUSED174 = 174, UNUSED175 = 175, UNUSED176 = 176, diff --git a/src/enc/prior_eval.rs b/src/enc/prior_eval.rs index 82f22eb3..ee3fccd5 100644 --- a/src/enc/prior_eval.rs +++ b/src/enc/prior_eval.rs @@ -46,7 +46,7 @@ pub trait Prior { selected_context: u8, actual_context: usize, high_nibble: Option, - ) -> CDF { + ) -> CDF<'_> { let index = Self::lookup_lin(stride_byte, selected_context, actual_context, high_nibble); CDF::from(&mut data[index]) } diff --git a/src/enc/stride_eval.rs b/src/enc/stride_eval.rs index bf2cb618..511730ff 100644 --- a/src/enc/stride_eval.rs +++ b/src/enc/stride_eval.rs @@ -50,7 +50,7 @@ impl Stride1Prior { selected_context: u8, actual_context: usize, high_nibble: Option, - ) -> CDF { + ) -> CDF<'_> { let index = Self::lookup_lin(stride_byte, selected_context, actual_context, high_nibble) * NIBBLE_PRIOR_SIZE; CDF::from(data.split_at_mut(index).1.split_at_mut(16).0)