Skip to content

Commit ba63194

Browse files
Improve allocations in encoders (#94)
1 parent da7cf74 commit ba63194

File tree

4 files changed

+41
-23
lines changed

4 files changed

+41
-23
lines changed

src/encode/bc.rs

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
// helpers
2-
31
use glam::Vec4;
42

53
use crate::{
@@ -16,14 +14,26 @@ use super::{
1614
CompressionQuality, EncodeOptions, ErrorMetric, PreferredFragmentSize,
1715
};
1816

17+
type EncodeBlockFn<const BLOCK_BYTES: usize> =
18+
fn(&[[f32; 4]], usize, &EncodeOptions, &mut [u8; BLOCK_BYTES]);
19+
20+
fn block_4x4<const BLOCK_BYTES: usize>(
21+
args: Args,
22+
encode_block: EncodeBlockFn<BLOCK_BYTES>,
23+
) -> Result<(), EncodingError> {
24+
block_universal::<4, 4, 16, BLOCK_BYTES>(args, encode_block)
25+
}
1926
fn block_universal<
2027
const BLOCK_WIDTH: usize,
2128
const BLOCK_HEIGHT: usize,
29+
const BLOCK_SIZE: usize,
2230
const BLOCK_BYTES: usize,
2331
>(
2432
args: Args,
25-
encode_block: fn(&[[f32; 4]], usize, &EncodeOptions, &mut [u8; BLOCK_BYTES]),
33+
encode_block: EncodeBlockFn<BLOCK_BYTES>,
2634
) -> Result<(), EncodingError> {
35+
debug_assert_eq!(BLOCK_SIZE, BLOCK_WIDTH * BLOCK_HEIGHT);
36+
2737
let Args {
2838
image,
2939
writer,
@@ -34,7 +44,8 @@ fn block_universal<
3444
let width = image.width() as usize;
3545
let height = image.height() as usize;
3646

37-
let mut encoded_buffer = vec![[0_u8; BLOCK_BYTES]; util::div_ceil(width, BLOCK_WIDTH)];
47+
let mut encoded_buffer =
48+
vec![[0_u8; BLOCK_BYTES]; util::div_ceil(width, BLOCK_WIDTH)].into_boxed_slice();
3849

3950
// Report frequencies were chosen manually.
4051
// I just tried to pick frequencies such that every quality level reports
@@ -76,7 +87,7 @@ fn block_universal<
7687
let block_width = width - block_start;
7788

7889
// fill block data
79-
let mut block_data = vec![[0_f32; 4]; BLOCK_WIDTH * BLOCK_HEIGHT];
90+
let mut block_data = [[0_f32; 4]; BLOCK_SIZE];
8091
for i in 0..BLOCK_HEIGHT {
8192
let row = &mut block_data[i * BLOCK_WIDTH..(i + 1) * BLOCK_WIDTH];
8293
let partial_row = &rows[block_start + i * width..][..block_width];
@@ -185,7 +196,7 @@ fn get_bc1_options(options: &EncodeOptions) -> bc1::Bc1Options {
185196
}
186197
}
187198
pub(crate) const BC1_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
188-
block_universal::<4, 4, 8>(args, |data, row_pitch, options, out| {
199+
block_4x4::<8>(args, |data, row_pitch, options, out| {
189200
let bc1_options = get_bc1_options(options);
190201
let mut block = get_4x4_rgba(data, row_pitch);
191202

@@ -228,7 +239,7 @@ fn bc2_alpha(alpha: [f32; 16], options: &EncodeOptions) -> [u8; 8] {
228239
}
229240

230241
pub(crate) const BC2_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
231-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
242+
block_4x4::<16>(args, |data, row_pitch, options, out| {
232243
let (bc1_options, _) = get_bc3_options(options);
233244

234245
let block = get_4x4_rgba(data, row_pitch);
@@ -244,7 +255,7 @@ pub(crate) const BC2_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_unive
244255

245256
pub(crate) const BC2_UNORM_PREMULTIPLIED_ALPHA: EncoderSet =
246257
EncoderSet::new_bc(&[Encoder::new_universal(|args| {
247-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
258+
block_4x4::<16>(args, |data, row_pitch, options, out| {
248259
let (bc1_options, _) = get_bc3_options(options);
249260

250261
let mut block = get_4x4_rgba(data, row_pitch);
@@ -269,7 +280,7 @@ fn get_bc3_options(options: &EncodeOptions) -> (bc1::Bc1Options, bc4::Bc4Options
269280
(bc1_options, bc4_options)
270281
}
271282
pub(crate) const BC3_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
272-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
283+
block_4x4::<16>(args, |data, row_pitch, options, out| {
273284
let (bc1_options, bc4_options) = get_bc3_options(options);
274285

275286
let block = get_4x4_rgba(data, row_pitch);
@@ -285,7 +296,7 @@ pub(crate) const BC3_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_unive
285296

286297
pub(crate) const BC3_UNORM_PREMULTIPLIED_ALPHA: EncoderSet =
287298
EncoderSet::new_bc(&[Encoder::new_universal(|args| {
288-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
299+
block_4x4::<16>(args, |data, row_pitch, options, out| {
289300
let (bc1_options, bc4_options) = get_bc3_options(options);
290301

291302
let mut block = get_4x4_rgba(data, row_pitch);
@@ -302,7 +313,7 @@ pub(crate) const BC3_UNORM_PREMULTIPLIED_ALPHA: EncoderSet =
302313

303314
pub(crate) const BC3_UNORM_RXGB: EncoderSet =
304315
EncoderSet::new_bc(&[Encoder::new_universal(|args| {
305-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
316+
block_4x4::<16>(args, |data, row_pitch, options, out| {
306317
let (bc1_options, bc4_options) = get_bc3_options(options);
307318

308319
let block_r = get_4x4_select_channel::<0>(data, row_pitch);
@@ -329,7 +340,7 @@ pub(crate) const BC3_UNORM_RXGB: EncoderSet =
329340

330341
pub(crate) const BC3_UNORM_NORMAL: EncoderSet =
331342
EncoderSet::new_bc(&[Encoder::new_universal(|args| {
332-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
343+
block_4x4::<16>(args, |data, row_pitch, options, out| {
333344
let (bc1_options, bc4_options) = get_bc3_options(options);
334345

335346
let block_a = get_4x4_select_channel::<0>(data, row_pitch);
@@ -378,7 +389,7 @@ fn get_bc4_options(options: &EncodeOptions) -> bc4::Bc4Options {
378389
}
379390

380391
pub(crate) const BC4_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
381-
block_universal::<4, 4, 8>(args, |data, row_pitch, options, out| {
392+
block_4x4::<8>(args, |data, row_pitch, options, out| {
382393
let mut options = get_bc4_options(options);
383394
options.snorm = false;
384395
*out = handle_bc4(data, row_pitch, options);
@@ -388,7 +399,7 @@ pub(crate) const BC4_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_unive
388399
.with_fragment_size(BC4_FRAGMENT_SIZE)]);
389400

390401
pub(crate) const BC4_SNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
391-
block_universal::<4, 4, 8>(args, |data, row_pitch, options, out| {
402+
block_4x4::<8>(args, |data, row_pitch, options, out| {
392403
let mut options = get_bc4_options(options);
393404
options.snorm = true;
394405
*out = handle_bc4(data, row_pitch, options);
@@ -408,7 +419,7 @@ fn handle_bc5(data: &[[f32; 4]], row_pitch: usize, options: bc4::Bc4Options) ->
408419
}
409420

410421
pub(crate) const BC5_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
411-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
422+
block_4x4::<16>(args, |data, row_pitch, options, out| {
412423
let mut options = get_bc4_options(options);
413424
options.snorm = false;
414425
*out = handle_bc5(data, row_pitch, options);
@@ -418,7 +429,7 @@ pub(crate) const BC5_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_unive
418429
.with_fragment_size(BC4_FRAGMENT_SIZE)]);
419430

420431
pub(crate) const BC5_SNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
421-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
432+
block_4x4::<16>(args, |data, row_pitch, options, out| {
422433
let mut options = get_bc4_options(options);
423434
options.snorm = true;
424435
*out = handle_bc5(data, row_pitch, options);
@@ -428,7 +439,7 @@ pub(crate) const BC5_SNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_unive
428439
.with_fragment_size(BC4_FRAGMENT_SIZE)]);
429440

430441
pub(crate) const BC7_UNORM: EncoderSet = EncoderSet::new_bc(&[Encoder::new_universal(|args| {
431-
block_universal::<4, 4, 16>(args, |data, row_pitch, options, out| {
442+
block_4x4::<16>(args, |data, row_pitch, options, out| {
432443
let block_vec: [Vec4; 16] = get_4x4_rgba_vec4(data, row_pitch);
433444

434445
let block = if options.dithering == Dithering::None {

src/encode/bi_planar.rs

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,9 @@ fn bi_planar_universal<P1: ToLe + cast::Castable + Default + Copy, P2: ToLe + ca
3737
));
3838
}
3939

40-
let mut plane1_buffer = vec![P1::default(); width * BLOCK_HEIGHT];
41-
let mut plane2: Vec<P2> = Vec::new();
40+
let mut plane1_buffer = vec![P1::default(); width * BLOCK_HEIGHT].into_boxed_slice();
41+
let plane2_len = (width / BLOCK_WIDTH) * (height / BLOCK_HEIGHT);
42+
let mut plane2: Vec<P2> = Vec::with_capacity(plane2_len);
4243

4344
let line_group_count = util::div_ceil(height, BLOCK_HEIGHT);
4445
let report_frequency = util::div_ceil(1024 * 1024, width * BLOCK_HEIGHT);
@@ -76,6 +77,12 @@ fn bi_planar_universal<P1: ToLe + cast::Castable + Default + Copy, P2: ToLe + ca
7677
Ok(())
7778
})?;
7879

80+
debug_assert_eq!(
81+
plane2.len(),
82+
plane2_len,
83+
"expected the pre-allocated memory for plane2 to fit exactly"
84+
);
85+
7986
progress.check_cancelled()?;
8087
P2::to_le(&mut plane2);
8188
writer.write_all(cast::as_bytes(&plane2))?;

src/encode/uncompressed.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ fn uncompressed_universal_dither(
9292
assert!(encoded_pixel_align <= std::mem::align_of::<EncodedBufferType>());
9393

9494
let error_padding = 2;
95-
let mut error_buffer = vec![Vec4::ZERO; 2 * (width + error_padding * 2)];
95+
let mut error_buffer = vec![Vec4::ZERO; 2 * (width + error_padding * 2)].into_boxed_slice();
9696
let (mut current_line_error, mut next_line_error) =
9797
error_buffer.split_at_mut(width + error_padding * 2);
9898

src/encode/write_util.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ pub(crate) fn for_each_f32_rgba_rows<E>(
1818
let height = image.height() as usize;
1919

2020
// this is the one and only buffer we need
21-
let mut intermediate_buffer = vec![[0_f32; 4]; width * block_height];
21+
let mut intermediate_buffer = vec![[0_f32; 4]; width * block_height].into_boxed_slice();
2222

2323
// go through the image row by row, convert it to f32 RGBA, and then
2424
// pass it to the closure
@@ -35,7 +35,7 @@ pub(crate) fn for_each_f32_rgba_rows<E>(
3535
);
3636
}
3737

38-
f(intermediate_buffer.as_mut_slice())?;
38+
f(&mut intermediate_buffer)?;
3939
}
4040

4141
let rest_blocks = height % block_height;
@@ -57,7 +57,7 @@ pub(crate) fn for_each_f32_rgba_rows<E>(
5757
intermediate_buffer.copy_within(..width, i * width);
5858
}
5959

60-
f(intermediate_buffer.as_mut_slice())?;
60+
f(&mut intermediate_buffer)?;
6161
}
6262

6363
Ok(())

0 commit comments

Comments
 (0)