22// One example is tests/crashtest/images/imagetestsuite/b0b8914cc5f7a6eff409f16d8cc236c5.jpg
33// That's why wrapping operators are needed.
44use crate :: parser:: Dimensions ;
5- use std:: num:: Wrapping ;
5+ use std:: {
6+ convert:: TryFrom ,
7+ num:: Wrapping ,
8+ } ;
69
710pub ( crate ) fn choose_idct_size ( full_size : Dimensions , requested_size : Dimensions ) -> usize {
811 fn scaled ( len : u16 , scale : usize ) -> u16 { ( ( len as u32 * scale as u32 - 1 ) / 8 + 1 ) as u16 }
@@ -28,7 +31,7 @@ fn test_choose_idct_size() {
2831 assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 685 , height: 999 } ) , 2 ) ;
2932 assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 1000 , height: 1000 } ) , 2 ) ;
3033 assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 1400 , height: 1400 } ) , 4 ) ;
31-
34+
3235 assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 5472 , height: 3648 } ) , 8 ) ;
3336 assert_eq ! ( choose_idct_size( Dimensions { width: 5472 , height: 3648 } , Dimensions { width: 16384 , height: 16384 } ) , 8 ) ;
3437 assert_eq ! ( choose_idct_size( Dimensions { width: 1 , height: 1 } , Dimensions { width: 65535 , height: 65535 } ) , 8 ) ;
@@ -45,79 +48,74 @@ pub(crate) fn dequantize_and_idct_block(scale: usize, coefficients: &[i16], quan
4548 }
4649}
4750
48- // This is based on stb_image's 'stbi__idct_block'.
49- fn dequantize_and_idct_block_8x8 ( coefficients : & [ i16 ] , quantization_table : & [ u16 ; 64 ] , output_linestride : usize , output : & mut [ u8 ] ) {
51+ pub fn dequantize_and_idct_block_8x8 (
52+ coefficients : & [ i16 ] ,
53+ quantization_table : & [ u16 ; 64 ] ,
54+ output_linestride : usize ,
55+ output : & mut [ u8 ]
56+ ) {
5057 debug_assert_eq ! ( coefficients. len( ) , 64 ) ;
58+ let output = output
59+ . chunks_mut ( output_linestride) ;
60+ dequantize_and_idct_block_8x8_inner ( coefficients, quantization_table, output)
61+ }
62+
63+ // This is based on stb_image's 'stbi__idct_block'.
64+ fn dequantize_and_idct_block_8x8_inner < ' a , I > (
65+ coefficients : & [ i16 ] ,
66+ quantization_table : & [ u16 ; 64 ] ,
67+ output : I ,
68+ ) where
69+ I : IntoIterator < Item = & ' a mut [ u8 ] > ,
70+ I :: IntoIter : ExactSizeIterator < Item = & ' a mut [ u8 ] > ,
71+ {
72+ let output = output. into_iter ( ) ;
73+ debug_assert ! (
74+ output. len( ) >= 8 ,
75+ "Output iterator has the wrong length: {}" ,
76+ output. len( )
77+ ) ;
5178
52- let mut temp = [ Wrapping ( 0i32 ) ; 64 ] ;
79+ let mut temp = [ Wrapping ( 0 ) ; 64 ] ;
5380
5481 // columns
55- for i in 0 .. 8 {
56- // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
57- if coefficients[ i + 8 ] == 0 && coefficients[ i + 16 ] == 0 && coefficients[ i + 24 ] == 0 &&
58- coefficients[ i + 32 ] == 0 && coefficients[ i + 40 ] == 0 && coefficients[ i + 48 ] == 0 &&
59- coefficients[ i + 56 ] == 0 {
60- let dcterm = Wrapping ( coefficients[ i] as i32 * quantization_table[ i] as i32 ) << 2 ;
61- temp[ i] = dcterm;
62- temp[ i + 8 ] = dcterm;
82+ for i in 0 ..8 {
83+ if coefficients[ i + 8 ] == 0
84+ && coefficients[ i + 16 ] == 0
85+ && coefficients[ i + 24 ] == 0
86+ && coefficients[ i + 32 ] == 0
87+ && coefficients[ i + 40 ] == 0
88+ && coefficients[ i + 48 ] == 0
89+ && coefficients[ i + 56 ] == 0
90+ {
91+ let dcterm = dequantize ( coefficients[ i] , quantization_table[ i] ) << 2 ;
92+ temp[ i] = dcterm;
93+ temp[ i + 8 ] = dcterm;
6394 temp[ i + 16 ] = dcterm;
6495 temp[ i + 24 ] = dcterm;
6596 temp[ i + 32 ] = dcterm;
6697 temp[ i + 40 ] = dcterm;
6798 temp[ i + 48 ] = dcterm;
6899 temp[ i + 56 ] = dcterm;
69- }
70- else {
71- let s0 = Wrapping ( coefficients[ i] as i32 * quantization_table[ i] as i32 ) ;
72- let s1 = Wrapping ( coefficients[ i + 8 ] as i32 * quantization_table[ i + 8 ] as i32 ) ;
73- let s2 = Wrapping ( coefficients[ i + 16 ] as i32 * quantization_table[ i + 16 ] as i32 ) ;
74- let s3 = Wrapping ( coefficients[ i + 24 ] as i32 * quantization_table[ i + 24 ] as i32 ) ;
75- let s4 = Wrapping ( coefficients[ i + 32 ] as i32 * quantization_table[ i + 32 ] as i32 ) ;
76- let s5 = Wrapping ( coefficients[ i + 40 ] as i32 * quantization_table[ i + 40 ] as i32 ) ;
77- let s6 = Wrapping ( coefficients[ i + 48 ] as i32 * quantization_table[ i + 48 ] as i32 ) ;
78- let s7 = Wrapping ( coefficients[ i + 56 ] as i32 * quantization_table[ i + 56 ] as i32 ) ;
79-
80- let p2 = s2;
81- let p3 = s6;
82- let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
83- let t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
84- let t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
85- let p2 = s0;
86- let p3 = s4;
87- let t0 = stbi_fsh ( p2 + p3) ;
88- let t1 = stbi_fsh ( p2 - p3) ;
89- let x0 = t0 + t3;
90- let x3 = t0 - t3;
91- let x1 = t1 + t2;
92- let x2 = t1 - t2;
93- let t0 = s7;
94- let t1 = s5;
95- let t2 = s3;
96- let t3 = s1;
97- let p3 = t0 + t2;
98- let p4 = t1 + t3;
99- let p1 = t0 + t3;
100- let p2 = t1 + t2;
101- let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
102- let t0 = t0 * stbi_f2f ( 0.298631336 ) ;
103- let t1 = t1 * stbi_f2f ( 2.053119869 ) ;
104- let t2 = t2 * stbi_f2f ( 3.072711026 ) ;
105- let t3 = t3 * stbi_f2f ( 1.501321110 ) ;
106- let p1 = p5 + ( p1 * stbi_f2f ( -0.899976223 ) ) ;
107- let p2 = p5 + ( p2 * stbi_f2f ( -2.562915447 ) ) ;
108- let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
109- let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
110- let t3 = t3 + p1 + p4;
111- let t2 = t2 + p2 + p3;
112- let t1 = t1 + p2 + p4;
113- let t0 = t0 + p1 + p3;
114-
115- // constants scaled things up by 1<<12; let's bring them back
116- // down, but keep 2 extra bits of precision
117- let x0 = x0 + Wrapping ( 512 ) ;
118- let x1 = x1 + Wrapping ( 512 ) ;
119- let x2 = x2 + Wrapping ( 512 ) ;
120- let x3 = x3 + Wrapping ( 512 ) ;
100+ } else {
101+ let s0 = dequantize ( coefficients[ i] , quantization_table[ i] ) ;
102+ let s1 = dequantize ( coefficients[ i + 8 ] , quantization_table[ i + 8 ] ) ;
103+ let s2 = dequantize ( coefficients[ i + 16 ] , quantization_table[ i + 16 ] ) ;
104+ let s3 = dequantize ( coefficients[ i + 24 ] , quantization_table[ i + 24 ] ) ;
105+ let s4 = dequantize ( coefficients[ i + 32 ] , quantization_table[ i + 32 ] ) ;
106+ let s5 = dequantize ( coefficients[ i + 40 ] , quantization_table[ i + 40 ] ) ;
107+ let s6 = dequantize ( coefficients[ i + 48 ] , quantization_table[ i + 48 ] ) ;
108+ let s7 = dequantize ( coefficients[ i + 56 ] , quantization_table[ i + 56 ] ) ;
109+
110+ let Kernel {
111+ xs : [ x0, x1, x2, x3] ,
112+ ts : [ t0, t1, t2, t3] ,
113+ } = kernel (
114+ [ s0, s1, s2, s3, s4, s5, s6, s7] ,
115+ // constants scaled things up by 1<<12; let's bring them back
116+ // down, but keep 2 extra bits of precision
117+ 512 ,
118+ ) ;
121119
122120 temp[ i] = ( x0 + t3) >> 10 ;
123121 temp[ i + 56 ] = ( x0 - t3) >> 10 ;
@@ -130,72 +128,128 @@ fn dequantize_and_idct_block_8x8(coefficients: &[i16], quantization_table: &[u16
130128 }
131129 }
132130
133- for i in 0 .. 8 {
134- // no fast case since the first 1D IDCT spread components out
135- let s0 = temp[ i * 8 ] ;
136- let s1 = temp[ i * 8 + 1 ] ;
137- let s2 = temp[ i * 8 + 2 ] ;
138- let s3 = temp[ i * 8 + 3 ] ;
139- let s4 = temp[ i * 8 + 4 ] ;
140- let s5 = temp[ i * 8 + 5 ] ;
141- let s6 = temp[ i * 8 + 6 ] ;
142- let s7 = temp[ i * 8 + 7 ] ;
143-
144- let p2 = s2;
145- let p3 = s6;
146- let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
147- let t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
148- let t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
149- let p2 = s0;
150- let p3 = s4;
151- let t0 = stbi_fsh ( p2 + p3) ;
152- let t1 = stbi_fsh ( p2 - p3) ;
153- let x0 = t0 + t3;
154- let x3 = t0 - t3;
155- let x1 = t1 + t2;
156- let x2 = t1 - t2;
157- let t0 = s7;
158- let t1 = s5;
159- let t2 = s3;
160- let t3 = s1;
161- let p3 = t0 + t2;
162- let p4 = t1 + t3;
163- let p1 = t0 + t3;
164- let p2 = t1 + t2;
165- let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
166- let t0 = t0 * stbi_f2f ( 0.298631336 ) ;
167- let t1 = t1 * stbi_f2f ( 2.053119869 ) ;
168- let t2 = t2 * stbi_f2f ( 3.072711026 ) ;
169- let t3 = t3 * stbi_f2f ( 1.501321110 ) ;
170- let p1 = p5 + p1 * stbi_f2f ( -0.899976223 ) ;
171- let p2 = p5 + p2 * stbi_f2f ( -2.562915447 ) ;
172- let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
173- let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
174- let t3 = t3 + p1 + p4;
175- let t2 = t2 + p2 + p3;
176- let t1 = t1 + p2 + p4;
177- let t0 = t0 + p1 + p3;
131+ for ( chunk, output_chunk) in temp. chunks_exact ( 8 ) . zip ( output) {
132+ let chunk = <& [ _ ; 8 ] >:: try_from ( chunk) . unwrap ( ) ;
178133
179134 // constants scaled things up by 1<<12, plus we had 1<<2 from first
180135 // loop, plus horizontal and vertical each scale by sqrt(8) so together
181136 // we've got an extra 1<<3, so 1<<17 total we need to remove.
182137 // so we want to round that, which means adding 0.5 * 1<<17,
183138 // aka 65536. Also, we'll end up with -128 to 127 that we want
184139 // to encode as 0..255 by adding 128, so we'll add that before the shift
185- let x0 = x0 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
186- let x1 = x1 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
187- let x2 = x2 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
188- let x3 = x3 + Wrapping ( 65536 + ( 128 << 17 ) ) ;
189-
190- output[ i * output_linestride] = stbi_clamp ( ( x0 + t3) >> 17 ) ;
191- output[ i * output_linestride + 7 ] = stbi_clamp ( ( x0 - t3) >> 17 ) ;
192- output[ i * output_linestride + 1 ] = stbi_clamp ( ( x1 + t2) >> 17 ) ;
193- output[ i * output_linestride + 6 ] = stbi_clamp ( ( x1 - t2) >> 17 ) ;
194- output[ i * output_linestride + 2 ] = stbi_clamp ( ( x2 + t1) >> 17 ) ;
195- output[ i * output_linestride + 5 ] = stbi_clamp ( ( x2 - t1) >> 17 ) ;
196- output[ i * output_linestride + 3 ] = stbi_clamp ( ( x3 + t0) >> 17 ) ;
197- output[ i * output_linestride + 4 ] = stbi_clamp ( ( x3 - t0) >> 17 ) ;
140+ const X_SCALE : i32 = 65536 + ( 128 << 17 ) ;
141+
142+ // TODO When the minimum rust version supports it
143+ // let [s0, rest @ ..] = chunk;
144+ let ( s0, rest) = chunk. split_first ( ) . unwrap ( ) ;
145+ if * rest == [ Wrapping ( 0 ) ; 7 ] {
146+ let dcterm = stbi_clamp ( ( stbi_fsh ( * s0) + Wrapping ( X_SCALE ) ) >> 17 ) ;
147+ output_chunk[ 0 ] = dcterm;
148+ output_chunk[ 1 ] = dcterm;
149+ output_chunk[ 2 ] = dcterm;
150+ output_chunk[ 3 ] = dcterm;
151+ output_chunk[ 4 ] = dcterm;
152+ output_chunk[ 5 ] = dcterm;
153+ output_chunk[ 6 ] = dcterm;
154+ output_chunk[ 7 ] = dcterm;
155+ } else {
156+ let Kernel {
157+ xs : [ x0, x1, x2, x3] ,
158+ ts : [ t0, t1, t2, t3] ,
159+ } = kernel ( * chunk, X_SCALE ) ;
160+
161+ output_chunk[ 0 ] = stbi_clamp ( ( x0 + t3) >> 17 ) ;
162+ output_chunk[ 7 ] = stbi_clamp ( ( x0 - t3) >> 17 ) ;
163+ output_chunk[ 1 ] = stbi_clamp ( ( x1 + t2) >> 17 ) ;
164+ output_chunk[ 6 ] = stbi_clamp ( ( x1 - t2) >> 17 ) ;
165+ output_chunk[ 2 ] = stbi_clamp ( ( x2 + t1) >> 17 ) ;
166+ output_chunk[ 5 ] = stbi_clamp ( ( x2 - t1) >> 17 ) ;
167+ output_chunk[ 3 ] = stbi_clamp ( ( x3 + t0) >> 17 ) ;
168+ output_chunk[ 4 ] = stbi_clamp ( ( x3 - t0) >> 17 ) ;
169+ }
170+ }
171+ }
172+
/// Intermediate result of one 1-D pass of the 8-point IDCT.
///
/// The two halves are combined by the caller as `xs[k] + ts[3 - k]` and
/// `xs[k] - ts[3 - k]` to produce the eight outputs of the pass.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Kernel {
    /// Terms computed by `kernel_x` from the even-indexed inputs, with the
    /// caller-supplied rounding bias already added.
    xs: [Wrapping<i32>; 4],
    /// Terms computed by `kernel_t` from the odd-indexed inputs.
    ts: [Wrapping<i32>; 4],
}
177+
178+ #[ inline]
179+ fn kernel_x ( [ s0, s2, s4, s6] : [ Wrapping < i32 > ; 4 ] , x_scale : i32 ) -> [ Wrapping < i32 > ; 4 ] {
180+ // Even `chunk` indicies
181+ let ( t2, t3) ;
182+ {
183+ let p2 = s2;
184+ let p3 = s6;
185+
186+ let p1 = ( p2 + p3) * stbi_f2f ( 0.5411961 ) ;
187+ t2 = p1 + p3 * stbi_f2f ( -1.847759065 ) ;
188+ t3 = p1 + p2 * stbi_f2f ( 0.765366865 ) ;
189+ }
190+
191+ let ( t0, t1) ;
192+ {
193+ let p2 = s0;
194+ let p3 = s4;
195+
196+ t0 = stbi_fsh ( p2 + p3) ;
197+ t1 = stbi_fsh ( p2 - p3) ;
198198 }
199+
200+ let x0 = t0 + t3;
201+ let x3 = t0 - t3;
202+ let x1 = t1 + t2;
203+ let x2 = t1 - t2;
204+
205+ let x_scale = Wrapping ( x_scale) ;
206+
207+ [ x0 + x_scale, x1 + x_scale, x2 + x_scale, x3 + x_scale]
208+ }
209+
210+ #[ inline]
211+ fn kernel_t ( [ s1, s3, s5, s7] : [ Wrapping < i32 > ; 4 ] ) -> [ Wrapping < i32 > ; 4 ] {
212+ // Odd `chunk` indicies
213+ let mut t0 = s7;
214+ let mut t1 = s5;
215+ let mut t2 = s3;
216+ let mut t3 = s1;
217+
218+ let p3 = t0 + t2;
219+ let p4 = t1 + t3;
220+ let p1 = t0 + t3;
221+ let p2 = t1 + t2;
222+ let p5 = ( p3 + p4) * stbi_f2f ( 1.175875602 ) ;
223+
224+ t0 *= stbi_f2f ( 0.298631336 ) ;
225+ t1 *= stbi_f2f ( 2.053119869 ) ;
226+ t2 *= stbi_f2f ( 3.072711026 ) ;
227+ t3 *= stbi_f2f ( 1.501321110 ) ;
228+
229+ let p1 = p5 + p1 * stbi_f2f ( -0.899976223 ) ;
230+ let p2 = p5 + p2 * stbi_f2f ( -2.562915447 ) ;
231+ let p3 = p3 * stbi_f2f ( -1.961570560 ) ;
232+ let p4 = p4 * stbi_f2f ( -0.390180644 ) ;
233+
234+ t3 += p1 + p4;
235+ t2 += p2 + p3;
236+ t1 += p2 + p4;
237+ t0 += p1 + p3;
238+
239+ [ t0, t1, t2, t3]
240+ }
241+
242+ #[ inline]
243+ fn kernel ( [ s0, s1, s2, s3, s4, s5, s6, s7] : [ Wrapping < i32 > ; 8 ] , x_scale : i32 ) -> Kernel {
244+ Kernel {
245+ xs : kernel_x ( [ s0, s2, s4, s6] , x_scale) ,
246+ ts : kernel_t ( [ s1, s3, s5, s7] ) ,
247+ }
248+ }
249+
/// Multiplies a quantized coefficient by its quantization-table entry.
///
/// Both operands are widened to `i32` before multiplying. The largest possible
/// magnitude is `32768 * 65535`, which still fits in an `i32`, so the product
/// itself never overflows; the result is wrapped in `Wrapping` so that later
/// IDCT arithmetic on it uses wrapping semantics.
#[inline(always)]
fn dequantize(c: i16, q: u16) -> Wrapping<i32> {
    Wrapping(i32::from(c) * i32::from(q))
}
200254
201255// 4x4 and 2x2 IDCT based on Rakesh Dugad and Narendra Ahuja: "A Fast Scheme for Image Size Change in the Compressed Domain" (2001).
0 commit comments