Skip to content

Commit ce06772

Browse files
committed
Better MDCT/combfilter accuracy with MULT32_32_P31
1 parent c8eb062 commit ce06772

File tree

5 files changed

+22
-3
lines changed

5 files changed

+22
-3
lines changed

celt/_kiss_fft_guts.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,8 @@
5555
#define SAMP_MIN -SAMP_MAX
5656

5757
#ifdef ENABLE_QEXT
58-
# define S_MUL(a,b) MULT32_32_Q31(b, a)
59-
# define S_MUL2(a,b) MULT32_32_Q31(b, a)
58+
/* With ENABLE_QEXT, S_MUL forwards to MULT32_32_P31 with its two operands swapped. */
# define S_MUL(x,y) MULT32_32_P31(y, x)
59+
/* With ENABLE_QEXT, S_MUL2 forwards to MULT32_32_P31 with its two operands swapped. */
# define S_MUL2(x,y) MULT32_32_P31(y, x)
6060
#else
6161
# define S_MUL(a,b) MULT16_32_Q15(b, a)
6262
# define S_MUL2(a,b) MULT16_32_Q16(b, a)

celt/arch.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ typedef opus_val16 opus_res;
184184
#ifdef ENABLE_QEXT
185185
typedef opus_val32 celt_coef;
186186
#define COEF_ONE Q31ONE
187-
#define MULT_COEF_32(a, b) MULT32_32_Q31(a,b)
187+
/* With ENABLE_QEXT, a coefficient-by-32-bit multiply is the rounded MULT32_32_P31 product. */
#define MULT_COEF_32(coef, x) MULT32_32_P31(coef,x)
188188
#define MAC_COEF_32_ARM(c, a, b) ADD32((c), MULT32_32_Q32(a,b))
189189
#define MULT_COEF(a, b) MULT32_32_Q31(a,b)
190190
#define MULT_COEF_TAPS(a, b) SHL32(MULT16_16(a,b), 1)
@@ -346,6 +346,7 @@ static OPUS_INLINE int celt_isnan(float x)
346346

347347
#define MULT32_32_Q16(a,b) ((a)*(b))
348348
#define MULT32_32_Q31(a,b) ((a)*(b))
349+
/* In this definition the P31 multiply is a plain product: no shift or rounding is applied. */
#define MULT32_32_P31(x,y) ((x) * (y))
349350

350351
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
351352
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))

celt/celt.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
176176
+ MULT_COEF_32(g10,x2)
177177
+ MULT_COEF_32(g11,ADD32(x1,x3))
178178
+ MULT_COEF_32(g12,ADD32(x0,x4));
179+
#ifdef FIXED_POINT
180+
/* A bit of bias seems to help here. */
181+
y[i] = SUB32(y[i], 1);
182+
#endif
179183
y[i] = SATURATE(y[i], SIG_SAT);
180184
x4=x3;
181185
x3=x2;
@@ -237,6 +241,10 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
237241
+ MULT_COEF_32(MULT_COEF(f,g10),x2)
238242
+ MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
239243
+ MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
244+
#ifdef FIXED_POINT
245+
/* A bit of bias seems to help here. */
246+
y[i] = SUB32(y[i], 3);
247+
#endif
240248
y[i] = SATURATE(y[i], SIG_SAT);
241249
x4=x3;
242250
x3=x2;

celt/fixed_debug.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,10 @@ OPUS_EXPORT opus_int64 celt_mips=0;
4141
extern opus_int64 celt_mips;
4242
#endif
4343

44+
/* Unsigned multiply: both operands are converted to opus_uint32 before the product is taken. */
#define MULT16_16U(x,y) ((opus_uint32)(x) * (opus_uint32)(y))
4445
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
4546
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
47+
/* 32x32 multiply followed by a 31-bit right shift with rounding, assembled from
   16-bit partial products:
     - high(a)*high(b) is doubled (SHL32 by 1);
     - low(a)*low(b) (unsigned, shifted right by 16+7) and the two high*low
       cross terms (each shifted right by 7) are summed together with a bias
       of 128, and that sum is shifted right by 8. The bias of 128 is half of
       the 2^8 discarded by that last shift, which is what provides the rounding.
   Note that a and b each appear several times in the expansion, so arguments
   with side effects would be evaluated more than once. */
#define MULT32_32_P31(a,b) ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff))>>(16+7)) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
4648
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
4749

4850
/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */

celt/fixed_generic.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,14 @@
7171
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
7272
#endif
7373

74+
/** 32x32 multiplication, followed by a 31-bit shift right (with rounding). Result fits in 32 bits */
75+
#if OPUS_FAST_INT64
76+
/* 64-bit path: widen both operands, add the rounding bias 2^30 (half of 2^31) to the product, shift right by 31 and narrow to opus_val32. */
#define MULT32_32_P31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b) + ((opus_int64)1<<30), 31))
77+
#else
78+
/* Unsigned multiply: both operands are converted to opus_uint32 before the product is taken. */
#define MULT16_16U(x,y) ((opus_uint32)(x) * (opus_uint32)(y))
79+
/* Fallback used when OPUS_FAST_INT64 is not set: the rounded 31-bit-shift
   product is assembled from 16-bit partial products instead of a 64-bit multiply:
     - high(a)*high(b) is doubled (SHL by 1);
     - low(a)*low(b) (unsigned, shifted right by 16+7) and the two high*low
       cross terms (each shifted right by 7) are summed together with a bias
       of 128, and that sum is shifted right by 8. The bias of 128 is half of
       the 2^8 discarded by that last shift, which is what provides the rounding.
   Note that a and b each appear several times in the expansion, so arguments
   with side effects would be evaluated more than once. */
#define MULT32_32_P31(a,b) ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(128+(opus_int32)SHR(MULT16_16U(((a)&0x0000ffff),((b)&0x0000ffff)),16+7) + SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),7) + SHR32(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),7), 8) )
80+
#endif
81+
7482
/** 32x32 multiplication, followed by a 32-bit shift right. Result fits in 32 bits */
7583
#if OPUS_FAST_INT64
7684
#define MULT32_32_Q32(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),32))

0 commit comments

Comments
 (0)