@@ -105,102 +105,69 @@ static inline v_float32 simd_cubicHermite(const v_float32 &v_A, const v_float32
105105}
106106#endif
107107
108- static void cubicInterpolate (const Mat1f & src, uint32_t dstlen, Mat1f & dst, uint32_t srclen )
108+ static void cubicInterpolate (const float * src/* [srclen] */ , uint32_t srclen, float * dst/* [dstlen] */ , uint32_t dstlen )
109109{
110- Mat1f tmp (Size (srclen + 3U , 1U ));
111- tmp.at <float >(0 ) = src.at <float >(0 );
112-
113- #if (CV_SIMD || CV_SIMD_SCALABLE)
114- v_float32 v_reg = vx_setall_f32 (src.at <float >(srclen - 1U ));
115- vx_store (tmp.ptr <float >(0 ) + (srclen - 1U ), v_reg);
116- #else // scalar version
117- tmp.at <float >(srclen + 1U ) = src.at <float >(srclen - 1U );
118- tmp.at <float >(srclen + 2U ) = src.at <float >(srclen - 1U );
119- #endif
110+ const int srclen_1 = (int )srclen - 1 ;
120111
121112 uint32_t i = 0U ;
122113
114+ const float dstToSrcScale = 1 .0f / (float )(dstlen - 1U ) * (float )srclen;
123115#if (CV_SIMD || CV_SIMD_SCALABLE)
124- uint32_t len_sub_vfloatStep = (uint32_t )std::max ((int64_t )srclen - (int64_t )v_float32_width, (int64_t )0 );
125- for (; i < len_sub_vfloatStep; i+= v_float32_width)
126- {
127- v_float32 v_copy = vx_load (src.ptr <float >(0 ) + i);
128- vx_store (tmp.ptr <float >(0 ) + (i + 1U ), v_copy);
129- }
130- #endif
131-
132- // if the tail exists or scalar version
133- for (; i < srclen; ++i)
134- {
135- tmp.at <float >(i + 1U ) = src.at <float >(i);
136- }
137-
138- i = 0U ;
116+ const v_float32 v_dst2src_scale = vx_setall_f32 (dstToSrcScale);
117+ const v_float32 v_half = vx_setall_f32 (0 .5f );
139118
140- #if (CV_SIMD || CV_SIMD_SCALABLE)
141119 int ptr_x_int[v_float32_max_width];
142- uint32_t j;
143-
144- v_float32 v_dstlen_sub_1 = vx_setall_f32 ((float )(dstlen - 1U ));
145- v_float32 v_one = vx_setall_f32 (1 .0f );
146- v_float32 v_x_start = v_div (v_one, v_dstlen_sub_1);
147- v_float32 v_u = vx_setall_f32 ((float )srclen);
148- v_float32 v_half = vx_setall_f32 (0 .5f );
149-
150- len_sub_vfloatStep = (uint32_t )std::max ((int64_t )dstlen - (int64_t )v_float32_width, (int64_t )0 );
151- for (; i < v_float32_width; ++i)
120+ for (unsigned j = 0 ; j < v_float32_width; ++j)
152121 {
153- ptr_x_int[i ] = (int )i ;
122+ ptr_x_int[j ] = (int )j ;
154123 }
124+ const v_float32 v_sequence = v_cvt_f32 (vx_load (ptr_x_int));
155125
156- float ptr_for_cubicHermite[v_float32_max_width];
157- v_float32 v_sequence = v_cvt_f32 (vx_load (ptr_x_int));
158- for (i = 0U ; i < len_sub_vfloatStep; i+= v_float32_width)
126+ for (i = 0U ; i <= dstlen - v_float32_width; i+= v_float32_width)
159127 {
160128 v_float32 v_reg_i = v_add (vx_setall_f32 ((float )i), v_sequence);
161129
162- v_float32 v_x = v_sub (v_mul (v_x_start, v_reg_i, v_u ), v_half);
130+ v_float32 v_x = v_sub (v_mul (v_reg_i, v_dst2src_scale ), v_half);
163131
164132 v_int32 v_x_int = v_trunc (v_x);
165133 v_float32 v_x_fract = v_sub (v_x, v_cvt_f32 (v_floor (v_x)));
166134
167135 vx_store (ptr_x_int, v_x_int);
168136
169- for (j = 0U ; j < v_float32_width; ++j)
170- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] - 1 ));
171- v_float32 v_x_int_add_A = vx_load (ptr_for_cubicHermite);
172-
173- for (j = 0U ; j < v_float32_width; ++j)
174- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j]));
175- v_float32 v_x_int_add_B = vx_load (ptr_for_cubicHermite);
176-
177- for (j = 0U ; j < v_float32_width; ++j)
178- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] + 1 ));
179- v_float32 v_x_int_add_C = vx_load (ptr_for_cubicHermite);
180-
181- for (j = 0U ; j < v_float32_width; ++j)
182- ptr_for_cubicHermite[j] = *(tmp.ptr <float >(0 ) + (ptr_x_int[j] + 2 ));
183- v_float32 v_x_int_add_D = vx_load (ptr_for_cubicHermite);
137+ float ptr_for_cubicHermiteA[v_float32_max_width];
138+ float ptr_for_cubicHermiteB[v_float32_max_width];
139+ float ptr_for_cubicHermiteC[v_float32_max_width];
140+ float ptr_for_cubicHermiteD[v_float32_max_width];
184141
142+ for (unsigned j = 0U ; j < v_float32_width; ++j)
143+ {
144+ int src_offset = ptr_x_int[j];
145+ ptr_for_cubicHermiteA[j] = src[std::min (std::max (0 , src_offset - 1 ), srclen_1)];
146+ ptr_for_cubicHermiteB[j] = src[std::min (std::max (0 , src_offset + 0 ), srclen_1)];
147+ ptr_for_cubicHermiteC[j] = src[std::min (std::max (0 , src_offset + 1 ), srclen_1)];
148+ ptr_for_cubicHermiteD[j] = src[std::min (std::max (0 , src_offset + 2 ), srclen_1)];
149+ }
150+ v_float32 v_x_int_add_A = vx_load (ptr_for_cubicHermiteA);
151+ v_float32 v_x_int_add_B = vx_load (ptr_for_cubicHermiteB);
152+ v_float32 v_x_int_add_C = vx_load (ptr_for_cubicHermiteC);
153+ v_float32 v_x_int_add_D = vx_load (ptr_for_cubicHermiteD);
185154
186- vx_store (dst. ptr < float >( 0 ) + i , simd_cubicHermite (v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
155+ vx_store (& dst[i] , simd_cubicHermite (v_x_int_add_A, v_x_int_add_B, v_x_int_add_C, v_x_int_add_D, v_x_fract));
187156 }
188157#endif
189158
190159 // if the tail exists or scalar version
191- float *ptr = tmp.ptr <float >(0 ) + 1U ;
192- float lenScale = 1 .0f / (float )(dstlen - 1U );
193- float U, X, xfract;
194- int xint;
195160 for (; i < dstlen; ++i)
196161 {
197- U = (float )i * lenScale;
198- X = (U * (float )srclen) - 0 .5f ;
199- xfract = X - floor (X);
200- xint = (int )X;
201- dst.at <float >(i) = scal_cubicHermite (ptr[xint - 1 ], ptr[xint], ptr[xint + 1 ], ptr[xint + 2 ], xfract);
162+ float X = (float )i * dstToSrcScale - 0 .5f ;
163+ float xfract = X - floor (X);
164+ int xint = (int )X;
165+ float cubicHermiteA = src[std::min (std::max (0 , xint - 1 ), srclen_1)];
166+ float cubicHermiteB = src[std::min (std::max (0 , xint + 0 ), srclen_1)];
167+ float cubicHermiteC = src[std::min (std::max (0 , xint + 1 ), srclen_1)];
168+ float cubicHermiteD = src[std::min (std::max (0 , xint + 2 ), srclen_1)];
169+ dst[i] = scal_cubicHermite (cubicHermiteA, cubicHermiteB, cubicHermiteC, cubicHermiteD, xfract);
202170 }
203-
204171}
205172
206173static void fir_f32 (const float *pSrc, float *pDst,
@@ -332,7 +299,7 @@ static void fir_f32(const float *pSrc, float *pDst,
332299}
333300
334301void resampleSignal (InputArray inputSignal, OutputArray outputSignal,
335- const int inFreq, const int outFreq)
302+ const int inFreq, const int outFreq)
336303{
337304 CV_TRACE_FUNCTION ();
338305 CV_Assert (!inputSignal.empty ());
@@ -343,16 +310,18 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
343310 inputSignal.copyTo (outputSignal);
344311 return ;
345312 }
346- uint32_t filtLen = 33U ;
347- float beta = 3 .395f ;
348- std::vector<float > filt_window (filtLen, 0 .f );
349- init_filter (beta, filtLen, filt_window.data ());
350313 float ratio = (float )outFreq / float (inFreq);
351314 Mat1f inMat = inputSignal.getMat ();
352- Mat1f outMat = Mat1f (Size (cvFloor (inMat.cols * ratio), 1 ));
353- cubicInterpolate (inMat, outMat.cols , outMat, inMat.cols );
315+ outputSignal.create (Size (cvFloor (inMat.cols * ratio), 1 ), CV_32FC1);
316+ Mat1f outMat = outputSignal.getMat ();
317+ cubicInterpolate (inMat.ptr <float >(0 ), inMat.cols , outMat.ptr <float >(0 ), outMat.cols );
354318 if (inFreq < 2 * outFreq)
355319 {
320+ uint32_t filtLen = 33U ;
321+ float beta = 3 .395f ;
322+ std::vector<float > filt_window (filtLen, 0 .f );
323+ init_filter (beta, filtLen, filt_window.data ());
324+
356325 std::vector<float > dlyl (filtLen * 2 - 1 , 0 .f );
357326 std::vector<float > ptmp (outMat.cols + 2 * filtLen, 0 .);
358327
@@ -367,7 +336,6 @@ void resampleSignal(InputArray inputSignal, OutputArray outputSignal,
367336 outMat.at <float >(i - filtLen) = ptmp2[i + cvFloor ((float )filtLen / 2 .f )];
368337 }
369338 }
370- outputSignal.assign (std::move (outMat));
371339}
372340
373341
0 commit comments