@@ -85,26 +85,24 @@ const Ncv32u NUM_SCAN_THREADS = 256;
8585const Ncv32u LOG2_NUM_SCAN_THREADS = 8 ;
8686
8787
88- template <class T_in , class T_out >
88+ template <class T_in , class T_out , bool tbDoSqr >
8989struct _scanElemOp
9090{
91- template <bool tbDoSqr>
92- static inline __host__ __device__ T_out scanElemOp (T_in elem)
93- {
94- return scanElemOp ( elem, Int2Type<(int )tbDoSqr>() );
95- }
96-
97- private:
98-
99- template <int v> struct Int2Type { enum { value = v }; };
91+ static __host__ __device__ T_out scanElemOp (T_in elem);
92+ };
10093
101- static inline __host__ __device__ T_out scanElemOp (T_in elem, Int2Type<0 >)
102- {
103- return (T_out)elem;
94+ template <class T_in , class T_out >
95+ struct _scanElemOp <T_in, T_out, false >
96+ {
97+ static inline __host__ __device__ T_out scanElemOp (T_in elem) {
98+ return (T_out)(elem);
10499 }
100+ };
105101
106- static inline __host__ __device__ T_out scanElemOp (T_in elem, Int2Type<1 >)
107- {
102+ template <class T_in , class T_out >
103+ struct _scanElemOp <T_in, T_out, true >
104+ {
105+ static inline __host__ __device__ T_out scanElemOp (T_in elem) {
108106 return (T_out)(elem*elem);
109107 }
110108};
@@ -177,15 +175,15 @@ __global__ void scanRows(cv::cudev::TexturePtr<Ncv8u> tex8u, T_in *d_src, Ncv32u
177175 Ncv32u curElemOffs = offsetX + threadIdx .x ;
178176 T_out curScanElem;
179177
180- T_in curElem;
178+ T_in curElem = 0 ;
181179 T_out curElemMod;
182180
183181 if (curElemOffs < srcWidth)
184182 {
185183 // load elements
186184 curElem = readElem<T_in>(tex8u, d_src, texOffs, srcStride, curElemOffs);
187185 }
188- curElemMod = _scanElemOp<T_in, T_out>::scanElemOp<tbDoSqr> (curElem);
186+ curElemMod = _scanElemOp<T_in, T_out, tbDoSqr >::scanElemOp (curElem);
189187
190188 // inclusive scan
191189 curScanElem = cv::cudev::blockScanInclusive<NUM_SCAN_THREADS>(curElemMod, shmem, threadIdx .x );
0 commit comments