@@ -92,7 +92,11 @@ namespace
9292 {
9393 typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
9494
95+ #if USE_NPP_STREAM_CTX
96+ typedef NppStatus (*func_t )(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI, NppStreamContext ctx);
97+ #else
9598 typedef NppStatus (*func_t )(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
99+ #endif
96100 };
97101
98102 template <int DEPTH, int cn, typename NppBitwiseCFunc<DEPTH, cn>::func_t func> struct NppBitwiseC
@@ -116,7 +120,11 @@ namespace
116120 cv::saturate_cast<npp_type>(value[3 ])
117121 };
118122
123+ #if USE_NPP_STREAM_CTX
124+ nppSafeCall (func (src.ptr <npp_type>(), static_cast <int >(src.step ), pConstants, dst.ptr <npp_type>(), static_cast <int >(dst.step ), oSizeROI, h));
125+ #else
119126 nppSafeCall ( func (src.ptr <npp_type>(), static_cast <int >(src.step ), pConstants, dst.ptr <npp_type>(), static_cast <int >(dst.step ), oSizeROI) );
127+ #endif
120128
121129 if (stream == 0 )
122130 CV_CUDEV_SAFE_CALL ( cudaDeviceSynchronize () );
@@ -131,13 +139,39 @@ void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const Gpu
131139 typedef void (*func_t )(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
132140 static const func_t funcs[3 ][6 ][4 ] =
133141 {
142+ #if USE_NPP_STREAM_CTX
143+ {
144+ {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
145+ {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
146+ {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R_Ctx>::call},
147+ {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R_Ctx>::call},
148+ {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R_Ctx>::call},
149+ {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R_Ctx>::call}
150+ },
134151 {
135- {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
136- {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
137- {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call},
138- {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call},
139- {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call},
140- {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call}
152+ {BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
153+ {BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
154+ {BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiOrC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiOrC_16u_C4R_Ctx>::call},
155+ {BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiOrC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiOrC_16u_C4R_Ctx>::call},
156+ {BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiOrC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiOrC_32s_C4R_Ctx>::call},
157+ {BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiOrC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiOrC_32s_C4R_Ctx>::call}
158+ },
159+ {
160+ {BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiXorC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
161+ {BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiXorC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
162+ {BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiXorC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiXorC_16u_C4R_Ctx>::call},
163+ {BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiXorC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiXorC_16u_C4R_Ctx>::call},
164+ {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R_Ctx>::call},
165+ {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R_Ctx>::call}
166+ }
167+ #else
168+ {
169+ { BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call, 0 , NppBitwiseC<CV_8U, 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call },
170+ { BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call },
171+ { BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call },
172+ { BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call },
173+ { BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call },
174+ { BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call }
141175 },
142176 {
143177 {BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
@@ -155,6 +189,7 @@ void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const Gpu
155189 {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R>::call},
156190 {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R>::call}
157191 }
192+ #endif
158193 };
159194
160195 const int depth = src.depth ();
0 commit comments