|
23 | 23 | #include "binary_ops.h" |
24 | 24 | #include "comparison_ops.h" |
25 | 25 |
|
26 | | - |
27 | 26 | static NPY_CASTING |
28 | 27 | quad_comparison_op_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], |
29 | 28 | PyArray_Descr *const given_descrs[], |
@@ -145,6 +144,119 @@ quad_generic_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *con |
145 | 144 | } |
146 | 145 | return 0; |
147 | 146 | } |
| 147 | +// todo: It'll be better to generate separate templates for aligned and unaligned loops |
| 148 | +// Resolve desc and strided loops for logical reduction (Bool, Quad) => Bool |
| 149 | +static NPY_CASTING |
| 150 | +quad_comparison_reduce_resolve_descriptors(PyObject *self, PyArray_DTypeMeta *const dtypes[], |
| 151 | + PyArray_Descr *const given_descrs[], |
| 152 | + PyArray_Descr *loop_descrs[], |
| 153 | + npy_intp *NPY_UNUSED(view_offset)) |
| 154 | +{ |
| 155 | + QuadPrecDTypeObject *descr_in1 = (QuadPrecDTypeObject *)given_descrs[0]; |
| 156 | + QuadPrecDTypeObject *descr_in2 = (QuadPrecDTypeObject *)given_descrs[1]; |
| 157 | + NPY_CASTING casting = NPY_SAFE_CASTING; |
| 158 | + |
| 159 | + for (int i = 0; i < 2; i++) { |
| 160 | + Py_INCREF(given_descrs[i]); |
| 161 | + loop_descrs[i] = given_descrs[i]; |
| 162 | + } |
| 163 | + |
| 164 | + // Set up output descriptor |
| 165 | + loop_descrs[2] = PyArray_DescrFromType(NPY_BOOL); |
| 166 | + if (!loop_descrs[2]) { |
| 167 | + return (NPY_CASTING)-1; |
| 168 | + } |
| 169 | + return casting; |
| 170 | +} |
| 171 | + |
| 172 | +template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp> |
| 173 | +int |
| 174 | +quad_reduce_comp_strided_loop_aligned(PyArrayMethod_Context *context, char *const data[], |
| 175 | + npy_intp const dimensions[], npy_intp const strides[], |
| 176 | + NpyAuxData *auxdata) |
| 177 | +{ |
| 178 | + npy_intp N = dimensions[0]; |
| 179 | + char *in1_ptr = data[0]; // bool |
| 180 | + char *in2_ptr = data[1]; // quad |
| 181 | + char *out_ptr = data[2]; // bool |
| 182 | + npy_intp in1_stride = strides[0]; |
| 183 | + npy_intp in2_stride = strides[1]; |
| 184 | + npy_intp out_stride = strides[2]; |
| 185 | + |
| 186 | + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[1]; |
| 187 | + QuadBackendType backend = descr->backend; |
| 188 | + while (N--) { |
| 189 | + npy_bool in1 = *(npy_bool *)in1_ptr; |
| 190 | + quad_value in1_quad; |
| 191 | + quad_value in2; |
| 192 | + |
| 193 | + npy_bool result; |
| 194 | + |
| 195 | + if (backend == BACKEND_SLEEF) { |
| 196 | + in1_quad.sleef_value = Sleef_cast_from_int64q1(in1); |
| 197 | + in2.sleef_value = *(Sleef_quad *)in2_ptr; |
| 198 | + result = sleef_comp(&in1_quad.sleef_value, &in2.sleef_value); |
| 199 | + } |
| 200 | + else { |
| 201 | + in1_quad.longdouble_value = static_cast<long double>(in1); |
| 202 | + in2.longdouble_value = *(long double *)in2_ptr; |
| 203 | + result = ld_comp(&in1_quad.longdouble_value, &in2.longdouble_value); |
| 204 | + } |
| 205 | + |
| 206 | + *(npy_bool *)out_ptr = result; |
| 207 | + |
| 208 | + in1_ptr += in1_stride; |
| 209 | + in2_ptr += in2_stride; |
| 210 | + out_ptr += out_stride; |
| 211 | + } |
| 212 | + return 0; |
| 213 | +} |
| 214 | + |
| 215 | +template <cmp_quad_def sleef_comp, cmp_londouble_def ld_comp> |
| 216 | +int |
| 217 | +quad_reduce_comp_strided_loop_unaligned(PyArrayMethod_Context *context, char *const data[], |
| 218 | + npy_intp const dimensions[], npy_intp const strides[], |
| 219 | + NpyAuxData *auxdata) |
| 220 | +{ |
| 221 | + npy_intp N = dimensions[0]; |
| 222 | + char *in1_ptr = data[0]; // bool |
| 223 | + char *in2_ptr = data[1]; // quad |
| 224 | + char *out_ptr = data[2]; // bool |
| 225 | + npy_intp in1_stride = strides[0]; |
| 226 | + npy_intp in2_stride = strides[1]; |
| 227 | + npy_intp out_stride = strides[2]; |
| 228 | + |
| 229 | + QuadPrecDTypeObject *descr = (QuadPrecDTypeObject *)context->descriptors[1]; |
| 230 | + QuadBackendType backend = descr->backend; |
| 231 | + size_t elem_size = (backend == BACKEND_SLEEF) ? sizeof(Sleef_quad) : sizeof(long double); |
| 232 | + |
| 233 | + npy_bool in1; |
| 234 | + quad_value in1_quad, in2; |
| 235 | + while (N--) { |
| 236 | + memcpy(&in1, in1_ptr, sizeof(npy_bool)); |
| 237 | + if(backend == BACKEND_SLEEF) |
| 238 | + in1_quad.sleef_value = Sleef_cast_from_int64q1(in1); |
| 239 | + else |
| 240 | + in1_quad.longdouble_value = static_cast<long double>(in1); |
| 241 | + memcpy(&in2, in2_ptr, elem_size); |
| 242 | + npy_bool result; |
| 243 | + |
| 244 | + if (backend == BACKEND_SLEEF) { |
| 245 | + result = sleef_comp(&in1_quad.sleef_value, &in2.sleef_value); |
| 246 | + } |
| 247 | + else { |
| 248 | + result = ld_comp(&in1_quad.longdouble_value, &in2.longdouble_value); |
| 249 | + } |
| 250 | + |
| 251 | + memcpy(out_ptr, &result, sizeof(npy_bool)); |
| 252 | + |
| 253 | + in1_ptr += in1_stride; |
| 254 | + in2_ptr += in2_stride; |
| 255 | + out_ptr += out_stride; |
| 256 | + } |
| 257 | + return 0; |
| 258 | +} |
| 259 | + |
148 | 260 |
|
149 | 261 | NPY_NO_EXPORT int |
150 | 262 | comparison_ufunc_promoter(PyUFuncObject *ufunc, PyArray_DTypeMeta *op_dtypes[], |
@@ -194,13 +306,38 @@ create_quad_comparison_ufunc(PyObject *numpy, const char *ufunc_name) |
194 | 306 | return -1; |
195 | 307 | } |
196 | 308 |
|
| 309 | + // registering the reduce methods |
| 310 | + PyArray_DTypeMeta *dtypes_reduce[3] = {&PyArray_BoolDType, &QuadPrecDType, &PyArray_BoolDType}; |
| 311 | + |
| 312 | + PyType_Slot slots_reduce[] = { |
| 313 | + {NPY_METH_resolve_descriptors, (void *)&quad_comparison_reduce_resolve_descriptors}, |
| 314 | + {NPY_METH_strided_loop, |
| 315 | + (void *)&quad_reduce_comp_strided_loop_unaligned<sleef_comp, ld_comp>}, |
| 316 | + {NPY_METH_unaligned_strided_loop, |
| 317 | + (void *)&quad_reduce_comp_strided_loop_unaligned<sleef_comp, ld_comp>}, |
| 318 | + {0, NULL}}; |
| 319 | + |
| 320 | + PyArrayMethod_Spec Spec_reduce = { |
| 321 | + .name = "quad_comp", |
| 322 | + .nin = 2, |
| 323 | + .nout = 1, |
| 324 | + .casting = NPY_SAFE_CASTING, |
| 325 | + .flags = NPY_METH_SUPPORTS_UNALIGNED, |
| 326 | + .dtypes = dtypes_reduce, |
| 327 | + .slots = slots_reduce, |
| 328 | + }; |
| 329 | + |
| 330 | + if (PyUFunc_AddLoopFromSpec(ufunc, &Spec_reduce) < 0) { |
| 331 | + return -1; |
| 332 | + } |
| 333 | + |
197 | 334 | PyObject *promoter_capsule = |
198 | 335 | PyCapsule_New((void *)&comparison_ufunc_promoter, "numpy._ufunc_promoter", NULL); |
199 | 336 | if (promoter_capsule == NULL) { |
200 | 337 | return -1; |
201 | 338 | } |
202 | 339 |
|
203 | | - PyObject *DTypes = PyTuple_Pack(3, Py_None, Py_None, Py_None); |
| 340 | + PyObject *DTypes = PyTuple_Pack(3, &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArray_BoolDType); |
204 | 341 | if (DTypes == 0) { |
205 | 342 | Py_DECREF(promoter_capsule); |
206 | 343 | return -1; |
|
0 commit comments