|
@@ -20,6 +20,7 @@
 
 #include "../types/xsimd_neon_register.hpp"
 #include "../types/xsimd_utils.hpp"
+#include "./common/xsimd_common_cast.hpp"
 
 // Wrap intrinsics so we can pass them as function pointers
 // - OP: intrinsics name prefix, e.g., vorrq
@@ -3211,6 +3212,41 @@ namespace xsimd |
         {
             return vreinterpretq_f32_u32(swizzle(batch<uint32_t, A>(vreinterpretq_u32_f32(self)), mask, A {}));
         }
+
+        /*********
+         * widen *
+         *********/
+        template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_s8(vget_low_s8(x), vdup_n_s8(0))), batch<widen_t<T>, A>(vaddl_s8(vget_high_s8(x), vdup_n_s8(0))) };
+        }
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_u8(vget_low_u8(x), vdup_n_u8(0))), batch<widen_t<T>, A>(vaddl_u8(vget_high_u8(x), vdup_n_u8(0))) };
+        }
+        template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_s16(vget_low_s16(x), vdup_n_s16(0))), batch<widen_t<T>, A>(vaddl_s16(vget_high_s16(x), vdup_n_s16(0))) };
+        }
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_u16(vget_low_u16(x), vdup_n_u16(0))), batch<widen_t<T>, A>(vaddl_u16(vget_high_u16(x), vdup_n_u16(0))) };
+        }
+        template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_s32(vget_low_s32(x), vdup_n_s32(0))), batch<widen_t<T>, A>(vaddl_s32(vget_high_s32(x), vdup_n_s32(0))) };
+        }
+        template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
+        XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<neon>) noexcept
+        {
+            return { batch<widen_t<T>, A>(vaddl_u32(vget_low_u32(x), vdup_n_u32(0))), batch<widen_t<T>, A>(vaddl_u32(vget_high_u32(x), vdup_n_u32(0))) };
+        }
+
     }
 
 }
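
Implementation note: each widen overload splits a 128-bit batch into its low and high halves (vget_low_* / vget_high_*) and add-longs each half against a zero vector (vaddl_*), which sign-extends signed lanes and zero-extends unsigned lanes into lanes of twice the width. The sketch below demonstrates that trick standalone, outside xsimd; it is a minimal illustration only (ARM NEON target assumed, input values arbitrary):

    #include <arm_neon.h>
    #include <cstdio>

    int main()
    {
        // 16 signed bytes; widening should sign-extend each to 16 bits.
        int8_t data[16] = { -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
        int8x16_t v = vld1q_s8(data);

        // vaddl_s8(a, b) returns the int16x8_t sum of two int8x8_t inputs,
        // so adding a zero vector widens each half with sign extension.
        int16x8_t lo = vaddl_s8(vget_low_s8(v), vdup_n_s8(0));
        int16x8_t hi = vaddl_s8(vget_high_s8(v), vdup_n_s8(0));

        int16_t out[16];
        vst1q_s16(out, lo);
        vst1q_s16(out + 8, hi);
        for (int i = 0; i < 16; ++i)
            std::printf("%d ", out[i]); // prints -3 -2 -1 0 1 ... 12
        std::printf("\n");
    }

A more direct spelling of the same extension would be vmovl_s8(vget_low_s8(v)), NEON's lengthening move; the vaddl-with-zero form used in the patch is behaviorally equivalent.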
|