|
15 | 15 | #include <type_traits> |
16 | 16 |
|
17 | 17 | #include "../types/xsimd_sse4_1_register.hpp" |
| 18 | +#include "./common/xsimd_common_cast.hpp" |
18 | 19 |
|
19 | 20 | namespace xsimd |
20 | 21 | { |
@@ -67,6 +68,38 @@ namespace xsimd |
67 | 68 | } |
68 | 69 | } |
69 | 70 |
|
| 71 | + // upcast |
| 72 | + template <class T, class A> |
| 73 | + XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<sse4_1>) noexcept |
| 74 | + { |
| 75 | + __m128i x_shuf = _mm_unpackhi_epi64(x, x); |
| 76 | + __m128i lo, hi; |
| 77 | + XSIMD_IF_CONSTEXPR(sizeof(T) == 4) |
| 78 | + { |
| 79 | + lo = _mm_cvtepi32_epi64(x); |
| 80 | + hi = _mm_cvtepi32_epi64(x_shuf); |
| 81 | + } |
| 82 | + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) |
| 83 | + { |
| 84 | + lo = _mm_cvtepi16_epi32(x); |
| 85 | + hi = _mm_cvtepi16_epi32(x_shuf); |
| 86 | + } |
| 87 | + else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) |
| 88 | + { |
| 89 | + lo = _mm_cvtepi8_epi16(x); |
| 90 | + hi = _mm_cvtepi8_epi16(x_shuf); |
| 91 | + } |
| 92 | + return { lo, hi }; |
| 93 | + } |
| 94 | + template <class A> |
| 95 | + XSIMD_INLINE std::array<batch<double, A>, 2> batch_upcast(batch<float, A> const& x, requires_arch<sse4_1>) noexcept |
| 96 | + { |
| 97 | + __m128 x_shuf = _mm_unpackhi_ps(x, x); |
| 98 | + __m128d lo = _mm_cvtps_pd(x); |
| 99 | + __m128d hi = _mm_cvtps_pd(x_shuf); |
| 100 | + return { lo, hi }; |
| 101 | + } |
| 102 | + |
70 | 103 | // eq |
71 | 104 | template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type> |
72 | 105 | XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse4_1>) noexcept |
|
0 commit comments