Skip to content

Commit 9e19732

Browse files
Extend support of batch_cast<...> to upcasting to a type twice as big
Fix #1179
1 parent cbf693c commit 9e19732

File tree

4 files changed

+84
-1
lines changed

4 files changed

+84
-1
lines changed

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,22 @@ namespace xsimd
527527
}
528528
}
529529

530+
// upcast
531+
template <class A, class T>
532+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<avx>) noexcept
533+
{
534+
auto pair_lo = batch_upcast(batch<T, sse4_2>(_mm256_extractf128_si256(x, 0)), sse4_2{});
535+
auto pair_hi = batch_upcast(batch<T, sse4_2>(_mm256_extractf128_si256(x, 1)), sse4_2{});
536+
return {detail::merge_sse(pair_lo[0], pair_lo[1]), detail::merge_sse(pair_hi[0], pair_hi[1])};
537+
}
538+
template <class A>
539+
XSIMD_INLINE std::array<batch<double, A>, 2> batch_upcast(batch<float, A> const& x, requires_arch<avx>) noexcept
540+
{
541+
__m256d lo = _mm256_cvtps_pd(_mm256_extractf128_ps(x, 0));
542+
__m256d hi = _mm256_cvtps_pd(_mm256_extractf128_ps(x, 1));
543+
return {lo, hi};
544+
}
545+
530546
// decr_if
531547
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
532548
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,26 @@ namespace xsimd
431431
}
432432
}
433433

434+
// upcast
435+
template <class A, class T>
436+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<avx2>) noexcept
437+
{
438+
__m128i x_lo = _mm256_extracti128_si256(x, 0);
439+
__m128i x_hi = _mm256_extracti128_si256(x, 1);
440+
__m256i lo, hi;
441+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4) {
442+
lo = _mm256_cvtepi32_epi64(x_lo);
443+
hi = _mm256_cvtepi32_epi64(x_hi);
444+
} else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) {
445+
lo = _mm256_cvtepi16_epi32(x_lo);
446+
hi = _mm256_cvtepi16_epi32(x_hi);
447+
} else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) {
448+
lo = _mm256_cvtepi8_epi16(x_lo);
449+
hi = _mm256_cvtepi8_epi16(x_hi);
450+
}
451+
return {lo, hi};
452+
}
453+
434454
// eq
435455
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
436456
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept

include/xsimd/arch/xsimd_sse4_1.hpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,45 @@ namespace xsimd
6767
}
6868
}
6969

70+
// upcast
71+
namespace detail
{
    // Maps an integer type T to the integer type of the same signedness
    // that is twice as wide. Only the types listed below are supported;
    // any other T leaves upcast<T> undefined.
    template <typename T>
    struct upcast;
    template <>
    struct upcast<uint32_t>
    {
        using type = uint64_t;
    };
    template <>
    struct upcast<uint16_t>
    {
        using type = uint32_t;
    };
    // 8-bit types widen to 16 bits (previously mapped to themselves).
    template <>
    struct upcast<uint8_t>
    {
        using type = uint16_t;
    };
    template <>
    struct upcast<int32_t>
    {
        using type = int64_t;
    };
    template <>
    struct upcast<int16_t>
    {
        using type = int32_t;
    };
    template <>
    struct upcast<int8_t>
    {
        using type = int16_t;
    };
    // Convenience alias for typename upcast<T>::type.
    template <typename T>
    using upcast_t = typename upcast<T>::type;
}
82+
83+
template <class T, class A>
84+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<sse4_1>) noexcept
85+
{
86+
__m128i x_shuf = _mm_unpackhi_epi64(x, x);
87+
__m128i lo, hi;
88+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4) {
89+
lo = _mm_cvtepi32_epi64(x);
90+
hi = _mm_cvtepi32_epi64(x_shuf);
91+
} else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) {
92+
lo = _mm_cvtepi16_epi32(x);
93+
hi = _mm_cvtepi16_epi32(x_shuf);
94+
} else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) {
95+
lo = _mm_cvtepi8_epi16(x);
96+
hi = _mm_cvtepi8_epi16(x_shuf);
97+
}
98+
return {lo, hi};
99+
}
100+
template <class A>
101+
XSIMD_INLINE std::array<batch<double, A>, 2> batch_upcast(batch<float, A> const& x, requires_arch<sse4_1>) noexcept
102+
{
103+
__m128 x_shuf = _mm_unpackhi_ps(x, x);
104+
__m128d lo = _mm_cvtps_pd(x);
105+
__m128d hi = _mm_cvtps_pd(x_shuf);
106+
return {lo, hi};
107+
}
108+
70109
// eq
71110
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
72111
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse4_1>) noexcept

include/xsimd/types/xsimd_api.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,22 @@ namespace xsimd
256256
* @param x batch of \c T_in
257257
* @return \c x cast to \c T_out
258258
*/
259-
template <class T_out, class T_in, class A>
259+
template <class T_out, class T_in, class A, class = typename std::enable_if<sizeof(T_out) == sizeof(T_in), void>::type>
260260
XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept
261261
{
262262
detail::static_check_supported_config<T_out, A>();
263263
detail::static_check_supported_config<T_in, A>();
264264
return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {});
265265
}
266266

267+
template <class T_out, class T_in, class A, class = typename std::enable_if<sizeof(T_out) == 2 * sizeof(T_in), void>::type>
268+
XSIMD_INLINE std::array<batch<T_out, A>, 2> batch_cast(batch<T_in, A> const& x) noexcept
269+
{
270+
detail::static_check_supported_config<T_out, A>();
271+
detail::static_check_supported_config<T_in, A>();
272+
return kernel::batch_upcast<A>(x, A {});
273+
}
274+
267275
/**
268276
* @ingroup batch_miscellaneous
269277
*

0 commit comments

Comments
 (0)