Skip to content

Commit 4d848a8

Browse files
Extend support of batch_cast<...> to upcasting to a type twice as big
Fix #1179
1 parent cbf693c commit 4d848a8

File tree

4 files changed

+120
-1
lines changed

4 files changed

+120
-1
lines changed

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,22 @@ namespace xsimd
527527
}
528528
}
529529

530+
// upcast
531+
template <class A, class T>
532+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<avx>) noexcept
533+
{
534+
auto pair_lo = batch_upcast(batch<T, sse4_2>(_mm256_extractf128_si256(x, 0)), sse4_2 {});
535+
auto pair_hi = batch_upcast(batch<T, sse4_2>(_mm256_extractf128_si256(x, 1)), sse4_2 {});
536+
return { detail::merge_sse(pair_lo[0], pair_lo[1]), detail::merge_sse(pair_hi[0], pair_hi[1]) };
537+
}
538+
template <class A>
539+
XSIMD_INLINE std::array<batch<double, A>, 2> batch_upcast(batch<float, A> const& x, requires_arch<avx>) noexcept
540+
{
541+
__m256d lo = _mm256_cvtps_pd(_mm256_extractf128_ps(x, 0));
542+
__m256d hi = _mm256_cvtps_pd(_mm256_extractf128_ps(x, 1));
543+
return { lo, hi };
544+
}
545+
530546
// decr_if
531547
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
532548
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,31 @@ namespace xsimd
431431
}
432432
}
433433

434+
// upcast
435+
template <class A, class T>
436+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<avx2>) noexcept
437+
{
438+
__m128i x_lo = _mm256_extracti128_si256(x, 0);
439+
__m128i x_hi = _mm256_extracti128_si256(x, 1);
440+
__m256i lo, hi;
441+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
442+
{
443+
lo = _mm256_cvtepi32_epi64(x_lo);
444+
hi = _mm256_cvtepi32_epi64(x_hi);
445+
}
446+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
447+
{
448+
lo = _mm256_cvtepi16_epi32(x_lo);
449+
hi = _mm256_cvtepi16_epi32(x_hi);
450+
}
451+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
452+
{
453+
lo = _mm256_cvtepi8_epi16(x_lo);
454+
hi = _mm256_cvtepi8_epi16(x_hi);
455+
}
456+
return { lo, hi };
457+
}
458+
434459
// eq
435460
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
436461
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept

include/xsimd/arch/xsimd_sse4_1.hpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,76 @@ namespace xsimd
6767
}
6868
}
6969

70+
// upcast
71+
namespace detail
{
    /**
     * Maps a fixed-width integer type to the integer type of the same
     * signedness and twice the width. Only the 8-, 16- and 32-bit types are
     * mapped; the primary template is declared but left undefined.
     */
    template <typename T>
    struct upcast;
    template <>
    struct upcast<uint32_t>
    {
        using type = uint64_t;
    };
    template <>
    struct upcast<uint16_t>
    {
        using type = uint32_t;
    };
    template <>
    struct upcast<uint8_t>
    {
        // 8-bit elements widen to 16 bits, matching the 8 -> 16 bit conversions of the kernels
        using type = uint16_t;
    };
    template <>
    struct upcast<int32_t>
    {
        using type = int64_t;
    };
    template <>
    struct upcast<int16_t>
    {
        using type = int32_t;
    };
    template <>
    struct upcast<int8_t>
    {
        // 8-bit elements widen to 16 bits, matching the 8 -> 16 bit conversions of the kernels
        using type = int16_t;
    };
    /** Shorthand for \c upcast<T>::type. */
    template <typename T>
    using upcast_t = typename upcast<T>::type;
}
108+
109+
template <class T, class A>
110+
XSIMD_INLINE std::array<batch<detail::upcast_t<T>, A>, 2> batch_upcast(batch<T, A> const& x, requires_arch<sse4_1>) noexcept
111+
{
112+
__m128i x_shuf = _mm_unpackhi_epi64(x, x);
113+
__m128i lo, hi;
114+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
115+
{
116+
lo = _mm_cvtepi32_epi64(x);
117+
hi = _mm_cvtepi32_epi64(x_shuf);
118+
}
119+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
120+
{
121+
lo = _mm_cvtepi16_epi32(x);
122+
hi = _mm_cvtepi16_epi32(x_shuf);
123+
}
124+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
125+
{
126+
lo = _mm_cvtepi8_epi16(x);
127+
hi = _mm_cvtepi8_epi16(x_shuf);
128+
}
129+
return { lo, hi };
130+
}
131+
template <class A>
132+
XSIMD_INLINE std::array<batch<double, A>, 2> batch_upcast(batch<float, A> const& x, requires_arch<sse4_1>) noexcept
133+
{
134+
__m128 x_shuf = _mm_unpackhi_ps(x, x);
135+
__m128d lo = _mm_cvtps_pd(x);
136+
__m128d hi = _mm_cvtps_pd(x_shuf);
137+
return { lo, hi };
138+
}
139+
70140
// eq
71141
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
72142
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<sse4_1>) noexcept

include/xsimd/types/xsimd_api.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,14 +256,22 @@ namespace xsimd
256256
* @param x batch of \c T_in
257257
* @return \c x cast to \c T_out
258258
*/
259-
template <class T_out, class T_in, class A>
259+
template <class T_out, class T_in, class A, class = typename std::enable_if<sizeof(T_out) == sizeof(T_in), void>::type>
260260
XSIMD_INLINE batch<T_out, A> batch_cast(batch<T_in, A> const& x) noexcept
261261
{
262262
detail::static_check_supported_config<T_out, A>();
263263
detail::static_check_supported_config<T_in, A>();
264264
return kernel::batch_cast<A>(x, batch<T_out, A> {}, A {});
265265
}
266266

267+
template <class T_out, class T_in, class A, class = typename std::enable_if<sizeof(T_out) == 2 * sizeof(T_in), void>::type>
268+
XSIMD_INLINE std::array<batch<T_out, A>, 2> batch_cast(batch<T_in, A> const& x) noexcept
269+
{
270+
detail::static_check_supported_config<T_out, A>();
271+
detail::static_check_supported_config<T_in, A>();
272+
return kernel::batch_upcast<A>(x, A {});
273+
}
274+
267275
/**
268276
* @ingroup batch_miscellaneous
269277
*

0 commit comments

Comments
 (0)