|
15 | 15 | #include <type_traits> |
16 | 16 |
|
17 | 17 | #include "../types/xsimd_sse4_1_register.hpp" |
| 18 | +#include "./common/xsimd_common_cast.hpp" |
18 | 19 |
|
19 | 20 | namespace xsimd |
20 | 21 | { |
@@ -382,6 +383,63 @@ namespace xsimd |
382 | 383 | return _mm_round_pd(self, _MM_FROUND_TO_ZERO); |
383 | 384 | } |
384 | 385 |
|
| 386 | + // widen |
| 387 | + template <class A, class T> |
| 388 | + XSIMD_INLINE std::array<batch<widen_t<T>, A>, 2> widen(batch<T, A> const& x, requires_arch<sse4_1>) noexcept |
| 389 | + { |
| 390 | + __m128i x_lo = x; |
| 391 | + __m128i x_hi = _mm_unpackhi_epi64(x, x); |
| 392 | + __m128i lo, hi; |
| 393 | + XSIMD_IF_CONSTEXPR(sizeof(T) == 4) |
| 394 | + { |
| 395 | + XSIMD_IF_CONSTEXPR(std::is_signed<T>::value) |
| 396 | + { |
| 397 | + lo = _mm_cvtepi32_epi64(x_lo); |
| 398 | + hi = _mm_cvtepi32_epi64(x_hi); |
| 399 | + } |
| 400 | + else |
| 401 | + { |
| 402 | + lo = _mm_cvtepu32_epi64(x_lo); |
| 403 | + hi = _mm_cvtepu32_epi64(x_hi); |
| 404 | + } |
| 405 | + } |
| 406 | + else XSIMD_IF_CONSTEXPR(sizeof(T) == 2) |
| 407 | + { |
| 408 | + XSIMD_IF_CONSTEXPR(std::is_signed<T>::value) |
| 409 | + { |
| 410 | + lo = _mm_cvtepi16_epi32(x_lo); |
| 411 | + hi = _mm_cvtepi16_epi32(x_hi); |
| 412 | + } |
| 413 | + else |
| 414 | + { |
| 415 | + lo = _mm_cvtepu16_epi32(x_lo); |
| 416 | + hi = _mm_cvtepu16_epi32(x_hi); |
| 417 | + } |
| 418 | + } |
| 419 | + else XSIMD_IF_CONSTEXPR(sizeof(T) == 1) |
| 420 | + { |
| 421 | + XSIMD_IF_CONSTEXPR(std::is_signed<T>::value) |
| 422 | + { |
| 423 | + lo = _mm_cvtepi8_epi16(x_lo); |
| 424 | + hi = _mm_cvtepi8_epi16(x_hi); |
| 425 | + } |
| 426 | + else |
| 427 | + { |
| 428 | + lo = _mm_cvtepu8_epi16(x_lo); |
| 429 | + hi = _mm_cvtepu8_epi16(x_hi); |
| 430 | + } |
| 431 | + } |
| 432 | + return { lo, hi }; |
| 433 | + } |
| 434 | + template <class A> |
| 435 | + XSIMD_INLINE std::array<batch<double, A>, 2> widen(batch<float, A> const& x, requires_arch<sse4_1>) noexcept |
| 436 | + { |
| 437 | + __m128 x_shuf = _mm_unpackhi_ps(x, x); |
| 438 | + __m128d lo = _mm_cvtps_pd(x); |
| 439 | + __m128d hi = _mm_cvtps_pd(x_shuf); |
| 440 | + return { lo, hi }; |
| 441 | + } |
| 442 | + |
385 | 443 | } |
386 | 444 |
|
387 | 445 | } |
|
0 commit comments