|
13 | 13 | #define XSIMD_COMMON_MEMORY_HPP |
14 | 14 |
|
15 | 15 | #include <algorithm> |
| 16 | +#include <array> |
16 | 17 | #include <complex> |
17 | 18 | #include <stdexcept> |
18 | 19 |
|
@@ -348,6 +349,102 @@ namespace xsimd |
348 | 349 | return detail::load_unaligned<A>(mem, cvt, common {}, detail::conversion_type<A, T_in, T_out> {}); |
349 | 350 | } |
350 | 351 |
|
| 352 | + template <class A, class T> |
| 353 | + XSIMD_INLINE batch<T, A> load(T const* mem, aligned_mode, requires_arch<A>) noexcept |
| 354 | + { |
| 355 | + return load_aligned<A>(mem, convert<T> {}, A {}); |
| 356 | + } |
| 357 | + |
| 358 | + template <class A, class T> |
| 359 | + XSIMD_INLINE batch<T, A> load(T const* mem, unaligned_mode, requires_arch<A>) noexcept |
| 360 | + { |
| 361 | + return load_unaligned<A>(mem, convert<T> {}, A {}); |
| 362 | + } |
| 363 | + |
| 364 | + template <class A, class T_in, class T_out, bool... Values, class alignment> |
| 365 | + XSIMD_INLINE batch<T_out, A> |
| 366 | + load_masked(T_in const* mem, batch_bool_constant<T_out, A, Values...>, convert<T_out>, alignment, requires_arch<common>) noexcept |
| 367 | + { |
| 368 | + constexpr std::size_t size = batch<T_out, A>::size; |
| 369 | + alignas(A::alignment()) std::array<T_out, size> buffer {}; |
| 370 | + constexpr bool mask[size] = { Values... }; |
| 371 | + |
| 372 | + for (std::size_t i = 0; i < size; ++i) |
| 373 | + buffer[i] = mask[i] ? static_cast<T_out>(mem[i]) : T_out(0); |
| 374 | + |
| 375 | + return batch<T_out, A>::load(buffer.data(), aligned_mode {}); |
| 376 | + } |
| 377 | + |
| 378 | + template <class A, class T_in, class T_out, bool... Values, class alignment> |
| 379 | + XSIMD_INLINE void |
| 380 | + store_masked(T_out* mem, batch<T_in, A> const& src, batch_bool_constant<T_in, A, Values...>, alignment, requires_arch<common>) noexcept |
| 381 | + { |
| 382 | + constexpr std::size_t size = batch<T_in, A>::size; |
| 383 | + constexpr bool mask[size] = { Values... }; |
| 384 | + |
| 385 | + for (std::size_t i = 0; i < size; ++i) |
| 386 | + if (mask[i]) |
| 387 | + { |
| 388 | + mem[i] = static_cast<T_out>(src.get(i)); |
| 389 | + } |
| 390 | + } |
| 391 | + |
| 392 | + template <class A, bool... Values, class Mode> |
| 393 | + XSIMD_INLINE batch<int32_t, A> load_masked(int32_t const* mem, batch_bool_constant<int32_t, A, Values...>, convert<int32_t>, Mode, requires_arch<A>) noexcept |
| 394 | + { |
| 395 | + const auto f = load_masked<A>(reinterpret_cast<const float*>(mem), batch_bool_constant<float, A, Values...> {}, convert<float> {}, Mode {}, A {}); |
| 396 | + return bitwise_cast<int32_t>(f); |
| 397 | + } |
| 398 | + |
| 399 | + template <class A, bool... Values, class Mode> |
| 400 | + XSIMD_INLINE batch<uint32_t, A> load_masked(uint32_t const* mem, batch_bool_constant<uint32_t, A, Values...>, convert<uint32_t>, Mode, requires_arch<A>) noexcept |
| 401 | + { |
| 402 | + const auto f = load_masked<A>(reinterpret_cast<const float*>(mem), batch_bool_constant<float, A, Values...> {}, convert<float> {}, Mode {}, A {}); |
| 403 | + return bitwise_cast<uint32_t>(f); |
| 404 | + } |
| 405 | + |
| 406 | + template <class A, bool... Values, class Mode> |
| 407 | + XSIMD_INLINE typename std::enable_if<types::has_simd_register<double, A>::value, batch<int64_t, A>>::type |
| 408 | + load_masked(int64_t const* mem, batch_bool_constant<int64_t, A, Values...>, convert<int64_t>, Mode, requires_arch<A>) noexcept |
| 409 | + { |
| 410 | + const auto d = load_masked<A>(reinterpret_cast<const double*>(mem), batch_bool_constant<double, A, Values...> {}, convert<double> {}, Mode {}, A {}); |
| 411 | + return bitwise_cast<int64_t>(d); |
| 412 | + } |
| 413 | + |
| 414 | + template <class A, bool... Values, class Mode> |
| 415 | + XSIMD_INLINE typename std::enable_if<types::has_simd_register<double, A>::value, batch<uint64_t, A>>::type |
| 416 | + load_masked(uint64_t const* mem, batch_bool_constant<uint64_t, A, Values...>, convert<uint64_t>, Mode, requires_arch<A>) noexcept |
| 417 | + { |
| 418 | + const auto d = load_masked<A>(reinterpret_cast<const double*>(mem), batch_bool_constant<double, A, Values...> {}, convert<double> {}, Mode {}, A {}); |
| 419 | + return bitwise_cast<uint64_t>(d); |
| 420 | + } |
| 421 | + |
| 422 | + template <class A, bool... Values, class Mode> |
| 423 | + XSIMD_INLINE void store_masked(int32_t* mem, batch<int32_t, A> const& src, batch_bool_constant<int32_t, A, Values...>, Mode, requires_arch<A>) noexcept |
| 424 | + { |
| 425 | + store_masked<A>(reinterpret_cast<float*>(mem), bitwise_cast<float>(src), batch_bool_constant<float, A, Values...> {}, Mode {}, A {}); |
| 426 | + } |
| 427 | + |
| 428 | + template <class A, bool... Values, class Mode> |
| 429 | + XSIMD_INLINE void store_masked(uint32_t* mem, batch<uint32_t, A> const& src, batch_bool_constant<uint32_t, A, Values...>, Mode, requires_arch<A>) noexcept |
| 430 | + { |
| 431 | + store_masked<A>(reinterpret_cast<float*>(mem), bitwise_cast<float>(src), batch_bool_constant<float, A, Values...> {}, Mode {}, A {}); |
| 432 | + } |
| 433 | + |
| 434 | + template <class A, bool... Values, class Mode> |
| 435 | + XSIMD_INLINE typename std::enable_if<types::has_simd_register<double, A>::value, void>::type |
| 436 | + store_masked(int64_t* mem, batch<int64_t, A> const& src, batch_bool_constant<int64_t, A, Values...>, Mode, requires_arch<A>) noexcept |
| 437 | + { |
| 438 | + store_masked<A>(reinterpret_cast<double*>(mem), bitwise_cast<double>(src), batch_bool_constant<double, A, Values...> {}, Mode {}, A {}); |
| 439 | + } |
| 440 | + |
| 441 | + template <class A, bool... Values, class Mode> |
| 442 | + XSIMD_INLINE typename std::enable_if<types::has_simd_register<double, A>::value, void>::type |
| 443 | + store_masked(uint64_t* mem, batch<uint64_t, A> const& src, batch_bool_constant<uint64_t, A, Values...>, Mode, requires_arch<A>) noexcept |
| 444 | + { |
| 445 | + store_masked<A>(reinterpret_cast<double*>(mem), bitwise_cast<double>(src), batch_bool_constant<double, A, Values...> {}, Mode {}, A {}); |
| 446 | + } |
| 447 | + |
351 | 448 | // rotate_right |
352 | 449 | template <size_t N, class A, class T> |
353 | 450 | XSIMD_INLINE batch<T, A> rotate_right(batch<T, A> const& self, requires_arch<common>) noexcept |
|
0 commit comments