
Commit 252fd00

Merge pull request #434 from guyuqi/add-zip_hilo
Add zip_hi/zip_lo for shuffling operations
2 parents: afe4d7d + 3a9d2d2

17 files changed: +292 −0 lines

include/xsimd/types/xsimd_base.hpp

Lines changed: 36 additions & 0 deletions
@@ -282,6 +282,12 @@ namespace xsimd
     template <class X>
     batch_type_t<X> select(const typename simd_batch_traits<X>::batch_bool_type& cond, const simd_base<X>& a, const simd_base<X>& b);
 
+    template <class X>
+    batch_type_t<X> zip_lo(const simd_base<X>& lhs, const simd_base<X>& rhs);
+
+    template <class X>
+    batch_type_t<X> zip_hi(const simd_base<X>& lhs, const simd_base<X>& rhs);
+
     template <class X>
     typename simd_batch_traits<X>::batch_bool_type
     isnan(const simd_base<X>& x);

@@ -1777,6 +1783,36 @@ namespace xsimd
         return kernel::select(cond, a(), b());
     }
 
+    /**
+     * Unpacks and interleaves data from the LOW halves of batches \c lhs and \c rhs,
+     * storing the result in the return value.
+     * @param lhs a batch of integer, floating point or double precision values.
+     * @param rhs a batch of integer, floating point or double precision values.
+     * @return a batch of the interleaved values from the low halves.
+     */
+    template <class X>
+    inline batch_type_t<X> zip_lo(const simd_base<X>& lhs, const simd_base<X>& rhs)
+    {
+        using value_type = typename simd_batch_traits<X>::value_type;
+        using kernel = detail::batch_kernel<value_type, simd_batch_traits<X>::size>;
+        return kernel::zip_lo(lhs(), rhs());
+    }
+
+    /**
+     * Unpacks and interleaves data from the HIGH halves of batches \c lhs and \c rhs,
+     * storing the result in the return value.
+     * @param lhs a batch of integer, floating point or double precision values.
+     * @param rhs a batch of integer, floating point or double precision values.
+     * @return a batch of the interleaved values from the high halves.
+     */
+    template <class X>
+    inline batch_type_t<X> zip_hi(const simd_base<X>& lhs, const simd_base<X>& rhs)
+    {
+        using value_type = typename simd_batch_traits<X>::value_type;
+        using kernel = detail::batch_kernel<value_type, simd_batch_traits<X>::size>;
+        return kernel::zip_hi(lhs(), rhs());
+    }
+
     /**
      * Determines if the scalars in the given batch \c x are NaN values.
      * @param x batch of floating point values.
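
To make the new API concrete, here is a minimal usage sketch; the four-lane batch size and the values are illustrative assumptions, not part of the commit:

    #include "xsimd/xsimd.hpp"

    int main()
    {
        // Two illustrative four-lane batches.
        xsimd::batch<float, 4> a(1.f, 2.f, 3.f, 4.f);
        xsimd::batch<float, 4> b(5.f, 6.f, 7.f, 8.f);

        // Interleave the low halves:  {1, 5, 2, 6}
        xsimd::batch<float, 4> lo = xsimd::zip_lo(a, b);

        // Interleave the high halves: {3, 7, 4, 8}
        xsimd::batch<float, 4> hi = xsimd::zip_hi(a, b);

        return 0;
    }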

include/xsimd/types/xsimd_neon_double.hpp

Lines changed: 18 additions & 0 deletions
@@ -580,6 +580,24 @@ namespace xsimd
             return vbslq_f64(cond, a, b);
         }
 
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_f64(lhs, rhs);
+#else
+            return vcombine_f64(vget_low_f64(lhs), vget_low_f64(rhs));
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_f64(lhs, rhs);
+#else
+            return vcombine_f64(vget_high_f64(lhs), vget_high_f64(rhs));
+#endif
+        }
+
         static batch_bool_type isnan(const batch_type& x)
         {
             return !(x == x);
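
With only one double per half, the zip degenerates to taking one lane from each operand, which is why the pre-ARMv8 branch is a bare vcombine. A scalar model of the two-lane case, illustrative only and not part of the commit:

    #include <array>

    // Two-lane model: zip_lo keeps the low lane of each operand,
    // zip_hi keeps the high lane of each operand.
    std::array<double, 2> zip_lo2(const std::array<double, 2>& a, const std::array<double, 2>& b)
    {
        return {a[0], b[0]}; // {a0, a1}, {b0, b1} -> {a0, b0}
    }

    std::array<double, 2> zip_hi2(const std::array<double, 2>& a, const std::array<double, 2>& b)
    {
        return {a[1], b[1]}; // {a0, a1}, {b0, b1} -> {a1, b1}
    }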

include/xsimd/types/xsimd_neon_float.hpp

Lines changed: 20 additions & 0 deletions
@@ -639,6 +639,26 @@ namespace xsimd
             return vbslq_f32(cond, a, b);
         }
 
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_f32(lhs, rhs);
+#else
+            float32x2x2_t tmp = vzip_f32(vget_low_f32(lhs), vget_low_f32(rhs));
+            return vcombine_f32(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_f32(lhs, rhs);
+#else
+            float32x2x2_t tmp = vzip_f32(vget_high_f32(lhs), vget_high_f32(rhs));
+            return vcombine_f32(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
         static batch_bool_type isnan(const batch_type& x)
        {
             return !(x == x);
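
The pre-ARMv8 branch relies on the fact that a 64-bit vzip of the two low (or high) halves already produces the interleaved lanes, split across tmp.val[0] and tmp.val[1]; vcombine merely concatenates them. A scalar sketch of that equivalence, with illustrative values:

    #include <array>
    #include <cassert>

    int main()
    {
        std::array<float, 4> lhs{1.f, 2.f, 3.f, 4.f};
        std::array<float, 4> rhs{5.f, 6.f, 7.f, 8.f};

        // Model of vzip_f32 on the low halves {1, 2} and {5, 6}:
        // val[0] = {1, 5}, val[1] = {2, 6}.
        std::array<float, 2> val0{lhs[0], rhs[0]};
        std::array<float, 2> val1{lhs[1], rhs[1]};

        // Model of vcombine_f32(val[0], val[1]) -- the same result
        // vzip1q_f32 would give on the full registers.
        std::array<float, 4> zipped{val0[0], val0[1], val1[0], val1[1]};
        assert((zipped == std::array<float, 4>{1.f, 5.f, 2.f, 6.f}));

        return 0;
    }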

include/xsimd/types/xsimd_neon_int16.hpp

Lines changed: 20 additions & 0 deletions
@@ -344,6 +344,26 @@ namespace xsimd
         {
             return vbslq_s16(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_s16(lhs, rhs);
+#else
+            int16x4x2_t tmp = vzip_s16(vget_low_s16(lhs), vget_low_s16(rhs));
+            return vcombine_s16(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_s16(lhs, rhs);
+#else
+            int16x4x2_t tmp = vzip_s16(vget_high_s16(lhs), vget_high_s16(rhs));
+            return vcombine_s16(tmp.val[0], tmp.val[1]);
+#endif
+        }
     };
 }

include/xsimd/types/xsimd_neon_int32.hpp

Lines changed: 20 additions & 0 deletions
@@ -424,6 +424,26 @@ namespace xsimd
         {
             return vbslq_s32(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_s32(lhs, rhs);
+#else
+            int32x2x2_t tmp = vzip_s32(vget_low_s32(lhs), vget_low_s32(rhs));
+            return vcombine_s32(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_s32(lhs, rhs);
+#else
+            int32x2x2_t tmp = vzip_s32(vget_high_s32(lhs), vget_high_s32(rhs));
+            return vcombine_s32(tmp.val[0], tmp.val[1]);
+#endif
+        }
     };
 }

include/xsimd/types/xsimd_neon_int64.hpp

Lines changed: 18 additions & 0 deletions
@@ -439,6 +439,24 @@ namespace xsimd
         {
             return vbslq_s64(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_s64(lhs, rhs);
+#else
+            return vcombine_s64(vget_low_s64(lhs), vget_low_s64(rhs));
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_s64(lhs, rhs);
+#else
+            return vcombine_s64(vget_high_s64(lhs), vget_high_s64(rhs));
+#endif
+        }
     };
 }

include/xsimd/types/xsimd_neon_int8.hpp

Lines changed: 20 additions & 0 deletions
@@ -345,6 +345,26 @@ namespace xsimd
         {
             return vbslq_s8(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_s8(lhs, rhs);
+#else
+            int8x8x2_t tmp = vzip_s8(vget_low_s8(lhs), vget_low_s8(rhs));
+            return vcombine_s8(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_s8(lhs, rhs);
+#else
+            int8x8x2_t tmp = vzip_s8(vget_high_s8(lhs), vget_high_s8(rhs));
+            return vcombine_s8(tmp.val[0], tmp.val[1]);
+#endif
+        }
     };
 }

include/xsimd/types/xsimd_neon_uint16.hpp

Lines changed: 20 additions & 0 deletions
@@ -318,6 +318,26 @@ namespace xsimd
         {
             return vbslq_u16(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_u16(lhs, rhs);
+#else
+            uint16x4x2_t tmp = vzip_u16(vget_low_u16(lhs), vget_low_u16(rhs));
+            return vcombine_u16(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_u16(lhs, rhs);
+#else
+            uint16x4x2_t tmp = vzip_u16(vget_high_u16(lhs), vget_high_u16(rhs));
+            return vcombine_u16(tmp.val[0], tmp.val[1]);
+#endif
+        }
     };
 }

include/xsimd/types/xsimd_neon_uint32.hpp

Lines changed: 21 additions & 0 deletions
@@ -415,6 +415,27 @@ namespace xsimd
         {
             return vbslq_u32(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_u32(lhs, rhs);
+#else
+            uint32x2x2_t tmp = vzip_u32(vget_low_u32(lhs), vget_low_u32(rhs));
+            return vcombine_u32(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_u32(lhs, rhs);
+#else
+            uint32x2x2_t tmp = vzip_u32(vget_high_u32(lhs), vget_high_u32(rhs));
+            return vcombine_u32(tmp.val[0], tmp.val[1]);
+#endif
+        }
+
     };
 
     inline batch<uint32_t, 4> shift_left(const batch<uint32_t, 4>& lhs, int32_t n)

include/xsimd/types/xsimd_neon_uint64.hpp

Lines changed: 19 additions & 0 deletions
@@ -467,6 +467,25 @@ namespace xsimd
         {
             return vbslq_u64(cond, a, b);
         }
+
+        static batch_type zip_lo(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip1q_u64(lhs, rhs);
+#else
+            return vcombine_u64(vget_low_u64(lhs), vget_low_u64(rhs));
+#endif
+        }
+
+        static batch_type zip_hi(const batch_type& lhs, const batch_type& rhs)
+        {
+#if XSIMD_ARM_INSTR_SET >= XSIMD_ARM8_64_NEON_VERSION
+            return vzip2q_u64(lhs, rhs);
+#else
+            return vcombine_u64(vget_high_u64(lhs), vget_high_u64(rhs));
+#endif
+        }
+
     };
 
     inline batch<uint64_t, 2> shift_left(const batch<uint64_t, 2>& lhs, int32_t n)
