Skip to content

Commit 50a69bf

Browse files
Provide batch_bool::count() to count the number of positive values in a batch
Fix #1042
1 parent 9cdeaab commit 50a69bf

File tree

4 files changed

+83
-0
lines changed

4 files changed

+83
-0
lines changed

include/xsimd/arch/generic/xsimd_generic_logical.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
#include "./xsimd_generic_details.hpp"
1616

17+
#include <climits>
18+
1719
namespace xsimd
1820
{
1921

@@ -22,6 +24,49 @@ namespace xsimd
2224

2325
using namespace types;
2426

27+
// count
28+
template <class A, class T>
29+
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
30+
{
31+
uint64_t m = self.mask();
32+
XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 14)
33+
{
34+
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
35+
return (m * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
36+
}
37+
else
38+
{
39+
#if defined __has_builtin
40+
#if __has_builtin(__builtin_popcountg)
41+
#define builtin_popcount(v) __builtin_popcountg(v)
42+
#endif
43+
#endif
44+
45+
#ifdef builtin_popcount
46+
return builtin_popcount(m);
47+
#else
48+
// FIXME: we could do better by dispatching to the appropriate
49+
// popcount instruction depending on the arch...
50+
XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size <= 32)
51+
{
52+
uint32_t m32 = m;
53+
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
54+
m32 = m32 - ((m32 >> 1) & 0x55555555); // reuse input as temporary
55+
m32 = (m32 & 0x33333333) + ((m32 >> 2) & 0x33333333); // temp
56+
return (((m32 + (m32 >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; // count
57+
}
58+
else
59+
{
60+
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
61+
m = m - ((m >> 1) & (uint64_t) ~(uint64_t)0 / 3); // temp
62+
m = (m & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3); // temp
63+
m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
64+
return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
65+
}
66+
#endif
67+
}
68+
}
69+
2570
// from mask
2671
template <class A, class T>
2772
XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<generic>) noexcept

include/xsimd/arch/xsimd_emulated.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,20 @@ namespace xsimd
230230
return r;
231231
}
232232

233+
#if 0
234+
// count
235+
// Counts the bits set in the mask of x using the 64-bit parallel bit count.
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<emulated<N>>) noexcept
{
    // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
    const uint64_t all_ones = ~(uint64_t)0;
    const uint64_t every_2nd_bit = all_ones / 3;
    const uint64_t low_pair_of_4 = all_ones / 15 * 3;
    const uint64_t low_nibbles = all_ones / 255 * 15;
    const uint64_t low_bit_of_bytes = all_ones / 255;

    uint64_t bits = x.mask();
    // 2-bit partial counts
    bits = bits - ((bits >> 1) & every_2nd_bit);
    // 4-bit partial counts
    bits = (bits & low_pair_of_4) + ((bits >> 2) & low_pair_of_4);
    // 8-bit partial counts
    bits = (bits + (bits >> 4)) & low_nibbles;
    // sum the per-byte counts into the most significant byte and extract it
    return (bits * low_bit_of_bytes) >> ((sizeof(uint64_t) - 1) * CHAR_BIT);
}
245+
#endif
246+
233247
// store_complex
234248
namespace detail
235249
{

include/xsimd/types/xsimd_api.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,20 @@ namespace xsimd
631631
return kernel::cosh<A>(x, A {});
632632
}
633633

634+
/**
635+
* @ingroup batch_reducers
636+
*
637+
* Count the number of values set to true in the batch \c x
638+
* @param x boolean or batch of boolean
639+
* @return the result of the counting.
640+
*/
641+
template <class T, class A>
642+
XSIMD_INLINE size_t count(batch_bool<T, A> const& x) noexcept
643+
{
644+
detail::static_check_supported_config<T, A>();
645+
return kernel::count<A>(x, A {});
646+
}
647+
634648
/**
635649
* @ingroup batch_arithmetic
636650
*

test/test_batch_bool.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,14 @@ struct batch_bool_test
441441
CHECK_EQ(batch_bool_type::from_mask(bool_g.interspersed.mask()).mask(), bool_g.interspersed.mask());
442442
}
443443

444+
void test_count() const
445+
{
446+
auto bool_g = xsimd::get_bool<batch_bool_type> {};
447+
CHECK_EQ(count(bool_g.all_false), 0);
448+
CHECK_EQ(count(bool_g.all_true), batch_bool_type::size);
449+
CHECK_EQ(count(bool_g.half), batch_bool_type::size / 2);
450+
}
451+
444452
void test_comparison() const
445453
{
446454
auto bool_g = xsimd::get_bool<batch_bool_type> {};
@@ -485,6 +493,8 @@ TEST_CASE_TEMPLATE("[xsimd batch bool]", B, BATCH_TYPES)
485493

486494
SUBCASE("mask") { Test.test_mask(); }
487495

496+
SUBCASE("count") { Test.test_count(); }
497+
488498
SUBCASE("eq neq") { Test.test_comparison(); }
489499
}
490500
#endif

0 commit comments

Comments
 (0)