1515
1616namespace xsimd
1717{
18- template <class I1 , class I2 , class O1 , class UF >
19- void transform (I1 first, I2 last, O1 out_first, UF&& f)
18+ template <class I1 , class I2 , class O1 , class UF , class UFB >
19+ void transform_batch (I1 first, I2 last, O1 out_first, UF&& f, UFB&& fb )
2020 {
2121 using value_type = typename std::decay<decltype (*first)>::type;
2222 using traits = simd_traits<value_type>;
@@ -43,7 +43,7 @@ namespace xsimd
4343 for (std::size_t i = align_begin; i < align_end; i += simd_size)
4444 {
4545 xsimd::load_aligned (&first[i], batch);
46- xsimd::store_aligned (&out_first[i], f (batch));
46+ xsimd::store_aligned (&out_first[i], fb (batch));
4747 }
4848
4949 for (std::size_t i = align_end; i < size; ++i)
@@ -62,7 +62,7 @@ namespace xsimd
6262 for (std::size_t i = align_begin; i < align_end; i += simd_size)
6363 {
6464 xsimd::load_aligned (&first[i], batch);
65- xsimd::store_unaligned (&out_first[i], f (batch));
65+ xsimd::store_unaligned (&out_first[i], fb (batch));
6666 }
6767
6868 for (std::size_t i = align_end; i < size; ++i)
@@ -72,8 +72,14 @@ namespace xsimd
7272 }
7373 }
7474
75- template <class I1 , class I2 , class I3 , class O1 , class UF >
76- void transform (I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
// Unary transform convenience overload.
// Hands the work to transform_batch, supplying the single callable `f` for
// both of its functor parameters (the second one, `fb`, is the functor that
// transform_batch applies to loaded batches).
template <class I1, class I2, class O1, class UF>
void transform(I1 first, I2 last, O1 out_first, UF&& f)
{
    // `f` is named, hence an lvalue: it is deliberately passed twice without
    // forwarding so that neither use can move from it.
    auto& op = f;
    transform_batch(first, last, out_first, op, op);
}
80+
81+ template <class I1 , class I2 , class I3 , class O1 , class UF , class UFB >
82+ void transform_batch (I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f, UFB&& fb)
7783 {
7884 using value_type = typename std::decay<decltype (*first_1)>::type;
7985 using traits = simd_traits<value_type>;
@@ -102,7 +108,7 @@ namespace xsimd
102108 { \
103109 xsimd::A1 (&first_1[i], batch_1); \
104110 xsimd::A2 (&first_2[i], batch_2); \
105- xsimd::A3 (&out_first[i], f (batch_1, batch_2)); \
111+ xsimd::A3 (&out_first[i], fb (batch_1, batch_2)); \
106112 } \
107113 \
108114 for (std::size_t i = align_end; i < size; ++i) \
@@ -130,6 +136,11 @@ namespace xsimd
130136 #undef XSIMD_LOOP_MACRO
131137 }
132138
// Binary transform convenience overload.
// Hands the work to the two-input transform_batch, supplying the single
// callable `f` for both of its functor parameters (the second one, `fb`, is
// the functor that transform_batch applies to pairs of loaded batches).
template <class I1, class I2, class I3, class O1, class UF>
void transform(I1 first_1, I2 last_1, I3 first_2, O1 out_first, UF&& f)
{
    // `f` is named, hence an lvalue: it is deliberately passed twice without
    // forwarding so that neither use can move from it.
    auto& op = f;
    transform_batch(first_1, last_1, first_2, out_first, op, op);
}
133144
134145 // TODO: Remove this once we drop C++11 support
135146 namespace detail
@@ -141,9 +152,8 @@ namespace xsimd
141152 };
142153 }
143154
144-
145- template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction = detail::plus>
146- Init reduce (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
155+ template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction , class BinaryFunctionBatch >
156+ Init reduce_batch (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun, BinaryFunctionBatch&& binfun_batch)
147157 {
148158 using value_type = typename std::decay<decltype (*first)>::type;
149159 using traits = simd_traits<value_type>;
@@ -180,7 +190,7 @@ namespace xsimd
180190 for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
181191 {
182192 xsimd::load_aligned (ptr, batch);
183- batch_init = binfun (batch_init, batch);
193+ batch_init = binfun_batch (batch_init, batch);
184194 }
185195
186196 // reduce across batch
@@ -197,6 +207,93 @@ namespace xsimd
197207 return init;
198208 }
199209
210+ template <class Iterator1 , class Iterator2 , class Init , class BinaryFunction = detail::plus>
211+ Init reduce (Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus{})
212+ {
213+ return reduce_batch (first, last, init, binfun, binfun);
214+ }
215+
216+ namespace detail
217+ {
218+ template <class T >
219+ struct count_batch
220+ {
221+ count_batch (T value)
222+ : value(value)
223+ {}
224+
225+ count_batch (const count_batch<T>&) = default ;
226+ count_batch (count_batch<T>&&) = default ;
227+
228+ template <class B >
229+ std::size_t operator ()(const B& b)
230+ {
231+ static auto zero = B (T (0 ));
232+ static auto one = B (T (1 ));
233+ return static_cast <std::size_t >(xsimd::hadd (xsimd::select (b == value, one, zero)));
234+ }
235+
236+ private:
237+ T value;
238+ };
239+ }
240+
241+ template <class Iterator1 , class Iterator2 , class UnaryPredicate , class UnaryPredicateBatch >
242+ std::size_t count_if (Iterator1 first, Iterator2 last, UnaryPredicate&& predicate, UnaryPredicateBatch&& predicate_batch)
243+ {
244+ using value_type = typename std::decay<decltype (*first)>::type;
245+ using traits = simd_traits<value_type>;
246+ using batch_type = typename traits::type;
247+
248+ std::size_t size = static_cast <std::size_t >(std::distance (first, last));
249+ constexpr std::size_t simd_size = traits::size;
250+
251+ std::size_t counter (0 );
252+ if (size < simd_size)
253+ {
254+ while (first != last)
255+ {
256+ counter += predicate (*first++);
257+ }
258+ return counter;
259+ }
260+
261+ const auto * const ptr_begin = &(*first);
262+
263+ std::size_t align_begin = xsimd::get_alignment_offset (ptr_begin, size, simd_size);
264+ std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1 ));
265+
266+ // reduce initial unaligned part
267+ for (std::size_t i = 0 ; i < align_begin; ++i)
268+ {
269+ counter += predicate (first[i]);
270+ }
271+
272+ // reduce aligned part
273+ batch_type batch;
274+ auto ptr = ptr_begin + align_begin;
275+ for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size)
276+ {
277+ xsimd::load_aligned (ptr, batch);
278+ counter += predicate_batch (batch);
279+ }
280+
281+ // reduce final unaligned part
282+ for (std::size_t i = align_end; i < size; ++i)
283+ {
284+ counter += predicate (first[i]);
285+ }
286+
287+ return counter;
288+ }
289+
290+ template <class Iterator1 , class Iterator2 , class T >
291+ std::size_t count (Iterator1 first, Iterator2 last, const T& value)
292+ {
293+ return count_if (first, last,
294+ [&value](const T& x) { return value == x; }, detail::count_batch<T>{value});
295+ }
296+
200297}
201298
202299#endif
0 commit comments