Skip to content

Commit cba930b

Browse files
Merge pull request #532 from DerThorsten/refactor_examples
Refactor examples
2 parents 4decfbd + 2127fc6 commit cba930b

File tree

1 file changed

+83
-210
lines changed

1 file changed

+83
-210
lines changed

examples/mandelbrot.cpp

Lines changed: 83 additions & 210 deletions
Original file line numberDiff line numberDiff line change
@@ -52,26 +52,31 @@ inline void writePPM(const std::string &fileName,
5252

5353
namespace xsimd {
5454

55-
template <std::size_t N>
56-
inline batch<int, N> mandel(const batch_bool<float, N> &_active,
57-
const batch<float, N> &c_re,
58-
const batch<float, N> &c_im,
55+
template <class arch>
56+
inline batch<int, arch> mandel(const batch_bool<float, arch> &_active,
57+
const batch<float, arch> &c_re,
58+
const batch<float, arch> &c_im,
5959
int maxIters)
6060
{
61-
batch<float, N> z_re = c_re;
62-
batch<float, N> z_im = c_im;
63-
batch<int, N> vi(0);
61+
using float_batch_type = batch<float, arch>;
62+
using int_batch_type = batch<int, arch>;
63+
64+
constexpr std::size_t N = float_batch_type::size;
65+
66+
float_batch_type z_re = c_re;
67+
float_batch_type z_im = c_im;
68+
int_batch_type vi(0);
6469

6570
for (int i = 0; i < maxIters; ++i)
6671
{
67-
auto active = _active & ((z_re * z_re + z_im * z_im) <= batch<float, N>(4.f));
72+
auto active = _active & ((z_re * z_re + z_im * z_im) <= float_batch_type(4.f));
6873
if (!xsimd::any(active))
6974
{
7075
break;
7176
}
7277

73-
batch<float, N> new_re = z_re * z_re - z_im * z_im;
74-
batch<float, N> new_im = 2.f * z_re * z_im;
78+
float_batch_type new_re = z_re * z_re - z_im * z_im;
79+
float_batch_type new_im = 2.f * z_re * z_im;
7580

7681
z_re = c_re + new_re;
7782
z_im = c_im + new_im;
@@ -82,7 +87,7 @@ namespace xsimd {
8287
return vi;
8388
}
8489

85-
template <std::size_t N>
90+
template <class arch>
8691
void mandelbrot(float x0,
8792
float y0,
8893
float x1,
@@ -92,29 +97,35 @@ namespace xsimd {
9297
int maxIters,
9398
int output[])
9499
{
100+
using float_batch_type = batch<float, arch>;
101+
using int_batch_type = batch<int, arch>;
102+
103+
constexpr std::size_t N = float_batch_type::size;
95104
float dx = (x1 - x0) / width;
96105
float dy = (y1 - y0) / height;
97106

98107
float arange[N];
99108
std::iota(&arange[0], &arange[N], 0.f);
100-
batch<float, N> programIndex(&arange[0], xsimd::aligned_mode());
109+
//float_batch_type programIndex(&arange[0], xsimd::aligned_mode());
110+
111+
auto programIndex = float_batch_type::load(&arange[0], xsimd::aligned_mode());
101112
// std::iota(programIndex.begin(), programIndex.end(), 0.f);
102113

103114
for (int j = 0; j < height; j++)
104115
{
105116
for (int i = 0; i < width; i += N)
106117
{
107-
batch<float, N> x(x0 + (i + programIndex) * dx);
108-
batch<float, N> y(y0 + j * dy);
118+
float_batch_type x(x0 + (i + programIndex) * dx);
119+
float_batch_type y(y0 + j * dy);
109120

110-
auto active = x < batch<float, N>(width);
121+
auto active = x < float_batch_type(width);
111122

112123
int base_index = (j * width + i);
113-
auto result = mandel(active, x, y, maxIters);
124+
auto result = mandel<arch>(active, x, y, maxIters);
114125

115126
// implement masked store!
116127
// xsimd::store_aligned(result, output + base_index, active);
117-
batch<int, N> prev_data(output + base_index);
128+
int_batch_type prev_data(output + base_index);
118129
select(bool_cast(active), result, prev_data)
119130
.store_aligned(output + base_index);
120131
}
@@ -217,6 +228,60 @@ namespace scalar {
217228

218229
} // namespace scalar
219230

231+
232+
233+
// run simd version of mandelbrot benchmark for a specific arch
234+
template<class arch, class bencher_t>
235+
void run_arch(
236+
bencher_t & bencher,
237+
float x0,
238+
float y0,
239+
float x1,
240+
float y1,
241+
int width,
242+
int height,
243+
int maxIters,
244+
std::vector<int, xsimd::aligned_allocator<int>> & buffer)
245+
{
246+
std::fill(buffer.begin(), buffer.end(), 0);
247+
auto stats = bencher([&]() {
248+
xsimd::mandelbrot<arch>(x0, y0, x1, y1, width, height, maxIters, buffer.data());
249+
});
250+
251+
const float scalar_min = stats.min().count();
252+
253+
std::cout << '\n' << arch::name() <<" "<< stats << '\n';
254+
auto filename = std::string("mandelbrot_") + std::string(arch::name()) + std::string(".ppm");
255+
writePPM(filename.c_str(), width, height, buffer.data());
256+
257+
}
258+
259+
template<class T>
260+
struct run_archlist;
261+
262+
// run simd version of mandelbrot benchmark for a list
263+
// of archs
264+
template<class ... Arch>
265+
struct run_archlist<xsimd::arch_list<Arch ...>>
266+
{
267+
template<class bencher_t>
268+
static void run(
269+
bencher_t & bencher,
270+
float x0,
271+
float y0,
272+
float x1,
273+
float y1,
274+
int width,
275+
int height,
276+
int maxIters,
277+
std::vector<int, xsimd::aligned_allocator<int>> & buffer)
278+
{
279+
using expand_type = int[];
280+
expand_type{(run_arch<Arch>(bencher, x0, y0,x1,x1,width,height, maxIters, buffer),0)...};
281+
}
282+
};
283+
284+
220285
int main()
221286
{
222287
using namespace std::chrono;
@@ -263,199 +328,7 @@ int main()
263328

264329
writePPM("mandelbrot_omp.ppm", width, height, buf.data());
265330

266-
// xsimd_1 run //////////////////////////////////////////////////////////////
267-
268-
std::fill(buf.begin(), buf.end(), 0);
269-
270-
auto stats_1 = bencher([&]() {
271-
xsimd::mandelbrot<1>(x0, y0, x1, y1, width, height, maxIters, buf.data());
272-
});
273-
274-
const float xsimd1_min = stats_1.min().count();
275-
276-
std::cout << '\n' << "xsimd_1 " << stats_1 << '\n';
277-
278-
writePPM("mandelbrot_xsimd1.ppm", width, height, buf.data());
279-
280-
// xsimd_4 run //////////////////////////////////////////////////////////////
281-
282-
std::fill(buf.begin(), buf.end(), 0);
283-
284-
auto stats_4 = bencher([&]() {
285-
xsimd::mandelbrot<4>(x0, y0, x1, y1, width, height, maxIters, buf.data());
286-
});
287-
288-
const float xsimd4_min = stats_4.min().count();
289-
290-
std::cout << '\n' << "xsimd_4 " << stats_4 << '\n';
291-
292-
writePPM("mandelbrot_xsimd4.ppm", width, height, buf.data());
293-
294-
// xsimd_8 run //////////////////////////////////////////////////////////////
295-
296-
std::fill(buf.begin(), buf.end(), 0);
297-
298-
auto stats_8 = bencher([&]() {
299-
xsimd::mandelbrot<8>(x0, y0, x1, y1, width, height, maxIters, buf.data());
300-
});
301-
302-
const float xsimd8_min = stats_8.min().count();
303-
304-
std::cout << '\n' << "xsimd_8 " << stats_8 << '\n';
305-
306-
writePPM("mandelbrot_xsimd8.ppm", width, height, buf.data());
307-
308-
// xsimd_16 run /////////////////////////////////////////////////////////////
309-
310-
std::fill(buf.begin(), buf.end(), 0);
311-
312-
auto stats_16 = bencher([&]() {
313-
xsimd::mandelbrot<16>(x0, y0, x1, y1, width, height, maxIters, buf.data());
314-
});
315-
316-
const float xsimd16_min = stats_16.min().count();
317-
318-
std::cout << '\n' << "xsimd_16 " << stats_16 << '\n';
319-
320-
writePPM("mandelbrot_xsimd16.ppm", width, height, buf.data());
321-
322-
// conclusions //////////////////////////////////////////////////////////////
323-
324-
std::cout << '\n' << "Conclusions: " << '\n';
325-
326-
// scalar //
327-
328-
std::cout << '\n'
329-
<< "--> scalar was " << omp_min / scalar_min
330-
<< "x the speed of omp";
331-
332-
std::cout << '\n'
333-
<< "--> scalar was " << xsimd1_min / scalar_min
334-
<< "x the speed of xsimd_1";
335-
336-
std::cout << '\n'
337-
<< "--> scalar was " << xsimd4_min / scalar_min
338-
<< "x the speed of xsimd_4";
339-
340-
std::cout << '\n'
341-
<< "--> scalar was " << xsimd8_min / scalar_min
342-
<< "x the speed of xsimd_8";
343-
344-
std::cout << '\n'
345-
<< "--> scalar was " << xsimd16_min / scalar_min
346-
<< "x the speed of xsimd_16" << '\n';
347-
348-
// omp //
349-
350-
std::cout << '\n'
351-
<< "--> omp was " << scalar_min / omp_min
352-
<< "x the speed of scalar";
353-
354-
std::cout << '\n'
355-
<< "--> omp was " << xsimd1_min / omp_min
356-
<< "x the speed of xsimd_1";
357-
358-
std::cout << '\n'
359-
<< "--> omp was " << xsimd4_min / omp_min
360-
<< "x the speed of xsimd_4";
361-
362-
std::cout << '\n'
363-
<< "--> omp was " << xsimd8_min / omp_min
364-
<< "x the speed of xsimd_8";
365-
366-
std::cout << '\n'
367-
<< "--> omp was " << xsimd16_min / omp_min
368-
<< "x the speed of xsimd_16" << '\n';
369-
370-
// xsimd1 //
371-
372-
std::cout << '\n'
373-
<< "--> xsimd1 was " << scalar_min / xsimd1_min
374-
<< "x the speed of scalar";
375-
376-
std::cout << '\n'
377-
<< "--> xsimd1 was " << omp_min / xsimd1_min
378-
<< "x the speed of omp";
379-
380-
std::cout << '\n'
381-
<< "--> xsimd1 was " << xsimd4_min / xsimd1_min
382-
<< "x the speed of xsimd_4";
383-
384-
std::cout << '\n'
385-
<< "--> xsimd1 was " << xsimd8_min / xsimd1_min
386-
<< "x the speed of xsimd_8";
387-
388-
std::cout << '\n'
389-
<< "--> xsimd1 was " << xsimd16_min / xsimd1_min
390-
<< "x the speed of xsimd_16" << '\n';
391-
392-
// xsimd4 //
393-
394-
std::cout << '\n'
395-
<< "--> xsimd4 was " << scalar_min / xsimd4_min
396-
<< "x the speed of scalar";
397-
398-
std::cout << '\n'
399-
<< "--> xsimd4 was " << omp_min / xsimd4_min
400-
<< "x the speed of omp";
401-
402-
std::cout << '\n'
403-
<< "--> xsimd4 was " << xsimd1_min / xsimd4_min
404-
<< "x the speed of xsimd_1";
405-
406-
std::cout << '\n'
407-
<< "--> xsimd4 was " << xsimd8_min / xsimd4_min
408-
<< "x the speed of xsimd_8";
409-
410-
std::cout << '\n'
411-
<< "--> xsimd4 was " << xsimd16_min / xsimd4_min
412-
<< "x the speed of xsimd_16" << '\n';
413-
414-
// xsimd8 //
415-
416-
std::cout << '\n'
417-
<< "--> xsimd8 was " << scalar_min / xsimd8_min
418-
<< "x the speed of scalar";
419-
420-
std::cout << '\n'
421-
<< "--> xsimd8 was " << omp_min / xsimd8_min
422-
<< "x the speed of omp";
423-
424-
std::cout << '\n'
425-
<< "--> xsimd8 was " << xsimd1_min / xsimd8_min
426-
<< "x the speed of xsimd_1";
427-
428-
std::cout << '\n'
429-
<< "--> xsimd8 was " << xsimd4_min / xsimd8_min
430-
<< "x the speed of xsimd_4";
431-
432-
std::cout << '\n'
433-
<< "--> xsimd8 was " << xsimd16_min / xsimd8_min
434-
<< "x the speed of xsimd_16" << '\n';
435-
436-
// xsimd16 //
437-
438-
std::cout << '\n'
439-
<< "--> xsimd16 was " << scalar_min / xsimd16_min
440-
<< "x the speed of scalar";
441-
442-
std::cout << '\n'
443-
<< "--> xsimd16 was " << omp_min / xsimd16_min
444-
<< "x the speed of omp";
445-
446-
std::cout << '\n'
447-
<< "--> xsimd16 was " << xsimd1_min / xsimd16_min
448-
<< "x the speed of xsimd_1";
449-
450-
std::cout << '\n'
451-
<< "--> xsimd16 was " << xsimd4_min / xsimd16_min
452-
<< "x the speed of xsimd_4";
453-
454-
std::cout << '\n'
455-
<< "--> xsimd16 was " << xsimd8_min / xsimd16_min
456-
<< "x the speed of xsimd_8" << '\n';
457-
458-
std::cout << '\n' << "wrote output images to 'mandelbrot_[type].ppm'" << '\n';
331+
run_archlist<xsimd::supported_architectures>::run(bencher, x0, y0, x1, y1, width, height, maxIters, buf);
459332

460333
return 0;
461334
}

0 commit comments

Comments
 (0)