Skip to content

Commit 41c1400

Browse files
committed
minor changes
1 parent 2b656bf commit 41c1400

File tree

1 file changed

+93
-171
lines changed

1 file changed

+93
-171
lines changed

examples/mandelbrot.cpp

Lines changed: 93 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <iostream>
1717
#include <string>
1818
#include <vector>
19+
#include <typeinfo> // operator typeid
1920

2021
#include "pico_bench.hpp"
2122

@@ -52,26 +53,31 @@ inline void writePPM(const std::string &fileName,
5253

5354
namespace xsimd {
5455

55-
template <std::size_t N>
56-
inline batch<int, N> mandel(const batch_bool<float, N> &_active,
57-
const batch<float, N> &c_re,
58-
const batch<float, N> &c_im,
56+
template <class arch>
57+
inline batch<int, arch> mandel(const batch_bool<float, arch> &_active,
58+
const batch<float, arch> &c_re,
59+
const batch<float, arch> &c_im,
5960
int maxIters)
6061
{
61-
batch<float, N> z_re = c_re;
62-
batch<float, N> z_im = c_im;
63-
batch<int, N> vi(0);
62+
using float_batch_type = batch<float, arch>;
63+
using int_batch_type = batch<int, arch>;
64+
65+
constexpr std::size_t N = float_batch_type::size;
66+
67+
float_batch_type z_re = c_re;
68+
float_batch_type z_im = c_im;
69+
int_batch_type vi(0);
6470

6571
for (int i = 0; i < maxIters; ++i)
6672
{
67-
auto active = _active & ((z_re * z_re + z_im * z_im) <= batch<float, N>(4.f));
73+
auto active = _active & ((z_re * z_re + z_im * z_im) <= float_batch_type(4.f));
6874
if (!xsimd::any(active))
6975
{
7076
break;
7177
}
7278

73-
batch<float, N> new_re = z_re * z_re - z_im * z_im;
74-
batch<float, N> new_im = 2.f * z_re * z_im;
79+
float_batch_type new_re = z_re * z_re - z_im * z_im;
80+
float_batch_type new_im = 2.f * z_re * z_im;
7581

7682
z_re = c_re + new_re;
7783
z_im = c_im + new_im;
@@ -82,7 +88,7 @@ namespace xsimd {
8288
return vi;
8389
}
8490

85-
template <std::size_t N>
91+
template <class arch>
8692
void mandelbrot(float x0,
8793
float y0,
8894
float x1,
@@ -92,29 +98,35 @@ namespace xsimd {
9298
int maxIters,
9399
int output[])
94100
{
101+
using float_batch_type = batch<float, arch>;
102+
using int_batch_type = batch<int, arch>;
103+
104+
constexpr std::size_t N = float_batch_type::size;
95105
float dx = (x1 - x0) / width;
96106
float dy = (y1 - y0) / height;
97107

98108
float arange[N];
99109
std::iota(&arange[0], &arange[N], 0.f);
100-
batch<float, N> programIndex(&arange[0], xsimd::aligned_mode());
110+
//float_batch_type programIndex(&arange[0], xsimd::aligned_mode());
111+
112+
auto programIndex = float_batch_type::load(&arange[0], xsimd::aligned_mode());
101113
// std::iota(programIndex.begin(), programIndex.end(), 0.f);
102114

103115
for (int j = 0; j < height; j++)
104116
{
105117
for (int i = 0; i < width; i += N)
106118
{
107-
batch<float, N> x(x0 + (i + programIndex) * dx);
108-
batch<float, N> y(y0 + j * dy);
119+
float_batch_type x(x0 + (i + programIndex) * dx);
120+
float_batch_type y(y0 + j * dy);
109121

110-
auto active = x < batch<float, N>(width);
122+
auto active = x < float_batch_type(width);
111123

112124
int base_index = (j * width + i);
113-
auto result = mandel(active, x, y, maxIters);
125+
auto result = mandel<arch>(active, x, y, maxIters);
114126

115127
// implement masked store!
116128
// xsimd::store_aligned(result, output + base_index, active);
117-
batch<int, N> prev_data(output + base_index);
129+
int_batch_type prev_data(output + base_index);
118130
select(bool_cast(active), result, prev_data)
119131
.store_aligned(output + base_index);
120132
}
@@ -217,6 +229,58 @@ namespace scalar {
217229

218230
} // namespace scalar
219231

232+
233+
234+
template<class arch, class bencher_t>
235+
void run_arch(
236+
bencher_t & bencher,
237+
float x0,
238+
float y0,
239+
float x1,
240+
float y1,
241+
int width,
242+
int height,
243+
int maxIters,
244+
std::vector<int, xsimd::aligned_allocator<int>> & buffer)
245+
{
246+
std::fill(buffer.begin(), buffer.end(), 0);
247+
auto stats = bencher([&]() {
248+
xsimd::mandelbrot<arch>(x0, y0, x1, y1, width, height, maxIters, buffer.data());
249+
});
250+
251+
252+
const float scalar_min = stats.min().count();
253+
254+
std::cout << '\n' << typeid(arch).name() <<" "<< stats << '\n';
255+
auto filename = std::string("mandelbrot_") + std::string(typeid(arch).name()) + std::string(".ppm");
256+
writePPM(filename.c_str(), width, height, buffer.data());
257+
258+
}
259+
260+
template<class T>
261+
struct RunMandelbrot;
262+
263+
template<class ... Arch>
264+
struct RunMandelbrot<xsimd::arch_list<Arch ...>>
265+
{
266+
template<class bencher_t>
267+
static void run(
268+
bencher_t & bencher,
269+
float x0,
270+
float y0,
271+
float x1,
272+
float y1,
273+
int width,
274+
int height,
275+
int maxIters,
276+
std::vector<int, xsimd::aligned_allocator<int>> & buffer)
277+
{
278+
using expand_type = int[];
279+
expand_type{(run_arch<Arch>(bencher, x0, y0,x1,x1,width,height, maxIters, buffer),0)...};
280+
}
281+
};
282+
283+
220284
int main()
221285
{
222286
using namespace std::chrono;
@@ -263,12 +327,20 @@ int main()
263327

264328
writePPM("mandelbrot_omp.ppm", width, height, buf.data());
265329

330+
331+
332+
RunMandelbrot<xsimd::supported_architectures>::run(bencher, x0, y0, x1, y1, width, height, maxIters, buf);
333+
334+
335+
336+
337+
266338
// xsimd_1 run //////////////////////////////////////////////////////////////
267339

268340
std::fill(buf.begin(), buf.end(), 0);
269341

270342
auto stats_1 = bencher([&]() {
271-
xsimd::mandelbrot<1>(x0, y0, x1, y1, width, height, maxIters, buf.data());
343+
xsimd::mandelbrot<xsimd::avx>(x0, y0, x1, y1, width, height, maxIters, buf.data());
272344
});
273345

274346
const float xsimd1_min = stats_1.min().count();
@@ -282,7 +354,7 @@ int main()
282354
std::fill(buf.begin(), buf.end(), 0);
283355

284356
auto stats_4 = bencher([&]() {
285-
xsimd::mandelbrot<4>(x0, y0, x1, y1, width, height, maxIters, buf.data());
357+
xsimd::mandelbrot<xsimd::avx>(x0, y0, x1, y1, width, height, maxIters, buf.data());
286358
});
287359

288360
const float xsimd4_min = stats_4.min().count();
@@ -296,7 +368,7 @@ int main()
296368
std::fill(buf.begin(), buf.end(), 0);
297369

298370
auto stats_8 = bencher([&]() {
299-
xsimd::mandelbrot<8>(x0, y0, x1, y1, width, height, maxIters, buf.data());
371+
xsimd::mandelbrot<xsimd::avx>(x0, y0, x1, y1, width, height, maxIters, buf.data());
300372
});
301373

302374
const float xsimd8_min = stats_8.min().count();
@@ -305,157 +377,7 @@ int main()
305377

306378
writePPM("mandelbrot_xsimd8.ppm", width, height, buf.data());
307379

308-
// xsimd_16 run /////////////////////////////////////////////////////////////
309-
310-
std::fill(buf.begin(), buf.end(), 0);
311-
312-
auto stats_16 = bencher([&]() {
313-
xsimd::mandelbrot<16>(x0, y0, x1, y1, width, height, maxIters, buf.data());
314-
});
315-
316-
const float xsimd16_min = stats_16.min().count();
317-
318-
std::cout << '\n' << "xsimd_16 " << stats_16 << '\n';
319-
320-
writePPM("mandelbrot_xsimd16.ppm", width, height, buf.data());
321-
322-
// conclusions //////////////////////////////////////////////////////////////
323-
324-
std::cout << '\n' << "Conclusions: " << '\n';
325-
326-
// scalar //
327-
328-
std::cout << '\n'
329-
<< "--> scalar was " << omp_min / scalar_min
330-
<< "x the speed of omp";
331-
332-
std::cout << '\n'
333-
<< "--> scalar was " << xsimd1_min / scalar_min
334-
<< "x the speed of xsimd_1";
335-
336-
std::cout << '\n'
337-
<< "--> scalar was " << xsimd4_min / scalar_min
338-
<< "x the speed of xsimd_4";
339-
340-
std::cout << '\n'
341-
<< "--> scalar was " << xsimd8_min / scalar_min
342-
<< "x the speed of xsimd_8";
343-
344-
std::cout << '\n'
345-
<< "--> scalar was " << xsimd16_min / scalar_min
346-
<< "x the speed of xsimd_16" << '\n';
347-
348-
// omp //
349-
350-
std::cout << '\n'
351-
<< "--> omp was " << scalar_min / omp_min
352-
<< "x the speed of scalar";
353-
354-
std::cout << '\n'
355-
<< "--> omp was " << xsimd1_min / omp_min
356-
<< "x the speed of xsimd_1";
357-
358-
std::cout << '\n'
359-
<< "--> omp was " << xsimd4_min / omp_min
360-
<< "x the speed of xsimd_4";
361-
362-
std::cout << '\n'
363-
<< "--> omp was " << xsimd8_min / omp_min
364-
<< "x the speed of xsimd_8";
365-
366-
std::cout << '\n'
367-
<< "--> omp was " << xsimd16_min / omp_min
368-
<< "x the speed of xsimd_16" << '\n';
369-
370-
// xsimd1 //
371-
372-
std::cout << '\n'
373-
<< "--> xsimd1 was " << scalar_min / xsimd1_min
374-
<< "x the speed of scalar";
375-
376-
std::cout << '\n'
377-
<< "--> xsimd1 was " << omp_min / xsimd1_min
378-
<< "x the speed of omp";
379-
380-
std::cout << '\n'
381-
<< "--> xsimd1 was " << xsimd4_min / xsimd1_min
382-
<< "x the speed of xsimd_4";
383-
384-
std::cout << '\n'
385-
<< "--> xsimd1 was " << xsimd8_min / xsimd1_min
386-
<< "x the speed of xsimd_8";
387-
388-
std::cout << '\n'
389-
<< "--> xsimd1 was " << xsimd16_min / xsimd1_min
390-
<< "x the speed of xsimd_16" << '\n';
391-
392-
// xsimd4 //
393-
394-
std::cout << '\n'
395-
<< "--> xsimd4 was " << scalar_min / xsimd4_min
396-
<< "x the speed of scalar";
397-
398-
std::cout << '\n'
399-
<< "--> xsimd4 was " << omp_min / xsimd4_min
400-
<< "x the speed of omp";
401-
402-
std::cout << '\n'
403-
<< "--> xsimd4 was " << xsimd1_min / xsimd4_min
404-
<< "x the speed of xsimd_1";
405-
406-
std::cout << '\n'
407-
<< "--> xsimd4 was " << xsimd8_min / xsimd4_min
408-
<< "x the speed of xsimd_8";
409-
410-
std::cout << '\n'
411-
<< "--> xsimd4 was " << xsimd16_min / xsimd4_min
412-
<< "x the speed of xsimd_16" << '\n';
413-
414-
// xsimd8 //
415-
416-
std::cout << '\n'
417-
<< "--> xsimd8 was " << scalar_min / xsimd8_min
418-
<< "x the speed of scalar";
419-
420-
std::cout << '\n'
421-
<< "--> xsimd8 was " << omp_min / xsimd8_min
422-
<< "x the speed of omp";
423-
424-
std::cout << '\n'
425-
<< "--> xsimd8 was " << xsimd1_min / xsimd8_min
426-
<< "x the speed of xsimd_1";
427-
428-
std::cout << '\n'
429-
<< "--> xsimd8 was " << xsimd4_min / xsimd8_min
430-
<< "x the speed of xsimd_4";
431-
432-
std::cout << '\n'
433-
<< "--> xsimd8 was " << xsimd16_min / xsimd8_min
434-
<< "x the speed of xsimd_16" << '\n';
435-
436-
// xsimd16 //
437-
438-
std::cout << '\n'
439-
<< "--> xsimd16 was " << scalar_min / xsimd16_min
440-
<< "x the speed of scalar";
441-
442-
std::cout << '\n'
443-
<< "--> xsimd16 was " << omp_min / xsimd16_min
444-
<< "x the speed of omp";
445-
446-
std::cout << '\n'
447-
<< "--> xsimd16 was " << xsimd1_min / xsimd16_min
448-
<< "x the speed of xsimd_1";
449-
450-
std::cout << '\n'
451-
<< "--> xsimd16 was " << xsimd4_min / xsimd16_min
452-
<< "x the speed of xsimd_4";
453-
454-
std::cout << '\n'
455-
<< "--> xsimd16 was " << xsimd8_min / xsimd16_min
456-
<< "x the speed of xsimd_8" << '\n';
457-
458-
std::cout << '\n' << "wrote output images to 'mandelbrot_[type].ppm'" << '\n';
380+
459381

460382
return 0;
461383
}

0 commit comments

Comments
 (0)