1616#include < iostream>
1717#include < string>
1818#include < vector>
19+ #include < typeinfo> // operator typeid
1920
2021#include " pico_bench.hpp"
2122
@@ -52,26 +53,31 @@ inline void writePPM(const std::string &fileName,
5253
5354namespace xsimd {
5455
55- template <std:: size_t N >
56- inline batch<int , N > mandel (const batch_bool<float , N > &_active,
57- const batch<float , N > &c_re,
58- const batch<float , N > &c_im,
56+ template <class arch >
57+ inline batch<int , arch > mandel (const batch_bool<float , arch > &_active,
58+ const batch<float , arch > &c_re,
59+ const batch<float , arch > &c_im,
5960 int maxIters)
6061 {
61- batch<float , N> z_re = c_re;
62- batch<float , N> z_im = c_im;
63- batch<int , N> vi (0 );
62+ using float_batch_type = batch<float , arch>;
63+ using int_batch_type = batch<int , arch>;
64+
65+ constexpr std::size_t N = float_batch_type::size;
66+
67+ float_batch_type z_re = c_re;
68+ float_batch_type z_im = c_im;
69+ int_batch_type vi (0 );
6470
6571 for (int i = 0 ; i < maxIters; ++i)
6672 {
67- auto active = _active & ((z_re * z_re + z_im * z_im) <= batch< float , N> (4 .f ));
73+ auto active = _active & ((z_re * z_re + z_im * z_im) <= float_batch_type (4 .f ));
6874 if (!xsimd::any (active))
6975 {
7076 break ;
7177 }
7278
73- batch< float , N> new_re = z_re * z_re - z_im * z_im;
74- batch< float , N> new_im = 2 .f * z_re * z_im;
79+ float_batch_type new_re = z_re * z_re - z_im * z_im;
80+ float_batch_type new_im = 2 .f * z_re * z_im;
7581
7682 z_re = c_re + new_re;
7783 z_im = c_im + new_im;
@@ -82,7 +88,7 @@ namespace xsimd {
8288 return vi;
8389 }
8490
85- template <std:: size_t N >
91+ template <class arch >
8692 void mandelbrot (float x0,
8793 float y0,
8894 float x1,
@@ -92,29 +98,35 @@ namespace xsimd {
9298 int maxIters,
9399 int output[])
94100 {
101+ using float_batch_type = batch<float , arch>;
102+ using int_batch_type = batch<int , arch>;
103+
104+ constexpr std::size_t N = float_batch_type::size;
95105 float dx = (x1 - x0) / width;
96106 float dy = (y1 - y0) / height;
97107
98108 float arange[N];
99109 std::iota (&arange[0 ], &arange[N], 0 .f );
100- batch<float , N> programIndex (&arange[0 ], xsimd::aligned_mode ());
110+ // float_batch_type programIndex(&arange[0], xsimd::aligned_mode());
111+
112+ auto programIndex = float_batch_type::load (&arange[0 ], xsimd::aligned_mode ());
101113 // std::iota(programIndex.begin(), programIndex.end(), 0.f);
102114
103115 for (int j = 0 ; j < height; j++)
104116 {
105117 for (int i = 0 ; i < width; i += N)
106118 {
107- batch< float , N> x (x0 + (i + programIndex) * dx);
108- batch< float , N> y (y0 + j * dy);
119+ float_batch_type x (x0 + (i + programIndex) * dx);
120+ float_batch_type y (y0 + j * dy);
109121
110- auto active = x < batch< float , N> (width);
122+ auto active = x < float_batch_type (width);
111123
112124 int base_index = (j * width + i);
113- auto result = mandel (active, x, y, maxIters);
125+ auto result = mandel<arch> (active, x, y, maxIters);
114126
115127 // implement masked store!
116128 // xsimd::store_aligned(result, output + base_index, active);
117- batch< int , N> prev_data (output + base_index);
129+ int_batch_type prev_data (output + base_index);
118130 select (bool_cast (active), result, prev_data)
119131 .store_aligned (output + base_index);
120132 }
@@ -217,6 +229,58 @@ namespace scalar {
217229
218230} // namespace scalar
219231
232+
233+
234+ template <class arch , class bencher_t >
235+ void run_arch (
236+ bencher_t & bencher,
237+ float x0,
238+ float y0,
239+ float x1,
240+ float y1,
241+ int width,
242+ int height,
243+ int maxIters,
244+ std::vector<int , xsimd::aligned_allocator<int >> & buffer)
245+ {
246+ std::fill (buffer.begin (), buffer.end (), 0 );
247+ auto stats = bencher ([&]() {
248+ xsimd::mandelbrot<arch>(x0, y0, x1, y1, width, height, maxIters, buffer.data ());
249+ });
250+
251+
252+ const float scalar_min = stats.min ().count ();
253+
254+ std::cout << ' \n ' << typeid (arch).name () <<" " << stats << ' \n ' ;
255+ auto filename = std::string (" mandelbrot_" ) + std::string (typeid (arch).name ()) + std::string (" .ppm" );
256+ writePPM (filename.c_str (), width, height, buffer.data ());
257+
258+ }
259+
260+ template <class T >
261+ struct RunMandelbrot ;
262+
263+ template <class ... Arch>
264+ struct RunMandelbrot <xsimd::arch_list<Arch ...>>
265+ {
266+ template <class bencher_t >
267+ static void run (
268+ bencher_t & bencher,
269+ float x0,
270+ float y0,
271+ float x1,
272+ float y1,
273+ int width,
274+ int height,
275+ int maxIters,
276+ std::vector<int , xsimd::aligned_allocator<int >> & buffer)
277+ {
278+ using expand_type = int [];
279+ expand_type{(run_arch<Arch>(bencher, x0, y0,x1,x1,width,height, maxIters, buffer),0 )...};
280+ }
281+ };
282+
283+
220284int main ()
221285{
222286 using namespace std ::chrono;
@@ -263,12 +327,20 @@ int main()
263327
264328 writePPM (" mandelbrot_omp.ppm" , width, height, buf.data ());
265329
330+
331+
332+ RunMandelbrot<xsimd::supported_architectures>::run (bencher, x0, y0, x1, y1, width, height, maxIters, buf);
333+
334+
335+
336+
337+
266338 // xsimd_1 run //////////////////////////////////////////////////////////////
267339
268340 std::fill (buf.begin (), buf.end (), 0 );
269341
270342 auto stats_1 = bencher ([&]() {
271- xsimd::mandelbrot<1 >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
343+ xsimd::mandelbrot<xsimd::avx >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
272344 });
273345
274346 const float xsimd1_min = stats_1.min ().count ();
@@ -282,7 +354,7 @@ int main()
282354 std::fill (buf.begin (), buf.end (), 0 );
283355
284356 auto stats_4 = bencher ([&]() {
285- xsimd::mandelbrot<4 >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
357+ xsimd::mandelbrot<xsimd::avx >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
286358 });
287359
288360 const float xsimd4_min = stats_4.min ().count ();
@@ -296,7 +368,7 @@ int main()
296368 std::fill (buf.begin (), buf.end (), 0 );
297369
298370 auto stats_8 = bencher ([&]() {
299- xsimd::mandelbrot<8 >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
371+ xsimd::mandelbrot<xsimd::avx >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
300372 });
301373
302374 const float xsimd8_min = stats_8.min ().count ();
@@ -305,157 +377,7 @@ int main()
305377
306378 writePPM (" mandelbrot_xsimd8.ppm" , width, height, buf.data ());
307379
308- // xsimd_16 run /////////////////////////////////////////////////////////////
309-
310- std::fill (buf.begin (), buf.end (), 0 );
311-
312- auto stats_16 = bencher ([&]() {
313- xsimd::mandelbrot<16 >(x0, y0, x1, y1, width, height, maxIters, buf.data ());
314- });
315-
316- const float xsimd16_min = stats_16.min ().count ();
317-
318- std::cout << ' \n ' << " xsimd_16 " << stats_16 << ' \n ' ;
319-
320- writePPM (" mandelbrot_xsimd16.ppm" , width, height, buf.data ());
321-
322- // conclusions //////////////////////////////////////////////////////////////
323-
324- std::cout << ' \n ' << " Conclusions: " << ' \n ' ;
325-
326- // scalar //
327-
328- std::cout << ' \n '
329- << " --> scalar was " << omp_min / scalar_min
330- << " x the speed of omp" ;
331-
332- std::cout << ' \n '
333- << " --> scalar was " << xsimd1_min / scalar_min
334- << " x the speed of xsimd_1" ;
335-
336- std::cout << ' \n '
337- << " --> scalar was " << xsimd4_min / scalar_min
338- << " x the speed of xsimd_4" ;
339-
340- std::cout << ' \n '
341- << " --> scalar was " << xsimd8_min / scalar_min
342- << " x the speed of xsimd_8" ;
343-
344- std::cout << ' \n '
345- << " --> scalar was " << xsimd16_min / scalar_min
346- << " x the speed of xsimd_16" << ' \n ' ;
347-
348- // omp //
349-
350- std::cout << ' \n '
351- << " --> omp was " << scalar_min / omp_min
352- << " x the speed of scalar" ;
353-
354- std::cout << ' \n '
355- << " --> omp was " << xsimd1_min / omp_min
356- << " x the speed of xsimd_1" ;
357-
358- std::cout << ' \n '
359- << " --> omp was " << xsimd4_min / omp_min
360- << " x the speed of xsimd_4" ;
361-
362- std::cout << ' \n '
363- << " --> omp was " << xsimd8_min / omp_min
364- << " x the speed of xsimd_8" ;
365-
366- std::cout << ' \n '
367- << " --> omp was " << xsimd16_min / omp_min
368- << " x the speed of xsimd_16" << ' \n ' ;
369-
370- // xsimd1 //
371-
372- std::cout << ' \n '
373- << " --> xsimd1 was " << scalar_min / xsimd1_min
374- << " x the speed of scalar" ;
375-
376- std::cout << ' \n '
377- << " --> xsimd1 was " << omp_min / xsimd1_min
378- << " x the speed of omp" ;
379-
380- std::cout << ' \n '
381- << " --> xsimd1 was " << xsimd4_min / xsimd1_min
382- << " x the speed of xsimd_4" ;
383-
384- std::cout << ' \n '
385- << " --> xsimd1 was " << xsimd8_min / xsimd1_min
386- << " x the speed of xsimd_8" ;
387-
388- std::cout << ' \n '
389- << " --> xsimd1 was " << xsimd16_min / xsimd1_min
390- << " x the speed of xsimd_16" << ' \n ' ;
391-
392- // xsimd4 //
393-
394- std::cout << ' \n '
395- << " --> xsimd4 was " << scalar_min / xsimd4_min
396- << " x the speed of scalar" ;
397-
398- std::cout << ' \n '
399- << " --> xsimd4 was " << omp_min / xsimd4_min
400- << " x the speed of omp" ;
401-
402- std::cout << ' \n '
403- << " --> xsimd4 was " << xsimd1_min / xsimd4_min
404- << " x the speed of xsimd_1" ;
405-
406- std::cout << ' \n '
407- << " --> xsimd4 was " << xsimd8_min / xsimd4_min
408- << " x the speed of xsimd_8" ;
409-
410- std::cout << ' \n '
411- << " --> xsimd4 was " << xsimd16_min / xsimd4_min
412- << " x the speed of xsimd_16" << ' \n ' ;
413-
414- // xsimd8 //
415-
416- std::cout << ' \n '
417- << " --> xsimd8 was " << scalar_min / xsimd8_min
418- << " x the speed of scalar" ;
419-
420- std::cout << ' \n '
421- << " --> xsimd8 was " << omp_min / xsimd8_min
422- << " x the speed of omp" ;
423-
424- std::cout << ' \n '
425- << " --> xsimd8 was " << xsimd1_min / xsimd8_min
426- << " x the speed of xsimd_1" ;
427-
428- std::cout << ' \n '
429- << " --> xsimd8 was " << xsimd4_min / xsimd8_min
430- << " x the speed of xsimd_4" ;
431-
432- std::cout << ' \n '
433- << " --> xsimd8 was " << xsimd16_min / xsimd8_min
434- << " x the speed of xsimd_16" << ' \n ' ;
435-
436- // xsimd16 //
437-
438- std::cout << ' \n '
439- << " --> xsimd16 was " << scalar_min / xsimd16_min
440- << " x the speed of scalar" ;
441-
442- std::cout << ' \n '
443- << " --> xsimd16 was " << omp_min / xsimd16_min
444- << " x the speed of omp" ;
445-
446- std::cout << ' \n '
447- << " --> xsimd16 was " << xsimd1_min / xsimd16_min
448- << " x the speed of xsimd_1" ;
449-
450- std::cout << ' \n '
451- << " --> xsimd16 was " << xsimd4_min / xsimd16_min
452- << " x the speed of xsimd_4" ;
453-
454- std::cout << ' \n '
455- << " --> xsimd16 was " << xsimd8_min / xsimd16_min
456- << " x the speed of xsimd_8" << ' \n ' ;
457-
458- std::cout << ' \n ' << " wrote output images to 'mandelbrot_[type].ppm'" << ' \n ' ;
380+
459381
460382 return 0 ;
461383}
0 commit comments