@@ -33,7 +33,6 @@ static const auto n_samples = 120'000'000;
3333
3434double estimate_pi (sycl::queue& q, size_t n_points) {
3535 double estimated_pi; // Estimated value of Pi
36- size_t n_under_curve = 0 ; // Number of points fallen under the curve
3736
3837 // Step 1. Generate n_points * 2 random numbers
3938 // 1.1. Generator initialization
@@ -42,15 +41,17 @@ double estimate_pi(sycl::queue& q, size_t n_points) {
4241 // Create an object of distribution (by default float, a = 0.0f, b = 1.0f)
4342 mkl::rng::uniform distr;
4443
45- float * rng_ptr = sycl::malloc_device <float >(n_points * 2 , q);
44+ float * rng_ptr = sycl::malloc_shared <float >(n_points * 2 , q);
4645
4746 // 1.2. Random number generation
4847 auto event = mkl::rng::generate (distr, engine, n_points * 2 , rng_ptr);
4948
5049 // Step 2. Count points under curve (x ^ 2 + y ^ 2 < 1.0f)
51- size_t count_per_thread = 32 ;
50+ constexpr size_t count_per_thread = 32 ;
51+ size_t *n_under_curve = sycl::malloc_host<size_t >(1 , q); // Number of points fallen under the curve
52+ *n_under_curve = 0 ;
53+ auto reductor = sycl::reduction (n_under_curve, size_t (0 ), std::plus<size_t >{});
5254
53- auto reductor = sycl::reduction (&n_under_curve, size_t (0 ), std::plus<size_t >{});
5455 q.parallel_for (sycl::range<1 >(n_points / count_per_thread), event, reductor,
5556 [=](sycl::item<1 > item, auto & sum) {
5657 sycl::vec<float , 2 > r;
@@ -65,9 +66,10 @@ double estimate_pi(sycl::queue& q, size_t n_points) {
6566 }).wait_and_throw ();
6667
6768 // Step 3. Calculate approximated value of Pi
68- estimated_pi = n_under_curve / ((double )n_points) * 4.0 ;
69+ estimated_pi = * n_under_curve / ((double )n_points) * 4.0 ;
6970
7071 sycl::free (rng_ptr, q);
72+ sycl::free (n_under_curve, q);
7173
7274 return estimated_pi;
7375
0 commit comments