@@ -574,7 +574,7 @@ template <typename inputT,
574574 typename ScanOpT,
575575 bool include_initial>
576576sycl::event inclusive_scan_iter_1d (sycl::queue &exec_q,
577- const std::size_t wg_size,
577+ const std::uint32_t wg_size,
578578 const std::size_t n_elems,
579579 const inputT *input,
580580 outputT *output,
@@ -768,7 +768,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
768768 const sycl::device &dev = q.get_device ();
769769 if (dev.has (sycl::aspect::cpu)) {
770770 constexpr nwiT n_wi_for_cpu = 8 ;
771- const std::size_t wg_size = 256 ;
771+ const std::uint32_t wg_size = 256 ;
772772 comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_cpu, NoOpIndexerT,
773773 transformerT, AccumulateOpT,
774774 include_initial>(
@@ -779,7 +779,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
779779 constexpr nwiT n_wi_for_gpu = 4 ;
780780 // base_scan_striped algorithm does not execute correctly
781781 // on HIP device with wg_size > 64
782- const std::size_t wg_size =
782+ const std::uint32_t wg_size =
783783 (q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
784784 comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_gpu, NoOpIndexerT,
785785 transformerT, AccumulateOpT,
@@ -812,7 +812,7 @@ template <typename inputT,
812812 typename ScanOpT,
813813 bool include_initial>
814814sycl::event inclusive_scan_iter (sycl::queue &exec_q,
815- const std::size_t wg_size,
815+ const std::uint32_t wg_size,
816816 const std::size_t iter_nelems,
817817 const std::size_t acc_nelems,
818818 const inputT *input,
@@ -1173,7 +1173,7 @@ accumulate_strided_impl(sycl::queue &q,
11731173 sycl::event comp_ev;
11741174 if (dev.has (sycl::aspect::cpu)) {
11751175 constexpr nwiT n_wi_for_cpu = 8 ;
1176- const std::size_t wg_size = 256 ;
1176+ const std::uint32_t wg_size = 256 ;
11771177 comp_ev =
11781178 inclusive_scan_iter<srcT, dstT, n_wi_for_cpu, InpIndexerT,
11791179 OutIndexerT, InpIndexerT, OutIndexerT,
@@ -1186,7 +1186,7 @@ accumulate_strided_impl(sycl::queue &q,
11861186 constexpr nwiT n_wi_for_gpu = 4 ;
11871187 // base_scan_striped algorithm does not execute correctly
11881188 // on HIP device with wg_size > 64
1189- const std::size_t wg_size =
1189+ const std::uint32_t wg_size =
11901190 (q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
11911191 comp_ev =
11921192 inclusive_scan_iter<srcT, dstT, n_wi_for_gpu, InpIndexerT,
@@ -1232,7 +1232,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
12321232 const sycl::device &dev = q.get_device ();
12331233 if (dev.has (sycl::aspect::cpu)) {
12341234 constexpr nwiT n_wi_for_cpu = 8 ;
1235- const std::size_t wg_size = 256 ;
1235+ const std::uint32_t wg_size = 256 ;
12361236 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
12371237 NoOpIndexerT, transformerT,
12381238 AccumulateOpT, include_initial>(
@@ -1243,7 +1243,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
12431243 constexpr nwiT n_wi_for_gpu = 4 ;
12441244 // base_scan_striped algorithm does not execute correctly
12451245 // on HIP device with wg_size > 64
1246- const std::size_t wg_size =
1246+ const std::uint32_t wg_size =
12471247 (q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
12481248 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
12491249 NoOpIndexerT, transformerT,
@@ -1346,7 +1346,7 @@ cumsum_val_strided_impl(sycl::queue &q,
13461346 sycl::event comp_ev;
13471347 if (dev.has (sycl::aspect::cpu)) {
13481348 constexpr nwiT n_wi_for_cpu = 8 ;
1349- const std::size_t wg_size = 256 ;
1349+ const std::uint32_t wg_size = 256 ;
13501350 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
13511351 StridedIndexerT, transformerT,
13521352 AccumulateOpT, include_initial>(
@@ -1357,7 +1357,7 @@ cumsum_val_strided_impl(sycl::queue &q,
13571357 constexpr nwiT n_wi_for_gpu = 4 ;
13581358 // base_scan_striped algorithm does not execute correctly
13591359 // on HIP device with wg_size > 64
1360- const std::size_t wg_size =
1360+ const std::uint32_t wg_size =
13611361 (q.get_backend () == sycl::backend::ext_oneapi_hip) ? 64 : 256 ;
13621362 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_gpu,
13631363 StridedIndexerT, transformerT,
0 commit comments