@@ -60,9 +60,9 @@ template <typename inputT, typename outputT> struct NonZeroIndicator
6060
// Call operator of NonZeroIndicator<inputT, outputT> (struct named in the hunk header).
// Maps an input value to an indicator of type outputT:
//   returns outputT(0) when `val` compares equal to inputT(0), otherwise outputT(1).
// The diff below only changes the three local constants from `constexpr`
// to `static constexpr`; the comparison and return expression are untouched.
6161 outputT operator ()(const inputT &val) const
6262 {
// Removed lines: automatic-storage constexpr locals.
63- constexpr outputT out_one (1 );
64- constexpr outputT out_zero (0 );
65- constexpr inputT val_zero (0 );
// Added lines: same names and initializers, now declared `static constexpr`.
63+ static constexpr outputT out_one (1 );
64+ static constexpr outputT out_zero (0 );
65+ static constexpr inputT val_zero (0 );
6666
// Equality against the zero of the input type selects the result.
6767 return (val == val_zero) ? out_zero : out_one;
6868 }
@@ -583,7 +583,7 @@ sycl::event update_local_chunks_1d(sycl::queue &exec_q,
583583 cgh.depends_on (dependent_event);
584584 cgh.use_kernel_bundle (kb);
585585
586- constexpr nwiT updates_per_wi = n_wi;
586+ static constexpr nwiT updates_per_wi = n_wi;
587587 const std::size_t n_items =
588588 ceiling_quotient<std::size_t >(src_size, sg_size * n_wi) * sg_size;
589589
@@ -594,8 +594,8 @@ sycl::event update_local_chunks_1d(sycl::queue &exec_q,
594594 cgh.parallel_for <UpdateKernelName>(
595595 ndRange,
596596 [chunk_size, src, src_size, local_scans](sycl::nd_item<1 > ndit) {
597- constexpr ScanOpT scan_op{};
598- constexpr outputT identity =
597+ static constexpr ScanOpT scan_op{};
598+ static constexpr outputT identity =
599599 su_ns::Identity<ScanOpT, outputT>::value;
600600
601601 const std::uint32_t lws = ndit.get_local_range (0 );
@@ -640,16 +640,17 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
640640 std::vector<sycl::event> &host_tasks,
641641 const std::vector<sycl::event> &depends = {})
642642{
643- constexpr ScanOpT scan_op{};
644- constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
643+ static constexpr ScanOpT scan_op{};
644+ static constexpr outputT identity =
645+ su_ns::Identity<ScanOpT, outputT>::value;
645646
646- constexpr std::size_t _iter_nelems = 1 ;
647+ static constexpr std::size_t _iter_nelems = 1 ;
647648
648649 using IterIndexerT = dpctl::tensor::offset_utils::TwoZeroOffsets_Indexer;
649- constexpr IterIndexerT _no_op_iter_indexer{};
650+ static constexpr IterIndexerT _no_op_iter_indexer{};
650651
651652 using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
652- constexpr NoOpIndexerT _no_op_indexer{};
653+ static constexpr NoOpIndexerT _no_op_indexer{};
653654
654655 std::size_t n_groups;
655656 sycl::event inc_scan_phase1_ev =
@@ -687,7 +688,7 @@ sycl::event inclusive_scan_iter_1d(sycl::queue &exec_q,
687688 outputT *local_scans = temp;
688689
689690 using NoOpTransformerT = NoOpTransformer<outputT>;
690- constexpr NoOpTransformerT _no_op_transformer{};
691+ static constexpr NoOpTransformerT _no_op_transformer{};
691692 std::size_t size_to_update = n_elems;
692693 while (n_groups_ > 1 ) {
693694
@@ -761,16 +762,16 @@ accumulate_1d_contig_impl(sycl::queue &q,
761762 dstT *dst_data_ptr = reinterpret_cast <dstT *>(dst);
762763
763764 using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
764- constexpr NoOpIndexerT flat_indexer{};
765- constexpr transformerT transformer{};
765+ static constexpr NoOpIndexerT flat_indexer{};
766+ static constexpr transformerT transformer{};
766767
767- constexpr std::size_t s0 = 0 ;
768- constexpr std::size_t s1 = 1 ;
768+ static constexpr std::size_t s0 = 0 ;
769+ static constexpr std::size_t s1 = 1 ;
769770
770771 sycl::event comp_ev;
771772 const sycl::device &dev = q.get_device ();
772773 if (dev.has (sycl::aspect::cpu)) {
773- constexpr nwiT n_wi_for_cpu = 8 ;
774+ static constexpr nwiT n_wi_for_cpu = 8 ;
774775 const std::uint32_t wg_size = 256 ;
775776 comp_ev = inclusive_scan_iter_1d<srcT, dstT, n_wi_for_cpu, NoOpIndexerT,
776777 transformerT, AccumulateOpT,
@@ -779,7 +780,7 @@ accumulate_1d_contig_impl(sycl::queue &q,
779780 flat_indexer, transformer, host_tasks, depends);
780781 }
781782 else {
782- constexpr nwiT n_wi_for_gpu = 4 ;
783+ static constexpr nwiT n_wi_for_gpu = 4 ;
783784 // base_scan_striped algorithm does not execute correctly
784785 // on HIP device with wg_size > 64
785786 const std::uint32_t wg_size =
@@ -829,7 +830,7 @@ sycl::event final_update_local_chunks(sycl::queue &exec_q,
829830 const std::uint32_t sg_size = krn.template get_info <
830831 sycl::info::kernel_device_specific::max_sub_group_size>(dev);
831832
832- constexpr nwiT updates_per_wi = n_wi;
833+ static constexpr nwiT updates_per_wi = n_wi;
833834 const std::size_t updates_per_sg = sg_size * updates_per_wi;
834835 const std::size_t update_nelems =
835836 ceiling_quotient (src_size, updates_per_sg) * sg_size;
@@ -845,8 +846,8 @@ sycl::event final_update_local_chunks(sycl::queue &exec_q,
845846 cgh.parallel_for <UpdateKernelName>(
846847 ndRange, [chunk_size, src_size, local_stride, src, local_scans,
847848 out_iter_indexer, out_indexer](sycl::nd_item<2 > ndit) {
848- constexpr ScanOpT scan_op{};
849- constexpr outputT identity =
849+ static constexpr ScanOpT scan_op{};
850+ static constexpr outputT identity =
850851 su_ns::Identity<ScanOpT, outputT>::value;
851852
852853 const std::uint32_t lws = ndit.get_local_range (1 );
@@ -898,8 +899,8 @@ sycl::event update_local_chunks(sycl::queue &exec_q,
898899 std::size_t local_stride,
899900 sycl::event dependent_event)
900901{
901- constexpr NoOpIndexer out_indexer{};
902- constexpr NoOpIndexer iter_out_indexer{};
902+ static constexpr NoOpIndexer out_indexer{};
903+ static constexpr NoOpIndexer iter_out_indexer{};
903904
904905 return final_update_local_chunks<UpdateKernelName, outputT, n_wi,
905906 NoOpIndexer, NoOpIndexer, ScanOpT>(
@@ -933,8 +934,9 @@ sycl::event inclusive_scan_iter(sycl::queue &exec_q,
933934 std::vector<sycl::event> &host_tasks,
934935 const std::vector<sycl::event> &depends = {})
935936{
936- constexpr ScanOpT scan_op{};
937- constexpr outputT identity = su_ns::Identity<ScanOpT, outputT>::value;
937+ static constexpr ScanOpT scan_op{};
938+ static constexpr outputT identity =
939+ su_ns::Identity<ScanOpT, outputT>::value;
938940
939941 using IterIndexerT =
940942 dpctl::tensor::offset_utils::TwoOffsets_CombinedIndexer<
@@ -977,9 +979,9 @@ sycl::event inclusive_scan_iter(sycl::queue &exec_q,
977979 outputT *local_scans = temp;
978980
979981 using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
980- constexpr NoOpIndexerT _no_op_indexer{};
982+ static constexpr NoOpIndexerT _no_op_indexer{};
981983 using NoOpTransformerT = NoOpTransformer<outputT>;
982- constexpr NoOpTransformerT _no_op_transformer{};
984+ static constexpr NoOpTransformerT _no_op_transformer{};
983985 std::size_t size_to_update = acc_nelems;
984986
985987 {
@@ -1142,15 +1144,15 @@ accumulate_strided_impl(sycl::queue &q,
11421144 iter_shape_strides,
11431145 iter_shape_strides + 2 * iter_nd};
11441146
1145- constexpr transformerT transformer{};
1147+ static constexpr transformerT transformer{};
11461148
1147- constexpr std::size_t s0 = 0 ;
1148- constexpr std::size_t s1 = 1 ;
1149+ static constexpr std::size_t s0 = 0 ;
1150+ static constexpr std::size_t s1 = 1 ;
11491151
11501152 const sycl::device &dev = q.get_device ();
11511153 sycl::event comp_ev;
11521154 if (dev.has (sycl::aspect::cpu)) {
1153- constexpr nwiT n_wi_for_cpu = 8 ;
1155+ static constexpr nwiT n_wi_for_cpu = 8 ;
11541156 const std::uint32_t wg_size = 256 ;
11551157 comp_ev =
11561158 inclusive_scan_iter<srcT, dstT, n_wi_for_cpu, InpIndexerT,
@@ -1161,7 +1163,7 @@ accumulate_strided_impl(sycl::queue &q,
11611163 out_axis_indexer, transformer, host_tasks, depends);
11621164 }
11631165 else {
1164- constexpr nwiT n_wi_for_gpu = 4 ;
1166+ static constexpr nwiT n_wi_for_gpu = 4 ;
11651167 // base_scan_striped algorithm does not execute correctly
11661168 // on HIP device with wg_size > 64
11671169 const std::uint32_t wg_size =
@@ -1198,18 +1200,18 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
11981200 cumsumT *cumsum_data_ptr = reinterpret_cast <cumsumT *>(cumsum);
11991201
12001202 using NoOpIndexerT = dpctl::tensor::offset_utils::NoOpIndexer;
1201- constexpr NoOpIndexerT flat_indexer{};
1202- constexpr transformerT transformer{};
1203+ static constexpr NoOpIndexerT flat_indexer{};
1204+ static constexpr transformerT transformer{};
12031205
1204- constexpr std::size_t s0 = 0 ;
1205- constexpr std::size_t s1 = 1 ;
1206- constexpr bool include_initial = false ;
1206+ static constexpr std::size_t s0 = 0 ;
1207+ static constexpr std::size_t s1 = 1 ;
1208+ static constexpr bool include_initial = false ;
12071209 using AccumulateOpT = sycl::plus<cumsumT>;
12081210
12091211 sycl::event comp_ev;
12101212 const sycl::device &dev = q.get_device ();
12111213 if (dev.has (sycl::aspect::cpu)) {
1212- constexpr nwiT n_wi_for_cpu = 8 ;
1214+ static constexpr nwiT n_wi_for_cpu = 8 ;
12131215 const std::uint32_t wg_size = 256 ;
12141216 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
12151217 NoOpIndexerT, transformerT,
@@ -1218,7 +1220,7 @@ std::size_t cumsum_val_contig_impl(sycl::queue &q,
12181220 flat_indexer, transformer, host_tasks, depends);
12191221 }
12201222 else {
1221- constexpr nwiT n_wi_for_gpu = 4 ;
1223+ static constexpr nwiT n_wi_for_gpu = 4 ;
12221224 // base_scan_striped algorithm does not execute correctly
12231225 // on HIP device with wg_size > 64
12241226 const std::uint32_t wg_size =
@@ -1313,17 +1315,17 @@ cumsum_val_strided_impl(sycl::queue &q,
13131315
13141316 using StridedIndexerT = dpctl::tensor::offset_utils::StridedIndexer;
13151317 const StridedIndexerT strided_indexer{nd, 0 , shape_strides};
1316- constexpr transformerT transformer{};
1318+ static constexpr transformerT transformer{};
13171319
1318- constexpr std::size_t s0 = 0 ;
1319- constexpr std::size_t s1 = 1 ;
1320- constexpr bool include_initial = false ;
1320+ static constexpr std::size_t s0 = 0 ;
1321+ static constexpr std::size_t s1 = 1 ;
1322+ static constexpr bool include_initial = false ;
13211323 using AccumulateOpT = sycl::plus<cumsumT>;
13221324
13231325 const sycl::device &dev = q.get_device ();
13241326 sycl::event comp_ev;
13251327 if (dev.has (sycl::aspect::cpu)) {
1326- constexpr nwiT n_wi_for_cpu = 8 ;
1328+ static constexpr nwiT n_wi_for_cpu = 8 ;
13271329 const std::uint32_t wg_size = 256 ;
13281330 comp_ev = inclusive_scan_iter_1d<maskT, cumsumT, n_wi_for_cpu,
13291331 StridedIndexerT, transformerT,
@@ -1332,7 +1334,7 @@ cumsum_val_strided_impl(sycl::queue &q,
13321334 strided_indexer, transformer, host_tasks, depends);
13331335 }
13341336 else {
1335- constexpr nwiT n_wi_for_gpu = 4 ;
1337+ static constexpr nwiT n_wi_for_gpu = 4 ;
13361338 // base_scan_striped algorithm does not execute correctly
13371339 // on HIP device with wg_size > 64
13381340 const std::uint32_t wg_size =
0 commit comments