@@ -200,18 +200,18 @@ py_accumulate_over_axis(const dpctl::tensor::usm_ndarray &src,
200200 }
201201
202202 using dpctl::tensor::offset_utils::device_allocate_and_pack;
203- const auto & ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
203+ auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
204204 exec_q, host_task_events, simplified_iter_shape,
205205 simplified_iter_src_strides, simplified_iter_dst_strides, acc_shape,
206206 acc_src_strides, acc_dst_strides);
207- py::ssize_t *packed_shapes_and_strides = std::get<0 >(ptr_size_event_tuple);
208- if (packed_shapes_and_strides == nullptr ) {
209- throw std::runtime_error (" Unexpected error" );
210- }
207+ auto packed_shapes_and_strides_owner =
208+ std::move (std::get<0 >(ptr_size_event_tuple));
211209 const auto &copy_shapes_strides_ev = std::get<2 >(ptr_size_event_tuple);
210+ const py::ssize_t *packed_shapes_and_strides =
211+ packed_shapes_and_strides_owner.get ();
212212
213- py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
214- py::ssize_t *acc_shapes_and_strides =
213+ const py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
214+ const py::ssize_t *acc_shapes_and_strides =
215215 packed_shapes_and_strides + 3 * simplified_iter_shape.size ();
216216
217217 std::vector<sycl::event> all_deps;
@@ -224,14 +224,8 @@ py_accumulate_over_axis(const dpctl::tensor::usm_ndarray &src,
224224 iter_shape_and_strides, iter_src_offset, iter_dst_offset, acc_nd,
225225 acc_shapes_and_strides, dst_data, host_task_events, all_deps);
226226
227- sycl::event temp_cleanup_ev = exec_q.submit ([&](sycl::handler &cgh) {
228- cgh.depends_on (acc_ev);
229- const auto &ctx = exec_q.get_context ();
230- using dpctl::tensor::alloc_utils::sycl_free_noexcept;
231- cgh.host_task ([ctx, packed_shapes_and_strides] {
232- sycl_free_noexcept (packed_shapes_and_strides, ctx);
233- });
234- });
227+ sycl::event temp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free (
228+ exec_q, {acc_ev}, packed_shapes_and_strides_owner);
235229 host_task_events.push_back (temp_cleanup_ev);
236230
237231 return std::make_pair (
@@ -384,18 +378,18 @@ std::pair<sycl::event, sycl::event> py_accumulate_final_axis_include_initial(
384378 }
385379
386380 using dpctl::tensor::offset_utils::device_allocate_and_pack;
387- const auto & ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
381+ auto ptr_size_event_tuple = device_allocate_and_pack<py::ssize_t >(
388382 exec_q, host_task_events, simplified_iter_shape,
389383 simplified_iter_src_strides, simplified_iter_dst_strides, acc_shape,
390384 acc_src_strides, acc_dst_strides);
391- py::ssize_t *packed_shapes_and_strides = std::get<0 >(ptr_size_event_tuple);
392- if (packed_shapes_and_strides == nullptr ) {
393- throw std::runtime_error (" Unexpected error" );
394- }
385+ auto packed_shapes_and_strides_owner =
386+ std::move (std::get<0 >(ptr_size_event_tuple));
395387 const auto &copy_shapes_strides_ev = std::get<2 >(ptr_size_event_tuple);
388+ const py::ssize_t *packed_shapes_and_strides =
389+ packed_shapes_and_strides_owner.get ();
396390
397- py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
398- py::ssize_t *acc_shapes_and_strides =
391+ const py::ssize_t *iter_shape_and_strides = packed_shapes_and_strides;
392+ const py::ssize_t *acc_shapes_and_strides =
399393 packed_shapes_and_strides + 3 * simplified_iter_shape.size ();
400394
401395 std::vector<sycl::event> all_deps;
@@ -408,14 +402,8 @@ std::pair<sycl::event, sycl::event> py_accumulate_final_axis_include_initial(
408402 iter_shape_and_strides, iter_src_offset, iter_dst_offset, acc_nd,
409403 acc_shapes_and_strides, dst_data, host_task_events, all_deps);
410404
411- sycl::event temp_cleanup_ev = exec_q.submit ([&](sycl::handler &cgh) {
412- cgh.depends_on (acc_ev);
413- const auto &ctx = exec_q.get_context ();
414- using dpctl::tensor::alloc_utils::sycl_free_noexcept;
415- cgh.host_task ([ctx, packed_shapes_and_strides] {
416- sycl_free_noexcept (packed_shapes_and_strides, ctx);
417- });
418- });
405+ sycl::event temp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free (
406+ exec_q, {acc_ev}, packed_shapes_and_strides_owner);
419407 host_task_events.push_back (temp_cleanup_ev);
420408
421409 return std::make_pair (
0 commit comments