Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions onemath/sycl/blas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,6 @@ if (SYCL_COMPILER MATCHES "adaptivecpp" OR ${CMAKE_CXX_COMPILER} MATCHES "acpp|s
set(BLAS_ENABLE_COMPLEX OFF PARENT_SCOPE)
endif()
endif()
if (BLAS_ENABLE_USM)
message(STATUS "USM API is not supported on AdaptiveCpp/hipSYCL. USM API is disabled")
set(BLAS_ENABLE_USM OFF)
if (NOT ${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
set(BLAS_ENABLE_USM OFF PARENT_SCOPE)
endif()
endif()
endif()

add_library(onemath_sycl_blas INTERFACE)
Expand Down
6 changes: 0 additions & 6 deletions onemath/sycl/blas/include/sb_handle/handle.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,19 @@ class SB_Handle {
using event_t = std::vector<sycl::event>;
inline SB_Handle(queue_t q)
:
#ifndef __ADAPTIVECPP__
tempMemPool_(nullptr),
#endif
q_(q),
workGroupSize_(helper::get_work_group_size(q)),
localMemorySupport_(helper::has_local_memory(q)),
computeUnits_(helper::get_num_compute_units(q)) {
}

#ifndef __ADAPTIVECPP__
inline SB_Handle(Temp_Mem_Pool* tmp)
: tempMemPool_(tmp),
q_(tmp->get_queue()),
workGroupSize_(helper::get_work_group_size(q_)),
localMemorySupport_(helper::has_local_memory(q_)),
computeUnits_(helper::get_num_compute_units(q_)) {}
#endif

template <helper::AllocType alloc, typename value_t>
typename std::enable_if<
Expand Down Expand Up @@ -192,9 +188,7 @@ class SB_Handle {
const size_t workGroupSize_;
const bool localMemorySupport_;
const size_t computeUnits_;
#ifndef __ADAPTIVECPP__
Temp_Mem_Pool* tempMemPool_;
#endif
};

} // namespace blas
Expand Down
3 changes: 0 additions & 3 deletions onemath/sycl/blas/include/sb_handle/temp_memory_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#ifndef TEMP_MEMORY_POOL_H
#define TEMP_MEMORY_POOL_H

#ifndef __ADAPTIVECPP__
#include <map>
#include <mutex>

Expand Down Expand Up @@ -112,6 +111,4 @@ class Temp_Mem_Pool {
};
} // namespace blas

#endif // __ADAPTIVECPP__

#endif
21 changes: 0 additions & 21 deletions onemath/sycl/blas/src/interface/blas1_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ typename sb_handle_t::event_t _sdsdot(
sb_handle_t &sb_handle, index_t _N, float sb, container_0_t _vx,
increment_t _incx, container_1_t _vy, increment_t _incy, container_2_t _rs,
const typename sb_handle_t::event_t &_dependencies) {
#ifndef __ADAPTIVECPP__
if (!_N) {
using element_t = typename ValueType<container_2_t>::type;
sb_handle.wait(_dependencies);
Expand All @@ -167,11 +166,6 @@ typename sb_handle_t::event_t _sdsdot(
auto ret = sb_handle.execute(assignOp2, dotOp);
return blas::concatenate_vectors(dotOp, ret);
}
#else
throw std::runtime_error(
"Sdsdot is not supported with AdaptiveCpp as it uses SYCL 2020 "
"reduction.");
#endif
}

/**
Expand Down Expand Up @@ -959,7 +953,6 @@ typename ValueType<container_0_t>::type _dot(
sb_handle_t &sb_handle, index_t _N, container_0_t _vx, increment_t _incx,
container_1_t _vy, increment_t _incy,
const typename sb_handle_t::event_t &_dependencies) {
#ifndef __ADAPTIVECPP__
constexpr bool is_usm = std::is_pointer<container_0_t>::value;
using element_t = typename ValueType<container_0_t>::type;
element_t res{0};
Expand All @@ -982,10 +975,6 @@ typename ValueType<container_0_t>::type _dot(
: helper::AllocType::buffer>(gpu_res,
sb_handle.get_queue());
return res;
#else
throw std::runtime_error(
"Dot is not supported with AdaptiveCpp as it uses SYCL 2020 reduction.");
#endif
}

/**
Expand Down Expand Up @@ -1109,7 +1098,6 @@ template <typename sb_handle_t, typename container_t, typename index_t,
typename ValueType<container_t>::type _asum(
sb_handle_t &sb_handle, index_t _N, container_t _vx, increment_t _incx,
const typename sb_handle_t::event_t &_dependencies) {
#ifndef __ADAPTIVECPP__
constexpr bool is_usm = std::is_pointer<container_t>::value;
using element_t = typename ValueType<container_t>::type;
auto res = std::vector<element_t>(1, element_t(0));
Expand All @@ -1130,10 +1118,6 @@ typename ValueType<container_t>::type _asum(
: helper::AllocType::buffer>(
gpu_res, sb_handle.get_queue());
return res[0];
#else
throw std::runtime_error(
"Asum is not supported with AdaptiveCpp as it uses SYCL 2020 reduction.");
#endif
}

/**
Expand All @@ -1149,7 +1133,6 @@ template <typename sb_handle_t, typename container_t, typename index_t,
typename ValueType<container_t>::type _nrm2(
sb_handle_t &sb_handle, index_t _N, container_t _vx, increment_t _incx,
const typename sb_handle_t::event_t &_dependencies) {
#ifndef __ADAPTIVECPP__
constexpr bool is_usm = std::is_pointer<container_t>::value;
using element_t = typename ValueType<container_t>::type;
auto res = std::vector<element_t>(1, element_t(0));
Expand All @@ -1169,10 +1152,6 @@ typename ValueType<container_t>::type _nrm2(
: helper::AllocType::buffer>(
gpu_res, sb_handle.get_queue());
return res[0];
#else
throw std::runtime_error(
"Nrm2 is not supported with AdaptiveCpp as it uses SYCL 2020 reduction.");
#endif
}

} // namespace internal
Expand Down
11 changes: 0 additions & 11 deletions onemath/sycl/blas/src/interface/blas2_interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,6 @@ typename sb_handle_t::event_t _trsv_impl(
sb_handle_t& sb_handle, index_t _N, container_t0 _mA, index_t _lda,
container_t1 _vx, increment_t _incx,
const typename sb_handle_t::event_t& _dependencies) {
#ifdef __ADAPTIVECPP__
throw std::runtime_error("trsv requires SYCL 2020");
#else
static_assert(subgroup_size % subgroups == 0,
"`subgroups` needs to be a multiple of `subgroup_size`.");
using one = constant<increment_t, const_val::one>;
Expand Down Expand Up @@ -404,7 +401,6 @@ typename sb_handle_t::event_t _trsv_impl(
sb_handle.release_temp_mem(ret, sync_buffer);

return ret;
#endif
}

/*! _SYMV.
Expand Down Expand Up @@ -746,9 +742,6 @@ typename sb_handle_t::event_t _tbsv_impl(
sb_handle_t& sb_handle, index_t _N, index_t _K, container_t0 _mA,
index_t _lda, container_t1 _vx, increment_t _incx,
const typename sb_handle_t::event_t& _dependencies) {
#ifdef __ADAPTIVECPP__
throw std::runtime_error("tbsv requires SYCL 2020");
#else
static_assert(subgroup_size % subgroups == 0,
"`subgroups` needs to be a multiple of `subgroup_size`.");

Expand Down Expand Up @@ -799,7 +792,6 @@ typename sb_handle_t::event_t _tbsv_impl(
sb_handle.release_temp_mem(ret, sync_buffer);

return ret;
#endif
}

template <uint32_t subgroup_size, uint32_t subgroups, uplo_type uplo,
Expand All @@ -809,9 +801,7 @@ template <uint32_t subgroup_size, uint32_t subgroups, uplo_type uplo,
typename sb_handle_t::event_t _tpsv_impl(
sb_handle_t& sb_handle, index_t _N, container_t0 _mA, container_t1 _vx,
increment_t _incx, const typename sb_handle_t::event_t& _dependencies) {
#ifdef __ADAPTIVECPP__
throw std::runtime_error("tpsv requires SYCL 2020");
#else
static_assert(subgroup_size % subgroups == 0,
"`subgroups` needs to be a multiple of `subgroup_size`.");

Expand Down Expand Up @@ -864,7 +854,6 @@ typename sb_handle_t::event_t _tpsv_impl(
sb_handle.release_temp_mem(ret, sync_buffer);

return ret;
#endif
}

/**** RANK 1 MODIFICATION ****/
Expand Down
2 changes: 0 additions & 2 deletions onemath/sycl/blas/src/operations/blas2/txsv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ ONEMATH_SYCL_BLAS_INLINE
is_upper, is_transposed, is_unitdiag>::eval(local_memory_t local_mem,
sycl::nd_item<1> ndItem) {
value_t ret = 0;
#ifndef __ADAPTIVECPP__

constexpr bool is_forward =
(is_upper && is_transposed) || (!is_upper && !is_transposed);
Expand Down Expand Up @@ -291,7 +290,6 @@ ONEMATH_SYCL_BLAS_INLINE

sycl::atomic_fence(sycl::memory_order::seq_cst, sycl::memory_scope::device);

#endif
return ret;
}

Expand Down
8 changes: 4 additions & 4 deletions onemath/sycl/blas/src/sb_handle/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ typename std::enable_if<
alloc == helper::AllocType::buffer,
typename helper::AllocHelper<value_t, alloc>::type>::type
SB_Handle::acquire_temp_mem(size_t size) {
#ifndef __ADAPTIVECPP__
if (tempMemPool_ != nullptr)
return tempMemPool_->acquire_buff_mem<value_t>(size);
else
#endif
return make_sycl_iterator_buffer<value_t>(size);
}

Expand All @@ -58,11 +56,9 @@ typename std::enable_if<
typename SB_Handle::event_t>::type
SB_Handle::release_temp_mem(const typename SB_Handle::event_t& dependencies,
const container_t& mem) {
#ifndef __ADAPTIVECPP__
if (tempMemPool_ != nullptr)
return tempMemPool_->release_buff_mem(dependencies, mem);
else
#endif
return {};
}

Expand Down Expand Up @@ -92,7 +88,11 @@ SB_Handle::release_temp_mem(const typename SB_Handle::event_t& dependencies,
sycl::context context = q_.get_context();
return {q_.submit([&](sycl::handler& cgh) {
cgh.depends_on(dependencies);
#ifndef __ADAPTIVECPP__
cgh.host_task([=]() { sycl::free(mem, context); });
#else
cgh.AdaptiveCpp_enqueue_custom_operation([=](sycl::interop_handle &) { sycl::free(mem, context); });
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it correct to do a free here inside a custom operation? Custom operations will be evaluated at submission time, which is a fundamental difference from host task (See https://github.com/AdaptiveCpp/AdaptiveCpp/blob/develop/doc/enqueue-custom-operation.md). So this looks to me like there might be a hazard of deleting memory while the device might still be using it?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you're correct. I am not sure how to fix this though, do you have a suggestion?

Copy link

@illuhad illuhad Jun 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In pseudo code, maybe something like this could work (and, if the C++ standard library uses a thread pool, should not be slower than host_task):

std::async(..., [=](){
  if(q.is_in_order()) {
    q.wait();
  } else {
    for(auto d : dependencies){
      d.wait();
    }
  }
  free(...);
});

Not sure if we need the in-order branch. If we reliably get dependency lists even for in-order queues we might not need it.

The real issue here is that SYCL does not have the asynchronous free mechanisms that e.g. CUDA has.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is a workaround for an asynchronous free. The suggested solution makes sense to me. I suggest making a helper function for it, since the pattern is repeated a few times.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be some complexity here, since at some point we may need to wait on the std::future returned by std::async to get things working.

@Rbiessy I would recommend to also reconsider the current workaround for the DPC++ side. According to the current SYCL 2020 specification,

Capturing accessors in a host task is allowed, however, capturing or using any other SYCL class that has reference semantics (see Section 4.5.2) is undefined behavior.

(https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:interfaces.hosttasks.overview)

The sycl::context class, similarly to sycl::queue or sycl::device, follows common reference semantics, and thus cannot be used inside the host_task. So, the current code is undefined behavior and might break at any time.

The background here is that many backends impose the restriction that enqueued callbacks may not make calls into the backend runtime API or risk deadlocks. For example, in CUDA, cudaStreamAddCallback requires that the registered callback may not perform any calls into the CUDA runtime, otherwise, deadlocks or other forms of UB might appear.

The SYCL restriction that common-reference-semantics objects are not allowed in host tasks enforces this limitation.

Unfortunately, because of limitations like this, the current host_task is far less useful than it might appear at first glance, which is why AdaptiveCpp never implemented it. It's full of potential performance or correctness issues unfortunately :(

I'm not sure what the best approach here is. I suspect that designing a new approach might require a better understanding of what the goal here is with the async free emulation than I currently have.

One solution that I've considered would be to just add asynchronous free to SYCL implementations as an extension, but that is also not trivial, since the OpenCL USM extensions don't support this functionality. Also, it would mean that the code would rely on specific extensions, and even if both AdaptiveCpp and DPC++ implemented them, the code would no longer be based on standard SYCL and it wouldn't work anymore with other compilers.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I am aware it is technically UB according to SYCL spec. In practice we agreed that it should work fine with DPC++ and likely other SYCL implementations that would support host_task.
I was just reminded that there is already an extension for async alloc and free, see https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_async_memory_alloc.asciidoc.
DPC++ already supports that so if we want to change the DPC++ path I would strongly suggest to use this. We don't have enough bandwidth to work on this at Codeplay but we could help with the review. It may be a good opportunity to update it if AdaptiveCpp were to support that extension?

Copy link

@illuhad illuhad Jun 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice we agreed that it should work fine with DPC++ and likely other SYCL implementations that would support host_task

It's not clear to me why that should be the case. I can see that for specific implementation choices of host_task (e.g. using a SYCL-managed worker thread instead of a backend callback) it might be fine. But that is an implementation detail and not guaranteed.

It may be a good opportunity to update it if AdaptiveCpp were to support that extension?

hmm... We already have our own memory pool interface which is different from the one in that extension. The real new feature I suppose would be a mechanism to extend lifetime of an object until a specific kernel has finished executing. Then we could use RAII to return a memory object to the pool.

I wonder how DPC++ implements the async free on OpenCL, given that the Intel OpenCL USM extension doesn't have it.

#endif
})};
}
}
Expand Down
10 changes: 8 additions & 2 deletions onemath/sycl/blas/src/sb_handle/temp_memory_pool.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#ifndef TEMP_MEMORY_POOL_HPP
#define TEMP_MEMORY_POOL_HPP
#ifndef __ADAPTIVECPP__
#include "helper.h"

namespace blas {
Expand Down Expand Up @@ -54,7 +53,11 @@ typename Temp_Mem_Pool::event_t Temp_Mem_Pool::release_buff_mem(
const container_t& mem) {
return {q_.submit([&](sycl::handler& cgh) {
cgh.depends_on(dependencies);
#ifndef __ADAPTIVECPP__
cgh.host_task([&, mem]() { release_buff_mem_(mem); });
#else
cgh.AdaptiveCpp_enqueue_custom_operation([&, mem](sycl::interop_handle &) { release_buff_mem_(mem); });
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps same issue here?

#endif
})};
}

Expand Down Expand Up @@ -110,10 +113,13 @@ typename Temp_Mem_Pool::event_t Temp_Mem_Pool::release_usm_mem(
const container_t& mem) {
return {q_.submit([&](sycl::handler& cgh) {
cgh.depends_on(dependencies);
#ifndef __ADAPTIVECPP__
cgh.host_task([&, mem]() { release_usm_mem_(mem); });
#else
cgh.AdaptiveCpp_enqueue_custom_operation([&, mem](sycl::interop_handle &) { release_usm_mem_(mem); });
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And here?

#endif
})};
}
}
#endif // SB_ENABLE_USM
#endif // __ADAPTIVECPP__
#endif