uxlfoundation
diff --git a/docs/create_new_backend.rst
Lines changed: 4 additions & 0 deletions b/docs/create_new_backend.rst
Lines changed: 4 additions & 0 deletions
diff --git a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp
Lines changed: 4 additions & 2 deletions b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp
Lines changed: 4 additions & 2 deletions
diff --git a/examples/blas/run_time_dispatching/level3/gemm_usm.cpp
Lines changed: 4 additions & 0 deletions b/examples/blas/run_time_dispatching/level3/gemm_usm.cpp
Lines changed: 4 additions & 0 deletions
diff --git a/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp
Lines changed: 14 additions & 12 deletions b/examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp
Lines changed: 14 additions & 12 deletions
diff --git a/examples/lapack/run_time_dispatching/getrs_usm.cpp
Lines changed: 13 additions & 13 deletions b/examples/lapack/run_time_dispatching/getrs_usm.cpp
Lines changed: 13 additions & 13 deletions
diff --git a/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp
Lines changed: 4 additions & 1 deletion b/examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp
Lines changed: 4 additions & 1 deletion
diff --git a/examples/rng/run_time_dispatching/uniform_usm.cpp
Lines changed: 4 additions & 1 deletion b/examples/rng/run_time_dispatching/uniform_usm.cpp
Lines changed: 4 additions & 1 deletion
diff --git a/include/oneapi/mkl/blas.hpp
Lines changed: 4 additions & 0 deletions b/include/oneapi/mkl/blas.hpp
Lines changed: 4 additions & 0 deletions
diff --git a/include/oneapi/mkl/blas/detail/blas_ct_backends.hpp
Lines changed: 4 additions & 0 deletions b/include/oneapi/mkl/blas/detail/blas_ct_backends.hpp
Lines changed: 4 additions & 0 deletions
diff --git a/include/oneapi/mkl/blas/detail/blas_loader.hpp
Lines changed: 4 additions & 0 deletions b/include/oneapi/mkl/blas/detail/blas_loader.hpp
Lines changed: 4 additions & 0 deletions
@@ -270,7 +270,11 @@ The following code snippet is updated for ``src/blas/backends/newlib/newlib_wrap
 
 .. code-block:: diff
 
+        #if __has_include(<sycl/sycl.hpp>)
+        #include <sycl/sycl.hpp>
+        #else
         #include <CL/sycl.hpp>
+        #endif
         
         #include "oneapi/mkl/types.hpp"
         
 
@@ -42,7 +42,11 @@
 #include <vector>
 
 // oneMKL/SYCL includes
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 // local includes
@@ -218,14 +222,12 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev)
     // output the top 2x2 block of C matrix from GPU
     print_2x2_matrix_values(result_gpu.data(), ldC, "(GPU) C");
 
-
     sycl::free(gpu_C, gpu_queue);
     sycl::free(gpu_B, gpu_queue);
     sycl::free(gpu_A, gpu_queue);
     sycl::free(cpu_C, cpu_queue);
     sycl::free(cpu_B, cpu_queue);
     sycl::free(cpu_A, cpu_queue);
-
 }
 
 //
 
@@ -42,7 +42,11 @@
 #include <iostream>
 #include <vector>
 
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 #include "example_helper.hpp"
 
@@ -36,7 +36,11 @@
 #include <vector>
 
 // oneMKL/SYCL includes
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 // local includes
@@ -144,8 +148,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     std::int64_t cpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
         oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, lda);
     std::int64_t cpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue },
-        trans, n, nrhs, lda, ldb);
+        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
+        lda, ldb);
     float* cpu_getrf_scratchpad = sycl::malloc_device<float>(
         cpu_getrf_scratchpad_size * sizeof(float), cpu_device, cpu_context);
     float* cpu_getrs_scratchpad = sycl::malloc_device<float>(
@@ -174,8 +178,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     std::int64_t gpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
         oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, lda);
     std::int64_t gpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue },
-        trans, n, nrhs, lda, ldb);
+        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
+        lda, ldb);
     float* gpu_getrf_scratchpad = sycl::malloc_device<float>(
         gpu_getrf_scratchpad_size * sizeof(float), gpu_device, gpu_context);
     float* gpu_getrs_scratchpad = sycl::malloc_device<float>(
@@ -196,16 +200,16 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
         oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, cpu_A, lda,
         cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size);
     cpu_getrs_done = oneapi::mkl::lapack::getrs(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue },
-        trans, n, nrhs, cpu_A, lda, cpu_ipiv, cpu_B, ldb,
-        cpu_getrs_scratchpad, cpu_getrs_scratchpad_size, { cpu_getrf_done });
+        oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
+        cpu_A, lda, cpu_ipiv, cpu_B, ldb, cpu_getrs_scratchpad, cpu_getrs_scratchpad_size,
+        { cpu_getrf_done });
     gpu_getrf_done = oneapi::mkl::lapack::getrf(
         oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, gpu_A,
         lda, gpu_ipiv, gpu_getrf_scratchpad, gpu_getrf_scratchpad_size);
     gpu_getrs_done = oneapi::mkl::lapack::getrs(
-        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue },
-        trans, n, nrhs, gpu_A, lda, gpu_ipiv, gpu_B, ldb,
-        gpu_getrs_scratchpad, gpu_getrs_scratchpad_size, { gpu_getrf_done });
+        oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
+        gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size,
+        { gpu_getrf_done });
 
     // Wait until calculations are done
     cpu_queue.wait_and_throw();
@@ -220,7 +224,6 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     // copy data from GPU device back to host
     gpu_queue.memcpy(result_gpu.data(), gpu_B, B_size * sizeof(float)).wait_and_throw();
 
-
     // Print results
     std::cout << "\n\t\tGETRF and GETRS parameters:" << std::endl;
     std::cout << "\t\t\ttrans = "
@@ -252,7 +255,6 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
     sycl::free(cpu_ipiv, cpu_queue);
     sycl::free(cpu_B, cpu_queue);
     sycl::free(cpu_A, cpu_queue);
-
 }
 
 //
 
@@ -37,7 +37,11 @@
 #include <vector>
 
 // oneMKL/SYCL includes
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 // local includes
@@ -66,7 +70,6 @@ void run_getrs_example(const sycl::device& device) {
     std::int64_t ipiv_size = n;
     oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
 
-
     // Asynchronous error handler
     auto error_handler = [&](sycl::exception_list exceptions) {
         for (auto const& e : exceptions) {
@@ -75,17 +78,15 @@ void run_getrs_example(const sycl::device& device) {
             }
             catch (oneapi::mkl::lapack::exception const& e) {
                 // Handle LAPACK related exceptions that happened during asynchronous call
-                std::cerr
-                    << "Caught asynchronous LAPACK exception during GETRF or GETRS:"
-                    << std::endl;
+                std::cerr << "Caught asynchronous LAPACK exception during GETRF or GETRS:"
+                          << std::endl;
                 std::cerr << "\t" << e.what() << std::endl;
                 std::cerr << "\tinfo: " << e.info() << std::endl;
             }
             catch (sycl::exception const& e) {
                 // Handle non-LAPACK exceptions that happened during asynchronous call
-                std::cerr
-                    << "Caught asynchronous SYCL exception during GETRF or GETRS:"
-                    << std::endl;
+                std::cerr << "Caught asynchronous SYCL exception during GETRF or GETRS:"
+                          << std::endl;
                 std::cerr << "\t" << e.what() << std::endl;
             }
         }
@@ -114,8 +115,8 @@ void run_getrs_example(const sycl::device& device) {
 
     std::int64_t getrf_scratchpad_size =
         oneapi::mkl::lapack::getrf_scratchpad_size<float>(queue, m, n, lda);
-    std::int64_t getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
-        queue, trans, n, nrhs, lda, ldb);
+    std::int64_t getrs_scratchpad_size =
+        oneapi::mkl::lapack::getrs_scratchpad_size<float>(queue, trans, n, nrhs, lda, ldb);
     float* getrf_scratchpad =
         sycl::malloc_shared<float>(getrf_scratchpad_size * sizeof(float), device, context);
     float* getrs_scratchpad =
@@ -137,9 +138,9 @@ void run_getrs_example(const sycl::device& device) {
     // Execute on device
     getrf_done = oneapi::mkl::lapack::getrf(queue, m, n, dev_A, lda, dev_ipiv, getrf_scratchpad,
                                             getrf_scratchpad_size);
-    getrs_done = oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, dev_A,
-                                            lda, dev_ipiv, dev_B, ldb, getrs_scratchpad,
-                                            getrs_scratchpad_size, { getrf_done });
+    getrs_done =
+        oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, dev_A, lda, dev_ipiv, dev_B, ldb,
+                                   getrs_scratchpad, getrs_scratchpad_size, { getrf_done });
 
     // Wait until calculations are done
     queue.wait_and_throw();
@@ -164,7 +165,6 @@ void run_getrs_example(const sycl::device& device) {
     // output the top 2x2 block of X matrix
     print_2x2_matrix_values(B.data(), ldb, "X");
 
-
     sycl::free(getrs_scratchpad, queue);
     sycl::free(getrf_scratchpad, queue);
     sycl::free(dev_ipiv, queue);
 
@@ -37,7 +37,11 @@
 #include <vector>
 
 // oneMKL/SYCL includes
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 // local includes
@@ -155,7 +159,6 @@ void run_uniform_example(const sycl::device& cpu_dev, const sycl::device& gpu_de
 
     sycl::free(dev_gpu, gpu_queue);
     sycl::free(dev_cpu, cpu_queue);
-
 }
 
 //
 
@@ -37,7 +37,11 @@
 #include <vector>
 
 // oneMKL/SYCL includes
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include "oneapi/mkl.hpp"
 
 // local includes
@@ -118,7 +122,6 @@ void run_uniform_example(const sycl::device& dev) {
     std::cout << std::endl;
 
     sycl::free(dev_r, queue);
-
 }
 
 //
 
@@ -20,7 +20,11 @@
 #ifndef _ONEMKL_BLAS_HPP_
 #define _ONEMKL_BLAS_HPP_
 
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include <complex>
 #include <cstdint>
 
 
@@ -20,7 +20,11 @@
 #ifndef _BLAS_CT_BACKENDS_HPP__
 #define _BLAS_CT_BACKENDS_HPP__
 
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 #include <complex>
 #include <cstdint>
 
 
@@ -22,7 +22,11 @@
 
 #include <complex>
 #include <cstdint>
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
 #include <CL/sycl.hpp>
+#endif
 
 #include "oneapi/mkl/types.hpp"