Skip to content

Commit 6976378

Browse files
authored
Refactor sycl header file (#199)
1 parent e9ea138 commit 6976378

File tree

272 files changed

+1106
-43
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

272 files changed

+1106
-43
lines changed

docs/create_new_backend.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,11 @@ The following code snippet is updated for ``src/blas/backends/newlib/newlib_wrap
270270

271271
.. code-block:: diff
272272
273+
#if __has_include(<sycl/sycl.hpp>)
274+
#include <sycl/sycl.hpp>
275+
#else
273276
#include <CL/sycl.hpp>
277+
#endif
274278
275279
#include "oneapi/mkl/types.hpp"
276280

examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@
4242
#include <vector>
4343

4444
// oneMKL/SYCL includes
45+
#if __has_include(<sycl/sycl.hpp>)
46+
#include <sycl/sycl.hpp>
47+
#else
4548
#include <CL/sycl.hpp>
49+
#endif
4650
#include "oneapi/mkl.hpp"
4751

4852
// local includes
@@ -218,14 +222,12 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev)
218222
// output the top 2x2 block of C matrix from GPU
219223
print_2x2_matrix_values(result_gpu.data(), ldC, "(GPU) C");
220224

221-
222225
sycl::free(gpu_C, gpu_queue);
223226
sycl::free(gpu_B, gpu_queue);
224227
sycl::free(gpu_A, gpu_queue);
225228
sycl::free(cpu_C, cpu_queue);
226229
sycl::free(cpu_B, cpu_queue);
227230
sycl::free(cpu_A, cpu_queue);
228-
229231
}
230232

231233
//

examples/blas/run_time_dispatching/level3/gemm_usm.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@
4242
#include <iostream>
4343
#include <vector>
4444

45+
#if __has_include(<sycl/sycl.hpp>)
46+
#include <sycl/sycl.hpp>
47+
#else
4548
#include <CL/sycl.hpp>
49+
#endif
4650
#include "oneapi/mkl.hpp"
4751

4852
#include "example_helper.hpp"

examples/lapack/compile_time_dispatching/getrs_usm_mklcpu_cusolver.cpp

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@
3636
#include <vector>
3737

3838
// oneMKL/SYCL includes
39+
#if __has_include(<sycl/sycl.hpp>)
40+
#include <sycl/sycl.hpp>
41+
#else
3942
#include <CL/sycl.hpp>
43+
#endif
4044
#include "oneapi/mkl.hpp"
4145

4246
// local includes
@@ -144,8 +148,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
144148
std::int64_t cpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
145149
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, lda);
146150
std::int64_t cpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
147-
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue },
148-
trans, n, nrhs, lda, ldb);
151+
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
152+
lda, ldb);
149153
float* cpu_getrf_scratchpad = sycl::malloc_device<float>(
150154
cpu_getrf_scratchpad_size * sizeof(float), cpu_device, cpu_context);
151155
float* cpu_getrs_scratchpad = sycl::malloc_device<float>(
@@ -174,8 +178,8 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
174178
std::int64_t gpu_getrf_scratchpad_size = oneapi::mkl::lapack::getrf_scratchpad_size<float>(
175179
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, lda);
176180
std::int64_t gpu_getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
177-
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue },
178-
trans, n, nrhs, lda, ldb);
181+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
182+
lda, ldb);
179183
float* gpu_getrf_scratchpad = sycl::malloc_device<float>(
180184
gpu_getrf_scratchpad_size * sizeof(float), gpu_device, gpu_context);
181185
float* gpu_getrs_scratchpad = sycl::malloc_device<float>(
@@ -196,16 +200,16 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
196200
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, m, n, cpu_A, lda,
197201
cpu_ipiv, cpu_getrf_scratchpad, cpu_getrf_scratchpad_size);
198202
cpu_getrs_done = oneapi::mkl::lapack::getrs(
199-
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue },
200-
trans, n, nrhs, cpu_A, lda, cpu_ipiv, cpu_B, ldb,
201-
cpu_getrs_scratchpad, cpu_getrs_scratchpad_size, { cpu_getrf_done });
203+
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue }, trans, n, nrhs,
204+
cpu_A, lda, cpu_ipiv, cpu_B, ldb, cpu_getrs_scratchpad, cpu_getrs_scratchpad_size,
205+
{ cpu_getrf_done });
202206
gpu_getrf_done = oneapi::mkl::lapack::getrf(
203207
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, m, n, gpu_A,
204208
lda, gpu_ipiv, gpu_getrf_scratchpad, gpu_getrf_scratchpad_size);
205209
gpu_getrs_done = oneapi::mkl::lapack::getrs(
206-
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue },
207-
trans, n, nrhs, gpu_A, lda, gpu_ipiv, gpu_B, ldb,
208-
gpu_getrs_scratchpad, gpu_getrs_scratchpad_size, { gpu_getrf_done });
210+
oneapi::mkl::backend_selector<oneapi::mkl::backend::cusolver>{ gpu_queue }, trans, n, nrhs,
211+
gpu_A, lda, gpu_ipiv, gpu_B, ldb, gpu_getrs_scratchpad, gpu_getrs_scratchpad_size,
212+
{ gpu_getrf_done });
209213

210214
// Wait until calculations are done
211215
cpu_queue.wait_and_throw();
@@ -220,7 +224,6 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
220224
// copy data from GPU device back to host
221225
gpu_queue.memcpy(result_gpu.data(), gpu_B, B_size * sizeof(float)).wait_and_throw();
222226

223-
224227
// Print results
225228
std::cout << "\n\t\tGETRF and GETRS parameters:" << std::endl;
226229
std::cout << "\t\t\ttrans = "
@@ -252,7 +255,6 @@ void run_getrs_example(const sycl::device& cpu_device, const sycl::device& gpu_d
252255
sycl::free(cpu_ipiv, cpu_queue);
253256
sycl::free(cpu_B, cpu_queue);
254257
sycl::free(cpu_A, cpu_queue);
255-
256258
}
257259

258260
//

examples/lapack/run_time_dispatching/getrs_usm.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
#include <vector>
3838

3939
// oneMKL/SYCL includes
40+
#if __has_include(<sycl/sycl.hpp>)
41+
#include <sycl/sycl.hpp>
42+
#else
4043
#include <CL/sycl.hpp>
44+
#endif
4145
#include "oneapi/mkl.hpp"
4246

4347
// local includes
@@ -66,7 +70,6 @@ void run_getrs_example(const sycl::device& device) {
6670
std::int64_t ipiv_size = n;
6771
oneapi::mkl::transpose trans = oneapi::mkl::transpose::nontrans;
6872

69-
7073
// Asynchronous error handler
7174
auto error_handler = [&](sycl::exception_list exceptions) {
7275
for (auto const& e : exceptions) {
@@ -75,17 +78,15 @@ void run_getrs_example(const sycl::device& device) {
7578
}
7679
catch (oneapi::mkl::lapack::exception const& e) {
7780
// Handle LAPACK-related exceptions that happened during an asynchronous call
78-
std::cerr
79-
<< "Caught asynchronous LAPACK exception during GETRF or GETRS:"
80-
<< std::endl;
81+
std::cerr << "Caught asynchronous LAPACK exception during GETRF or GETRS:"
82+
<< std::endl;
8183
std::cerr << "\t" << e.what() << std::endl;
8284
std::cerr << "\tinfo: " << e.info() << std::endl;
8385
}
8486
catch (sycl::exception const& e) {
8587
// Handle non-LAPACK exceptions that happened during an asynchronous call
86-
std::cerr
87-
<< "Caught asynchronous SYCL exception during GETRF or GETRS:"
88-
<< std::endl;
88+
std::cerr << "Caught asynchronous SYCL exception during GETRF or GETRS:"
89+
<< std::endl;
8990
std::cerr << "\t" << e.what() << std::endl;
9091
}
9192
}
@@ -114,8 +115,8 @@ void run_getrs_example(const sycl::device& device) {
114115

115116
std::int64_t getrf_scratchpad_size =
116117
oneapi::mkl::lapack::getrf_scratchpad_size<float>(queue, m, n, lda);
117-
std::int64_t getrs_scratchpad_size = oneapi::mkl::lapack::getrs_scratchpad_size<float>(
118-
queue, trans, n, nrhs, lda, ldb);
118+
std::int64_t getrs_scratchpad_size =
119+
oneapi::mkl::lapack::getrs_scratchpad_size<float>(queue, trans, n, nrhs, lda, ldb);
119120
float* getrf_scratchpad =
120121
sycl::malloc_shared<float>(getrf_scratchpad_size * sizeof(float), device, context);
121122
float* getrs_scratchpad =
@@ -137,9 +138,9 @@ void run_getrs_example(const sycl::device& device) {
137138
// Execute on device
138139
getrf_done = oneapi::mkl::lapack::getrf(queue, m, n, dev_A, lda, dev_ipiv, getrf_scratchpad,
139140
getrf_scratchpad_size);
140-
getrs_done = oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, dev_A,
141-
lda, dev_ipiv, dev_B, ldb, getrs_scratchpad,
142-
getrs_scratchpad_size, { getrf_done });
141+
getrs_done =
142+
oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, dev_A, lda, dev_ipiv, dev_B, ldb,
143+
getrs_scratchpad, getrs_scratchpad_size, { getrf_done });
143144

144145
// Wait until calculations are done
145146
queue.wait_and_throw();
@@ -164,7 +165,6 @@ void run_getrs_example(const sycl::device& device) {
164165
// output the top 2x2 block of X matrix
165166
print_2x2_matrix_values(B.data(), ldb, "X");
166167

167-
168168
sycl::free(getrs_scratchpad, queue);
169169
sycl::free(getrf_scratchpad, queue);
170170
sycl::free(dev_ipiv, queue);

examples/rng/compile_time_dispatching/uniform_usm_mklcpu_curand.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
#include <vector>
3838

3939
// oneMKL/SYCL includes
40+
#if __has_include(<sycl/sycl.hpp>)
41+
#include <sycl/sycl.hpp>
42+
#else
4043
#include <CL/sycl.hpp>
44+
#endif
4145
#include "oneapi/mkl.hpp"
4246

4347
// local includes
@@ -155,7 +159,6 @@ void run_uniform_example(const sycl::device& cpu_dev, const sycl::device& gpu_de
155159

156160
sycl::free(dev_gpu, gpu_queue);
157161
sycl::free(dev_cpu, cpu_queue);
158-
159162
}
160163

161164
//

examples/rng/run_time_dispatching/uniform_usm.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,11 @@
3737
#include <vector>
3838

3939
// oneMKL/SYCL includes
40+
#if __has_include(<sycl/sycl.hpp>)
41+
#include <sycl/sycl.hpp>
42+
#else
4043
#include <CL/sycl.hpp>
44+
#endif
4145
#include "oneapi/mkl.hpp"
4246

4347
// local includes
@@ -118,7 +122,6 @@ void run_uniform_example(const sycl::device& dev) {
118122
std::cout << std::endl;
119123

120124
sycl::free(dev_r, queue);
121-
122125
}
123126

124127
//

include/oneapi/mkl/blas.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
#ifndef _ONEMKL_BLAS_HPP_
2121
#define _ONEMKL_BLAS_HPP_
2222

23+
#if __has_include(<sycl/sycl.hpp>)
24+
#include <sycl/sycl.hpp>
25+
#else
2326
#include <CL/sycl.hpp>
27+
#endif
2428
#include <complex>
2529
#include <cstdint>
2630

include/oneapi/mkl/blas/detail/blas_ct_backends.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
#ifndef _BLAS_CT_BACKENDS_HPP__
2121
#define _BLAS_CT_BACKENDS_HPP__
2222

23+
#if __has_include(<sycl/sycl.hpp>)
24+
#include <sycl/sycl.hpp>
25+
#else
2326
#include <CL/sycl.hpp>
27+
#endif
2428
#include <complex>
2529
#include <cstdint>
2630

include/oneapi/mkl/blas/detail/blas_loader.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,11 @@
2222

2323
#include <complex>
2424
#include <cstdint>
25+
#if __has_include(<sycl/sycl.hpp>)
26+
#include <sycl/sycl.hpp>
27+
#else
2528
#include <CL/sycl.hpp>
29+
#endif
2630

2731
#include "oneapi/mkl/types.hpp"
2832

0 commit comments

Comments
 (0)