Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 5b0327d

Browse files
authored
2024.0 compatibility (#143)
* 2024.0 compatibility
* fix more warnings on 2024.1
1 parent 18d6ad0 commit 5b0327d

File tree

6 files changed

+100
-27
lines changed

6 files changed

+100
-27
lines changed

include/common/core/base_types.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ struct is_internal_type {
6868
static constexpr bool value = std::is_same<remove_const_t<T>, bf16>::value
6969
|| std::is_same<remove_const_t<T>, tf32>::value;
7070
};
71+
template <typename T>
72+
inline constexpr bool is_internal_type_v = is_internal_type<T>::value;
7173

7274
/// @brief Used to check if the type is floating_point.
7375
/// @tparam T is the data type
@@ -79,6 +81,8 @@ struct is_floating_point {
7981
|| std::is_same<remove_const_t<T>, float>::value
8082
|| std::is_same<remove_const_t<T>, double>::value;
8183
};
84+
template <typename T>
85+
inline constexpr bool is_floating_point_v = is_floating_point<T>::value;
8286

8387
/// @brief Used to check if the type is integral.
8488
/// @tparam T is the data type
@@ -93,6 +97,8 @@ struct is_integral {
9397
|| std::is_same<remove_const_t<T>, int64_t>::value
9498
|| std::is_same<remove_const_t<T>, uint64_t>::value;
9599
};
100+
template <typename T>
101+
inline constexpr bool is_integral_v = is_integral<T>::value;
96102

97103
/// @brief Set the native data type of T
98104
/// @tparam T is the data type

include/common/core/common.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,9 @@ enum class data_size : uint8_t {
112112
/// The specific LSC shared function to fence with xetla_fence
113113
enum class memory_kind : uint8_t {
114114
untyped_global = 0, /// untyped global memory
115-
typed_global = 1, /// typed global memory
116-
shared_local = 2, /// shared local memory
115+
// "1" reserved for low-priority untyped global memory
116+
typed_global = 2, /// typed global memory
117+
shared_local = 3, /// shared local memory
117118
};
118119

119120
/// The xetla_fence operation to apply to caches

include/common/core/math_general.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,11 @@ __XETLA_API xetla_vector<T, SZ> xetla_add_c(xetla_vector<T, SZ> src0,
462462
static_assert((std::is_same<remove_const_t<T>, uint32_t>::value),
463463
"For addc, only uint32_t is supported");
464464
xetla_vector<T, SZ> carry_tmp;
465+
#if __INTEL_LLVM_COMPILER >= 20240100
466+
xetla_vector<T, SZ> out = __ESIMD_NS::addc(carry_tmp, src0, src1);
467+
#else
465468
xetla_vector<T, SZ> out = __ESIMD_ENS::addc(carry_tmp, src0, src1);
469+
#endif
466470
carry = carry_tmp;
467471
return out;
468472
}
@@ -480,7 +484,11 @@ __XETLA_API xetla_vector<T, SZ> xetla_add_c(xetla_vector<T, SZ> src0, T src1,
480484
static_assert((std::is_same<remove_const_t<T>, uint32_t>::value),
481485
"For addc, only uint32_t is supported");
482486
xetla_vector<T, SZ> carry_tmp;
487+
#if __INTEL_LLVM_COMPILER >= 20240100
488+
xetla_vector<T, SZ> out = __ESIMD_NS::addc(carry_tmp, src0, src1);
489+
#else
483490
xetla_vector<T, SZ> out = __ESIMD_ENS::addc(carry_tmp, src0, src1);
491+
#endif
484492
carry = carry_tmp;
485493
return out;
486494
}

include/common/core/memory.hpp

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -72,49 +72,89 @@ constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds) {
7272
/// @brief lookup table for memory kind.
7373
///
7474
///
75-
constexpr sycl::ext::intel::esimd::memory_kind get_memory_kind(
76-
gpu::xetla::memory_kind mk) {
75+
constexpr auto get_memory_kind(gpu::xetla::memory_kind mk) {
7776
switch (mk) {
77+
#if __INTEL_LLVM_COMPILER >= 20240100
7878
case gpu::xetla::memory_kind::untyped_global:
79-
return sycl::ext::intel::esimd::memory_kind::image;
79+
return __ESIMD_NS::memory_kind::global;
8080
case gpu::xetla::memory_kind::typed_global:
81-
return sycl::ext::intel::esimd::memory_kind::global;
81+
return __ESIMD_NS::memory_kind::image;
8282
case gpu::xetla::memory_kind::shared_local:
83-
return sycl::ext::intel::esimd::memory_kind::local;
83+
return __ESIMD_NS::memory_kind::local;
84+
#else // legacy experimental api
85+
case gpu::xetla::memory_kind::untyped_global:
86+
return __ESIMD_ENS::lsc_memory_kind::untyped_global;
87+
case gpu::xetla::memory_kind::typed_global:
88+
return __ESIMD_ENS::lsc_memory_kind::typed_global;
89+
case gpu::xetla::memory_kind::shared_local:
90+
return __ESIMD_ENS::lsc_memory_kind::shared_local;
91+
#endif
8492
}
8593
}
8694

8795
/// @brief lookup table for fence op.
8896
///
8997
///
90-
constexpr sycl::ext::intel::esimd::fence_flush_op get_fence_op(gpu::xetla::fence_op fo) {
98+
constexpr auto get_fence_op(gpu::xetla::fence_op fo) {
9199
switch (fo) {
92-
case gpu::xetla::fence_op::none: return sycl::ext::intel::esimd::fence_flush_op::none;
100+
#if __INTEL_LLVM_COMPILER >= 20240100
101+
case gpu::xetla::fence_op::none:
102+
return __ESIMD_NS::fence_flush_op::none;
103+
case gpu::xetla::fence_op::evict:
104+
return __ESIMD_NS::fence_flush_op::evict;
105+
case gpu::xetla::fence_op::invalidate:
106+
return __ESIMD_NS::fence_flush_op::invalidate;
107+
case gpu::xetla::fence_op::clean:
108+
return __ESIMD_NS::fence_flush_op::clean;
109+
#else // legacy experimental api
110+
case gpu::xetla::fence_op::none: //
111+
return __ESIMD_ENS::lsc_fence_op::none;
93112
case gpu::xetla::fence_op::evict:
94-
return sycl::ext::intel::esimd::fence_flush_op::evict;
113+
return __ESIMD_ENS::lsc_fence_op::evict;
95114
case gpu::xetla::fence_op::invalidate:
96-
return sycl::ext::intel::esimd::fence_flush_op::invalidate;
115+
return __ESIMD_ENS::lsc_fence_op::invalidate;
97116
case gpu::xetla::fence_op::clean:
98-
return sycl::ext::intel::esimd::fence_flush_op::clean;
117+
return __ESIMD_ENS::lsc_fence_op::clean;
118+
#endif
99119
}
100120
}
101121

102122
/// @brief lookup table for fence scope.
103123
///
104124
///
105-
constexpr sycl::ext::intel::esimd::fence_scope get_fence_scope(gpu::xetla::fence_scope fs) {
125+
constexpr auto get_fence_scope(gpu::xetla::fence_scope fs) {
106126
switch (fs) {
127+
#if __INTEL_LLVM_COMPILER >= 20240100
107128
case gpu::xetla::fence_scope::group:
108-
return sycl::ext::intel::esimd::fence_scope::group;
129+
return __ESIMD_NS::fence_scope::group;
109130
case gpu::xetla::fence_scope::local:
110-
return sycl::ext::intel::esimd::fence_scope::local;
111-
case gpu::xetla::fence_scope::tile: return sycl::ext::intel::esimd::fence_scope::tile;
112-
case gpu::xetla::fence_scope::gpu: return sycl::ext::intel::esimd::fence_scope::gpu;
113-
case gpu::xetla::fence_scope::gpus: return sycl::ext::intel::esimd::fence_scope::gpus;
131+
return __ESIMD_NS::fence_scope::local;
132+
case gpu::xetla::fence_scope::tile:
133+
return __ESIMD_NS::fence_scope::tile;
134+
case gpu::xetla::fence_scope::gpu: //
135+
return __ESIMD_NS::fence_scope::gpu;
136+
case gpu::xetla::fence_scope::gpus:
137+
return __ESIMD_NS::fence_scope::gpus;
114138
case gpu::xetla::fence_scope::system:
115-
return sycl::ext::intel::esimd::fence_scope::system;
139+
return __ESIMD_NS::fence_scope::system;
116140
case gpu::xetla::fence_scope::sysacq:
117-
return sycl::ext::intel::esimd::fence_scope::system_acquire;
141+
return __ESIMD_NS::fence_scope::system_acquire;
142+
#else // legacy experimental api
143+
case gpu::xetla::fence_scope::group:
144+
return __ESIMD_ENS::lsc_scope::group;
145+
case gpu::xetla::fence_scope::local:
146+
return __ESIMD_ENS::lsc_scope::local;
147+
case gpu::xetla::fence_scope::tile: //
148+
return __ESIMD_ENS::lsc_scope::tile;
149+
case gpu::xetla::fence_scope::gpu: //
150+
return __ESIMD_ENS::lsc_scope::gpu;
151+
case gpu::xetla::fence_scope::gpus: //
152+
return __ESIMD_ENS::lsc_scope::gpus;
153+
case gpu::xetla::fence_scope::system:
154+
return __ESIMD_ENS::lsc_scope::system;
155+
case gpu::xetla::fence_scope::sysacq:
156+
return __ESIMD_ENS::lsc_scope::sysacq;
157+
#endif
118158
}
119159
}
120160

@@ -630,9 +670,15 @@ template <memory_kind Kind = memory_kind::untyped_global,
630670
fence_op FenceOp = fence_op::none,
631671
fence_scope Scope = fence_scope::group, int N = 16>
632672
__XETLA_API void xetla_fence() {
633-
sycl::ext::intel::esimd::fence<gpu::xetla::detail::get_memory_kind(Kind),
673+
#if __INTEL_LLVM_COMPILER >= 20240100
674+
__ESIMD_NS::fence<gpu::xetla::detail::get_memory_kind(Kind),
634675
gpu::xetla::detail::get_fence_op(FenceOp),
635676
gpu::xetla::detail::get_fence_scope(Scope)>();
677+
#else
678+
__ESIMD_ENS::lsc_fence<gpu::xetla::detail::get_memory_kind(Kind),
679+
gpu::xetla::detail::get_fence_op(FenceOp),
680+
gpu::xetla::detail::get_fence_scope(Scope), N>(xetla_mask<N>(1));
681+
#endif
636682
}
637683

638684
/// @} xetla_core_memory

tests/integration/data_transformer/common.hpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,24 @@
1515
*******************************************************************************/
1616
#pragma once
1717

18-
#include <utils/common.hpp>
1918
#include "xetla.hpp"
19+
#include <utils/common.hpp>
2020

2121
using namespace gpu::xetla;
2222
using namespace cl::sycl;
2323

24+
namespace {
25+
// abs for floating point types is non-standard and has been deprecated.
26+
// Please use fabs instead. [-Wdeprecated-declarations]
27+
template <typename T>
28+
inline T _abs(const T &v) {
29+
if constexpr (is_floating_point_v<T>)
30+
return fabs(v);
31+
else
32+
return abs(v);
33+
};
34+
} // namespace
35+
2436
template <typename data_type_in, typename data_type_out, typename data_type_acc>
2537
int data_transformer_result_validate(data_type_in *in_device,
2638
data_type_out *out_device, size_t mat_m, size_t mat_n,
@@ -42,8 +54,8 @@ int data_transformer_result_validate(data_type_in *in_device,
4254
for (uint32_t j = 0; j < mat_n; j++) {
4355
int idx = i * mat_n + j;
4456

45-
cpu_max = (cpu_max > abs(in[idx])) ? cpu_max
46-
: abs((data_type_acc)in[idx]);
57+
cpu_max = (cpu_max > _abs(in[idx])) ? cpu_max
58+
: _abs((data_type_acc)in[idx]);
4759

4860
res = out[idx];
4961

@@ -56,7 +68,7 @@ int data_transformer_result_validate(data_type_in *in_device,
5668
: (data_type_out)(in[j * mat_m + i]);
5769
}
5870

59-
if (abs(res - ref) > abs(0.01 * res)) {
71+
if (_abs(res - ref) > _abs(0.01 * res)) {
6072
std::cout << "i: " << i << " j: " << j << " idx: " << idx
6173
<< " in: " << in[idx] << " cpu: " << ref
6274
<< " gpu: " << res << std::endl;
@@ -69,7 +81,7 @@ int data_transformer_result_validate(data_type_in *in_device,
6981
cpu_max = cpu_max * scale[0];
7082

7183
if (need_fp8_op) {
72-
if (abs(cpu_max - amax_ptr[0]) > abs(0.01 * cpu_max)) {
84+
if (_abs(cpu_max - amax_ptr[0]) > _abs(0.01 * cpu_max)) {
7385
std::cout << "cpu_max: " << cpu_max << " gpu_max: " << amax_ptr[0]
7486
<< std::endl;
7587
return 1;

tests/utils/profiling.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class profiling_helper {
103103
// sum of squared deviations of the timings from the mean (index 0 is skipped)
104104
for (int i = 1; i < iter; i++) {
105105
#if (__LIBSYCL_MAJOR_VERSION >= 7) && (__LIBSYCL_MINOR_VERSION >= 1)
106-
stat.variance += sycl::pow(time[i] - stat.mean, (double)2);
106+
stat.variance += sycl::pow(time[i] - stat.mean, 2.);
107107
#else
108108
stat.variance += pow(time[i] - stat.mean, 2);
109109
#endif

0 commit comments

Comments
 (0)