Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 18d6ad0

Browse files
authored
Upgrade oneapi 2024.1 (#119)
* upgrade oneapi to 2024.1 1/2 - lsc_fence -> fence * upgrade oneapi to 2024.1 2/2 - pow(T1, T2) -> pow(T, T) * enable elemwise_reduce and linear tile_op for arc
1 parent 54b776a commit 18d6ad0

File tree

4 files changed

+26
-35
lines changed

4 files changed

+26
-35
lines changed

include/common/core/common.hpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,20 +112,17 @@ enum class data_size : uint8_t {
112112
/// The specific LSC shared function to fence with xetla_fence
113113
enum class memory_kind : uint8_t {
114114
untyped_global = 0, /// untyped global memory
115-
untyped_global_low_pri = 1, /// low-priority untyped global memory
116-
typed_global = 2, /// typed global memory
117-
shared_local = 3, /// shared local memory
115+
typed_global = 1, /// typed global memory
116+
shared_local = 2, /// shared local memory
118117
};
119118

120119
/// The xetla_fence operation to apply to caches
121120
enum class fence_op : uint8_t {
122121
none = 0, /// no operation
123122
evict = 1, /// dirty lines evicted and invalidated from L1
124123
invalidate = 2, /// invalidate all clean lines
125-
discard = 3, /// direct and clean lines are discarded w/o eviction
124+
126125
clean = 4, /// dirty lines are written to memory, but retained in cache
127-
/// in clean state
128-
flushl2 = 5, /// flush only L2
129126
};
130127
/// The scope that xetla_fence operation should apply to
131128
enum class fence_scope : uint8_t {

include/common/core/memory.hpp

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -72,55 +72,49 @@ constexpr __ESIMD_ENS::lsc_data_size get_data_size(gpu::xetla::data_size ds) {
7272
/// @brief lookup table for memory kind.
7373
///
7474
///
75-
constexpr __ESIMD_ENS::lsc_memory_kind get_memory_kind(
75+
constexpr sycl::ext::intel::esimd::memory_kind get_memory_kind(
7676
gpu::xetla::memory_kind mk) {
7777
switch (mk) {
7878
case gpu::xetla::memory_kind::untyped_global:
79-
return __ESIMD_ENS::lsc_memory_kind::untyped_global;
80-
case gpu::xetla::memory_kind::untyped_global_low_pri:
81-
return __ESIMD_ENS::lsc_memory_kind::untyped_global_low_pri;
79+
return sycl::ext::intel::esimd::memory_kind::image;
8280
case gpu::xetla::memory_kind::typed_global:
83-
return __ESIMD_ENS::lsc_memory_kind::typed_global;
81+
return sycl::ext::intel::esimd::memory_kind::global;
8482
case gpu::xetla::memory_kind::shared_local:
85-
return __ESIMD_ENS::lsc_memory_kind::shared_local;
83+
return sycl::ext::intel::esimd::memory_kind::local;
8684
}
8785
}
8886

8987
/// @brief lookup table for fence op.
9088
///
9189
///
92-
constexpr __ESIMD_ENS::lsc_fence_op get_fence_op(gpu::xetla::fence_op fo) {
90+
constexpr sycl::ext::intel::esimd::fence_flush_op get_fence_op(gpu::xetla::fence_op fo) {
9391
switch (fo) {
94-
case gpu::xetla::fence_op::none: return __ESIMD_ENS::lsc_fence_op::none;
92+
case gpu::xetla::fence_op::none: return sycl::ext::intel::esimd::fence_flush_op::none;
9593
case gpu::xetla::fence_op::evict:
96-
return __ESIMD_ENS::lsc_fence_op::evict;
94+
return sycl::ext::intel::esimd::fence_flush_op::evict;
9795
case gpu::xetla::fence_op::invalidate:
98-
return __ESIMD_ENS::lsc_fence_op::invalidate;
99-
case gpu::xetla::fence_op::discard:
100-
return __ESIMD_ENS::lsc_fence_op::discard;
96+
return sycl::ext::intel::esimd::fence_flush_op::invalidate;
10197
case gpu::xetla::fence_op::clean:
102-
return __ESIMD_ENS::lsc_fence_op::clean;
103-
case gpu::xetla::fence_op::flushl2:
104-
return __ESIMD_ENS::lsc_fence_op::flushl3;
98+
return sycl::ext::intel::esimd::fence_flush_op::clean;
10599
}
106100
}
107101

108102
/// @brief lookup table for fence scope.
109103
///
110104
///
111-
constexpr __ESIMD_ENS::lsc_scope get_fence_scope(gpu::xetla::fence_scope fs) {
105+
constexpr sycl::ext::intel::esimd::fence_scope get_fence_scope(gpu::xetla::fence_scope fs) {
112106
switch (fs) {
113107
case gpu::xetla::fence_scope::group:
114-
return __ESIMD_ENS::lsc_scope::group;
108+
return sycl::ext::intel::esimd::fence_scope::group;
115109
case gpu::xetla::fence_scope::local:
116-
return __ESIMD_ENS::lsc_scope::local;
117-
case gpu::xetla::fence_scope::tile: return __ESIMD_ENS::lsc_scope::tile;
118-
case gpu::xetla::fence_scope::gpu: return __ESIMD_ENS::lsc_scope::gpu;
119-
case gpu::xetla::fence_scope::gpus: return __ESIMD_ENS::lsc_scope::gpus;
110+
return sycl::ext::intel::esimd::fence_scope::local;
111+
case gpu::xetla::fence_scope::tile: return sycl::ext::intel::esimd::fence_scope::tile;
112+
case gpu::xetla::fence_scope::gpu: return sycl::ext::intel::esimd::fence_scope::gpu;
113+
case gpu::xetla::fence_scope::gpus: return sycl::ext::intel::esimd::fence_scope::gpus;
120114
case gpu::xetla::fence_scope::system:
121-
return __ESIMD_ENS::lsc_scope::system;
115+
return sycl::ext::intel::esimd::fence_scope::system;
122116
case gpu::xetla::fence_scope::sysacq:
123-
return __ESIMD_ENS::lsc_scope::sysacq;
117+
return sycl::ext::intel::esimd::fence_scope::system_acquire;
124118
}
125119
}
126120

@@ -635,10 +629,10 @@ __XETLA_API xetla_vector<T, N> xetla_atomic_local(
635629
template <memory_kind Kind = memory_kind::untyped_global,
636630
fence_op FenceOp = fence_op::none,
637631
fence_scope Scope = fence_scope::group, int N = 16>
638-
__XETLA_API void xetla_fence(xetla_mask<N> pred = 1) {
639-
__ESIMD_ENS::lsc_fence<gpu::xetla::detail::get_memory_kind(Kind),
632+
__XETLA_API void xetla_fence() {
633+
sycl::ext::intel::esimd::fence<gpu::xetla::detail::get_memory_kind(Kind),
640634
gpu::xetla::detail::get_fence_op(FenceOp),
641-
gpu::xetla::detail::get_fence_scope(Scope), N>(pred);
635+
gpu::xetla::detail::get_fence_scope(Scope)>();
642636
}
643637

644638
/// @} xetla_core_memory

include/subgroup/tile/impl/tile_op_functor.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,7 @@ struct elemwise_reduce_op_t {};
719719
/// @brief Is the element-wise reduce op functor, specialized for Xe architecture.
720720
template <reduce_op reduce_kind_, typename dtype_in_, gpu_arch arch_tag>
721721
struct elemwise_reduce_op_t<reduce_kind_, dtype_in_, arch_tag,
722-
std::enable_if_t<(arch_tag == gpu_arch::Xe)>> {
722+
std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> {
723723
using dtype_in = dtype_in_;
724724
using mem_desc_in_t
725725
= mem_desc_t<dtype_in, mem_layout::row_major, mem_space::global>;
@@ -1148,7 +1148,7 @@ struct linear_op_t {};
11481148
/// @brief Is the linear_op functor, specialized for Xe architecture.
11491149
template <typename dtype_in_, gpu_arch arch_tag>
11501150
struct linear_op_t<dtype_in_, arch_tag,
1151-
std::enable_if_t<(arch_tag == gpu_arch::Xe)>> {
1151+
std::enable_if_t<(arch_tag <= gpu_arch::Xe)>> {
11521152
using dtype_in = dtype_in_;
11531153
using mem_desc_in_t
11541154
= mem_desc_t<dtype_in, mem_layout::row_major, mem_space::global>;

tests/utils/profiling.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ class profiling_helper {
103103
//time mean square error
104104
for (int i = 1; i < iter; i++) {
105105
#if (__LIBSYCL_MAJOR_VERSION >= 7) && (__LIBSYCL_MINOR_VERSION >= 1)
106-
stat.variance += sycl::pow(time[i] - stat.mean, 2);
106+
stat.variance += sycl::pow(time[i] - stat.mean, (double)2);
107107
#else
108108
stat.variance += pow(time[i] - stat.mean, 2);
109109
#endif

0 commit comments

Comments
 (0)