Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 23191ac

Browse files
sunjiweiswift authored and
DDEle committed
update ESIMD API
1 parent 4c7560c commit 23191ac

File tree

1 file changed

+169
-3
lines changed

1 file changed

+169
-3
lines changed

include/common/core/memory.hpp

Lines changed: 169 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,53 @@ __XETLA_API void xetla_prefetch_global(
320320
#endif
321321
}
322322

323+
/// 2D USM pointer block prefetch.
324+
/// Supported platforms: PVC
325+
/// VISA instruction: lsc_load_block2d.ugm
326+
///
327+
/// Prefetches elements located at specified address.
328+
///
329+
/// @tparam T is element type.
330+
/// @tparam BlockWidth is the block width in number of elements.
331+
/// @tparam BlockHeight is the block height in number of elements.
332+
/// @tparam NBlocks is the number of blocks.
333+
/// @tparam L1H is L1 cache hint.
334+
/// @tparam L2H is L2 cache hint.
335+
/// @tparam N is the data size
336+
/// @param Ptr is the surface base address for this operation.
337+
/// @param SurfaceWidth is the surface width minus 1 in bytes
338+
/// @param SurfaceHeight is the surface height minus 1 in rows
339+
/// @param SurfacePitch is the surface pitch minus 1 in bytes
340+
/// @param X is zero based X-coordinate of the left upper rectangle corner in
341+
/// number of elements.
342+
/// @param Y is zero based Y-coordinate of the left upper rectangle corner in
343+
/// rows.
344+
///
345+
template <
346+
typename T,
347+
int BlockWidth,
348+
int BlockHeight = 1,
349+
int NBlocks = 1,
350+
cache_hint L1H = cache_hint::none,
351+
cache_hint L2H = cache_hint::none,
352+
int N = __ESIMD_ENS::detail::get_lsc_block_2d_data_size<
353+
T,
354+
NBlocks,
355+
BlockHeight,
356+
BlockWidth,
357+
false,
358+
false>()>
359+
__XETLA_API void xetla_prefetch_global(
360+
const T* Ptr,
361+
unsigned SurfaceWidth,
362+
unsigned SurfaceHeight,
363+
unsigned SurfacePitch,
364+
int X,
365+
int Y) {
366+
return __ESIMD_ENS::lsc_prefetch_2d(
367+
Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
368+
}
369+
323370
/// template <typename T, int VS = 1, typename OffsetT,
324371
/// typename PropertyListT = empty_properties_t>
325372
/// void prefetch(const T *p, OffsetT byte_offset,
@@ -358,14 +405,71 @@ __XETLA_API void xetla_prefetch_global(T* p, uint64_t byte_offset = 0) {
358405
#endif
359406
}
360407

408+
/// 2D USM pointer block load.
409+
/// Supported platforms: PVC
410+
/// VISA instruction: lsc_load_block2d.ugm
411+
///
412+
/// Collects elements located at specified address and returns them
413+
/// as a single \ref simd object.
414+
///
415+
/// @tparam T is element type.
416+
/// @tparam BlockWidth is the block width in number of elements.
417+
/// @tparam BlockHeight is the block height in number of elements.
418+
/// @tparam NBlocks is the number of blocks.
419+
/// @tparam Transposed is the transposed version or not.
420+
/// @tparam Transformed is apply VNNI transform or not.
421+
/// @tparam L1H is L1 cache hint.
422+
/// @tparam L2H is L2 cache hint.
423+
/// @tparam N is the data size
424+
/// @param Ptr is the surface base address for this operation.
425+
/// @param SurfaceWidth is the surface width minus 1 in bytes
426+
/// @param SurfaceHeight is the surface height minus 1 in rows
427+
/// @param SurfacePitch is the surface pitch minus 1 in bytes
428+
/// @param X is zero based X-coordinate of the left upper rectangle corner in
429+
/// number of elements.
430+
/// @param Y is zero based Y-coordinate of the left upper rectangle corner in
431+
/// rows.
432+
/// @return is a vector of type T and size N, where N is
433+
/// BlockWidth * BlockHeight * NBlocks, if transformed;
434+
/// otherwise,
435+
/// N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) *
436+
/// getNextPowerOf2(BlockWidth) * NBlocks
437+
///
438+
template <
439+
typename T,
440+
int BlockWidth,
441+
int BlockHeight = 1,
442+
int NBlocks = 1,
443+
bool Transposed = false,
444+
bool Transformed = false,
445+
cache_hint L1H = cache_hint::none,
446+
cache_hint L2H = cache_hint::none,
447+
int N = __ESIMD_ENS::detail::get_lsc_block_2d_data_size<
448+
T,
449+
NBlocks,
450+
BlockHeight,
451+
BlockWidth,
452+
Transposed,
453+
Transformed>()>
454+
__XETLA_API xetla_vector<T, N> xetla_load_global(
455+
const T* Ptr,
456+
size_t SurfaceWidth,
457+
size_t SurfaceHeight,
458+
size_t SurfacePitch,
459+
int X,
460+
int Y) {
461+
return __ESIMD_ENS::lsc_load_2d(
462+
Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y);
463+
}
464+
361465
/// simd<T, N> block_load(const T* ptr, size_t byte_offset,
362466
/// props={}); // (usm-bl-2)
363467
/// This function loads a contiguous memory block from address referenced
364468
/// by USM pointer \p ptr and the given \p byte_offset.
365469
///
366470
/// There may be temporary restrictions depending on L1, L2 cache hints,
367-
/// See details in the 'Restrictions' section below. The restrictions will be
368-
/// relaxed in the future.
471+
/// See details in the 'Restrictions' section below. The restrictions will
472+
/// be relaxed in the future.
369473
///
370474
/// The parameter \p props specifies the optional compile-time properties
371475
/// of the type esimd::properties and may include esimd::cache_hint_L1,
@@ -383,7 +487,8 @@ __XETLA_API void xetla_prefetch_global(T* p, uint64_t byte_offset = 0) {
383487
///
384488
/// Restrictions - cache hint imposed - temporary:
385489
/// If L1 or L2 cache hint is passed, then:
386-
/// R1: The pointer must be at least 4-byte aligned for elements of 4-bytes or
490+
/// R1: The pointer must be at least 4-byte aligned for elements of 4-bytes
491+
/// or
387492
/// smaller and 8-byte aligned for 8-byte elements.
388493
/// R2: The number of elements for 8-byte data: 1, 2, 3, 4, 8, 16, 32, 64;
389494
/// for 4-byte data: 1, 2, 3, 4, 8, 16, 32, 64,
@@ -574,6 +679,54 @@ __XETLA_API xetla_vector<T, N> xetla_load_global(
574679
#endif
575680
}
576681

682+
/// 2D USM pointer block store.
683+
/// Supported platforms: PVC
684+
/// VISA instruction: lsc_store_block2d.ugm
685+
///
686+
/// Stores elements at specified address.
687+
///
688+
/// @tparam T is element type.
689+
/// @tparam BlockWidth is the block width in number of elements.
690+
/// @tparam BlockHeight is the block height in number of elements.
691+
/// @tparam L1H is L1 cache hint.
692+
/// @tparam L2H is L2 cache hint.
693+
/// @tparam N is the data size
694+
/// @param Ptr is the surface base address for this operation.
695+
/// @param SurfaceWidth is the surface width minus 1 in bytes
696+
/// @param SurfaceHeight is the surface height minus 1 in rows
697+
/// @param SurfacePitch is the surface pitch minus 1 in bytes
698+
/// @param X is zero based X-coordinate of the left upper rectangle corner in
699+
/// number of elements.
700+
/// @param Y is zero based Y-coordinate of the left upper rectangle corner in
701+
/// rows.
702+
/// @param Vals is a vector to store of type T and size N, where
703+
/// N = roundUpNextMultiple(BlockHeight, 4 / sizeof(T)) *
704+
/// getNextPowerOf2(BlockWidth) * NBlocks
705+
///
706+
template <
707+
typename T,
708+
int BlockWidth,
709+
int BlockHeight = 1,
710+
cache_hint L1H = cache_hint::none,
711+
cache_hint L2H = cache_hint::none,
712+
int N = __ESIMD_ENS::detail::get_lsc_block_2d_data_size<
713+
T,
714+
1u,
715+
BlockHeight,
716+
BlockWidth,
717+
false,
718+
false>()>
719+
__XETLA_API void xetla_store_global(
720+
T* Ptr,
721+
unsigned SurfaceWidth,
722+
unsigned SurfaceHeight,
723+
unsigned SurfacePitch,
724+
int X,
725+
int Y,
726+
xetla_vector<T, N> Vals) {
727+
__ESIMD_ENS::lsc_store_2d(
728+
Ptr, SurfaceWidth, SurfaceHeight, SurfacePitch, X, Y, Vals);
729+
}
577730
/// template <typename T, int N, int VS = 1, typename OffsetT,
578731
/// typename PropertyListT = empty_properties_t>
579732
/// void scatter(T *p, simd<OffsetT, N / VS> byte_offsets, simd<T, N> vals,
@@ -951,6 +1104,10 @@ __XETLA_API xetla_vector<Ty, N * NElts> xetla_load_local(
9511104
xetla_vector<uint32_t, N> offsets,
9521105
xetla_mask<N> pred = 1) {
9531106
using T = native_type_t<Ty>;
1107+
DEBUG_INVOKE(
1108+
dbg_level::core,
1109+
core::general_1d<gpu_arch::XeHpc, Ty>::
1110+
template check_restriction<NElts, N>(offsets));
9541111

9551112
return __ESIMD_ENS::
9561113
lsc_slm_gather<T, NElts, gpu::xetla::detail::get_data_size(DS), N>(
@@ -975,6 +1132,11 @@ __XETLA_API xetla_vector<Ty, N * NElts> xetla_load_local(
9751132
template <typename Ty, int NElts = 1, data_size DS = data_size::default_size>
9761133
__XETLA_API xetla_vector<Ty, NElts> xetla_load_local(uint32_t offset) {
9771134
using T = native_type_t<Ty>;
1135+
// DEBUG_INVOKE(
1136+
// dbg_level::core,
1137+
// core::general_1d<gpu_arch::XeHpc, Ty>::template
1138+
// check_restriction<NElts>(
1139+
// (uint64_t)offset));
9781140

9791141
return __ESIMD_NS::slm_block_load<T, NElts>(offset);
9801142
}
@@ -1005,6 +1167,10 @@ __XETLA_API void xetla_store_local(
10051167
xetla_vector<Ty, N * NElts> vals,
10061168
xetla_mask<N> pred = 1) {
10071169
using T = native_type_t<Ty>;
1170+
DEBUG_INVOKE(
1171+
dbg_level::core,
1172+
core::general_1d<gpu_arch::XeHpc, Ty>::
1173+
template check_restriction<NElts, N, uint32_t>(offsets));
10081174

10091175
__ESIMD_ENS::
10101176
lsc_slm_scatter<T, NElts, gpu::xetla::detail::get_data_size(DS), N>(

0 commit comments

Comments
 (0)