This repository was archived by the owner on Aug 30, 2024. It is now read-only.
File tree Expand file tree Collapse file tree 2 files changed +4
-2
lines changed
include/subgroup/tile/impl Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -93,6 +93,7 @@ tile_load(tile_t& tile, payload_t& payload) {
9393 static constexpr gpu_arch arch_tag = payload_t ::arch_tag;
9494
9595 static constexpr reg_layout reg_layout_ = tile_desc::register_layout;
96+ // In the case of pack, transpose is in vnni format
9697 static constexpr bool is_vnni_reverse =
9798 payload_t ::mem_transpose_dtype_less4bytes &&
9899 ((reg_layout_ == reg_layout::tiled) ||
@@ -188,14 +189,13 @@ tile_load(tile_t& tile, payload_t& payload) {
188189 ((block_size_y * sizeof (dtype)) % sizeof (load_dtype) == 0 ),
189190 " check vnni limitation for DW transpose" );
190191
191- // auto payload_2d = payload.payloads.xetla_format<uint32_t, num_block, 16>();
192192#pragma unroll
193193 for (uint32_t i = 0 ; i < num_block_y; ++i) {
194- constexpr uint32_t load_block_elems = block_elems * arr_len;
195194 int offset_y = i * block_size_y;
196195#pragma unroll
197196 for (uint32_t j = 0 ; j < num_block_x; j += arr_len) {
198197 int32_t offset_x = j * block_size_x;
198+ constexpr uint32_t load_block_elems = block_elems * arr_len;
199199 auto reg_blk = tile.reg .xetla_select <load_block_elems, 1 >(
200200 (i * num_block_x + j) * block_elems);
201201 constexpr uint32_t ld_blk_height = (reg_transpose && trans)
Original file line number Diff line number Diff line change @@ -231,12 +231,14 @@ struct mem_payload_t<
231231 __XETLA_API void update_tdesc (int offset) {
232232 auto payloads_2d = payloads.xetla_format <uint32_t , num_block, 16 >();
233233 if constexpr (update_dir == tdesc_update_dir::x_dir) {
234+ offset_x += offset / scale_factor;
234235#pragma unroll
235236 for (uint32_t i = 0 ; i < num_block; i++) {
236237 xetla_update_tdesc_offsetx (
237238 payloads_2d.row (i), offset / int32_t (scale_factor));
238239 }
239240 } else {
241+ offset_y += offset;
240242#pragma unroll
241243 for (uint32_t i = 0 ; i < num_block; i++) {
242244 xetla_update_tdesc_offsety (payloads_2d.row (i), offset);
You can’t perform that action at this time.
0 commit comments