@@ -106,19 +106,31 @@ tile_load(tile_t& tile, payload_t& payload) {
106106 static constexpr bool mem_transform = payload_t ::mem_transform;
107107
108108 using load_store_attr = load_store_attr_t <msg_type::block_2d, arch_tag>;
109+
110+ // static constexpr uint32_t max_load_width_in_elem = trans
111+ // ? load_store_attr::max_trans_load_width_in_bytes / sizeof(dtype)
112+ // : load_store_attr::max_load_width_in_bytes / sizeof(dtype);
113+ // static constexpr uint32_t max_load_height_in_elem = trans
114+ // ? load_store_attr::max_trans_load_height_in_elem
115+ // : load_store_attr::max_load_height_in_elem;
116+ static constexpr uint32_t max_trans_load_width_in_elem =
117+ load_store_attr::max_trans_load_width_in_bytes / sizeof (dtype);
118+ static constexpr uint32_t max_load_width_in_elem =
119+ load_store_attr::max_load_width_in_bytes / sizeof (dtype);
120+
121+ // static constexpr uint32_t max_trans_load_height_in_elem =
122+ // load_store_attr::max_trans_load_height_in_elem;
123+ static constexpr uint32_t max_load_height_in_elem =
124+ load_store_attr::max_load_height_in_elem;
125+
109126 static constexpr uint32_t elems_per_CL =
110127 load_store_attr::cache_line_size_in_bytes / sizeof (dtype);
128+
111129 static constexpr uint32_t elems_per_reg =
112130 register_bytes_t <arch_tag>::reg_in_bytes / sizeof (dtype);
113- static constexpr int32_t max_load_block_height =
114- load_store_attr::max_load_height_in_elem;
115- static constexpr int32_t max_block_width =
116- load_store_attr::max_load_width_in_bytes / sizeof (dtype);
117- static constexpr int32_t max_trans_block_width =
118- load_store_attr::max_trans_load_width_in_bytes / sizeof (dtype);
119131
120132 static constexpr uint32_t ld_blk_size_y_limit =
121- mem_transpose ? max_trans_block_width : max_load_block_height ;
133+ mem_transpose ? max_trans_load_width_in_elem : max_load_height_in_elem ;
122134 static constexpr uint32_t ld_blk_size_y = reg_transpose
123135 ? block_size_y
124136 : std::min (ld_blk_size_y_limit, block_size_y);
@@ -150,20 +162,21 @@ tile_load(tile_t& tile, payload_t& payload) {
150162
151163 static_assert (
152164 reg_transpose || mem_transpose ||
153- (!mem_transpose && (block_size_x * arr_len) <= max_block_width),
165+ (!mem_transpose &&
166+ (block_size_x * arr_len) <= max_load_width_in_elem),
154167 " When reg_transpose was disabled, check 2d block width "
155168 " restriction" );
156169 static_assert (
157170 !reg_transpose ||
158171 (!mem_transpose &&
159- (block_size_x * arr_len) <= max_trans_block_width ) ||
160- (mem_transpose && (block_size_y * arr_len) <= max_block_width ),
172+ (block_size_x * arr_len) <= max_trans_load_width_in_elem ) ||
173+ (mem_transpose && (block_size_y * arr_len) <= max_load_width_in_elem ),
161174 " When reg_transpose was enabled, check 2d block width "
162175 " restriction" );
163176 static_assert (
164177 !reg_transpose ||
165- (!mem_transpose && (block_size_y <= max_load_block_height )) ||
166- (mem_transpose && (block_size_x) <= max_load_block_height ),
178+ (!mem_transpose && (block_size_y <= max_load_height_in_elem )) ||
179+ (mem_transpose && (block_size_x) <= max_load_height_in_elem ),
167180 " When reg_transpose was enabled, check 2d block height "
168181 " restriction" );
169182 static_assert (
0 commit comments