@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
165165 return MI_ARB_CHECK | 1 << 8 | state ;
166166}
167167
168- u32 * gen12_emit_aux_table_inv (struct intel_gt * gt , u32 * cs , const i915_reg_t inv_reg )
/*
 * gen12_get_aux_inv_reg - look up the AUX invalidation register for an engine.
 *
 * The register is selected purely by engine->id. Only RCS0, BCS0, VCS0, VCS2,
 * VECS0 and CCS0 have an entry; every other engine id yields
 * INVALID_MMIO_REG, which callers in this file detect with
 * i915_mmio_reg_valid().
 */
168+ static i915_reg_t gen12_get_aux_inv_reg (struct intel_engine_cs * engine )
169169{
170- u32 gsi_offset = gt -> uncore -> gsi_offset ;
170+ switch (engine -> id ) {
171+ case RCS0 :
172+ return GEN12_CCS_AUX_INV ;
173+ case BCS0 :
174+ return GEN12_BCS0_AUX_INV ;
175+ case VCS0 :
176+ return GEN12_VD0_AUX_INV ;
177+ case VCS2 :
178+ return GEN12_VD2_AUX_INV ;
179+ case VECS0 :
180+ return GEN12_VE0_AUX_INV ;
181+ case CCS0 :
182+ return GEN12_CCS0_AUX_INV ;
/* Any engine id not listed above has no AUX invalidation register here. */
183+ default :
184+ return INVALID_MMIO_REG ;
185+ }
186+ }
187+
/*
 * gen12_needs_ccs_aux_inv - tell whether AUX invalidation applies to @engine.
 *
 * Returns false on Pontevecchio, on platforms reporting HAS_FLAT_CCS(), and
 * for engines for which gen12_get_aux_inv_reg() has no valid register.
 * Returns true only when the engine has a valid AUX invalidation register
 * and the platform does not use flat CCS.
 */
188+ static bool gen12_needs_ccs_aux_inv (struct intel_engine_cs * engine )
189+ {
190+ i915_reg_t reg = gen12_get_aux_inv_reg (engine );
191+
/* Pontevecchio is excluded outright, before the register/flat-CCS test. */
192+ if (IS_PONTEVECCHIO (engine -> i915 ))
193+ return false;
194+
195+ /*
196+ * So far platforms supported by i915 having flat ccs do not require
197+ * AUX invalidation. Check also whether the engine requires it.
198+ */
199+ return i915_mmio_reg_valid (reg ) && !HAS_FLAT_CCS (engine -> i915 );
200+ }
201+
/*
 * gen12_emit_aux_table_inv - append the AUX table invalidation commands.
 * @engine: engine whose AUX invalidation register is targeted
 * @cs: write position in the command stream
 *
 * Returns the advanced write position. When gen12_needs_ccs_aux_inv() is
 * false nothing is written and @cs is returned unchanged.
 *
 * Otherwise 8 dwords are written: a 3-dword MI_LOAD_REGISTER_IMM that stores
 * AUX_INV into the engine's invalidation register (register offset plus the
 * uncore gsi_offset), followed by a 5-dword semaphore wait that references
 * the same register offset. The callers visible in this file reserve exactly
 * 8 extra dwords in intel_ring_begin() when gen12_needs_ccs_aux_inv() is
 * true, so the two counts must be kept in sync.
 */
202+ u32 * gen12_emit_aux_table_inv (struct intel_engine_cs * engine , u32 * cs )
203+ {
204+ i915_reg_t inv_reg = gen12_get_aux_inv_reg (engine );
205+ u32 gsi_offset = engine -> gt -> uncore -> gsi_offset ;
206+
/* Nothing to emit for engines/platforms that do not need the invalidation. */
207+ if (!gen12_needs_ccs_aux_inv (engine ))
208+ return cs ;
171209
/* Dwords 1-3: load-register-immediate writing AUX_INV to the register. */
172210 * cs ++ = MI_LOAD_REGISTER_IMM (1 ) | MI_LRI_MMIO_REMAP_EN ;
173211 * cs ++ = i915_mmio_reg_offset (inv_reg ) + gsi_offset ;
174212 * cs ++ = AUX_INV ;
175- * cs ++ = MI_NOOP ;
213+
/*
 * Dwords 4-8: semaphore wait in register-poll mode on the same register.
 * NOTE(review): with SAD_EQ_SDD and a data dword of 0 this presumably
 * stalls until the register reads back 0, i.e. the invalidation has
 * completed - confirm against the hardware command reference.
 */
214+ * cs ++ = MI_SEMAPHORE_WAIT_TOKEN |
215+ MI_SEMAPHORE_REGISTER_POLL |
216+ MI_SEMAPHORE_POLL |
217+ MI_SEMAPHORE_SAD_EQ_SDD ;
218+ * cs ++ = 0 ;
219+ * cs ++ = i915_mmio_reg_offset (inv_reg ) + gsi_offset ;
220+ * cs ++ = 0 ;
221+ * cs ++ = 0 ;
176222
177223 return cs ;
178224}
@@ -202,41 +248,54 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
202248{
203249 struct intel_engine_cs * engine = rq -> engine ;
204250
205- if (mode & EMIT_FLUSH ) {
206- u32 flags = 0 ;
251+ /*
252+ * On Aux CCS platforms the invalidation of the Aux
253+ * table requires quiescing memory traffic beforehand
254+ */
255+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv (engine )) {
256+ u32 bit_group_0 = 0 ;
257+ u32 bit_group_1 = 0 ;
207258 int err ;
208259 u32 * cs ;
209260
210261 err = mtl_dummy_pipe_control (rq );
211262 if (err )
212263 return err ;
213264
214- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH ;
215- flags |= PIPE_CONTROL_FLUSH_L3 ;
216- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH ;
217- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH ;
265+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH ;
266+
267+ /*
268+ * When required, in MTL and beyond platforms we
269+ * need to set the CCS_FLUSH bit in the pipe control
270+ */
271+ if (GRAPHICS_VER_FULL (rq -> i915 ) >= IP_VER (12 , 70 ))
272+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH ;
273+
274+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH ;
275+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3 ;
276+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH ;
277+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH ;
218278 /* Wa_1409600907:tgl,adl-p */
219- flags |= PIPE_CONTROL_DEPTH_STALL ;
220- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE ;
221- flags |= PIPE_CONTROL_FLUSH_ENABLE ;
279+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL ;
280+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE ;
281+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE ;
222282
223- flags |= PIPE_CONTROL_STORE_DATA_INDEX ;
224- flags |= PIPE_CONTROL_QW_WRITE ;
283+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX ;
284+ bit_group_1 |= PIPE_CONTROL_QW_WRITE ;
225285
226- flags |= PIPE_CONTROL_CS_STALL ;
286+ bit_group_1 |= PIPE_CONTROL_CS_STALL ;
227287
228288 if (!HAS_3D_PIPELINE (engine -> i915 ))
229- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS ;
289+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS ;
230290 else if (engine -> class == COMPUTE_CLASS )
231- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
291+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
232292
233293 cs = intel_ring_begin (rq , 6 );
234294 if (IS_ERR (cs ))
235295 return PTR_ERR (cs );
236296
237- cs = gen12_emit_pipe_control (cs ,
238- PIPE_CONTROL0_HDC_PIPELINE_FLUSH ,
239- flags , LRC_PPHWSP_SCRATCH_ADDR );
297+ cs = gen12_emit_pipe_control (cs , bit_group_0 , bit_group_1 ,
298+ LRC_PPHWSP_SCRATCH_ADDR );
240299 intel_ring_advance (rq , cs );
241300 }
242301
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
267326 else if (engine -> class == COMPUTE_CLASS )
268327 flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS ;
269328
270- if (!HAS_FLAT_CCS (rq -> engine -> i915 ))
271- count = 8 + 4 ;
272- else
273- count = 8 ;
329+ count = 8 ;
330+ if (gen12_needs_ccs_aux_inv (rq -> engine ))
331+ count += 8 ;
274332
275333 cs = intel_ring_begin (rq , count );
276334 if (IS_ERR (cs ))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
285343
286344 cs = gen8_emit_pipe_control (cs , flags , LRC_PPHWSP_SCRATCH_ADDR );
287345
288- if (!HAS_FLAT_CCS (rq -> engine -> i915 )) {
289- /* hsdes: 1809175790 */
290- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
291- cs , GEN12_GFX_CCS_AUX_NV );
292- }
346+ cs = gen12_emit_aux_table_inv (engine , cs );
293347
294348 * cs ++ = preparser_disable (false);
295349 intel_ring_advance (rq , cs );
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
300354
301355int gen12_emit_flush_xcs (struct i915_request * rq , u32 mode )
302356{
303- intel_engine_mask_t aux_inv = 0 ;
304- u32 cmd , * cs ;
357+ u32 cmd = 4 ;
358+ u32 * cs ;
305359
306- cmd = 4 ;
307360 if (mode & EMIT_INVALIDATE ) {
308361 cmd += 2 ;
309362
310- if (!HAS_FLAT_CCS (rq -> engine -> i915 ) &&
311- (rq -> engine -> class == VIDEO_DECODE_CLASS ||
312- rq -> engine -> class == VIDEO_ENHANCEMENT_CLASS )) {
313- aux_inv = rq -> engine -> mask &
314- ~GENMASK (_BCS (I915_MAX_BCS - 1 ), BCS0 );
315- if (aux_inv )
316- cmd += 4 ;
317- }
363+ if (gen12_needs_ccs_aux_inv (rq -> engine ))
364+ cmd += 8 ;
318365 }
319366
320367 cs = intel_ring_begin (rq , cmd );
@@ -338,21 +385,18 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
338385 cmd |= MI_INVALIDATE_TLB ;
339386 if (rq -> engine -> class == VIDEO_DECODE_CLASS )
340387 cmd |= MI_INVALIDATE_BSD ;
388+
389+ if (gen12_needs_ccs_aux_inv (rq -> engine ) &&
390+ rq -> engine -> class == COPY_ENGINE_CLASS )
391+ cmd |= MI_FLUSH_DW_CCS ;
341392 }
342393
343394 * cs ++ = cmd ;
344395 * cs ++ = LRC_PPHWSP_SCRATCH_ADDR ;
345396 * cs ++ = 0 ; /* upper addr */
346397 * cs ++ = 0 ; /* value */
347398
348- if (aux_inv ) { /* hsdes: 1809175790 */
349- if (rq -> engine -> class == VIDEO_DECODE_CLASS )
350- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
351- cs , GEN12_VD0_AUX_NV );
352- else
353- cs = gen12_emit_aux_table_inv (rq -> engine -> gt ,
354- cs , GEN12_VE0_AUX_NV );
355- }
399+ cs = gen12_emit_aux_table_inv (rq -> engine , cs );
356400
357401 if (mode & EMIT_INVALIDATE )
358402 * cs ++ = preparser_disable (false);
0 commit comments