
Commit 994b0bc

vsbelgaum authored and gregkh committed
drm/xe/bmg: Update Wa_22019338487
[ Upstream commit 84c0b4a ]

Limit GT max frequency to 2600MHz and wait for frequency to reduce
before proceeding with a transient flush. This is really only needed
for the transient flush: if L2 flush is needed due to 16023588340 then
there's no need to do this additional wait since we are already using
the bigger hammer.

v2: Use generic names, ensure user set max frequency requests wait for
    flush to complete (Rodrigo)
v3:
  - User requests wait via wait_var_event_timeout (Lucas)
  - Close races on flush + user requests (Lucas)
  - Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt
    rather than root gt (Lucas)
v4:
  - Only apply the freq reducing part if a TDF is needed: L2 flush
    trumps the need for waiting a lower frequency

Fixes: aaa0807 ("drm/xe/bmg: Apply Wa_22019338487")
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-4-b888388477f2@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit deea6a7)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent beb89ad commit 994b0bc
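
Before reading the hunks, the sketch below condenses the control flow this backport gives xe_device_td_flush(): the Wa_16023588340 path keeps using the L2 flush, while the transient-flush path is now bracketed by the new GuC PC freq-limit helpers. Function names match the diff, but the body is a simplified outline (forcewake handling and the XE2_TDF_CTRL poll are elided), not the exact code.

/* Simplified outline of the reworked xe_device_td_flush(); see the hunks below */
void xe_device_td_flush(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	u8 id;

	if (XE_WA(gt, 16023588340)) {
		/* L2 flush is the bigger hammer: no need to also cap the frequency */
		xe_device_l2_flush(xe);
	} else {
		/* Wa_22019338487: cap GT max freq at 2600 MHz and wait for it to drop */
		xe_guc_pc_apply_flush_freq_limit(&gt->uc.guc.pc);

		for_each_gt(gt, xe, id) {
			if (xe_gt_is_media_type(gt))
				continue;
			/* write XE2_TDF_CTRL and poll for completion (elided here) */
		}

		/* restore the stashed max freq and wake any waiting user requests */
		xe_guc_pc_remove_flush_freq_limit(&xe_root_mmio_gt(xe)->uc.guc.pc);
	}
}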

4 files changed: +179 -24 lines changed

drivers/gpu/drm/xe/xe_device.c

Lines changed: 31 additions & 24 deletions
@@ -37,6 +37,7 @@
 #include "xe_gt_printk.h"
 #include "xe_gt_sriov_vf.h"
 #include "xe_guc.h"
+#include "xe_guc_pc.h"
 #include "xe_hw_engine_group.h"
 #include "xe_hwmon.h"
 #include "xe_irq.h"
@@ -871,31 +872,37 @@ void xe_device_td_flush(struct xe_device *xe)
 	if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
 		return;
 
-	if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
+	gt = xe_root_mmio_gt(xe);
+	if (XE_WA(gt, 16023588340)) {
+		/* A transient flush is not sufficient: flush the L2 */
 		xe_device_l2_flush(xe);
-		return;
-	}
-
-	for_each_gt(gt, xe, id) {
-		if (xe_gt_is_media_type(gt))
-			continue;
-
-		if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
-			return;
-
-		xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
-		/*
-		 * FIXME: We can likely do better here with our choice of
-		 * timeout. Currently we just assume the worst case, i.e. 150us,
-		 * which is believed to be sufficient to cover the worst case
-		 * scenario on current platforms if all cache entries are
-		 * transient and need to be flushed..
-		 */
-		if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
-				   150, NULL, false))
-			xe_gt_err_once(gt, "TD flush timeout\n");
-
-		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	} else {
+		xe_guc_pc_apply_flush_freq_limit(&gt->uc.guc.pc);
+
+		/* Execute TDF flush on all graphics GTs */
+		for_each_gt(gt, xe, id) {
+			if (xe_gt_is_media_type(gt))
+				continue;
+
+			if (xe_force_wake_get(gt_to_fw(gt), XE_FW_GT))
+				return;
+
+			xe_mmio_write32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+			/*
+			 * FIXME: We can likely do better here with our choice of
+			 * timeout. Currently we just assume the worst case, i.e. 150us,
+			 * which is believed to be sufficient to cover the worst case
+			 * scenario on current platforms if all cache entries are
+			 * transient and need to be flushed..
+			 */
+			if (xe_mmio_wait32(gt, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
+					   150, NULL, false))
+				xe_gt_err_once(gt, "TD flush timeout\n");
+
+			xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+		}
+
+		xe_guc_pc_remove_flush_freq_limit(&xe_root_mmio_gt(xe)->uc.guc.pc);
 	}
 }

drivers/gpu/drm/xe/xe_guc_pc.c

Lines changed: 144 additions & 0 deletions
@@ -6,6 +6,9 @@
 #include "xe_guc_pc.h"
 
 #include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/ktime.h>
+#include <linux/wait_bit.h>
 
 #include <drm/drm_managed.h>
 #include <generated/xe_wa_oob.h>
@@ -47,6 +50,12 @@
 
 #define LNL_MERT_FREQ_CAP 800
 #define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
+
+#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
+#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
 
 /**
  * DOC: GuC Power Conservation (PC)
@@ -133,6 +142,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc,
 	return -ETIMEDOUT;
 }
 
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+	const unsigned long timeout = msecs_to_jiffies(30);
+
+	if (!wait_var_event_timeout(&pc->flush_freq_limit,
+				    !atomic_read(&pc->flush_freq_limit),
+				    timeout))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq)
+{
+	int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC;
+	int slept, wait = 10;
+
+	for (slept = 0; slept < timeout_us;) {
+		if (xe_guc_pc_get_act_freq(pc) <= freq)
+			return 0;
+
+		usleep_range(wait, wait << 1);
+		slept += wait;
+		wait <<= 1;
+		if (slept + wait > timeout_us)
+			wait = timeout_us - slept;
+	}
+
+	return -ETIMEDOUT;
+}
 static int pc_action_reset(struct xe_guc_pc *pc)
 {
 	struct xe_guc_ct *ct = pc_to_ct(pc);
@@ -584,6 +623,11 @@ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
 {
 	int ret;
 
+	if (XE_WA(pc_to_gt(pc), 22019338487)) {
+		if (wait_for_flush_complete(pc) != 0)
+			return -EAGAIN;
+	}
+
 	mutex_lock(&pc->freq_lock);
 	if (!pc->freq_ready) {
 		/* Might be in the middle of a gt reset */
@@ -793,6 +837,106 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
 	return ret;
 }
 
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+
+	return XE_WA(gt, 22019338487) &&
+	       pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce max GT frequency during L2 cache flush
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	u32 max_freq;
+	int ret;
+
+	if (!needs_flush_freq_limit(pc))
+		return;
+
+	mutex_lock(&pc->freq_lock);
+
+	if (!pc->freq_ready) {
+		mutex_unlock(&pc->freq_lock);
+		return;
+	}
+
+	ret = pc_action_query_task_state(pc);
+	if (ret) {
+		mutex_unlock(&pc->freq_lock);
+		return;
+	}
+
+	max_freq = pc_get_max_freq(pc);
+	if (max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+		ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+		if (ret) {
+			xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+				       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+			mutex_unlock(&pc->freq_lock);
+			return;
+		}
+
+		atomic_set(&pc->flush_freq_limit, 1);
+
+		/*
+		 * If user has previously changed max freq, stash that value to
+		 * restore later, otherwise use the current max. New user
+		 * requests wait on flush.
+		 */
+		if (pc->user_requested_max != 0)
+			pc->stashed_max_freq = pc->user_requested_max;
+		else
+			pc->stashed_max_freq = max_freq;
+	}
+
+	mutex_unlock(&pc->freq_lock);
+
+	/*
+	 * Wait for actual freq to go below the flush cap: even if the previous
+	 * max was below cap, the current one might still be above it
+	 */
+	ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+	if (ret)
+		xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
			       BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Retrieve the previous GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+	struct xe_gt *gt = pc_to_gt(pc);
+	int ret = 0;
+
+	if (!needs_flush_freq_limit(pc))
+		return;
+
+	if (!atomic_read(&pc->flush_freq_limit))
+		return;
+
+	mutex_lock(&pc->freq_lock);
+
+	ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+	if (ret)
+		xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+			       pc->stashed_max_freq, ret);
+
+	atomic_set(&pc->flush_freq_limit, 0);
+	mutex_unlock(&pc->freq_lock);
+	wake_up_var(&pc->flush_freq_limit);
+}
+
 static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
 {
 	int ret = 0;
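
The race closure described in the commit message comes down to the new atomic flush_freq_limit flag paired with wait_var_event_timeout()/wake_up_var(): the flush path publishes the cap before touching XE2_TDF_CTRL, and xe_guc_pc_set_max_freq() parks incoming sysfs requests until the flag clears. The fragment below is a minimal sketch of that pairing with locking, freq stashing, and error reporting trimmed; the flush_side()/user_side() helpers are illustrative names, not part of the driver.

/* Illustrative sketch of the wait/wake handshake used above (not buildable standalone) */
static void flush_side(struct xe_guc_pc *pc)
{
	atomic_set(&pc->flush_freq_limit, 1);	/* cap published before the TDF */
	/* ... transient data flush runs with the 2600 MHz cap in place ... */
	atomic_set(&pc->flush_freq_limit, 0);	/* cap lifted once the flush is done */
	wake_up_var(&pc->flush_freq_limit);	/* unblock pending user requests */
}

static int user_side(struct xe_guc_pc *pc)
{
	/* sysfs max-freq writes wait here for up to 30 ms while a flush is running */
	if (!wait_var_event_timeout(&pc->flush_freq_limit,
				    !atomic_read(&pc->flush_freq_limit),
				    msecs_to_jiffies(30)))
		return -EAGAIN;	/* flush still in flight; the caller may retry */

	return 0;	/* safe to take freq_lock and apply the new max */
}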

drivers/gpu/drm/xe/xe_guc_pc.h

Lines changed: 2 additions & 0 deletions
@@ -34,5 +34,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
 void xe_guc_pc_init_early(struct xe_guc_pc *pc);
 int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
 void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
 
 #endif /* _XE_GUC_PC_H_ */

drivers/gpu/drm/xe/xe_guc_pc_types.h

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,8 @@
 struct xe_guc_pc {
 	/** @bo: GGTT buffer object that is shared with GuC PC */
 	struct xe_bo *bo;
+	/** @flush_freq_limit: 1 when max freq changes are limited by driver */
+	atomic_t flush_freq_limit;
 	/** @rp0_freq: HW RP0 frequency - The Maximum one */
 	u32 rp0_freq;
 	/** @rpe_freq: HW RPe frequency - The Efficient one */
