Skip to content

Commit 079ae51

Browse files
jokim-amd authored and alexdeucher committed
drm/amdkfd: fix suspend/resume all calls in mes based eviction path
Suspend/resume of all gangs should be done while the device lock is held. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Harish Kasiviswanathan <harish.kasiviswanathan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 277bb0f commit 079ae51

File tree

1 file changed

+21
-52
lines changed

1 file changed

+21
-52
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

Lines changed: 21 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,15 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
12091209
pr_debug_ratelimited("Evicting process pid %d queues\n",
12101210
pdd->process->lead_thread->pid);
12111211

1212+
if (dqm->dev->kfd->shared_resources.enable_mes) {
1213+
pdd->last_evict_timestamp = get_jiffies_64();
1214+
retval = suspend_all_queues_mes(dqm);
1215+
if (retval) {
1216+
dev_err(dev, "Suspending all queues failed");
1217+
goto out;
1218+
}
1219+
}
1220+
12121221
/* Mark all queues as evicted. Deactivate all active queues on
12131222
* the qpd.
12141223
*/
@@ -1221,23 +1230,27 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
12211230
decrement_queue_count(dqm, qpd, q);
12221231

12231232
if (dqm->dev->kfd->shared_resources.enable_mes) {
1224-
int err;
1225-
1226-
err = remove_queue_mes(dqm, q, qpd);
1227-
if (err) {
1233+
retval = remove_queue_mes(dqm, q, qpd);
1234+
if (retval) {
12281235
dev_err(dev, "Failed to evict queue %d\n",
12291236
q->properties.queue_id);
1230-
retval = err;
1237+
goto out;
12311238
}
12321239
}
12331240
}
1234-
pdd->last_evict_timestamp = get_jiffies_64();
1235-
if (!dqm->dev->kfd->shared_resources.enable_mes)
1241+
1242+
if (!dqm->dev->kfd->shared_resources.enable_mes) {
1243+
pdd->last_evict_timestamp = get_jiffies_64();
12361244
retval = execute_queues_cpsch(dqm,
12371245
qpd->is_debug ?
12381246
KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
12391247
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
12401248
USE_DEFAULT_GRACE_PERIOD);
1249+
} else {
1250+
retval = resume_all_queues_mes(dqm);
1251+
if (retval)
1252+
dev_err(dev, "Resuming all queues failed");
1253+
}
12411254

12421255
out:
12431256
dqm_unlock(dqm);
@@ -3098,61 +3111,17 @@ int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbel
30983111
return ret;
30993112
}
31003113

3101-
static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
3102-
struct qcm_process_device *qpd)
3103-
{
3104-
struct device *dev = dqm->dev->adev->dev;
3105-
int ret = 0;
3106-
3107-
/* Check if process is already evicted */
3108-
dqm_lock(dqm);
3109-
if (qpd->evicted) {
3110-
/* Increment the evicted count to make sure the
3111-
* process stays evicted before its terminated.
3112-
*/
3113-
qpd->evicted++;
3114-
dqm_unlock(dqm);
3115-
goto out;
3116-
}
3117-
dqm_unlock(dqm);
3118-
3119-
ret = suspend_all_queues_mes(dqm);
3120-
if (ret) {
3121-
dev_err(dev, "Suspending all queues failed");
3122-
goto out;
3123-
}
3124-
3125-
ret = dqm->ops.evict_process_queues(dqm, qpd);
3126-
if (ret) {
3127-
dev_err(dev, "Evicting process queues failed");
3128-
goto out;
3129-
}
3130-
3131-
ret = resume_all_queues_mes(dqm);
3132-
if (ret)
3133-
dev_err(dev, "Resuming all queues failed");
3134-
3135-
out:
3136-
return ret;
3137-
}
3138-
31393114
int kfd_evict_process_device(struct kfd_process_device *pdd)
31403115
{
31413116
struct device_queue_manager *dqm;
31423117
struct kfd_process *p;
3143-
int ret = 0;
31443118

31453119
p = pdd->process;
31463120
dqm = pdd->dev->dqm;
31473121

31483122
WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
31493123

3150-
if (dqm->dev->kfd->shared_resources.enable_mes)
3151-
ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd);
3152-
else
3153-
ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
3154-
3155-
return ret;
3124+
return dqm->ops.evict_process_queues(dqm, &pdd->qpd);
31563125
}
31573126

31583127
int reserve_debug_trap_vmid(struct device_queue_manager *dqm,

0 commit comments

Comments
 (0)