Skip to content

Commit 639fc79

Browse files
jlawryno authored and gregkh committed
accel/ivpu: Fix error handling in recovery/reset
[ Upstream commit 41a2d82 ]

Disable runtime PM for the duration of reset/recovery so it is possible to set the correct runtime PM state depending on the outcome of the `ivpu_resume()`. Don’t suspend or reset the HW if the NPU is suspended when the reset/recovery is requested. Also, move common reset/recovery code to separate functions for better code readability.

Fixes: 27d1926 ("accel/ivpu: Improve recovery and reset support")
Cc: stable@vger.kernel.org # v6.8+
Reviewed-by: Maciej Falkowski <maciej.falkowski@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250129124009.1039982-4-jacek.lawrynowicz@linux.intel.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 6e25450 commit 639fc79

File tree

1 file changed

+43
-36
lines changed

1 file changed

+43
-36
lines changed

drivers/accel/ivpu/ivpu_pm.c

Lines changed: 43 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -111,41 +111,57 @@ static int ivpu_resume(struct ivpu_device *vdev)
111111
return ret;
112112
}
113113

114-
static void ivpu_pm_recovery_work(struct work_struct *work)
114+
static void ivpu_pm_reset_begin(struct ivpu_device *vdev)
115115
{
116-
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
117-
struct ivpu_device *vdev = pm->vdev;
118-
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
119-
int ret;
120-
121-
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
122-
123-
ret = pm_runtime_resume_and_get(vdev->drm.dev);
124-
if (ret)
125-
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
126-
127-
ivpu_jsm_state_dump(vdev);
128-
ivpu_dev_coredump(vdev);
116+
pm_runtime_disable(vdev->drm.dev);
129117

130118
atomic_inc(&vdev->pm->reset_counter);
131119
atomic_set(&vdev->pm->reset_pending, 1);
132120
down_write(&vdev->pm->reset_lock);
121+
}
122+
123+
static void ivpu_pm_reset_complete(struct ivpu_device *vdev)
124+
{
125+
int ret;
133126

134-
ivpu_suspend(vdev);
135127
ivpu_pm_prepare_cold_boot(vdev);
136128
ivpu_jobs_abort_all(vdev);
137129
ivpu_ms_cleanup_all(vdev);
138130

139131
ret = ivpu_resume(vdev);
140-
if (ret)
132+
if (ret) {
141133
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
134+
pm_runtime_set_suspended(vdev->drm.dev);
135+
} else {
136+
pm_runtime_set_active(vdev->drm.dev);
137+
}
142138

143139
up_write(&vdev->pm->reset_lock);
144140
atomic_set(&vdev->pm->reset_pending, 0);
145141

146-
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
147142
pm_runtime_mark_last_busy(vdev->drm.dev);
148-
pm_runtime_put_autosuspend(vdev->drm.dev);
143+
pm_runtime_enable(vdev->drm.dev);
144+
}
145+
146+
static void ivpu_pm_recovery_work(struct work_struct *work)
147+
{
148+
struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work);
149+
struct ivpu_device *vdev = pm->vdev;
150+
char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL};
151+
152+
ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter));
153+
154+
ivpu_pm_reset_begin(vdev);
155+
156+
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
157+
ivpu_jsm_state_dump(vdev);
158+
ivpu_dev_coredump(vdev);
159+
ivpu_suspend(vdev);
160+
}
161+
162+
ivpu_pm_reset_complete(vdev);
163+
164+
kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt);
149165
}
150166

151167
void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason)
@@ -316,35 +332,26 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
316332
struct ivpu_device *vdev = pci_get_drvdata(pdev);
317333

318334
ivpu_dbg(vdev, PM, "Pre-reset..\n");
319-
atomic_inc(&vdev->pm->reset_counter);
320-
atomic_set(&vdev->pm->reset_pending, 1);
321335

322-
pm_runtime_get_sync(vdev->drm.dev);
323-
down_write(&vdev->pm->reset_lock);
324-
ivpu_prepare_for_reset(vdev);
325-
ivpu_hw_reset(vdev);
326-
ivpu_pm_prepare_cold_boot(vdev);
327-
ivpu_jobs_abort_all(vdev);
328-
ivpu_ms_cleanup_all(vdev);
336+
ivpu_pm_reset_begin(vdev);
337+
338+
if (!pm_runtime_status_suspended(vdev->drm.dev)) {
339+
ivpu_prepare_for_reset(vdev);
340+
ivpu_hw_reset(vdev);
341+
}
329342

330343
ivpu_dbg(vdev, PM, "Pre-reset done.\n");
331344
}
332345

333346
void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
334347
{
335348
struct ivpu_device *vdev = pci_get_drvdata(pdev);
336-
int ret;
337349

338350
ivpu_dbg(vdev, PM, "Post-reset..\n");
339-
ret = ivpu_resume(vdev);
340-
if (ret)
341-
ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret);
342-
up_write(&vdev->pm->reset_lock);
343-
atomic_set(&vdev->pm->reset_pending, 0);
344-
ivpu_dbg(vdev, PM, "Post-reset done.\n");
345351

346-
pm_runtime_mark_last_busy(vdev->drm.dev);
347-
pm_runtime_put_autosuspend(vdev->drm.dev);
352+
ivpu_pm_reset_complete(vdev);
353+
354+
ivpu_dbg(vdev, PM, "Post-reset done.\n");
348355
}
349356

350357
void ivpu_pm_init(struct ivpu_device *vdev)

0 commit comments

Comments
 (0)