Skip to content

Commit 6e25450

Browse files
trusinowicz authored and gregkh committed
accel/ivpu: Add FW state dump on TDR
[ Upstream commit 5e162f8 ]

Send a JSM state dump message at the beginning of the TDR handler. This allows the FW to collect debug info in the FW log before the state of the NPU is lost, so that the cause of a TDR can be analyzed.

Wait a predefined timeout (10 ms) so the FW has a chance to write debug logs. We cannot wait for a JSM response at this point because IRQs are already disabled before the TDR handler is invoked.

Signed-off-by: Tomasz Rusinowicz <tomasz.rusinowicz@intel.com>
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240930195322.461209-9-jacek.lawrynowicz@linux.intel.com
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
Stable-dep-of: 41a2d82 ("accel/ivpu: Fix error handling in recovery/reset")
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 509662f commit 6e25450

File tree

7 files changed

+43
-0
lines changed

7 files changed

+43
-0
lines changed

drivers/accel/ivpu/ivpu_drv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct ivpu_device {
152152
int tdr;
153153
int autosuspend;
154154
int d0i3_entry_msg;
155+
int state_dump_msg;
155156
} timeout;
156157
};
157158

drivers/accel/ivpu/ivpu_hw.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,14 @@ static void timeouts_init(struct ivpu_device *vdev)
8989
vdev->timeout.tdr = 2000000;
9090
vdev->timeout.autosuspend = -1;
9191
vdev->timeout.d0i3_entry_msg = 500;
92+
vdev->timeout.state_dump_msg = 10;
9293
} else if (ivpu_is_simics(vdev)) {
9394
vdev->timeout.boot = 50;
9495
vdev->timeout.jsm = 500;
9596
vdev->timeout.tdr = 10000;
9697
vdev->timeout.autosuspend = -1;
9798
vdev->timeout.d0i3_entry_msg = 100;
99+
vdev->timeout.state_dump_msg = 10;
98100
} else {
99101
vdev->timeout.boot = 1000;
100102
vdev->timeout.jsm = 500;
@@ -104,6 +106,7 @@ static void timeouts_init(struct ivpu_device *vdev)
104106
else
105107
vdev->timeout.autosuspend = 100;
106108
vdev->timeout.d0i3_entry_msg = 5;
109+
vdev->timeout.state_dump_msg = 10;
107110
}
108111
}
109112

drivers/accel/ivpu/ivpu_ipc.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,32 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
353353
return ret;
354354
}
355355

356+
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
357+
u32 channel, unsigned long timeout_ms)
358+
{
359+
struct ivpu_ipc_consumer cons;
360+
int ret;
361+
362+
ret = ivpu_rpm_get(vdev);
363+
if (ret < 0)
364+
return ret;
365+
366+
ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
367+
368+
ret = ivpu_ipc_send(vdev, &cons, req);
369+
if (ret) {
370+
ivpu_warn_ratelimited(vdev, "IPC send failed: %d\n", ret);
371+
goto consumer_del;
372+
}
373+
374+
msleep(timeout_ms);
375+
376+
consumer_del:
377+
ivpu_ipc_consumer_del(vdev, &cons);
378+
ivpu_rpm_put(vdev);
379+
return ret;
380+
}
381+
356382
static bool
357383
ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
358384
struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)

drivers/accel/ivpu/ivpu_ipc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,5 +107,7 @@ int ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg
107107
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
108108
enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
109109
u32 channel, unsigned long timeout_ms);
110+
int ivpu_ipc_send_and_wait(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
111+
u32 channel, unsigned long timeout_ms);
110112

111113
#endif /* __IVPU_IPC_H__ */

drivers/accel/ivpu/ivpu_jsm_msg.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,3 +555,11 @@ int ivpu_jsm_dct_disable(struct ivpu_device *vdev)
555555
return ivpu_ipc_send_receive_internal(vdev, &req, VPU_JSM_MSG_DCT_DISABLE_DONE, &resp,
556556
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
557557
}
558+
559+
int ivpu_jsm_state_dump(struct ivpu_device *vdev)
560+
{
561+
struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_STATE_DUMP };
562+
563+
return ivpu_ipc_send_and_wait(vdev, &req, VPU_IPC_CHAN_ASYNC_CMD,
564+
vdev->timeout.state_dump_msg);
565+
}

drivers/accel/ivpu/ivpu_jsm_msg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,6 @@ int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mas
4343
u64 buffer_size, u32 *sample_size, u64 *info_size);
4444
int ivpu_jsm_dct_enable(struct ivpu_device *vdev, u32 active_us, u32 inactive_us);
4545
int ivpu_jsm_dct_disable(struct ivpu_device *vdev);
46+
int ivpu_jsm_state_dump(struct ivpu_device *vdev);
47+
4648
#endif

drivers/accel/ivpu/ivpu_pm.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
124124
if (ret)
125125
ivpu_err(vdev, "Failed to resume NPU: %d\n", ret);
126126

127+
ivpu_jsm_state_dump(vdev);
127128
ivpu_dev_coredump(vdev);
128129

129130
atomic_inc(&vdev->pm->reset_counter);

0 commit comments

Comments
 (0)