Skip to content

Commit 284fc30

Browse files
committed
Merge tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel
Pull more drm fixes from Dave Airlie: "Just the follow up fixes for rc1 from the next branch, amdgpu and xe mostly with a single v3d fix in there. amdgpu: - DC DCE6 fixes - GPU reset fixes - Secure display messaging cleanup - MES fix - GPUVM locking fixes - PMFW messaging cleanup - PCI US/DS switch handling fix - VCN queue reset fix - DC FPU handling fix - DCN 3.5 fix - DC mirroring fix amdkfd: - Fix kfd process ref leak - mmap write lock handling fix - Fix comments in IOCTL xe: - Fix build with clang 16 - Fix handling of invalid configfs syntax usage and spell out the expected syntax in the documentation - Do not try late bind firmware when running as VF since it shouldn't handle firmware loading - Fix idle assertion for local BOs - Fix uninitialized variable for late binding - Do not require perfmon_capable to expose free memory at page granularity. Handle it like other drm drivers do - Fix lock handling on suspend error path - Fix I2C controller resume after S3 v3d: - fix fence locking" * tag 'drm-next-2025-10-11-1' of https://gitlab.freedesktop.org/drm/kernel: (34 commits) drm/amd/display: Incorrect Mirror Cositing drm/amd/display: Enable Dynamic DTBCLK Switch drm/amdgpu: Report individual reset error drm/amdgpu: partially revert "revert to old status lock handling v3" drm/amd/display: Fix unsafe uses of kernel mode FPU drm/amd/pm: Disable VCN queue reset on SMU v13.0.6 due to regression drm/amdgpu: Fix general protection fault in amdgpu_vm_bo_reset_state_machine drm/amdgpu: Check swus/ds for switch state save drm/amdkfd: Fix two comments in kfd_ioctl.h drm/amd/pm: Avoid interface mismatch messaging drm/amdgpu: Merge amdgpu_vm_set_pasid into amdgpu_vm_init drm/amd/amdgpu: Fix the mes version that support inv_tlbs drm/amd: Check whether secure display TA loaded successfully drm/amdkfd: Fix mmap write lock not release drm/amdkfd: Fix kfd process ref leaking when userptr unmapping drm/amdgpu: Fix for GPU reset being blocked by KIQ I/O. 
drm/amd/display: Disable scaling on DCE6 for now drm/amd/display: Properly disable scaling on DCE6 drm/amd/display: Properly clear SCL_*_FILTER_CONTROL on DCE6 drm/amd/display: Add missing DCE6 SCL_HORZ_FILTER_INIT* SRIs ...
2 parents 1e5d41b + c4b6ddc commit 284fc30

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+391
-253
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2586,12 +2586,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
25862586
* from the KFD, trigger a segmentation fault in VM debug mode.
25872587
*/
25882588
if (amdgpu_ttm_adev(bo->tbo.bdev)->debug_vm_userptr) {
2589+
struct kfd_process *p;
2590+
25892591
pr_err("Pid %d unmapped memory before destroying userptr at GPU addr 0x%llx\n",
25902592
pid_nr(process_info->pid), mem->va);
25912593

25922594
// Send GPU VM fault to user space
2593-
kfd_signal_vm_fault_event_with_userptr(kfd_lookup_process_by_pid(process_info->pid),
2594-
mem->va);
2595+
p = kfd_lookup_process_by_pid(process_info->pid);
2596+
if (p) {
2597+
kfd_signal_vm_fault_event_with_userptr(p, mem->va);
2598+
kfd_unref_process(p);
2599+
}
25952600
}
25962601

25972602
ret = 0;

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 30 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6389,23 +6389,28 @@ static int amdgpu_device_sched_resume(struct list_head *device_list,
63896389
if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)
63906390
drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
63916391

6392-
if (tmp_adev->asic_reset_res)
6393-
r = tmp_adev->asic_reset_res;
6394-
6395-
tmp_adev->asic_reset_res = 0;
6396-
6397-
if (r) {
6392+
if (tmp_adev->asic_reset_res) {
63986393
/* bad news, how to tell it to userspace ?
63996394
* for ras error, we should report GPU bad status instead of
64006395
* reset failure
64016396
*/
64026397
if (reset_context->src != AMDGPU_RESET_SRC_RAS ||
64036398
!amdgpu_ras_eeprom_check_err_threshold(tmp_adev))
6404-
dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
6405-
atomic_read(&tmp_adev->gpu_reset_counter));
6406-
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
6399+
dev_info(
6400+
tmp_adev->dev,
6401+
"GPU reset(%d) failed with error %d \n",
6402+
atomic_read(
6403+
&tmp_adev->gpu_reset_counter),
6404+
tmp_adev->asic_reset_res);
6405+
amdgpu_vf_error_put(tmp_adev,
6406+
AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
6407+
tmp_adev->asic_reset_res);
6408+
if (!r)
6409+
r = tmp_adev->asic_reset_res;
6410+
tmp_adev->asic_reset_res = 0;
64076411
} else {
6408-
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
6412+
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n",
6413+
atomic_read(&tmp_adev->gpu_reset_counter));
64096414
if (amdgpu_acpi_smart_shift_update(tmp_adev,
64106415
AMDGPU_SS_DEV_D0))
64116416
dev_warn(tmp_adev->dev,
@@ -7157,28 +7162,35 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
71577162

71587163
static void amdgpu_device_cache_switch_state(struct amdgpu_device *adev)
71597164
{
7160-
struct pci_dev *parent = pci_upstream_bridge(adev->pdev);
7165+
struct pci_dev *swus, *swds;
71617166
int r;
71627167

7163-
if (!parent || parent->vendor != PCI_VENDOR_ID_ATI)
7168+
swds = pci_upstream_bridge(adev->pdev);
7169+
if (!swds || swds->vendor != PCI_VENDOR_ID_ATI ||
7170+
pci_pcie_type(swds) != PCI_EXP_TYPE_DOWNSTREAM)
7171+
return;
7172+
swus = pci_upstream_bridge(swds);
7173+
if (!swus ||
7174+
(swus->vendor != PCI_VENDOR_ID_ATI &&
7175+
swus->vendor != PCI_VENDOR_ID_AMD) ||
7176+
pci_pcie_type(swus) != PCI_EXP_TYPE_UPSTREAM)
71647177
return;
71657178

71667179
/* If already saved, return */
71677180
if (adev->pcie_reset_ctx.swus)
71687181
return;
71697182
/* Upstream bridge is ATI, assume it's SWUS/DS architecture */
7170-
r = pci_save_state(parent);
7183+
r = pci_save_state(swds);
71717184
if (r)
71727185
return;
7173-
adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(parent);
7186+
adev->pcie_reset_ctx.swds_pcistate = pci_store_saved_state(swds);
71747187

7175-
parent = pci_upstream_bridge(parent);
7176-
r = pci_save_state(parent);
7188+
r = pci_save_state(swus);
71777189
if (r)
71787190
return;
7179-
adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(parent);
7191+
adev->pcie_reset_ctx.swus_pcistate = pci_store_saved_state(swus);
71807192

7181-
adev->pcie_reset_ctx.swus = parent;
7193+
adev->pcie_reset_ctx.swus = swus;
71827194
}
71837195

71847196
static void amdgpu_device_load_switch_state(struct amdgpu_device *adev)

drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1102,6 +1102,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_
11021102

11031103
might_sleep();
11041104
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1105+
if (amdgpu_in_reset(adev))
1106+
goto failed_kiq_read;
1107+
11051108
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
11061109
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
11071110
}
@@ -1171,6 +1174,8 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3
11711174

11721175
might_sleep();
11731176
while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
1177+
if (amdgpu_in_reset(adev))
1178+
goto failed_kiq_write;
11741179

11751180
msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
11761181
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

Lines changed: 2 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1421,14 +1421,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
14211421

14221422
amdgpu_debugfs_vm_init(file_priv);
14231423

1424-
r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
1424+
r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, pasid);
14251425
if (r)
14261426
goto error_pasid;
14271427

1428-
r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
1429-
if (r)
1430-
goto error_vm;
1431-
14321428
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
14331429
if (!fpriv->prt_va) {
14341430
r = -ENOMEM;
@@ -1468,10 +1464,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
14681464
amdgpu_vm_fini(adev, &fpriv->vm);
14691465

14701466
error_pasid:
1471-
if (pasid) {
1467+
if (pasid)
14721468
amdgpu_pasid_free(pasid);
1473-
amdgpu_vm_set_pasid(adev, &fpriv->vm, 0);
1474-
}
14751469

14761470
kfree(fpriv);
14771471

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2352,7 +2352,7 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
23522352
}
23532353

23542354
ret = psp_ta_load(psp, &psp->securedisplay_context.context);
2355-
if (!ret) {
2355+
if (!ret && !psp->securedisplay_context.context.resp_status) {
23562356
psp->securedisplay_context.context.initialized = true;
23572357
mutex_init(&psp->securedisplay_context.mutex);
23582358
} else

drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -726,12 +726,12 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
726726
struct amdgpu_bo *bo;
727727
int ret;
728728

729-
spin_lock(&vm->invalidated_lock);
729+
spin_lock(&vm->status_lock);
730730
while (!list_empty(&vm->invalidated)) {
731731
bo_va = list_first_entry(&vm->invalidated,
732732
struct amdgpu_bo_va,
733733
base.vm_status);
734-
spin_unlock(&vm->invalidated_lock);
734+
spin_unlock(&vm->status_lock);
735735

736736
bo = bo_va->base.bo;
737737
ret = drm_exec_prepare_obj(exec, &bo->tbo.base, 2);
@@ -748,9 +748,9 @@ amdgpu_userq_bo_validate(struct amdgpu_device *adev, struct drm_exec *exec,
748748
if (ret)
749749
return ret;
750750

751-
spin_lock(&vm->invalidated_lock);
751+
spin_lock(&vm->status_lock);
752752
}
753-
spin_unlock(&vm->invalidated_lock);
753+
spin_unlock(&vm->status_lock);
754754

755755
return 0;
756756
}

0 commit comments

Comments
 (0)