Skip to content

Commit e96687c

Browse files
committed
Merge tag 'drm-fixes-2025-10-17' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie: "As per usual xe/amdgpu are the leaders, with some i915 and then a bunch of scattered fixes. There are a bunch of stability fixes for some older amdgpu cards. draw: - Avoid color truncation gpuvm: - Avoid kernel-doc warning sched: - Avoid double free i915: - Skip GuC communication warning if reset is in progress - Couple frontbuffer related fixes - Deactivate PSR only on LNL and when selective fetch enabled xe: - Increase global invalidation timeout to handle some workloads - Fix NPD while evicting BOs in an array of VM binds - Fix resizable BAR to account for possibly needing to move BARs other than the LMEMBAR - Fix error handling in xe_migrate_init() - Fix atomic fault handling with mixed mappings or if the page is already in VRAM - Enable media samplers power gating for platforms before Xe2 - Fix de-registering exec queue from GuC when unbinding - Ensure data migration to system if indicated by madvise with SVM - Fix kerneldoc for kunit change - Always account for cacheline alignment on migration - Drop bogus assertion on eviction amdgpu: - Backlight fix - SI fixes - CIK fix - Make CE support debug only - IP discovery fix - Ring reset fixes - GPUVM fault memory barrier fix - Drop unused structures in amdgpu_drm.h - JPEG debugfs fix - VRAM handling fixes for GPUs without VRAM - GC 12 MES fixes amdkfd: - MES fix ast: - Fix display output after reboot bridge: - lt9211: Fix version check panthor: - Fix MCU suspend qaic: - Init bootlog in correct order - Treat remaining == 0 as error in find_and_map_user_pages() - Lock access to DBC request queue rockchip: - vop2: Fix destination size in atomic check" * tag 'drm-fixes-2025-10-17' of https://gitlab.freedesktop.org/drm/kernel: (44 commits) drm/sched: Fix potential double free in drm_sched_job_add_resv_dependencies drm/xe/evict: drop bogus assert drm/xe/migrate: don't misalign current bytes drm/xe/kunit: Fix kerneldoc for parameterized tests
drm/xe/svm: Ensure data will be migrated to system if indicated by madvise. drm/gpuvm: Fix kernel-doc warning for drm_gpuvm_map_req.map drm/i915/psr: Deactivate PSR only on LNL and when selective fetch enabled drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off accel/qaic: Synchronize access to DBC request queue head & tail pointer accel/qaic: Treat remaining == 0 as error in find_and_map_user_pages() accel/qaic: Fix bootlog initialization ordering drm/rockchip: vop2: use correct destination rectangle height check drm/draw: fix color truncation in drm_draw_fill24 drm/xe/guc: Check GuC running state before deregistering exec queue drm/xe: Enable media sampler power gating drm/xe: Handle mixed mappings and existing VRAM on atomic faults drm/xe/migrate: Fix an error path drm/xe: Move rebar to be done earlier drm/xe: Don't allow evicting of BOs in same VM in array of VM binds drm/xe: Increase global invalidation timeout to 1000us ...
2 parents 389dfd9 + 62cab42 commit e96687c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+355
-224
lines changed

drivers/accel/qaic/qaic.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ struct dma_bridge_chan {
9797
* response queue's head and tail pointer of this DBC.
9898
*/
9999
void __iomem *dbc_base;
100+
/* Synchronizes access to the request queue's head and tail pointers */
101+
struct mutex req_lock;
100102
/* Head of list where each node is a memory handle queued in request queue */
101103
struct list_head xfer_list;
102104
/* Synchronizes DBC readers during cleanup */

drivers/accel/qaic/qaic_control.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
407407
return -EINVAL;
408408
remaining = in_trans->size - resources->xferred_dma_size;
409409
if (remaining == 0)
410-
return 0;
410+
return -EINVAL;
411411

412412
if (check_add_overflow(xfer_start_addr, remaining, &end))
413413
return -EINVAL;

drivers/accel/qaic/qaic_data.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,32 +1356,40 @@ static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct dr
13561356
goto release_ch_rcu;
13571357
}
13581358

1359+
ret = mutex_lock_interruptible(&dbc->req_lock);
1360+
if (ret)
1361+
goto release_ch_rcu;
1362+
13591363
head = readl(dbc->dbc_base + REQHP_OFF);
13601364
tail = readl(dbc->dbc_base + REQTP_OFF);
13611365

13621366
if (head == U32_MAX || tail == U32_MAX) {
13631367
/* PCI link error */
13641368
ret = -ENODEV;
1365-
goto release_ch_rcu;
1369+
goto unlock_req_lock;
13661370
}
13671371

13681372
queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
13691373

13701374
ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
13711375
head, &tail);
13721376
if (ret)
1373-
goto release_ch_rcu;
1377+
goto unlock_req_lock;
13741378

13751379
/* Finalize commit to hardware */
13761380
submit_ts = ktime_get_ns();
13771381
writel(tail, dbc->dbc_base + REQTP_OFF);
1382+
mutex_unlock(&dbc->req_lock);
13781383

13791384
update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
13801385
submit_ts, queue_level);
13811386

13821387
if (datapath_polling)
13831388
schedule_work(&dbc->poll_work);
13841389

1390+
unlock_req_lock:
1391+
if (ret)
1392+
mutex_unlock(&dbc->req_lock);
13851393
release_ch_rcu:
13861394
srcu_read_unlock(&dbc->ch_lock, rcu_id);
13871395
unlock_dev_srcu:

drivers/accel/qaic/qaic_debugfs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
218218
if (ret)
219219
goto destroy_workqueue;
220220

221+
dev_set_drvdata(&mhi_dev->dev, qdev);
222+
qdev->bootlog_ch = mhi_dev;
223+
221224
for (i = 0; i < BOOTLOG_POOL_SIZE; i++) {
222225
msg = devm_kzalloc(&qdev->pdev->dev, sizeof(*msg), GFP_KERNEL);
223226
if (!msg) {
@@ -233,8 +236,6 @@ static int qaic_bootlog_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_d
233236
goto mhi_unprepare;
234237
}
235238

236-
dev_set_drvdata(&mhi_dev->dev, qdev);
237-
qdev->bootlog_ch = mhi_dev;
238239
return 0;
239240

240241
mhi_unprepare:

drivers/accel/qaic/qaic_drv.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,9 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev,
454454
return NULL;
455455
init_waitqueue_head(&qdev->dbc[i].dbc_release);
456456
INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
457+
ret = drmm_mutex_init(drm, &qdev->dbc[i].req_lock);
458+
if (ret)
459+
return NULL;
457460
}
458461

459462
return qdev;

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,7 @@ struct amdgpu_device {
12901290
bool debug_disable_gpu_ring_reset;
12911291
bool debug_vm_userptr;
12921292
bool debug_disable_ce_logs;
1293+
bool debug_enable_ce_cs;
12931294

12941295
/* Protection for the following isolation structure */
12951296
struct mutex enforce_isolation_mutex;

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,10 +2329,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
23292329
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
23302330
struct kfd_vm_fault_info *mem)
23312331
{
2332-
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
2332+
if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
23332333
*mem = *adev->gmc.vm_fault_info;
2334-
mb(); /* make sure read happened */
2335-
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
2334+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
23362335
}
23372336
return 0;
23382337
}

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
364364
if (p->uf_bo && ring->funcs->no_user_fence)
365365
return -EINVAL;
366366

367+
if (!p->adev->debug_enable_ce_cs &&
368+
chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
369+
dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
370+
return -EINVAL;
371+
}
372+
367373
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
368374
chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
369375
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
@@ -702,7 +708,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
702708
*/
703709
const s64 us_upper_bound = 200000;
704710

705-
if (!adev->mm_stats.log2_max_MBps) {
711+
if ((!adev->mm_stats.log2_max_MBps) || !ttm_resource_manager_used(&adev->mman.vram_mgr.manager)) {
706712
*max_bytes = 0;
707713
*max_vis_bytes = 0;
708714
return;

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,13 @@ static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device
18821882

18831883
static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
18841884
{
1885+
/* Enabling ASPM causes random hangs on Tahiti and Oland on Zen4.
1886+
* It's unclear if this is a platform-specific or GPU-specific issue.
1887+
* Disable ASPM on SI for the time being.
1888+
*/
1889+
if (adev->family == AMDGPU_FAMILY_SI)
1890+
return true;
1891+
18851892
#if IS_ENABLED(CONFIG_X86)
18861893
struct cpuinfo_x86 *c = &cpu_data(0);
18871894

drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1033,7 +1033,9 @@ static uint8_t amdgpu_discovery_get_harvest_info(struct amdgpu_device *adev,
10331033
/* Until a uniform way is figured, get mask based on hwid */
10341034
switch (hw_id) {
10351035
case VCN_HWID:
1036-
harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
1036+
/* VCN vs UVD+VCE */
1037+
if (!amdgpu_ip_version(adev, VCE_HWIP, 0))
1038+
harvest = ((1 << inst) & adev->vcn.inst_mask) == 0;
10371039
break;
10381040
case DMU_HWID:
10391041
if (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)
@@ -2565,7 +2567,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
25652567
amdgpu_discovery_init(adev);
25662568
vega10_reg_base_init(adev);
25672569
adev->sdma.num_instances = 2;
2570+
adev->sdma.sdma_mask = 3;
25682571
adev->gmc.num_umc = 4;
2572+
adev->gfx.xcc_mask = 1;
25692573
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
25702574
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
25712575
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
@@ -2592,7 +2596,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
25922596
amdgpu_discovery_init(adev);
25932597
vega10_reg_base_init(adev);
25942598
adev->sdma.num_instances = 2;
2599+
adev->sdma.sdma_mask = 3;
25952600
adev->gmc.num_umc = 4;
2601+
adev->gfx.xcc_mask = 1;
25962602
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
25972603
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
25982604
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
@@ -2619,8 +2625,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
26192625
amdgpu_discovery_init(adev);
26202626
vega10_reg_base_init(adev);
26212627
adev->sdma.num_instances = 1;
2628+
adev->sdma.sdma_mask = 1;
26222629
adev->vcn.num_vcn_inst = 1;
26232630
adev->gmc.num_umc = 2;
2631+
adev->gfx.xcc_mask = 1;
26242632
if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
26252633
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
26262634
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
@@ -2665,7 +2673,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
26652673
amdgpu_discovery_init(adev);
26662674
vega20_reg_base_init(adev);
26672675
adev->sdma.num_instances = 2;
2676+
adev->sdma.sdma_mask = 3;
26682677
adev->gmc.num_umc = 8;
2678+
adev->gfx.xcc_mask = 1;
26692679
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
26702680
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
26712681
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
@@ -2693,8 +2703,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
26932703
amdgpu_discovery_init(adev);
26942704
arct_reg_base_init(adev);
26952705
adev->sdma.num_instances = 8;
2706+
adev->sdma.sdma_mask = 0xff;
26962707
adev->vcn.num_vcn_inst = 2;
26972708
adev->gmc.num_umc = 8;
2709+
adev->gfx.xcc_mask = 1;
26982710
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
26992711
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
27002712
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
@@ -2726,8 +2738,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
27262738
amdgpu_discovery_init(adev);
27272739
aldebaran_reg_base_init(adev);
27282740
adev->sdma.num_instances = 5;
2741+
adev->sdma.sdma_mask = 0x1f;
27292742
adev->vcn.num_vcn_inst = 2;
27302743
adev->gmc.num_umc = 4;
2744+
adev->gfx.xcc_mask = 1;
27312745
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
27322746
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
27332747
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
@@ -2762,6 +2776,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
27622776
} else {
27632777
cyan_skillfish_reg_base_init(adev);
27642778
adev->sdma.num_instances = 2;
2779+
adev->sdma.sdma_mask = 3;
2780+
adev->gfx.xcc_mask = 1;
27652781
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(2, 0, 3);
27662782
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(2, 0, 3);
27672783
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(5, 0, 1);

0 commit comments

Comments
 (0)