Commit 62cab42

Merge tag 'drm-xe-fixes-2025-10-16' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:

- Increase global invalidation timeout to handle some workloads (Kenneth Graunke)
- Fix NPD while evicting BOs in an array of VM binds (Matthew Brost)
- Fix resizable BAR to account for possibly needing to move BARs other than the LMEMBAR (Lucas De Marchi)
- Fix error handling in xe_migrate_init() (Thomas Hellström)
- Fix atomic fault handling with mixed mappings or if the page is already in VRAM (Matthew Brost)
- Enable media samplers power gating for platforms before Xe2 (Vinay Belgaumkar)
- Fix de-registering exec queue from GuC when unbinding (Matthew Brost)
- Ensure data migration to system if indicated by madvise with SVM (Thomas Hellström)
- Fix kerneldoc for kunit change (Matt Roper)
- Always account for cacheline alignment on migration (Matthew Auld)
- Drop bogus assertion on eviction (Matthew Auld)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/rch735eqkmprfyutk3ux2fsqa3e5ve4p77w7a5j66qdpgyquxr@ao3wzcqtpn6s
2 parents: d6dd930 + 225bc03

13 files changed: +99 -31 lines changed


drivers/gpu/drm/xe/regs/xe_gt_regs.h

Lines changed: 1 addition & 0 deletions
@@ -342,6 +342,7 @@
 #define POWERGATE_ENABLE			XE_REG(0xa210)
 #define   RENDER_POWERGATE_ENABLE		REG_BIT(0)
 #define   MEDIA_POWERGATE_ENABLE		REG_BIT(1)
+#define   MEDIA_SAMPLERS_POWERGATE_ENABLE	REG_BIT(2)
 #define   VDN_HCP_POWERGATE_ENABLE(n)		REG_BIT(3 + 2 * (n))
 #define   VDN_MFXVDENC_POWERGATE_ENABLE(n)	REG_BIT(4 + 2 * (n))
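
For reference, REG_BIT() ultimately expands to a single-bit mask (the kernel macro also adds compile-time bounds checking). A minimal userspace sketch, assuming only that behavior, shows the value of the new field:

#include <stdio.h>

/* Simplified stand-in for the kernel's REG_BIT(); illustration only. */
#define REG_BIT(n) (1U << (n))

int main(void)
{
	/* The new sampler bit is bit 2 of POWERGATE_ENABLE (0xa210). */
	printf("MEDIA_SAMPLERS_POWERGATE_ENABLE = 0x%x\n", REG_BIT(2)); /* 0x4 */
	return 0;
}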

drivers/gpu/drm/xe/tests/xe_pci.c

Lines changed: 5 additions & 0 deletions
@@ -66,6 +66,7 @@ KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);

 /**
  * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -242,6 +243,7 @@ KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);

 /**
  * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -266,6 +268,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);

 /**
  * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -290,6 +293,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);

 /**
  * xe_pci_id_gen_param - Generate struct pci_device_id parameters
+ * @test: test context object
  * @prev: the pointer to the previous parameter to iterate from or NULL
  * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
  *
@@ -376,6 +380,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);

 /**
  * xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
+ * @test: test context object
  * @prev: the previously returned value, or NULL for the first iteration
  * @desc: the buffer for a parameter name
  *
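
These kerneldoc additions track a KUnit change that added a test-context argument to parameter generators. A sketch of the documented contract, using stub types (the authoritative prototype lives in include/kunit/test.h; the generator name here is hypothetical):

#include <stddef.h>

struct kunit;	/* opaque KUnit test context */

/* Hypothetical generator: return the parameter after @prev (NULL starts
 * the iteration), write a short name into @desc, and return NULL when
 * the parameters are exhausted. */
static const void *example_gen_params(struct kunit *test, const void *prev,
				      char *desc)
{
	(void)test;	/* new argument documented by this patch */
	(void)prev;
	(void)desc;
	return NULL;	/* stub: no parameters */
}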

drivers/gpu/drm/xe/xe_bo_evict.c

Lines changed: 0 additions & 8 deletions
@@ -182,7 +182,6 @@ int xe_bo_evict_all(struct xe_device *xe)

 static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
 {
-	struct xe_device *xe = xe_bo_device(bo);
 	int ret;

 	ret = xe_bo_restore_pinned(bo);
@@ -201,13 +200,6 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
 		}
 	}

-	/*
-	 * We expect validate to trigger a move VRAM and our move code
-	 * should setup the iosys map.
-	 */
-	xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
-		  !iosys_map_is_null(&bo->vmap));
-
 	return 0;
 }

drivers/gpu/drm/xe/xe_device.c

Lines changed: 1 addition & 1 deletion
@@ -1070,7 +1070,7 @@ void xe_device_l2_flush(struct xe_device *xe)
 	spin_lock(&gt->global_invl_lock);

 	xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
-	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+	if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true))
 		xe_gt_err_once(gt, "Global invalidation timeout\n");

 	spin_unlock(&gt->global_invl_lock);
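
The timeout argument to xe_mmio_wait32() is in microseconds, so this doubles the budget from 0.5 ms to 1 ms before the one-time error is logged. As a rough sketch of the underlying write-then-poll pattern (hypothetical helper, not the driver's xe_mmio_wait32):

/* Spin until (reg & mask) == val or the microsecond budget runs out. */
static int wait_reg_us(void __iomem *reg, u32 mask, u32 val,
		       unsigned int timeout_us)
{
	while (timeout_us--) {
		if ((readl(reg) & mask) == val)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}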

drivers/gpu/drm/xe/xe_gt_idle.c

Lines changed: 8 additions & 0 deletions
@@ -124,6 +124,9 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
 	if (xe_gt_is_main_type(gt))
 		gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;

+	if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+		gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE;
+
 	if (xe->info.platform != XE_DG1) {
 		for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
 			if ((gt->info.engine_mask & BIT(i)))
@@ -246,6 +249,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
 			drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
 				   str_up_down(pg_status & media_slices[n].status_bit));
 	}
+
+	if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+		drm_printf(p, "Media Samplers Power Gating Enabled: %s\n",
+			   str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE));
+
 	return 0;
 }
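
MEDIA_VERx100() encodes the media IP version as major * 100 + minor, so the new check covers media IPs from 11.00 up to, but not including, 12.55: the "platforms before Xe2" named in the commit message. Restated as a standalone predicate (illustrative helper, not driver code):

#include <stdbool.h>

/* True for media IP versions in [11.00, 12.55), matching the gate above. */
static inline bool media_samplers_pg_supported(unsigned int media_verx100)
{
	return media_verx100 >= 1100 && media_verx100 < 1255;
}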

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 12 additions & 1 deletion
@@ -44,6 +44,7 @@
 #include "xe_ring_ops_types.h"
 #include "xe_sched_job.h"
 #include "xe_trace.h"
+#include "xe_uc_fw.h"
 #include "xe_vm.h"

 static struct xe_guc *
@@ -1489,7 +1490,17 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
 	xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
 	trace_xe_exec_queue_cleanup_entity(q);

-	if (exec_queue_registered(q))
+	/*
+	 * Expected state transitions for cleanup:
+	 * - If the exec queue is registered and GuC firmware is running, we must first
+	 *   disable scheduling and deregister the queue to ensure proper teardown and
+	 *   resource release in the GuC, then destroy the exec queue on driver side.
+	 * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
+	 *   we cannot expect a response for the deregister request. In this case,
+	 *   it is safe to directly destroy the exec queue on driver side, as the GuC
+	 *   will not process further requests and all resources must be cleaned up locally.
+	 */
+	if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
 		disable_scheduling_deregister(guc, q);
 	else
 		__guc_exec_queue_destroy(guc, q);

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 4 additions & 2 deletions
@@ -434,7 +434,7 @@ int xe_migrate_init(struct xe_migrate *m)

 	err = xe_migrate_lock_prepare_vm(tile, m, vm);
 	if (err)
-		return err;
+		goto err_out;

 	if (xe->info.has_usm) {
 		struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -2113,7 +2113,9 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
 		if (current_bytes & ~PAGE_MASK) {
 			int pitch = 4;

-			current_bytes = min_t(int, current_bytes, S16_MAX * pitch);
+			current_bytes = min_t(int, current_bytes,
+					      round_down(S16_MAX * pitch,
+							 XE_CACHELINE_BYTES));
 		}

 		__fence = xe_migrate_vram(m, current_bytes,
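
The second hunk matters because S16_MAX * pitch = 32767 * 4 = 131068 bytes is not cacheline-aligned, so the old clamp could produce a transfer size that violates the blitter's alignment expectations. A small userspace check of the arithmetic, assuming a 64-byte XE_CACHELINE_BYTES:

#include <stdio.h>

#define S16_MAX			32767
#define XE_CACHELINE_BYTES	64	/* assumption for illustration */
/* Simplified round_down(); the kernel macro requires a power-of-two divisor. */
#define round_down(x, a)	((x) - ((x) % (a)))

int main(void)
{
	int pitch = 4;

	printf("unaligned clamp: %d\n", S16_MAX * pitch);	/* 131068 */
	printf("aligned clamp:   %d\n",
	       round_down(S16_MAX * pitch, XE_CACHELINE_BYTES));	/* 131008 */
	return 0;
}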

drivers/gpu/drm/xe/xe_pci.c

Lines changed: 2 additions & 0 deletions
@@ -867,6 +867,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		return err;

+	xe_vram_resize_bar(xe);
+
 	err = xe_device_probe_early(xe);
 	/*
 	 * In Boot Survivability mode, no drm card is exposed and driver

drivers/gpu/drm/xe/xe_svm.c

Lines changed: 15 additions & 2 deletions
@@ -1034,6 +1034,9 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 	if (err)
 		return err;

+	dpagemap = xe_vma_resolve_pagemap(vma, tile);
+	if (!dpagemap && !ctx.devmem_only)
+		ctx.device_private_page_owner = NULL;
 	range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);

 	if (IS_ERR(range))
@@ -1054,7 +1057,6 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,

 	range_debug(range, "PAGE FAULT");

-	dpagemap = xe_vma_resolve_pagemap(vma, tile);
 	if (--migrate_try_count >= 0 &&
 	    xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
 		ktime_t migrate_start = xe_svm_stats_ktime_get();
@@ -1073,7 +1075,17 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 			drm_dbg(&vm->xe->drm,
 				"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
 				vm->usm.asid, ERR_PTR(err));
-			goto retry;
+
+			/*
+			 * In the devmem-only case, mixed mappings may
+			 * be found. The get_pages function will fix
+			 * these up to a single location, allowing the
+			 * page fault handler to make forward progress.
+			 */
+			if (ctx.devmem_only)
+				goto get_pages;
+			else
+				goto retry;
 		} else {
 			drm_err(&vm->xe->drm,
 				"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
@@ -1083,6 +1095,7 @@ static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
 		}
 	}

+get_pages:
 	get_pages_start = xe_svm_stats_ktime_get();

 	range_debug(range, "GET PAGES");
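
The control-flow change is easiest to see in isolation: after a failed VRAM allocation, a devmem-only fault now jumps forward to page collection instead of restarting the whole fault. A compilable distillation with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

/* Distilled from the diff above: devmem-only faults skip the retry and
 * let the get-pages step consolidate mixed mappings. */
static void after_vram_alloc_failure(bool devmem_only)
{
	if (devmem_only)
		goto get_pages;
	printf("retry: re-run the entire fault\n");
	return;
get_pages:
	printf("get_pages: fix up mappings, make forward progress\n");
}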

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 23 additions & 9 deletions
@@ -2832,7 +2832,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 }

 static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
-				 bool validate)
+				 bool res_evict, bool validate)
 {
 	struct xe_bo *bo = xe_vma_bo(vma);
 	struct xe_vm *vm = xe_vma_vm(vma);
@@ -2843,7 +2843,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
 		err = drm_exec_lock_obj(exec, &bo->ttm.base);
 		if (!err && validate)
 			err = xe_bo_validate(bo, vm,
-					     !xe_vm_in_preempt_fence_mode(vm), exec);
+					     !xe_vm_in_preempt_fence_mode(vm) &&
+					     res_evict, exec);
 	}

 	return err;
@@ -2913,14 +2914,23 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
 }

 static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
-			    struct xe_vma_op *op)
+			    struct xe_vma_ops *vops, struct xe_vma_op *op)
 {
 	int err = 0;
+	bool res_evict;
+
+	/*
+	 * We only allow evicting a BO within the VM if it is not part of an
+	 * array of binds, as an array of binds can evict another BO within the
+	 * bind.
+	 */
+	res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);

 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		if (!op->map.invalidate_on_bind)
 			err = vma_lock_and_validate(exec, op->map.vma,
+						    res_evict,
 						    !xe_vm_in_fault_mode(vm) ||
 						    op->map.immediate);
 		break;
@@ -2931,11 +2941,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,

 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.remap.unmap->va),
-					    false);
+					    res_evict, false);
 		if (!err && op->remap.prev)
-			err = vma_lock_and_validate(exec, op->remap.prev, true);
+			err = vma_lock_and_validate(exec, op->remap.prev,
+						    res_evict, true);
 		if (!err && op->remap.next)
-			err = vma_lock_and_validate(exec, op->remap.next, true);
+			err = vma_lock_and_validate(exec, op->remap.next,
+						    res_evict, true);
 		break;
 	case DRM_GPUVA_OP_UNMAP:
 		err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -2944,7 +2956,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,

 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.unmap.va),
-					    false);
+					    res_evict, false);
 		break;
 	case DRM_GPUVA_OP_PREFETCH:
 	{
@@ -2959,7 +2971,7 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,

 		err = vma_lock_and_validate(exec,
 					    gpuva_to_vma(op->base.prefetch.va),
-					    false);
+					    res_evict, false);
 		if (!err && !xe_vma_has_no_bo(vma))
 			err = xe_bo_migrate(xe_vma_bo(vma),
 					    region_to_mem_type[region],
@@ -3005,7 +3017,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
 		return err;

 	list_for_each_entry(op, &vops->list, link) {
-		err = op_lock_and_prep(exec, vm, op);
+		err = op_lock_and_prep(exec, vm, vops, op);
 		if (err)
 			return err;
 	}
@@ -3638,6 +3650,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	}

 	xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+	if (args->num_binds > 1)
+		vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
 	for (i = 0; i < args->num_binds; ++i) {
 		u64 range = bind_ops[i].range;
 		u64 addr = bind_ops[i].addr;
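
The NPD fix reduces to one rule: in a multi-bind ioctl (args->num_binds > 1), validating one bind must not evict a BO that another bind in the same array may rely on, so residency eviction is disabled for the whole array. Restated as a predicate (the flag name comes from the diff; the bit value here is only for illustration):

#include <stdbool.h>

#define XE_VMA_OPS_ARRAY_OF_BINDS	(1 << 0)	/* illustrative value */

/* Whether validation may evict other resident BOs in the VM. */
static inline bool may_evict_residency(unsigned int vops_flags)
{
	return !(vops_flags & XE_VMA_OPS_ARRAY_OF_BINDS);
}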
