diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index ae26d8c6d01c5afcd941e1d9b7a3bf908682dca1..68d3d623a05bfa3dc8ffc0f1901dbba2b55d1529 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -17,6 +17,7 @@ #include "xe_guc_submit.h" #include "xe_hw_engine.h" #include "xe_sched_job.h" +#include "xe_vm.h" /** * DOC: Xe device coredump @@ -59,12 +60,22 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } +static void xe_devcoredump_deferred_snap_work(struct work_struct *work) +{ + struct xe_devcoredump_snapshot *ss = container_of(work, typeof(*ss), work); + + xe_force_wake_get(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); + if (ss->vm) + xe_vm_snapshot_capture_delayed(ss->vm); + xe_force_wake_put(gt_to_fw(ss->gt), XE_FORCEWAKE_ALL); +} + static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_device *xe = coredump_to_xe(coredump); - struct xe_devcoredump_snapshot *ss; + struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct drm_printer p; struct drm_print_iterator iter; struct timespec64 ts; @@ -74,12 +85,14 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, if (!data || !coredump_to_xe(coredump)) return -ENODEV; + /* Ensure delayed work is captured before continuing */ + flush_work(&ss->work); + iter.data = buffer; iter.offset = 0; iter.start = offset; iter.remain = count; - ss = &coredump->snapshot; p = drm_coredump_printer(&iter); drm_printf(&p, "**** Xe Device Coredump ****\n"); @@ -104,6 +117,10 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i], &p); + if (coredump->snapshot.vm) { + drm_printf(&p, "\n**** VM state ****\n"); + xe_vm_snapshot_print(coredump->snapshot.vm, &p); + } return count - iter.remain; } @@ -117,12 +134,15 @@ static void xe_devcoredump_free(void *data) if (!data || !coredump_to_xe(coredump)) return; + cancel_work_sync(&coredump->snapshot.work); + xe_guc_ct_snapshot_free(coredump->snapshot.ct); xe_guc_exec_queue_snapshot_free(coredump->snapshot.ge); xe_sched_job_snapshot_free(coredump->snapshot.job); for (i = 0; i < XE_NUM_HW_ENGINES; i++) if (coredump->snapshot.hwe[i]) xe_hw_engine_snapshot_free(coredump->snapshot.hwe[i]); + xe_vm_snapshot_free(coredump->snapshot.vm); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); @@ -147,6 +167,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); + ss->gt = q->gt; + INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); + cookie = dma_fence_begin_signalling(); for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { if (adj_logical_mask & BIT(i)) { @@ -162,6 +185,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); coredump->snapshot.job = xe_sched_job_snapshot_capture(job); + coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm); for_each_hw_engine(hwe, q->gt, id) { if (hwe->class != q->hwe->class || @@ -172,6 +196,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe); } + if (ss->vm) + queue_work(system_unbound_wq, &ss->work); + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); dma_fence_end_signalling(cookie); } @@ -205,3 +232,4 @@ void xe_devcoredump(struct xe_sched_job *job) xe_devcoredump_read, xe_devcoredump_free); } #endif + diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index d259119b2c9802cc64204c538981d4dd4c46c7ef..6f654b63c7f1cbbefb93c0dd2be7b5a41905baa7 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -12,6 +12,7 @@ #include "xe_hw_engine_types.h" struct xe_device; +struct xe_gt; /** * struct xe_devcoredump_snapshot - Crash snapshot @@ -26,6 +27,11 @@ struct xe_devcoredump_snapshot { /** @boot_time: Relative boot time so the uptime can be calculated. */ ktime_t boot_time; + /** @gt: Affected GT, used by forcewake for delayed capture */ + struct xe_gt *gt; + /** @work: Workqueue for deferred capture outside of signaling context */ + struct work_struct work; + /* GuC snapshots */ /** @ct: GuC CT snapshot */ struct xe_guc_ct_snapshot *ct; @@ -36,6 +42,8 @@ struct xe_devcoredump_snapshot { struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; /** @job: Snapshot of job state */ struct xe_sched_job_snapshot *job; + /** @vm: Snapshot of VM state */ + struct xe_vm_snapshot *vm; }; /** diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5c8cbaf0b8d985e673efff43e3cde3ca138e2cb5..5c1276d887c162e9d90636c17159e1ecd3c42f2d 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -13,6 +13,7 @@ #include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_tt.h> #include <drm/xe_drm.h> +#include <linux/ascii85.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/mm.h> @@ -1523,8 +1524,6 @@ void xe_vm_close_and_put(struct xe_vm *vm) up_write(&vm->lock); - mutex_destroy(&vm->snap_mutex); - mutex_lock(&xe->usm.lock); if (vm->flags & XE_VM_FLAG_FAULT_MODE) xe->usm.num_vm_in_fault_mode--; @@ -1550,6 +1549,8 @@ static void vm_destroy_work_func(struct work_struct *w) /* xe_vm_close_and_put was not called? */ xe_assert(xe, !vm->size); + mutex_destroy(&vm->snap_mutex); + if (!(vm->flags & XE_VM_FLAG_MIGRATION)) { xe_device_mem_access_put(xe); @@ -3269,3 +3270,168 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id) return 0; } + +struct xe_vm_snapshot { + unsigned long num_snaps; + struct { + u64 ofs, bo_ofs; + unsigned long len; + struct xe_bo *bo; + void *data; + struct mm_struct *mm; + } snap[]; +}; + +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) +{ + unsigned long num_snaps = 0, i; + struct xe_vm_snapshot *snap = NULL; + struct drm_gpuva *gpuva; + + if (!vm) + return NULL; + + mutex_lock(&vm->snap_mutex); + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + if (gpuva->flags & XE_VMA_DUMPABLE) + num_snaps++; + } + + if (num_snaps) + snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); + if (!snap) + goto out_unlock; + + snap->num_snaps = num_snaps; + i = 0; + drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { + struct xe_vma *vma = gpuva_to_vma(gpuva); + struct xe_bo *bo = vma->gpuva.gem.obj ? + gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; + + if (!(gpuva->flags & XE_VMA_DUMPABLE)) + continue; + + snap->snap[i].ofs = xe_vma_start(vma); + snap->snap[i].len = xe_vma_size(vma); + if (bo) { + snap->snap[i].bo = xe_bo_get(bo); + snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); + } else if (xe_vma_is_userptr(vma)) { + struct mm_struct *mm = + to_userptr_vma(vma)->userptr.notifier.mm; + + if (mmget_not_zero(mm)) + snap->snap[i].mm = mm; + else + snap->snap[i].data = ERR_PTR(-EFAULT); + + snap->snap[i].bo_ofs = xe_vma_userptr(vma); + } else { + snap->snap[i].data = ERR_PTR(-ENOENT); + } + i++; + } + +out_unlock: + mutex_unlock(&vm->snap_mutex); + return snap; +} + +void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) +{ + for (int i = 0; i < snap->num_snaps; i++) { + struct xe_bo *bo = snap->snap[i].bo; + struct iosys_map src; + int err; + + if (IS_ERR(snap->snap[i].data)) + continue; + + snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); + if (!snap->snap[i].data) { + snap->snap[i].data = ERR_PTR(-ENOMEM); + goto cleanup_bo; + } + + if (bo) { + dma_resv_lock(bo->ttm.base.resv, NULL); + err = ttm_bo_vmap(&bo->ttm, &src); + if (!err) { + xe_map_memcpy_from(xe_bo_device(bo), + snap->snap[i].data, + &src, snap->snap[i].bo_ofs, + snap->snap[i].len); + ttm_bo_vunmap(&bo->ttm, &src); + } + dma_resv_unlock(bo->ttm.base.resv); + } else { + void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; + + kthread_use_mm(snap->snap[i].mm); + if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) + err = 0; + else + err = -EFAULT; + kthread_unuse_mm(snap->snap[i].mm); + + mmput(snap->snap[i].mm); + snap->snap[i].mm = NULL; + } + + if (err) { + kvfree(snap->snap[i].data); + snap->snap[i].data = ERR_PTR(err); + } + +cleanup_bo: + xe_bo_put(bo); + snap->snap[i].bo = NULL; + } +} + +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) +{ + unsigned long i, j; + + for (i = 0; i < snap->num_snaps; i++) { + if (IS_ERR(snap->snap[i].data)) + goto uncaptured; + + drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); + drm_printf(p, "[%llx].data: ", + snap->snap[i].ofs); + + for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { + u32 *val = snap->snap[i].data + j; + char dumped[ASCII85_BUFSZ]; + + drm_puts(p, ascii85_encode(*val, dumped)); + } + + drm_puts(p, "\n"); + continue; + +uncaptured: + drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n", + snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1, + PTR_ERR(snap->snap[i].data)); + } +} + +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) +{ + unsigned long i; + + if (!snap) + return; + + for (i = 0; i < snap->num_snaps; i++) { + if (!IS_ERR(snap->snap[i].data)) + kvfree(snap->snap[i].data); + xe_bo_put(snap->snap[i].bo); + if (snap->snap[i].mm) + mmput(snap->snap[i].mm); + } + kvfree(snap); +} diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index df4a82e960ff0c21a6e682ede322fd26226de7b8..6df1f1c7f85d98a2b948ba41ec9f1ed5a287faf0 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -271,3 +271,8 @@ static inline void vm_dbg(const struct drm_device *dev, { /* noop */ } #endif #endif + +struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm); +void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap); +void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p); +void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);