[DG2] Assertion `!entry->pt->is_compact` failed!
On DG2, with CONFIG_DRM_XE_DEBUG_VM enabled, I'm hitting this assertion when running vm_bind operations:
[ 475.701024] ------------[ cut here ]------------
[ 475.701027] xe 0000:03:00.0: [drm] Assertion `!entry->pt->is_compact` failed!
platform: 7 subplatform: 3
graphics: Xe_HPG 12.55 step C0
media: Xe_HPM 12.55 step C0
display: enabled step C0
[ 475.701043] WARNING: CPU: 3 PID: 10 at drivers/gpu/drm/xe/xe_pt.c:943 xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701101] Modules linked in: xt_conntrack(E) xt_MASQUERADE(E) nf_conntrack_netlink(E) nfnetlink(E) xfrm_user(E) xfrm_algo(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) xt_addrtype(E) iptable_filter(E) br_netfilter(E) bridge(E) stp(E) llc(E) snd_seq_dummy(E) snd_hrtimer(E) snd_seq(E) snd_seq_device(E) qrtr(E) overlay(E) intel_rapl_msr(E) intel_rapl_common(E) sunrpc(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) kvm_intel(E) kvm(E) irqbypass(E) crc32_pclmul(E) ghash_clmulni_intel(E) snd_sof_pci_intel_cnl(E) sha512_ssse3(E) snd_sof_intel_hda_common(E) soundwire_intel(E) soundwire_generic_allocation(E) snd_sof_intel_hda_mlink(E) soundwire_cadence(E) snd_sof_intel_hda(E) snd_sof_pci(E) snd_sof_xtensa_dsp(E) snd_sof(E) snd_sof_utils(E) soundwire_bus(E) snd_soc_skl(E) snd_soc_hdac_hda(E) snd_hda_ext_core(E) snd_soc_sst_ipc(E) snd_soc_sst_dsp(E) snd_soc_acpi_intel_match(E) snd_soc_acpi(E) aesni_intel(E) snd_soc_core(E) crypto_simd(E) cryptd(E) snd_compress(E)
[ 475.701162] snd_intel_dspcfg(E) binfmt_misc(E) snd_intel_sdw_acpi(E) snd_hda_codec(E) rapl(E) snd_hda_core(E) snd_hwdep(E) intel_cstate(E) snd_pcm(E) intel_wmi_thunderbolt(E) iTCO_wdt(E) intel_pmc_bxt(E) intel_uncore(E) nls_ascii(E) snd_timer(E) iTCO_vendor_support(E) pcspkr(E) ee1004(E) wmi_bmof(E) nls_cp437(E) snd(E) watchdog(E) soundcore(E) vfat(E) fat(E) joydev(E) acpi_pad(E) intel_pmc_core(E) acpi_tad(E) button(E) intel_pch_thermal(E) evdev(E) serio_raw(E) sg(E) cfg80211(E) rfkill(E) msr(E) parport_pc(E) ppdev(E) lp(E) parport(E) loop(E) fuse(E) efi_pstore(E) dm_mod(E) configfs(E) efivarfs(E) ip_tables(E) x_tables(E) autofs4(E) hid_lenovo(E) hid_generic(E) xe(E) drm_exec(E) gpu_sched(E) drm_suballoc_helper(E) drm_ttm_helper(E) usbhid(E) hid(E) i915(E) sd_mod(E) i2c_algo_bit(E) drm_buddy(E) drm_display_helper(E) cec(E) ttm(E) nvme(E) drm_kms_helper(E) nvme_core(E) ahci(E) xhci_pci(E) t10_pi(E) libahci(E) xhci_hcd(E) e1000e(E) crc64_rocksoft(E) ptp(E) crc64(E) crc_t10dif(E) libata(E) intel_lpss_pci(E)
[ 475.701251] crct10dif_generic(E) drm(E) crct10dif_pclmul(E) crc32c_intel(E) crct10dif_common(E) intel_lpss(E) idma64(E) pps_core(E) i2c_i801(E) i2c_smbus(E) scsi_mod(E) usbcore(E) scsi_common(E) usb_common(E) fan(E) video(E) wmi(E)
[ 475.701271] CPU: 3 PID: 10 Comm: kworker/u12:0 Tainted: G W E 6.5.0-rc7pz+ #44
[ 475.701274] Hardware name: Intel Corporation CoffeeLake Client Platform/CoffeeLake S UDIMM RVP, BIOS CNLSFWR1.R00.X221.B00.2106281933 06/28/2021
[ 475.701276] Workqueue: events_unbound xe_vma_op_work_func [xe]
[ 475.701339] RIP: 0010:xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701394] Code: e0 c0 50 4c 8b 94 24 88 00 00 00 41 52 44 8b 8c 24 98 00 00 00 44 8b 84 24 9c 00 00 00 48 8b 94 24 a0 00 00 00 e8 f4 ec 01 ed <0f> 0b 44 8b 4b 10 48 83 c4 50 e9 34 fe ff ff 48 8b 17 e9 6d ff ff
[ 475.701396] RSP: 0018:ffffa89f400ef6f8 EFLAGS: 00010282
[ 475.701400] RAX: 0000000000000000 RBX: ffff8e2e86dd0800 RCX: 0000000000000000
[ 475.701402] RDX: 0000000000000002 RSI: 0000000000000027 RDI: 00000000ffffffff
[ 475.701404] RBP: ffff8e2f83378000 R08: 0000000000000000 R09: ffffffffaf0686e0
[ 475.701406] R10: ffffa89f400ef590 R11: ffffffffaf374f58 R12: 0000000000000002
[ 475.701409] R13: 0000000000000000 R14: 000000000000000c R15: ffffa89f400ef9d0
[ 475.701411] FS: 0000000000000000(0000) GS:ffff8e32ddcc0000(0000) knlGS:0000000000000000
[ 475.701413] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 475.701415] CR2: 00007f6f35d9b000 CR3: 000000018b378006 CR4: 00000000003706e0
[ 475.701418] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 475.701420] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 475.701422] Call Trace:
[ 475.701424] <TASK>
[ 475.701426] ? xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701480] ? __warn+0x81/0x170
[ 475.701485] ? xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701539] ? report_bug+0x18d/0x1c0
[ 475.701545] ? handle_bug+0x3c/0x80
[ 475.701548] ? exc_invalid_op+0x13/0x60
[ 475.701551] ? asm_exc_invalid_op+0x16/0x20
[ 475.701559] ? xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701614] ? xe_vm_dbg_print_entries+0x21c/0x240 [xe]
[ 475.701676] __xe_pt_bind_vma+0x450/0xf00 [xe]
[ 475.701734] ? lock_is_held_type+0xce/0x120
[ 475.701753] ? __lock_acquire+0x416/0x2160
[ 475.701758] ? __lock_acquire+0x416/0x2160
[ 475.701765] ? lock_acquire+0xd4/0x2d0
[ 475.701769] ? lock_acquire+0xe4/0x2d0
[ 475.701775] xe_vm_bind_vma+0xc6/0x390 [xe]
[ 475.701838] xe_vm_bind+0x9a/0x3c0 [xe]
[ 475.701902] __xe_vma_op_execute+0x41e/0x860 [xe]
[ 475.701964] ? __lock_acquire+0x416/0x2160
[ 475.701978] xe_vma_op_work_func+0x127/0x3a0 [xe]
[ 475.702037] ? lock_release+0x140/0x280
[ 475.702043] process_one_work+0x26d/0x550
[ 475.702050] worker_thread+0x50/0x3a0
[ 475.702055] ? __pfx_worker_thread+0x10/0x10
[ 475.702057] kthread+0xfd/0x130
[ 475.702061] ? __pfx_kthread+0x10/0x10
[ 475.702065] ret_from_fork+0x2d/0x50
[ 475.702068] ? __pfx_kthread+0x10/0x10
[ 475.702071] ret_from_fork_asm+0x1b/0x30
[ 475.702081] </TASK>
[ 475.702083] irq event stamp: 2763851
[ 475.702084] hardirqs last enabled at (2763859): [<ffffffffadd4c53e>] __up_console_sem+0x5e/0x70
[ 475.702088] hardirqs last disabled at (2763866): [<ffffffffadd4c523>] __up_console_sem+0x43/0x70
[ 475.702091] softirqs last enabled at (2763020): [<ffffffffadcad0ff>] __irq_exit_rcu+0x7f/0xd0
[ 475.702094] softirqs last disabled at (2763013): [<ffffffffadcad0ff>] __irq_exit_rcu+0x7f/0xd0
[ 475.702097] ---[ end trace 0000000000000000 ]---
Resolving the faulting address (xe_vm_dbg_print_entries+0x21c) with gdb leads us to:
(gdb) list *(xe_vm_dbg_print_entries+0x21c)
0x3b24c is in xe_vm_dbg_print_entries (../drivers/gpu/drm/xe/xe_pt.c:943).
938 struct xe_pt *xe_pt = entry->pt;
939 u64 page_size = 1ull << xe_pt_shift(xe_pt->level);
940 u64 end;
941 u64 start;
942
943 xe_assert(xe, !entry->pt->is_compact);
944 start = entry->ofs * page_size;
945 end = start + page_size * entry->qwords;
946 vm_dbg(&xe->drm,
947 "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n",
(gdb)