5.18-rc1: verify_link_capability bug [amdgpu]
This is bug report 2 of 2; for bug report 1, see #1965. The following warning is logged while amdgpu initializes with a single HDMI output attached:
[ 3.311748] ------------[ cut here ]------------ [ 3.311752] WARNING: CPU: 0 PID: 610 at drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_link.c:938 verify_link_capability.isra.0+0x22e/0x290 [amdgpu] [ 3.311953] Modules linked in: acpi_cpufreq(E-) joydev(E) amdgpu(E+) stv6110x(E) intel_rapl_msr(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) drm_ttm_helper(E) intel_rapl_common(E) ledtrig_audio(E) snd_hda_codec_hdmi(E) ttm(E) snd_hda_intel(E) iommu_v2(E) snd_intel_dspcfg(E) snd_intel_sdw_acpi(E) gpu_sched(E) iTCO_wdt(E) hid_generic(E) stv090x(E) intel_pmc_bxt(E) eeepc_wmi(E) iTCO_vendor_support(E) drm_dp_helper(E) snd_hda_codec(E) asus_wmi(E) snd_hwdep(E) cec(E) battery(E) rc_core(E) snd_hda_core(E) sparse_keymap(E) x86_pkg_temp_thermal(E) platform_profile(E) intel_powerclamp(E) video(E) saa716x_budget(E) drm_kms_helper(E) snd_pcm(E) fb_sys_fops(E) coretemp(E) saa716x_core(E) syscopyarea(E) deflate(E) snd_timer(E) rfkill(E) pcspkr(E) i2c_i801(E) snd(E) sysfillrect(E) sysimgblt(E) mei_me(E) dvb_core(E) wmi_bmof(E) efi_pstore(E) usbhid(E) mxm_wmi(E) i2c_algo_bit(E) i2c_smbus(E) soundcore(E) mc(E) lpc_ich(E) e1000e(E) mei(E) tiny_power_button(E) button(E) [ 3.311986] nls_iso8859_1(E) nls_cp437(E) vfat(E) fat(E) fuse(E) drm(E) configfs(E) ip_tables(E) x_tables(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) aesni_intel(E) crypto_simd(E) cryptd(E) xhci_pci(E) ehci_pci(E) firewire_ohci(E) xhci_hcd(E) ehci_hcd(E) firewire_core(E) crc_itu_t(E) usbcore(E) usb_common(E) wmi(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) msr(E) efivarfs(E) [ 3.312016] CPU: 0 PID: 610 Comm: systemd-udevd Tainted: G W E 5.18.0-rc1-desktop-saa716x-O2-debug+ #137 [ 3.312019] Hardware name: System manufacturer System Product Name/P9X79, BIOS 4801 07/24/2014 [ 3.312022] RIP: 0010:verify_link_capability.isra.0+0x22e/0x290 [amdgpu] [ 3.312221] Code: c1 80 ba 43 05 00 00 00 74 e7 45 31 f6 48 8b 43 28 8b 00 8d 50 e0 83 e2 df 0f 84 6d 
ff ff ff 3d 80 00 00 00 0f 84 62 ff ff ff <0f> 0b 45 84 f6 0f 84 d7 fe ff ff eb 87 48 89 e7 e8 2d f2 00 00 83 [ 3.312226] RSP: 0018:ffffa26880967920 EFLAGS: 00010287 [ 3.312228] RAX: 0000000000000004 RBX: ffff941b68e49400 RCX: ffff941b0a7e0000 [ 3.312231] RDX: 00000000ffffffc4 RSI: 0000000000000000 RDI: ffff941b68e49400 [ 3.312233] RBP: ffffa26880967958 R08: ffffffffc13494ba R09: 0000000000000000 [ 3.312235] R10: 0000000000005368 R11: 0000000000000010 R12: 0000000000000000 [ 3.312237] R13: ffff941b980d0000 R14: 0000000000000000 R15: ffff941b98453f60 [ 3.312240] FS: 00007f0ec3850b00(0000) GS:ffff941e2fc00000(0000) knlGS:0000000000000000 [ 3.312243] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.312245] CR2: 0000557e8db81040 CR3: 0000000108d7e004 CR4: 00000000001706f0 [ 3.312247] Call Trace: [ 3.312250] <TASK> [ 3.312252] dc_link_detect+0x30/0x1a0 [amdgpu] [ 3.312448] amdgpu_dm_initialize_drm_device+0x596/0x821 [amdgpu] [ 3.312652] ? hdcp_create_workqueue+0x2ca/0x300 [amdgpu] [ 3.312856] amdgpu_dm_init.isra.0.cold+0x504/0x5a8 [amdgpu] [ 3.313070] ? psp_set_srm+0xa0/0xa0 [amdgpu] [ 3.313263] ? hdcp_update_display+0x220/0x220 [amdgpu] [ 3.313467] ? dev_printk_emit+0x49/0x60 [ 3.313472] dm_hw_init+0x9/0x20 [amdgpu] [ 3.313671] amdgpu_device_ip_init+0x331/0x512 [amdgpu] [ 3.313873] amdgpu_device_init.cold+0x268/0x6a5 [amdgpu] [ 3.314078] amdgpu_driver_load_kms+0x10/0x100 [amdgpu] [ 3.314240] amdgpu_pci_probe+0x1a3/0x3c0 [amdgpu] [ 3.314407] local_pci_probe+0x40/0x80 [ 3.314411] ? pci_match_device+0xd2/0x120 [ 3.314414] pci_device_probe+0xbd/0x1e0 [ 3.314418] really_probe+0x197/0x370 [ 3.314421] __driver_probe_device+0xf9/0x170 [ 3.314424] driver_probe_device+0x19/0x90 [ 3.314427] __driver_attach+0xbb/0x1c0 [ 3.314430] ? __device_attach_driver+0xd0/0xd0 [ 3.314432] ? __device_attach_driver+0xd0/0xd0 [ 3.314435] bus_for_each_dev+0x73/0xb0 [ 3.314437] bus_add_driver+0x143/0x1e0 [ 3.314440] driver_register+0x8a/0xe0 [ 3.314442] ? 
0xffffffffc1601000 [ 3.314444] do_one_initcall+0x47/0x190 [ 3.314448] ? kmem_cache_alloc+0x30/0x3f0 [ 3.314451] do_init_module+0x42/0x210 [ 3.314455] __do_sys_finit_module+0xb4/0x120 [ 3.314459] do_syscall_64+0x5c/0xc0 [ 3.314463] ? do_syscall_64+0x69/0xc0 [ 3.314466] ? do_syscall_64+0x69/0xc0 [ 3.314468] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3.314471] RIP: 0033:0x7f0ec37264dd [ 3.314474] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 1b 29 0f 00 f7 d8 64 89 01 48 [ 3.314477] RSP: 002b:00007ffc258ab178 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 3.314481] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0ec37264dd [ 3.314483] RDX: 0000000000000000 RSI: 0000557e8dd598e0 RDI: 000000000000001b [ 3.314485] RBP: 0000557e8dd5c250 R08: 0000000000000000 R09: 0000000000000002 [ 3.314487] R10: 000000000000001b R11: 0000000000000246 R12: 0000557e8dd598e0 [ 3.314489] R13: 0000557e8dd5b920 R14: 0000000000000000 R15: 0000557e8dd5c250 [ 3.314492] </TASK> [ 3.314494] ---[ end trace 0000000000000000 ]---
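The offending line is the `ASSERT(0)` in the `else` branch of the function below. That branch is presumably reached because the attached sink is HDMI, so `dc_is_dp_signal()` returns false: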
```c
static void verify_link_capability_destructive(struct dc_link *link,
		struct dc_sink *sink,
		enum dc_detect_reason reason)
{
	bool should_prepare_phy_clocks =
			should_prepare_phy_clocks_for_link_verification(link->dc, reason);

	if (should_prepare_phy_clocks)
		prepare_phy_clocks_for_destructive_link_verification(link->dc);

	if (dc_is_dp_signal(link->local_sink->sink_signal)) {
		struct dc_link_settings known_limit_link_setting =
				dp_get_max_link_cap(link);
		set_all_streams_dpms_off_for_link(link);
		dp_verify_link_cap_with_retries(
				link, &known_limit_link_setting,
				LINK_TRAINING_MAX_VERIFY_RETRY);
	} else {
		ASSERT(0);
	}

	if (should_prepare_phy_clocks)
		restore_phy_clocks_for_destructive_link_verification(link->dc);
}
```
lspci:
03:00.0 VGA compatible controller: Advanced Micro Devices, Inc. [AMD/ATI] Navi 10 [Radeon RX 5600 OEM/5600 XT / 5700/5700 XT] (rev c1) (prog-if 00 [VGA controller]) Subsystem: Micro-Star International Co., Ltd. [MSI] Device 3816 Flags: bus master, fast devsel, latency 0, IRQ 56, IOMMU group 23 Memory at c0000000 (64-bit, prefetchable) [size=256M] Memory at d0000000 (64-bit, prefetchable) [size=2M] I/O ports at e000 [size=256] Memory at fb800000 (32-bit, non-prefetchable) [size=512K] Expansion ROM at fb880000 [disabled] [size=128K] Capabilities: [48] Vendor Specific Information: Len=08 <?> Capabilities: [50] Power Management version 3 Capabilities: [64] Express Legacy Endpoint, MSI 00 Capabilities: [a0] MSI: Enable+ Count=1/1 Maskable- 64bit+ Capabilities: [100] Vendor Specific Information: ID=0001 Rev=1 Len=010 <?> Capabilities: [150] Advanced Error Reporting Capabilities: [200] Physical Resizable BAR Capabilities: [240] Power Budgeting <?> Capabilities: [270] Secondary PCI Express Capabilities: [2a0] Access Control Services Capabilities: [2b0] Address Translation Service (ATS) Capabilities: [2c0] Page Request Interface (PRI) Capabilities: [2d0] Process Address Space ID (PASID) Capabilities: [320] Latency Tolerance Reporting Capabilities: [400] Data Link Feature <?> Capabilities: [410] Physical Layer 16.0 GT/s <?> Capabilities: [440] Lane Margining at the Receiver <?> Kernel driver in use: amdgpu Kernel modules: amdgpu
dmesg.txt