Skip to content

radv: add support for a trap handler to get the faulty shader/instruction on GFX8

Samuel Pitoiset requested to merge hakzsam/mesa:radv_trap_handler_gfx8 into master

Needs !6387 (merged) and !6386 (merged)

When a shader exception happens (memory violation, divide by zero, etc.), the hardware can jump to a trap handler if the driver has configured one. This makes it possible to get some very useful debug information, such as the PC of the faulty instruction.

For memory violations, the reported PC isn't really accurate yet because the violation is reported when the address is processed, by which point the wave may already have moved past the offending instruction (more waitcnt states should help). Other shader exceptions aren't enabled yet because I want to validate them first.

The trap handler shader code is built using ACO IR for better maintenance (writing the disassembly by hand is more painful).

Only GFX8 is currently supported, but GFX6-GFX7 should also work (they are just untested). Note that GFX9+ needs a different implementation.

The trap handler can be enabled with `export RADV_TRAP_HANDLER=1`.

See below for a real test case (i.e. #3251 (closed)). The hardware reported a memory violation \o/ (yeah, there is a VM fault with DMC5 and vkd3d-proton). As you can see, the instruction reported as faulty is inaccurate (the correct one is the tbuffer_load_format_xy above it).

Hardware registers:
        SQ_HW_REG_STATUS <- SCC = 0
                            SPI_PRIO = 0
                            USER_PRIO = 0
                            PRIV = 1
                            TRAP_EN = 1
                            TTRACE_EN = 0
                            EXPORT_RDY = 1
                            EXECZ = 0
                            VCCZ = 1
                            IN_TG = 0
                            IN_BARRIER = 0
                            HALT = 0
                            TRAP = 0
                            TTRACE_CU_EN = 0
                            VALID = 1
                            ECC_ERR = 0
                            SKIP_EXPORT = 0
                            PERF_EN = 0
                            COND_DBG_USER = 0
                            COND_DBG_SYS = 0
                            ALLOW_REPLAY = 0
                            INST_ATC = 0
                            MUST_EXPORT = 0
        SQ_HW_REG_TRAP_STS <- EXCP = MEMORY_VIOLATION
                              SAVE_CTX_vi = 0
                              EXCP_CYCLE = 0
                              DP_RATE = 0
        SQ_HW_REG_HW_ID <- WAVE_ID = 0
                           SIMD_ID = 3
                           PIPE_ID = 0
                           CU_ID = 3
                           SH_ID = 0
                           SE_ID = 0
                           TG_ID = 0
                           VM_ID = 0
                           QUEUE_ID = 0
                           STATE_ID = 0
                           ME_ID = 0
        SQ_HW_REG_IB_STS <- VM_CNT = 0
                            EXP_CNT = 0
                            LGKM_CNT = 2
                            VALU_CNT = 0
                            FIRST_REPLAY_vi = 0
                            RCNT_vi = 0


PC=0x1160d0c80, trapID=0, HT=0, PC_rewind=0
Faulty shader found VA=[0x1160d0900-0x1160d0cd0], instr_offset=896
	s_add_u32 s0, s4, 14                                        ; 80008e04 [PC=0x1160d0900, off=0, size=4]
	s_add_u32 s1, s4, 2                                         ; 80018204 [PC=0x1160d0904, off=4, size=4]
	s_mul_i32 s1, 16, s1                                        ; 92010190 [PC=0x1160d0908, off=8, size=4]
	s_add_i32 s8, s1, s2                                        ; 81080201 [PC=0x1160d090c, off=12, size=4]
	s_mov_b32 s9, 0                                             ; be890080 [PC=0x1160d0910, off=16, size=4]
	s_load_dwordx4 s[8:11], s[8:9], 0x0                         ; c00a0204 00000000 [PC=0x1160d0914, off=20, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d091c, off=28, size=4]
	s_buffer_load_dword s1, s[8:11], 0x0                        ; c0220044 00000000 [PC=0x1160d0920, off=32, size=8]
	s_mov_b32 s8, s3                                            ; be880003 [PC=0x1160d0928, off=40, size=4]
	s_mov_b32 s9, 0                                             ; be890080 [PC=0x1160d092c, off=44, size=4]
	s_mul_i32 s0, 16, s0                                        ; 92000090 [PC=0x1160d0930, off=48, size=4]
	s_add_i32 s0, 0, s0                                         ; 81000080 [PC=0x1160d0934, off=52, size=4]
	s_load_dwordx4 s[8:11], s[8:9], s0                          ; c0080204 00000000 [PC=0x1160d0938, off=56, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0940, off=64, size=4]
	s_mul_i32 s3, s1, 28                                        ; 92039c01 [PC=0x1160d0944, off=68, size=4]
	v_mov_b32_e32 v1, s3                                        ; 7e020203 [PC=0x1160d0948, off=72, size=4]
	s_add_u32 s7, 1, s3                                         ; 80070381 [PC=0x1160d094c, off=76, size=4]
	v_mov_b32_e32 v2, s7                                        ; 7e040207 [PC=0x1160d0950, off=80, size=4]
	s_add_u32 s7, 2, s3                                         ; 80070382 [PC=0x1160d0954, off=84, size=4]
	v_mov_b32_e32 v3, s7                                        ; 7e060207 [PC=0x1160d0958, off=88, size=4]
	s_add_u32 s7, 3, s3                                         ; 80070383 [PC=0x1160d095c, off=92, size=4]
	v_mov_b32_e32 v4, s7                                        ; 7e080207 [PC=0x1160d0960, off=96, size=4]
	s_add_u32 s7, s3, 4                                         ; 80078403 [PC=0x1160d0964, off=100, size=4]
	v_mov_b32_e32 v5, s7                                        ; 7e0a0207 [PC=0x1160d0968, off=104, size=4]
	s_add_u32 s7, 5, s3                                         ; 80070385 [PC=0x1160d096c, off=108, size=4]
	v_mov_b32_e32 v6, s7                                        ; 7e0c0207 [PC=0x1160d0970, off=112, size=4]
	s_add_u32 s7, 6, s3                                         ; 80070386 [PC=0x1160d0974, off=116, size=4]
	v_mov_b32_e32 v7, s7                                        ; 7e0e0207 [PC=0x1160d0978, off=120, size=4]
	s_add_u32 s7, 7, s3                                         ; 80070387 [PC=0x1160d097c, off=124, size=4]
	v_mov_b32_e32 v8, s7                                        ; 7e100207 [PC=0x1160d0980, off=128, size=4]
	s_add_u32 s7, s3, 8                                         ; 80078803 [PC=0x1160d0984, off=132, size=4]
	v_mov_b32_e32 v9, s7                                        ; 7e120207 [PC=0x1160d0988, off=136, size=4]
	s_add_u32 s7, 9, s3                                         ; 80070389 [PC=0x1160d098c, off=140, size=4]
	v_mov_b32_e32 v10, s7                                       ; 7e140207 [PC=0x1160d0990, off=144, size=4]
	s_add_u32 s7, 10, s3                                        ; 8007038a [PC=0x1160d0994, off=148, size=4]
	v_mov_b32_e32 v11, s7                                       ; 7e160207 [PC=0x1160d0998, off=152, size=4]
	s_add_u32 s3, 11, s3                                        ; 8003038b [PC=0x1160d099c, off=156, size=4]
	v_mov_b32_e32 v12, s3                                       ; 7e180203 [PC=0x1160d09a0, off=160, size=4]
	buffer_load_format_x v1, v1, s[8:11], 0 idxen               ; e0002000 80020101 [PC=0x1160d09a4, off=164, size=8]
	buffer_load_format_x v2, v2, s[8:11], 0 idxen               ; e0002000 80020202 [PC=0x1160d09ac, off=172, size=8]
	buffer_load_format_x v3, v3, s[8:11], 0 idxen               ; e0002000 80020303 [PC=0x1160d09b4, off=180, size=8]
	buffer_load_format_x v4, v4, s[8:11], 0 idxen               ; e0002000 80020404 [PC=0x1160d09bc, off=188, size=8]
	buffer_load_format_x v5, v5, s[8:11], 0 idxen               ; e0002000 80020505 [PC=0x1160d09c4, off=196, size=8]
	buffer_load_format_x v6, v6, s[8:11], 0 idxen               ; e0002000 80020606 [PC=0x1160d09cc, off=204, size=8]
	buffer_load_format_x v7, v7, s[8:11], 0 idxen               ; e0002000 80020707 [PC=0x1160d09d4, off=212, size=8]
	buffer_load_format_x v8, v8, s[8:11], 0 idxen               ; e0002000 80020808 [PC=0x1160d09dc, off=220, size=8]
	buffer_load_format_x v9, v9, s[8:11], 0 idxen               ; e0002000 80020909 [PC=0x1160d09e4, off=228, size=8]
	buffer_load_format_x v10, v10, s[8:11], 0 idxen             ; e0002000 80020a0a [PC=0x1160d09ec, off=236, size=8]
	buffer_load_format_x v11, v11, s[8:11], 0 idxen             ; e0002000 80020b0b [PC=0x1160d09f4, off=244, size=8]
	buffer_load_format_x v12, v12, s[8:11], 0 idxen             ; e0002000 80020c0c [PC=0x1160d09fc, off=252, size=8]
	s_mov_b32 s0, s5                                            ; be800005 [PC=0x1160d0a04, off=260, size=4]
	s_mov_b32 s1, 0                                             ; be810080 [PC=0x1160d0a08, off=264, size=4]
	s_load_dwordx4 s[8:11], s[0:1], 0x0                         ; c00a0200 00000000 [PC=0x1160d0a0c, off=268, size=8]
	v_add_u32_e32 v0, vcc, s6, v0                               ; 32000006 [PC=0x1160d0a14, off=276, size=4]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0a18, off=280, size=4]
	buffer_load_dwordx3 v[13:15], v0, s[8:11], 0 idxen          ; e0582000 80020d00 [PC=0x1160d0a1c, off=284, size=8]
	s_add_u32 s3, s4, 1                                         ; 80038104 [PC=0x1160d0a24, off=292, size=4]
	s_mul_i32 s3, 16, s3                                        ; 92030390 [PC=0x1160d0a28, off=296, size=4]
	s_add_i32 s6, s3, s2                                        ; 81060203 [PC=0x1160d0a2c, off=300, size=4]
	s_mov_b32 s7, 0                                             ; be870080 [PC=0x1160d0a30, off=304, size=4]
	s_load_dwordx4 s[8:11], s[6:7], 0x0                         ; c00a0203 00000000 [PC=0x1160d0a34, off=308, size=8]
	s_waitcnt vmcnt(0)                                          ; bf8c3f70 [PC=0x1160d0a3c, off=316, size=4]
	v_mul_f32_e32 v16, v2, v14                                  ; 0a201d02 [PC=0x1160d0a40, off=320, size=4]
	v_mac_f32_e32 v16, v1, v13                                  ; 2c201b01 [PC=0x1160d0a44, off=324, size=4]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0a48, off=328, size=4]
	s_buffer_load_dwordx2 s[6:7], s[8:11], 0x0                  ; c0260184 00000000 [PC=0x1160d0a4c, off=332, size=8]
	v_mac_f32_e32 v16, v3, v15                                  ; 2c201f03 [PC=0x1160d0a54, off=340, size=4]
	v_add_f32_e32 v4, v4, v16                                   ; 02082104 [PC=0x1160d0a58, off=344, size=4]
	v_mul_f32_e32 v16, v6, v14                                  ; 0a201d06 [PC=0x1160d0a5c, off=348, size=4]
	v_mac_f32_e32 v16, v5, v13                                  ; 2c201b05 [PC=0x1160d0a60, off=352, size=4]
	v_mac_f32_e32 v16, v7, v15                                  ; 2c201f07 [PC=0x1160d0a64, off=356, size=4]
	v_add_f32_e32 v8, v8, v16                                   ; 02102108 [PC=0x1160d0a68, off=360, size=4]
	v_mul_f32_e32 v14, v10, v14                                 ; 0a1c1d0a [PC=0x1160d0a6c, off=364, size=4]
	v_mac_f32_e32 v14, v9, v13                                  ; 2c1c1b09 [PC=0x1160d0a70, off=368, size=4]
	v_mac_f32_e32 v14, v11, v15                                 ; 2c1c1f0b [PC=0x1160d0a74, off=372, size=4]
	v_add_f32_e32 v12, v12, v14                                 ; 02181d0c [PC=0x1160d0a78, off=376, size=4]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0a7c, off=380, size=4]
	v_cmp_neq_f32_e64 s[12:13], s7, 0                           ; d04d000c 00010007 [PC=0x1160d0a80, off=384, size=8]
	v_cmp_neq_f32_e64 s[6:7], s6, 0                             ; d04d0006 00010006 [PC=0x1160d0a88, off=392, size=8]
	s_and_b64 s[6:7], s[6:7], s[12:13]                          ; 86860c06 [PC=0x1160d0a90, off=400, size=4]
	s_and_b64 s[6:7], s[6:7], exec                              ; 86867e06 [PC=0x1160d0a94, off=404, size=4]
	s_cbranch_scc0 BB6                                          ; bf84005e [PC=0x1160d0a98, off=408, size=4]
BB1: [PC=0x1160d0a9c, off=412, size=4]
	s_load_dwordx4 s[12:15], s[0:1], 0x10                       ; c00a0300 00000010 [PC=0x1160d0aa0, off=416, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0aa8, off=424, size=4]
	tbuffer_load_format_xyz v[13:15], v0, s[12:15],  dfmt:10, nfmt:1, 0 idxen ; e8d12000 80030d00 [PC=0x1160d0aac, off=428, size=8]
	s_mul_i32 s3, 16, s4                                        ; 92030490 [PC=0x1160d0ab4, off=436, size=4]
	s_add_i32 s6, s3, s2                                        ; 81060203 [PC=0x1160d0ab8, off=440, size=4]
	s_mov_b32 s7, 0                                             ; be870080 [PC=0x1160d0abc, off=444, size=4]
	s_load_dwordx4 s[12:15], s[6:7], 0x0                        ; c00a0303 00000000 [PC=0x1160d0ac0, off=448, size=8]
	s_waitcnt vmcnt(0)                                          ; bf8c3f70 [PC=0x1160d0ac8, off=456, size=4]
	v_mul_f32_e32 v2, v2, v14                                   ; 0a041d02 [PC=0x1160d0acc, off=460, size=4]
	v_mac_f32_e32 v2, v1, v13                                   ; 2c041b01 [PC=0x1160d0ad0, off=464, size=4]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0ad4, off=468, size=4]
	s_buffer_load_dword s3, s[12:15], 0xb4                      ; c02200c6 000000b4 [PC=0x1160d0ad8, off=472, size=8]
	v_mac_f32_e32 v2, v3, v15                                   ; 2c041f03 [PC=0x1160d0ae0, off=480, size=4]
	v_mul_f32_e32 v1, v6, v14                                   ; 0a021d06 [PC=0x1160d0ae4, off=484, size=4]
	v_mac_f32_e32 v1, v5, v13                                   ; 2c021b05 [PC=0x1160d0ae8, off=488, size=4]
	v_mac_f32_e32 v1, v7, v15                                   ; 2c021f07 [PC=0x1160d0aec, off=492, size=4]
	v_mul_f32_e32 v3, v10, v14                                  ; 0a061d0a [PC=0x1160d0af0, off=496, size=4]
	v_mac_f32_e32 v3, v9, v13                                   ; 2c061b09 [PC=0x1160d0af4, off=500, size=4]
	v_mac_f32_e32 v3, v11, v15                                  ; 2c061f0b [PC=0x1160d0af8, off=504, size=4]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0afc, off=508, size=4]
	v_cmp_lt_f32_e64 vcc, 0.5, |s3|                             ; d041026a 000006f0 [PC=0x1160d0b00, off=512, size=8]
	s_and_b64 s[6:7], vcc, exec                                 ; 86867e6a [PC=0x1160d0b08, off=520, size=4]
	s_cbranch_scc0 BB3                                          ; bf840025 [PC=0x1160d0b0c, off=524, size=4]
BB2: [PC=0x1160d0b10, off=528, size=4]
	s_buffer_load_dword s3, s[12:15], 0x7c                      ; c02200c6 0000007c [PC=0x1160d0b14, off=532, size=8]
	s_buffer_load_dword s5, s[12:15], 0x8c                      ; c0220146 0000008c [PC=0x1160d0b1c, off=540, size=8]
	s_buffer_load_dword s6, s[12:15], 0x9c                      ; c0220186 0000009c [PC=0x1160d0b24, off=548, size=8]
	s_buffer_load_dword s7, s[12:15], 0x178                     ; c02201c6 00000178 [PC=0x1160d0b2c, off=556, size=8]
	s_buffer_load_dwordx2 s[8:9], s[8:11], 0x0                  ; c0260204 00000000 [PC=0x1160d0b34, off=564, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0b3c, off=572, size=4]
	v_sub_f32_e32 v5, s3, v4                                    ; 040a0803 [PC=0x1160d0b40, off=576, size=4]
	v_sub_f32_e32 v6, s5, v8                                    ; 040c1005 [PC=0x1160d0b44, off=580, size=4]
	v_mul_f32_e32 v7, v6, v6                                    ; 0a0e0d06 [PC=0x1160d0b48, off=584, size=4]
	v_mac_f32_e32 v7, v5, v5                                    ; 2c0e0b05 [PC=0x1160d0b4c, off=588, size=4]
	v_sub_f32_e32 v9, s6, v12                                   ; 04121806 [PC=0x1160d0b50, off=592, size=4]
	v_mac_f32_e32 v7, v9, v9                                    ; 2c0e1309 [PC=0x1160d0b54, off=596, size=4]
	v_rsq_f32_e32 v7, v7                                        ; 7e0e4907 [PC=0x1160d0b58, off=600, size=4]
	v_mul_f32_e32 v5, v7, v5                                    ; 0a0a0b07 [PC=0x1160d0b5c, off=604, size=4]
	v_mul_f32_e32 v6, v7, v6                                    ; 0a0c0d07 [PC=0x1160d0b60, off=608, size=4]
	v_mul_f32_e32 v7, v7, v9                                    ; 0a0e1307 [PC=0x1160d0b64, off=612, size=4]
	v_mul_f32_e32 v1, v6, v1                                    ; 0a020306 [PC=0x1160d0b68, off=616, size=4]
	v_mad_f32 v1, v5, -v2, -v1                                  ; d1c10001 c4060505 [PC=0x1160d0b6c, off=620, size=8]
	v_mad_f32 v1, v7, -v3, v1 clamp                             ; d1c18001 44060707 [PC=0x1160d0b74, off=628, size=8]
	v_sub_f32_e32 v1, 1.0, v1                                   ; 040202f2 [PC=0x1160d0b7c, off=636, size=4]
	v_mul_f32_e32 v1, s7, v1                                    ; 0a020207 [PC=0x1160d0b80, off=640, size=4]
	v_mul_f32_e32 v1, s9, v1                                    ; 0a020209 [PC=0x1160d0b84, off=644, size=4]
	v_add_f32_e32 v1, s8, v1                                    ; 02020208 [PC=0x1160d0b88, off=648, size=4]
	v_mad_f32 v7, v7, -v1, v12                                  ; d1c10007 44320307 [PC=0x1160d0b8c, off=652, size=8]
	v_mad_f32 v2, v5, -v1, v4                                   ; d1c10002 44120305 [PC=0x1160d0b94, off=660, size=8]
	v_mad_f32 v1, v6, -v1, v8                                   ; d1c10001 44220306 [PC=0x1160d0b9c, off=668, size=8]
	s_branch BB4                                                ; bf82001a [PC=0x1160d0ba4, off=676, size=4]
BB3: [PC=0x1160d0ba8, off=680, size=4]
	s_buffer_load_dword s3, s[12:15], 0x78                      ; c02200c6 00000078 [PC=0x1160d0bac, off=684, size=8]
	s_buffer_load_dword s5, s[12:15], 0x88                      ; c0220146 00000088 [PC=0x1160d0bb4, off=692, size=8]
	s_buffer_load_dword s6, s[12:15], 0x98                      ; c0220186 00000098 [PC=0x1160d0bbc, off=700, size=8]
	s_buffer_load_dword s7, s[12:15], 0x178                     ; c02201c6 00000178 [PC=0x1160d0bc4, off=708, size=8]
	s_buffer_load_dwordx2 s[8:9], s[8:11], 0x0                  ; c0260204 00000000 [PC=0x1160d0bcc, off=716, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0bd4, off=724, size=4]
	v_mul_f32_e32 v1, s5, v1                                    ; 0a020205 [PC=0x1160d0bd8, off=728, size=4]
	v_mad_f32 v1, s3, -v2, -v1                                  ; d1c10001 c4060403 [PC=0x1160d0bdc, off=732, size=8]
	v_mad_f32 v1, s6, -v3, v1 clamp                             ; d1c18001 44060606 [PC=0x1160d0be4, off=740, size=8]
	v_sub_f32_e32 v1, 1.0, v1                                   ; 040202f2 [PC=0x1160d0bec, off=748, size=4]
	v_mul_f32_e32 v1, s7, v1                                    ; 0a020207 [PC=0x1160d0bf0, off=752, size=4]
	v_mul_f32_e32 v1, s9, v1                                    ; 0a020209 [PC=0x1160d0bf4, off=756, size=4]
	v_add_f32_e32 v1, s8, v1                                    ; 02020208 [PC=0x1160d0bf8, off=760, size=4]
	v_mad_f32 v7, s6, -v1, v12                                  ; d1c10007 44320206 [PC=0x1160d0bfc, off=764, size=8]
	v_mad_f32 v2, s3, -v1, v4                                   ; d1c10002 44120203 [PC=0x1160d0c04, off=772, size=8]
	v_mad_f32 v1, s5, -v1, v8                                   ; d1c10001 44220205 [PC=0x1160d0c0c, off=780, size=8]
BB4: [PC=0x1160d0c14, off=788, size=4]
	v_mov_b32_e32 v4, v2                                        ; 7e080302 [PC=0x1160d0c18, off=792, size=4]
	v_mov_b32_e32 v8, v1                                        ; 7e100301 [PC=0x1160d0c1c, off=796, size=4]
	v_mov_b32_e32 v12, v7                                       ; 7e180307 [PC=0x1160d0c20, off=800, size=4]
BB6: [PC=0x1160d0c24, off=804, size=4]
	s_load_dwordx4 s[8:11], s[0:1], 0x20                        ; c00a0200 00000020 [PC=0x1160d0c28, off=808, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0c30, off=816, size=4]
	tbuffer_load_format_xy v[0:1], v0, s[8:11],  dfmt:5, nfmt:7, 0 idxen ; eba8a000 80020000 [PC=0x1160d0c34, off=820, size=8]
	s_mul_i32 s0, 16, s4                                        ; 92000490 [PC=0x1160d0c3c, off=828, size=4]
	s_add_i32 s0, s0, s2                                        ; 81000200 [PC=0x1160d0c40, off=832, size=4]
	s_mov_b32 s1, 0                                             ; be810080 [PC=0x1160d0c44, off=836, size=4]
	s_load_dwordx4 s[0:3], s[0:1], 0x0                          ; c00a0000 00000000 [PC=0x1160d0c48, off=840, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0c50, off=848, size=4]
	s_buffer_load_dwordx4 s[4:7], s[0:3], 0x10                  ; c02a0100 00000010 [PC=0x1160d0c54, off=852, size=8]
	s_buffer_load_dwordx4 s[8:11], s[0:3], 0x0                  ; c02a0200 00000000 [PC=0x1160d0c5c, off=860, size=8]
	s_buffer_load_dwordx4 s[12:15], s[0:3], 0x20                ; c02a0300 00000020 [PC=0x1160d0c64, off=868, size=8]
	s_buffer_load_dwordx4 s[0:3], s[0:3], 0x30                  ; c02a0000 00000030 [PC=0x1160d0c6c, off=876, size=8]
	s_waitcnt lgkmcnt(0)                                        ; bf8cc07f [PC=0x1160d0c74, off=884, size=4]
	v_mul_f32_e32 v2, s4, v8                                    ; 0a041004 [PC=0x1160d0c78, off=888, size=4]
	v_mul_f32_e32 v3, s5, v8                                    ; 0a061005 [PC=0x1160d0c7c, off=892, size=4]

!!! Faulty instruction below !!!
	v_mul_f32_e32 v5, s6, v8                                    ; 0a0a1006 [PC=0x1160d0c80, off=896, size=4]

	v_mul_f32_e32 v6, s7, v8                                    ; 0a0c1007 [PC=0x1160d0c84, off=900, size=4]
	v_mac_f32_e32 v2, s8, v4                                    ; 2c040808 [PC=0x1160d0c88, off=904, size=4]
	v_mac_f32_e32 v3, s9, v4                                    ; 2c060809 [PC=0x1160d0c8c, off=908, size=4]
	v_mac_f32_e32 v5, s10, v4                                   ; 2c0a080a [PC=0x1160d0c90, off=912, size=4]
	v_mac_f32_e32 v6, s11, v4                                   ; 2c0c080b [PC=0x1160d0c94, off=916, size=4]
	v_mac_f32_e32 v2, s12, v12                                  ; 2c04180c [PC=0x1160d0c98, off=920, size=4]
	v_mac_f32_e32 v3, s13, v12                                  ; 2c06180d [PC=0x1160d0c9c, off=924, size=4]
	v_mac_f32_e32 v5, s14, v12                                  ; 2c0a180e [PC=0x1160d0ca0, off=928, size=4]
	v_mac_f32_e32 v6, s15, v12                                  ; 2c0c180f [PC=0x1160d0ca4, off=932, size=4]
	v_add_f32_e32 v2, s0, v2                                    ; 02040400 [PC=0x1160d0ca8, off=936, size=4]
	v_add_f32_e32 v3, s1, v3                                    ; 02060601 [PC=0x1160d0cac, off=940, size=4]
	v_add_f32_e32 v4, s2, v5                                    ; 02080a02 [PC=0x1160d0cb0, off=944, size=4]
	v_add_f32_e32 v5, s3, v6                                    ; 020a0c03 [PC=0x1160d0cb4, off=948, size=4]
	exp pos0 v2, v3, v4, v5 done                                ; c40008cf 05040302 [PC=0x1160d0cb8, off=952, size=8]
	s_waitcnt vmcnt(0)                                          ; bf8c3f70 [PC=0x1160d0cc0, off=960, size=4]
	exp param0 v0, v1, off, off                                 ; c4000203 80800100 [PC=0x1160d0cc4, off=964, size=8]
	s_endpgm                                                    ; bf810000 [PC=0x1160d0ccc, off=972, size=4]
Edited by Samuel Pitoiset

Merge request reports