r300: crash when compiling some GSK shaders
Example affected shader: 138.shader_test. The crash can be reproduced with the radeon drm-shim.
This is a regression from a782809f, as identified by git bisect:
a782809f81dc32079691b3a280580dbf7b800dba is the first bad commit
commit a782809f81dc32079691b3a280580dbf7b800dba
Author: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Mon Mar 18 18:12:41 2024 -0500
nir/builder: Correctly handle decl_reg or undef as the first instruction
These are both handled by inserting them directly at the top of the
nir_function_impl. However, if the cursor is already at the top, it
never gets updated so we end up inserting other stuff after the newly
inserted undef or decl_reg. It's an odd edge case to be sure but I hit
it with my new NIR CF pass for NAK.
Fixes: 1be4c61c957d ("nir/builder: Add a helper for creating undefs")
Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28300>
src/compiler/nir/nir_builder.c | 16 ++++++++++++++++
src/compiler/nir/nir_builder.h | 7 +++----
2 files changed, 19 insertions(+), 4 deletions(-)
The crash happens late in the backend, during nir_lower_vec_to_regs:
Thread 1 "run" received signal SIGSEGV, Segmentation fault.
nir_instr_prev (instr=0x1126298) at ../src/compiler/glsl/list.h:203
203 return n->prev == NULL;
Missing separate debuginfos, use: dnf debuginfo-install glibc-2.38-16.fc39.x86_64 libX11-xcb-1.8.7-1.fc39.x86_64 libXau-1.0.11-3.fc39.x86_64 libdrm-2.4.120-1.fc39.x86_64 libedit-3.1-48.20230828cvs.fc39.x86_64 libepoxy-1.5.10-4.fc39.x86_64 libffi-3.4.4-4.fc39.x86_64 libgomp-13.2.1-7.fc39.x86_64 libstdc++-13.2.1-7.fc39.x86_64 libwayland-client-1.22.0-2.fc39.x86_64 libwayland-server-1.22.0-2.fc39.x86_64 libxcb-1.13.1-12.fc39.x86_64 libxshmfence-1.3-13.fc39.x86_64 llvm-libs-17.0.6-3.fc39.x86_64 ncurses-libs-6.4-7.20230520.fc39.1.x86_64
(gdb) bt
#0 nir_instr_prev (instr=0x1126298) at ../src/compiler/glsl/list.h:203
#1 reduce_cursor (cursor=...) at ../src/compiler/nir/nir.c:997
#2 0x00007ffff6d05e45 in nir_cursors_equal (a=..., b=...) at ../src/compiler/nir/nir.c:1027
#3 0x00007ffff6d0ac36 in nir_builder_instr_insert_at_top (build=build@entry=0x7fffffffb190, instr=instr@entry=0xffbec8) at ../src/compiler/nir/nir_builder.c:387
#4 0x00007ffff704084f in nir_decl_reg (num_array_elems=0, bit_size=32, num_components=4, b=0x7fffffffb190) at ../src/compiler/nir/nir_builder.h:1834
#5 lower (b=b@entry=0x7fffffffb190, instr=instr@entry=0x1126dd8, data_=data_@entry=0x7fffffffb180) at ../src/compiler/nir/nir_lower_vec_to_regs.c:216
#6 0x00007ffff7041598 in lower (data_=0x7fffffffb180, instr=0x1126dd8, b=0x7fffffffb190) at ../src/compiler/nir/nir_lower_vec_to_regs.c:198
#7 nir_function_instructions_pass (pass=<optimized out>, preserved=3, cb_data=0x7fffffffb180, impl=0xfe1760) at ../src/compiler/nir/nir_builder.h:103
#8 nir_shader_instructions_pass (shader=0xfe1760, pass=<optimized out>, preserved=3, cb_data=0x7fffffffb180) at ../src/compiler/nir/nir_builder.h:135
#9 nir_lower_vec_to_regs (shader=shader@entry=0x4785c0, cb=cb@entry=0x0, _data=_data@entry=0x0) at ../src/compiler/nir/nir_lower_vec_to_regs.c:259
#10 0x00007ffff6ffb50b in nir_to_rc_options (s=<optimized out>, screen=0x475bb0, options=0x7ffff7501028 <hwtcl_r500_options>) at ../src/gallium/drivers/r300/compiler/nir_to_rc.c:2458
#11 0x00007ffff6fe744e in r300_create_vs_state (pipe=0x728980, shader=0x7fffffffb550) at ../src/gallium/drivers/r300/r300_state.c:1962
#12 0x00007ffff694dec9 in st_create_common_variant (st=st@entry=0x7ef1f0, prog=prog@entry=0xf79e50, key=key@entry=0x7fffffffb830) at ../src/mesa/state_tracker/st_program.c:781
#13 0x00007ffff695109b in st_get_common_variant (st=st@entry=0x7ef1f0, prog=prog@entry=0xf79e50, key=key@entry=0x7fffffffb830) at ../src/mesa/state_tracker/st_program.c:834
#14 0x00007ffff6951601 in st_precompile_shader_variant (prog=0xf79e50, st=0x7ef1f0) at ../src/mesa/state_tracker/st_program.c:1320
#15 st_finalize_program (st=0x7ef1f0, prog=0xf79e50) at ../src/mesa/state_tracker/st_program.c:1421
#16 0x00007ffff6bdc39f in st_link_glsl_to_nir (shader_program=0x4172e0, ctx=0x7ef1f0) at ../src/mesa/state_tracker/st_glsl_to_nir.cpp:753
#17 st_link_shader (ctx=ctx@entry=0x7fffef0a9010, prog=prog@entry=0x4172e0) at ../src/mesa/state_tracker/st_glsl_to_nir.cpp:989
#18 0x00007ffff6b8a29b in link_program (no_error=<optimized out>, shProg=<optimized out>, ctx=<optimized out>) at ../src/mesa/main/shaderapi.c:1336
#19 link_program_error (ctx=0x7fffef0a9010, shProg=0x4172e0) at ../src/mesa/main/shaderapi.c:1445
#20 0x0000000000403e7c in main._omp_fn.0 () at run.c:846
#21 0x00007ffff7e0a286 in GOMP_parallel () from /lib64/libgomp.so.1
#22 0x000000000040298d in main (argc=<optimized out>, argv=<optimized out>) at run.c:672
This is how NIR looks just before:
nir_convert_from_ssa
shader: MESA_SHADER_VERTEX
source_sha1: {0x29b48658, 0xf5a27373, 0xd6272160, 0x1889e085, 0x00d29486}
name: GLSL1
label: shaders/gnome-shell-42/138.shader_test
internal: false
stage: 0
next_stage: 4
num_ubos: 1
inputs_read: 15-16
outputs_written: 0,32-38
subgroup_size: 1
bit_sizes_float: 0x20
bit_sizes_int: 0x21
first_ubo_is_default_ubo: true
flrp_lowered: true
inputs: 2
outputs: 8
uniforms: 14
decl_var shader_in INTERP_MODE_NONE none vec2 aPosition (VERT_ATTRIB_GENERIC0.xy, 0, 0)
decl_var shader_in INTERP_MODE_NONE none vec4 aColor (VERT_ATTRIB_GENERIC1.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 gl_Position (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 final_color (VARYING_SLOT_VAR0.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_outside_outline (VARYING_SLOT_VAR1.xyzw, 2, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_outside_outline#0 (VARYING_SLOT_VAR2.xyzw, 3, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_outside_outline#1 (VARYING_SLOT_VAR3.xyzw, 4, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_inside_outline (VARYING_SLOT_VAR4.xyzw, 5, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_inside_outline#2 (VARYING_SLOT_VAR5.xyzw, 6, 0)
decl_var shader_out INTERP_MODE_NONE none vec4 transformed_inside_outline#3 (VARYING_SLOT_VAR6.xyzw, 7, 0)
decl_var ubo INTERP_MODE_NONE none vec4[14] uniform_0 (0, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
32 %0 = load_const (0x00000000)
32x4 %5 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=7, component=0)
32x2 %1 = @load_input (%0 (0x0)) (base=0, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC0 slots=1) // aPosition
32x4 %3 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=4, component=0)
32x4 %6 = ffma %3, %1.xxxx, %5
32x4 %4 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=5, component=0)
32x4 %7 = ffma %4, %1.yyyy, %6
32x4 %9 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=1, component=0)
32x4 %10 = fmul %9, %7.yyyy
32x4 %8 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=0, component=0)
32x4 %11 = ffma %8, %7.xxxx, %10
32x4 %12 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=2, component=0)
32x4 %13 = ffma %12, %7.zzzz, %11
32x4 %14 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=3, component=0)
32x4 %15 = ffma %14, %7.wwww, %13
32x4 %2 = @load_input (%0 (0x0)) (base=1, range=1, component=0, dest_type=float32, io location=VERT_ATTRIB_GENERIC1 slots=1) // aColor
32x3 %16 = fmul %2.xyz, %2.www
32 %18 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=8, component=0)
32x4 %17 = vec4 %16.x, %16.y, %16.z, %2.w
32x4 %19 = fmul %17, %18.xxxx
32x4 %22 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=11, component=0)
32x2 %23 = fadd %22.xy, %22.zw
32x4 %24 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=12, component=0)
32x2 %25 = fadd %22.xy, %24.xy
32x2 %26 = vec2 %23.x, %22.y
32x2 %20 = load_const (0xbf800000, 0x3f800000) = (-1.000000, 1.000000)
32x2 %27 = ffma %24.zw, %20 (-1.000000, 1.000000), %26
32x4 %28 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=13, component=0)
32x2 %29 = fneg %28.xy
32x2 %30 = fabs %28.xy
32x2 %31 = fadd %23, %29
32x2 %32 = vec2 %22.x, %23.y
32x2 %21 = load_const (0x3f800000, 0xbf800000) = (1.000000, -1.000000)
32x2 %33 = ffma %28.zw, %21 (1.000000, -1.000000), %32
32 %34 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=9, component=0)
32x4 %36 = fneg %34.xxxx
32x4 %35 = vec4 %32.x, %26.y, %26.x, %32.y
32x4 %37 = ffma %36, %21 (1.000000, -1.000000).xxyy, %35
32x2 %40 = fabs %24.xy
32 %41 = fneg %40.y
32 %42 = seq %40.x, %41
32x4 %38 = vec4 %25.x, %25.y, %27.x, %27.y
32x4 %107 = fneg %38
32x4 %84 = mov %42.xxxx
32x4 %108 = ffma %107, %84, %38
32x4 %43 = vec4 %37.x, %37.y, %38.z, %38.w
32x4 %109 = ffma %43, %84, %108
32x2 %110 = seq %43.zw, %35.zy
32 %47 = fmul %110.x, %110.y
32x4 %87 = mov %47.xxxx
32x4 %88 = fneg %87
32 %86 = load_const (0x00000000 = 0.000000)
32x4 %89 = slt %88, %86 (0.000000).xxxx
32x4 %104 = fneg %109
32x4 %105 = ffma %104, %89, %109
32x4 %48 = vec4 %109.x, %109.y, %37.z, %43.y
32x4 %106 = ffma %48, %89, %105
32 %50 = fneg %30.y
32 %51 = seq %30.x, %50
32x4 %39 = vec4 %31.x, %31.y, %33.x, %33.y
32x4 %101 = fneg %39
32x4 %91 = mov %51.xxxx
32x4 %102 = ffma %101, %91, %39
32x4 %52 = vec4 %48.z, %37.w, %39.z, %39.w
32x4 %103 = ffma %52, %91, %102
32x2 %111 = seq %52.zw, %35.xw
32 %56 = fmul %111.x, %111.y
32x4 %94 = mov %56.xxxx
32x4 %95 = fneg %94
32 %93 = load_const (0x00000000 = 0.000000)
32x4 %96 = slt %95, %93 (0.000000).xxxx
32x4 %98 = fneg %103
32x4 %99 = ffma %98, %96, %103
32x4 %57 = vec4 %103.x, %103.y, %43.x, %52.y
32x4 %100 = ffma %57, %96, %99
32x2 %59 = @load_ubo_vec4 (%0 (0x0), %0 (0x0)) (access=speculatable, base=10, component=0)
32x4 %60 = fadd %37, %59.xyxy
32x4 %61 = fadd %106, %59.xyxy
32x4 %62 = fadd %100, %59.xyxy
32x4 %63 = ffma %3.xyxy, %60.xxzz, %5.xyxy
32x4 %64 = ffma %4.xyxy, %60.yyww, %63
32x4 %65 = ffma %3.xyxy, %61.xxzz, %5.xyxy
32x4 %66 = ffma %4.xyxy, %61.yyww, %65
32x4 %67 = ffma %3.xyxy, %62.xxzz, %5.xyxy
32x4 %68 = ffma %4.xyxy, %62.yyww, %67
32x4 %112 = ffma %3.xyxy, %35.xxzz, %5.xyxy
32x4 %113 = ffma %4.xyxy, %35.yyww, %112
32x2 %73 = ffma %3.xy, %38.xx, %5.xy
32x2 %74 = ffma %4.xy, %38.yy, %73
32x2 %75 = ffma %3.xy, %43.zz, %5.xy
32x2 %76 = ffma %4.xy, %43.ww, %75
32x2 %77 = ffma %3.xy, %39.xx, %5.xy
32x2 %78 = ffma %4.xy, %39.yy, %77
32x2 %79 = ffma %3.xy, %52.zz, %5.xy
32x2 %80 = ffma %4.xy, %52.ww, %79
@store_output (%15, %0 (0x0)) (base=0, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_POS slots=1, xfb(), xfb2()) // gl_Position
@store_output (%19, %0 (0x0)) (base=1, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR0 slots=1, xfb(), xfb2()) // final_color
@store_output (%64, %0 (0x0)) (base=2, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR1 slots=1, xfb(), xfb2()) // transformed_outside_outline
@store_output (%66, %0 (0x0)) (base=3, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR2 slots=1, xfb(), xfb2()) // transformed_outside_outline
@store_output (%68, %0 (0x0)) (base=4, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR3 slots=1, xfb(), xfb2()) // transformed_outside_outline
32x4 %81 = vec4 %113.x, %113.y, %113.z, %113.w
@store_output (%81, %0 (0x0)) (base=5, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR4 slots=1, xfb(), xfb2()) // transformed_inside_outline
32x4 %82 = vec4 %74.x, %74.y, %76.x, %76.y
@store_output (%82, %0 (0x0)) (base=6, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR5 slots=1, xfb(), xfb2()) // transformed_inside_outline
32x4 %83 = vec4 %78.x, %78.y, %80.x, %80.y
@store_output (%83, %0 (0x0)) (base=7, range=1, wrmask=xyzw, component=0, src_type=float32, io location=VARYING_SLOT_VAR6 slots=1, xfb(), xfb2()) // transformed_inside_outline
// succs: b1
block b1:
}
CC @gfxstrand @alyssa: I probably need to fix something in the r300 backend, but before I start digging I thought I would ask whether you have any ideas, as it might be obvious to you what is going wrong.