r600: bisected 5eb0136a breaks a number of piglits
With 5eb0136a (mesa/st: when creating draw shader variants, use the base nir and skip driver opts), a number of piglit tests regress on r600:
spec@!opengl 1.0@gl-1.0-rendermode-feedback
spec@!opengl 1.1@gl_select - alpha-test enabled
spec@!opengl 1.1@gl_select - depth-test enabled
spec@!opengl 1.1@gl_select - no test function
spec@!opengl 1.1@gl_select - scissor-test enabled
spec@!opengl 1.1@gl_select - stencil-test enabled
The issue seems to be caused by using the base nir; applying the following diff
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 99fba22d69e..d774da057f6 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -646,14 +646,12 @@ get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
const struct nir_shader_compiler_options *options =
is_draw ? &draw_nir_options : st_get_nir_compiler_options(st, prog->info.stage);
- if (is_draw) {
- assert(prog->base_serialized_nir);
- blob_reader_init(&blob_reader, prog->base_serialized_nir, prog->base_serialized_nir_size);
- } else {
- assert(prog->serialized_nir);
- blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
- }
- return nir_deserialize(NULL, options, &blob_reader);
+ assert(prog->serialized_nir);
+ blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
(i.e. reverting to the fully optimized serialized nir for the draw path) fixes the issue.
The difference in the draw shaders at the entry point of draw_create_vs_llvm
looks like this:
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_POS (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_COL0 (VARYING_SLOT_COL0.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_TEX0 (VARYING_SLOT_TEX0.xyzw, 2, 0)
-decl_var ubo INTERP_MODE_NONE none vec4[4] uniform_0 (0, 0, 0)
+decl_var ubo INTERP_MODE_NONE none vec4[8] uniform_0 (0, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
- 32 %0 = load_const (0x00000000)
- 32x4 %1 = @load_ubo (%0 (0x0), %0 (0x0)) (access=none, align_mul=1073741824, align_offset=0, range_base=0, range=16)
- 32 %2 = load_const (0x00000010 = 16)
- 32x4 %3 = @load_ubo (%0 (0x0), %2 (0x10)) (access=none, align_mul=1073741824, align_offset=16, range_base=16, range=16)
- 32 %4 = load_const (0x00000020 = 32)
- 32x4 %5 = @load_ubo (%0 (0x0), %4 (0x20)) (access=none, align_mul=1073741824, align_offset=32, range_base=32, range=16)
- 32 %6 = load_const (0x00000030 = 48)
- 32x4 %7 = @load_ubo (%0 (0x0), %6 (0x30)) (access=none, align_mul=1073741824, align_offset=48, range_base=48, range=16)
+ 32 %0 = deref_var &state.matrix.mvp.transpose.row[0] (uniform vec4)
+ 32 %46 = load_const (0x00000000)
+ 32 %54 = load_const (0x00000000 = 0.000000)
+ 32 %55 = load_const (0x00000004)
+ 32 %56 = ishl %46 (0x0), %55 (0x4)
+ 32 %57 = load_const (0x00000010 = 16)
+ 32 %58 = iadd %56, %57 (0x10)
+ 32x4 %59 = @load_ubo (%54 (0x0), %58) (access=none, align_mul=1073741824, align_offset=16, range_base=16, range=16)
+ 32 %2 = deref_var &state.matrix.mvp.transpose.row[1] (uniform vec4)
+ 32 %48 = load_const (0x00000000)
+ 32 %60 = load_const (0x00000000 = 0.000000)
+ 32 %61 = load_const (0x00000004)
+ 32 %62 = ishl %48 (0x0), %61 (0x4)
+ 32 %63 = load_const (0x00000020 = 32)
+ 32 %64 = iadd %62, %63 (0x20)
+ 32x4 %65 = @load_ubo (%60 (0x0), %64) (access=none, align_mul=1073741824, align_offset=32, range_base=32, range=16)
+ 32 %4 = deref_var &state.matrix.mvp.transpose.row[2] (uniform vec4)
+ 32 %50 = load_const (0x00000000)
+ 32 %66 = load_const (0x00000000 = 0.000000)
+ 32 %67 = load_const (0x00000004)
+ 32 %68 = ishl %50 (0x0), %67 (0x4)
+ 32 %69 = load_const (0x00000030 = 48)
+ 32 %70 = iadd %68, %69 (0x30)
+ 32x4 %71 = @load_ubo (%66 (0x0), %70) (access=none, align_mul=1073741824, align_offset=48, range_base=48, range=16)
+ 32 %6 = deref_var &state.matrix.mvp.transpose.row[3] (uniform vec4)
+ 32 %52 = load_const (0x00000000)
+ 32 %72 = load_const (0x00000000 = 0.000000)
+ 32 %73 = load_const (0x00000004)
+ 32 %74 = ishl %52 (0x0), %73 (0x4)
+ 32 %75 = load_const (0x00000040 = 64)
+ 32 %76 = iadd %74, %75 (0x40)
+ 32x4 %77 = @load_ubo (%72 (0x0), %76) (access=none, align_mul=1073741824, align_offset=64, range_base=64, range=16)
32 %8 = deref_var &VERT_ATTRIB_POS (shader_in vec4)
32x4 %9 = @load_deref (%8) (access=none)
- 32 %10 = fmul %1.x, %9.x
- 32 %11 = fmul %1.y, %9.x
- 32 %12 = fmul %1.z, %9.x
- 32 %13 = fmul %1.w, %9.x
- 32 %14 = fmul %3.x, %9.y
- 32 %15 = fmul %3.y, %9.y
- 32 %16 = fmul %3.z, %9.y
- 32 %17 = fmul %3.w, %9.y
+ 32 %10 = fmul %59.x, %9.x
+ 32 %11 = fmul %59.y, %9.x
+ 32 %12 = fmul %59.z, %9.x
+ 32 %13 = fmul %59.w, %9.x
+ 32 %14 = fmul %65.x, %9.y
+ 32 %15 = fmul %65.y, %9.y
+ 32 %16 = fmul %65.z, %9.y
+ 32 %17 = fmul %65.w, %9.y
32 %18 = fadd %14, %10
32 %19 = fadd %15, %11
32 %20 = fadd %16, %12
32 %21 = fadd %17, %13
- 32 %22 = fmul %5.x, %9.z
- 32 %23 = fmul %5.y, %9.z
- 32 %24 = fmul %5.z, %9.z
- 32 %25 = fmul %5.w, %9.z
+ 32 %22 = fmul %71.x, %9.z
+ 32 %23 = fmul %71.y, %9.z
+ 32 %24 = fmul %71.z, %9.z
+ 32 %25 = fmul %71.w, %9.z
32 %26 = fadd %22, %18
32 %27 = fadd %23, %19
32 %28 = fadd %24, %20
32 %29 = fadd %25, %21
- 32 %30 = fmul %7.x, %9.w
- 32 %31 = fmul %7.y, %9.w
- 32 %32 = fmul %7.z, %9.w
- 32 %33 = fmul %7.w, %9.w
+ 32 %30 = fmul %77.x, %9.w
+ 32 %31 = fmul %77.y, %9.w
+ 32 %32 = fmul %77.z, %9.w
+ 32 %33 = fmul %77.w, %9.w
32 %34 = fadd %30, %26
32 %35 = fadd %31, %27
32 %36 = fadd %32, %28