r600: bisected 5eb0136a breaks a number of piglits
With 5eb0136a (mesa/st: when creating draw shader variants, use the base nir and skip driver opts), a number of piglit tests regress on r600:
spec@!opengl 1.0@gl-1.0-rendermode-feedback
spec@!opengl 1.1@gl_select - alpha-test enabled
spec@!opengl 1.1@gl_select - depth-test enabled
spec@!opengl 1.1@gl_select - no test function
spec@!opengl 1.1@gl_select - scissor-test enabled
spec@!opengl 1.1@gl_select - stencil-test enabled
The issue seems to be caused by using the base nir; applying the following diff
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 99fba22d69e..d774da057f6 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -646,14 +646,12 @@ get_nir_shader(struct st_context *st, struct gl_program *prog, bool is_draw)
const struct nir_shader_compiler_options *options =
is_draw ? &draw_nir_options : st_get_nir_compiler_options(st, prog->info.stage);
- if (is_draw) {
- assert(prog->base_serialized_nir);
- blob_reader_init(&blob_reader, prog->base_serialized_nir, prog->base_serialized_nir_size);
- } else {
- assert(prog->serialized_nir);
- blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
- }
- return nir_deserialize(NULL, options, &blob_reader);
+ assert(prog->serialized_nir);
+ blob_reader_init(&blob_reader, prog->serialized_nir, prog->serialized_nir_size);
(i.e. reverting to the fully optimized serialized nir for the draw path) fixes the issue.
The difference in the draw shaders at the entry point of draw_create_vs_llvm
looks like this:
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_POS (VARYING_SLOT_POS.xyzw, 0, 0)
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_COL0 (VARYING_SLOT_COL0.xyzw, 1, 0)
decl_var shader_out INTERP_MODE_SMOOTH none vec4 VARYING_SLOT_TEX0 (VARYING_SLOT_TEX0.xyzw, 2, 0)
-decl_var ubo INTERP_MODE_NONE none vec4[4] uniform_0 (0, 0, 0)
+decl_var ubo INTERP_MODE_NONE none vec4[8] uniform_0 (0, 0, 0)
decl_function main (0 params)
impl main {
con block b0: // preds:
- 32 %0 = load_const (0x00000000)
- 32x4 %1 = @load_ubo (%0 (0x0), %0 (0x0)) (access=none, align_mul=1073741824, align_offset=0, range_base=0, range=16)
- 32 %2 = load_const (0x00000010 = 16)
- 32x4 %3 = @load_ubo (%0 (0x0), %2 (0x10)) (access=none, align_mul=1073741824, align_offset=16, range_base=16, range=16)
- 32 %4 = load_const (0x00000020 = 32)
- 32x4 %5 = @load_ubo (%0 (0x0), %4 (0x20)) (access=none, align_mul=1073741824, align_offset=32, range_base=32, range=16)
- 32 %6 = load_const (0x00000030 = 48)
- 32x4 %7 = @load_ubo (%0 (0x0), %6 (0x30)) (access=none, align_mul=1073741824, align_offset=48, range_base=48, range=16)
+ 32 %0 = deref_var &state.matrix.mvp.transpose.row[0] (uniform vec4)
+ 32 %46 = load_const (0x00000000)
+ 32 %54 = load_const (0x00000000 = 0.000000)
+ 32 %55 = load_const (0x00000004)
+ 32 %56 = ishl %46 (0x0), %55 (0x4)
+ 32 %57 = load_const (0x00000010 = 16)
+ 32 %58 = iadd %56, %57 (0x10)
+ 32x4 %59 = @load_ubo (%54 (0x0), %58) (access=none, align_mul=1073741824, align_offset=16, range_base=16, range=16)
+ 32 %2 = deref_var &state.matrix.mvp.transpose.row[1] (uniform vec4)
+ 32 %48 = load_const (0x00000000)
+ 32 %60 = load_const (0x00000000 = 0.000000)
+ 32 %61 = load_const (0x00000004)
+ 32 %62 = ishl %48 (0x0), %61 (0x4)
+ 32 %63 = load_const (0x00000020 = 32)
+ 32 %64 = iadd %62, %63 (0x20)
+ 32x4 %65 = @load_ubo (%60 (0x0), %64) (access=none, align_mul=1073741824, align_offset=32, range_base=32, range=16)
+ 32 %4 = deref_var &state.matrix.mvp.transpose.row[2] (uniform vec4)
+ 32 %50 = load_const (0x00000000)
+ 32 %66 = load_const (0x00000000 = 0.000000)
+ 32 %67 = load_const (0x00000004)
+ 32 %68 = ishl %50 (0x0), %67 (0x4)
+ 32 %69 = load_const (0x00000030 = 48)
+ 32 %70 = iadd %68, %69 (0x30)
+ 32x4 %71 = @load_ubo (%66 (0x0), %70) (access=none, align_mul=1073741824, align_offset=48, range_base=48, range=16)
+ 32 %6 = deref_var &state.matrix.mvp.transpose.row[3] (uniform vec4)
+ 32 %52 = load_const (0x00000000)
+ 32 %72 = load_const (0x00000000 = 0.000000)
+ 32 %73 = load_const (0x00000004)
+ 32 %74 = ishl %52 (0x0), %73 (0x4)
+ 32 %75 = load_const (0x00000040 = 64)
+ 32 %76 = iadd %74, %75 (0x40)
+ 32x4 %77 = @load_ubo (%72 (0x0), %76) (access=none, align_mul=1073741824, align_offset=64, range_base=64, range=16)
32 %8 = deref_var &VERT_ATTRIB_POS (shader_in vec4)
32x4 %9 = @load_deref (%8) (access=none)
- 32 %10 = fmul %1.x, %9.x
- 32 %11 = fmul %1.y, %9.x
- 32 %12 = fmul %1.z, %9.x
- 32 %13 = fmul %1.w, %9.x
- 32 %14 = fmul %3.x, %9.y
- 32 %15 = fmul %3.y, %9.y
- 32 %16 = fmul %3.z, %9.y
- 32 %17 = fmul %3.w, %9.y
+ 32 %10 = fmul %59.x, %9.x
+ 32 %11 = fmul %59.y, %9.x
+ 32 %12 = fmul %59.z, %9.x
+ 32 %13 = fmul %59.w, %9.x
+ 32 %14 = fmul %65.x, %9.y
+ 32 %15 = fmul %65.y, %9.y
+ 32 %16 = fmul %65.z, %9.y
+ 32 %17 = fmul %65.w, %9.y
32 %18 = fadd %14, %10
32 %19 = fadd %15, %11
32 %20 = fadd %16, %12
32 %21 = fadd %17, %13
- 32 %22 = fmul %5.x, %9.z
- 32 %23 = fmul %5.y, %9.z
- 32 %24 = fmul %5.z, %9.z
- 32 %25 = fmul %5.w, %9.z
+ 32 %22 = fmul %71.x, %9.z
+ 32 %23 = fmul %71.y, %9.z
+ 32 %24 = fmul %71.z, %9.z
+ 32 %25 = fmul %71.w, %9.z
32 %26 = fadd %22, %18
32 %27 = fadd %23, %19
32 %28 = fadd %24, %20
32 %29 = fadd %25, %21
- 32 %30 = fmul %7.x, %9.w
- 32 %31 = fmul %7.y, %9.w
- 32 %32 = fmul %7.z, %9.w
- 32 %33 = fmul %7.w, %9.w
+ 32 %30 = fmul %77.x, %9.w
+ 32 %31 = fmul %77.y, %9.w
+ 32 %32 = fmul %77.z, %9.w
+ 32 %33 = fmul %77.w, %9.w
32 %34 = fadd %30, %26
32 %35 = fadd %31, %27
32 %36 = fadd %32, %28