nir: Mesa regression on compute shader
Just found a regression with a compute shader when running the command below. Please take a look; the error spew follows the command, and the issue is caused by one of the commits listed at the end.
$ gst-launch-1.0 -f filesrc location=~/clips/1080p_H264.mp4 ! qtdemux ! h264parse ! omxh264dec ! glimagesink
Setting pipeline to PAUSED ...
NIR validation failed TTN: after parsing TGSI and creating the NIR shader
1 errors:
shader: MESA_SHADER_COMPUTE
local-size: 8, 8, 1
shared-size: 0
inputs: 0
outputs: 0
uniforms: 7
shared: 0
decl_var uniform INTERP_MODE_NONE vec4[7] uniform_0 (0, 0, 0)
decl_var uniform INTERP_MODE_NONE sampler2DRect sampler (0, 0, 0)
decl_var uniform INTERP_MODE_NONE sampler2DRect sampler@0 (0, 0, 1)
decl_var uniform INTERP_MODE_NONE sampler2DRect sampler@1 (0, 0, 2)
decl_var uniform INTERP_MODE_NONE none image2D image (0, 0, 0)
decl_function main (0 params)
impl main {
decl_reg vec4 32 r0
decl_reg vec4 32 r1
decl_reg vec4 32 r2
decl_reg vec4 32 r3
decl_reg vec4 32 r4
decl_reg vec4 32 r5
decl_reg vec4 32 r6
decl_reg vec4 32 r7
block block_0:
/* preds: */
vec1 32 ssa_107 = undefined
vec4 32 ssa_0 = load_const (0x00000008 /* 0.000000 */, 0x00000008 /* 0.000000 */, 0x00000001 /* 0.000000 */, 0x00000000 /* 0.000000 */)
vec4 32 ssa_1 = load_const (0x3f800000 /* 1.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */, 0x00000000 /* 0.000000 */)
vec3 32 ssa_2 = intrinsic load_work_group_id () ()
vec4 32 ssa_3 = mov ssa_2.xyzz
vec4 32 ssa_4 = mov ssa_3.xyyy
vec4 32 ssa_5 = mov ssa_0.xyyy
vec3 32 ssa_6 = intrinsic load_local_invocation_id () ()
vec4 32 ssa_7 = mov ssa_6.xyzz
vec4 32 ssa_8 = mov ssa_7.xyyy
vec4 32 ssa_9 = imul ssa_4, ssa_5
vec4 32 ssa_10 = iadd ssa_9, ssa_8
r0.xy = mov ssa_10.xy
vec4 32 ssa_11 = mov r0.xyxy
vec1 32 ssa_12 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_13 = intrinsic load_uniform (ssa_12) (4, 1, 2) /* base=4 */ /* range=1 */ /* type=int */
vec4 32 ssa_14 = mov ssa_13.xyxy
vec4 1 ssa_15 = uge ssa_11, ssa_14
vec4 32 ssa_16 = b2i32 ssa_15
vec4 32 ssa_17 = ineg ssa_16
r1.xy = mov ssa_17.xy
vec4 32 ssa_18 = mov r0.xyxy
vec1 32 ssa_19 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_20 = intrinsic load_uniform (ssa_19) (4, 1, 2) /* base=4 */ /* range=1 */ /* type=int */
vec4 32 ssa_21 = mov ssa_20.zwzw
vec4 1 ssa_22 = ult ssa_18, ssa_21
vec4 32 ssa_23 = b2i32 ssa_22
vec4 32 ssa_24 = ineg ssa_23
r1.zw = mov ssa_24.zw
vec4 32 ssa_25 = mov r1.xxxx
vec4 32 ssa_26 = mov r1.yyyy
vec4 32 ssa_27 = iand ssa_25, ssa_26
r1.x = mov ssa_27.x
vec4 32 ssa_28 = mov r1.xxxx
vec4 32 ssa_29 = mov r1.zzzz
vec4 32 ssa_30 = iand ssa_28, ssa_29
r1.x = mov ssa_30.x
vec4 32 ssa_31 = mov r1.xxxx
vec4 32 ssa_32 = mov r1.wwww
vec4 32 ssa_33 = iand ssa_31, ssa_32
r1.x = mov ssa_33.x
vec4 32 ssa_34 = mov r1.xxxx
vec1 32 ssa_35 = load_const (0x00000000 /* 0.000000 */)
vec4 1 ssa_36 = ine ssa_34, ssa_35.xxxx
/* succs: block_1 block_2 */
if ssa_36 {
block block_1:
/* preds: block_0 */
vec4 32 ssa_37 = mov r0.xyyy
vec1 32 ssa_38 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_39 = intrinsic load_uniform (ssa_38) (5, 1, 2) /* base=5 */ /* range=1 */ /* type=int */
vec4 32 ssa_40 = mov ssa_39.xyxy
vec4 32 ssa_41 = ineg ssa_40
vec4 32 ssa_42 = iadd ssa_37, ssa_41
r2.xy = mov ssa_42.xy
vec4 32 ssa_43 = mov r2.xyyy
vec4 32 ssa_44 = u2f32 ssa_43
r2.xy = mov ssa_44.xy
vec4 32 ssa_45 = mov r2.xyyy
vec1 32 ssa_46 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_47 = intrinsic load_uniform (ssa_46) (6, 1, 128) /* base=6 */ /* range=1 */ /* type=float */
vec4 32 ssa_48 = mov ssa_47.xyyy
vec4 32 ssa_49 = fmul ssa_45, ssa_48
r3.xy = mov ssa_49.xy
vec4 32 ssa_50 = mov r2.xyyy
vec1 32 ssa_51 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_52 = intrinsic load_uniform (ssa_51) (3, 1, 128) /* base=3 */ /* range=1 */ /* type=float */
vec4 32 ssa_53 = mov ssa_52.zwww
vec4 32 ssa_54 = fdiv ssa_50, ssa_53
r2.xy = mov ssa_54.xy
vec4 32 ssa_55 = mov r3.xyyy
vec1 32 ssa_56 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_57 = intrinsic load_uniform (ssa_56) (3, 1, 128) /* base=3 */ /* range=1 */ /* type=float */
vec4 32 ssa_58 = mov ssa_57.zwww
vec4 32 ssa_59 = fdiv ssa_55, ssa_58
r3.xy = mov ssa_59.xy
vec4 32 ssa_60 = mov r2.xyyy
vec1 32 ssa_61 = deref_var &sampler (uniform sampler2DRect)
vec2 32 ssa_62 = mov ssa_60.xy
vec1 32 ssa_63 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_64 = (float)txl ssa_61 (texture_deref), ssa_61 (sampler_deref), ssa_62 (coord), ssa_63 (lod)
r4.x = mov ssa_64.x
vec4 32 ssa_65 = mov r3.xyyy
vec1 32 ssa_66 = deref_var &sampler@0 (uniform sampler2DRect)
vec2 32 ssa_67 = mov ssa_65.xy
vec1 32 ssa_68 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_69 = (float)txl ssa_66 (texture_deref), ssa_66 (sampler_deref), ssa_67 (coord), ssa_68 (lod)
r4.y = mov ssa_69.y
vec4 32 ssa_70 = mov r3.xyyy
vec1 32 ssa_71 = deref_var &sampler@1 (uniform sampler2DRect)
vec2 32 ssa_72 = mov ssa_70.xy
vec1 32 ssa_73 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_74 = (float)txl ssa_71 (texture_deref), ssa_71 (sampler_deref), ssa_72 (coord), ssa_73 (lod)
r4.z = mov ssa_74.z
vec4 32 ssa_75 = mov ssa_1.xxxx
vec4 32 ssa_76 = mov ssa_75
r4.w = mov ssa_76.w
vec1 32 ssa_77 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_78 = intrinsic load_uniform (ssa_77) (0, 1, 128) /* base=0 */ /* range=1 */ /* type=float */ /* uniform_0 */
vec4 32 ssa_79 = mov r4
vec1 32 ssa_80 = fdot4 ssa_78, ssa_79
r7.x = mov ssa_80
vec1 32 ssa_81 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_82 = intrinsic load_uniform (ssa_81) (1, 1, 128) /* base=1 */ /* range=1 */ /* type=float */
vec4 32 ssa_83 = mov r4
vec1 32 ssa_84 = fdot4 ssa_82, ssa_83
r7.y = mov ssa_84.x
vec1 32 ssa_85 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_86 = intrinsic load_uniform (ssa_85) (2, 1, 128) /* base=2 */ /* range=1 */ /* type=float */
vec4 32 ssa_87 = mov r4
vec1 32 ssa_88 = fdot4 ssa_86, ssa_87
r7.z = mov ssa_88.x
vec4 32 ssa_89 = mov r4.zzzz
vec4 32 ssa_90 = mov ssa_89
r5.w = mov ssa_90.w
vec4 32 ssa_91 = mov r5.wwww
vec1 32 ssa_92 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_93 = intrinsic load_uniform (ssa_92) (3, 1, 128) /* base=3 */ /* range=1 */ /* type=float */
vec4 32 ssa_94 = mov ssa_93.xxxx
vec4 32 ssa_95 = sge ssa_94, ssa_91
r6.w = mov ssa_95.w
vec4 32 ssa_96 = mov r5.wwww
vec1 32 ssa_97 = load_const (0x00000000 /* 0.000000 */)
vec4 32 ssa_98 = intrinsic load_uniform (ssa_97) (3, 1, 128) /* base=3 */ /* range=1 */ /* type=float */
vec4 32 ssa_99 = mov ssa_98.yyyy
vec4 32 ssa_100 = slt ssa_99, ssa_96
r5.w = mov ssa_100.w
vec4 32 ssa_101 = mov r5.wwww
vec4 32 ssa_102 = mov r6.wwww
vec4 32 ssa_103 = fmax ssa_101, ssa_102
r7.w = mov ssa_103.w
vec4 32 ssa_104 = mov r0.xyyy
vec4 32 ssa_105 = mov r7
vec1 32 ssa_106 = deref_var &image (uniform image2D)
vec1 32 ssa_108 = load_const (0x00000000 /* 0.000000 */)
intrinsic image_deref_store (ssa_106, ssa_104, ssa_107, ssa_105, ssa_108) (0, 0) /* access=0 */ /* type=invalid */
/* succs: block_3 */
} else {
block block_2:
/* preds: block_0 */
/* succs: block_3 */
}
block block_3:
/* preds: block_1 block_2 */
/* succs: block_4 */
block block_4:
}
1 additional errors:
error: src->ssa->num_components == num_components (../src/compiler/nir/nir_validate.c:204)
Aborted (core dumped)
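For context, the failing assertion is NIR's generic source-width check: every consumer of an SSA value declares how many components it reads, and the def feeding that source must provide exactly that many. In the dump above the likely offender is the if condition: ssa_36 is a vec4 ("vec4 1 ssa_36 = ine ..."), but an if condition must be a single component. A rough paraphrase of the check around nir_validate.c:204 (a sketch from memory of the 2019 sources, not the exact Mesa code):

static void
validate_ssa_src(nir_src *src, validate_state *state,
                 unsigned bit_sizes, unsigned num_components)
{
   /* src->ssa is the SSA def this source reads. */
   validate_assert(state, src->ssa != NULL);

   /* Callers that know the expected source width pass it down;
    * 0 means any width is accepted. An if condition is validated
    * with num_components == 1, so a vec4 condition trips this.
    */
   if (num_components)
      validate_assert(state, src->ssa->num_components == num_components);
}

The validation context string also says the shader came straight out of TTN ("after parsing TGSI and creating the NIR shader"), which is consistent with the ttn commit (f103bded) in the candidate list below.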
One of the commits below is causing the issue:
commit 7f0cd6f1 (HEAD -> master)
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Tue Apr 16 19:39:11 2019 +0200

    nir/opt_if: Use early returns in opt_if_merge()

    We would've had to add yet another level of indentation, or duplicated
    finding the if conditions in the next commit. Refactor this function to
    use early returns like our other optimizations, so that this isn't an
    issue.

    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:

commit 656e428f
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Tue Apr 16 19:31:45 2019 +0200

    nir/opt_if: Remove open-coded nir_ssa_def_rewrite_uses()

    So that we don't have to change these two places later.

    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:

commit c6f871b6
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Wed Apr 10 11:34:57 2019 +0200

    nir/lower_returns: Use nir control flow insertion helpers

    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:

commit f103bded
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Tue Apr 9 22:31:50 2019 +0200

    ttn: Use nir control flow insertion helpers

    As a side effect, we can delete the whole control flow stack thing.

    v2 (Jason Ekstrand):
    - Drop the ttn_if helper and just inline it in the two uses

    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Eric Anholt <eric@anholt.net>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:

commit f504eb68
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Tue Apr 9 22:31:06 2019 +0200

    radv: Use nir control flow insertion helpers

    v2 (Jason Ekstrand):
    - Rebased and tweaked a few cases
    - Use the helpers in build_timestamp_query_shader

    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:

commit b2ede628
Author: Connor Abbott <cwabbott0@gmail.com>
Date:   Tue Apr 9 22:16:26 2019 +0200

    intel/nir: Use nir control flow helpers

    Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
    Reviewed-by: Matt Turner <mattst88@gmail.com>
    Part-of:
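As a side note for anyone following the candidate commits: nir_ssa_def_rewrite_uses(), named in 656e428f above, redirects every use of one SSA def to a new source. The open-coded pattern that commit removed looks roughly like this (a sketch against the 2019-era NIR helpers from memory, not the exact opt_if code):

static void
rewrite_uses_open_coded(nir_ssa_def *def, nir_src new_src)
{
   /* Redirect every instruction-level use of def to new_src... */
   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   /* ...and every use of def as an if condition. */
   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

/* Equivalent to the one-line helper the commit switched to:
 *    nir_ssa_def_rewrite_uses(def, new_src);
 */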