Mesa 20.0.7 / 20.1.0-rc4 regression: extremely long shader compilation time in NIR
Linux, amd64, Debian testing.
AMD Radeon R9 Fury X.
Mesa 20.0.7 from Debian. LLVM 10.0.0-4. Mesa 20.1.0-rc4 from Debian. LLVM 10.0.0-4. (Same issue)
I was testing various shaders on shadertoy.com, and I found one that triggers an infinite loop or an extremely costly operation in Mesa.
Tried in Firefox Nightly 78.0a1 (2020-05-22) (64-bit), Firefox 68.8.0esr (64-bit), and Chromium Version 81.0.4044.92, all with the same effect. In Firefox the tab becomes stuck forever. In Chromium, after about 30 seconds, execution resumes with a CONTEXT_LOST_WEBGL exception.
https://www.shadertoy.com/view/wdjBWD
GLSL original shader code
// Compile-time constants and configuration switches.
// PI is used by the cosine branch of SmoothCurve and for the rotation angle in GetWorleyNoise.
#define PI 3.14159265359
// NOTE(review): Interpolation is defined but not referenced anywhere in this listing.
#define Interpolation 1
// CosineCurve / PerlinSmoothCurve select which branch of SmoothCurve is compiled (#if / #elif).
#define CosineCurve 0
#define PerlinSmoothCurve 1
// Table of eight 2D direction vectors, indexed by GetGradient with (Hash(x) & 7).
// Entries 0-3 alternate between (1, 0) and (0, 1); entries 4-7 are all (0.70711, 0.70711).
vec2 dirs[8] = vec2[]
(
vec2(1, 0),
vec2(0, 1),
vec2(1, 0),
vec2(0, 1),
vec2(0.70711, 0.70711),
vec2(0.70711, 0.70711),
vec2(0.70711, 0.70711),
vec2(0.70711, 0.70711)
);
// NOTE(review): tableSize is not referenced in this listing; permutationTable
// is declared with the literal 512 instead.
const int tableSize = 512;
// Evaluates to 255; Hash uses it to mask indices into permutationTable.
const int permutationMask = 512/2 - 1;
// Lookup table used by Hash. It holds 512 entries: a 256-entry sequence
// followed by an identical copy of that same sequence.
// The bug report states that the compile time grows sharply with the length
// of this table.
const int permutationTable[512] = int[]
(
180, 3, 121, 17, 22, 7, 69, 202, 72, 172,
56, 94, 92, 254, 122, 139, 118, 127, 76, 2,
46, 74, 169, 147, 228, 196, 47, 110, 138, 217,
155, 39, 91, 61, 45, 238, 242, 229, 251, 207,
192, 198, 175, 222, 176, 75, 234, 10, 25, 63,
250, 135, 159, 183, 253, 96, 68, 153, 87, 50,
226, 241, 114, 188, 13, 112, 21, 66, 249, 144,
126, 1, 233, 124, 148, 43, 199, 156, 208, 223,
164, 213, 18, 219, 59, 209, 108, 134, 27, 100,
197, 90, 53, 140, 168, 29, 165, 19, 65, 161,
141, 52, 195, 128, 151, 37, 117, 150, 36, 105,
49, 6, 187, 116, 235, 216, 201, 119, 182, 204,
221, 136, 0, 113, 24, 111, 158, 131, 212, 58,
41, 85, 102, 166, 77, 157, 64, 103, 184, 83,
237, 220, 23, 149, 84, 171, 99, 230, 152, 178,
190, 215, 38, 34, 163, 93, 14, 244, 79, 31,
9, 142, 240, 132, 145, 231, 95, 236, 8, 167,
82, 80, 133, 137, 252, 211, 57, 248, 89, 20,
30, 154, 32, 174, 205, 42, 98, 11, 15, 243,
78, 203, 146, 185, 107, 51, 224, 54, 70, 106,
194, 35, 177, 160, 255, 16, 104, 189, 12, 67,
125, 33, 123, 55, 120, 186, 40, 143, 181, 239,
193, 214, 115, 81, 4, 44, 200, 101, 206, 71,
73, 28, 86, 130, 48, 225, 210, 227, 245, 109,
232, 162, 129, 179, 218, 88, 247, 173, 191, 97,
62, 60, 246, 5, 26, 170,
// Second copy of the same 256 values starts here.
180, 3, 121, 17, 22, 7, 69, 202, 72, 172,
56, 94, 92, 254, 122, 139, 118, 127, 76, 2,
46, 74, 169, 147, 228, 196, 47, 110, 138, 217,
155, 39, 91, 61, 45, 238, 242, 229, 251, 207,
192, 198, 175, 222, 176, 75, 234, 10, 25, 63,
250, 135, 159, 183, 253, 96, 68, 153, 87, 50,
226, 241, 114, 188, 13, 112, 21, 66, 249, 144,
126, 1, 233, 124, 148, 43, 199, 156, 208, 223,
164, 213, 18, 219, 59, 209, 108, 134, 27, 100,
197, 90, 53, 140, 168, 29, 165, 19, 65, 161,
141, 52, 195, 128, 151, 37, 117, 150, 36, 105,
49, 6, 187, 116, 235, 216, 201, 119, 182, 204,
221, 136, 0, 113, 24, 111, 158, 131, 212, 58,
41, 85, 102, 166, 77, 157, 64, 103, 184, 83,
237, 220, 23, 149, 84, 171, 99, 230, 152, 178,
190, 215, 38, 34, 163, 93, 14, 244, 79, 31,
9, 142, 240, 132, 145, 231, 95, 236, 8, 167,
82, 80, 133, 137, 252, 211, 57, 248, 89, 20,
30, 154, 32, 174, 205, 42, 98, 11, 15, 243,
78, 203, 146, 185, 107, 51, 224, 54, 70, 106,
194, 35, 177, 160, 255, 16, 104, 189, 12, 67,
125, 33, 123, 55, 120, 186, 40, 143, 181, 239,
193, 214, 115, 81, 4, 44, 200, 101, 206, 71,
73, 28, 86, 130, 48, 225, 210, 227, 245, 109,
232, 162, 129, 179, 218, 88, 247, 173, 191, 97,
62, 60, 246, 5, 26, 170
);
// Maps a 2D coordinate to a permutationTable entry using two chained lookups.
// Both components of x are converted with int() before being used as indices.
// The bug report notes that removing these indirections makes the slow
// compilation go away, so the expression is intentionally left as written.
int Hash(vec2 x)
{
// Precedence note: '+' binds tighter than '&', so the outer index is
// (permutationTable[int(x.x) & permutationMask] + int(x.y)) & permutationMask.
return permutationTable[permutationTable[int(x.x) & permutationMask] + int(x.y) & permutationMask];
}
// Remaps t with an easing curve chosen at compile time by the CosineCurve and
// PerlinSmoothCurve macros; if neither is enabled, t is returned unchanged.
// NOTE(review): no call to SmoothCurve appears in this listing.
float SmoothCurve(float t)
{
#if CosineCurve
// Cosine-based curve.
return (1.0 - cos(t * PI)) * 0.5;
#elif PerlinSmoothCurve
// Cubic curve 3t^2 - 2t^3; this is the branch selected by the macros defined above.
return t * t * (3.0 - 2.0 * t);
#else
// Identity fallback.
return t;
#endif
}
// Produces a pseudo-random scalar from a 2D input.
// The result is the fractional part (fract) of a polynomial in the scrambled
// coordinates.
float GetIntegerNoise(vec2 p) // replace this by something better, p is essentially ivec2
{
// Scale and offset p, keep only the fractional part, then stretch by 53.7.
p = 53.7 * fract( (p*0.3183099) + vec2(0.71,0.113));
return fract( p.x*p.y*(p.x+p.y) );
}
// Selects one of the eight entries of dirs for the coordinate x.
// The index is the low three bits of Hash(x), so it is always in 0..7.
vec2 GetGradient(vec2 x)
{
return dirs[Hash(x) & 7];
}
//https://thebookofshaders.com/12/
// Produces a pseudo-random vec2 from p: each component is the fractional part
// of a scaled sine of a dot product of p with a fixed constant vector.
vec2 Random2D( vec2 p ) {
return fract(sin(vec2(dot(p,vec2(127.1,311.7)),dot(p,vec2(269.5,183.3))))*43758.5453);
}
// Rotates the 2D vector v by the angle theta.
// theta is passed directly to cos() and sin().
vec2 Rotate2D(vec2 v, float theta)
{
float c = cos(theta);
float s = sin(theta);
// 2x2 rotation matrix built from the cosine and sine of theta.
mat2 rotMat = mat2(c,s,-s,c);
return rotMat * v;
}
// Returns the smallest distance from uv to a time-animated point, taken over
// the 3x3 block of integer cells centred on floor(uv).
// iTime is not declared in this listing; presumably it is supplied by the
// Shadertoy framework.
float GetWorleyNoise(vec2 uv)
{
// NOTE(review): 'noise' and 'f' are assigned but never read in this function.
float noise = 0.0;
vec2 p = floor(uv);
vec2 f = fract(uv);
// Initial value for the running minimum distance.
float minDist = 99.0;
// Visit the cell containing uv and its eight neighbours.
for(int x = -1; x <=1; ++x)
{
for(int y = -1; y <=1; ++y)
{
// Integer coordinate of the neighbouring cell being examined.
vec2 point = p + vec2(x, y);
vec2 g;
vec2 v;
// Per-cell pseudo-random offset.
v = Random2D(point);
v = (v + 1.0) * 0.5;
// Scale the offset to a quarter and rotate it by a time-dependent angle.
v = Rotate2D(v * 0.25, GetIntegerNoise(v) * iTime * PI);
//v = vec2(cos(v.x * iTime *PI), sin(v.x * iTime * PI)) * 0.25;
// NOTE(review): g is only used by the commented-out line below, but this call
// is what reaches GetGradient/Hash; the bug report says removing those calls
// makes the slow compilation go away.
g = GetGradient(point);
//v = GetIntegerNoise(point) * g;
float d = distance(point + v, uv);
minDist = min(minDist, d);
}
}
return minDist;
}
// Shader entry point: writes an opaque, green-tinted colour to fragColor based
// on GetWorleyNoise at the current pixel.
// iResolution is not declared in this listing; presumably it is supplied by
// the Shadertoy framework.
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
// Centre the coordinates and normalise them by the vertical resolution.
vec2 uv = (2.0 * fragCoord - iResolution.xy) / iResolution.y;
// Scale the coordinates by 7 before sampling the noise.
uv *= 7.0;
//uv *= 0.5*(sin(iTime * 2.0) + 5.0) * 2.40;
float noise = GetWorleyNoise(uv);
//isolines
//noise = step(0.5, abs(sin(40.0 * noise)));
// Halve the noise and add 0.5, then raise it to the power 2.2.
noise = 0.5 * noise + 0.5;
noise = pow(noise, 2.2);
vec3 color = noise * vec3(0.25, 1.0, 0.5);
fragColor = vec4(color, 1.0);
}
Shader capture: 19.shader_test
Reduced test case: 19_minified.shader_test
Firefox spins at 100% CPU, and this is the main place where it sits in the driver:
0x00007f287d10fb55 in nir_deref_path_init (path=path@entry=0x7fff0ff37f40, deref=0x7f285762af10, mem_ctx=mem_ctx@entry=0x0) at ../../src/compiler/nir/nir_deref.c:60
60 if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
(gdb) bt
#0 0x00007f287d10fb55 in nir_deref_path_init (path=path@entry=0x7fff0ff37f40, deref=0x7f285762af10, mem_ctx=mem_ctx@entry=0x0) at ../../src/compiler/nir/nir_deref.c:60
#1 0x00007f287d111d9e in nir_compare_derefs (a=<optimized out>, b=b@entry=0x7f285579e2b0) at ../../src/compiler/nir/nir_deref.c:602
#2 0x00007f287d177458 in lookup_entry_and_kill_aliases (copies=copies@entry=0x7fff0ff38460, deref=deref@entry=0x7f285579e2b0, write_mask=<optimized out>)
at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:343
#3 0x00007f287d1799a8 in get_entry_and_kill_aliases (write_mask=1, deref=0x7f285579e2b0, copies=0x7fff0ff38460) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:976
#4 copy_prop_vars_block (block=<optimized out>, block=<optimized out>, copies=0x7fff0ff38460, b=0x7fff0ff38060, state=0x7fff0ff38540) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:977
#5 copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x7fff0ff38460, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1107
#6 0x00007f287d1790d8 in copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x0, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1098
#7 0x00007f287d17a865 in nir_copy_prop_vars_impl (impl=0x7f2861b80190) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1183
#8 nir_opt_copy_prop_vars (shader=shader@entry=0x7f2861a63310) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1206
#9 0x00007f287cd23526 in st_nir_opts(nir_shader*) (nir=nir@entry=0x7f2861a63310) at ../../src/mesa/state_tracker/st_glsl_to_nir.cpp:265
#10 0x00007f287cd27c48 in st_nir_link_shaders (consumer=0x7f2861a63310, producer=0x7f2861a63130) at ../../src/mesa/state_tracker/st_glsl_to_nir.cpp:559
#11 st_link_nir(gl_context*, gl_shader_program*) (ctx=0x7f2862cb0000, shader_program=0x7f2861a581e0) at ../../src/mesa/state_tracker/st_glsl_to_nir.cpp:722
#12 0x00007f287cd70c66 in _mesa_glsl_link_shader(gl_context*, gl_shader_program*) (ctx=ctx@entry=0x7f2862cb0000, prog=prog@entry=0x7f2861a581e0)
at ../../src/mesa/program/ir_to_mesa.cpp:3113
#13 0x00007f287ce713e0 in link_program (no_error=<optimized out>, shProg=<optimized out>, ctx=<optimized out>) at ../../src/mesa/main/shaderapi.c:1311
#14 link_program_error (ctx=0x7f2862cb0000, shProg=0x7f2861a581e0) at ../../src/mesa/main/shaderapi.c:1419
#15 0x00007f2888d6e4e6 in mozilla::gl::GLContext::fLinkProgram(unsigned int) () at /home/user/Downloads/firefox/libxul.so
#16 0x00007f2889ac0817 in mozilla::WebGLProgram::LinkAndUpdate() () at /home/user/Downloads/firefox/libxul.so
#17 0x00007f2889ac0324 in mozilla::WebGLProgram::LinkProgram() () at /home/user/Downloads/firefox/libxul.so
#18 0x00007f2889aa94f3 in mozilla::WebGLContext::LinkProgram(mozilla::WebGLProgram&) () at /home/user/Downloads/firefox/libxul.so
#19 0x00007f288cc659f7 in void mozilla::RunOn<void (mozilla::HostWebGLContext::*)(unsigned long) const, &(mozilla::HostWebGLContext::LinkProgram(unsigned long) const), void, unsigned long const&>(mozilla::ClientWebGLContext const&, unsigned long const&) () at /home/user/Downloads/firefox/libxul.so
#20 0x00007f2889a65fc4 in mozilla::ClientWebGLContext::LinkProgram(mozilla::WebGLProgramJS&) const () at /home/user/Downloads/firefox/libxul.so
#21 0x00007f28896b51b7 in mozilla::dom::WebGL2RenderingContext_Binding::linkProgram(JSContext*, JS::Handle<JSObject*>, void*, JSJitMethodCallArgs const&) ()
at /home/user/Downloads/firefox/libxul.so
#22 0x00007f288be3b70f in bool mozilla::dom::binding_detail::GenericMethod<mozilla::dom::binding_detail::NormalThisPolicy, mozilla::dom::binding_detail::ThrowExceptions>(JSContext*, unsigned int, JS::Value*) () at /home/user/Downloads/firefox/libxul.so
#23 0x00007f288c07448b in js::InternalCallOrConstruct(JSContext*, JS::CallArgs const&, js::MaybeConstruct, js::CallReason) () at /home/user/Downloads/firefox/libxul.so
#24 0x00007f288c04ce50 in Interpret(JSContext*, js::RunState&) () at /home/user/Downloads/firefox/libxul.so
#25 0x00007f288c073f4e in js::InternalCallOrConstruct(JSContext*, JS::CallArgs const&, js::MaybeConstruct, js::CallReason) () at /home/user/Downloads/firefox/libxul.so
#26 0x00007f288c2919df in js::jit::DoCallFallback(JSContext*, js::jit::BaselineFrame*, js::jit::ICCall_Fallback*, unsigned int, JS::Value*, JS::MutableHandle<JS::Value>) ()
at /home/user/Downloads/firefox/libxul.so
...
Here are some variations obtained by sampling with gdb:
Another one
#0 0x00007f287d111db6 in nir_compare_derefs (a=<optimized out>, b=b@entry=0x7f28556652b0) at ../../src/compiler/nir/nir_deref.c:604
#1 0x00007f287d17986f in lookup_entry_for_deref (allowed_comparisons=nir_derefs_equal_bit, deref=0x7f28556652b0, copies=0x7fff0ff38460)
at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:315
#2 copy_prop_vars_block (block=<optimized out>, block=<optimized out>, copies=0x7fff0ff38460, b=0x7fff0ff38060, state=0x7fff0ff38540) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:964
#3 copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x7fff0ff38460, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1107
#4 0x00007f287d1790d8 in copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x0, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1098
...
and
Another one
#0 0x00007f287d10fb87 in nir_src_as_deref (src=...) at ../../src/compiler/nir/nir.h:3574
#1 nir_deref_instr_parent (instr=<optimized out>) at ../../src/compiler/nir/nir.h:1423
#2 nir_deref_path_init (path=path@entry=0x7fff0ff37f70, deref=0x7f285819b070, mem_ctx=mem_ctx@entry=0x0) at ../../src/compiler/nir/nir_deref.c:59
#3 0x00007f287d111d9e in nir_compare_derefs (a=<optimized out>, b=b@entry=0x7f28556dd2b0) at ../../src/compiler/nir/nir_deref.c:602
#4 0x00007f287d17986f in lookup_entry_for_deref (allowed_comparisons=nir_derefs_equal_bit, deref=0x7f28556dd2b0, copies=0x7fff0ff38460)
at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:315
#5 copy_prop_vars_block (block=<optimized out>, block=<optimized out>, copies=0x7fff0ff38460, b=0x7fff0ff38060, state=0x7fff0ff38540) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:964
#6 copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x7fff0ff38460, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1107
#7 0x00007f287d1790d8 in copy_prop_vars_cf_node (state=0x7fff0ff38540, copies=0x0, cf_node=<optimized out>) at ../../src/compiler/nir/nir_opt_copy_prop_vars.c:1098
So it looks like an issue in nir_compare_derefs?
Looks like a 20.x regression.
This shader works fine with Mesa 19.3.3-1 and LLVM 9.0.1-12, both from Debian testing. At least in Firefox Nightly it works fine for me (compilation takes <0.2s; I can't even measure it). Chromium gets a bit further than with 20.0.7, but it still does not work (though for some reason other than linking time).
It looks like a combination of many factors, including the length of the permutationTable:
elements | real_time |
---|---|
64 | 0.52s |
128 | 9.66s |
256 | 270.79s |
512 | 4776.81s |
In Mesa 19.3.3 it takes < 0.05s of real time to compile this shader (even with 1024 elements).
Manually inlining some of the functions also makes the problem go away.
Another way to make it work is to modify the code, for example by removing the indirections in Hash or GetDirection, by removing the calls to them, or, curiously, by removing the forward declaration of mainImage:
void webgl_f473b6d23ddb8775(out vec4 webgl_e30b609a99317854, in vec2 webgl_af11434e7e975b00);
Leaving it there makes it slow.
webgl_f473b6d23ddb8775 is defined at the end of the shader (just before the main() injected by the shadertoy framework) and corresponds to the mainImage function.