[Bug] Rendering to CMs broken on Intel UHD

Mesa Version: 19.1.7-1

When each face of a cube map (CM) is attached to an FBO as its own color attachment, e.g.:

// Attach mip level 0 of each cube map face of `tex` to its own color attachment
// (0-5) of the framebuffer bound to GL_DRAW_FRAMEBUFFER.
// The face targets are spelled GL_TEXTURE_CUBE_MAP_* in the OpenGL headers.
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X, tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_CUBE_MAP_NEGATIVE_X, tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT2, GL_TEXTURE_CUBE_MAP_POSITIVE_Y, tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT4, GL_TEXTURE_CUBE_MAP_POSITIVE_Z, tex, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT5, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, tex, 0);

All six color attachments are set up as draw buffers. Rendering to this CM with a simple quad pass that writes a value to each output produces garbage results. I also tried various workarounds, none of which worked, e.g. enabling just one face at a time in the FBO by setting a single draw buffer. That also renders incorrectly.

The attachment textures are all POT (power of two)
The attachment textures are all the same size
The attachment textures are all the same type (GL_RGBA8)
The attachment textures are all allocated with glTexImage2D

The following fragment shader is what I'm currently using to render the faces of the CM. u_sampler is also a CM; it is sampled during this pass and is not the same CM as the one I'm rendering to.

#version 450 core
// Aliases that map the vec2f / vec3i / mat4x4f-style type names used below
// onto built-in GLSL types.
#define vec2f vec2
#define vec3f vec3
#define vec4f vec4
#define vec2i ivec2
#define vec3i ivec3
#define vec4i ivec4
#define vec4b vec4
#define mat3x3f mat3
#define mat4x4f mat4
#define mat3x4f mat3x4
// Array type of 80 mat3x4 matrices; not referenced by this fragment shader.
#define bonesf mat3x4f[80]
// Sampler type aliases.
#define rx_sampler1D sampler1D
#define rx_sampler2D sampler2D
#define rx_sampler3D sampler3D
#define rx_samplerCM samplerCube
// Texture lookup aliases: every dimensionality forwards to the overloaded
// texture() built-in ...
#define rx_texture1D texture
#define rx_texture2D texture
#define rx_texture3D texture
#define rx_textureCM texture
// ... and the explicit-LOD variants forward to textureLod().
#define rx_texture1DLod textureLod
#define rx_texture2DLod textureLod
#define rx_texture3DLod textureLod
#define rx_textureCMLod textureLod
// Aliases for built-in variables; not referenced by this fragment shader.
#define rx_position gl_Position
#define rx_point_size gl_PointSize
// Interpolated 2D input. main() remaps it with `* 2.0 - 1.0`, so it is
// presumably in [0, 1] -- confirm against the vertex stage.
in vec2f vs_coordinate;
// Six color outputs at locations 0-5. The declarations are not in location
// order: the *_n* outputs (odd locations 1, 3, 5) are listed before the *_p*
// outputs (even locations 0, 2, 4). Presumably location N feeds
// GL_COLOR_ATTACHMENTN, i.e. +X, -X, +Y, -Y, +Z, -Z -- confirm against the
// glDrawBuffers setup.
layout(location = 1) out vec4f fs_color_nx;
layout(location = 3) out vec4f fs_color_ny;
layout(location = 5) out vec4f fs_color_nz;
layout(location = 0) out vec4f fs_color_px;
layout(location = 2) out vec4f fs_color_py;
layout(location = 4) out vec4f fs_color_pz;
// Cube map sampled by main().
uniform rx_samplerCM u_sampler;
// Number of sample steps per axis on each face in main()'s loops; presumably
// the face size of u_sampler in texels -- confirm against the caller.
uniform int u_texture_size;
// Resets the line numbering the compiler uses in diagnostics for the code
// that follows.
#line 0
// Computes one color per cube face for this fragment and writes each to its
// own output. For every output face, the color is a weighted average of
// u_sampler over a u_texture_size x u_texture_size grid of directions on each
// of the six faces, weighted by the clamped dot product between the output
// direction and the sampled direction.
//
// Cost per fragment: 6 * 6 * u_texture_size * u_texture_size texture lookups.
void main() {
  // One matrix per cube face, in the order right, left, top, bottom, front,
  // back. Each column is a signed unit axis; the matrices are applied to
  // directions of the form normalize((u, v, -1)).
  const mat3x3f rotations[6] = mat3x3f[](
    mat3x3f(vec3f( 0.0, 0.0, -1.0), vec3f(0.0, -1.0,  0.0), vec3f(-1.0,  0.0,  0.0)),  // right
    mat3x3f(vec3f( 0.0, 0.0,  1.0), vec3f(0.0, -1.0,  0.0), vec3f( 1.0,  0.0,  0.0)),  // left
    mat3x3f(vec3f( 1.0, 0.0,  0.0), vec3f(0.0,  0.0,  1.0), vec3f( 0.0, -1.0,  0.0)),  // top
    mat3x3f(vec3f( 1.0, 0.0,  0.0), vec3f(0.0,  0.0, -1.0), vec3f( 0.0,  1.0,  0.0)),  // bottom
    mat3x3f(vec3f( 1.0, 0.0,  0.0), vec3f(0.0, -1.0,  0.0), vec3f( 0.0,  0.0, -1.0)),  // front
    mat3x3f(vec3f(-1.0, 0.0,  0.0), vec3f(0.0, -1.0,  0.0), vec3f( 0.0,  0.0,  1.0))); // back

  // Unit direction for this fragment on the z = -1 plane, before it is rotated
  // onto a particular face.
  vec3f base_normal = normalize(vec3f(vs_coordinate * 2.0 - 1.0, -1.0));

  // Per-face results, indexed in the same order as `rotations`.
  vec4f colors[6] = vec4f[](
    vec4f(0.0),
    vec4f(0.0),
    vec4f(0.0),
    vec4f(0.0),
    vec4f(0.0),
    vec4f(0.0));

  for (int face = 0; face < 6; face++) {
    // Direction this fragment represents on output face `face`.
    vec3f normal = rotations[face] * base_normal;
    // rgb accumulates the weighted color sum, a accumulates the total weight.
    vec4f color = vec4f(0.0);
    // Visit every sample direction on each of the six source faces.
    for (int i = 0; i < 6; i++) {
      mat3x3f rotation = rotations[i];
      for (int x = 0; x < u_texture_size; x++) {
        for (int y = 0; y < u_texture_size; y++) {
          // Coordinates step from 0 to (u_texture_size - 1) / u_texture_size.
          // NOTE(review): no half-step offset is applied, so the samples are
          // not centered within each step -- confirm this is intended.
          vec2f coordinate = vec2f(float(x) / u_texture_size, float(y) / u_texture_size);
          vec3f direction = rotation * normalize(vec3f(coordinate * 2.0 - 1.0, -1.0));
          // Clamp to zero so directions facing away from `normal` contribute nothing.
          float mul = max(dot(normal, direction), 0.0);
          color += vec4f(rx_textureCM(u_sampler, direction).rgb * mul, mul);
        }
      }
    }
    // Normalize by the accumulated weight and force alpha to 1.0.
    // NOTE(review): this divides by zero if no sample had a positive weight
    // (e.g. u_texture_size <= 0) -- confirm the caller rules that out.
    colors[face] = vec4f(color.rgb / color.a, 1.0);
  }

  // Copy the per-face results to the individual outputs, following the
  // right, left, top, bottom, front, back order of `rotations`.
  fs_color_px = colors[0];
  fs_color_nx = colors[1];
  fs_color_py = colors[2];
  fs_color_ny = colors[3];
  fs_color_pz = colors[4];
  fs_color_nz = colors[5];
}

Running with the usual Mesa debugging features enabled uncovers an interesting error with the above shader.

FS compile failed: Failure to register allocate.  Reduce number of live scalar values to avoid this.
SIMD16 shader failed to compile: FS compile failed: Failure to register allocate.  Reduce number of live scalar values to avoid this.

However, that's just the SIMD16 variant failing; the compiler then appears to succeed in building the SIMD8 variant. Note that none of these errors manifest on the GL API side: I don't get compilation or link errors.

To upload designs, you'll need to enable LFS and have an admin enable hashed storage. More information