Commit af482435, authored by Jesse Natalie and committed by Erik Faye-Lund
Browse files

microsoft/clc: Add an optimization pass for vec4 reads/writes of vec3 variables

Without this pass, copy_prop is unable to see that SPIR-V ends up creating
a vec4 out of a vec3 and undef, writing it to a vec3 variable, then reading
the vec4 out of the vec3 variable and just not using the w component.

This is apparently common with unoptimized LLVM passing vec3 as function args.
parent c91988cf
......@@ -1143,6 +1143,79 @@ scale_fdiv(nir_shader *nir)
return progress;
}
static bool
clc_opt_vec4_cast_of_vec3(nir_shader *nir)
{
bool progress = false;
nir_foreach_function(func, nir) {
if (!func->impl)
continue;
nir_foreach_block(block, func->impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic != nir_intrinsic_store_deref &&
intr->intrinsic != nir_intrinsic_load_deref)
continue;
nir_ssa_def *value = intr->intrinsic == nir_intrinsic_store_deref ?
intr->src[1].ssa : &intr->dest.ssa;
if (value->num_components != 4)
continue;
if (intr->intrinsic == nir_intrinsic_store_deref) {
if (value->parent_instr->type != nir_instr_type_alu ||
nir_instr_as_alu(value->parent_instr)->op != nir_op_vec4)
continue;
nir_alu_instr *vec = nir_instr_as_alu(value->parent_instr);
if (vec->src[3].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
continue;
}
nir_deref_instr *dest = nir_src_as_deref(intr->src[0]);
if (dest->deref_type != nir_deref_type_cast)
continue;
nir_deref_instr *cast_src = nir_deref_instr_parent(dest);
if (!cast_src || cast_src->deref_type != nir_deref_type_var)
continue;
nir_variable *var = cast_src->var;
if (!glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) != 3)
continue;
nir_builder b;
nir_builder_init(&b, func->impl);
b.cursor = nir_before_instr(instr);
nir_instr_rewrite_src(instr, &intr->src[0], nir_src_for_ssa(&cast_src->dest.ssa));
intr->num_components = 3;
if (intr->intrinsic == nir_intrinsic_store_deref) {
nir_instr_rewrite_src(instr, &intr->src[1], nir_src_for_ssa(nir_channels(&b, value, 0x7)));
nir_intrinsic_set_write_mask(intr, 0x7);
} else {
b.cursor = nir_after_instr(instr);
value->num_components = 3;
nir_ssa_def *vec4 = nir_vec4(&b, nir_channel(&b, value, 0),
nir_channel(&b, value, 1),
nir_channel(&b, value, 2),
nir_ssa_undef(&b, 1, value->bit_size));
nir_ssa_def_rewrite_uses_after(value, nir_src_for_ssa(vec4), vec4->parent_instr);
}
progress = true;
}
nir_metadata_preserve(func->impl, nir_metadata_block_index | nir_metadata_dominance | nir_metadata_loop_analysis);
}
}
return progress;
}
struct clc_dxil_object *
clc_to_dxil(struct clc_context *ctx,
const struct clc_object *obj,
......@@ -1325,6 +1398,7 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS(progress, nir, nir_opt_dead_cf);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
NIR_PASS(progress, nir, clc_opt_vec4_cast_of_vec3);
} while (progress);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment