Commit aedf2817 authored by Jesse Natalie's avatar Jesse Natalie Committed by Erik Faye-Lund
Browse files

microsoft/clc: Scale fdiv ops to respect CL's required behavior

Since DXIL is unspecified about whether division is a single op,
or a split reciprocal multiply, and a split op has implications on
denorm flushing, scale the values so that we can do the split op
without flushing denorms.
parent 1a05afe9
...@@ -1100,6 +1100,49 @@ static bool shader_has_double(nir_shader *nir) ...@@ -1100,6 +1100,49 @@ static bool shader_has_double(nir_shader *nir)
return false; return false;
} }
/**
 * Rescale the operands of every 32-bit nir_op_fdiv so that the division can
 * be carried out as a reciprocal followed by a multiply without the
 * intermediate reciprocal falling into the denormal range (or overflowing).
 *
 * Both operands are multiplied by the same power of two, so the quotient is
 * mathematically unchanged:
 *   - |divisor| > 2^126  : both operands are scaled by 2^-2  (0.25)
 *   - |divisor| < 2^-126 : both operands are scaled by 2^24  (16777216.0)
 *   - otherwise          : operands are left untouched
 *
 * The thresholds are expressed as IEEE-754 binary32 bit patterns, so only
 * 32-bit divisions are handled; fdivs of any other bit size are skipped.
 *
 * \param nir  shader whose function implementations are rewritten in place
 * \return true if at least one fdiv had its sources rewritten
 */
static bool
scale_fdiv(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(func, nir) {
      if (!func->impl)
         continue;
      nir_builder b;
      nir_builder_init(&b, func->impl);
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            /* The immediates below are 32-bit float bit patterns; applying
             * them to 16- or 64-bit divisions would mix bit sizes and use
             * thresholds that are meaningless for those formats.
             */
            if (alu->op != nir_op_fdiv ||
                alu->src[0].src.ssa->bit_size != 32)
               continue;

            /* New instructions are emitted before the fdiv, so iteration
             * over the block continues from the fdiv itself.
             */
            b.cursor = nir_before_instr(instr);

            nir_ssa_def *num = alu->src[0].src.ssa;
            nir_ssa_def *den = alu->src[1].src.ssa;

            nir_ssa_def *abs_den = nir_fabs(&b, den);
            /* 0x7e800000 == 2^126: above this, 1/den would be denormal. */
            nir_ssa_def *is_big =
               nir_flt(&b, nir_imm_int(&b, 0x7e800000), abs_den);
            /* 0x00800000 == 2^-126: below this, den itself is denormal. */
            nir_ssa_def *is_small =
               nir_flt(&b, abs_den, nir_imm_int(&b, 0x00800000));

            nir_ssa_def *scaled_down_a = nir_fmul_imm(&b, num, 0.25);
            nir_ssa_def *scaled_down_b = nir_fmul_imm(&b, den, 0.25);
            nir_ssa_def *scaled_up_a = nir_fmul_imm(&b, num, 16777216.0);
            nir_ssa_def *scaled_up_b = nir_fmul_imm(&b, den, 16777216.0);

            nir_ssa_def *final_a =
               nir_bcsel(&b, is_big, scaled_down_a,
                         nir_bcsel(&b, is_small, scaled_up_a, num));
            nir_ssa_def *final_b =
               nir_bcsel(&b, is_big, scaled_down_b,
                         nir_bcsel(&b, is_small, scaled_up_b, den));

            nir_instr_rewrite_src(instr, &alu->src[0].src,
                                  nir_src_for_ssa(final_a));
            nir_instr_rewrite_src(instr, &alu->src[1].src,
                                  nir_src_for_ssa(final_b));
            progress = true;
         }
      }
   }
   return progress;
}
struct clc_dxil_object * struct clc_dxil_object *
clc_to_dxil(struct clc_context *ctx, clc_to_dxil(struct clc_context *ctx,
const struct clc_object *obj, const struct clc_object *obj,
...@@ -1252,6 +1295,8 @@ clc_to_dxil(struct clc_context *ctx, ...@@ -1252,6 +1295,8 @@ clc_to_dxil(struct clc_context *ctx,
NIR_PASS_V(nir, clc_nir_dedupe_const_samplers); NIR_PASS_V(nir, clc_nir_dedupe_const_samplers);
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant);
NIR_PASS_V(nir, scale_fdiv);
// Assign bindings for constant samplers // Assign bindings for constant samplers
nir_foreach_variable_safe(var, &nir->uniforms) { nir_foreach_variable_safe(var, &nir->uniforms) {
if (glsl_type_is_sampler(var->type) && if (glsl_type_is_sampler(var->type) &&
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment