Commit a610ba21 by Timothy Arceri

### nir: evaluate loop terminator ior use when false

```This allows some loops to unroll where they are guaranteed to
exit after the first iteration. For example:

loop {
block block_1:
/* preds: block_0 block_13 */
vec1 32 ssa_85 = load_const (0x00000002 /* 0.000000 */)
vec1 32 ssa_86 = ieq ssa_48, ssa_85
vec1 32 ssa_87 = load_const (0x00000001 /* 0.000000 */)
vec1 32 ssa_88 = ieq ssa_48, ssa_87
vec1 32 ssa_89 = ior ssa_86, ssa_88
vec1 32 ssa_90 = ieq ssa_48, ssa_0
vec1 32 ssa_91 = ior ssa_89, ssa_90

/* succs: block_2 block_3 */
if ssa_86 {
block block_2:
/* preds: block_1 */
...
break
/* succs: block_14 */
} else {
block block_3:
/* preds: block_1 */
/* succs: block_4 */
}
block block_4:
/* preds: block_3 */
/* succs: block_5 block_6 */
if ssa_88 {
block block_5:
/* preds: block_4 */
...
break
/* succs: block_14 */
} else {
block block_6:
/* preds: block_4 */
/* succs: block_7 */
}
block block_7:
/* preds: block_6 */
/* succs: block_8 block_9 */
if ssa_90 {
block block_8:
/* preds: block_7 */
...
break
/* succs: block_14 */
} else {
block block_9:
/* preds: block_7 */
/* succs: block_10 */
}
block block_10:
/* preds: block_9 */
vec1 32 ssa_107 = inot ssa_91
/* succs: block_11 block_12 */
if ssa_107 {
block block_11:
/* preds: block_10 */
break
/* succs: block_14 */
} else {
block block_12:
/* preds: block_10 */
/* succs: block_13 */
}
}

These loops have been seen in Bethesda games running over
DXVK. There is a slight increase in VGPR use but removing
the loops allows us to further optimise the code in
future. For example, many of the unrolled if-statements
could now be merged as they appear in the shaders multiple
times.

vkpipeline results RADV (from a db of only 3 games):

SGPRS: 10920 -> 10440 (-4.40 %)
VGPRS: 6120 -> 6264 (2.35 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 369952 -> 356608 (-3.61 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2040 -> 2040 (0.00 %)
Wait states: 0 -> 0 (0.00 %)```
parent 1a453045
 ... ... @@ -544,6 +544,44 @@ opt_if_evaluate_condition_use_loop_terminator(nir_if *nif, nir_loop *loop, after_loop->index, NIR_TRUE, or_use, mem_ctx, true); } } else if (nir_boolean == NIR_FALSE && parent_instr->type == nir_instr_type_alu && nir_instr_as_alu(parent_instr)->op == nir_op_ior) { nir_alu_instr *alu = nir_instr_as_alu(parent_instr); nir_src *other_or_src = NULL; for (unsigned i = 0; i < 2; i++) { if (alu->src[i].src.ssa != use_src->ssa) { other_or_src = &alu->src[i].src; break; } } assert(other_or_src); nir_foreach_use_safe(or_use, &alu->dest.dest.ssa) { if (prev_block->index < or_use->parent_instr->block->index && after_loop->index > or_use->parent_instr->block->index) { nir_instr_rewrite_src(or_use->parent_instr, or_use, *other_or_src); progress = true; } } nir_foreach_if_use_safe(or_use, &alu->dest.dest.ssa) { if (or_use->parent_if != nif) { unsigned blk_idx_before_if = nir_cf_node_as_block(nir_cf_node_prev( &or_use->parent_if->cf_node))->index; if (prev_block->index <= blk_idx_before_if && after_loop->index > blk_idx_before_if) { nir_if_rewrite_condition(or_use->parent_if, *other_or_src); progress = true; } } } } else { progress = evaluate_term_condition_use(prev_block->index, after_loop->index, ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!