Commit a610ba21 authored by Timothy Arceri

nir: evaluate loop terminator ior use when false

This allows some loops to unroll where they are guaranteed to
exit after the first iteration. For example:

	loop {
		block block_1:
		/* preds: block_0 block_13 */
		vec1 32 ssa_85 = load_const (0x00000002 /* 0.000000 */)
		vec1 32 ssa_86 = ieq ssa_48, ssa_85
		vec1 32 ssa_87 = load_const (0x00000001 /* 0.000000 */)
		vec1 32 ssa_88 = ieq ssa_48, ssa_87
		vec1 32 ssa_89 = ior ssa_86, ssa_88
		vec1 32 ssa_90 = ieq ssa_48, ssa_0
		vec1 32 ssa_91 = ior ssa_89, ssa_90

		/* succs: block_2 block_3 */
		if ssa_86 {
			block block_2:
			/* preds: block_1 */
			 ...
			break
			/* succs: block_14 */
		} else {
			block block_3:
			/* preds: block_1 */
			/* succs: block_4 */
		}
		block block_4:
		/* preds: block_3 */
		/* succs: block_5 block_6 */
		if ssa_88 {
			block block_5:
			/* preds: block_4 */
			 ...
			break
			/* succs: block_14 */
		} else {
			block block_6:
			/* preds: block_4 */
			/* succs: block_7 */
		}
		block block_7:
		/* preds: block_6 */
		/* succs: block_8 block_9 */
		if ssa_90 {
			block block_8:
			/* preds: block_7 */
			 ...
			break
			/* succs: block_14 */
		} else {
			block block_9:
			/* preds: block_7 */
			/* succs: block_10 */
		}
		block block_10:
		/* preds: block_9 */
		vec1 32 ssa_107 = inot ssa_91
		/* succs: block_11 block_12 */
		if ssa_107 {
			block block_11:
			/* preds: block_10 */
			break
			/* succs: block_14 */
		} else {
			block block_12:
			/* preds: block_10 */
			/* succs: block_13 */
		}
	}

These loops have been seen in Bethesda games running over
DXVK. There is a slight increase in VGPR use but removing
the loops allows us to further optimise the code in the
future. For example many of the unrolled if-statements
could now be merged as they appear in the shaders multiple
times.

vkpipeline results RADV (from a db of only 3 games):

Totals from affected shaders:
SGPRS: 10920 -> 10440 (-4.40 %)
VGPRS: 6120 -> 6264 (2.35 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 369952 -> 356608 (-3.61 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 2040 -> 2040 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
parent 1a453045
......@@ -544,6 +544,44 @@ opt_if_evaluate_condition_use_loop_terminator(nir_if *nif, nir_loop *loop,
after_loop->index, NIR_TRUE,
or_use, mem_ctx, true);
}
} else if (nir_boolean == NIR_FALSE &&
parent_instr->type == nir_instr_type_alu &&
nir_instr_as_alu(parent_instr)->op == nir_op_ior) {
nir_alu_instr *alu = nir_instr_as_alu(parent_instr);
nir_src *other_or_src = NULL;
for (unsigned i = 0; i < 2; i++) {
if (alu->src[i].src.ssa != use_src->ssa) {
other_or_src = &alu->src[i].src;
break;
}
}
assert(other_or_src);
nir_foreach_use_safe(or_use, &alu->dest.dest.ssa) {
if (prev_block->index < or_use->parent_instr->block->index &&
after_loop->index > or_use->parent_instr->block->index) {
nir_instr_rewrite_src(or_use->parent_instr, or_use,
*other_or_src);
progress = true;
}
}
nir_foreach_if_use_safe(or_use, &alu->dest.dest.ssa) {
if (or_use->parent_if != nif) {
unsigned blk_idx_before_if =
nir_cf_node_as_block(nir_cf_node_prev(
&or_use->parent_if->cf_node))->index;
if (prev_block->index <= blk_idx_before_if &&
after_loop->index > blk_idx_before_if) {
nir_if_rewrite_condition(or_use->parent_if, *other_or_src);
progress = true;
}
}
}
} else {
progress =
evaluate_term_condition_use(prev_block->index, after_loop->index,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment