From e407920c6b38c7250eb6b89e75dbe2db1e1bcb83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 5 Sep 2022 14:35:30 +0200 Subject: [PATCH 1/3] nir/opt_peephole_select: Consider one-invocation conditions in flat branches. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an if condition is known to only be true on one invocation, this optimization is going to hurt more than it helps because it can move potentially expensive instructions outside a branch. Detect these cases: - if (elect()) - if (subgroup invocation id == N) And exclude them from the special treatment that peephole_select has for flattened branches. Signed-off-by: Timur Kristóf --- src/compiler/nir/nir.h | 3 ++ src/compiler/nir/nir_gather_info.c | 35 ++++++++++++++++++++++ src/compiler/nir/nir_opt_peephole_select.c | 7 +++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 02db779113c1..f77c2c07f26c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -921,6 +921,9 @@ nir_instr_is_last(const nir_instr *instr) return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node)); } +bool +nir_instr_cond_one_invocation(const nir_instr *instr); + typedef struct nir_ssa_def { /** Instruction which produces this SSA value. */ nir_instr *parent_instr; diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index f332f119b7f6..9572ba14b5f5 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -463,6 +463,41 @@ nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr) } } +/** + * Whether the instruction is a condition that is known to be true + * on only 1 shader invocation. 
+ */ +bool +nir_instr_cond_one_invocation(const nir_instr *instr) +{ + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic == nir_intrinsic_elect) { + return true; + } + } else if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (alu->op == nir_op_ieq) { + nir_instr *l = alu->src[0].src.ssa->parent_instr; + nir_instr *r = alu->src[1].src.ssa->parent_instr; + + if (l->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(l)->intrinsic == + nir_intrinsic_load_subgroup_invocation) + return true; + + if (r->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(r)->intrinsic == + nir_intrinsic_load_subgroup_invocation) + return true; + } + } + + return false; +} + static bool intrinsic_is_bindless(nir_intrinsic_instr *instr) { diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c index 03d656dd8ebb..8395d77093c6 100644 --- a/src/compiler/nir/nir_opt_peephole_select.c +++ b/src/compiler/nir/nir_opt_peephole_select.c @@ -323,9 +323,12 @@ nir_opt_collapse_if(nir_if *if_stmt, nir_shader *shader, unsigned limit, } if (parent_if->control == nir_selection_control_flatten) { + /* Determine when the if condition is true on only 1 invocation. */ + nir_instr *cond_instr = parent_if->condition.ssa->parent_instr; + /* Override driver defaults */ - indirect_load_ok = true; - expensive_alu_ok = true; + if (!nir_instr_cond_one_invocation(cond_instr)) + indirect_load_ok = expensive_alu_ok = true; } /* check if the block before the nested if matches the requirements */ -- GitLab From 89c3932bfc23797c89de1d6bdd7a163e057fb979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 15 Aug 2022 13:02:41 +0200 Subject: [PATCH 2/3] nir/opt_uniform_atomics: Flatten branches using elect() as condition. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We know for sure that a branch with elect() is always taken (by exactly 1 invocation in every subgroup) so we can flatten it. This allows the backend to remove a jump instruction. Fossil DB stats on Navi 21: Totals from 407 (0.30% of 134906) affected shaders: CodeSize: 2801760 -> 2785204 (-0.59%) Instrs: 533078 -> 528939 (-0.78%) Latency: 3637070 -> 3636025 (-0.03%); split: -0.03%, +0.00% InvThroughput: 711265 -> 711208 (-0.01%); split: -0.01%, +0.00% Branches: 35732 -> 31593 (-11.58%) Signed-off-by: Timur Kristóf --- src/compiler/nir/nir_opt_uniform_atomics.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/compiler/nir/nir_opt_uniform_atomics.c b/src/compiler/nir/nir_opt_uniform_atomics.c index e4564b0653f2..519248b70980 100644 --- a/src/compiler/nir/nir_opt_uniform_atomics.c +++ b/src/compiler/nir/nir_opt_uniform_atomics.c @@ -226,6 +226,9 @@ optimize_atomic(nir_builder *b, nir_intrinsic_instr *intrin, bool return_prev) nir_if *nif = nir_push_if(b, cond); + /* Exactly 1 invocation in each subgroup takes the branch, so we can flatten it. */ + nif->control = nir_selection_control_flatten; + nir_instr_remove(&intrin->instr); nir_builder_instr_insert(b, &intrin->instr); -- GitLab From 2d0efa1507f59fc12a8171a66a4aa9b3eae393d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 15 Aug 2022 13:03:40 +0200 Subject: [PATCH 3/3] nir/opt_if: Flatten branches that use a one-invocation condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We know for sure that such a branch is always taken (by exactly 1 invocation in every subgroup) so we can flatten it. This may allow the backend to remove a jump instruction. 
Fossil DB stats on Navi 21: Totals from 65 (0.05% of 134906) affected shaders: CodeSize: 338612 -> 337600 (-0.30%) Instrs: 63318 -> 63065 (-0.40%) Latency: 608526 -> 608465 (-0.01%); split: -0.02%, +0.01% Branches: 2318 -> 2065 (-10.91%) Signed-off-by: Timur Kristóf --- src/compiler/nir/nir_opt_if.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index ea1486b9e110..4a5e7d5c0d3e 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -988,6 +988,27 @@ opt_if_phi_is_condition(nir_builder *b, nir_if *nif) return progress; } +/* Look at the if condition and flatten the branch when the condition is + * known to be true on only one invocation, in which case all subgroups + * always take the branch. + * + * Returns true only if nif->control was changed, because the result is + * accumulated into the progress flag of opt_if_cf_list. + */ +static bool +opt_if_flatten(nir_if *nif) +{ + /* Already flattened. */ + if (nif->control == nir_selection_control_flatten) + return false; + + if (!nir_instr_cond_one_invocation(nif->condition.ssa->parent_instr)) + return false; + + nif->control = nir_selection_control_flatten; + return true; +} + /** * This optimization tries to merge two break statements into a single break. * For this purpose, it checks if both branch legs end in a break or @@ -1616,6 +1637,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list, progress |= opt_if_simplification(b, nif); if (options & nir_opt_if_optimize_phi_true_false) progress |= opt_if_phi_is_condition(b, nif); + progress |= opt_if_flatten(nif); break; } -- GitLab