From 3ecf9d9794a72a377c91b37e0bb9016947e504d9 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 11 Jan 2019 12:04:49 -0800 Subject: [PATCH 1/4] nir: Silence warnings about type qualifiers on return types In file included from src/compiler/nir/nir.h:40:0, from src/compiler/nir/nir_opt_if.c:24: src/compiler/nir_types.h:49:1: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] const int glsl_get_struct_field_offset(const struct glsl_type *type, ^~~~~ src/compiler/nir_types.h:52:1: warning: type qualifiers ignored on function return type [-Wignored-qualifiers] const unsigned glsl_get_explicit_stride(const struct glsl_type *type); ^~~~~ --- src/compiler/nir_types.cpp | 4 ++-- src/compiler/nir_types.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index 356252276b4a..7ea5ef9b8310 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -71,14 +71,14 @@ glsl_get_struct_field(const glsl_type *type, unsigned index) return type->fields.structure[index].type; } -const int +int glsl_get_struct_field_offset(const struct glsl_type *type, unsigned index) { return type->fields.structure[index].offset; } -const unsigned +unsigned glsl_get_explicit_stride(const struct glsl_type *type) { return type->explicit_stride; diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h index d17671b94000..900445ba40f9 100644 --- a/src/compiler/nir_types.h +++ b/src/compiler/nir_types.h @@ -46,10 +46,9 @@ const char *glsl_get_type_name(const struct glsl_type *type); const struct glsl_type *glsl_get_struct_field(const struct glsl_type *type, unsigned index); -const int glsl_get_struct_field_offset(const struct glsl_type *type, - unsigned index); +int glsl_get_struct_field_offset(const struct glsl_type *type, unsigned index); -const unsigned glsl_get_explicit_stride(const struct glsl_type *type); +unsigned glsl_get_explicit_stride(const struct glsl_type *type); const struct glsl_type *glsl_get_array_element(const struct glsl_type *type); const struct glsl_type *glsl_without_array(const struct glsl_type *type); const struct glsl_type *glsl_without_array_or_matrix(const struct glsl_type *type); -- GitLab From a81ca0408328eafc4210b7a98d68a5ba96f9692e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 11 Jan 2019 12:27:25 -0800 Subject: [PATCH 2/4] WIP: nir: Copy propagate from phi node to bcsel source TODO: Refactor code common with opt_peel_loop_initial_if and opt_simpify_phi_of_bcsel (next commit). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109216 Fixes: 8fb8ebfbb05 ("intel/compiler: More peephole select") --- src/compiler/nir/nir_opt_if.c | 145 ++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index c2f945d4d598..8a61b51e1a74 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -230,6 +230,150 @@ opt_peel_loop_initial_if(nir_loop *loop) return true; } +/** + * Attempt to "copy propagate" from a phi node to the source of a bcsel + * + * In order for this pass to make progress, several conditions must be met. + * + * 1. The condition of the bcsel must be a phi node that selects between a + * constant value from the block preceeding the loop and a constant value + * from inside the loop. + * + * 2. The bcsel source selected by the constant from the block preceeding the + * loop must be a phi node. + * + * 3. The phi node of that source must have a value from the block preceeding + * the loop. + * + * This allows code such as + * + * vec1 32 ssa_51 = load_const (0xffffffff) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_9 + * vec1 32 ssa_56 = phi block_0: ssa_3, block_9: ssa_59 + * vec1 32 ssa_57 = phi block_0: ssa_14, block_9: ssa_51 + * vec1 32 ssa_58 = iadd ssa_56, ssa_2 + * vec1 32 ssa_59 = b32csel ssa_57, ssa_58, ssa_56 + * + * to be converted to + * + * vec1 32 ssa_51 = load_const (0xffffffff) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_9 + * vec1 32 ssa_56 = phi block_0: ssa_3, block_9: ssa_59 + * vec1 32 ssa_57 = phi block_0: ssa_14, block_9: ssa_51 + * vec1 32 ssa_58 = iadd ssa_56, ssa_2 + * vec1 32 ssa_59 = b32csel ssa_57, ssa_58, ssa_3 + */ +static bool +opt_simpify_bcsel_of_phi(nir_loop *loop) +{ + bool progress = false; + nir_block *header_block = nir_loop_first_block(loop); + MAYBE_UNUSED nir_block *prev_block = + nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + + /* It would be insane if this were not true */ + assert(_mesa_set_search(header_block->predecessors, prev_block)); + + /* The loop must have exactly one continue block which could be a block + * ending in a continue instruction or the "natural" continue from the + * last block in the loop back to the top. + */ + if (header_block->predecessors->entries != 2) + return false; + + nir_block *continue_block = find_continue_block(loop); + + /* FINISHME: This only iterates the instructions in the first block of the + * loop, but we really want to iterate all the instructions in the loop. + */ + nir_foreach_instr_safe(instr, header_block) { + if (instr->type == nir_instr_type_phi) + continue; + + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *bcsel = nir_instr_as_alu(instr); + if (bcsel->op != nir_op_bcsel) + continue; + + if (!bcsel->src[0].src.is_ssa) + continue; + + nir_ssa_def *cond = bcsel->src[0].src.ssa; + if (cond->parent_instr->type != nir_instr_type_phi) + continue; + + nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr); + if (cond->parent_instr->block != header_block) + continue; + + /* We already know we have exactly one continue */ + assert(exec_list_length(&cond_phi->srcs) == 2); + + uint32_t entry_val = 0, continue_val = 0; + unsigned num_const = 0; + nir_foreach_phi_src(src, cond_phi) { + assert(src->src.is_ssa); + nir_const_value *const_src = nir_src_as_const_value(src->src); + if (!const_src) + break; + + num_const++; + + if (src->pred == continue_block) { + continue_val = const_src->u32[0]; + } else { + assert(src->pred == prev_block); + entry_val = const_src->u32[0]; + } + } + + if (num_const != 2) + continue; + + /* If they both execute or both don't execute, this is a job for + * nir_dead_cf, not this pass. + */ + if ((entry_val && continue_val) || (!entry_val && !continue_val)) + continue; + + const unsigned entry_src = entry_val ? 1 : 2; + + if (!bcsel->src[entry_src].src.is_ssa) + continue; + + if (bcsel->src[entry_src].src.ssa->parent_instr->type != nir_instr_type_phi) + continue; + + if (bcsel->src[entry_src].src.ssa->parent_instr->block != header_block) + continue; + + nir_phi_instr *entry_phi = + nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr); + + nir_foreach_phi_src(src, entry_phi) { + assert(src->src.is_ssa); + + if (src->pred == prev_block) { + nir_instr_rewrite_src(&bcsel->instr, + &bcsel->src[entry_src].src, + src->src); + progress = true; + break; + } + } + } + + return progress; +} + static bool is_block_empty(nir_block *block) { @@ -797,6 +941,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) case nir_cf_node_loop: { nir_loop *loop = nir_cf_node_as_loop(cf_node); progress |= opt_if_cf_list(b, &loop->body); + progress |= opt_simpify_bcsel_of_phi(loop); progress |= opt_peel_loop_initial_if(loop); progress |= opt_if_loop_last_continue(loop); break; -- GitLab From e648a7941bf2489c7231acfa17a1809bf36f7d7e Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 11 Jan 2019 14:37:26 -0800 Subject: [PATCH 3/4] WIP: nir: Copy propagate from bcsel source to phi node TODO: Deal with source modifiers in the bcsel. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109216 Fixes: 8fb8ebfbb05 ("intel/compiler: More peephole select") --- src/compiler/nir/nir_opt_if.c | 177 ++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c index 8a61b51e1a74..36335bcc7249 100644 --- a/src/compiler/nir/nir_opt_if.c +++ b/src/compiler/nir/nir_opt_if.c @@ -374,6 +374,182 @@ opt_simpify_bcsel_of_phi(nir_loop *loop) return progress; } +/** + * Attempt to replace a bcsel with a phi node. + * + * In order for this pass to make progress, several conditions must be met. + * + * 1. The condition of the bcsel must be a phi node that selects between a + * constant value from the block preceeding the loop and a constant value + * from inside the loop. + * + * 2. The bcsel source selected by the constant from the loop continue must + * not be a phi node. Phi nodes can often be removed by + * \c opt_simpify_bcsel_of_phi. + * + * 3. The result of the bcsel must be used as a source of a phi node in the + * same block. + * + * 4. The phi node source for the entry block must be the same as the bcsel + * source that would be selected from the entry block. + * + * If these conditions are met, the use of the bcsel in the phi node source is + * replaced with the bcsel source selected by the constant from the loop + * continue. + * + * This allows code such as + * + * vec1 32 ssa_51 = load_const (0xffffffff) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_9 + * vec1 32 ssa_56 = phi block_0: ssa_3, block_9: ssa_59 + * vec1 32 ssa_57 = phi block_0: ssa_14, block_9: ssa_51 + * vec1 32 ssa_58 = iadd ssa_56, ssa_2 + * vec1 32 ssa_59 = b32csel ssa_57, ssa_58, ssa_3 + * + * to be converted to + * + * vec1 32 ssa_51 = load_const (0xffffffff) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_9 + * vec1 32 ssa_56 = phi block_0: ssa_3, block_9: ssa_58 + * vec1 32 ssa_57 = phi block_0: ssa_14, block_9: ssa_51 + * vec1 32 ssa_58 = iadd ssa_56, ssa_2 + */ +static bool +opt_simpify_phi_of_bcsel(nir_loop *loop) +{ + bool progress = false; + nir_block *header_block = nir_loop_first_block(loop); + MAYBE_UNUSED nir_block *prev_block = + nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + + /* It would be insane if this were not true */ + assert(_mesa_set_search(header_block->predecessors, prev_block)); + + /* The loop must have exactly one continue block which could be a block + * ending in a continue instruction or the "natural" continue from the + * last block in the loop back to the top. + */ + if (header_block->predecessors->entries != 2) + return false; + + nir_block *continue_block = find_continue_block(loop); + + nir_foreach_instr_safe(instr, header_block) { + if (instr->type == nir_instr_type_phi) + continue; + + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *bcsel = nir_instr_as_alu(instr); + if (bcsel->op != nir_op_bcsel) + continue; + + if (!bcsel->src[0].src.is_ssa) + continue; + + nir_ssa_def *cond = bcsel->src[0].src.ssa; + if (cond->parent_instr->type != nir_instr_type_phi) + continue; + + nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr); + if (cond->parent_instr->block != header_block) + continue; + + /* We already know we have exactly one continue */ + assert(exec_list_length(&cond_phi->srcs) == 2); + + uint32_t entry_val = 0, continue_val = 0; + unsigned num_const = 0; + nir_foreach_phi_src(src, cond_phi) { + assert(src->src.is_ssa); + nir_const_value *const_src = nir_src_as_const_value(src->src); + if (!const_src) + break; + + num_const++; + + if (src->pred == continue_block) { + continue_val = const_src->u32[0]; + } else { + assert(src->pred == prev_block); + entry_val = const_src->u32[0]; + } + } + + if (num_const != 2) + continue; + + /* If they both execute or both don't execute, this is a job for + * nir_dead_cf, not this pass. + */ + if ((entry_val && continue_val) || (!entry_val && !continue_val)) + continue; + + const unsigned entry_src = entry_val ? 1 : 2; + const unsigned continue_src = entry_val ? 2 : 1; + + nir_foreach_use_safe(bcsel_use_src, &bcsel->dest.dest.ssa) { + assert(bcsel_use_src->is_ssa); + + if (bcsel_use_src->parent_instr->type != nir_instr_type_phi) + continue; + + if (bcsel_use_src->parent_instr->block != header_block) + continue; + + nir_phi_instr *use_phi = nir_instr_as_phi(bcsel_use_src->parent_instr); + + /* We already know we have exactly one continue */ + assert(exec_list_length(&use_phi->srcs) == 2); + + nir_phi_src *entry_phi_src = NULL; + nir_phi_src *continue_phi_src = NULL; + + nir_foreach_phi_src(src, use_phi) { + assert(src->src.is_ssa); + + if (src->pred == continue_block) { + continue_phi_src = src; + } else { + assert(src->pred == prev_block); + + entry_phi_src = + src->src.ssa == bcsel->src[entry_src].src.ssa ? src : NULL; + } + } + + if (entry_phi_src != NULL && continue_phi_src != NULL) { + /* Rewrite the phi to use the bcsel source. */ + nir_instr_rewrite_src(&use_phi->instr, + &continue_phi_src->src, + bcsel->src[continue_src].src); + + /* Rewrite the non-phi node users of the bcsel to use the phi. */ + nir_foreach_use_safe(use_src, &bcsel->dest.dest.ssa) { + if (use_src->parent_instr->type == nir_instr_type_phi) + continue; + + nir_instr_rewrite_src(use_src->parent_instr, + use_src, + nir_src_for_ssa(&use_phi->dest.ssa)); + } + + progress = true; + break; + } + } + } + + return progress; +} + static bool is_block_empty(nir_block *block) { @@ -942,6 +1118,7 @@ opt_if_cf_list(nir_builder *b, struct exec_list *cf_list) nir_loop *loop = nir_cf_node_as_loop(cf_node); progress |= opt_if_cf_list(b, &loop->body); progress |= opt_simpify_bcsel_of_phi(loop); + progress |= opt_simpify_phi_of_bcsel(loop); progress |= opt_peel_loop_initial_if(loop); progress |= opt_if_loop_last_continue(loop); break; -- GitLab From 8cfb4399bbb9f587be068f4089997af0e24efbe4 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Wed, 9 Jan 2019 14:16:35 -0800 Subject: [PATCH 4/4] intel/compiler: Try nir_opt_if once before nir_opt_peephole_select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The four helped sharders are two pair of SIMD8/SIMD16 fragment shaders in Bioshock Infinite. In all cases the shaders are helped because some comparisons are moved closer to the if-statements that use the condition, and the extra comparison to regenerat the flag is no longer necessary. This should also fix the loop-unrolling regression in every SPIR-V shader generated by glslang. All Gen7+ platforms had similar results. (Skylake shown) total instructions in shared programs: 15047460 -> 15047452 (<.01%) instructions in affected programs: 820 -> 812 (-0.98%) helped: 4 HURT: 0 helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2 helped stats (rel) min: 0.98% max: 0.98% x̄: 0.98% x̃: 0.98% 95% mean confidence interval for instructions value: -2.00 -2.00 95% mean confidence interval for instructions %-change: -0.98% -0.98% Instructions are helped. total cycles in shared programs: 369988163 -> 369988143 (<.01%) cycles in affected programs: 7970 -> 7950 (-0.25%) helped: 4 HURT: 0 helped stats (abs) min: 2 max: 8 x̄: 5.00 x̃: 5 helped stats (rel) min: 0.11% max: 0.36% x̄: 0.24% x̃: 0.24% 95% mean confidence interval for cycles value: -10.51 0.51 95% mean confidence interval for cycles %-change: -0.46% -0.01% Inconclusive result (value mean confidence interval includes 0). --- src/intel/compiler/brw_nir.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c index 749c00ebcc6d..75c240e4cfbc 100644 --- a/src/intel/compiler/brw_nir.c +++ b/src/intel/compiler/brw_nir.c @@ -569,6 +569,16 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, OPT(nir_opt_dce); OPT(nir_opt_cse); + /* Try nir_opt_if once before doing nir_opt_peephole_select. glslang + * generates for-loops with a pattern that nir_opt_peephole_select will + * convert to a nir_op_bcsel. However, this prevents nir_opt_if + * (opt_peel_loop_initial_if specifically) from moving the code to the + * end of the loop and eliminating the if-statement). This prevents the + * loop analysis code from detecting the induction variable, and the + * loop is therefore not unrolled. + */ + OPT(nir_opt_if); + /* Passing 0 to the peephole select pass causes it to convert * if-statements that contain only move instructions in the branches * regardless of the count. -- GitLab