From ac7ba2d05732d576e41101d2d5b2ec82284a077b Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 4 Sep 2020 15:57:40 +0100 Subject: [PATCH 01/10] lavapipe,nv50/ir,lima: run nir_opt_algebraic_late This will become necessary to lower fcanonicalize without creating a fmul(a, 1.0)<->fcanonicalize(a) optimization loop. Signed-off-by: Rhys Perry Reviewed-by: Vasily Khoruzhick (lima) Part-of: --- src/gallium/drivers/lima/lima_program.c | 4 ++++ src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 3 +++ src/gallium/frontends/lavapipe/lvp_pipeline.c | 3 +++ 3 files changed, 10 insertions(+) diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index e43fa9e629e2..7714e0838adf 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -142,6 +142,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s) NIR_PASS_V(s, nir_lower_bool_to_float); NIR_PASS_V(s, nir_copy_prop); + NIR_PASS_V(s, nir_opt_algebraic_late); NIR_PASS_V(s, nir_opt_dce); NIR_PASS_V(s, nir_lower_locals_to_regs); NIR_PASS_V(s, nir_convert_from_ssa, true); @@ -249,6 +250,9 @@ lima_program_optimize_fs_nir(struct nir_shader *s, NIR_PASS(progress, s, nir_opt_algebraic); } while (progress); + NIR_PASS_V(s, nir_opt_algebraic_late); + NIR_PASS_V(s, nir_opt_dce); + /* Must be run after optimization loop */ NIR_PASS_V(s, lima_nir_scale_trig); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index 37ad8a1a53b7..8456ff86a7ca 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3166,6 +3166,9 @@ Converter::run() NIR_PASS(progress, nir, nir_opt_dead_cf); } while (progress); + NIR_PASS_V(nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_lower_bool_to_int32); NIR_PASS_V(nir, nir_convert_from_ssa, true); diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c index 8be8701d9dfa..58e131133874 100644 --- a/src/gallium/frontends/lavapipe/lvp_pipeline.c +++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c @@ -594,6 +594,9 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline, NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL); } while (progress); + NIR_PASS_V(nir, nir_opt_algebraic_late); + NIR_PASS_V(nir, nir_opt_dce); + NIR_PASS_V(nir, nir_lower_var_copies); NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS_V(nir, nir_opt_dce); -- GitLab From a84a117d9804d4eb0a9a02b3743249a6dbd96d74 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 26 May 2021 17:48:09 +0100 Subject: [PATCH 02/10] glsl,glsl/nir: emit exact comparisons for isnan() and isinf() Comparisons in GLSL assume operands are not NaN/Inf, so we need separate opcodes which don't have this assumption to implement the isnan() and isinf() builtins without being optimized away by NIR.
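For illustration, the inexact ('~') self-comparison folds in nir_opt_algebraic.py are the kind of pattern that makes this necessary; a minimal sketch of such rules follows (the exact upstream rule text may differ):

    # Python, nir_opt_algebraic.py style. Inexact '~' patterns may assume
    # their operands are never NaN, so NIR may fold a self-comparison to a
    # constant. Without the *_exact opcodes, isnan(x) lowers to a plain
    # fneu(x, x) and matches the second pattern.
    a = 'a'
    nan_unsafe_folds = [
        (('~feq', a, a), True),    # folds the comparison behind !isnan(x)
        (('~fneu', a, a), False),  # folds isnan(x) to false
    ]

Emitting these comparisons with the exact bit set keeps them out of reach of such inexact patterns.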
Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/glsl/builtin_functions.cpp | 4 ++-- src/compiler/glsl/glsl_to_nir.cpp | 10 ++++++++++ src/compiler/glsl/ir.cpp | 2 ++ src/compiler/glsl/ir_builder.cpp | 12 ++++++++++++ src/compiler/glsl/ir_builder.h | 2 ++ src/compiler/glsl/ir_expression_operation.py | 3 +++ src/compiler/glsl/ir_validate.cpp | 2 ++ src/compiler/glsl/opt_algebraic.cpp | 2 ++ src/mesa/program/ir_to_mesa.cpp | 2 ++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 ++ 10 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 601263e42388..c62abae9a1ad 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -5559,7 +5559,7 @@ builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type ir_variable *x = in_var(type, "x"); MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); - body.emit(ret(nequal(x, x))); + body.emit(ret(nequal_exact(x, x))); return sig; } @@ -5584,7 +5584,7 @@ builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type } } - body.emit(ret(equal(abs(x), imm(type, infinities)))); + body.emit(ret(equal_exact(abs(x), imm(type, infinities)))); return sig; } diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index aa226643dc49..c23e71f3e838 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -2219,17 +2219,27 @@ nir_visitor::visit(ir_expression *ir) result = nir_uge(&b, srcs[0], srcs[1]); break; case ir_binop_equal: + case ir_binop_equal_exact: { + bool save_exact = b.exact; + b.exact |= ir->operation == ir_binop_equal_exact; if (type_is_float(types[0])) result = nir_feq(&b, srcs[0], srcs[1]); else result = nir_ieq(&b, srcs[0], srcs[1]); + b.exact = save_exact; break; + } case ir_binop_nequal: + case ir_binop_nequal_exact: { + bool save_exact = b.exact; + b.exact |= ir->operation == ir_binop_nequal_exact; if (type_is_float(types[0])) result = nir_fneu(&b, srcs[0], srcs[1]); else result = nir_ine(&b, srcs[0], srcs[1]); + b.exact = save_exact; break; + } case ir_binop_all_equal: if (type_is_float(types[0])) { switch (ir->operands[0]->type->vector_elements) { diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index f9e9c321db4f..83e3073ed857 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -529,6 +529,8 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) case ir_binop_equal: case ir_binop_nequal: + case ir_binop_equal_exact: + case ir_binop_nequal_exact: case ir_binop_gequal: case ir_binop_less: assert(op0->type == op1->type); diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp index 416d8c71ed1c..003094721c56 100644 --- a/src/compiler/glsl/ir_builder.cpp +++ b/src/compiler/glsl/ir_builder.cpp @@ -362,6 +362,18 @@ nequal(operand a, operand b) return expr(ir_binop_nequal, a, b); } +ir_expression* +equal_exact(operand a, operand b) +{ + return expr(ir_binop_equal_exact, a, b); +} + +ir_expression* +nequal_exact(operand a, operand b) +{ + return expr(ir_binop_nequal_exact, a, b); +} + ir_expression* less(operand a, operand b) { diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h index 9309039f9dea..04ed226d4dc7 100644 --- a/src/compiler/glsl/ir_builder.h +++ b/src/compiler/glsl/ir_builder.h @@ -160,6 +160,8 @@ ir_expression *sign(operand a); ir_expression *subr_to_int(operand a); 
ir_expression *equal(operand a, operand b); ir_expression *nequal(operand a, operand b); +ir_expression *equal_exact(operand a, operand b); +ir_expression *nequal_exact(operand a, operand b); ir_expression *less(operand a, operand b); ir_expression *greater(operand a, operand b); ir_expression *lequal(operand a, operand b); diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py index c9f9831c346d..f8ad5d92ca95 100644 --- a/src/compiler/glsl/ir_expression_operation.py +++ b/src/compiler/glsl/ir_expression_operation.py @@ -653,6 +653,9 @@ ir_expression_operation = [ operation("gequal", 2, printable_name=">=", source_types=numeric_types, dest_type=bool_type, c_expression="{src0} >= {src1}"), operation("equal", 2, printable_name="==", source_types=all_types, dest_type=bool_type, c_expression="{src0} == {src1}"), operation("nequal", 2, printable_name="!=", source_types=all_types, dest_type=bool_type, c_expression="{src0} != {src1}"), + # Unlike the non-_exact versions, these should not be optimized in a way which assumes sources are not NaN. + operation("equal_exact", 2, source_types=real_types, dest_type=bool_type, c_expression="{src0} == {src1}"), + operation("nequal_exact", 2, source_types=real_types, dest_type=bool_type, c_expression="{src0} != {src1}"), # Returns single boolean for whether all components of operands[0] # equal the components of operands[1]. diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp index c60c36cd2608..f15a149215b9 100644 --- a/src/compiler/glsl/ir_validate.cpp +++ b/src/compiler/glsl/ir_validate.cpp @@ -780,6 +780,8 @@ ir_validate::visit_leave(ir_expression *ir) case ir_binop_gequal: case ir_binop_equal: case ir_binop_nequal: + case ir_binop_equal_exact: + case ir_binop_nequal_exact: /* The semantics of the IR operators differ from the GLSL <, >, <=, >=, * ==, and != operators. 
The IR operators perform a component-wise * comparison on scalar or vector types and return a boolean scalar or diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp index 7cef4fc6ef93..d8030953b96d 100644 --- a/src/compiler/glsl/opt_algebraic.cpp +++ b/src/compiler/glsl/opt_algebraic.cpp @@ -442,6 +442,8 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir) case ir_binop_gequal: new_op = ir_binop_less; break; case ir_binop_equal: new_op = ir_binop_nequal; break; case ir_binop_nequal: new_op = ir_binop_equal; break; + case ir_binop_equal_exact: new_op = ir_binop_nequal_exact; break; + case ir_binop_nequal_exact: new_op = ir_binop_equal_exact; break; case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 8c2dc5214436..01b7b55d4cab 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1057,9 +1057,11 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: + case ir_binop_equal_exact: emit_seq(ir, result_dst, op[0], op[1]); break; case ir_binop_nequal: + case ir_binop_nequal_exact: emit_sne(ir, result_dst, op[0], op[1]); break; case ir_binop_all_equal: diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 3ecdb8374b06..2f6fb8fd546d 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1678,9 +1678,11 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); break; case ir_binop_equal: + case ir_binop_equal_exact: emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); break; case ir_binop_nequal: + case ir_binop_nequal_exact: emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); break; case ir_binop_all_equal: -- GitLab From 42f7d1392195bc9aedeed9a3283a5b3da15d09f5 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 3 Mar 2021 19:05:21 +0000 Subject: [PATCH 03/10] nir: constify search expression helpers and nir_ssa_def_bits_used Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_range_analysis.c | 8 ++++---- src/compiler/nir/nir_range_analysis.h | 2 +- src/compiler/nir/nir_search.h | 2 +- src/compiler/nir/nir_search_helpers.h | 18 +++++++++--------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index 4e37881526f5..f8ce08acd2a5 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -1646,7 +1646,7 @@ nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht, } static uint64_t -ssa_def_bits_used(nir_ssa_def *def, int recur) +ssa_def_bits_used(const nir_ssa_def *def, int recur) { uint64_t bits_used = 0; uint64_t all_bits = BITFIELD64_MASK(def->bit_size); @@ -1767,7 +1767,7 @@ ssa_def_bits_used(nir_ssa_def *def, int recur) } case nir_instr_type_intrinsic: { - nir_intrinsic_instr *use_intrin = + const nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(src->parent_instr); unsigned src_idx = src - use_intrin->src; @@ -1820,7 +1820,7 @@ ssa_def_bits_used(nir_ssa_def *def, int recur) } case nir_instr_type_phi: { - nir_phi_instr *use_phi = nir_instr_as_phi(src->parent_instr); + const nir_phi_instr *use_phi = 
nir_instr_as_phi(src->parent_instr); bits_used |= ssa_def_bits_used(&use_phi->dest.ssa, recur); break; } @@ -1839,7 +1839,7 @@ ssa_def_bits_used(nir_ssa_def *def, int recur) } uint64_t -nir_ssa_def_bits_used(nir_ssa_def *def) +nir_ssa_def_bits_used(const nir_ssa_def *def) { return ssa_def_bits_used(def, 2); } diff --git a/src/compiler/nir/nir_range_analysis.h b/src/compiler/nir/nir_range_analysis.h index e30910d24112..7414e1a8e298 100644 --- a/src/compiler/nir/nir_range_analysis.h +++ b/src/compiler/nir/nir_range_analysis.h @@ -55,7 +55,7 @@ extern struct ssa_result_range nir_analyze_range(struct hash_table *range_ht, const nir_alu_instr *instr, unsigned src); -uint64_t nir_ssa_def_bits_used(nir_ssa_def *def); +uint64_t nir_ssa_def_bits_used(const nir_ssa_def *def); #ifdef __cplusplus } diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index aca3f46da186..28abfa821fff 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -165,7 +165,7 @@ typedef struct { * typically used to match an expression's uses, such as the number of times * the expression is used, and whether it's used by an if. */ - bool (*cond)(nir_alu_instr *instr); + bool (*cond)(const nir_alu_instr *instr); } nir_search_expression; struct per_op_table { diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 24938484377b..5b2ebbd7a5d1 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -271,7 +271,7 @@ is_not_const_and_not_fsign(struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -is_used_once(nir_alu_instr *instr) +is_used_once(const nir_alu_instr *instr) { bool zero_if_use = list_is_empty(&instr->dest.dest.ssa.if_uses); bool zero_use = list_is_empty(&instr->dest.dest.ssa.uses); @@ -293,19 +293,19 @@ is_used_once(nir_alu_instr *instr) } static inline bool -is_used_by_if(nir_alu_instr *instr) +is_used_by_if(const nir_alu_instr *instr) { return !list_is_empty(&instr->dest.dest.ssa.if_uses); } static inline bool -is_not_used_by_if(nir_alu_instr *instr) +is_not_used_by_if(const nir_alu_instr *instr) { return list_is_empty(&instr->dest.dest.ssa.if_uses); } static inline bool -is_used_by_non_fsat(nir_alu_instr *instr) +is_used_by_non_fsat(const nir_alu_instr *instr) { nir_foreach_use(src, &instr->dest.dest.ssa) { const nir_instr *const user_instr = src->parent_instr; @@ -324,7 +324,7 @@ is_used_by_non_fsat(nir_alu_instr *instr) } static inline bool -is_only_used_as_float(nir_alu_instr *instr) +is_only_used_as_float(const nir_alu_instr *instr) { nir_foreach_use(src, &instr->dest.dest.ssa) { const nir_instr *const user_instr = src->parent_instr; @@ -343,13 +343,13 @@ is_only_used_as_float(nir_alu_instr *instr) } static inline bool -only_lower_8_bits_used(nir_alu_instr *instr) +only_lower_8_bits_used(const nir_alu_instr *instr) { return (nir_ssa_def_bits_used(&instr->dest.dest.ssa) & ~0xffull) == 0; } static inline bool -only_lower_16_bits_used(nir_alu_instr *instr) +only_lower_16_bits_used(const nir_alu_instr *instr) { return (nir_ssa_def_bits_used(&instr->dest.dest.ssa) & ~0xffffull) == 0; } @@ -403,13 +403,13 @@ is_lower_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -no_signed_wrap(nir_alu_instr *instr) +no_signed_wrap(const nir_alu_instr *instr) { return instr->no_signed_wrap; } static inline bool -no_unsigned_wrap(nir_alu_instr *instr) +no_unsigned_wrap(const nir_alu_instr *instr) { return instr->no_unsigned_wrap; } -- GitLab From 
661b51e229d943b23e73ab9732fc018e4dfb2601 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Mon, 15 Feb 2021 16:10:55 +0000 Subject: [PATCH 04/10] nir/search: add wrappers to replace inexact By separating "inexact" into "imprecise" and "unsafe", we can enable imprecise optimizations for shaders which use float controls. Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_algebraic.py | 53 +++++++++++++++++++++++---- src/compiler/nir/nir_opt_algebraic.py | 2 + src/compiler/nir/nir_search.c | 10 +++-- src/compiler/nir/nir_search.h | 8 +++- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py index 37fefd87a6b9..204e1086e28e 100644 --- a/src/compiler/nir/nir_algebraic.py +++ b/src/compiler/nir/nir_algebraic.py @@ -200,7 +200,9 @@ class Value(object): ${val.cond if val.cond else 'NULL'}, ${val.swizzle()}, % elif isinstance(val, Expression): - ${'true' if val.inexact else 'false'}, ${'true' if val.exact else 'false'}, + ${'true' if val.unsafe else 'false'}, + ${'true' if val.imprecise else 'false'}, + ${'true' if val.exact else 'false'}, ${val.comm_expr_idx}, ${val.comm_exprs}, ${val.c_opcode()}, { ${', '.join(src.c_value_ptr(cache) for src in val.sources)} }, @@ -359,22 +361,39 @@ class Variable(Value): _opcode_re = re.compile(r"(?P<inexact>~)?(?P<exact>!)?(?P<opcode>\w+)(?:@(?P<bits>\d+))?" r"(?P<cond>\([^\)]+\))?") +class SearchExpression(object): + def __init__(self, expr): + self.opcode = expr[0] + self.sources = expr[1:] + self.unsafe = False + self.imprecise = False + + @staticmethod + def create(val): + if isinstance(val, tuple): + return SearchExpression(val) + else: + assert(isinstance(val, SearchExpression)) + return val + class Expression(Value): def __init__(self, expr, name_base, varset): Value.__init__(self, expr, name_base, "expression") - assert isinstance(expr, tuple) - m = _opcode_re.match(expr[0]) + expr = SearchExpression.create(expr) + + m = _opcode_re.match(expr.opcode) assert m and m.group('opcode') is not None self.opcode = m.group('opcode') self._bit_size = int(m.group('bits')) if m.group('bits') else None - self.inexact = m.group('inexact') is not None + self.unsafe = (m.group('inexact') is not None) or expr.unsafe + self.imprecise = (m.group('inexact') is not None) or expr.imprecise self.exact = m.group('exact') is not None self.cond = m.group('cond') - assert not self.inexact or not self.exact, \ 'Expression cannot be both exact and inexact.' + assert not (self.unsafe or self.imprecise) or not self.exact, \ 'Expression cannot be both exact and unsafe/imprecise.' # "many-comm-expr" isn't really a condition. It's notification to the # generator that this pattern is known to have too many commutative @@ -390,7 +409,7 @@ class Expression(Value): self.many_commutative_expressions = True self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset) - for (i, src) in enumerate(expr[1:]) ] + for (i, src) in enumerate(expr.sources) ] # nir_search_expression::srcs is hard-coded to 4 assert len(self.sources) <= 4 @@ -1222,3 +1241,23 @@ class AlgebraicPass(object): automaton=self.automaton, get_c_opcode=get_c_opcode, itertools=itertools) + +# Don't match precise/invariant expressions +def imprecise(*expr): + expr = SearchExpression.create(expr) + expr.imprecise = True + return expr + +# Don't match if NaN/Inf/-0.0 guarantees are required. 
+def unsafe(*expr): + expr = SearchExpression.create(expr) + expr.unsafe = True + return expr + +# Disable the optimization for precise/invariant expressions or if NaN/Inf/-0.0 +# guarantees are required. +def unsafe_imprecise(*expr): + expr = SearchExpression.create(expr) + expr.unsafe = True + expr.imprecise = True + return expr diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index fb6afb2f60b2..a32276a26ac9 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -43,6 +43,8 @@ e = 'e' signed_zero_inf_nan_preserve_16 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 16)' signed_zero_inf_nan_preserve_32 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 32)' +imprecise = nir_algebraic.imprecise + # Written in the form (<search>, <replace>) where <search> is an expression # and <replace> is either an expression or a value. An expression is # defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>) diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 437a24b9b02b..77d0fbdafff4 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -405,7 +405,7 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr, instr->dest.dest.ssa.bit_size != expr->value.bit_size) return false; - state->inexact_match = expr->inexact || state->inexact_match; + state->inexact_match = expr->imprecise || state->inexact_match; state->has_exact_alu = instr->exact || state->has_exact_alu; if (state->inexact_match && state->has_exact_alu) return false; @@ -612,8 +612,10 @@ UNUSED static void dump_value(const nir_search_value *val) case nir_search_value_expression: { const nir_search_expression *expr = nir_search_value_as_expression(val); fprintf(stderr, "("); - if (expr->inexact) + if (expr->unsafe) fprintf(stderr, "~"); + if (expr->imprecise || expr->exact) + fprintf(stderr, "!"); switch (expr->opcode) { #define CASE(n) \ case nir_search_op_##n: fprintf(stderr, #n); break; @@ -877,7 +879,7 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr, unsigned bit_size = alu->dest.dest.ssa.bit_size; const unsigned execution_mode = build->shader->info.float_controls_execution_mode; - const bool ignore_inexact = + const bool ignore_unsafe = nir_is_float_control_signed_zero_inf_nan_preserve(execution_mode, bit_size) || nir_is_denorm_flush_to_zero(execution_mode, bit_size); @@ -886,7 +888,7 @@ nir_algebraic_instr(nir_builder *build, nir_instr *instr, for (uint16_t i = 0; i < transform_counts[xform_idx]; i++) { const struct transform *xform = &transforms[xform_idx][i]; if (condition_flags[xform->condition_offset] && - !(xform->search->inexact && ignore_inexact) && + !(xform->search->unsafe && ignore_unsafe) && nir_replace_instr(build, alu, range_ht, states, pass_op_table, xform->search, xform->replace, worklist)) { _mesa_hash_table_clear(range_ht, NULL); diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index 28abfa821fff..ef6fa32d0589 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -134,11 +134,17 @@ uint16_t nir_search_op_for_nir_op(nir_op op); typedef struct { nir_search_value value; + /* When set on a search expression, the expression will only match for + * shaders which do not require that denormals are flushed or -0.0/inf/NaN + * are preserved. 
+ */ + bool unsafe; + /* When set on a search expression, the expression will only match an SSA * value that does *not* have the exact bit set. If unset, the exact bit * on the SSA value is ignored. */ - bool inexact; + bool imprecise; /** In a replacement, requests that the instruction be marked exact. */ bool exact; -- GitLab From 2289056921456bbb147b51cfbdc3889a6cbcedb7 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 4 Sep 2020 13:30:39 +0100 Subject: [PATCH 05/10] nir: add fcanonicalize Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir.h | 5 +++ src/compiler/nir/nir_opcodes.py | 5 +++ src/compiler/nir/nir_opt_algebraic.py | 45 +++++++++++++++++++++++++-- src/compiler/nir/nir_search_helpers.h | 14 +++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 167806a32bba..bf60ced5f6e4 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3670,6 +3670,11 @@ typedef struct nir_shader_compiler_options { * for rect texture lowering. */ bool has_txs; + /** Backend supports fcanonicalize; if not set, fcanonicalize will be lowered + * to fmul(a, 1.0). + */ + bool has_fcanonicalize; + /* Whether to generate only scoped_barrier intrinsics instead of the set of * memory and control barrier intrinsics based on GLSL. */ diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 17a1508c4d5a..c337b5c708f1 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -193,6 +193,11 @@ def unop_numeric_convert(name, out_type, in_type, const_expr): opcode(name, 0, out_type, [0], [in_type], True, "", const_expr) unop("mov", tuint, "src0") +# Flush subnormal values to zero with the same sign if required by +# the float controls execution mode. See +# https://llvm.org/docs/LangRef.html#llvm-canonicalize-intrinsic +# for more details. +unop("fcanonicalize", tfloat, "src0") unop("ineg", tint, "-src0") unop("fneg", tfloat, "-src0") diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index a32276a26ac9..393355f40c8b 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -40,6 +40,14 @@ c = 'c' d = 'd' e = 'e' +denorm_ftz_16 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 16)' +denorm_ftz_32 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 32)' +denorm_ftz_64 = 'nir_is_denorm_flush_to_zero(info->float_controls_execution_mode, 64)' + +denorm_preserve_16 = 'nir_is_denorm_preserve(info->float_controls_execution_mode, 16)' +denorm_preserve_32 = 'nir_is_denorm_preserve(info->float_controls_execution_mode, 32)' +denorm_preserve_64 = 'nir_is_denorm_preserve(info->float_controls_execution_mode, 64)' + signed_zero_inf_nan_preserve_16 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 16)' signed_zero_inf_nan_preserve_32 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 32)' @@ -94,6 +102,26 @@ def lowered_sincos(c): def intBitsToFloat(i): return struct.unpack('!f', struct.pack('!I', i))[0] +optimize_fcanonicalize = [ + # Eliminate all fcanonicalize if we are required to preserve denormals. + (('fcanonicalize', 'a@16'), a, denorm_preserve_16), + (('fcanonicalize', 'a@32'), a, denorm_preserve_32), + (('fcanonicalize', 'a@64'), a, denorm_preserve_64), + + # Eliminate imprecise fcanonicalize if we are not required to flush denormals. 
+ (imprecise('fcanonicalize', 'a@16'), a, '!'+denorm_ftz_16), + (imprecise('fcanonicalize', 'a@32'), a, '!'+denorm_ftz_32), + (imprecise('fcanonicalize', 'a@64'), a, '!'+denorm_ftz_64), + + # If denormals are required to be flushed or it's exact, we can still + # eliminate it if any denormals are already flushed or will be flushed. + (('fcanonicalize(is_only_used_as_float)', a), a), + (('fcanonicalize', 'a(is_created_as_float)'), a), + + # Integral numbers are not denormal. + (('fcanonicalize', 'a(is_integral)'), a), +] + optimizations = [ (('imul', a, '#b(is_pos_power_of_two)'), ('ishl', a, ('find_lsb', b)), '!options->lower_bitops'), @@ -276,6 +304,8 @@ optimizations.extend([ (('ishl', ('imul', a, '#b'), '#c'), ('imul', a, ('ishl', b, c))), ]) +optimizations.extend(optimize_fcanonicalize) + # Care must be taken here. Shifts in NIR uses only the lower log2(bitsize) # bits of the second source. These replacements must correctly handle the # case where (b % bitsize) + (c % bitsize) >= bitsize. @@ -1153,7 +1183,7 @@ optimizations.extend([ # Conversions from float32 to float64 and back can be removed as long as # it doesn't need to be precise, since the conversion may e.g. flush denorms - (('~f2f32', ('f2f64', 'a@32')), a), + (('f2f32', ('f2f64', 'a@32')), a), (('ffloor', 'a(is_integral)'), a), (('fceil', 'a(is_integral)'), a), @@ -2129,12 +2159,12 @@ for op in ['fpow']: (('bcsel', a, (op, b, c), (op + '(is_used_once)', d, c)), (op, ('bcsel', a, b, d), c)), ] -for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fneg', 'fabs', 'fsign']: +for op in ['frcp', 'frsq', 'fsqrt', 'fexp2', 'flog2', 'fsign', 'fsin', 'fcos', 'fneg', 'fabs', 'fsign', 'fcanonicalize']: optimizations += [ (('bcsel', c, (op + '(is_used_once)', a), (op + '(is_used_once)', b)), (op, ('bcsel', c, a, b))), ] -for op in ['ineg', 'iabs', 'inot', 'isign']: +for op in ['ineg', 'iabs', 'inot', 'isign', 'fcanonicalize']: optimizations += [ ((op, ('bcsel', c, '#a', '#b')), ('bcsel', c, (op, a), (op, b))), ] @@ -2411,6 +2441,15 @@ late_optimizations = [ (('ushr', a, 0), a), ] +# late_optimizations has a fneg(fneg(a)) optimization that can create fcanonicalize +late_optimizations += optimize_fcanonicalize + +late_optimizations += [ + # If we can't eliminate it, lower it so that backends don't have to deal with + # it. 
+ (('fcanonicalize', a), ('fmul', a, 1.0), '!options->has_fcanonicalize'), +] + # A few more extract cases we'd rather leave late for N in [16, 32]: aN = 'a@{0}'.format(N) diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 5b2ebbd7a5d1..aae8b1ba2f86 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -270,6 +270,20 @@ is_not_const_and_not_fsign(struct hash_table *ht, const nir_alu_instr *instr, !is_fsign(instr, src, num_components, swizzle); } +static inline bool +is_created_as_float(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +{ + nir_alu_instr *src_alu = + nir_src_as_alu_instr(instr->src[src].src); + + if (src_alu == NULL) + return false; + + nir_alu_type output_type = nir_op_infos[src_alu->op].output_type; + return nir_alu_type_get_base_type(output_type) == nir_type_float; +} + static inline bool is_used_once(const nir_alu_instr *instr) { -- GitLab From c817cd8cc1a707717107fd5620c9c152330d62af Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 4 Sep 2020 14:44:10 +0100 Subject: [PATCH 06/10] nir: add shader_info to search variable condition signature Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_opt_comparison_pre.c | 4 +- src/compiler/nir/nir_search.c | 4 +- src/compiler/nir/nir_search.h | 5 +- src/compiler/nir/nir_search_helpers.h | 113 +++++++++++---------- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 4 +- 5 files changed, 71 insertions(+), 59 deletions(-) diff --git a/src/compiler/nir/nir_opt_comparison_pre.c b/src/compiler/nir/nir_opt_comparison_pre.c index ae35e2c8d098..734aeb94ccf4 100644 --- a/src/compiler/nir/nir_opt_comparison_pre.c +++ b/src/compiler/nir/nir_opt_comparison_pre.c @@ -325,8 +325,8 @@ comparison_pre_block(nir_block *block, struct block_queue *bq, nir_builder *bld) * and neither operand is immediate value 0, add it to the set. 
*/ if (is_used_by_if(alu) && - is_not_const_zero(NULL, alu, 0, 1, swizzle) && - is_not_const_zero(NULL, alu, 1, 1, swizzle)) + is_not_const_zero(NULL, NULL, alu, 0, 1, swizzle) && + is_not_const_zero(NULL, NULL, alu, 1, 1, swizzle)) add_instruction_for_block(bi, alu); break; diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c index 77d0fbdafff4..975828259276 100644 --- a/src/compiler/nir/nir_search.c +++ b/src/compiler/nir/nir_search.c @@ -46,6 +46,7 @@ struct match_state { nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES]; struct hash_table *range_ht; + nir_shader *shader; }; static bool @@ -313,7 +314,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const) return false; - if (var->cond && !var->cond(state->range_ht, instr, + if (var->cond && !var->cond(state->range_ht, &state->shader->info, instr, src, num_components, new_swizzle)) return false; @@ -706,6 +707,7 @@ nir_replace_instr(nir_builder *build, nir_alu_instr *instr, state.inexact_match = false; state.has_exact_alu = false; state.range_ht = range_ht; + state.shader = build->shader; state.pass_op_table = pass_op_table; STATIC_ASSERT(sizeof(state.comm_op_direction) * 8 >= NIR_SEARCH_MAX_COMM_OPS); diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h index ef6fa32d0589..18d5feb6af5f 100644 --- a/src/compiler/nir/nir_search.h +++ b/src/compiler/nir/nir_search.h @@ -95,8 +95,9 @@ typedef struct { * variables to require, for example, power-of-two in order for the search * to match. */ - bool (*cond)(struct hash_table *range_ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, const uint8_t *swizzle); + bool (*cond)(struct hash_table *range_ht, const shader_info *info, + const nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle); /** Swizzle (for replace only) */ uint8_t swizzle[NIR_MAX_VEC_COMPONENTS]; diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index aae8b1ba2f86..3ddbb42f348d 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -33,8 +33,8 @@ #include static inline bool -is_pos_power_of_two(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_pos_power_of_two(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { /* only constant srcs: */ @@ -65,8 +65,8 @@ is_pos_power_of_two(UNUSED struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -is_neg_power_of_two(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_neg_power_of_two(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { /* only constant srcs: */ @@ -96,6 +96,7 @@ is_neg_power_of_two(UNUSED struct hash_table *ht, const nir_alu_instr *instr, #define MULTIPLE(test) \ static inline bool \ is_unsigned_multiple_of_ ## test(UNUSED struct hash_table *ht, \ + UNUSED const shader_info *info, \ const nir_alu_instr *instr, \ unsigned src, unsigned num_components, \ const uint8_t *swizzle) \ @@ -121,8 +122,8 @@ MULTIPLE(32) MULTIPLE(64) static inline bool -is_zero_to_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, 
+is_zero_to_one(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { /* only constant srcs: */ @@ -152,8 +153,8 @@ is_zero_to_one(UNUSED struct hash_table *ht, const nir_alu_instr *instr, * 1 while this function tests 0 < src < 1. */ static inline bool -is_gt_0_and_lt_1(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_gt_0_and_lt_1(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { /* only constant srcs: */ @@ -177,8 +178,8 @@ is_gt_0_and_lt_1(UNUSED struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -is_not_const_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_not_const_zero(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { if (nir_src_as_const_value(instr->src[src].src) == NULL) @@ -206,16 +207,17 @@ is_not_const_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -is_not_const(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, UNUSED unsigned num_components, +is_not_const(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { return !nir_src_is_const(instr->src[src].src); } static inline bool -is_not_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_not_fmul(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) { nir_alu_instr *src_alu = nir_src_as_alu_instr(instr->src[src].src); @@ -224,14 +226,15 @@ is_not_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, return true; if (src_alu->op == nir_op_fneg) - return is_not_fmul(ht, src_alu, 0, 0, NULL); + return is_not_fmul(ht, info, src_alu, 0, 0, NULL); return src_alu->op != nir_op_fmul; } static inline bool -is_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_fmul(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) { nir_alu_instr *src_alu = nir_src_as_alu_instr(instr->src[src].src); @@ -240,7 +243,7 @@ is_fmul(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, return false; if (src_alu->op == nir_op_fneg) - return is_fmul(ht, src_alu, 0, 0, NULL); + return is_fmul(ht, info, src_alu, 0, 0, NULL); return src_alu->op == nir_op_fmul; } @@ -262,16 +265,17 @@ is_fsign(const nir_alu_instr *instr, unsigned src, } static inline bool -is_not_const_and_not_fsign(struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, - const uint8_t *swizzle) +is_not_const_and_not_fsign(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) { - return is_not_const(ht, instr, src, num_components, swizzle) && + return is_not_const(ht, info, instr, src, num_components, swizzle) && !is_fsign(instr, src, 
num_components, swizzle); } static inline bool -is_created_as_float(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, +is_created_as_float(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { nir_alu_instr *src_alu = @@ -374,8 +378,8 @@ only_lower_16_bits_used(const nir_alu_instr *instr) * of all its components is zero. */ static inline bool -is_upper_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_upper_half_zero(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { if (nir_src_as_const_value(instr->src[src].src) == NULL) @@ -399,8 +403,8 @@ is_upper_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, * of all its components is zero. */ static inline bool -is_lower_half_zero(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, unsigned num_components, +is_lower_half_zero(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, const uint8_t *swizzle) { if (nir_src_as_const_value(instr->src[src].src) == NULL) @@ -429,8 +433,9 @@ no_unsigned_wrap(const nir_alu_instr *instr) } static inline bool -is_integral(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_integral(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) { const struct ssa_result_range r = nir_analyze_range(ht, instr, src); @@ -441,9 +446,9 @@ is_integral(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, * Is the value finite? 
*/ static inline bool -is_finite(UNUSED struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, UNUSED unsigned num_components, - UNUSED const uint8_t *swizzle) +is_finite(UNUSED struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, unsigned num_components, + const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); @@ -453,17 +458,18 @@ is_finite(UNUSED struct hash_table *ht, const nir_alu_instr *instr, #define RELATION(r) \ static inline bool \ -is_ ## r (struct hash_table *ht, const nir_alu_instr *instr, \ - unsigned src, UNUSED unsigned num_components, \ - UNUSED const uint8_t *swizzle) \ +is_ ## r (struct hash_table *ht, UNUSED const shader_info *info, \ + const nir_alu_instr *instr, unsigned src, \ + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle)\ { \ - const struct ssa_result_range v = nir_analyze_range(ht, instr, src); \ + const struct ssa_result_range v = nir_analyze_range(ht, instr, src); \ return v.range == r; \ } \ \ static inline bool \ -is_a_number_ ## r (struct hash_table *ht, const nir_alu_instr *instr, \ - unsigned src, UNUSED unsigned num_components, \ +is_a_number_ ## r (struct hash_table *ht, UNUSED const shader_info *info, \ + const nir_alu_instr *instr, unsigned src, \ + UNUSED unsigned num_components, \ UNUSED const uint8_t *swizzle) \ { \ const struct ssa_result_range v = nir_analyze_range(ht, instr, src); \ @@ -477,7 +483,8 @@ RELATION(ge_zero) RELATION(ne_zero) static inline bool -is_not_negative(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, +is_not_negative(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); @@ -485,8 +492,8 @@ is_not_negative(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, } static inline bool -is_a_number_not_negative(struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, UNUSED unsigned num_components, +is_a_number_not_negative(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); @@ -496,17 +503,18 @@ is_a_number_not_negative(struct hash_table *ht, const nir_alu_instr *instr, static inline bool -is_not_positive(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_not_positive(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); return v.range == le_zero || v.range == lt_zero || v.range == eq_zero; } static inline bool -is_a_number_not_positive(struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, UNUSED unsigned num_components, - UNUSED const uint8_t *swizzle) +is_a_number_not_positive(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, + UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); return v.is_a_number && @@ -514,16 +522,17 @@ is_a_number_not_positive(struct hash_table *ht, const nir_alu_instr *instr, } static inline bool 
-is_not_zero(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_not_zero(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, + UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); return v.range == lt_zero || v.range == gt_zero || v.range == ne_zero; } static inline bool -is_a_number_not_zero(struct hash_table *ht, const nir_alu_instr *instr, - unsigned src, UNUSED unsigned num_components, +is_a_number_not_zero(struct hash_table *ht, UNUSED const shader_info *info, + const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); @@ -532,8 +541,8 @@ is_a_number_not_zero(struct hash_table *ht, const nir_alu_instr *instr, } static inline bool -is_a_number(struct hash_table *ht, const nir_alu_instr *instr, unsigned src, - UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +is_a_number(struct hash_table *ht, UNUSED const shader_info *info, const nir_alu_instr *instr, + unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { const struct ssa_result_range v = nir_analyze_range(ht, instr, src); return v.is_a_number; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c index 47466b6bb0e9..69f5917b5fcf 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c @@ -691,11 +691,11 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base, /* If one of the sources is known to be a number (i.e., not NaN), then * better code can be generated by passing that information along. */ - if (is_a_number(bld_base->range_ht, instr, 1, + if (is_a_number(bld_base->range_ht, &bld_base->shader->info, instr, 1, 0 /* unused num_components */, NULL /* unused swizzle */)) { minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN; - } else if (is_a_number(bld_base->range_ht, instr, 0, + } else if (is_a_number(bld_base->range_ht, &bld_base->shader->info, instr, 0, 0 /* unused num_components */, NULL /* unused swizzle */)) { first = 1; -- GitLab From 2deed55f7c62f158c917c03e62c2c14fb3bbd51f Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 4 Sep 2020 14:34:59 +0100 Subject: [PATCH 07/10] nir: use float controls in is_finite, is_a_number and add helpers for -0.0 fossil-db (GFX10.3): Totals from 6914 (4.73% of 146267) affected shaders: VGPRs: 534816 -> 534800 (-0.00%) CodeSize: 67746152 -> 67760484 (+0.02%); split: -0.00%, +0.02% Instrs: 12914451 -> 12914101 (-0.00%); split: -0.00%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_search_helpers.h | 97 ++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 15 deletions(-) diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 3ddbb42f348d..83f0196d6030 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -442,20 +442,6 @@ is_integral(struct hash_table *ht, UNUSED const shader_info *info, return r.is_integral; } -/** - * Is the value finite? 
- */ -static inline bool -is_finite(UNUSED struct hash_table *ht, UNUSED const shader_info *info, - const nir_alu_instr *instr, unsigned src, unsigned num_components, - const uint8_t *swizzle) -{ - const struct ssa_result_range v = nir_analyze_range(ht, instr, src); - - return v.is_finite; -} - - #define RELATION(r) \ static inline bool \ is_ ## r (struct hash_table *ht, UNUSED const shader_info *info, \ @@ -541,11 +527,92 @@ is_a_number_not_zero(struct hash_table *ht, UNUSED const shader_info *info, } static inline bool -is_a_number(struct hash_table *ht, UNUSED const shader_info *info, const nir_alu_instr *instr, +is_signed_zero_inf_nan_preserve_for_instr(const shader_info *info, + const nir_alu_instr *instr, + unsigned src) +{ + if (instr->exact) { + /* These instructions produce a non-NaN result for a NaN input. + * Additional care must be taken in these cases. + */ + if (instr->op == nir_op_fmin || instr->op == nir_op_fmax || + instr->op == nir_op_fsat || instr->op == nir_op_fsign || + nir_alu_instr_is_comparison(instr)) + return true; + } + + nir_ssa_def *def = instr->src[src].src.ssa; + uint16_t controls = info->float_controls_execution_mode; + return nir_is_float_control_signed_zero_inf_nan_preserve(controls, def->bit_size); +} + +/** + * Is the value finite and not NaN? + */ +static inline bool +is_finite(struct hash_table *ht, const shader_info *info, const nir_alu_instr *instr, + unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) +{ + if (!is_signed_zero_inf_nan_preserve_for_instr(info, instr, src)) + return true; + + const struct ssa_result_range v = nir_analyze_range(ht, instr, src); + return v.is_finite; +} + +static inline bool +is_a_number(struct hash_table *ht, const shader_info *info, const nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components, UNUSED const uint8_t *swizzle) { + if (!is_signed_zero_inf_nan_preserve_for_instr(info, instr, src)) + return true; + const struct ssa_result_range v = nir_analyze_range(ht, instr, src); return v.is_a_number; } +static inline bool +can_elim_negative_zero(struct hash_table *ht, const shader_info *info, + const nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) +{ + if (!is_signed_zero_inf_nan_preserve_for_instr(info, instr, src)) + return true; + + if (!nir_src_is_const(instr->src[src].src)) + return is_not_zero(ht, info, instr, src, num_components, swizzle); + + for (unsigned i = 0; i < num_components; i++) { + double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]); + if (val == 0.0 && signbit(val)) + return false; + } + + return true; +} + +static inline bool +is_finite_and_can_elim_neg_zero(struct hash_table *ht, const shader_info *info, + const nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle) +{ + if (!is_signed_zero_inf_nan_preserve_for_instr(info, instr, src)) + return true; + + const struct ssa_result_range v = nir_analyze_range(ht, instr, src); + + bool neg_zero = false; + if (nir_src_is_const(instr->src[src].src)) { + for (unsigned i = 0; i < num_components; i++) { + double val = nir_src_comp_as_float(instr->src[src].src, swizzle[i]); + if (val == 0.0 && signbit(val)) + return false; + } + } else { + neg_zero = v.range != lt_zero && v.range != gt_zero && v.range != ne_zero; + } + + return !neg_zero && v.is_finite; +} + #endif /* _NIR_SEARCH_ */ -- GitLab From 24d126d7f9acac85e76057be5d621e0f4fd7a0e0 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 4 Feb 2021 16:59:07 +0000 Subject: [PATCH 08/10] 
nir/algebraic: mark several optimizations as imprecise instead of inexact This enables the optimizations even if float controls are used. There are probably more that could be marked imprecise. fossil-db (GFX10.3, dxvk float controls): Totals from 72756 (49.74% of 146267) affected shaders: VGPRs: 3249200 -> 3261712 (+0.39%); split: -0.09%, +0.48% SpillSGPRs: 12269 -> 12267 (-0.02%); split: -0.06%, +0.04% CodeSize: 260352544 -> 259045808 (-0.50%); split: -0.56%, +0.06% MaxWaves: 1730252 -> 1726434 (-0.22%); split: +0.04%, -0.26% Instrs: 49455234 -> 49061239 (-0.80%); split: -0.84%, +0.05% Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 393355f40c8b..8a71963b5b85 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -172,7 +172,7 @@ optimizations = [ (('iadd', a, 0), a), (('usadd_4x8_vc4', a, 0), a), (('usadd_4x8_vc4', a, ~0), ~0), - (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), + (imprecise('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), (('iand', ('ior', a, b), ('ior', a, c)), ('ior', a, ('iand', b, c))), (('ior', ('iand', a, b), ('iand', a, c)), ('iand', a, ('ior', b, c))), @@ -1071,12 +1071,12 @@ optimizations.extend([ (('~fexp2', ('fmul', ('flog2', a), 0.5)), ('fsqrt', a)), (('~fexp2', ('fmul', ('flog2', a), 2.0)), ('fmul', a, a)), (('~fexp2', ('fmul', ('flog2', a), 4.0)), ('fmul', ('fmul', a, a), ('fmul', a, a))), - (('~fpow', a, 1.0), a), - (('~fpow', a, 2.0), ('fmul', a, a)), - (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), - (('~fpow', 2.0, a), ('fexp2', a)), - (('~fpow', ('fpow', a, 2.2), 0.454545), a), - (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), + (imprecise('fpow', a, 1.0), ('fcanonicalize', a)), + (imprecise('fpow', a, 2.0), ('fmul', a, a)), + (imprecise('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), + (imprecise('fpow', 2.0, a), ('fexp2', a)), + (imprecise('fpow', ('fpow', a, 2.2), 0.454545), ('fcanonicalize', a)), + (imprecise('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), @@ -1088,12 +1088,12 @@ optimizations.extend([ (('bcsel', ('flt', a, 0.0), 0.0, ('fsqrt', a)), ('fsqrt', ('fmax', a, 0.0))), (('~fmul', ('fsqrt', a), ('fsqrt', a)), ('fabs',a)), # Division and reciprocal - (('~fdiv', 1.0, a), ('frcp', a)), + (imprecise('fdiv', 1.0, a), ('frcp', a)), (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), (('~frcp', ('frcp', a)), a), - (('~frcp', ('fsqrt', a)), ('frsq', a)), + (imprecise('frcp', ('fsqrt', a)), ('frsq', a)), (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), - (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), + (imprecise('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), # Trig (('fsin', a), lowered_sincos(0.5), 'options->lower_sincos'), (('fcos', a), lowered_sincos(0.75), 'options->lower_sincos'), @@ -1373,20 +1373,20 @@ optimizations.extend([ (('imul', ('ineg', a), b), ('ineg', ('imul', a, b))), # Propagate constants up multiplication chains - (('~fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 
'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)), + (imprecise('fmul(is_used_once)', ('fmul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fmul', ('fmul', a, c), b)), (('imul(is_used_once)', ('imul(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('imul', ('imul', a, c), b)), # Prefer moving out a multiplication for more MAD/FMA-friendly code - (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c'), ('fadd', ('fadd', a, c), b)), - (('~fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('fadd', ('fadd', a, c), b)), + (imprecise('fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_fmul)'), '#c(is_finite)'), ('fadd', ('fadd', a, c), b)), + (imprecise('fadd(is_used_once)', ('fadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c(is_finite)'), ('fadd', ('fadd', a, c), b)), (('iadd(is_used_once)', ('iadd(is_used_once)', 'a(is_not_const)', 'b(is_not_const)'), '#c'), ('iadd', ('iadd', a, c), b)), # Reassociate constants in add/mul chains so they can be folded together. # For now, we mostly only handle cases where the constants are separated by # a single non-constant. We could do better eventually. - (('~fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)), + (imprecise('fmul', '#a', ('fmul', 'b(is_not_const)', '#c')), ('fmul', ('fmul', a, c), b)), (('imul', '#a', ('imul', 'b(is_not_const)', '#c')), ('imul', ('imul', a, c), b)), - (('~fadd', '#a', ('fadd', 'b(is_not_const)', '#c')), ('fadd', ('fadd', a, c), b)), - (('~fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))), + (imprecise('fadd', '#a', ('fadd', 'b(is_not_const)', '#c')), ('fadd', ('fadd', a, c), b)), + (imprecise('fadd', '#a', ('fneg', ('fadd', 'b(is_not_const)', '#c'))), ('fadd', ('fadd', a, ('fneg', c)), ('fneg', b))), (('iadd', '#a', ('iadd', 'b(is_not_const)', '#c')), ('iadd', ('iadd', a, c), b)), (('iand', '#a', ('iand', 'b(is_not_const)', '#c')), ('iand', ('iand', a, c), b)), (('ior', '#a', ('ior', 'b(is_not_const)', '#c')), ('ior', ('ior', a, c), b)), -- GitLab From e6f340099f07f437a76cd3b27808c102198f3fc5 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 24 Feb 2021 10:31:06 +0000 Subject: [PATCH 09/10] nir/algebraic: make several optimizations safe using fcanonicalize and search helpers fossil-db (GFX10.3): Totals from 1830 (1.25% of 146267) affected shaders: VGPRs: 124504 -> 124528 (+0.02%); split: -0.03%, +0.04% CodeSize: 14211268 -> 14205716 (-0.04%); split: -0.06%, +0.02% MaxWaves: 31346 -> 31316 (-0.10%); split: +0.03%, -0.13% Instrs: 2661768 -> 2660899 (-0.03%); split: -0.06%, +0.02% fossil-db (GFX10.3, dxvk float controls): Totals from 20972 (14.34% of 146267) affected shaders: VGPRs: 1321496 -> 1318680 (-0.21%); split: -0.34%, +0.12% CodeSize: 132952848 -> 131962576 (-0.74%); split: -0.75%, +0.00% MaxWaves: 375254 -> 377140 (+0.50%); split: +0.54%, -0.04% Instrs: 25243319 -> 25018928 (-0.89%); split: -0.90%, +0.01% Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 52 ++++++++++----------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 8a71963b5b85..3eb0e90f99c2 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -155,7 +155,7 @@ optimizations = [ (('irem', a, '#b(is_neg_power_of_two)'), 
('irem', a, ('iabs', b)), '!options->lower_bitops'), - (('~fneg', ('fneg', a)), a), + (('fneg', ('fneg', a)), ('fcanonicalize', a)), (('ineg', ('ineg', a)), a), (('fabs', ('fneg', a)), ('fabs', a)), (('fabs', ('u2f', a)), ('u2f', a)), @@ -163,12 +163,7 @@ optimizations = [ (('iabs', ('ineg', a)), ('iabs', a)), (('f2b', ('fneg', a)), ('f2b', a)), (('i2b', ('ineg', a)), ('i2b', a)), - (('~fadd', a, 0.0), a), - # a+0.0 is 'a' unless 'a' is denormal or -0.0. If it's only used by a - # floating point instruction, they should flush any input denormals and we - # can replace -0.0 with 0.0 if the float execution mode allows it. - (('fadd(is_only_used_as_float)', 'a@16', 0.0), a, '!'+signed_zero_inf_nan_preserve_16), - (('fadd(is_only_used_as_float)', 'a@32', 0.0), a, '!'+signed_zero_inf_nan_preserve_32), + (('fadd', 'a(can_elim_negative_zero)', 0.0), ('fcanonicalize', a)), (('iadd', a, 0), a), (('usadd_4x8_vc4', a, 0), a), (('usadd_4x8_vc4', a, ~0), ~0), @@ -183,18 +178,11 @@ optimizations = [ (('~fadd', ('fneg', a), ('fadd', a, b)), b), (('~fadd', a, ('fadd', ('fneg', a), b)), b), (('fadd', ('fsat', a), ('fsat', ('fneg', a))), ('fsat', ('fabs', a))), - (('~fmul', a, 0.0), 0.0), - # The only effect a*0.0 should have is when 'a' is infinity, -0.0 or NaN - (('fmul', 'a@16', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_16), - (('fmul', 'a@32', 0.0), 0.0, '!'+signed_zero_inf_nan_preserve_32), + (('fmul', 'a(is_finite_and_can_elim_neg_zero)', 0.0), 0.0), (('imul', a, 0), 0), (('umul_unorm_4x8_vc4', a, 0), 0), (('umul_unorm_4x8_vc4', a, ~0), a), - (('~fmul', a, 1.0), a), - # The only effect a*1.0 can have is flushing denormals. If it's only used by - # a floating point instruction, they should flush any input denormals and - # this multiplication isn't needed. - (('fmul(is_only_used_as_float)', a, 1.0), a), + (('fmul', a, 1.0), ('fcanonicalize', a)), (('imul', a, 1), a), (('fmul', a, -1.0), ('fneg', a)), (('imul', a, -1), ('ineg', a)), @@ -204,16 +192,14 @@ optimizations = [ # If a != a: fsign(a)*a*a => 0*NaN*NaN => abs(NaN)*NaN (('fmul', ('fsign', a), ('fmul', a, a)), ('fmul', ('fabs', a), a)), (('fmul', ('fmul', ('fsign', a), a), a), ('fmul', ('fabs', a), a)), - (('~ffma', 0.0, a, b), b), - (('ffma@16(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_16), - (('ffma@32(is_only_used_as_float)', 0.0, a, b), b, '!'+signed_zero_inf_nan_preserve_32), + (('ffma', 0.0, 'a(is_finite_and_can_elim_neg_zero)', b), ('fcanonicalize', b)), (('~ffma', a, b, 0.0), ('fmul', a, b)), (('ffma@16', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_16), (('ffma@32', a, b, 0.0), ('fmul', a, b), '!'+signed_zero_inf_nan_preserve_32), (('ffma', 1.0, a, b), ('fadd', a, b)), (('ffma', -1.0, a, b), ('fadd', ('fneg', a), b)), - (('~flrp', a, b, 0.0), a), - (('~flrp', a, b, 1.0), b), + (('flrp', a, 'b(is_finite)', 0.0), ('fcanonicalize', a)), + (('flrp', 'a(is_finite)', b, 1.0), ('fcanonicalize', b)), (('~flrp', a, a, b), a), (('~flrp', 0.0, a, b), ('fmul', a, b)), @@ -586,8 +572,8 @@ optimizations.extend([ (('bcsel', a, a, b), ('ior', a, b)), (('bcsel', a, b, False), ('iand', a, b)), (('bcsel', a, b, a), ('iand', a, b)), - (('~fmin', a, a), a), - (('~fmax', a, a), a), + (('fmin', a, a), ('fcanonicalize', a)), + (('fmax', a, a), ('fcanonicalize', a)), (('imin', a, a), a), (('imax', a, a), a), (('umin', a, a), a), @@ -635,9 +621,9 @@ optimizations.extend([ (('imin', a, ('ineg', a)), ('ineg', ('iabs', a))), (('fmin', a, ('fneg', ('fabs', a))), ('fneg', ('fabs', a))), (('imin', a, ('ineg', ('iabs', 
a))), ('ineg', ('iabs', a))), - (('~fmin', a, ('fabs', a)), a), + (('fmin', a, ('fabs', a)), ('fcanonicalize', a)), (('imin', a, ('iabs', a)), a), - (('~fmax', a, ('fneg', ('fabs', a))), a), + (('fmax', a, ('fneg', ('fabs', a))), ('fcanonicalize', a)), (('imax', a, ('ineg', ('iabs', a))), a), (('fmax', a, ('fabs', a)), ('fabs', a)), (('imax', a, ('iabs', a)), ('iabs', a)), @@ -1090,7 +1076,7 @@ optimizations.extend([ # Division and reciprocal (imprecise('fdiv', 1.0, a), ('frcp', a)), (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), - (('~frcp', ('frcp', a)), a), + (imprecise('frcp', ('frcp', a)), ('fcanonicalize', a)), (imprecise('frcp', ('fsqrt', a)), ('frsq', a)), (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), (imprecise('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), @@ -1124,7 +1110,7 @@ optimizations.extend([ (('bcsel@64', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a))), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'), (('bcsel', a, b, b), b), - (('~fcsel', a, b, b), b), + (('fcsel', a, b, b), ('fcanonicalize', b)), # D3D Boolean emulation (('bcsel', a, -1, 0), ('ineg', ('b2i', 'a@1'))), @@ -1152,7 +1138,7 @@ optimizations.extend([ (('inot', ('f2b1', a)), ('feq', a, 0.0)), # Conversions from 16 bits to 32 bits and back can always be removed - (('f2fmp', ('f2f32', 'a@16')), a), + (('f2fmp', ('f2f32', 'a@16')), ('fcanonicalize', a)), (('i2imp', ('i2i32', 'a@16')), a), (('i2imp', ('u2u32', 'a@16')), a), @@ -1172,7 +1158,7 @@ optimizations.extend([ # Conversions to 16 bits would be lossy so they should only be removed if # the instruction was generated by the precision lowering pass. - (('f2f32', ('f2fmp', 'a@32')), a), + (('f2f32', ('f2fmp', 'a@32')), ('fcanonicalize', a)), (('i2i32', ('i2imp', 'a@32')), a), (('u2u32', ('i2imp', 'a@32')), a), @@ -1181,16 +1167,14 @@ optimizations.extend([ (('f2f32', ('i2fmp', 'a@32')), ('i2f32', a)), (('f2f32', ('u2fmp', 'a@32')), ('u2f32', a)), - # Conversions from float32 to float64 and back can be removed as long as - # it doesn't need to be precise, since the conversion may e.g. flush denorms - (('f2f32', ('f2f64', 'a@32')), a), + (('f2f32', ('f2f64', 'a@32')), ('fcanonicalize', a)), (('ffloor', 'a(is_integral)'), a), (('fceil', 'a(is_integral)'), a), (('ftrunc', 'a(is_integral)'), a), # fract(x) = x - floor(x), so fract(NaN) = NaN (('~ffract', 'a(is_integral)'), 0.0), - (('fabs', 'a(is_not_negative)'), a), + (('fabs', 'a(is_not_negative)'), ('fcanonicalize', a)), (('iabs', 'a(is_not_negative)'), a), (('fsat', 'a(is_not_positive)'), 0.0), @@ -2287,7 +2271,7 @@ late_optimizations = [ # nir_lower_to_source_mods will collapse this, but its existence during the # optimization loop can prevent other optimizations. 
- (('fneg', ('fneg', a)), a), + (('fneg', ('fneg', a)), ('fcanonicalize', a)), # Subtractions get lowered during optimization, so we need to recombine them (('fadd', a, ('fneg', 'b')), ('fsub', 'a', 'b'), 'options->has_fsub'), -- GitLab From 4e4e671388c314037c77c8173e82e93310421ef4 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 24 Feb 2021 12:53:46 +0000 Subject: [PATCH 10/10] nir: look through bcsel in is_only_used_as_float and is_created_as_float fossil-db (GFX10.3): Totals from 1 (0.00% of 146267) affected shaders: CodeSize: 1740 -> 1736 (-0.23%) Instrs: 313 -> 312 (-0.32%) fossil-db (GFX10.3, dxvk float controls): Totals from 143 (0.10% of 146267) affected shaders: CodeSize: 1492300 -> 1491152 (-0.08%) Instrs: 300391 -> 300101 (-0.10%) Signed-off-by: Rhys Perry Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir.h | 14 ++++++++++++++ src/compiler/nir/nir_search_helpers.h | 15 +++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index bf60ced5f6e4..09f7a1718e53 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1248,6 +1248,20 @@ nir_op_is_vec(nir_op op) } } +static inline bool +nir_op_is_bcsel(nir_op op) +{ + switch (op) { + case nir_op_bcsel: + case nir_op_b8csel: + case nir_op_b16csel: + case nir_op_b32csel: + return true; + default: + return false; + } +} + static inline bool nir_is_float_control_signed_zero_inf_nan_preserve(unsigned execution_mode, unsigned bit_size) { diff --git a/src/compiler/nir/nir_search_helpers.h b/src/compiler/nir/nir_search_helpers.h index 83f0196d6030..f2fc000da1db 100644 --- a/src/compiler/nir/nir_search_helpers.h +++ b/src/compiler/nir/nir_search_helpers.h @@ -284,6 +284,11 @@ is_created_as_float(struct hash_table *ht, UNUSED const shader_info *info, if (src_alu == NULL) return false; + if (nir_op_is_bcsel(src_alu->op)) { + return is_created_as_float(ht, info, src_alu, 1, 0, NULL) && + is_created_as_float(ht, info, src_alu, 2, 0, NULL); + } + nir_alu_type output_type = nir_op_infos[src_alu->op].output_type; return nir_alu_type_get_base_type(output_type) == nir_type_float; } @@ -353,8 +358,14 @@ is_only_used_as_float(const nir_alu_instr *instr) assert(instr != user_alu); unsigned index = (nir_alu_src*)container_of(src, nir_alu_src, src) - user_alu->src; - if (nir_op_infos[user_alu->op].input_types[index] != nir_type_float) - return false; + + if (index != 0 && nir_op_is_bcsel(user_alu->op)) { + if (!is_only_used_as_float(user_alu)) + return false; + } else { + if (nir_op_infos[user_alu->op].input_types[index] != nir_type_float) + return false; + } } return true; -- GitLab
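
A note on the semantics PATCH 09 leans on: under exact float rules, fmul(a, 1.0)
is not a no-op, because hardware running with flush-to-zero/denormals-are-zero
flushes a subnormal 'a' (and may canonicalize NaN payloads), so folding the
multiply away can change results; rewriting to fcanonicalize(a) keeps those side
effects representable while leaving something later passes can still remove. The
standalone C program below illustrates the hazard; it is not part of the series
and assumes an x86 host with SSE3 so the FTZ/DAZ control bits are available.

#include <stdio.h>
#include <float.h>
#include <xmmintrin.h>  /* _MM_SET_FLUSH_ZERO_MODE (FTZ) */
#include <pmmintrin.h>  /* _MM_SET_DENORMALS_ZERO_MODE (DAZ) */

int main(void)
{
   volatile float a = FLT_MIN / 2.0f;  /* a subnormal (denormal) value */

   /* Emulate a GPU ALU that flushes denormals. */
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
   _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);

   volatile float product = a * 1.0f;  /* flushed to +0.0f under FTZ/DAZ */

   /* Turn flushing back off so the float->double promotion in printf
    * doesn't also flush the original value. */
   _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF);
   _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);

   printf("a        = %a\n", a);        /* 0x1p-127: nonzero */
   printf("a * 1.0f = %a\n", product);  /* 0x0p+0: the multiply mattered */
   return 0;
}

Built with a plain "gcc demo.c" on x86-64, the two printed values differ: that
difference is exactly what the old '~'/imprecise-only rules were allowed to
ignore and what the new fcanonicalize forms preserve.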
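
Why PATCH 10 needs a special case: in nir_op_infos the value operands of bcsel
are typeless (nir_type_uint); only source 0, the condition, has a real type. A
float that merely passes through a select therefore fails the plain
input_types/output_type checks even when both selected values are floats, which
is what the recursion above fixes. A minimal sketch of the idea, where
src_is_float() is a hypothetical stand-in for whatever per-source type query a
caller already has:

#include "nir.h"

/* True when 'alu' is a select whose two value operands are float-typed,
 * using the nir_op_is_bcsel() helper added in this patch. */
static bool
bcsel_selects_floats(const nir_alu_instr *alu,
                     bool (*src_is_float)(const nir_alu_instr *, unsigned))
{
   if (!nir_op_is_bcsel(alu->op))
      return false;
   /* Source 0 is the boolean condition; only sources 1 and 2 carry the
    * selected values, so only their types matter. */
   return src_is_float(alu, 1) && src_is_float(alu, 2);
}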