From 6ecb85794a58f1a0d2368418d6daea6ce97c75d8 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Mon, 10 May 2021 08:01:23 -0700 Subject: [PATCH] freedreno/ir3: Fix ir3_cf's handling of SpvOpQuantize. We can't generally fold a 32-bit float conversion into the generating 16-bit ALU op, because we don't know if the f2f16 from OpQuantize has already been folded into it. Fixes most of the failing opquantize tests. The shader-db stats look pretty trivial: total instructions in shared programs: 11889317 -> 11889304 (<.01%) instructions in affected programs: 13376 -> 13363 (-0.10%) total nops in shared programs: 3877890 -> 3877877 (<.01%) nops in affected programs: 4113 -> 4100 (-0.32%) total dwords in shared programs: 17893496 -> 17893506 (<.01%) dwords in affected programs: 18822 -> 18832 (0.05%) total full in shared programs: 420421 -> 420422 (<.01%) full in affected programs: 8 -> 9 (12.50%) total sstall in shared programs: 928726 -> 928664 (<.01%) sstall in affected programs: 495 -> 433 (-12.53%) all changes seem to be some noise in non-GLES (so no fp16) shaders, and a quick skim of some optmsgs didn't give me a clue as to what changed, but it's not about covs, just some sort of change in instruction scheduling. 
Closes: #3208 --- .../ci/deqp-freedreno-a630-fails.txt | 25 ------------------- src/freedreno/ir3/ir3_cf.c | 22 ++++++++-------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 2ec1dd70f655..0c212d57db55 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -152,36 +152,11 @@ dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denor dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nmax_denorm_preserve_vert,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nmin_denorm_preserve_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.float_controls.fp32.input_args.denorm_nmin_denorm_preserve_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_bit_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.carry_to_exponent_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_round_up_or_round_down_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.negative_too_small_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_geom,Fail 
-dEQP-VK.spirv_assembly.instruction.graphics.opquantize.positive_round_up_or_round_down_tessc,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_frag,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_tesse,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_inf_vert,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_geom,Fail dEQP-VK.spirv_assembly.instruction.graphics.opquantize.round_to_negative_inf_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_bit_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_carry_to_exponent_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_frag,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_tesse,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_negative_round_up_or_round_down_vert,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.spec_const_positive_round_up_or_round_down_tessc,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_geom,Fail -dEQP-VK.spirv_assembly.instruction.graphics.opquantize.too_small_tessc,Fail dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_single_buffer_geom,Fail dEQP-VK.spirv_assembly.instruction.graphics.variable_pointers.graphics.writes_two_buffers_geom,Fail dEQP-VK.spirv_assembly.instruction.spirv1p4.opcopylogical.nested_arrays_different_inner_stride,Fail diff --git 
a/src/freedreno/ir3/ir3_cf.c b/src/freedreno/ir3/ir3_cf.c index d479bc10759b..0ede500baa20 100644 --- a/src/freedreno/ir3/ir3_cf.c +++ b/src/freedreno/ir3/ir3_cf.c @@ -21,6 +21,18 @@ * SOFTWARE. */ +/** + * @file ir3_cf.c + * + * Folds f2f32(16-bit) operations into the generating ALU instruction when all + * uses of the ALU instr are an f2f32. + * + * Note that we can't fold f2f16(32-bit) operations into the generating ALU + * instruction because for SpvOpQuantize we need an f2f32(f2f16(x)) to actually + * do a conversion to 16-bit. If it's valid to elide that conversion (in GLSL), + * it should already have happened at the NIR level. + */ + #include "util/ralloc.h" #include "ir3.h" @@ -49,10 +61,6 @@ is_fp16_conv(struct ir3_instruction *instr) instr->cat1.dst_type == TYPE_F16) return true; - if (instr->cat1.src_type == TYPE_F16 && - instr->cat1.dst_type == TYPE_F32) - return true; - return false; } @@ -102,12 +110,6 @@ try_conversion_folding(struct ir3_instruction *conv) if (!is_alu(src)) return false; - /* avoid folding f2f32(f2f16) together, in cases where this is legal to - * do (glsl) nir should have handled that for us already: - */ - if (is_fp16_conv(src)) - return false; - switch (src->opc) { case OPC_SEL_B32: case OPC_SEL_B16: -- GitLab