diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c index 44eae29bd622d903380c0ed99cd2942e26d58696..ce06f9596ad73ae1e693682ab3b7deecb6ed57f0 100644 --- a/src/panfrost/bifrost/bi_opt_mod_props.c +++ b/src/panfrost/bifrost/bi_opt_mod_props.c @@ -25,6 +25,18 @@ #include "compiler.h" #include "bi_builder.h" +/* + * Due to a Bifrost encoding restriction, some instructions cannot have an abs + * modifier on both sources. Check if adding a fabs modifier to a given source + * of a binary instruction would cause this restriction to be hit. + */ +static bool +bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) +{ + return (arch <= 8) && I->src[1 - s].abs && + bi_is_word_equiv(I->src[1 - s], repl); +} + static bool bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) { @@ -32,9 +44,15 @@ bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) case BI_OPCODE_FCMP_V2F16: case BI_OPCODE_FMAX_V2F16: case BI_OPCODE_FMIN_V2F16: - /* Bifrost encoding restriction: can't have both abs if equal sources */ - return !(arch <= 8 && I->src[1 - s].abs - && bi_is_word_equiv(I->src[1 - s], repl)); + return !bi_would_impact_abs(arch, I, repl, s); + case BI_OPCODE_FADD_V2F16: + /* + * For FADD.v2f16, the FMA pipe has the abs encoding hazard, + * while the FADD pipe cannot encode a clamp. Either case in + * isolation can be worked around in the scheduler, but both + * together is impossible to encode. Avoid the hazard. + */ + return !(I->clamp && bi_would_impact_abs(arch, I, repl, s)); case BI_OPCODE_V2F32_TO_V2F16: /* TODO: Needs both match or lower */ return false; @@ -182,6 +200,10 @@ bi_takes_clamp(bi_instr *I) case BI_OPCODE_FMA_RSCALE_V2F16: case BI_OPCODE_FADD_RSCALE_F32: return false; + case BI_OPCODE_FADD_V2F16: + /* Encoding restriction */ + return !(I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1])); default: return bi_opcode_props[I->op].clamp; } diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 88162ecc86edf69ebbb783f12e29b52d972f5efc..4c6770e77198ec370f1fb0612b0dc89e8c17fb0f 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -490,6 +490,18 @@ bi_can_iaddc(bi_instr *ins) ins->src[1].swizzle == BI_SWIZZLE_H01); } +/* + * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs + * encodings are permitted by swapping operands; however, this scheme fails if + * both operands are equal. Test for this case. + */ +static bool +bi_impacted_abs(bi_instr *I) +{ + return I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1]); +} + bool bi_can_fma(bi_instr *ins) { @@ -497,6 +509,10 @@ bi_can_fma(bi_instr *ins) if (bi_can_iaddc(ins)) return true; + /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */ + if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) + return false; + /* TODO: some additional fp16 constraints */ return bi_opcode_props[ins->op].fma; } diff --git a/src/panfrost/bifrost/test/test-optimizer.cpp b/src/panfrost/bifrost/test/test-optimizer.cpp index 923386845b3403ca6d8458f762d3e4624f2301b3..9b4188806474a701c65cbf9f728cbdbd1da09423 100644 --- a/src/panfrost/bifrost/test/test-optimizer.cpp +++ b/src/panfrost/bifrost/test/test-optimizer.cpp @@ -107,6 +107,78 @@ TEST_F(Optimizer, FusedFABSNEGForFP16) bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y))); } +TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) +{ + CASE({ + bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + +TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) +{ + CASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + +TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp) +{ + NEGCASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + NEGCASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + NEGCASE({ + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + TEST_F(Optimizer, SwizzlesComposedForFP16) { CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y, BI_ROUND_RTP), diff --git a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp index f43754d29caed0d6e05122d34c9f798b59b9beca..93aaf7fe196442cb3ba227910e772e11e4c85e0e 100644 --- a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp +++ b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp @@ -107,3 +107,45 @@ TEST_F(SchedulerPredicates, RestrictionsOnModifiersOfSameCycleTemporaries) } } } + +TEST_F(SchedulerPredicates, RestrictionsOnFAddV2F16) +{ + bi_index x = bi_register(0); + bi_index y = bi_register(1); + + /* Basic */ + bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x, BI_ROUND_NONE); + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With imbalanced abs */ + fadd->src[0].abs = true; + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With equal abs */ + fadd->src[1].abs = true; + + ASSERT_FALSE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With equal abs but different sources */ + fadd->src[1] = bi_abs(y); + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With clamp */ + fadd->clamp = BI_CLAMP_CLAMP_M1_1; + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_FALSE(bi_can_add(fadd)); + + /* Impossible encoding (should never be seen) */ + fadd->src[1] = fadd->src[0]; + + ASSERT_FALSE(bi_can_fma(fadd)); + ASSERT_FALSE(bi_can_add(fadd)); +}