From 8e0eb592d5bbcf00f8bed55cc95013abf77fad12 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Feb 2022 19:33:29 -0500 Subject: [PATCH 1/4] pan/bi: Avoid *FADD.v2f16 hazard in optimizer This is a very obscure encoding restriction in the Bifrost ISA. Unknown if any real apps or tests hit this, but we still need to get it right sadly. Signed-off-by: Alyssa Rosenzweig Cc: mesa-stable Part-of: --- src/panfrost/bifrost/bi_opt_mod_props.c | 28 ++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/panfrost/bifrost/bi_opt_mod_props.c b/src/panfrost/bifrost/bi_opt_mod_props.c index 44eae29bd622..ce06f9596ad7 100644 --- a/src/panfrost/bifrost/bi_opt_mod_props.c +++ b/src/panfrost/bifrost/bi_opt_mod_props.c @@ -25,6 +25,18 @@ #include "compiler.h" #include "bi_builder.h" +/* + * Due to a Bifrost encoding restriction, some instructions cannot have an abs + * modifier on both sources. Check if adding a fabs modifier to a given source + * of a binary instruction would cause this restriction to be hit. + */ +static bool +bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) +{ + return (arch <= 8) && I->src[1 - s].abs && + bi_is_word_equiv(I->src[1 - s], repl); +} + static bool bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) { @@ -32,9 +44,15 @@ bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s) case BI_OPCODE_FCMP_V2F16: case BI_OPCODE_FMAX_V2F16: case BI_OPCODE_FMIN_V2F16: - /* Bifrost encoding restriction: can't have both abs if equal sources */ - return !(arch <= 8 && I->src[1 - s].abs - && bi_is_word_equiv(I->src[1 - s], repl)); + return !bi_would_impact_abs(arch, I, repl, s); + case BI_OPCODE_FADD_V2F16: + /* + * For FADD.v2f16, the FMA pipe has the abs encoding hazard, + * while the FADD pipe cannot encode a clamp. Either case in + * isolation can be worked around in the scheduler, but both + * together is impossible to encode. Avoid the hazard. + */ + return !(I->clamp && bi_would_impact_abs(arch, I, repl, s)); case BI_OPCODE_V2F32_TO_V2F16: /* TODO: Needs both match or lower */ return false; @@ -182,6 +200,10 @@ bi_takes_clamp(bi_instr *I) case BI_OPCODE_FMA_RSCALE_V2F16: case BI_OPCODE_FADD_RSCALE_F32: return false; + case BI_OPCODE_FADD_V2F16: + /* Encoding restriction */ + return !(I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1])); default: return bi_opcode_props[I->op].clamp; } -- GitLab From 24d2bdb1e050134a25924487792ee0018f8478ae Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Feb 2022 19:34:04 -0500 Subject: [PATCH 2/4] pan/bi: Avoid *FADD.v2f16 hazard in scheduler Obscure encoding restriction. Fixes crash (assertion fail when instruction packing) in asphalt9/2659.shader_test on Bifrost. Signed-off-by: Alyssa Rosenzweig Cc: mesa-stable Part-of: --- src/panfrost/bifrost/bi_schedule.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c index 88162ecc86ed..4c6770e77198 100644 --- a/src/panfrost/bifrost/bi_schedule.c +++ b/src/panfrost/bifrost/bi_schedule.c @@ -490,6 +490,18 @@ bi_can_iaddc(bi_instr *ins) ins->src[1].swizzle == BI_SWIZZLE_H01); } +/* + * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs + * encodings are permitted by swapping operands; however, this scheme fails if + * both operands are equal. Test for this case. + */ +static bool +bi_impacted_abs(bi_instr *I) +{ + return I->src[0].abs && I->src[1].abs && + bi_is_word_equiv(I->src[0], I->src[1]); +} + bool bi_can_fma(bi_instr *ins) { @@ -497,6 +509,10 @@ bi_can_fma(bi_instr *ins) if (bi_can_iaddc(ins)) return true; + /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */ + if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) + return false; + /* TODO: some additional fp16 constraints */ return bi_opcode_props[ins->op].fma; } -- GitLab From 9d95561c93c48d15786a9559cdc927218a68f03b Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Feb 2022 19:18:08 -0500 Subject: [PATCH 3/4] pan/bi: Test avoiding *FADD.v2f16 hazard in optimizer This hazard exists but is obscure enough to be missed on our existing test coverage (e.g the conformance tests). Add piles of unit tests for it. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/panfrost/bifrost/test/test-optimizer.cpp | 72 ++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/src/panfrost/bifrost/test/test-optimizer.cpp b/src/panfrost/bifrost/test/test-optimizer.cpp index 923386845b34..9b4188806474 100644 --- a/src/panfrost/bifrost/test/test-optimizer.cpp +++ b/src/panfrost/bifrost/test/test-optimizer.cpp @@ -107,6 +107,78 @@ TEST_F(Optimizer, FusedFABSNEGForFP16) bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y))); } +TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) +{ + CASE({ + bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }, { + bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + +TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) +{ + CASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + CASE({ + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }, { + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + +TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp) +{ + NEGCASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + NEGCASE({ + bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)), BI_ROUND_NONE); + I->clamp = BI_CLAMP_CLAMP_0_1; + }); + + NEGCASE({ + bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x), BI_ROUND_NONE)); + I->clamp = BI_CLAMP_CLAMP_0_INF; + }); +} + TEST_F(Optimizer, SwizzlesComposedForFP16) { CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y, BI_ROUND_RTP), -- GitLab From 8f4b3c749ed482a365671fce51e00fd3d6122a73 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 17 Feb 2022 19:40:03 -0500 Subject: [PATCH 4/4] pan/bi: Test avoiding FADD.v2f16 hazards in scheduler There are many of them, and integration testing of the scheduler won't hit every case. Add targeted unit tests for the various scheduling hazards of this funny instruction. Signed-off-by: Alyssa Rosenzweig Part-of: --- .../test/test-scheduler-predicates.cpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp index f43754d29cae..93aaf7fe1964 100644 --- a/src/panfrost/bifrost/test/test-scheduler-predicates.cpp +++ b/src/panfrost/bifrost/test/test-scheduler-predicates.cpp @@ -107,3 +107,45 @@ TEST_F(SchedulerPredicates, RestrictionsOnModifiersOfSameCycleTemporaries) } } } + +TEST_F(SchedulerPredicates, RestrictionsOnFAddV2F16) +{ + bi_index x = bi_register(0); + bi_index y = bi_register(1); + + /* Basic */ + bi_instr *fadd = bi_fadd_v2f16_to(b, TMP(), x, x, BI_ROUND_NONE); + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With imbalanced abs */ + fadd->src[0].abs = true; + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With equal abs */ + fadd->src[1].abs = true; + + ASSERT_FALSE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With equal abs but different sources */ + fadd->src[1] = bi_abs(y); + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_TRUE(bi_can_add(fadd)); + + /* With clamp */ + fadd->clamp = BI_CLAMP_CLAMP_M1_1; + + ASSERT_TRUE(bi_can_fma(fadd)); + ASSERT_FALSE(bi_can_add(fadd)); + + /* Impossible encoding (should never be seen) */ + fadd->src[1] = fadd->src[0]; + + ASSERT_FALSE(bi_can_fma(fadd)); + ASSERT_FALSE(bi_can_add(fadd)); +} -- GitLab