From 8f91e489c725812b639e8a59b6c4fb56c0c094b2 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig
Date: Thu, 6 May 2021 13:10:06 -0400
Subject: [PATCH] nir/opt_algebraic: Add fmul(a, 2.0) -> fadd(a, a)
Investigated for Bifrost, but should be the same or cheaper for any
reasonable architecture. For the compilers I maintain:
* Bifrost - fadd can be scheduled 2x as frequently as ffma, there is no
  fmul separate from ffma.
* Midgard - fadd x, x is used as a canonical form, again for easier
  scheduling.
* AGX - fmul and fadd are both native ops, but fmul is heavier weight
  (unknown whether this is a performance issue or just power
  consumption). Also saves a move / uniform file slot for the constant.
Since floating point multiplication is inherently more expensive than
addition, presumably this is a win for everyone else too.
Signed-off-by: Alyssa Rosenzweig
---
 src/compiler/nir/nir_opt_algebraic.py         | 4 ++++
 src/panfrost/midgard/midgard_nir_algebraic.py | 3 ---
 2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 48b184853f07..1f35e8bcd8de 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2382,6 +2382,10 @@ late_optimizations = [
(('ishr', a, 0), a),
(('ishr', a, 32), a),
(('ushr', a, 0), a),
+
+ # Strength reduction (fadd is cheaper than fmul on many architectures, and
+ # it saves a constant).
+ (('~fmul', a, 2.0), ('fadd', a, a))
]
# A few more extract cases we'd rather leave late
diff --git a/src/panfrost/midgard/midgard_nir_algebraic.py b/src/panfrost/midgard/midgard_nir_algebraic.py
index c66fd97dd7c9..ba8019bb2df6 100644
--- a/src/panfrost/midgard/midgard_nir_algebraic.py
+++ b/src/panfrost/midgard/midgard_nir_algebraic.py
@@ -66,9 +66,6 @@ algebraic_late = [
(('ishl', 'a@8', b), ('u2u8', ('u2u16', ('ishl', ('u2u32', ('u2u16', a)), b)))),
(('ishr', 'a@8', b), ('i2i8', ('i2i16', ('ishr', ('i2i32', ('i2i16', a)), b)))),
(('ushr', 'a@8', b), ('u2u8', ('u2u16', ('ushr', ('u2u32', ('u2u16', a)), b)))),
-
- # Canonical form. The scheduler will convert back if it makes sense.
- (('fmul', a, 2.0), ('fadd', a, a))
]
# Size conversion is redundant to Midgard but needed for NIR, and writing this

GitLab