Commit 3b308147 authored by Jason Ekstrand, committed by Jason Ekstrand
Browse files

nir/algebraic: Optimize 1-bit Booleans


Reviewed-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent 44227453
......@@ -210,8 +210,8 @@ class Constant(Value):
self._bit_size = None
if isinstance(self.value, bool):
assert self._bit_size is None or self._bit_size == 32
self._bit_size = 32
assert self._bit_size is None or self._bit_size == 1
self._bit_size = 1
def hex(self):
if isinstance(self.value, (bool)):
......@@ -258,8 +258,10 @@ class Variable(Value):
self._bit_size = int(m.group('bits')) if m.group('bits') else None
if self.required_type == 'bool':
assert self._bit_size is None or self._bit_size == 32
self._bit_size = 32
if self._bit_size is not None:
assert self._bit_size in type_sizes(self.required_type)
else:
self._bit_size = 1
if self.required_type is not None:
assert self.required_type in ('float', 'bool', 'int', 'uint')
......@@ -277,34 +279,6 @@ class Variable(Value):
_opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)(?:@(?P<bits>\d+))?"
r"(?P<cond>\([^\)]+\))?")
opcode_remap = {
'flt' : 'flt32',
'fge' : 'fge32',
'feq' : 'feq32',
'fne' : 'fne32',
'ilt' : 'ilt32',
'ige' : 'ige32',
'ieq' : 'ieq32',
'ine' : 'ine32',
'ult' : 'ult32',
'uge' : 'uge32',
'ball_iequal2' : 'b32all_iequal2',
'ball_iequal3' : 'b32all_iequal3',
'ball_iequal4' : 'b32all_iequal4',
'bany_inequal2' : 'b32any_inequal2',
'bany_inequal3' : 'b32any_inequal3',
'bany_inequal4' : 'b32any_inequal4',
'ball_fequal2' : 'b32all_fequal2',
'ball_fequal3' : 'b32all_fequal3',
'ball_fequal4' : 'b32all_fequal4',
'bany_fnequal2' : 'b32any_fnequal2',
'bany_fnequal3' : 'b32any_fnequal3',
'bany_fnequal4' : 'b32any_fnequal4',
'bcsel' : 'b32csel',
}
class Expression(Value):
def __init__(self, expr, name_base, varset):
Value.__init__(self, expr, name_base, "expression")
......@@ -314,8 +288,6 @@ class Expression(Value):
assert m and m.group('opcode') is not None
self.opcode = m.group('opcode')
if self.opcode in opcode_remap:
self.opcode = opcode_remap[self.opcode]
self._bit_size = int(m.group('bits')) if m.group('bits') else None
self.inexact = m.group('inexact') is not None
self.cond = m.group('cond')
......
......@@ -122,17 +122,17 @@ optimizations = [
(('~flrp', a, b, 1.0), b),
(('~flrp', a, a, b), a),
(('~flrp', 0.0, a, b), ('fmul', a, b)),
(('~flrp', a, b, ('b2f', 'c@32')), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~flrp', a, b, ('b2f', 'c@1')), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~flrp', a, 0.0, c), ('fadd', ('fmul', ('fneg', a), c), a)),
(('flrp@32', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp32'),
(('flrp@64', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp64'),
(('ffloor', a), ('fsub', a, ('ffract', a)), 'options->lower_ffloor'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@32')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', 'c@1')))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd@32', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp64'),
(('~fadd', a, ('fmul', ('b2f', 'c@32'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd', a, ('fmul', ('b2f', 'c@1'), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp32'),
(('~fadd@32', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp32'),
(('~fadd@64', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp64'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
......@@ -172,50 +172,50 @@ optimizations = [
# b2f(a) <= 0.0
# b2f(a) == 0.0 because b2f(a) can only be 0 or 1
# inot(a)
(('fge', 0.0, ('b2f', 'a@32')), ('inot', a)),
(('fge', ('fneg', ('b2f', 'a@32')), 0.0), ('inot', a)),
(('fne', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('ior', a, b)),
(('fne', ('fmax', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('ior', a, b)),
(('fne', ('bcsel', a, 1.0, ('b2f', 'b@32')) , 0.0), ('ior', a, b)),
(('fne', ('b2f', 'a@32'), ('fneg', ('b2f', 'b@32'))), ('ior', a, b)),
(('fne', ('fmul', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('iand', a, b)),
(('fne', ('fmin', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('iand', a, b)),
(('fne', ('bcsel', a, ('b2f', 'b@32'), 0.0) , 0.0), ('iand', a, b)),
(('fne', ('fadd', ('b2f', 'a@32'), ('fneg', ('b2f', 'b@32'))), 0.0), ('ixor', a, b)),
(('fne', ('b2f', 'a@32') , ('b2f', 'b@32') ), ('ixor', a, b)),
(('fne', ('fneg', ('b2f', 'a@32')), ('fneg', ('b2f', 'b@32'))), ('ixor', a, b)),
(('feq', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('inot', ('ior', a, b))),
(('feq', ('fmax', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('inot', ('ior', a, b))),
(('feq', ('bcsel', a, 1.0, ('b2f', 'b@32')) , 0.0), ('inot', ('ior', a, b))),
(('feq', ('b2f', 'a@32'), ('fneg', ('b2f', 'b@32'))), ('inot', ('ior', a, b))),
(('feq', ('fmul', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('inot', ('iand', a, b))),
(('feq', ('fmin', ('b2f', 'a@32'), ('b2f', 'b@32')), 0.0), ('inot', ('iand', a, b))),
(('feq', ('bcsel', a, ('b2f', 'b@32'), 0.0) , 0.0), ('inot', ('iand', a, b))),
(('feq', ('fadd', ('b2f', 'a@32'), ('fneg', ('b2f', 'b@32'))), 0.0), ('ieq', a, b)),
(('feq', ('b2f', 'a@32') , ('b2f', 'b@32') ), ('ieq', a, b)),
(('feq', ('fneg', ('b2f', 'a@32')), ('fneg', ('b2f', 'b@32'))), ('ieq', a, b)),
(('fge', 0.0, ('b2f', 'a@1')), ('inot', a)),
(('fge', ('fneg', ('b2f', 'a@1')), 0.0), ('inot', a)),
(('fne', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('ior', a, b)),
(('fne', ('fmax', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('ior', a, b)),
(('fne', ('bcsel', a, 1.0, ('b2f', 'b@1')) , 0.0), ('ior', a, b)),
(('fne', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), ('ior', a, b)),
(('fne', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('iand', a, b)),
(('fne', ('fmin', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('iand', a, b)),
(('fne', ('bcsel', a, ('b2f', 'b@1'), 0.0) , 0.0), ('iand', a, b)),
(('fne', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ixor', a, b)),
(('fne', ('b2f', 'a@1') , ('b2f', 'b@1') ), ('ixor', a, b)),
(('fne', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('ixor', a, b)),
(('feq', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('ior', a, b))),
(('feq', ('fmax', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('ior', a, b))),
(('feq', ('bcsel', a, 1.0, ('b2f', 'b@1')) , 0.0), ('inot', ('ior', a, b))),
(('feq', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), ('inot', ('ior', a, b))),
(('feq', ('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('iand', a, b))),
(('feq', ('fmin', ('b2f', 'a@1'), ('b2f', 'b@1')), 0.0), ('inot', ('iand', a, b))),
(('feq', ('bcsel', a, ('b2f', 'b@1'), 0.0) , 0.0), ('inot', ('iand', a, b))),
(('feq', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('ieq', a, b)),
(('feq', ('b2f', 'a@1') , ('b2f', 'b@1') ), ('ieq', a, b)),
(('feq', ('fneg', ('b2f', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('ieq', a, b)),
# -(b2f(a) + b2f(b)) < 0
# 0 < b2f(a) + b2f(b)
# 0 != b2f(a) + b2f(b) b2f must be 0 or 1, so the sum is non-negative
# a || b
(('flt', ('fneg', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), 0.0), ('ior', a, b)),
(('flt', 0.0, ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), ('ior', a, b)),
(('flt', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('ior', a, b)),
(('flt', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('ior', a, b)),
# -(b2f(a) + b2f(b)) >= 0
# 0 >= b2f(a) + b2f(b)
# 0 == b2f(a) + b2f(b) b2f must be 0 or 1, so the sum is non-negative
# !(a || b)
(('fge', ('fneg', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), 0.0), ('inot', ('ior', a, b))),
(('fge', 0.0, ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), ('inot', ('ior', a, b))),
(('fge', ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), 0.0), ('inot', ('ior', a, b))),
(('fge', 0.0, ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('inot', ('ior', a, b))),
# Some optimizations (below) convert things like (a < b || c < b) into
# (min(a, c) < b). However, this interferes with the previous optimizations
# that try to remove comparisons with negated sums of b2f. This just
# breaks that apart.
(('flt', ('fmin', c, ('fneg', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32')))), 0.0),
(('flt', ('fmin', c, ('fneg', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1')))), 0.0),
('ior', ('flt', c, 0.0), ('ior', a, b))),
(('~flt', ('fadd', a, b), a), ('flt', b, 0.0)),
......@@ -237,13 +237,13 @@ optimizations = [
# The fge in the second replacement is not a typo. I leave the proof that
# "fmin(-b2f(a), b) >= 0 <=> fmin(-b2f(a), b) == 0" as an exercise for the
# reader.
(('fge', ('fmin', ('fneg', ('b2f', 'a@32')), 'b@32'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
(('feq', ('fmin', ('fneg', ('b2f', 'a@32')), 'b@32'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
(('fge', ('fmin', ('fneg', ('b2f', 'a@1')), 'b@1'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
(('feq', ('fmin', ('fneg', ('b2f', 'a@1')), 'b@1'), 0.0), ('iand', ('inot', a), ('fge', b, 0.0))),
(('feq', ('b2f', 'a@32'), 0.0), ('inot', a)),
(('fne', ('b2f', 'a@32'), 0.0), a),
(('ieq', ('b2i', 'a@32'), 0), ('inot', a)),
(('ine', ('b2i', 'a@32'), 0), a),
(('feq', ('b2f', 'a@1'), 0.0), ('inot', a)),
(('fne', ('b2f', 'a@1'), 0.0), a),
(('ieq', ('b2i', 'a@1'), 0), ('inot', a)),
(('ine', ('b2i', 'a@1'), 0), a),
(('fne', ('u2f', a), 0.0), ('ine', a, 0)),
(('feq', ('u2f', a), 0.0), ('ieq', a, 0)),
......@@ -277,10 +277,10 @@ optimizations = [
# 0.0 >= fabs(a)
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
(('fmax', ('b2f(is_used_once)', 'a@32'), ('b2f', 'b@32')), ('b2f', ('ior', a, b))),
(('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@32')), ('fneg', ('b2f', 'b@32'))), ('fneg', ('b2f', ('ior', a, b)))),
(('fmin', ('b2f(is_used_once)', 'a@32'), ('b2f', 'b@32')), ('b2f', ('iand', a, b))),
(('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@32')), ('fneg', ('b2f', 'b@32'))), ('fneg', ('b2f', ('iand', a, b)))),
(('fmax', ('b2f(is_used_once)', 'a@1'), ('b2f', 'b@1')), ('b2f', ('ior', a, b))),
(('fmax', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('ior', a, b)))),
(('fmin', ('b2f(is_used_once)', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
(('fmin', ('fneg(is_used_once)', ('b2f(is_used_once)', 'a@1')), ('fneg', ('b2f', 'b@1'))), ('fneg', ('b2f', ('iand', a, b)))),
# fmin(b2f(a), b)
# bcsel(a, fmin(b2f(a), b), fmin(b2f(a), b))
......@@ -289,7 +289,7 @@ optimizations = [
#
# Since b is a constant, constant folding will eliminate the fmin and the
# fmax. If b is > 1.0, the bcsel will be replaced with a b2f.
(('fmin', ('b2f', 'a@32'), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 0.0))),
(('fmin', ('b2f', 'a@1'), '#b'), ('bcsel', a, ('fmin', b, 1.0), ('fmin', b, 0.0))),
(('flt', ('fadd(is_used_once)', a, ('fneg', b)), 0.0), ('flt', a, b)),
......@@ -438,14 +438,14 @@ optimizations = [
(('fne', ('fneg', a), a), ('fne', a, 0.0)),
(('feq', ('fneg', a), a), ('feq', a, 0.0)),
# Emulating booleans
(('imul', ('b2i', 'a@32'), ('b2i', 'b@32')), ('b2i', ('iand', a, b))),
(('fmul', ('b2f', 'a@32'), ('b2f', 'b@32')), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', 'a@32'), ('b2f', 'b@32'))), ('b2f', ('ior', a, b))),
(('imul', ('b2i', 'a@1'), ('b2i', 'b@1')), ('b2i', ('iand', a, b))),
(('fmul', ('b2f', 'a@1'), ('b2f', 'b@1')), ('b2f', ('iand', a, b))),
(('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
(('iand', 'a@bool32', 1.0), ('b2f', a)),
# True/False are ~0 and 0 in NIR. b2i of True is 1, and -1 is ~0 (True).
(('ineg', ('b2i32', 'a@32')), a),
(('flt', ('fneg', ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', 'a@32')), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fneg', ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
(('flt', ('fsub', 0.0, ('b2f', 'a@1')), 0), a), # Generated by TGSI KILL_IF.
# Comparison with the same args. Note that these are not done for
# the float versions because NaN always returns false on float
# inequalities.
......@@ -536,14 +536,13 @@ optimizations = [
# Conversions
(('i2b32', ('b2i', 'a@32')), a),
(('i2b32', 'a@bool'), a),
(('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)),
(('i2b', ('ineg', a)), ('i2b', a)),
(('i2b', ('iabs', a)), ('i2b', a)),
(('fabs', ('b2f', a)), ('b2f', a)),
(('iabs', ('b2i', a)), ('b2i', a)),
(('inot', ('f2b32', a)), ('feq', a, 0.0)),
(('inot', ('f2b1', a)), ('feq', a, 0.0)),
# Ironically, mark these as imprecise because removing the conversions may
# preserve more precision than doing the conversions (e.g.,
......@@ -917,8 +916,8 @@ late_optimizations = [
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
(('b2f(is_used_more_than_once)', ('inot', 'a@32')), ('bcsel', a, 0.0, 1.0)),
(('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@32'))), ('bcsel', a, -0.0, -1.0)),
(('b2f(is_used_more_than_once)', ('inot', 'a@1')), ('bcsel', a, 0.0, 1.0)),
(('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@1'))), ('bcsel', a, -0.0, -1.0)),
# we do these late so that we don't get in the way of creating ffmas
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
......
  • Steven Newbury
    @sjnewbury started a thread
    Last updated by Steven Newbury
    • Hi @jekstrand

      I've been bisecting a severe rendering bug with Elite Dangerous running under DXVK and Mesa-19.x, it was fine with Mesa-18.3. It came to this commit, which makes sense that it could cause a problem with booleans from D3D, but I'm surprised it would have gone unnoticed until now, especially since the following commit optimizes that specific case!

    • I'll admit it's a long time for such a bug to sit around but it's entirely possible. This may have suddenly enabled new optimizations that weren't happening before that are somehow causing a problem. If you know what shader is misrendering and can get a diff between this commit and the previous one it might help figure out what's going wrong.

    • I'm more than happy to help find out, but I will need some pointers on how to capture shaders and work out which is broken. Visually, it looks like it could be the shader providing the ambient lighting/colouration from the local stars; it instead produces a posterised effect.

    • Is there a bugzilla bug for this? That'd be a better place to have this discussion than some random GitLab commit.

    • I made a comment on the bugs for ED on the Proton and DXVK bug trackers, but I haven't created a mesa one since I've been working to find out where the bug was coming from first. Once I found it worked with older Mesa I just started bisecting and left the comment here once I found it. Shall I do so now?

    • Yeah, that'd be a good idea.

    • Bugzilla

      I've attached screenshots to the bug.

    • Easy bug number to remember! :-)

    • I don't know if it's sufficient, but on advice from doitsujin (the DXVK lead) I've uploaded RenderDoc captures before and after the commit to the bugzilla bug.

    Please register or sign in to reply
  • mentioned in issue #867 (closed)

    Toggle commit list
  • Ian Romanick @idr

    mentioned in merge request !6358 (merged)

    ·

    mentioned in merge request !6358 (merged)

    Toggle commit list
  • mentioned in commit 72233905

    Toggle commit list
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment