Commit a61c388d authored by Karol Herbst's avatar Karol Herbst 🐧
Browse files

nvc0/ir: replace cvt instructions with add to improve shader performance



Gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and
gp107.

Reduces the number of generated convert instructions by roughly 30% in
shader-db.

v2: only for 32 bit operations
    move some common code out of the switch
    handle OP_SAT with modifiers
v3: only for registers and const memory
    rework if clauses
    merge isCvt into this patch
v4: merge isCvt into its use
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu>
parent a203eaa4
......@@ -719,6 +719,66 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb)
bb->remove(bb->getEntry());
}
// Turns a same-type ABS / NEG / SAT, which would otherwise end up as an f2f
// or i2i conversion, into a faster ADD whose first operand is rZero. The
// original operation is carried by source modifiers or the saturate flag:
//   fabs(a)     -> fadd(0, abs a)
//   fneg(a)     -> fadd(neg 0, neg a)
//   ineg(a)     -> iadd(0, neg a)
//   fneg(abs a) -> fadd(neg 0, neg abs a)
//   sat(a)      -> sat add(0, a)
// If any of the guards below fails, the instruction is left untouched.
void
NVC0LegalizePostRA::replaceCvt(Instruction *cvt)
{
   const bool isFloat = isFloatType(cvt->sType);

   // Non-float operations are only handled when they are 4 bytes wide.
   if (!isFloat && typeSizeof(cvt->sType) != 4)
      return;
   // A real type conversion has to stay a conversion.
   if (cvt->dType != cvt->sType)
      return;

   // Only register and const-memory sources are rewritten. Other files could
   // be made to work, but they only show up with optimizations disabled,
   // where we don't really care either way.
   switch (cvt->src(0).getFile()) {
   case FILE_GPR:
   case FILE_MEMORY_CONST:
      break;
   default:
      return;
   }

   // Modifier currently attached to the value being converted; captured
   // before the sources are shuffled further down.
   Modifier srcMod = cvt->src(0).mod;
   // zeroMod ends up on the rZero operand, valMod on the original value.
   Modifier zeroMod, valMod;

   switch (cvt->op) {
   case OP_ABS:
      // Float only, and only when the source carries no modifier.
      if (srcMod || !isFloat)
         return;
      zeroMod = 0;
      valMod = NV50_IR_MOD_ABS;
      break;
   case OP_NEG:
      if (isFloat) {
         // A float source may be unmodified or carry exactly abs.
         if (srcMod && srcMod != Modifier(NV50_IR_MOD_ABS))
            return;
         // NOTE(review): the zero operand is negated as well, presumably so
         // the sign of a zero result matches a real negation -- confirm.
         zeroMod = NV50_IR_MOD_NEG;
      } else {
         // An integer source must be unmodified.
         if (srcMod)
            return;
         zeroMod = 0;
      }
      if (srcMod == Modifier(NV50_IR_MOD_ABS))
         valMod = NV50_IR_MOD_NEG_ABS;
      else
         valMod = NV50_IR_MOD_NEG;
      break;
   case OP_SAT:
      // An integer source with an abs modifier is not rewritten.
      if (!isFloat && srcMod.abs())
         return;
      zeroMod = 0;
      valMod = srcMod;
      cvt->saturate = true;
      break;
   default:
      return;
   }

   // Rewrite in place: the old source moves to slot 1, rZero takes slot 0.
   cvt->op = OP_ADD;
   cvt->moveSources(0, 1);
   cvt->setSrc(0, rZero);
   cvt->src(0).mod = zeroMod;
   cvt->src(1).mod = valMod;
}
bool
NVC0LegalizePostRA::visit(BasicBlock *bb)
{
......@@ -758,6 +818,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
next = hi;
}
if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
replaceCvt(i);
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
}
......
......@@ -81,6 +81,7 @@ private:
virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
void replaceCvt(Instruction *);
void replaceZero(Instruction *);
bool tryReplaceContWithBra(BasicBlock *);
void propagateJoin(BasicBlock *);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment