Commit 71ef0ab6 authored by Connor Abbott

bifrost: Add FMA_MSCALE and ADD_MSCALE opcodes

Used mainly for implementing special functions, although they can also be
used to optimize post-multiplication by a constant power of two.
parent 36ee55ff
......@@ -333,6 +333,7 @@ enum FMASrcType {
FMAFMA,
FMAFMA16,
FMAFourSrc,
FMAFMAMscale,
};
struct FMAOpInfo {
......@@ -350,14 +351,7 @@ static const FMAOpInfo FMAOpInfos[] = {
{ 0x4ff98, "ADD.i32", FMATwoSrc },
{ 0x4ffd8, "SUB.i32", FMATwoSrc },
{ 0x4fff0, "SUBB.i32", FMATwoSrc },
// compute FMA of first three sources, then set exponent to the fourth
// source (as an integer).
{ 0x50000, "FMA_RSCALE", FMAFourSrc },
// Seems to compute src2 - src0 * src1... why don't they just use FMA?
{ 0x528c0, "FRCP_PT3", FMAThreeSrc },
// compute FMA of first three sources, then add the fourth argument to the
// scale (modify scale)
{ 0x54000, "FMA_MSCALE", FMAFourSrc },
{ 0x50000, "FMA_MSCALE", FMAFMAMscale },
{ 0x58000, "ADD.f32", FMAFAdd },
{ 0x5c000, "CSEL.FEQ.f32", FMAFourSrc },
{ 0x5c200, "CSEL.FGT.f32", FMAFourSrc },
......@@ -538,6 +532,9 @@ static FMAOpInfo findFMAOpInfo(unsigned op)
case FMAFourSrc:
opCmp = op & ~0x1ff;
break;
case FMAFMAMscale:
opCmp = op & ~0x7fff;
break;
}
if (FMAOpInfos[i].op == opCmp)
return FMAOpInfos[i];
......@@ -663,6 +660,32 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
printf(".f32");
else
printf(".v2f16");
} else if (info.srcType == FMAFMAMscale) {
if (FMA.op & (1 << 11)) {
switch ((FMA.op >> 9) & 0x3) {
case 0:
/* This mode seems to do a few things:
* - Makes 0 * infinity (and incidentally 0 * nan) return 0,
* since generating a nan would poison the result of
* 1/infinity and 1/0.
* - Fiddles with which nan is returned in nan * nan,
* presumably to make sure that the same exact nan is
* returned for 1/nan.
*/
printf(".rcp_mode");
break;
case 3:
/* Similar to the above, but src0 always wins when multiplying
* 0 by infinity.
*/
printf(".sqrt_mode");
break;
default:
printf(".unk%d_mode", (int) (FMA.op >> 9) & 0x3);
}
} else {
DumpOutputMod(bits(FMA.op, 9, 11));
}
}
printf(" ");
......@@ -807,6 +830,23 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
printf(", ");
DumpSrc((FMA.op >> 6) & 0x7, regs, consts, true);
break;
case FMAFMAMscale:
if (FMA.op & (1 << 12))
printf("abs(");
DumpSrc(FMA.src0, regs, consts, true);
if (FMA.op & (1 << 12))
printf(")");
printf(", ");
if (FMA.op & (1 << 13))
printf("-");
DumpSrc(FMA.op & 0x7, regs, consts, true);
printf(", ");
if (FMA.op & (1 << 14))
printf("-");
DumpSrc((FMA.op >> 3) & 0x7, regs, consts, true);
printf(", ");
DumpSrc((FMA.op >> 6) & 0x7, regs, consts, true);
break;
}
printf("\n");
}
......@@ -824,6 +864,7 @@ enum ADDSrcType {
ADDFAdd16,
ADDFMinMax16,
ADDThreeSrc,
ADDFAddMscale,
ADDFcmp,
ADDFcmp16,
ADDTexCompact, // texture instruction with embedded sampler
......@@ -953,6 +994,7 @@ static const ADDOpInfo ADDOpInfos[] = {
{ 0x0f6d8, "UCMP.D3D.GE", ADDTwoSrc },
{ 0x0f6e0, "ICMP.D3D.EQ", ADDTwoSrc },
{ 0x10000, "MAX.v2f16", ADDFMinMax16 },
{ 0x11000, "ADD_MSCALE.f32", ADDFAddMscale },
{ 0x12000, "MIN.v2f16", ADDFMinMax16 },
{ 0x14000, "ADD.v2f16", ADDFAdd16 },
{ 0x17000, "FCMP.D3D", ADDFcmp16 },
......@@ -1045,6 +1087,7 @@ static ADDOpInfo findADDOpInfo(unsigned op)
opCmp = op & ~0x1fff;
break;
case ADDFMinMax16:
case ADDFAddMscale:
opCmp = op & ~0xfff;
break;
case ADDFcmp:
......@@ -1169,6 +1212,21 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
printf(".f32");
else
printf(".v2f16");
} else if (info.srcType == ADDFAddMscale) {
switch ((ADD.op >> 6) & 0x7) {
case 0: break;
// causes GPU hangs on G71
case 1: printf(".invalid"); break;
// Same as usual outmod value.
case 2: printf(".clamp_0_1"); break;
// If src0 is infinite or NaN, flush it to zero so that the other
// source is passed through unmodified.
case 3: printf(".flush_src0_inf_nan"); break;
// Vice versa.
case 4: printf(".flush_src1_inf_nan"); break;
// Every other case seems to behave the same as the above?
default: printf(".unk%d", (ADD.op >> 6) & 0x7); break;
}
} else if (info.srcType == ADDVaryingInterp) {
if (ADD.op & 0x200)
printf(".reuse");
......@@ -1546,6 +1604,26 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
printf(")");
break;
}
case ADDFAddMscale: {
if (ADD.op & 0x400)
printf("-");
if (ADD.op & 0x200)
printf("abs(");
DumpSrc(ADD.src0, regs, consts, false);
if (ADD.op & 0x200)
printf(")");
printf(", ");
if (ADD.op & 0x800)
printf("-");
DumpSrc(ADD.op & 0x7, regs, consts, false);
printf(", ");
DumpSrc((ADD.op >> 3) & 0x7, regs, consts, false);
break;
}
case ADDFcmp:
if (ADD.op & 0x400) {
printf("-");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment