Commit 36ee55ff authored by Connor Abbott

bifrost: Add support for min/max mode and rounding mode

We split the original TwoSrcFmod category in two, since the two unknown
bits can mean two different things. They either specify the rounding
mode or the min/max mode. The nan_wins mode is used for implementing
exp() and exp2(), although the other 2 non-default modes are sadly
unused by the compiler. The different rounding modes aren't used for GL,
although apparently they can be used in OpenCL.
parent d56feffb
......@@ -260,6 +260,61 @@ static void DumpOutputMod(unsigned mod)
}
}
/* Print the textual suffix for the min/max mode `mod` of a MIN/MAX
 * instruction.
 *
 * Mode 0 is the default and produces no output; any value without an entry
 * in the table below also produces no output.
 */
static void DumpMinMaxMode(unsigned mod)
{
    static const char *const suffixes[] = {
        /* 0: Same as fmax() and fmin() -- return the other number if any
         * number is NaN. Also always return +0 if one argument is +0 and
         * the other is -0. This is the default, so nothing is printed.
         */
        NULL,

        /* 1: Instead of never returning a NaN, always return one. The
         * "greater"/"lesser" NaN is always returned, first by checking the
         * sign and then the mantissa bits.
         */
        ".nan_wins",

        /* 2: For max, implement src0 > src1 ? src0 : src1
         *    For min, implement src0 < src1 ? src0 : src1
         *
         * NaN's and the sign of 0 are handled differently from the modes
         * above, since +0 and -0 compare equal and comparisons always
         * return false for NaN's. As a result, this mode is *not*
         * commutative.
         */
        ".src1_wins",

        /* 3: For max, implement src0 < src1 ? src1 : src0
         *    For min, implement src0 > src1 ? src1 : src0
         */
        ".src0_wins",
    };

    if (mod >= sizeof(suffixes) / sizeof(suffixes[0]))
        return;

    const char *suffix = suffixes[mod];
    if (suffix != NULL)
        printf("%s", suffix);
}
/* Print the textual suffix for the rounding mode `mod`.
 *
 * The names follow the IEEE rounding-direction attributes. Mode 0
 * (roundTiesToEven, the IEEE default) and any value other than 1-3 print
 * nothing.
 */
static void DumpRoundMode(unsigned mod)
{
    const char *suffix;

    if (mod == 1) {
        /* roundTowardPositive in the IEEE spec. */
        suffix = ".round_pos";
    } else if (mod == 2) {
        /* roundTowardNegative in the IEEE spec. */
        suffix = ".round_neg";
    } else if (mod == 3) {
        /* roundTowardZero in the IEEE spec. */
        suffix = ".round_zero";
    } else {
        /* roundTiesToEven (the default) or an unrecognized value. */
        return;
    }

    printf("%s", suffix);
}
struct FMA {
uint64_t src0 : 3;
uint64_t op : 20;
......@@ -268,13 +323,15 @@ struct FMA {
/* Category of an FMA-slot opcode. findFMAOpInfo() switches on the category
 * to choose how many low bits of the op field to mask off before comparing
 * against the opcode table, and DumpFMA() uses it to decide which modifiers
 * and sources to print.
 *
 * NOTE(review): this listing is a diff view, so it shows both the pre-commit
 * names (the *Fmod* entries) and the names that replace them.
 */
enum FMASrcType {
FMAOneSrc,
FMATwoSrc,
/* Pre-commit names for the two-source float categories; the commit splits
 * them into the FAdd and FMinMax categories below.
 */
FMATwoSrcFmod,
FMATwoSrcFmod16,
/* DumpFMA() prints a rounding mode (DumpRoundMode) for these. */
FMAFAdd,
/* DumpFMA() prints a min/max mode (DumpMinMaxMode) for these. */
FMAFMinMax,
/* Counterparts of the two entries above, used by the .v2f16 table entries. */
FMAFAdd16,
FMAFMinMax16,
FMAFcmp,
FMAFcmp16,
FMAThreeSrc,
/* Pre-commit names for the categories now called FMAFMA / FMAFMA16. */
FMAThreeSrcFmod,
FMAThreeSrcFmod16,
/* Used by FMA.f32 / FMA.v2f16; DumpFMA() prints a rounding mode for these. */
FMAFMA,
FMAFMA16,
FMAFourSrc,
};
......@@ -285,9 +342,9 @@ struct FMAOpInfo {
};
static const FMAOpInfo FMAOpInfos[] = {
{ 0x00000, "FMA.f32", FMAThreeSrcFmod },
{ 0x40000, "MAX.f32", FMATwoSrcFmod },
{ 0x44000, "MIN.f32", FMATwoSrcFmod },
{ 0x00000, "FMA.f32", FMAFMA },
{ 0x40000, "MAX.f32", FMAFMinMax },
{ 0x44000, "MIN.f32", FMAFMinMax },
{ 0x48000, "FCMP.GL", FMAFcmp },
{ 0x4c000, "FCMP.D3D", FMAFcmp },
{ 0x4ff98, "ADD.i32", FMATwoSrc },
......@@ -301,7 +358,7 @@ static const FMAOpInfo FMAOpInfos[] = {
// compute FMA of first three sources, then add the fourth argument to the
// scale (modify scale)
{ 0x54000, "FMA_MSCALE", FMAFourSrc },
{ 0x58000, "ADD.f32", FMATwoSrcFmod },
{ 0x58000, "ADD.f32", FMAFAdd },
{ 0x5c000, "CSEL.FEQ.f32", FMAFourSrc },
{ 0x5c200, "CSEL.FGT.f32", FMAFourSrc },
{ 0x5c400, "CSEL.FGE.f32", FMAFourSrc },
......@@ -358,9 +415,9 @@ static const FMAOpInfo FMAOpInfos[] = {
{ 0x66a00, "ARSHIFT_ADD.i32", FMAThreeSrc },
{ 0x66e00, "ARSHIFT_SUB.i32", FMAThreeSrc },
{ 0x67200, "ARSHIFT_RSUB.i32", FMAThreeSrc },
{ 0x80000, "FMA.v2f16", FMAThreeSrcFmod16 },
{ 0xc0000, "MAX.v2f16", FMATwoSrcFmod16 },
{ 0xc4000, "MIN.v2f16", FMATwoSrcFmod16 },
{ 0x80000, "FMA.v2f16", FMAFMA16 },
{ 0xc0000, "MAX.v2f16", FMAFMinMax16 },
{ 0xc4000, "MIN.v2f16", FMAFMinMax16 },
{ 0xc8000, "FCMP.GL", FMAFcmp16 },
{ 0xcc000, "FCMP.D3D", FMAFcmp16 },
{ 0xcf900, "ADD.v2i16", FMATwoSrc },
......@@ -369,7 +426,7 @@ static const FMAOpInfo FMAOpInfos[] = {
{ 0xcfd90, "ADD.i32.u16.X", FMATwoSrc },
{ 0xcfdc0, "ADD.i32.i16.Y", FMATwoSrc },
{ 0xcfdd0, "ADD.i32.u16.Y", FMATwoSrc },
{ 0xd8000, "ADD.v2f16", FMATwoSrcFmod16 },
{ 0xd8000, "ADD.v2f16", FMAFAdd16 },
{ 0xdc000, "CSEL.FEQ.v2f16", FMAFourSrc },
{ 0xdc200, "CSEL.FGT.v2f16", FMAFourSrc },
{ 0xdc400, "CSEL.FGE.v2f16", FMAFourSrc },
......@@ -468,12 +525,14 @@ static FMAOpInfo findFMAOpInfo(unsigned op)
case FMAThreeSrc:
opCmp = op & ~0x3f;
break;
case FMATwoSrcFmod:
case FMATwoSrcFmod16:
case FMAFAdd:
case FMAFMinMax:
case FMAFAdd16:
case FMAFMinMax16:
opCmp = op & ~0x3fff;
break;
case FMAThreeSrcFmod:
case FMAThreeSrcFmod16:
case FMAFMA:
case FMAFMA16:
opCmp = op & ~0x3ffff;
break;
case FMAFourSrc:
......@@ -577,12 +636,27 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
FMAOpInfo info = findFMAOpInfo(FMA.op);
printf("%s", info.name);
if (info.srcType == FMATwoSrcFmod ||
info.srcType == FMAThreeSrcFmod ||
info.srcType == FMATwoSrcFmod16 ||
info.srcType == FMAThreeSrcFmod16) {
// output modifiers
if (info.srcType == FMAFAdd ||
info.srcType == FMAFMinMax ||
info.srcType == FMAFMA ||
info.srcType == FMAFAdd16 ||
info.srcType == FMAFMinMax16 ||
info.srcType == FMAFMA16) {
DumpOutputMod(bits(FMA.op, 12, 14));
switch (info.srcType) {
case FMAFAdd:
case FMAFMA:
case FMAFAdd16:
case FMAFMA16:
DumpRoundMode(bits(FMA.op, 10, 12));
break;
case FMAFMinMax:
case FMAFMinMax16:
DumpMinMaxMode(bits(FMA.op, 10, 12));
break;
default:
assert(0);
}
} else if (info.srcType == FMAFcmp || info.srcType == FMAFcmp16) {
DumpFCMP(bits(FMA.op, 10, 13));
if (info.srcType == FMAFcmp)
......@@ -609,7 +683,8 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
printf(", ");
DumpSrc(FMA.op & 0x7, regs, consts, true);
break;
case FMATwoSrcFmod:
case FMAFAdd:
case FMAFMinMax:
if (FMA.op & 0x10)
printf("-");
if (FMA.op & 0x200)
......@@ -628,7 +703,8 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
if (FMA.op & 0x8)
printf(")");
break;
case FMATwoSrcFmod16: {
case FMAFAdd16:
case FMAFMinMax16: {
bool abs1 = FMA.op & 0x8;
bool abs2 = (FMA.op & 0x7) < FMA.src0;
if (FMA.op & 0x10)
......@@ -683,7 +759,7 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
printf(", ");
DumpSrc((FMA.op >> 3) & 0x7, regs, consts, true);
break;
case FMAThreeSrcFmod:
case FMAFMA:
if (FMA.op & (1 << 14))
printf("-");
if (FMA.op & (1 << 9))
......@@ -708,7 +784,7 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
if (FMA.op & (1 << 17))
printf(")");
break;
case FMAThreeSrcFmod16:
case FMAFMA16:
if (FMA.op & (1 << 14))
printf("-");
DumpSrc(FMA.src0, regs, consts, true);
......@@ -743,9 +819,10 @@ struct ADD {
enum ADDSrcType {
ADDOneSrc,
ADDTwoSrc,
ADDTwoSrcFmod,
ADDTwoSrcFmod16,
ADDTwoSrcFmod16Commutative,
ADDFAdd,
ADDFMinMax,
ADDFAdd16,
ADDFMinMax16,
ADDThreeSrc,
ADDFcmp,
ADDFcmp16,
......@@ -766,9 +843,9 @@ struct ADDOpInfo {
};
static const ADDOpInfo ADDOpInfos[] = {
{ 0x00000, "MAX.f32", ADDTwoSrcFmod },
{ 0x02000, "MIN.f32", ADDTwoSrcFmod },
{ 0x04000, "ADD.f32", ADDTwoSrcFmod },
{ 0x00000, "MAX.f32", ADDFMinMax },
{ 0x02000, "MIN.f32", ADDFMinMax },
{ 0x04000, "ADD.f32", ADDFAdd },
{ 0x06000, "FCMP.GL", ADDFcmp },
{ 0x07000, "FCMP.D3D", ADDFcmp },
{ 0x07856, "F16_TO_I16", ADDOneSrc },
......@@ -875,9 +952,9 @@ static const ADDOpInfo ADDOpInfos[] = {
{ 0x0f6d0, "UCMP.D3D.GT", ADDTwoSrc },
{ 0x0f6d8, "UCMP.D3D.GE", ADDTwoSrc },
{ 0x0f6e0, "ICMP.D3D.EQ", ADDTwoSrc },
{ 0x10000, "MAX.v2f16", ADDTwoSrcFmod16Commutative },
{ 0x12000, "MIN.v2f16", ADDTwoSrcFmod16Commutative },
{ 0x14000, "ADD.v2f16", ADDTwoSrcFmod16 },
{ 0x10000, "MAX.v2f16", ADDFMinMax16 },
{ 0x12000, "MIN.v2f16", ADDFMinMax16 },
{ 0x14000, "ADD.v2f16", ADDFAdd16 },
{ 0x17000, "FCMP.D3D", ADDFcmp16 },
{ 0x178c0, "ADD.i32", ADDTwoSrc },
{ 0x17900, "ADD.v2i16", ADDTwoSrc },
......@@ -962,11 +1039,12 @@ static ADDOpInfo findADDOpInfo(unsigned op)
case ADDTex:
opCmp = op & ~0xf;
break;
case ADDTwoSrcFmod:
case ADDTwoSrcFmod16:
case ADDFAdd:
case ADDFMinMax:
case ADDFAdd16:
opCmp = op & ~0x1fff;
break;
case ADDTwoSrcFmod16Commutative:
case ADDFMinMax16:
opCmp = op & ~0xfff;
break;
case ADDFcmp:
......@@ -1078,9 +1156,13 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
printf("%s", info.name);
// float16 seems like it doesn't support output modifiers
if (info.srcType == ADDTwoSrcFmod) {
if (info.srcType == ADDFAdd || info.srcType == ADDFMinMax) {
// output modifiers
DumpOutputMod(bits(ADD.op, 8, 10));
if (info.srcType == ADDFAdd)
DumpRoundMode(bits(ADD.op, 10, 12));
else
DumpMinMaxMode(bits(ADD.op, 10, 12));
} else if (info.srcType == ADDFcmp || info.srcType == ADDFcmp16) {
DumpFCMP(bits(ADD.op, 3, 6));
if (info.srcType == ADDFcmp)
......@@ -1384,7 +1466,8 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
printf(", ");
DumpSrc((ADD.op >> 3) & 0x7, regs, consts, false);
break;
case ADDTwoSrcFmod:
case ADDFAdd:
case ADDFMinMax:
if (ADD.op & 0x10)
printf("-");
if (ADD.op & 0x1000)
......@@ -1422,7 +1505,7 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
if (ADD.op & 0x8)
printf(")");
break;
case ADDTwoSrcFmod16:
case ADDFAdd16:
if (ADD.op & 0x10)
printf("-");
if (ADD.op & 0x1000)
......@@ -1441,7 +1524,7 @@ static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts,
if (ADD.op & 0x8)
printf(")");
break;
case ADDTwoSrcFmod16Commutative: {
case ADDFMinMax16: {
bool abs1 = ADD.op & 0x8;
bool abs2 = (ADD.op & 0x7) < ADD.src0;
if (ADD.op & 0x10)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment