Commit 02dfc0c9 authored by Connor Abbott's avatar Connor Abbott

bifrost: Decode 64-bit additions better

parent 6fd32829
......@@ -334,11 +334,12 @@ enum FMASrcType {
FMAFMA16,
FMAFourSrc,
FMAFMAMscale,
FMAShiftAdd64,
};
struct FMAOpInfo {
unsigned op;
char name[20];
char name[30];
FMASrcType srcType;
};
......@@ -497,15 +498,70 @@ static const FMAOpInfo FMAOpInfos[] = {
{ 0xe1885, "FLOOR", FMAOneSrc },
{ 0xe19b0, "ATAN_LDEXP.Y.f32", FMATwoSrc },
{ 0xe19b8, "ATAN_LDEXP.X.f32", FMATwoSrc },
// This acts like a normal 32-bit add, except that it sets a flag on
// overflow that gets listened to by load/store instructions in the ADD
// part of the instruction, and added appropriately to the upper 32 bits of
// the address. It lets you efficiently add a 32-bit offset to a 64-bit
// pointer when loading/storing.
{ 0xe1c80, "ADD_ADDR", FMATwoSrc },
// Similar to the above, but used for normal additions (paired with
// ADD_HIGH32 in the ADD slot to do 64-bit addition).
{ 0xe1cc0, "ADD_LOW32", FMATwoSrc },
// These instructions in the FMA slot, together with LSHIFT_ADD_HIGH32.i32
// in the ADD slot, allow one to do a 64-bit addition with an extra small
// shift on one of the sources. There are three possible scenarios:
//
// 1) Full 64-bit addition. Do:
// out.x = LSHIFT_ADD_LOW32.i64 src1.x, src2.x, shift
// out.y = LSHIFT_ADD_HIGH32.i32 src1.y, src2.y
//
// The shift amount is applied to src2 before adding. The shift amount, and
// any extra bits from src2 plus the overflow bit, are sent directly from
// FMA to ADD instead of being passed explicitly. Hence, these two must be
// bundled together into the same instruction.
//
// 2) Add a 64-bit value src1 to a zero-extended 32-bit value src2. Do:
// out.x = LSHIFT_ADD_LOW32.u32 src1.x, src2, shift
// out.y = LSHIFT_ADD_HIGH32.i32 src1.y, 0
//
// Note that in this case, the second argument to LSHIFT_ADD_HIGH32 is
// ignored, so it can actually be anything. As before, the shift is applied
// to src2 before adding.
//
// 3) Add a 64-bit value to a sign-extended 32-bit value src2. Do:
// out.x = LSHIFT_ADD_LOW32.i32 src1.x, src2, shift
// out.y = LSHIFT_ADD_HIGH32.i32 src1.y, 0
//
// The only difference is the .i32 instead of .u32. Otherwise, this is
// exactly the same as before.
//
// In all these instructions, the shift amount is stored where the third
// source would be, so the shift has to be a small immediate from 0 to 7.
// This is fine for the expected use-case of these instructions, which is
// manipulating 64-bit pointers.
//
// These instructions can also be combined with various load/store
// instructions which normally take a 64-bit pointer in order to add a
// 32-bit or 64-bit offset to the pointer before doing the operation,
// optionally shifting the offset. The load/store op implicitly does
// LSHIFT_ADD_HIGH32.i32 internally. Letting ptr be the pointer, and offset
// the desired offset, the cases go as follows:
//
// 1) Add a 64-bit offset:
// LSHIFT_ADD_LOW32.i64 ptr.x, offset.x, shift
// ld_st_op ptr.y, offset.y, ...
//
// Note that the output of LSHIFT_ADD_LOW32.i64 is not used, instead being
// implicitly sent to the load/store op to serve as the low 32 bits of the
// pointer.
//
// 2) Add a 32-bit unsigned offset:
// temp = LSHIFT_ADD_LOW32.u32 ptr.x, offset, shift
// ld_st_op temp, ptr.y, ...
//
// Now, the low 32 bits of (offset << shift) + ptr are passed explicitly to
// the ld_st_op, to match the case where there is no offset and ld_st_op is
// called directly.
//
// 3) Add a 32-bit signed offset:
// temp = LSHIFT_ADD_LOW32.i32 ptr.x, offset, shift
// ld_st_op temp, ptr.y, ...
//
// Again, the same as the unsigned case, except that the offset is
// sign-extended instead of zero-extended.
{ 0xe1c80, "LSHIFT_ADD_LOW32.u32", FMAShiftAdd64 },
{ 0xe1cc0, "LSHIFT_ADD_LOW32.i64", FMAShiftAdd64 },
{ 0xe1d80, "LSHIFT_ADD_LOW32.i32", FMAShiftAdd64 },
{ 0xe1e00, "SEL.XX.i16", FMATwoSrc },
{ 0xe1e08, "SEL.YX.i16", FMATwoSrc },
{ 0xe1e10, "SEL.XY.i16", FMATwoSrc },
......@@ -532,6 +588,7 @@ static FMAOpInfo findFMAOpInfo(unsigned op)
opCmp = op & ~0x1fff;
break;
case FMAThreeSrc:
case FMAShiftAdd64:
opCmp = op & ~0x3f;
break;
case FMAFAdd:
......@@ -790,6 +847,13 @@ static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
DumpSrc(FMA.op & 0x7, regs, consts, true);
Dump16Swizzle((FMA.op >> 8) & 0x3);
break;
case FMAShiftAdd64:
DumpSrc(FMA.src0, regs, consts, true);
printf(", ");
DumpSrc(FMA.op & 0x7, regs, consts, true);
printf(", ");
printf("shift:%u", (FMA.op >> 3) & 0x7);
break;
case FMAThreeSrc:
DumpSrc(FMA.src0, regs, consts, true);
printf(", ");
......@@ -893,7 +957,7 @@ enum ADDSrcType {
struct ADDOpInfo {
unsigned op;
char name[20];
char name[30];
ADDSrcType srcType;
bool hasDataReg;
};
......@@ -953,7 +1017,7 @@ static const ADDOpInfo ADDOpInfos[] = {
{ 0x07bc5, "FLOG_FREXPE", ADDOneSrc },
{ 0x07d45, "CEIL", ADDOneSrc },
{ 0x07d85, "FLOOR", ADDOneSrc },
{ 0x07f18, "ADD_HIGH32", ADDTwoSrc },
{ 0x07f18, "LSHIFT_ADD_HIGH32.i32", ADDTwoSrc },
{ 0x08000, "LD_ATTR.f16", ADDLoadAttr, true },
{ 0x08100, "LD_ATTR.v2f16", ADDLoadAttr, true },
{ 0x08200, "LD_ATTR.v3f16", ADDLoadAttr, true },
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment