Disasm.cpp 30.3 KB
Newer Older
1 2
#include "Disasm.h"
#include <stdio.h>
3 4 5
#include <inttypes.h>
#include <sstream>
#include <iomanip>
Connor Abbott's avatar
Connor Abbott committed
6
#include <vector>
7 8
#include <string>
#include <string.h>
9
#include <assert.h>
10

11
// Returns bits [lo, high) of `word` (lo inclusive, high exclusive), shifted
// down so that bit `lo` of the input becomes bit 0 of the result.
//
// `high` values of 32 or more select everything up to the top bit. The result
// is widened to 64 bits so callers can shift it further left without
// truncation. `lo` must be less than 32.
static uint64_t bits(uint32_t word, unsigned lo, unsigned high)
{
	if (high >= 32)
		return word >> lo;
	// Build the mask with an unsigned constant: `1 << 31` on a signed int
	// overflows.
	return (word & ((UINT32_C(1) << high) - 1)) >> lo;
}

19
// Raw register-port field of one instruction (35 bits in total), filled by
// memcpy from the packed instruction bits.
struct Regs {
	// Bit 7 set: the low 7 bits select a uniform pair. Otherwise the high
	// nibble selects a clause constant and the low nibble supplies low bits.
	uint64_t uniformConst : 8;
	uint64_t reg2 : 6;
	uint64_t reg3 : 6;
	// Only 5 bits wide; when ctrl == 0 bit 0 of reg1 supplies the sixth bit.
	uint64_t reg0 : 5;
	// When ctrl == 0 this does not name a register: bits 2+ carry the control
	// value, bit 1 disables the port-0 read and bit 0 extends reg0.
	uint64_t reg1 : 6;
	uint64_t ctrl : 4;
};

28 29 30 31 32 33 34 35 36 37
// Which register field (if any) supplies the destination register for an
// execution unit's result.
enum RegWriteUnit {
	RegWriteNone = 0, // result is not written to a register
	RegWrite2 = 1,    // destination index comes from reg2
	RegWrite3 = 2,    // destination index comes from reg3
};

// Decoded form of the register control field (see DecodeRegCtrl).
struct RegCtrl {
	bool readReg0;             // port 0 reads the register named by reg0
	bool readReg1;             // port 1 reads the register named by reg1
	bool readReg3;             // port 3 is used as a read port
	RegWriteUnit FMAWriteUnit; // where the FMA result is written, if anywhere
	RegWriteUnit ADDWriteUnit; // where the ADD result is written, if anywhere
	bool clauseStart;
};

// Decodes the register control value of `regs` into read/write port usage.
//
// When regs.ctrl is zero the control value is taken from reg1 >> 2 instead;
// in that case port 1 is never read and bit 1 of reg1 disables the port-0
// read. Unrecognized control values are reported on stdout and decode to
// "no writes, no port-3 read".
static RegCtrl DecodeRegCtrl(Regs regs)
{
	RegCtrl decoded = {};
	unsigned ctrl;
	if (regs.ctrl == 0) {
		ctrl = regs.reg1 >> 2;
		decoded.readReg0 = !(regs.reg1 & 0x2);
		decoded.readReg1 = false;
	} else {
		ctrl = regs.ctrl;
		decoded.readReg0 = decoded.readReg1 = true;
	}

	switch (ctrl) {
		case 1:
			decoded.FMAWriteUnit = RegWrite2;
			break;
		case 3:
			decoded.FMAWriteUnit = RegWrite2;
			decoded.readReg3 = true;
			break;
		case 4:
			decoded.readReg3 = true;
			break;
		case 5:
			decoded.ADDWriteUnit = RegWrite2;
			break;
		case 6:
			decoded.ADDWriteUnit = RegWrite2;
			decoded.readReg3 = true;
			break;
		case 8:
			decoded.clauseStart = true;
			break;
		case 9:
			decoded.FMAWriteUnit = RegWrite2;
			decoded.clauseStart = true;
			break;
		case 11:
			// Known value with no writes and no port-3 read.
			break;
		case 12:
			decoded.readReg3 = true;
			decoded.clauseStart = true;
			break;
		case 13:
			decoded.ADDWriteUnit = RegWrite2;
			decoded.clauseStart = true;
			break;
		case 15:
			decoded.FMAWriteUnit = RegWrite3;
			decoded.ADDWriteUnit = RegWrite2;
			break;
		default:
			printf("# unknown reg ctrl %u\n", ctrl);
			break;
	}

	return decoded;
}

// Pass in the ADDWriteUnit or FMAWriteUnit, and this returns which register
// the ADD/FMA units are writing to.
//
// Callers must not pass RegWriteNone; doing so trips the assert. When asserts
// are compiled out, 0 is returned instead of running off the end of the
// function.
static unsigned GetRegToWrite(RegWriteUnit unit, Regs regs)
{
	switch (unit) {
		case RegWrite2:
			return regs.reg2;
		case RegWrite3:
			return regs.reg3;
		case RegWriteNone:
			break;
	}
	assert(0);
	return 0;
}

// Prints a one-line "# ..." summary of how the register ports described by
// `srcs` are used: which registers are read, which port each unit writes
// through, and the uniform pair selected (if any).
static void DumpRegs(Regs srcs)
{
	RegCtrl ctrl = DecodeRegCtrl(srcs);
	printf("# ");
	if (ctrl.readReg0) {
		// reg0 is only 5 bits wide; when ctrl == 0 its sixth bit lives in
		// bit 0 of reg1. Print the combined value, as DumpSrc does.
		unsigned reg0 = srcs.reg0;
		if (srcs.ctrl == 0)
			reg0 |= (srcs.reg1 & 1) << 5;
		printf("port 0: R%u ", reg0);
	}

	if (ctrl.readReg1)
		printf("port 1: R%u ", (unsigned) srcs.reg1);

	if (ctrl.FMAWriteUnit == RegWrite2)
		printf("port 2: R%u (write FMA) ", (unsigned) srcs.reg2);
	else if (ctrl.ADDWriteUnit == RegWrite2)
		printf("port 2: R%u (write ADD) ", (unsigned) srcs.reg2);

	if (ctrl.FMAWriteUnit == RegWrite3)
		printf("port 3: R%u (write FMA) ", (unsigned) srcs.reg3);
	else if (ctrl.ADDWriteUnit == RegWrite3)
		printf("port 3: R%u (write ADD) ", (unsigned) srcs.reg3);
	else if (ctrl.readReg3)
		printf("port 3: R%u (read) ", (unsigned) srcs.reg3);

	// Bit 7 of uniformConst marks a uniform read; the low 7 bits select a
	// pair of uniforms, hence the factor of two.
	if (srcs.uniformConst & 0x80)
		printf("uniform: U%u", (unsigned) (srcs.uniformConst & 0x7f) * 2);

	printf("\n");
}

151 152 153 154 155 156 157 158 159 160
// Prints a 32-bit immediate twice: reinterpreted as a float, then as raw hex
// in parentheses.
static void DumpConstImm(uint32_t imm)
{
	// Copy the bit pattern into a float without converting the value.
	float asFloat;
	memcpy(&asFloat, &imm, sizeof(asFloat));
	printf("%f (0x%08x)", asFloat, imm);
}

161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
// Returns the 64-bit clause constant selected by srcs.uniformConst, with the
// low nibble of uniformConst OR'd into its low bits.
//
// The high nibble selects the slot in `consts` (4..7 -> slots 0..3, 2..3 ->
// slots 4..5). Any other selector trips the assert; when asserts are compiled
// out the constant part is treated as zero rather than read uninitialized.
static uint64_t GetConst(uint64_t *consts, Regs srcs)
{
	unsigned low_bits = srcs.uniformConst & 0xf;
	uint64_t imm = 0;
	switch (srcs.uniformConst >> 4) {
		case 4: imm = consts[0]; break;
		case 5: imm = consts[1]; break;
		case 6: imm = consts[2]; break;
		case 7: imm = consts[3]; break;
		case 2: imm = consts[4]; break;
		case 3: imm = consts[5]; break;
		default: assert(0); break;
	}
	return imm | low_bits;
}

177
static void DumpSrc(unsigned src, Regs srcs, uint64_t *consts, bool isFMA)
178 179
{
	switch (src) {
Connor Abbott's avatar
Connor Abbott committed
180 181 182 183 184 185 186
		case 0: {
			unsigned reg = srcs.reg0;
			if (srcs.ctrl == 0)
				reg |= (srcs.reg1 & 1) << 5;
			printf("R%d", reg);
			break;
		}
187
		case 1: printf("R%d", srcs.reg1); break;
Connor Abbott's avatar
Connor Abbott committed
188
		case 2: printf("R%d", srcs.reg3); break;
189
		case 3:
190
				if (isFMA)
191 192
					printf("0");
				else
193
					printf("T"); // i.e. the output of FMA this cycle
194 195 196 197 198
				break;
		case 4: {
			if (srcs.uniformConst & 0x80) {
				printf("U%d", (srcs.uniformConst & 0x7f) * 2);
			} else {
199 200 201
				unsigned low_bits = srcs.uniformConst & 0xf;
				uint32_t imm;
				bool valid = true;
202
				switch (srcs.uniformConst >> 4) {
203 204 205 206 207
					case 4: imm = ((uint32_t) consts[0]) | low_bits; break;
					case 5: imm = ((uint32_t) consts[1]) | low_bits; break;
					case 6: imm = ((uint32_t) consts[2]) | low_bits; break;
					case 7: imm = ((uint32_t) consts[3]) | low_bits; break;
					case 2: imm = ((uint32_t) consts[4]) | low_bits; break;
Connor Abbott's avatar
Connor Abbott committed
208
					case 3: imm = ((uint32_t) consts[5]) | low_bits; break;
209
					default: valid = false; break;
210
				}
211 212 213 214
				if (valid)
					DumpConstImm(imm);
				else
					printf("unkConstSrc");
215 216 217 218 219 220 221
			}
			break;
		}
		case 5: {
			if (srcs.uniformConst & 0x80) {
				printf("U%d", (srcs.uniformConst & 0x7f) * 2 + 1);
			} else {
222 223
				uint32_t imm;
				bool valid = true;
224
				switch (srcs.uniformConst >> 4) {
225 226 227 228 229
					case 4: imm = (uint32_t)(consts[0] >> 32); break;
					case 5: imm = (uint32_t)(consts[1] >> 32); break;
					case 6: imm = (uint32_t)(consts[2] >> 32); break;
					case 7: imm = (uint32_t)(consts[3] >> 32); break;
					case 2: imm = (uint32_t)(consts[4] >> 32); break;
Connor Abbott's avatar
Connor Abbott committed
230
					case 3: imm = (uint32_t)(consts[5] >> 32); break;
231
					default: valid = false; break;
232
				}
233 234 235 236
				if (valid)
					DumpConstImm(imm);
				else
					printf("unkConstSrc");
237 238 239 240 241 242 243 244 245 246 247 248 249 250
			}
			break;
		}
		case 6: printf("T0"); break;
		case 7: printf("T1"); break;
	}
}

// Prints the output-modifier suffix for `mod`. 0 and any value above 3 print
// nothing.
static void DumpOutputMod(unsigned mod)
{
	switch (mod) {
		case 0:
			break;
		case 1:
			printf(".clamp_0_inf"); // max(out, 0)
			break;
		case 2:
			printf(".clamp_m1_1"); // clamp(out, -1, 1)
			break;
		case 3:
			printf(".clamp_0_1"); // clamp(out, 0, 1)
			break;
		default:
			break;
	}
}

261
// Raw FMA-slot instruction bits (23 in total), filled by memcpy from the
// packed instruction word.
struct FMA {
	uint64_t src0 : 3; // first source selector (see DumpSrc)
	uint64_t op : 20;  // opcode, with the remaining sources and modifiers in its low bits
};

266 267 268 269
// Operand layout of an FMA opcode. It determines how many low bits of the op
// field hold sources/modifiers (see findFMAOpInfo) and how they are printed
// (see DumpFMA).
enum FMASrcType {
	FMAOneSrc,
	FMATwoSrc,
	FMATwoSrcFmod,   // two sources with float neg/abs and output modifiers
	FMAFcmp,         // float compare: two sources plus a condition field
	FMAThreeSrc,
	FMAThreeSrcFmod, // three sources with float neg/abs and output modifiers
	FMAFourSrc,
};

276
struct FMAOpInfo {
277
	unsigned op;
Connor Abbott's avatar
Connor Abbott committed
278
	char name[20];
279
	FMASrcType srcType;
280 281
};

282
// Known FMA opcodes. findFMAOpInfo scans this table linearly and returns the
// first entry whose op equals the instruction's op field with the operand
// bits for that entry's srcType masked off.
static const FMAOpInfo FMAOpInfos[] = {
	{ 0x00000, "FMA",  FMAThreeSrcFmod },
	{ 0x40000, "FMAX", FMATwoSrcFmod },
	{ 0x44000, "FMIN", FMATwoSrcFmod },
	{ 0x48000, "FCMP.GL", FMAFcmp },
	{ 0x4c000, "FCMP.D3D", FMAFcmp },
	{ 0x4ff98, "ADD", FMATwoSrc },
	{ 0x4ffd8, "SUB", FMATwoSrc },
	{ 0x4fff0, "SUBB", FMATwoSrc },
	// compute FMA of first three sources, then set exponent to the fourth
	// source (as an integer).
	{ 0x50000, "FMA_RSCALE", FMAFourSrc },
	// Seems to compute src2 - src0 * src1... why don't they just use FMA?
	{ 0x528c0, "FRCP_PT3", FMAThreeSrc },
	// compute FMA of first three sources, then add the fourth argument to the
	// scale (modify scale)
	{ 0x54000, "FMA_MSCALE", FMAFourSrc },
	{ 0x58000, "FADD", FMATwoSrcFmod },
	{ 0x5c000, "CSEL.FEQ", FMAFourSrc },
	{ 0x5c200, "CSEL.FGT", FMAFourSrc },
	{ 0x5c400, "CSEL.FGE", FMAFourSrc },
	{ 0x5c600, "CSEL.IEQ", FMAFourSrc },
	{ 0x5c800, "CSEL.IGT", FMAFourSrc },
	{ 0x5ca00, "CSEL.IGE", FMAFourSrc },
	{ 0x5cc00, "CSEL.UGT", FMAFourSrc },
	{ 0x5ce00, "CSEL.UGE", FMAFourSrc },
	{ 0x5de40, "ICMP.GL.GT", FMATwoSrc }, // src0 > src1 ? 1 : 0
	{ 0x5de48, "ICMP.GL.GE", FMATwoSrc },
	{ 0x5de50, "UCMP.GL.GT", FMATwoSrc },
	{ 0x5de58, "UCMP.GL.GE", FMATwoSrc },
	{ 0x5de60, "ICMP.GL.EQ", FMATwoSrc },
	{ 0x5dec0, "ICMP.D3D.GT", FMATwoSrc }, // src0 > src1 ? ~0 : 0
	{ 0x5dec8, "ICMP.D3D.GE", FMATwoSrc },
	{ 0x5ded0, "UCMP.D3D.GT", FMATwoSrc },
	{ 0x5ded8, "UCMP.D3D.GE", FMATwoSrc },
	{ 0x5dee0, "ICMP.D3D.EQ", FMATwoSrc },
	{ 0x60200, "RSHIFT_NAND", FMAThreeSrc },
	{ 0x60e00, "RSHIFT_OR", FMAThreeSrc },
	{ 0x61200, "RSHIFT_AND", FMAThreeSrc },
	{ 0x61e00, "RSHIFT_NOR", FMAThreeSrc }, // ~((src0 >> src2) | src1)
	{ 0x62200, "LSHIFT_NAND", FMAThreeSrc },
	{ 0x62e00, "LSHIFT_OR",  FMAThreeSrc }, // (src0 << src2) | src1
	{ 0x63200, "LSHIFT_AND", FMAThreeSrc }, // (src0 << src2) & src1
	{ 0x63e00, "LSHIFT_NOR", FMAThreeSrc },
	{ 0x64200, "RSHIFT_XOR", FMAThreeSrc },
	{ 0x64600, "RSHIFT_XNOR", FMAThreeSrc }, // ~((src0 >> src2) ^ src1)
	{ 0x64a00, "LSHIFT_XOR", FMAThreeSrc },
	{ 0x64e00, "LSHIFT_XNOR", FMAThreeSrc }, // ~((src0 << src2) ^ src1)
	{ 0x65200, "LSHIFT_ADD", FMAThreeSrc },
	{ 0x65600, "LSHIFT_SUB", FMAThreeSrc }, // (src0 << src2) - src1
	{ 0x65a00, "LSHIFT_RSUB", FMAThreeSrc }, // src1 - (src0 << src2)
	{ 0x65e00, "RSHIFT_ADD", FMAThreeSrc },
	{ 0x66200, "RSHIFT_SUB", FMAThreeSrc },
	{ 0x66600, "RSHIFT_RSUB", FMAThreeSrc },
	{ 0x66a00, "ARSHIFT_ADD", FMAThreeSrc },
	{ 0x66e00, "ARSHIFT_SUB", FMAThreeSrc },
	{ 0x67200, "ARSHIFT_RSUB", FMAThreeSrc },
	{ 0xcfc10, "ADDC", FMATwoSrc },
	{ 0xe0136, "F2I", FMAOneSrc },
	{ 0xe0137, "F2U", FMAOneSrc },
	{ 0xe0178, "I2F", FMAOneSrc },
	{ 0xe0179, "U2F", FMAOneSrc },
	{ 0xe0199, "U32TOU16", FMAOneSrc }, // out = in & 0xffff
	{ 0xe032c, "NOP",  FMAOneSrc },
	{ 0xe032d, "MOV",  FMAOneSrc },
	// From the ARM patent US20160364209A1:
	// "Decompose v (the input) into numbers x1 and s such that v = x1 * 2^s,
	// and x1 is a floating point value in a predetermined range where the
	// value 1 is within the range and not at one extremity of the range (e.g.
	// choose a range where 1 is towards middle of range)."
	//
	// This computes x1.
	{ 0xe0345, "LOG_FREXPM", FMAOneSrc },
	{ 0xe0365, "FRCP_TABLE", FMAOneSrc },
	// Compute required exponent for reciprocal (negate it, accounting for the offset.)
	{ 0xe038d, "FRCP_EXP", FMAOneSrc },
	{ 0xe03c5, "LOG_FREXPE", FMAOneSrc },
	{ 0xe0b80, "IMAX3", FMAThreeSrc },
	{ 0xe0bc0, "UMAX3", FMAThreeSrc },
	{ 0xe0c00, "IMIN3", FMAThreeSrc },
	{ 0xe0c40, "UMIN3", FMAThreeSrc },
	{ 0xe0f40, "CSEL", FMAThreeSrc }, // src2 != 0 ? src1 : src0
	{ 0xe1845, "CEIL", FMAOneSrc },
	{ 0xe1885, "FLOOR", FMAOneSrc },
	// This acts like a normal 32-bit add, except that it sets a flag on
	// overflow that gets listened to by load/store instructions in the ADD
	// part of the instruction, and added appropriately to the upper 32 bits of
	// the address. It lets you efficiently add a 32-bit offset to a 64-bit
	// pointer when loading/storing.
	{ 0xe1c80, "ADD_ADDR", FMATwoSrc },
	// Similar to the above, but used for normal additions (paired with
	// ADD_HIGH32 in the ADD slot to do 64-bit addition).
	{ 0xe1cc0, "ADD_LOW32", FMATwoSrc },
	{ 0xe7800, "IMAD", FMAThreeSrc },
	{ 0xe78db, "POPCNT", FMAOneSrc },
};

// Number of elements in a true array (not a pointer). The argument is
// parenthesized so that expressions such as `*ptrToArray` expand correctly.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

381
// Looks up the FMA table entry for the 20-bit op field `op`.
//
// Each entry is compared against `op` with the operand/modifier bits implied
// by that entry's srcType cleared; the first match wins. If nothing matches,
// a synthetic entry named "op<hex>" with a three-source layout is returned so
// the caller can still print the operands.
static FMAOpInfo findFMAOpInfo(unsigned op)
{
	for (size_t i = 0; i < ARRAY_SIZE(FMAOpInfos); i++) {
		// Low bits of `op` that hold sources/modifiers rather than opcode.
		unsigned operandMask = 0;
		switch (FMAOpInfos[i].srcType) {
			case FMAOneSrc:
				operandMask = 0;
				break;
			case FMATwoSrc:
				operandMask = 0x7;
				break;
			case FMAFcmp:
				operandMask = 0x1fff;
				break;
			case FMAThreeSrc:
				operandMask = 0x3f;
				break;
			case FMATwoSrcFmod:
				operandMask = 0x3fff;
				break;
			case FMAThreeSrcFmod:
				operandMask = 0x3ffff;
				break;
			case FMAFourSrc:
				operandMask = 0x1ff;
				break;
		}
		if (FMAOpInfos[i].op == (op & ~operandMask))
			return FMAOpInfos[i];
	}

	FMAOpInfo info;
	snprintf(info.name, sizeof(info.name), "op%04x", op);
	info.op = op;
	info.srcType = FMAThreeSrc;
	return info;
}

Connor Abbott's avatar
Connor Abbott committed
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
// Prints the float-compare condition suffix for `op`; values without a known
// name are printed as ".unk<op>".
static void DumpFCMP(unsigned op)
{
	static const char *const kCondSuffix[] = {
		".OEQ", ".OGT", ".OGE", ".UNE", ".OLT", ".OLE",
	};

	if (op < sizeof(kCondSuffix) / sizeof(kCondSuffix[0]))
		fputs(kCondSuffix[op], stdout);
	else
		printf(".unk%d", op);
}

446
static void DumpFMA(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts)
447
{
448 449 450 451
	printf("# FMA: %016" PRIx64 "\n", word);
	FMA FMA;
	memcpy((char *) &FMA, (char *) &word, sizeof(FMA));
	FMAOpInfo info = findFMAOpInfo(FMA.op);
452 453

	printf("%s", info.name);
454 455
	if (info.srcType == FMATwoSrcFmod ||
		info.srcType == FMAThreeSrcFmod) {
456
		// output modifiers
457
		DumpOutputMod(bits(FMA.op, 12, 14));
Connor Abbott's avatar
Connor Abbott committed
458
	} else if (info.srcType == FMAFcmp) {
459
		DumpFCMP(bits(FMA.op, 10, 13));
460
	}
Connor Abbott's avatar
Connor Abbott committed
461

462
	printf(" ");
463 464 465 466 467 468 469 470

	RegCtrl nextCtrl = DecodeRegCtrl(nextRegs);
	if (nextCtrl.FMAWriteUnit != RegWriteNone) {
		printf("{R%d, T0}, ", GetRegToWrite(nextCtrl.FMAWriteUnit, nextRegs));
	} else {
		printf("T0, ");
	}

471
	switch (info.srcType) {
472
		case FMAOneSrc:
473
			DumpSrc(FMA.src0, regs, consts, true);
474
			break;
475
		case FMATwoSrc:
476
			DumpSrc(FMA.src0, regs, consts, true);
477
			printf(", ");
478
			DumpSrc(FMA.op & 0x7, regs, consts, true);
479
			break;
480
		case FMATwoSrcFmod:
481
			if (FMA.op & 0x10)
482
				printf("-");
483 484
			if (FMA.op & 0x200)
				printf("abs(");
485
			DumpSrc(FMA.src0, regs, consts, true);
486 487
			if (FMA.op & 0x200)
				printf(")");
488
			printf(", ");
489
			if (FMA.op & 0x20)
Connor Abbott's avatar
Connor Abbott committed
490
				printf("-");
491 492
			if (FMA.op & 0x8)
				printf("abs(");
493
			DumpSrc(FMA.op & 0x7, regs, consts, true);
494 495
			if (FMA.op & 0x8)
				printf(")");
Connor Abbott's avatar
Connor Abbott committed
496 497
			break;
		case FMAFcmp:
498
			if (FMA.op & 0x200)
Connor Abbott's avatar
Connor Abbott committed
499
				printf("abs(");
500
			DumpSrc(FMA.src0, regs, consts, true);
501
			if (FMA.op & 0x200)
Connor Abbott's avatar
Connor Abbott committed
502 503
				printf(")");
			printf(", ");
504
			if (FMA.op & 0x20)
505
				printf("-");
506
			if (FMA.op & 0x8)
Connor Abbott's avatar
Connor Abbott committed
507
				printf("abs(");
508
			DumpSrc(FMA.op & 0x7, regs, consts, true);
509
			if (FMA.op & 0x8)
Connor Abbott's avatar
Connor Abbott committed
510
				printf(")");
511
			break;
512
		case FMAThreeSrc:
513
			DumpSrc(FMA.src0, regs, consts, true);
514
			printf(", ");
515
			DumpSrc(FMA.op & 0x7, regs, consts, true);
516
			printf(", ");
517
			DumpSrc((FMA.op >> 3) & 0x7, regs, consts, true);
518
			break;
519
		case FMAThreeSrcFmod:
520
			if (FMA.op & (1 << 14))
521
				printf("-");
522 523
			if (FMA.op & (1 << 9))
				printf("abs(");
524
			DumpSrc(FMA.src0, regs, consts, true);
525 526
			if (FMA.op & (1 << 9))
				printf(")");
527
			printf(", ");
528 529
			if (FMA.op & (1 << 16))
				printf("abs(");
530
			DumpSrc(FMA.op & 0x7, regs, consts, true);
531 532
			if (FMA.op & (1 << 16))
				printf(")");
533
			printf(", ");
534
			if (FMA.op & (1 << 15))
535
				printf("-");
536 537
			if (FMA.op & (1 << 17))
				printf("abs(");
538
			DumpSrc((FMA.op >> 3) & 0x7, regs, consts, true);
539 540
			if (FMA.op & (1 << 17))
				printf(")");
541
			break;
542
		case FMAFourSrc:
543
			DumpSrc(FMA.src0, regs, consts, true);
Connor Abbott's avatar
Connor Abbott committed
544
			printf(", ");
545
			DumpSrc(FMA.op & 0x7, regs, consts, true);
Connor Abbott's avatar
Connor Abbott committed
546
			printf(", ");
547
			DumpSrc((FMA.op >> 3) & 0x7, regs, consts, true);
Connor Abbott's avatar
Connor Abbott committed
548
			printf(", ");
549
			DumpSrc((FMA.op >> 6) & 0x7, regs, consts, true);
Connor Abbott's avatar
Connor Abbott committed
550
			break;
551 552 553 554
	}
	printf("\n");
}

555
// Raw ADD-slot instruction bits (20 in total), filled by memcpy from the
// packed instruction word.
struct ADD {
	uint64_t src0 : 3; // first source selector (see DumpSrc)
	uint64_t op : 17;  // opcode, with the remaining source and modifiers in its low bits
};

560 561 562 563
// Operand layout of an ADD opcode. It determines how many low bits of the op
// field hold sources/modifiers (see findADDOpInfo) and how they are printed
// (see DumpADD).
enum ADDSrcType {
	ADDOneSrc,
	ADDTwoSrc,
	ADDTwoSrcFmod, // two sources with float neg/abs and output modifiers
	ADDFcmp,       // float compare: two sources plus a condition field
	ADDTexCompact, // texture instruction with embedded sampler
	ADDTex, // texture instruction with sampler/etc. in uniform port
};

569
struct ADDOpInfo {
570
	unsigned op;
Connor Abbott's avatar
Connor Abbott committed
571
	char name[20];
572
	ADDSrcType srcType;
573
	bool hasDataReg;
574 575
};

576
// Known ADD opcodes. findADDOpInfo scans this table linearly and returns the
// first entry whose op equals the instruction's op field with the operand
// bits for that entry's srcType masked off. Entries that omit the fourth
// initializer have hasDataReg == false.
static const ADDOpInfo ADDOpInfos[] = {
	{ 0x00000, "FMAX", ADDTwoSrcFmod },
	{ 0x02000, "FMIN", ADDTwoSrcFmod },
	{ 0x04000, "FADD", ADDTwoSrcFmod },
	{ 0x06000, "FCMP.GL", ADDFcmp },
	{ 0x07000, "FCMP.D3D", ADDFcmp },
	{ 0x07936, "F2I", ADDOneSrc },
	{ 0x07937, "F2U", ADDOneSrc },
	{ 0x07978, "I2F", ADDOneSrc },
	{ 0x07979, "U2F", ADDOneSrc },
	{ 0x07999, "U32TOU16", ADDOneSrc },
	{ 0x07b2c, "NOP",  ADDOneSrc },
	{ 0x07b2d, "MOV",  ADDOneSrc },
	{ 0x07b8d, "FRCP_EXP", ADDOneSrc },
	// From the ARM patent US20160364209A1:
	// "Decompose v (the input) into numbers x1 and s such that v = x1 * 2^s,
	// and x1 is a floating point value in a predetermined range where the
	// value 1 is within the range and not at one extremity of the range (e.g.
	// choose a range where 1 is towards middle of range)."
	//
	// This computes s.
	{ 0x07bc5, "FLOG_FREXPE", ADDOneSrc },
	{ 0x07d45, "CEIL", ADDOneSrc },
	{ 0x07d85, "FLOOR", ADDOneSrc },
	{ 0x07f18, "ADD_HIGH32", ADDTwoSrc },
	{ 0x0b000, "TEX", ADDTexCompact, true },
	{ 0x0c188, "LOAD.i32", ADDTwoSrc, true },
	{ 0x0c1c8, "LOAD.v2i32", ADDTwoSrc, true },
	{ 0x0c208, "LOAD.v4i32", ADDTwoSrc, true },
	{ 0x0c248, "STORE.v4i32", ADDTwoSrc, true },
	{ 0x0c588, "STORE.i32", ADDTwoSrc, true },
	{ 0x0c5c8, "STORE.v2i32", ADDTwoSrc, true },
	{ 0x0c648, "LOAD.u16", ADDTwoSrc, true }, // zero-extends
	{ 0x0ca88, "LOAD.v3i32", ADDTwoSrc, true },
	{ 0x0cb88, "STORE.v3i32", ADDTwoSrc, true },
	// Produce appropriate scale
	{ 0x0ce00, "FRCP_SCALE", ADDOneSrc },
	// Used in the argument reduction for log.
	// See the ARM patent for more information.
	{ 0x0ce60, "FRCP_APPROX", ADDOneSrc },
	{ 0x0cf50, "SIN_TABLE", ADDOneSrc },
	{ 0x0cf51, "COS_TABLE", ADDOneSrc },
	{ 0x0cf60, "FLOG2_TABLE", ADDOneSrc },
	{ 0x0cf64, "FLOGE_TABLE", ADDOneSrc },
	{ 0x0f640, "ICMP.GL.GT", ADDTwoSrc }, // src0 > src1 ? 1 : 0
	{ 0x0f648, "ICMP.GL.GE", ADDTwoSrc },
	{ 0x0f650, "UCMP.GL.GT", ADDTwoSrc },
	{ 0x0f658, "UCMP.GL.GE", ADDTwoSrc },
	{ 0x0f660, "ICMP.GL.EQ", ADDTwoSrc },
	{ 0x0f6c0, "ICMP.D3D.GT", ADDTwoSrc }, // src0 > src1 ? ~0 : 0
	{ 0x0f6c8, "ICMP.D3D.GE", ADDTwoSrc },
	{ 0x0f6d0, "UCMP.D3D.GT", ADDTwoSrc },
	{ 0x0f6d8, "UCMP.D3D.GE", ADDTwoSrc },
	{ 0x0f6e0, "ICMP.D3D.EQ", ADDTwoSrc },
	{ 0x178c0, "ADD",  ADDTwoSrc },
	{ 0x17ac0, "SUB",  ADDTwoSrc },
	{ 0x17c10, "ADDC", ADDTwoSrc }, // adds src0 to the bottom bit of src1
	{ 0x1dd18, "OR",  ADDTwoSrc },
	{ 0x1dd60, "LSHIFT", ADDTwoSrc },
	{ 0x1dd20, "AND",  ADDTwoSrc },
	{ 0x1dd50, "XOR",  ADDTwoSrc },
	// NOTE(review): the next two opcodes have bit 2 set, but ADDTwoSrc
	// entries are matched against `op & ~0x7`, so as written they can never
	// match. Confirm the intended encoding / operand layout.
	{ 0x1dd84, "RSHIFT", ADDTwoSrc },
	{ 0x1dda4, "ARSHIFT", ADDTwoSrc },
	{ 0x1ae60, "TEX", ADDTex, true },
};

642
// Looks up the ADD table entry for the 17-bit op field `op`.
//
// Each entry is compared against `op` with the operand/modifier bits implied
// by that entry's srcType cleared; the first match wins. If nothing matches,
// a synthetic entry named "op<hex>" with a two-source layout and a data
// register is returned so the caller can still print the operands.
static ADDOpInfo findADDOpInfo(unsigned op)
{
	for (size_t i = 0; i < ARRAY_SIZE(ADDOpInfos); i++) {
		// Low bits of `op` that hold sources/modifiers rather than opcode.
		unsigned operandMask = 0;
		switch (ADDOpInfos[i].srcType) {
			case ADDOneSrc:
				operandMask = 0;
				break;
			case ADDTwoSrc:
				operandMask = 0x7;
				break;
			case ADDTex:
				operandMask = 0xf;
				break;
			case ADDTwoSrcFmod:
				operandMask = 0x1fff;
				break;
			case ADDFcmp:
				operandMask = 0x7ff;
				break;
			case ADDTexCompact:
				operandMask = 0x3ff;
				break;
		}
		if (ADDOpInfos[i].op == (op & ~operandMask))
			return ADDOpInfos[i];
	}

	ADDOpInfo info;
	snprintf(info.name, sizeof(info.name), "op%04x", op);
	info.op = op;
	info.srcType = ADDTwoSrc;
	info.hasDataReg = true;
	return info;
}

678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704
// Control word of a (non-compact) texture instruction. DumpADD fills it by
// memcpy from one 32-bit half of a clause constant, so the field order and
// widths (32 bits in total) must match the encoding exactly.
// NOTE(review): this relies on the compiler packing mixed bool/unsigned
// bit-fields into a single 32-bit unit, which is implementation-defined.
struct TexCtrl {
	// With noMergeIndex clear, DumpADD treats bit 0 as "texture index is
	// indirect" and bit 1 as "sampler index is indirect", and expects bits
	// 2-3 to be 3.
	unsigned samplerIndex : 4; // also used to signal indirects
	unsigned texIndex : 7;
	// Set: texIndex and samplerIndex are used as separate indices.
	// Clear: both indices come from texIndex.
	bool noMergeIndex : 1; // whether to merge (direct) sampler & texture indices
	bool filter : 1; // use the usual filtering pipeline (0 for texelFetch & textureGather)
	unsigned unk0 : 2; // DumpADD reports any value other than 3
	bool texelOffset : 1; // *Offset()
	bool isShadow : 1;
	bool isArray : 1;
	// DumpADD prints 0 as cube, 1 as buffer, 2 as 2D, 3 as 3D.
	unsigned texType : 2; // 2D, 3D, Cube, Buffer
	bool computeLOD : 1; // 0 for *Lod()
	bool notSupplyLOD : 1; // 0 for *Lod() or when a bias is applied
	// With filter clear, DumpADD prints a set bit as "txg" and a clear bit
	// as "txf".
	bool calcGradients : 1; // 0 for *Grad()
	unsigned unk1 : 1;
	// DumpADD prints 0x4 as f32, 0xe as i32, 0xf as u32, and treats 9 as a
	// dual-texture control word (see DualTexCtrl).
	unsigned resultType : 4; // integer, unsigned, float TODO: why is this 4 bits?
	unsigned unk2 : 4; // DumpADD reports any value other than 0xf
};

// Alternative view of the same 32-bit texture control word, used by DumpADD
// when TexCtrl::resultType is 9: two texture/sampler index pairs instead of
// one. Field order and widths (32 bits in total) must match the encoding.
struct DualTexCtrl {
	unsigned samplerIndex0 : 2;
	unsigned unk0 : 2; // DumpADD reports any value other than 3
	unsigned texIndex0 : 2;
	unsigned samplerIndex1 : 2;
	unsigned texIndex1 : 2;
	unsigned unk1 : 22;
};

705
static void DumpADD(uint64_t word, Regs regs, Regs nextRegs, uint64_t *consts, unsigned dataReg)
706
{
707 708 709 710
	printf("# ADD: %016" PRIx64 "\n", word);
	ADD ADD;
	memcpy((char *) &ADD, (char *) &word, sizeof(ADD));
	ADDOpInfo info = findADDOpInfo(ADD.op);
711 712

	printf("%s", info.name);
713
	if (info.srcType == ADDTwoSrcFmod) {
714
		// output modifiers
715
		DumpOutputMod(bits(ADD.op, 8, 10));
Connor Abbott's avatar
Connor Abbott committed
716
	} else if (info.srcType == ADDFcmp) {
717
		DumpFCMP(bits(ADD.op, 3, 6));
718 719
	}
	printf(" ");
720 721 722 723 724 725 726 727

	RegCtrl nextCtrl = DecodeRegCtrl(nextRegs);
	if (nextCtrl.ADDWriteUnit != RegWriteNone) {
		printf("{R%d, T1}, ", GetRegToWrite(nextCtrl.ADDWriteUnit, nextRegs));
	} else {
		printf("T1, ");
	}

728
	switch (info.srcType) {
729
		case ADDOneSrc:
730
			DumpSrc(ADD.src0, regs, consts, false);
731
			break;
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845
		case ADDTex:
		case ADDTexCompact: {
			int texIndex;
			int samplerIndex;
			bool dualTex = false;
			if (info.srcType == ADDTexCompact) {
				texIndex = (ADD.op >> 3) & 0x7;
				samplerIndex = (ADD.op >> 7) & 0x7;
				bool unknown = (ADD.op & 0x40);
				// TODO: figure out if the unknown bit is ever 0
				if (!unknown)
					printf("unknown ");
			} else {
				uint64_t constVal = GetConst(consts, regs);
				uint32_t controlBits = (ADD.op & 0x8) ? (constVal >> 32) : constVal;
				TexCtrl ctrl;
				memcpy((char *) &ctrl, (char *) &controlBits, sizeof(ctrl));

				// TODO: figure out what actually triggers dual-tex
				if (ctrl.resultType == 9) {
					DualTexCtrl dualCtrl;
					memcpy((char *) &dualCtrl, (char *) &controlBits, sizeof(ctrl));
					printf("(dualtex) tex0:%d samp0:%d tex1:%d samp1:%d ",
							dualCtrl.texIndex0, dualCtrl.samplerIndex0,
							dualCtrl.texIndex1, dualCtrl.samplerIndex1);
					if (dualCtrl.unk0 != 3)
						printf("unk:%d ", dualCtrl.unk0);
					dualTex = true;
				} else {
					if (ctrl.noMergeIndex) {
						texIndex = ctrl.texIndex;
						samplerIndex = ctrl.samplerIndex;
					} else {
						texIndex = samplerIndex = ctrl.texIndex;
						unsigned unk = ctrl.samplerIndex >> 2;
						if (unk != 3)
							printf("unk:%d ", unk);
						if (ctrl.samplerIndex & 1)
							texIndex = -1;
						if (ctrl.samplerIndex & 2)
							samplerIndex = -1;
					}

					if (ctrl.unk0 != 3)
						printf("unk0:%d ", ctrl.unk0);
					if (ctrl.unk1)
						printf("unk1 ");
					if (ctrl.unk2 != 0xf)
						printf("unk2:%x ", ctrl.unk2);

					switch (ctrl.resultType) {
						case 0x4:
							printf("f32 "); break;
						case 0xe:
							printf("i32 "); break;
						case 0xf:
							printf("u32 "); break;
						default:
							printf("unktype(%x) ", ctrl.resultType);
					}

					switch (ctrl.texType) {
						case 0:
							printf("cube "); break;
						case 1:
							printf("buffer "); break;
						case 2:
							printf("2D "); break;
						case 3:
							printf("3D "); break;
					}

					if (ctrl.isShadow)
						printf("shadow ");
					if (ctrl.isArray)
						printf("array ");

					if (!ctrl.filter) {
						if (ctrl.calcGradients) {
							int comp = (controlBits >> 20) & 0x3;
							printf("txg comp:%d ", comp);
						} else {
							printf("txf ");
						}
					} else {
						if (!ctrl.notSupplyLOD) {
							if (ctrl.computeLOD)
								printf("lod_bias ");
							else
								printf("lod ");
						}

						if (!ctrl.calcGradients)
							printf("grad ");
					}

					if (ctrl.texelOffset)
						printf("offset ");
				}
			}

			if (!dualTex) {
				if (texIndex == -1)
					printf("tex:indirect ");
				else
					printf("tex:%d ", texIndex);

				if (samplerIndex == -1)
					printf("samp:indirect ");
				else
					printf("samp:%d ", samplerIndex);
			}
			// fallthrough
		}
846
		case ADDTwoSrc:
847
			DumpSrc(ADD.src0, regs, consts, false);
848
			printf(", ");
849
			DumpSrc(ADD.op & 0x7, regs, consts, false);
850
			break;
851
		case ADDTwoSrcFmod:
852
			if (ADD.op & 0x10)
853
				printf("-");
854 855
			if (ADD.op & 0x1000)
				printf("abs(");
856
			DumpSrc(ADD.src0, regs, consts, false);
857 858
			if (ADD.op & 0x1000)
				printf(")");
859
			printf(", ");
860
			if (ADD.op & 0x20)
861
				printf("-");
862 863
			if (ADD.op & 0x8)
				printf("abs(");
864
			DumpSrc(ADD.op & 0x7, regs, consts, false);
865 866
			if (ADD.op & 0x8)
				printf(")");
867
			break;
Connor Abbott's avatar
Connor Abbott committed
868
		case ADDFcmp:
869
			if (ADD.op & 0x400) {
Connor Abbott's avatar
Connor Abbott committed
870 871
				printf("-");
			}
872
			if (ADD.op & 0x100) {
Connor Abbott's avatar
Connor Abbott committed
873 874
				printf("abs(");
			}
875
			DumpSrc(ADD.src0, regs, consts, false);
876
			if (ADD.op & 0x100) {
Connor Abbott's avatar
Connor Abbott committed
877 878 879
				printf(")");
			}
			printf(", ");
880
			if (ADD.op & 0x200) {
Connor Abbott's avatar
Connor Abbott committed
881 882
				printf("abs(");
			}
883
			DumpSrc(ADD.op & 0x7, regs, consts, false);
884
			if (ADD.op & 0x200) {
Connor Abbott's avatar
Connor Abbott committed
885 886
				printf(")");
			}
887
	}
888 889 890
	if (info.hasDataReg) {
		printf(", R%d", dataReg);
	}
891 892 893
	printf("\n");
}

Connor Abbott's avatar
Connor Abbott committed
894 895 896 897
// each of these structs represents an instruction that's dispatched in one
// cycle. Note that these instructions are packed in funny ways within the
// clause, hence the need for a separate struct.
struct AluInstr {
	uint64_t regBits; // raw register field (decoded via Regs)
	uint64_t FMABits; // raw FMA slot bits (decoded via FMA)
	uint64_t ADDBits; // raw ADD slot bits (decoded via ADD)
};

903
void DumpInstr(const AluInstr &instr, Regs nextRegs, uint64_t *consts, unsigned dataReg)
Connor Abbott's avatar
Connor Abbott committed
904
{
905 906 907 908
	printf("# regs: %016" PRIx32 "\n", instr.regBits);
	Regs regs;
	memcpy((char *) &regs, (char *) &instr.regBits, sizeof(regs));
	DumpRegs(regs);
909
	DumpFMA(instr.FMABits, regs, nextRegs, consts);
910
	DumpADD(instr.ADDBits, regs, nextRegs, consts, dataReg);
Connor Abbott's avatar
Connor Abbott committed
911 912
}

913 914 915 916 917 918 919 920 921 922 923 924
// Bit layout of a clause header (45 bits in total). Field order and widths
// must match the encoding exactly.
struct Header {
	uint64_t unk0 : 18;
	uint64_t dataReg : 6; // presumably the register passed to DumpInstr as dataReg -- TODO confirm against DumpClause
	uint64_t scoreboardDeps : 6; // bit mask; DumpHeader lists the set bit indices as "next-wait(...)"
	uint64_t unk1 : 2; // future expansion for scoreboardDeps?
	uint64_t scoreboardIndex : 3; // printed by DumpHeader as "id(N)"
	uint64_t clauseType : 4; // 0 suppresses the id/next-wait line in DumpHeader
	uint64_t unk2 : 1; // part of clauseType?
	uint64_t nextClauseType : 4;
	uint64_t unk3 : 1; // part of nextClauseType?
};

925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
// Prints a clause header: for non-zero clause types an "id(N)" line with an
// optional "next-wait(...)" list of the scoreboard slots set in
// scoreboardDeps, followed by a "# clause type" comment line.
void DumpHeader(Header header)
{
	const bool hasScoreboardLine = (header.clauseType != 0);

	if (hasScoreboardLine) {
		printf("id(%d)", header.scoreboardIndex);

		if (header.scoreboardDeps != 0) {
			printf(", next-wait(");
			bool needSeparator = false;
			for (unsigned slot = 0; slot < 6; slot++) {
				if (!(header.scoreboardDeps & (1 << slot)))
					continue;
				if (needSeparator)
					printf(", ");
				printf("%d", slot);
				needSeparator = true;
			}
			printf(")");
		}

		printf("\n");
	}

	printf("# clause type %d, next clause type %d\n",
		   header.clauseType, header.nextClauseType);
}

Connor Abbott's avatar
Connor Abbott committed
950
void DumpClause(uint32_t *words, unsigned *size)
951
{
Connor Abbott's avatar
Connor Abbott committed
952 953 954
	// State for a decoded clause
	AluInstr instrs[8] = {};
	uint64_t consts[6] = {};
955
	unsigned numInstrs = 0;
956
	unsigned numConsts = 0;
957
	uint64_t headerBits = 0;
Connor Abbott's avatar
Connor Abbott committed
958 959 960

	unsigned i;
	for (i = 0; ; i++, words += 4) {
961
		printf("# ");
Connor Abbott's avatar
Connor Abbott committed
962 963
		for (int j = 0; j < 4; j++)
			printf("%08x ", words[3 - j]); // low bit on the right
964
		printf("\n");
Connor Abbott's avatar
Connor Abbott committed
965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
		unsigned tag = bits(words[0], 0, 8);

		// speculatively decode some things that are common between many formats, so we can share some code
		AluInstr mainInstr = {};
		// 20 bits
		mainInstr.ADDBits = bits(words[2], 2, 32 - 13);
		// 23 bits
		mainInstr.FMABits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);
		// 35 bits
		mainInstr.regBits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);

		uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;
		uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;

		bool stop = tag & 0x40;

		if (tag & 0x80) {
			unsigned idx = stop ? 5 : 2;
			mainInstr.ADDBits |= ((tag >> 3) & 0x7) << 17;
			instrs[idx + 1] = mainInstr;
			instrs[idx].ADDBits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);
			instrs[idx].FMABits |= bits(words[2], 19, 32) << 10;
			consts[0] = bits(words[3], 17, 32) << 4;
Connor Abbott's avatar
Connor Abbott committed
988
		} else {
Connor Abbott's avatar
Connor Abbott committed
989 990
			bool done = false;
			switch ((tag >> 3) & 0x7) {
991
				case 0x0:
Connor Abbott's avatar
Connor Abbott committed
992 993 994 995
					switch (tag & 0x7) {
						case 0x3:
							mainInstr.ADDBits |= bits(words[3], 29, 32) << 17;
							instrs[1] = mainInstr;
996
							numInstrs = 2;
Connor Abbott's avatar
Connor Abbott committed
997 998 999 1000 1001 1002
							done = stop;
							break;
						case 0x4:
							instrs[2].ADDBits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
							instrs[2].FMABits |= bits(words[2], 19, 32) << 10;
							consts[0] = const0;
1003
							numInstrs = 3;
1004
							numConsts = 1;
Connor Abbott's avatar
Connor Abbott committed
1005 1006 1007 1008 1009 1010 1011 1012 1013
							done = stop;
							break;
						case 0x1:
						case 0x5:
							instrs[2].ADDBits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
							instrs[2].FMABits |= bits(words[2], 19, 32) << 10;
							mainInstr.ADDBits |= bits(words[3], 26, 29) << 17;
							instrs[3] = mainInstr;
							if ((tag & 0x7) == 0x5) {
1014
								numInstrs = 4;
Connor Abbott's avatar
Connor Abbott committed
1015 1016 1017 1018 1019 1020 1021
								done = stop;
							}
							break;
						case 0x6:
							instrs[5].ADDBits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
							instrs[5].FMABits |= bits(words[2], 19, 32) << 10;
							consts[0] = const0;
1022
							numInstrs = 6;
1023
							numConsts = 1;
Connor Abbott's avatar
Connor Abbott committed
1024 1025 1026 1027 1028 1029 1030
							done = stop;
							break;
						case 0x7:
							instrs[5].ADDBits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;
							instrs[5].FMABits |= bits(words[2], 19, 32) << 10;
							mainInstr.ADDBits |= bits(words[3], 26, 29) << 17;
							instrs[6] = mainInstr;
1031
							numInstrs = 7;
Connor Abbott's avatar
Connor Abbott committed
1032 1033 1034 1035
							done = stop;
							break;
						default:
							printf("unknown tag bits 0x%02x\n", tag);
1036 1037 1038
					}
					break;
				case 0x1:
1039
					headerBits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
Connor Abbott's avatar
Connor Abbott committed
1040 1041
					mainInstr.ADDBits |= (tag & 0x7) << 17;
					instrs[0] = mainInstr;
1042
					numInstrs = 1;
Connor Abbott's avatar
Connor Abbott committed
1043 1044 1045
					done = stop;
					// only constants can come after this
					break;
1046
				case 0x5:
1047
					headerBits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));
Connor Abbott's avatar
Connor Abbott committed
1048 1049
					mainInstr.ADDBits |= (tag & 0x7) << 17;
					instrs[0] = mainInstr;
1050 1051
					break;
				case 0x2:
Connor Abbott's avatar
Connor Abbott committed
1052 1053 1054 1055 1056
				case 0x3: {
					unsigned idx = ((tag >> 3) & 0x7) == 2 ? 4 : 7;
					mainInstr.ADDBits |= (tag & 0x7) << 17;
					instrs[idx] = mainInstr;
					consts[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;
1057
					numConsts = 1;
1058
					numInstrs = idx + 1;
Connor Abbott's avatar
Connor Abbott committed
1059
					done = stop;
1060
					break;
Connor Abbott's avatar
Connor Abbott committed
1061 1062 1063 1064 1065 1066 1067
				}
				case 0x4: {
					unsigned idx = stop ? 4 : 1;
					mainInstr.ADDBits |= (tag & 0x7) << 17;
					instrs[idx] = mainInstr;
					instrs[idx + 1].FMABits |= bits(words[3], 22, 32);
					instrs[idx + 1].regBits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));
1068
					break;
Connor Abbott's avatar
Connor Abbott committed
1069
				}
1070
				case 0x6:
Connor Abbott's avatar
Connor Abbott committed
1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107
				case 0x7: {
					unsigned pos = tag & 0xf;
					// note that `pos' encodes both the total number of
					// instructions and the position in the constant stream,
					// presumably because decoded constants and instructions
					// share a buffer in the decoder, but we only care about
					// the position in the constant stream; the total number of
					// instructions is redundant.
					unsigned const_idx = 7;
					switch (pos) {
						case 0:
						case 1:
						case 2:
						case 6:
							const_idx = 0;
							break;
						case 3:
						case 4:
						case 7:
						case 9:
							const_idx = 1;
							break;
						case 5:
						case 0xa:
							const_idx = 2;
							break;
						case 8:
						case 0xb:
						case 0xc:
							const_idx = 3;
							break;
						case 0xd:
							const_idx = 4;
							break;
						default:
							printf("# unknown pos 0x%x\n", pos);
					}
1108 1109
					if (numConsts < const_idx + 2)
						numConsts = const_idx + 2;
Connor Abbott's avatar
Connor Abbott committed
1110 1111 1112
					consts[const_idx] = const0;
					consts[const_idx + 1] = const1;
					done = stop;
1113
					break;
Connor Abbott's avatar
Connor Abbott committed
1114
				}
1115 1116 1117
				default:
					break;
			}
Connor Abbott's avatar
Connor Abbott committed
1118

Connor Abbott's avatar
Connor Abbott committed
1119 1120
			if (done)
				break;
Connor Abbott's avatar
Connor Abbott committed
1121 1122 1123
		}
	}

Connor Abbott's avatar
Connor Abbott committed
1124 1125
	*size = i + 1;

1126 1127 1128
	printf("# header: %012" PRIx64 "\n", headerBits);
	Header header;
	memcpy((char *) &header, (char *) &headerBits, sizeof(Header));
1129
	DumpHeader(header);
1130

1131
	printf("{\n");
1132
	for (i = 0; i < numInstrs; i++) {
1133
		Regs nextRegs;
1134
		if (i + 1 == numInstrs) {
1135 1136
			memcpy((char *) &nextRegs, (char *) &instrs[0].regBits,
					sizeof(nextRegs));
1137
		} else {
Connor Abbott's avatar
Connor Abbott committed
1138
			memcpy((char *) &nextRegs, (char *) &instrs[i + 1].regBits,
1139 1140 1141
					sizeof(nextRegs));
		}

1142
		DumpInstr(instrs[i], nextRegs, consts, header.dataReg);
Connor Abbott's avatar
Connor Abbott committed
1143
	}
1144
	printf("}\n");
Connor Abbott's avatar
Connor Abbott committed
1145

1146
	for (int i = 0; i < numConsts; i++) {
1147 1148
		printf("# const%d: %08x\n", 2 * i, consts[i] & 0xffffffff);
		printf("# const%d: %08x\n", 2 * i + 1, consts[i] >> 32);
1149 1150 1151 1152 1153 1154 1155 1156 1157
	}
}

// Disassembles a blob of instructions clause by clause, printing to stdout.
//
// indent   - currently unused by this function.
// instBlob - start of the encoded instructions; accessed as 32-bit words.
// size     - length of the blob in bytes.
//
// Decoding stops at the first all-zero quadword, when fewer than one full
// quadword (four 32-bit words) remains, or when a clause reports consuming
// more quadwords than were left in the blob.
void DumpInstructions(unsigned indent, uint8_t* instBlob, uint32_t size)
{
	uint32_t *words = (uint32_t *) instBlob;
	uint32_t *wordsEnd = words + (size / 4);
	// Require a whole quadword: both the zero check and DumpClause read
	// four words at a time, so a partial tail must not be touched.
	while (wordsEnd - words >= 4)
	{
		// we don't know what the program-end bit is quite yet, so for now just
		// assume that an all-0 quadword is padding
		uint32_t zero[4] = {};
		if (memcmp(words, zero, sizeof(zero)) == 0)
			break;

		unsigned quadsLeft = (unsigned) ((wordsEnd - words) / 4);
		unsigned clauseQuads = 0;
		DumpClause(words, &clauseQuads);

		// DumpClause cannot see the end of the blob; if it claims more than
		// what was available, stop instead of stepping past wordsEnd.
		if (clauseQuads > quadsLeft)
			break;
		words += clauseQuads * 4;
	}
}