...
 
Commits (32)
#!/usr/bin/python3
"""
Copyright ©2018 Lyude Paul
Copyright ©2018 The Panfrost community
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import struct
import re
import argparse
import itertools
from copy import copy
from enum import Enum, IntEnum
from sys import stderr
import pprint
try:
import bitstring
from bitstring import BitArray, Bits, BitStream
except Exception:
print("The bitstring module for python3 is needed to run this!", file=stderr)
import sys
sys.exit(-1)
class ParsingException(Exception):
    """Error raised while parsing assembly; the text is also kept in ``msg``."""

    def __init__(self, msg):
        super().__init__(msg)
        # Keep the message reachable as an attribute as well as via args.
        self.msg = msg
class OpParserBase:
    """ Base for op parser classes """

    # Default source count. parse_op() reads self.src_cnt unconditionally,
    # while __init__() only stores a truthy count, so without this default a
    # parser built without a count (and whose class defines none) would raise
    # AttributeError on its first parse_op() call. None means "no check".
    src_cnt = None

    class Op:
        """A parsed operation: mnemonic, opcode, bit length and source operands."""

        def __init__(self, bitlen, name, opcode, srcs):
            self.bitlen = bitlen
            self.name = name
            self.opcode = opcode
            self.srcs = srcs

        def encode(self):
            """
            Placeholder for encode(), this should be overridden in
            subclasses!
            """
            return [s.encode_src() for s in self.srcs]

        def __repr__(self):
            return '<%s at %x; srcs=%s>' % (self.name, id(self), self.srcs)

    class SrcCountException(ParsingException):
        """Raised when an op is given a different number of sources than it expects."""

        def __init__(self, expected, got):
            super().__init__('Expected %d sources, got %d' % (expected, got))

    def parse_op(self, clause, reg_file, srcs):
        """
        Build an Op for this parser from the given sources.

        `clause` and `reg_file` are unused here; subclasses that override this
        method receive them. Raises SrcCountException when a source count is
        configured and len(srcs) differs from it. `self.bitlen` must be
        provided by the subclass.
        """
        if self.src_cnt and len(srcs) != self.src_cnt:
            raise self.SrcCountException(self.src_cnt, len(srcs))
        return self.Op(self.bitlen, self.name, self.opcode, srcs)

    def __init__(self, name, opcode, src_cnt=None):
        """
        name    -- mnemonic of the op
        opcode  -- opcode value stored on the parser
        src_cnt -- expected number of sources; when falsy the class-level
                   value (None unless a subclass overrides it) stays in effect
        """
        self.name = name
        self.opcode = opcode
        if src_cnt:
            self.src_cnt = src_cnt
class SrcOpParserBase(OpParserBase):
    """ Base class for creating parsers for simple N src ops """

    class Op(OpParserBase.Op):
        def encode(self):
            """
            Return the opcode followed by the sources' 3-bit fields.

            The opcode occupies whatever is left of `bitlen` after reserving
            three bits per source; sources are appended last-to-first.
            """
            opcode_width = self.bitlen - len(self.srcs)*3
            bits = Bits(length=opcode_width, uint=self.opcode)
            for operand in reversed(self.srcs):
                bits = bits + operand.encode_src()
            return bits

    def __init__(self, name, opcode, src_cnt=None):
        # Drop three low opcode bits for every source beyond the first before
        # handing the opcode to the base class.
        dropped_bits = (src_cnt - 1) * 3
        super().__init__(name, opcode >> dropped_bits, src_cnt)
class ClauseType(IntEnum):
    """
    Clause type identifiers.

    The ADD-stage op parsers in this file store one of these on
    `clause.clause_type` while parsing an op.
    NOTE(review): the numeric values are presumably the hardware encoding of
    the clause type -- confirm against the clause encoder.
    """
    NONE = 0
    UBO_LOAD = 2
    SSBO_LOAD = 5
    SSBO_STORE = 6
    BLEND = 9
    ALPHA_TEST = 13
class fma:
    """
    Namespace for the FMA stage: its op parser classes and OP_MAP, the table
    mapping assembly mnemonics to parser instances.
    """
    NAME = 'FMA'
    # Same string as OpResult.PREV_FMA's value.
    DST_MNEMONIC = 'T0'

    class ZeroSrc():
        """Source operand that always encodes as the 3-bit value 3."""
        encoded = Bits(length=3, uint=3)

        @classmethod
        def encode_src(cls):
            return cls.encoded

    class OpParser(OpParserBase):
        # Bit length handed to every Op created by FMA parsers.
        bitlen = 23

    class SrcOpParser(OpParser, SrcOpParserBase):
        """FMA parser using SrcOpParserBase's opcode shifting and Op encoding."""
        pass

    # The parser classes below only pin the expected source count; they derive
    # from OpParser (not SrcOpParser), so their ops use the placeholder
    # OpParserBase.Op.encode().
    class TwoSrcFmodOpParser(OpParser):
        src_cnt = 2

    class ThreeSrcFmodOpParser(OpParser):
        src_cnt = 3

    class TwoSrcFmod16OpParser(OpParser):
        src_cnt = 2

    class ThreeSrcFmod16OpParser(OpParser):
        src_cnt = 3

    class FcmpOpParser(OpParser):
        src_cnt = 2

    class Fcmp16OpParser(OpParser):
        src_cnt = 2

    # Substitution (presumably vim) that appears to have been used to generate
    # the table below from a C-style opcode table:
    # s/{ \(0x[0-9a-f]\+\),\s\+\("[^"]\+"\),\s\+\%\(FMA\|ADD\)\([A-Za-z0-9]\+\)\s\+},/\2: \3Op(\2, \1),/g
    #
    # NOTE(review): "FCMP.GL" and "FCMP.D3D" each appear twice in this literal
    # (once with FcmpOpParser, once with Fcmp16OpParser). In a dict literal
    # the later value wins, so lookups return the Fcmp16OpParser instances and
    # the 0x48000/0x4c000 parsers are unreachable by name -- confirm which
    # mnemonics were intended.
    OP_MAP = {
        "FMA.f32" : ThreeSrcFmodOpParser("FMA.f32", 0x00000),
        "MAX.f32" : TwoSrcFmodOpParser("MAX.f32", 0x40000),
        "MIN.f32" : TwoSrcFmodOpParser("MIN.f32", 0x44000),
        "FCMP.GL" : FcmpOpParser("FCMP.GL", 0x48000),
        "FCMP.D3D" : FcmpOpParser("FCMP.D3D", 0x4c000),
        "ADD.i32" : SrcOpParser("ADD.i32", 0x4ff98, 2),
        "SUB.i32" : SrcOpParser("SUB.i32", 0x4ffd8, 2),
        "SUBB.i32" : SrcOpParser("SUBB.i32", 0x4fff0, 2),
        # compute FMA of first three sources, then set exponent to the fourth
        # source (as an integer).
        "FMA_RSCALE" : SrcOpParser("FMA_RSCALE", 0x50000, 4),
        # Seems to compute src2 - src0 * src1... why don't they just use FMA?
        "FRCP_PT3" : SrcOpParser("FRCP_PT3", 0x528c0, 3),
        # compute FMA of first three sources, then add the fourth argument to the
        # scale (modify scale)
        "FMA_MSCALE" : SrcOpParser("FMA_MSCALE", 0x54000, 4),
        "ADD.f32" : TwoSrcFmodOpParser("ADD.f32", 0x58000),
        "CSEL.FEQ.f32" : SrcOpParser("CSEL.FEQ.f32", 0x5c000, 4),
        "CSEL.FGT.f32" : SrcOpParser("CSEL.FGT.f32", 0x5c200, 4),
        "CSEL.FGE.f32" : SrcOpParser("CSEL.FGE.f32", 0x5c400, 4),
        "CSEL.IEQ.f32" : SrcOpParser("CSEL.IEQ.f32", 0x5c600, 4),
        "CSEL.IGT.i32" : SrcOpParser("CSEL.IGT.i32", 0x5c800, 4),
        "CSEL.IGE.i32" : SrcOpParser("CSEL.IGE.i32", 0x5ca00, 4),
        "CSEL.UGT.i32" : SrcOpParser("CSEL.UGT.i32", 0x5cc00, 4),
        "CSEL.UGE.i32" : SrcOpParser("CSEL.UGE.i32", 0x5ce00, 4),
        "ICMP.D3D.GT.v2i16" : SrcOpParser("ICMP.D3D.GT.v2i16", 0x5d8d0, 2),
        "UCMP.D3D.GT.v2i16" : SrcOpParser("UCMP.D3D.GT.v2i16", 0x5d9d0, 2),
        "ICMP.D3D.GE.v2i16" : SrcOpParser("ICMP.D3D.GE.v2i16", 0x5dad0, 2),
        "UCMP.D3D.GE.v2i16" : SrcOpParser("UCMP.D3D.GE.v2i16", 0x5dbd0, 2),
        "ICMP.D3D.EQ.v2i16" : SrcOpParser("ICMP.D3D.EQ.v2i16", 0x5dcd0, 2),
        "ICMP.GL.GT.i32" : SrcOpParser("ICMP.GL.GT.i32", 0x5de40, 2), # src0 > src1 ? 1 : 0
        "ICMP.GL.GE.i32" : SrcOpParser("ICMP.GL.GE.i32", 0x5de48, 2),
        "UCMP.GL.GT.i32" : SrcOpParser("UCMP.GL.GT.i32", 0x5de50, 2),
        "UCMP.GL.GE.i32" : SrcOpParser("UCMP.GL.GE.i32", 0x5de58, 2),
        "ICMP.GL.EQ.i32" : SrcOpParser("ICMP.GL.EQ.i32", 0x5de60, 2),
        "ICMP.D3D.GT.i32" : SrcOpParser("ICMP.D3D.GT.i32", 0x5dec0, 2), # src0 > src1 ? ~0 : 0
        "ICMP.D3D.GE.i32" : SrcOpParser("ICMP.D3D.GE.i32", 0x5dec8, 2),
        "UCMP.D3D.GT.i32" : SrcOpParser("UCMP.D3D.GT.i32", 0x5ded0, 2),
        "UCMP.D3D.GE.i32" : SrcOpParser("UCMP.D3D.GE.i32", 0x5ded8, 2),
        "ICMP.D3D.EQ.i32" : SrcOpParser("ICMP.D3D.EQ.i32", 0x5dee0, 2),
        "RSHIFT_NAND.i32" : SrcOpParser("RSHIFT_NAND.i32", 0x60200, 3),
        "RSHIFT_NAND.v2i16" : SrcOpParser("RSHIFT_NAND.v2i16", 0x603c0, 3),
        "RSHIFT_OR.i32" : SrcOpParser("RSHIFT_OR.i32", 0x60e00, 3),
        "RSHIFT_OR.v2i16" : SrcOpParser("RSHIFT_OR.v2i16", 0x60fc0, 3),
        "RSHIFT_AND.i32" : SrcOpParser("RSHIFT_AND.i32", 0x61200, 3),
        "RSHIFT_AND.v2i16" : SrcOpParser("RSHIFT_AND.v2i16", 0x613c0, 3),
        "RSHIFT_NOR.i32" : SrcOpParser("RSHIFT_NOR.i32", 0x61e00, 3), # ~((src0 << src2) | src1)
        "RSHIFT_NOR.v2i16" : SrcOpParser("RSHIFT_NOR.v2i16", 0x61fc0, 3), # ~((src0 << src2) | src1)
        "LSHIFT_NAND.i32" : SrcOpParser("LSHIFT_NAND.i32", 0x62200, 3),
        "LSHIFT_NAND.v2i16" : SrcOpParser("LSHIFT_NAND.v2i16", 0x623c0, 3),
        "LSHIFT_OR.i32" : SrcOpParser("LSHIFT_OR.i32", 0x62e00, 3), # (src0 << src2) | src1
        "LSHIFT_OR.v2i16" : SrcOpParser("LSHIFT_OR.v2i16", 0x62fc0, 3), # (src0 << src2) | src1
        "LSHIFT_AND.i32" : SrcOpParser("LSHIFT_AND.i32", 0x63200, 3), # (src0 << src2) & src1
        "LSHIFT_AND.v2i16" : SrcOpParser("LSHIFT_AND.v2i16", 0x633c0, 3),
        "LSHIFT_NOR.i32" : SrcOpParser("LSHIFT_NOR.i32", 0x63e00, 3),
        "LSHIFT_NOR.v2i16" : SrcOpParser("LSHIFT_NOR.v2i16", 0x63fc0, 3),
        "RSHIFT_XOR.i32" : SrcOpParser("RSHIFT_XOR.i32", 0x64200, 3),
        "RSHIFT_XOR.v2i16" : SrcOpParser("RSHIFT_XOR.v2i16", 0x643c0, 3),
        "RSHIFT_XNOR.i32" : SrcOpParser("RSHIFT_XNOR.i32", 0x64600, 3), # ~((src0 >> src2) ^ src1)
        "RSHIFT_XNOR.v2i16" : SrcOpParser("RSHIFT_XNOR.v2i16", 0x647c0, 3), # ~((src0 >> src2) ^ src1)
        "LSHIFT_XOR.i32" : SrcOpParser("LSHIFT_XOR.i32", 0x64a00, 3),
        "LSHIFT_XOR.v2i16" : SrcOpParser("LSHIFT_XOR.v2i16", 0x64bc0, 3),
        "LSHIFT_XNOR.i32" : SrcOpParser("LSHIFT_XNOR.i32", 0x64e00, 3), # ~((src0 >> src2) ^ src1)
        "LSHIFT_XNOR.v2i16" : SrcOpParser("LSHIFT_XNOR.v2i16", 0x64fc0, 3), # ~((src0 >> src2) ^ src1)
        "LSHIFT_ADD.i32" : SrcOpParser("LSHIFT_ADD.i32", 0x65200, 3),
        "LSHIFT_SUB.i32" : SrcOpParser("LSHIFT_SUB.i32", 0x65600, 3), # (src0 << src2) - src1
        "LSHIFT_RSUB.i32" : SrcOpParser("LSHIFT_RSUB.i32", 0x65a00, 3), # src1 - (src0 << src2)
        "RSHIFT_ADD.i32" : SrcOpParser("RSHIFT_ADD.i32", 0x65e00, 3),
        "RSHIFT_SUB.i32" : SrcOpParser("RSHIFT_SUB.i32", 0x66200, 3),
        "RSHIFT_RSUB.i32" : SrcOpParser("RSHIFT_RSUB.i32", 0x66600, 3),
        "ARSHIFT_ADD.i32" : SrcOpParser("ARSHIFT_ADD.i32", 0x66a00, 3),
        "ARSHIFT_SUB.i32" : SrcOpParser("ARSHIFT_SUB.i32", 0x66e00, 3),
        "ARSHIFT_RSUB.i32" : SrcOpParser("ARSHIFT_RSUB.i32", 0x67200, 3),
        "FMA.v2f16" : ThreeSrcFmod16OpParser("FMA.v2f16", 0x80000),
        "MAX.v2f16" : TwoSrcFmod16OpParser("MAX.v2f16", 0xc0000),
        "MIN.v2f16" : TwoSrcFmod16OpParser("MIN.v2f16", 0xc4000),
        "FCMP.GL" : Fcmp16OpParser("FCMP.GL", 0xc8000),
        "FCMP.D3D" : Fcmp16OpParser("FCMP.D3D", 0xcc000),
        "ADD.v2i16" : SrcOpParser("ADD.v2i16", 0xcf900, 2),
        "ADDC.i32" : SrcOpParser("ADDC.i32", 0xcfc10, 2),
        "ADD.i32.i16.X" : SrcOpParser("ADD.i32.i16.X", 0xcfd80, 2),
        "ADD.i32.u16.X" : SrcOpParser("ADD.i32.u16.X", 0xcfd90, 2),
        "ADD.i32.i16.Y" : SrcOpParser("ADD.i32.i16.Y", 0xcfdc0, 2),
        "ADD.i32.u16.Y" : SrcOpParser("ADD.i32.u16.Y", 0xcfdd0, 2),
        "ADD.v2f16" : TwoSrcFmod16OpParser("ADD.v2f16", 0xd8000),
        "CSEL.FEQ.v2f16" : SrcOpParser("CSEL.FEQ.v2f16", 0xdc000, 4),
        "CSEL.FGT.v2f16" : SrcOpParser("CSEL.FGT.v2f16", 0xdc200, 4),
        "CSEL.FGE.v2f16" : SrcOpParser("CSEL.FGE.v2f16", 0xdc400, 4),
        "CSEL.IEQ.v2f16" : SrcOpParser("CSEL.IEQ.v2f16", 0xdc600, 4),
        "CSEL.IGT.v2i16" : SrcOpParser("CSEL.IGT.v2i16", 0xdc800, 4),
        "CSEL.IGE.v2i16" : SrcOpParser("CSEL.IGE.v2i16", 0xdca00, 4),
        "CSEL.UGT.v2i16" : SrcOpParser("CSEL.UGT.v2i16", 0xdcc00, 4),
        "CSEL.UGE.v2i16" : SrcOpParser("CSEL.UGE.v2i16", 0xdce00, 4),
        "F32_TO_F16" : SrcOpParser("F32_TO_F16", 0xdd000, 2),
        "F16_TO_I16.XX" : SrcOpParser("F16_TO_I16.XX", 0xe0046, 1),
        "F16_TO_U16.XX" : SrcOpParser("F16_TO_U16.XX", 0xe0047, 1),
        "F16_TO_I16.YX" : SrcOpParser("F16_TO_I16.YX", 0xe004e, 1),
        "F16_TO_U16.YX" : SrcOpParser("F16_TO_U16.YX", 0xe004f, 1),
        "F16_TO_I16.XY" : SrcOpParser("F16_TO_I16.XY", 0xe0056, 1),
        "F16_TO_U16.XY" : SrcOpParser("F16_TO_U16.XY", 0xe0057, 1),
        "F16_TO_I16.YY" : SrcOpParser("F16_TO_I16.YY", 0xe005e, 1),
        "F16_TO_U16.YY" : SrcOpParser("F16_TO_U16.YY", 0xe005f, 1),
        "I16_TO_F16.XX" : SrcOpParser("I16_TO_F16.XX", 0xe00c0, 1),
        "U16_TO_F16.XX" : SrcOpParser("U16_TO_F16.XX", 0xe00c1, 1),
        "I16_TO_F16.YX" : SrcOpParser("I16_TO_F16.YX", 0xe00c8, 1),
        "U16_TO_F16.YX" : SrcOpParser("U16_TO_F16.YX", 0xe00c9, 1),
        "I16_TO_F16.XY" : SrcOpParser("I16_TO_F16.XY", 0xe00d0, 1),
        "U16_TO_F16.XY" : SrcOpParser("U16_TO_F16.XY", 0xe00d1, 1),
        "I16_TO_F16.YY" : SrcOpParser("I16_TO_F16.YY", 0xe00d8, 1),
        "U16_TO_F16.YY" : SrcOpParser("U16_TO_F16.YY", 0xe00d9, 1),
        "F32_TO_I32" : SrcOpParser("F32_TO_I32", 0xe0136, 1),
        "F32_TO_U32" : SrcOpParser("F32_TO_U32", 0xe0137, 1),
        "I32_TO_F32" : SrcOpParser("I32_TO_F32", 0xe0178, 1),
        "U32_TO_F32" : SrcOpParser("U32_TO_F32", 0xe0179, 1),
        "I16_TO_I32.X" : SrcOpParser("I16_TO_I32.X", 0xe0198, 1),
        "U16_TO_U32.X" : SrcOpParser("U16_TO_U32.X", 0xe0199, 1),
        "I16_TO_I32.Y" : SrcOpParser("I16_TO_I32.Y", 0xe019a, 1),
        "U16_TO_U32.Y" : SrcOpParser("U16_TO_U32.Y", 0xe019b, 1),
        "I16_TO_F32.X" : SrcOpParser("I16_TO_F32.X", 0xe019c, 1),
        "U16_TO_F32.X" : SrcOpParser("U16_TO_F32.X", 0xe019d, 1),
        "I16_TO_F32.Y" : SrcOpParser("I16_TO_F32.Y", 0xe019e, 1),
        "U16_TO_F32.Y" : SrcOpParser("U16_TO_F32.Y", 0xe019f, 1),
        "F16_TO_F32.X" : SrcOpParser("F16_TO_F32.X", 0xe01a2, 1),
        "F16_TO_F32.Y" : SrcOpParser("F16_TO_F32.Y", 0xe01a3, 1),
        "NOP" : SrcOpParser("NOP", 0xe032c, 1),
        "MOV" : SrcOpParser("MOV", 0xe032d, 1),
        "SWZ.YY.v2i16" : SrcOpParser("SWZ.YY.v2i16", 0xe032f, 1),
        # From the ARM patent US20160364209A1:
        # "Decompose v (the input) into numbers x1 and s such that v = x1 * 2^s,
        # and x1 is a floating point value in a predetermined range where the
        # value 1 is within the range and not at one extremity of the range (e.g.
        # choose a range where 1 is towards middle of range)."
        #
        # This computes x1.
        "LOG_FREXPM" : SrcOpParser("LOG_FREXPM", 0xe0345, 1),
        "FRCP_FREXPM" : SrcOpParser("FRCP_FREXPM", 0xe0365, 1),
        # Compute required exponent for reciprocal (negate it, accounting for the offset.)
        "FRCP_FREXPE" : SrcOpParser("FRCP_FREXPE", 0xe038d, 1),
        "LOG_FREXPE" : SrcOpParser("LOG_FREXPE", 0xe03c5, 1),
        "IMAX3" : SrcOpParser("IMAX3", 0xe0b80, 3),
        "UMAX3" : SrcOpParser("UMAX3", 0xe0bc0, 3),
        "IMIN3" : SrcOpParser("IMIN3", 0xe0c00, 3),
        "UMIN3" : SrcOpParser("UMIN3", 0xe0c40, 3),
        "CSEL" : SrcOpParser("CSEL", 0xe0f40, 3), # src2 != 0 ? src1 : src0
        "CEIL" : SrcOpParser("CEIL", 0xe1845, 1),
        "FLOOR" : SrcOpParser("FLOOR", 0xe1885, 1),
        # This acts like a normal 32-bit add, except that it sets a flag on
        # overflow that gets listened to by load/store instructions in the ADD
        # part of the instruction, and added appropriately to the upper 32 bits of
        # the address. It lets you efficiently add a 32-bit offset to a 64-bit
        # pointer when loading/storing.
        "ADD_ADDR" : SrcOpParser("ADD_ADDR", 0xe1c80, 2),
        # Similar to the above, but used for normal additions (paired with
        # ADD_HIGH32 in the ADD slot to do 64-bit addition).
        "ADD_LOW32" : SrcOpParser("ADD_LOW32", 0xe1cc0, 2),
        "SEL.XX.i16" : SrcOpParser("SEL.XX.i16", 0xe1e00, 2),
        "SEL.YX.i16" : SrcOpParser("SEL.YX.i16", 0xe1e08, 2),
        "SEL.XY.i16" : SrcOpParser("SEL.XY.i16", 0xe1e10, 2),
        "SEL.YY.i16" : SrcOpParser("SEL.YY.i16", 0xe1e18, 2),
        "IMAD" : SrcOpParser("IMAD", 0xe7800, 3),
        "POPCNT" : SrcOpParser("POPCNT", 0xe78db, 1),
    }
# ADD parsers/encoders
class add:
    """
    Namespace for the ADD stage: its op parser classes and OP_MAP, the table
    mapping assembly mnemonics to parser instances.
    """
    NAME = 'ADD'
    # Same string as OpResult.PREV_ADD's value.
    DST_MNEMONIC = 'T1'

    class OpParser(OpParserBase):
        """
        Base ADD parser. Besides name/opcode/source count it records whether
        the op uses the data register and which clause type (if any) parsing
        the op should set on the clause.
        """
        bitlen = 20
        # Default so parsers constructed without a source count (BRANCH, and
        # the has_data_reg-only entries in OP_MAP) skip the count check in
        # OpParserBase.parse_op() instead of failing on a missing attribute.
        src_cnt = None

        def __init__(self, name, opcode, src_cnt=None, has_data_reg=False, clause_type=None):
            self.has_data_reg = has_data_reg
            self.clause_type = clause_type
            super().__init__(name, opcode, src_cnt)

        def parse_op(self, clause, reg_file, srcs):
            """Set the clause type when one is configured, then parse as usual."""
            if self.clause_type:
                clause.clause_type = self.clause_type
            return super().parse_op(clause, reg_file, srcs)

    class SrcOpParser(OpParser, SrcOpParserBase):
        """ADD parser using SrcOpParserBase's opcode shifting and Op encoding."""
        pass

    # The parser classes below only pin the expected source count.
    class TwoSrcFmodOpParser(OpParser):
        src_cnt = 2

    class TwoSrcFmod16OpParser(OpParser):
        src_cnt = 2

    class TwoSrcFmod16CommutativeOpParser(OpParser):
        src_cnt = 2

    class FcmpOpParser(OpParser):
        src_cnt = 2

    class Fcmp16OpParser(OpParser):
        src_cnt = 2

    class ATestOpParser(SrcOpParser):
        def parse_op(self, clause, reg_file, srcs):
            """Mark the clause as ALPHA_TEST and claim the const port (value 5)."""
            clause.clause_type = ClauseType.ALPHA_TEST
            reg_file.const_port = Bits(length=8, uint=5)
            return super().parse_op(clause, reg_file, srcs)

    class LoadAttrOpParser(OpParser):
        def parse_op(self, clause, reg_file, srcs):
            """Mark the clause as UBO_LOAD, then parse as usual."""
            clause.clause_type = ClauseType.UBO_LOAD
            return super().parse_op(clause, reg_file, srcs)

    class VaryingInterpOpParser(OpParser):
        pass

    class TexCompactOpParser(OpParser):
        pass

    class VaryingAddressOpParser(OpParser):
        def parse_op(self, clause, reg_file, srcs):
            """Mark the clause as UBO_LOAD, then parse as usual."""
            clause.clause_type = ClauseType.UBO_LOAD
            return super().parse_op(clause, reg_file, srcs)

    class BlendingOpParser(SrcOpParser):
        """
        Parser for BLEND. All parameters are fixed class attributes, so the
        constructor takes no arguments and skips the base-class initialisers.
        """
        name = "BLEND"
        opcode = 0x1952c
        src_cnt = 1
        has_data_reg = True
        clause_type = ClauseType.BLEND

        class BlendDescriptor:
            """Const-port occupant that encodes as 0x8 | idx in an 8-bit field."""

            def encode_const_field(self):
                return Bits(length=8, uint=0x8 | self.idx)

            def __init__(self, idx):
                self.idx = idx

            def __repr__(self):
                return '<BlendDescriptor at 0x%x; idx=%d>' % (
                    id(self), self.idx)

        def parse_op(self, clause, reg_file, srcs):
            """
            Consume the leading ('location', N) token from `srcs` (the list is
            modified in place), disable register port 1, put a
            BlendDescriptor(N) on the const port and parse the remaining
            sources.

            Raises ParsingException when the location token is missing or
            malformed, or when N is not an integer.
            """
            if not srcs:
                raise ParsingException("BLEND requires a location source")
            loc_token = srcs.pop(0)
            if not isinstance(loc_token, tuple) or loc_token[0] != 'location':
                # Wrap in a 1-tuple: formatting with a bare tuple would spread
                # its items over the format arguments and raise TypeError.
                raise ParsingException("Invalid src '%s' (must be loc_token)" % (loc_token,))
            reg_file.disable_port(1)
            clause.clause_type = ClauseType.BLEND
            try:
                reg_file.const_port = self.BlendDescriptor(int(loc_token[1]))
            except ValueError as e:
                raise ParsingException("Invalid blend descriptor '%s'" % loc_token[1]) from e
            return super().parse_op(clause, reg_file, srcs)

        def __init__(self):
            pass

    class TexOpParser(OpParser):
        pass

    class BranchOpParser(OpParser):
        pass

    # Substitutions (presumably vim) that appear to have been used to generate
    # the table below from a C-style opcode table:
    # s/true/True/g
    # s://:#:
    # s/{ \(0x[0-9a-f]\+\),\s\+\("[^"]\+"\),\s\+ADD\([A-Za-z0-9]\+\)\(,\s\+True\)\?\s\+},/\2: \3Op(\2, \1\4),/g
    #
    # The optional trailing True is passed as has_data_reg by keyword for
    # parsers that take no source count; passed positionally it would land in
    # the src_cnt parameter.
    #
    # NOTE(review): "FCMP.D3D" and "TEX" each appear twice in this literal. In
    # a dict literal the later value wins, so lookups return the
    # Fcmp16OpParser / TexOpParser instances and the 0x07000 / 0x0b000 parsers
    # are unreachable by name -- confirm which mnemonics were intended.
    OP_MAP = {
        "MAX.f32" : TwoSrcFmodOpParser("MAX.f32", 0x00000),
        "MIN.f32" : TwoSrcFmodOpParser("MIN.f32", 0x02000),
        "ADD.f32" : TwoSrcFmodOpParser("ADD.f32", 0x04000),
        "FCMP.GL" : FcmpOpParser("FCMP.GL", 0x06000),
        "FCMP.D3D" : FcmpOpParser("FCMP.D3D", 0x07000),
        "F16_TO_I16" : SrcOpParser("F16_TO_I16", 0x07856, 1),
        "F16_TO_U16" : SrcOpParser("F16_TO_U16", 0x07857, 1),
        "I16_TO_F16.XX" : SrcOpParser("I16_TO_F16.XX", 0x078c0, 1),
        "U16_TO_F16.XX" : SrcOpParser("U16_TO_F16.XX", 0x078c1, 1),
        "I16_TO_F16.YX" : SrcOpParser("I16_TO_F16.YX", 0x078c8, 1),
        "U16_TO_F16.YX" : SrcOpParser("U16_TO_F16.YX", 0x078c9, 1),
        "I16_TO_F16.XY" : SrcOpParser("I16_TO_F16.XY", 0x078d0, 1),
        "U16_TO_F16.XY" : SrcOpParser("U16_TO_F16.XY", 0x078d1, 1),
        "I16_TO_F16.YY" : SrcOpParser("I16_TO_F16.YY", 0x078d8, 1),
        "U16_TO_F16.YY" : SrcOpParser("U16_TO_F16.YY", 0x078d9, 1),
        "F32_TO_I32" : SrcOpParser("F32_TO_I32", 0x07936, 1),
        "F32_TO_U32" : SrcOpParser("F32_TO_U32", 0x07937, 1),
        "I32_TO_F32" : SrcOpParser("I32_TO_F32", 0x07978, 1),
        "U32_TO_F32" : SrcOpParser("U32_TO_F32", 0x07979, 1),
        "I16_TO_I32.X" : SrcOpParser("I16_TO_I32.X", 0x07998, 1),
        "U16_TO_U32.X" : SrcOpParser("U16_TO_U32.X", 0x07999, 1),
        "I16_TO_I32.Y" : SrcOpParser("I16_TO_I32.Y", 0x0799a, 1),
        "U16_TO_U32.Y" : SrcOpParser("U16_TO_U32.Y", 0x0799b, 1),
        "I16_TO_F32.X" : SrcOpParser("I16_TO_F32.X", 0x0799c, 1),
        "U16_TO_F32.X" : SrcOpParser("U16_TO_F32.X", 0x0799d, 1),
        "I16_TO_F32.Y" : SrcOpParser("I16_TO_F32.Y", 0x0799e, 1),
        "U16_TO_F32.Y" : SrcOpParser("U16_TO_F32.Y", 0x0799f, 1),
        # take the low 16 bits, and expand it to a 32-bit float
        "F16_TO_F32.X" : SrcOpParser("F16_TO_F32.X", 0x079a2, 1),
        # take the high 16 bits, ...
        "F16_TO_F32.Y" : SrcOpParser("F16_TO_F32.Y", 0x079a3, 1),
        "SWZ.YX.v2i16" : SrcOpParser("SWZ.YX.v2i16", 0x07b2b, 1),
        "NOP" : SrcOpParser("NOP", 0x07b2c, 1),
        "SWZ.XX.v2i16" : SrcOpParser("SWZ.XX.v2i16", 0x07b29, 1),
        # Logically, this should be SWZ.XY, but that's equivalent to a move, and
        # this seems to be the canonical way the blob generates a MOV.
        "MOV" : SrcOpParser("MOV", 0x07b2d, 1),
        "SWZ.YY.v2i16" : SrcOpParser("SWZ.YY.v2i16", 0x07b2f, 1),
        "FRCP_FREXPE" : SrcOpParser("FRCP_FREXPE", 0x07b8d, 1),
        # From the ARM patent US20160364209A1:
        # "Decompose v (the input) into numbers x1 and s such that v = x1 * 2^s,
        # and x1 is a floating point value in a predetermined range where the
        # value 1 is within the range and not at one extremity of the range (e.g.
        # choose a range where 1 is towards middle of range)."
        #
        # This computes s.
        "FLOG_FREXPE" : SrcOpParser("FLOG_FREXPE", 0x07bc5, 1),
        "CEIL" : SrcOpParser("CEIL", 0x07d45, 1),
        "FLOOR" : SrcOpParser("FLOOR", 0x07d85, 1),
        "ADD_HIGH32" : SrcOpParser("ADD_HIGH32", 0x07f18, 2),
        # The second entry of each 32-bit group was keyed "v3..." twice, which
        # made the two-component variants unreachable; they follow the same
        # scalar/v2/v3/v4 progression as the f16 group.
        "LD_ATTR.f16" : LoadAttrOpParser("LD_ATTR.f16", 0x08000, has_data_reg=True),
        "LD_ATTR.v2f16" : LoadAttrOpParser("LD_ATTR.v2f16", 0x08100, has_data_reg=True),
        "LD_ATTR.v3f16" : LoadAttrOpParser("LD_ATTR.v3f16", 0x08200, has_data_reg=True),
        "LD_ATTR.v4f16" : LoadAttrOpParser("LD_ATTR.v4f16", 0x08300, has_data_reg=True),
        "LD_ATTR.f32" : LoadAttrOpParser("LD_ATTR.f32", 0x08400, has_data_reg=True),
        "LD_ATTR.v2f32" : LoadAttrOpParser("LD_ATTR.v2f32", 0x08500, has_data_reg=True),
        "LD_ATTR.v3f32" : LoadAttrOpParser("LD_ATTR.v3f32", 0x08600, has_data_reg=True),
        "LD_ATTR.v4f32" : LoadAttrOpParser("LD_ATTR.v4f32", 0x08700, has_data_reg=True),
        "LD_ATTR.i32" : LoadAttrOpParser("LD_ATTR.i32", 0x08800, has_data_reg=True),
        "LD_ATTR.v2i32" : LoadAttrOpParser("LD_ATTR.v2i32", 0x08900, has_data_reg=True),
        "LD_ATTR.v3i32" : LoadAttrOpParser("LD_ATTR.v3i32", 0x08a00, has_data_reg=True),
        "LD_ATTR.v4i32" : LoadAttrOpParser("LD_ATTR.v4i32", 0x08b00, has_data_reg=True),
        "LD_ATTR.u32" : LoadAttrOpParser("LD_ATTR.u32", 0x08c00, has_data_reg=True),
        "LD_ATTR.v2u32" : LoadAttrOpParser("LD_ATTR.v2u32", 0x08d00, has_data_reg=True),
        "LD_ATTR.v3u32" : LoadAttrOpParser("LD_ATTR.v3u32", 0x08e00, has_data_reg=True),
        "LD_ATTR.v4u32" : LoadAttrOpParser("LD_ATTR.v4u32", 0x08f00, has_data_reg=True),
        "LD_VAR.32" : VaryingInterpOpParser("LD_VAR.32", 0x0a000, has_data_reg=True),
        "TEX" : TexCompactOpParser("TEX", 0x0b000, has_data_reg=True),
        "LOAD.i32" : SrcOpParser("LOAD.i32", 0x0c188, 2, True, ClauseType.SSBO_LOAD),
        "LD_UBO.i32" : SrcOpParser("LD_UBO.i32", 0x0c1a0, 2, True, ClauseType.UBO_LOAD),
        "LOAD.v2i32" : SrcOpParser("LOAD.v2i32", 0x0c1c8, 2, True, ClauseType.SSBO_LOAD),
        "LD_UBO.v2i32" : SrcOpParser("LD_UBO.v2i32", 0x0c1e0, 2, True, ClauseType.UBO_LOAD),
        "LOAD.v4i32" : SrcOpParser("LOAD.v4i32", 0x0c208, 2, True, ClauseType.SSBO_LOAD),
        # src0 = offset, src1 = binding
        "LD_UBO.v4i32" : SrcOpParser("LD_UBO.v4i32", 0x0c220, 2, True, ClauseType.UBO_LOAD),
        "STORE.v4i32" : SrcOpParser("STORE.v4i32", 0x0c248, 2, True, ClauseType.SSBO_STORE),
        "STORE.i32" : SrcOpParser("STORE.i32", 0x0c588, 2, True, ClauseType.SSBO_STORE),
        "STORE.v2i32" : SrcOpParser("STORE.v2i32", 0x0c5c8, 2, True, ClauseType.SSBO_STORE),
        "LOAD.u16" : SrcOpParser("LOAD.u16", 0x0c648, 2, True, ClauseType.UBO_LOAD), # zero-extends
        "LOAD.v3i32" : SrcOpParser("LOAD.v3i32", 0x0ca88, 2, True, ClauseType.SSBO_LOAD),
        "LD_UBO.v3i32" : SrcOpParser("LD_UBO.v3i32", 0x0caa0, 2, True, ClauseType.UBO_LOAD),
        "STORE.v3i32" : SrcOpParser("STORE.v3i32", 0x0cb88, 2, True, ClauseType.SSBO_STORE),
        # Does not exist on G71 (added to G51, G72, and everything after)
        "FRCP_FAST.f32" : SrcOpParser("FRCP_FAST.f32", 0x0cc00, 1),
        # Produce appropriate scale
        # Given a floating point number m * 2^e, produces a table-based
        # approximation of 2/m using the top 17 bits. Includes special cases for
        # infinity, NaN, and zero, and copies the sign bit.
        "FRCP_TABLE" : SrcOpParser("FRCP_TABLE", 0x0ce00, 1),
        # Used in the argument reduction for log.
        # See the ARM patent for more information.
        "FRCP_APPROX" : SrcOpParser("FRCP_APPROX", 0x0ce60, 1),
        "SIN_TABLE" : SrcOpParser("SIN_TABLE", 0x0cf50, 1),
        "COS_TABLE" : SrcOpParser("COS_TABLE", 0x0cf51, 1),
        "FLOG2_TABLE" : SrcOpParser("FLOG2_TABLE", 0x0cf60, 1),
        "FLOGE_TABLE" : SrcOpParser("FLOGE_TABLE", 0x0cf64, 1),
        "BRANCH" : BranchOpParser("BRANCH", 0x0d000),
        "SEL.XX.i16" : SrcOpParser("SEL.XX.i16", 0x0ea60, 2),
        "SEL.XY.i16" : SrcOpParser("SEL.XY.i16", 0x0ea70, 2),
        "SEL.YX.i16" : SrcOpParser("SEL.YX.i16", 0x0ea68, 2),
        "SEL.YY.i16" : SrcOpParser("SEL.YY.i16", 0x0ea78, 2),
        "F32_TO_F16" : SrcOpParser("F32_TO_F16", 0x0ec00, 2),
        "ICMP.GL.GT" : SrcOpParser("ICMP.GL.GT", 0x0f640, 2), # src0 > src1 ? 1 : 0
        "ICMP.GL.GE" : SrcOpParser("ICMP.GL.GE", 0x0f648, 2),
        "UCMP.GL.GT" : SrcOpParser("UCMP.GL.GT", 0x0f650, 2),
        "UCMP.GL.GE" : SrcOpParser("UCMP.GL.GE", 0x0f658, 2),
        "ICMP.GL.EQ" : SrcOpParser("ICMP.GL.EQ", 0x0f660, 2),
        "ICMP.D3D.GT" : SrcOpParser("ICMP.D3D.GT", 0x0f6c0, 2), # src0 > src1 ? ~0 : 0
        "ICMP.D3D.GE" : SrcOpParser("ICMP.D3D.GE", 0x0f6c8, 2),
        "UCMP.D3D.GT" : SrcOpParser("UCMP.D3D.GT", 0x0f6d0, 2),
        "UCMP.D3D.GE" : SrcOpParser("UCMP.D3D.GE", 0x0f6d8, 2),
        "ICMP.D3D.EQ" : SrcOpParser("ICMP.D3D.EQ", 0x0f6e0, 2),
        "MAX.v2f16" : TwoSrcFmod16CommutativeOpParser("MAX.v2f16", 0x10000),
        "MIN.v2f16" : TwoSrcFmod16CommutativeOpParser("MIN.v2f16", 0x12000),
        "ADD.v2f16" : TwoSrcFmod16OpParser("ADD.v2f16", 0x14000),
        "FCMP.D3D" : Fcmp16OpParser("FCMP.D3D", 0x17000),
        "ADD.i32" : SrcOpParser("ADD.i32", 0x178c0, 2),
        "ADD.v2i16" : SrcOpParser("ADD.v2i16", 0x17900, 2),
        "SUB.i32" : SrcOpParser("SUB.i32", 0x17ac0, 2),
        "ADDC.i32" : SrcOpParser("ADDC.i32", 0x17c10, 2), # adds src0 to the bottom bit of src1
        "ADD.i32.i16.X" : SrcOpParser("ADD.i32.i16.X", 0x17d80, 2),
        "ADD.i32.u16.X" : SrcOpParser("ADD.i32.u16.X", 0x17d90, 2),
        "ADD.i32.i16.Y" : SrcOpParser("ADD.i32.i16.Y", 0x17dc0, 2),
        "ADD.i32.u16.Y" : SrcOpParser("ADD.i32.u16.Y", 0x17dd0, 2),
        # Compute varying address and datatype (for storing in the vertex shader),
        # and store the vec3 result in the data register. The result is passed as
        # the 3 normal arguments to ST_VAR.
        "LD_VAR_ADDR.f16" : VaryingAddressOpParser("LD_VAR_ADDR.f16", 0x18000, has_data_reg=True),
        "LD_VAR_ADDR.f32" : VaryingAddressOpParser("LD_VAR_ADDR.f32", 0x18100, has_data_reg=True),
        "LD_VAR_ADDR.i32" : VaryingAddressOpParser("LD_VAR_ADDR.i32", 0x18200, has_data_reg=True),
        "LD_VAR_ADDR.u32" : VaryingAddressOpParser("LD_VAR_ADDR.u32", 0x18300, has_data_reg=True),
        # Implements alpha-to-coverage, as well as possibly the late depth and
        # stencil tests. The first source is the existing sample mask in R60
        # (possibly modified by gl_SampleMask), and the second source is the alpha
        # value. The sample mask is written right away based on the
        # alpha-to-coverage result using the normal register write mechanism,
        # since that doesn't need to read from any memory, and then written again
        # later based on the result of the stencil and depth tests using the
        # special register.
        "ATEST.f32" : ATestOpParser("ATEST.f32", 0x191e8, 2, True),
        "ATEST.X.f16" : ATestOpParser("ATEST.X.f16", 0x191f0, 2, True),
        "ATEST.Y.f16" : ATestOpParser("ATEST.Y.f16", 0x191f8, 2, True),
        # store a varying given the address and datatype from LD_VAR_ADDR
        "ST_VAR.v1" : SrcOpParser("ST_VAR.v1", 0x19300, 3, True, ClauseType.SSBO_STORE),
        "ST_VAR.v2" : SrcOpParser("ST_VAR.v2", 0x19340, 3, True, ClauseType.SSBO_STORE),
        "ST_VAR.v3" : SrcOpParser("ST_VAR.v3", 0x19380, 3, True, ClauseType.SSBO_STORE),
        "ST_VAR.v4" : SrcOpParser("ST_VAR.v4", 0x193c0, 3, True, ClauseType.SSBO_STORE),
        # This takes the sample coverage mask (computed by ATEST above) as a
        # regular argument, in addition to the vec4 color in the special register.
        "BLEND" : BlendingOpParser(),
        "LD_VAR.16" : VaryingInterpOpParser("LD_VAR.16", 0x1a000, has_data_reg=True),
        "TEX" : TexOpParser("TEX", 0x1ae60, has_data_reg=True),
        "RSHIFT_NAND.i32" : SrcOpParser("RSHIFT_NAND.i32", 0x1c000, 3),
        "RSHIFT_OR.i32" : SrcOpParser("RSHIFT_OR.i32", 0x1c300, 3),
        "RSHIFT_AND.i32" : SrcOpParser("RSHIFT_AND.i32", 0x1c400, 3),
        "RSHIFT_NOR.i32" : SrcOpParser("RSHIFT_NOR.i32", 0x1c700, 3),
        "LSHIFT_NAND.i32" : SrcOpParser("LSHIFT_NAND.i32", 0x1c800, 3),
        "LSHIFT_OR.i32" : SrcOpParser("LSHIFT_OR.i32", 0x1cb00, 3),
        "LSHIFT_AND.i32" : SrcOpParser("LSHIFT_AND.i32", 0x1cc00, 3),
        "LSHIFT_NOR.i32" : SrcOpParser("LSHIFT_NOR.i32", 0x1cf00, 3),
        "RSHIFT_XOR.i32" : SrcOpParser("RSHIFT_XOR.i32", 0x1d000, 3),
        "RSHIFT_XNOR.i32" : SrcOpParser("RSHIFT_XNOR.i32", 0x1d100, 3),
        "LSHIFT_XOR.i32" : SrcOpParser("LSHIFT_XOR.i32", 0x1d200, 3),
        "LSHIFT_XNOR.i32" : SrcOpParser("LSHIFT_XNOR.i32", 0x1d300, 3),
        "LSHIFT_ADD.i32" : SrcOpParser("LSHIFT_ADD.i32", 0x1d400, 3),
        "LSHIFT_SUB.i32" : SrcOpParser("LSHIFT_SUB.i32", 0x1d500, 3),
        # Was 0x1d500, identical to LSHIFT_SUB.i32 above; 0x1d600 is the one
        # unused slot in this otherwise contiguous 0x100-stride run.
        # NOTE(review): inferred from the table pattern -- confirm on hardware.
        "LSHIFT_RSUB.i32" : SrcOpParser("LSHIFT_RSUB.i32", 0x1d600, 3),
        "RSHIFT_ADD.i32" : SrcOpParser("RSHIFT_ADD.i32", 0x1d700, 3),
        "RSHIFT_SUB.i32" : SrcOpParser("RSHIFT_SUB.i32", 0x1d800, 3),
        "RSHIFT_RSUB.i32" : SrcOpParser("RSHIFT_RSUB.i32", 0x1d900, 3),
        "ARSHIFT_ADD.i32" : SrcOpParser("ARSHIFT_ADD.i32", 0x1da00, 3),
        "ARSHIFT_SUB.i32" : SrcOpParser("ARSHIFT_SUB.i32", 0x1db00, 3),
        "ARSHIFT_RSUB.i32" : SrcOpParser("ARSHIFT_RSUB.i32", 0x1dc00, 3),
        "OR.i32" : SrcOpParser("OR.i32", 0x1dd18, 2),
        "AND.i32" : SrcOpParser("AND.i32", 0x1dd20, 2),
        "LSHIFT.i32" : SrcOpParser("LSHIFT.i32", 0x1dd60, 2),
        "XOR.i32" : SrcOpParser("XOR.i32", 0x1dd50, 2),
        "RSHIFT.i32" : SrcOpParser("RSHIFT.i32", 0x1dd80, 2),
        "ARSHIFT.i32" : SrcOpParser("ARSHIFT.i32", 0x1dda0, 2),
    }
class Parser:
    """
    Line reader for an assembly file: skips comment and cookie lines and
    strips in-line /* ... */ comments.
    """
    # One /* ... */ comment. Non-greedy so that several comments on a line are
    # removed individually and the comment text may contain any character,
    # including '(', ')', '/' and '*'.
    INLINE_COMMENT = re.compile(r'/\*.*?\*/')
    # A line consisting of exactly four characters from [A-Z0-9_], or any line
    # that starts with a tab.
    COOKIE = re.compile(r'^([A-Z0-9_]{4}|\t+.*)$')

    def __init__(self, asm_file, verbose):
        """
        asm_file -- object providing readlines()
        verbose  -- integer verbosity; >= 2 echoes kept lines, >= 3 also
                    echoes skipped lines
        """
        self._file = asm_file
        self._verbose = verbose
        # (zero-based index, text) of the most recently yielded line; None
        # until `lines` has produced something.
        self.last_line = None

    @property
    def lines(self):
        """
        Yield (line_number, text) for every line that still has content after
        comments are removed. line_number is one-based; the verbose echo and
        `last_line` use the zero-based index.
        """
        for i, s in enumerate(self._file.readlines()):
            # Skip comments and cookies
            if s.strip().startswith('#') or self.COOKIE.match(s):
                if self._verbose >= 3:
                    print('%d*: %s' % (i, s.rstrip()))
                continue
            if self._verbose >= 2:
                print('%d%s: %s' % (
                    i, ' ' if self._verbose >= 3 else '', s.rstrip()))
            # Strip in-line comments /* like this */
            s = self.INLINE_COMMENT.sub('', s).rstrip()
            if s != '':
                self.last_line = i, s
                yield (i+1, s)
class UniformToken:
    """Reference to one 32-bit half (low or high) of uniform number `idx`."""

    def __init__(self, idx, high32):
        self.idx, self.high32 = idx, high32

    @property
    def canonical_idx(self):
        """Index counted in 32-bit halves: two per uniform, high half last."""
        return 2 * self.idx + int(self.high32)

    def __repr__(self):
        half = 'High32' if self.high32 else 'Low32'
        return '<UniformToken %d (%s)>' % (self.canonical_idx, half)
class ImmediateToken:
    """An immediate value together with how it is read (ReadType)."""

    class ReadType(Enum):
        FULL32 = None
        LOW64 = 'x'
        HIGH64 = 'y'

    def __init__(self, value, read_type):
        self.value, self.read_type = value, read_type

    @property
    def bitlen(self):
        """32 for a FULL32 read, 64 for every other read type."""
        return 32 if self.read_type == self.ReadType.FULL32 else 64

    def __eq__(self, other):
        # Equal when both are immediates of the same width and value; which
        # 64-bit half is read does not take part in the comparison.
        if isinstance(other, type(self)):
            return self.bitlen == other.bitlen and self.value == other.value
        return False

    def __repr__(self):
        details = (id(self), self.value, self.read_type.name)
        return '<ImmediateToken at 0x%x 0x%x; type=%s>' % details
class OpResult(Enum):
    """Source operands that name the result of an op instead of a register."""
    PREV_FMA = 'T0'
    PREV_ADD = 'T1'
    THIS_FMA = 'T'

    def encode_src(self):
        """Return this operand packed as a 3-bit unsigned source field."""
        field_by_result = {
            OpResult.PREV_FMA: 6,
            OpResult.PREV_ADD: 7,
            OpResult.THIS_FMA: 3,
        }
        return bitstring.pack('uint:3', field_by_result[self])
class RegisterToken:
    """
    A token for a register that hasn't been assigned to a RegisterFile yet
    """
    def __init__(self, idx):
        self.idx = idx

    def __repr__(self):
        return '<RegisterToken for R%d at %s>' % (self.idx, hex(id(self)))

    def __eq__(self, other):
        # Only register tokens can be equal to each other. Anything else
        # yields NotImplemented (so `==` evaluates to False) instead of
        # failing with AttributeError on the missing `idx`.
        if not isinstance(other, RegisterToken):
            return NotImplemented
        return self.idx == other.idx
class RegisterFile:
class Register:
    """
    A register used by an instruction: its index, the stage that writes it
    (None when it is only read) and the port it has been assigned, if any.
    """
    # 3-bit source field emitted for a register sitting on each port that
    # encode_src() knows how to reference.
    _SRC_FIELD_BY_PORT = {0: 0, 1: 1, 3: 2}

    def __init__(self, idx, write_stage=None):
        self.idx = idx
        self.write_stage = write_stage
        self.port = None

    def encode_src(self):
        """
        Return the 3-bit source field selecting this register's port.

        Raises ValueError when the register has no port yet or sits on a port
        that has no source encoding (previously this surfaced as an
        UnboundLocalError).
        """
        try:
            field_val = self._SRC_FIELD_BY_PORT[self.port]
        except KeyError:
            raise ValueError(
                'Register %d cannot be encoded as a source from port %r' % (
                    self.idx, self.port)) from None
        return bitstring.pack('uint:3', field_val)

    def __repr__(self):
        return '<Register %d %sat 0x%x>' % (
            self.idx,
            'P%d ' % self.port if self.port is not None else '',
            id(self)
        )
class ControlField(IntEnum):
    """
    Values of the register control field.

    RegisterFile.encode() picks one of these from which stages (FMA/ADD)
    write a register, whether port 3 holds a register, and whether the
    instruction is marked as first (the FIRST_* members).
    """
    WRITE_FMA_P2 = 1
    WRITE_FMA_P2_READ_P3 = 3
    READ_P3 = 4
    WRITE_ADD_P2 = 5
    WRITE_ADD_P2_READ_P3 = 6
    FIRST_NONE = 8
    FIRST_WRITE_FMA_P2 = 9
    NONE = 11
    FIRST_READ_P3 = 12
    FIRST_WRITE_ADD_P2 = 13
    WRITE_ADD_P2_FMA_P3 = 15
class NotEnoughPorts(ParsingException):
    """Raised when `target` cannot be given a port of kind `type_`."""

    def __init__(self, type_, target):
        message = 'Not enough %s ports for %s' % (type_, target)
        super().__init__(message)
class ConstPortInUse(ParsingException):
    """Raised when the const port already holds something other than `value`."""

    def __init__(self, port_current, value):
        message = "Can't use const port for reading %s, already in use by %s" % (
            value, port_current)
        super().__init__(message)
class DisabledPort:
    """Placeholder stored in a port slot that must stay unused; always falsy."""

    def __repr__(self):
        return "<DisabledPort>"

    def __bool__(self):
        # Falsy, so truthiness checks on a port slot treat it like an empty one.
        return False
def __init__(self):
# P0/1 are read only, P2 is write only, P3 is read/write
self.ports = [None]*4
self.read_regs = dict()
self.write_regs = dict()
self.__const_port = None
def __repr__(self):
return '<RegisterFile at 0x%x; ports=%s, const_port=%s>' % (
id(self), self.ports, self.const_port)
@property
def const_port(self):
    """Current occupant of the const port, or None when it is free."""
    return self.__const_port

@const_port.setter
def const_port(self, value):
    """
    Claim the const port for `value`.

    Assigning a value equal to the current occupant is a no-op. An occupied
    port may only be reassigned when it holds an
    Instruction.PendingImmediateSlot and `value` is an ImmediateSlot instance
    or ImmediateZeroSlot itself; anything else raises ConstPortInUse.
    """
    current = self.__const_port
    if current == value:
        return
    if current is not None:
        replaceable = isinstance(current, Instruction.PendingImmediateSlot) and (
            isinstance(value, ImmediateSlot) or value is ImmediateZeroSlot)
        if not replaceable:
            raise self.ConstPortInUse(current, value)
    self.__const_port = value
def port_assignment_possible(self):
    """
    Return True when the tracked registers can still fit the ports: at most
    three reads, at most two writes and at most four registers in total.
    """
    reads = len(self.read_regs)
    writes = len(self.write_regs)
    return reads <= 3 and writes <= 2 and reads + writes <= 4
def add_reg_read(self, idx):
    """
    Return the Register tracking a read of register `idx`, creating it on
    first use.

    Raises NotEnoughPorts when the new register makes port assignment
    impossible; the register has already been recorded in read_regs by then.
    """
    try:
        return self.read_regs[idx]
    except KeyError:
        pass
    reg = self.read_regs[idx] = self.Register(idx)
    if self.port_assignment_possible():
        return reg
    raise self.NotEnoughPorts('read', reg)
def add_reg_write(self, idx, stage):
    """
    Return the Register tracking a write of register `idx` by `stage`,
    creating it on first use (an existing entry keeps its original stage).

    Raises NotEnoughPorts when the new register makes port assignment
    impossible; the register has already been recorded in write_regs by then.
    """
    try:
        return self.write_regs[idx]
    except KeyError:
        pass
    reg = self.write_regs[idx] = self.Register(idx, stage)
    if self.port_assignment_possible():
        return reg
    raise self.NotEnoughPorts('write', reg)
def _set_reg_port(self, port, reg):
if isinstance(self.ports[port], self.DisabledPort):
# Remember: we can only disable ports 1 and 0 which are both read
raise self.NotEnoughPorts('read', reg)
assert self.ports[port] is None
reg.port = port
self.ports[port] = reg
def disable_port(self, port):
    """
    Mark read port 0 or 1 as unusable for this instruction.

    Raises ParsingException when a register already occupies the port.
    """
    assert port < 2
    occupant = self.ports[port]
    if occupant is None:
        self.ports[port] = self.DisabledPort()
        return
    raise ParsingException("Port %d needs to be disabled for this instruction, but it's being used for reading register %d" % (
        port, occupant.idx))
def assign_reg_ports(self):
    """
    Distribute the tracked read and write registers over the four ports
    using _set_reg_port(), which raises NotEnoughPorts if a needed port has
    been disabled.
    """
    read_regs = list(self.read_regs.values())
    write_regs = list(self.write_regs.values())
    if len(read_regs) == 3:
        # A third read spills onto port 3; the last register added is the
        # one moved there, leaving two reads for ports 0 and 1 below.
        self._set_reg_port(3, read_regs.pop())
    elif len(write_regs) == 2:
        # Two writes: the ADD stage's result goes to port 2, the other to
        # port 3.
        for reg in write_regs:
            if reg.write_stage is add:
                port = 2
            else:
                port = 3
            self._set_reg_port(port, reg)
    elif len(write_regs) == 1:
        self._set_reg_port(2, write_regs[0])
    # NOTE(review): the write branches above are `elif`s of the three-read
    # check, so with three reads no write register receives a port even
    # though port_assignment_possible() accepts three reads plus one write --
    # confirm this is intended.
    if len(read_regs) == 2:
        # The register with the lower index takes port 0, the other port 1.
        if read_regs[0].idx < read_regs[1].idx:
            self._set_reg_port(0, read_regs.pop(0))
        else:
            self._set_reg_port(0, read_regs.pop(1))
        self._set_reg_port(1, read_regs.pop())
    elif len(read_regs) == 1:
        self._set_reg_port(0, read_regs.pop())
def encode(self, instruction):
    """
    Encode this register file for the given instruction.

    The packed result holds the control field, the per-port register
    indices and the 8 bit const field. One of two layouts is used depending
    on whether port 1 is in use. The control field is chosen from which
    stages write (instruction.writes), whether port 3 is in use, and whether
    this is the first instruction (instruction.first).
    """
    const_port = self.const_port
    if const_port is None:
        const_field = Bits(length=8, uint=0)
    elif isinstance(const_port, Bits):
        # Already an encoded field, use it verbatim
        const_field = const_port
    else:
        const_field = const_port.encode_const_field()

    # Figure out the value of the control field
    if instruction.writes[fma]:
        if instruction.writes[add]:
            control_field = self.ControlField.WRITE_ADD_P2_FMA_P3
        elif self.ports[3]:
            control_field = self.ControlField.WRITE_FMA_P2_READ_P3
        elif instruction.first:
            control_field = self.ControlField.FIRST_WRITE_FMA_P2
        else:
            control_field = self.ControlField.WRITE_FMA_P2
    elif instruction.writes[add]:
        assert instruction.writes[add].port == 2

        if self.ports[3]:
            control_field = self.ControlField.WRITE_ADD_P2_READ_P3
        elif instruction.first:
            control_field = self.ControlField.FIRST_WRITE_ADD_P2
        else:
            control_field = self.ControlField.WRITE_ADD_P2
    elif self.ports[3]:
        if instruction.first:
            control_field = self.ControlField.FIRST_READ_P3
        else:
            control_field = self.ControlField.READ_P3
    elif instruction.first:
        control_field = self.ControlField.FIRST_NONE
    else:
        control_field = self.ControlField.NONE

    # Register index for every port; falsy ports encode as 0
    port_fields = [port.idx if port else 0 for port in self.ports]

    # When not using port 3 for reading or writing, the compiler always
    # seems to set its port index to the same value as port 2's index...
    if not self.ports[3]:
        port_fields[3] = port_fields[2]
    # ...and vice versa.
    if not self.ports[2]:
        port_fields[2] = port_fields[3]

    if self.ports[1]:
        if port_fields[0] > 31:
            # We made sure when assigning ports that port_fields[1] >
            # port_fields[0]. By subtracting both from 63, we will have
            # that port_fields[0] > port_fields[1], which will cause the
            # decoder to make the exact opposite transform and get back the
            # original register indices. Also, the new port_fields[0] will
            # be at most 31, so it will fit in 5 bits.
            #
            # Fixed: the previous loop enumerated port_fields[0:1] (port 0
            # only) and used the register *value* as the list index, which
            # raised IndexError. Both ports 0 and 1 must be flipped.
            for port in (0, 1):
                port_fields[port] = 63 - port_fields[port]

        return bitstring.pack('uint:4, uint:6, uint:5, uint:6, uint:6, bits:8',
                              control_field,
                              port_fields[1],
                              port_fields[0],
                              port_fields[3],
                              port_fields[2],
                              const_field)
    else:
        return bitstring.pack('uint:4=0, uint:4, bool, uint:6, uint:6, uint:6, bits:8',
                              control_field,
                              not bool(self.ports[0]),
                              port_fields[0],
                              port_fields[3],
                              port_fields[2],
                              const_field)
class ConstantSrc:
    """ Source operand selecting either the high or the low half of a constant """
    def __init__(self, high32):
        self.high32 = high32

    def encode_src(self):
        """ Return the 3 bit source field: 5 for the high half, 4 for the low half """
        if self.high32:
            src_field = 5
        else:
            src_field = 4
        return Bits(length=3, uint=src_field)

    def __repr__(self):
        half = 'high' if self.high32 else 'low'
        return '<ConstantSrc (%s) at 0x%x>' % (half, id(self))
class ImmediateZeroSlot:
    """ Slot-like class (used without instantiation) whose const field is all zeroes """
    @classmethod
    def encode_const_field(cls):
        """ Return the 8 bit const field, which is always zero """
        return Bits(uint=0, length=8)

    @classmethod
    def get_src(cls, token):
        """
        Return the ConstantSrc for an immediate token; full 32 bit reads are
        not expected here
        """
        read_type = token.read_type
        assert read_type != ImmediateToken.ReadType.FULL32
        wants_high = read_type == ImmediateToken.ReadType.HIGH64
        return ConstantSrc(wants_high)
class ImmediateSlot:
    """ A slot that can hold one 64 bit const, or two 32 bit consts. """
    # Value written into the const field for each slot index
    IDX_MAP = (4, 5, 6, 7, 2, 3)

    def __init__(self, contents, idx):
        assert len(contents) <= 2
        self.idx = idx
        self.contents = contents

    def __contains__(self, item):
        """ True when every element of item is present in this slot """
        for wanted in tuple(item):
            if wanted not in self.contents:
                return False
        return True

    def get_src(self, token):
        """
        Return the ConstantSrc reading token from this slot. A full 32 bit
        read uses the high half when the token is the slot's second entry
        """
        if token.read_type == ImmediateToken.ReadType.FULL32:
            use_high = self.contents.index(token) == 1
        else:
            use_high = token.read_type == ImmediateToken.ReadType.HIGH64
        return ConstantSrc(use_high)

    def encode_contents(self):
        """ Concatenate the encoded tokens, last entry first """
        return sum(Bits(length=tok.bitlen, uint=tok.value)
                   for tok in reversed(self.contents))

    def encode_const_field(self):
        """
        Build the 8 bit const field: the mapped slot index in bits 1-3 and
        bits 60-63 of the encoded contents in bits 4-7
        """
        field = BitArray(8)
        field[1:4] = self.IDX_MAP[self.idx]
        field[4:8] = self.encode_contents()[60:64]
        return field

    def __repr__(self):
        return "<ImmediateSlot #%d at 0x%x; contents=%s>" % (
            self.idx, id(self), self.contents)
class Uniform:
    """ Const port contents selecting one pair of uniforms """
    def __init__(self, idx):
        self.idx = idx

    def get_src(self, token):
        """
        Return the ConstantSrc for a uniform token. Raises ParsingException
        if the token refers to a different uniform pair than this one
        """
        if token.idx == self.idx:
            return ConstantSrc(token.high32)
        raise ParsingException("Can't read from uniform %d, const port already being used for uniforms %s" % (
            token.canonical_idx, self.canonical_idx_str()))

    def encode_const_field(self):
        """ Return the 8 bit const field: the top bit set, OR'd with the index """
        return Bits(length=8, uint=0x80 | self.idx)

    def canonical_idx_str(self):
        """ Return the two canonical uniform indices covered, as 'N&N+1' """
        low = self.idx * 2
        return "%d&%d" % (low, low + 1)

    def __repr__(self):
        return "<Uniform %s at 0x%x>" % (self.canonical_idx_str(), id(self))
class Instruction:
    """
    A single instruction: one FMA stage, one ADD stage, and the register
    file they share
    """
    BIT_LENGTH = 78

    class ImmediateCountError(ParsingException):
        """ Raised when there is no room left for an immediate """
        def __init__(self, token):
            if token.read_type == ImmediateToken.ReadType.FULL32:
                mod_str = ""
            else:
                # Fixed: the read type belongs to the token. The exception
                # under construction has no read_type attribute, so the
                # previous self.read_type raised AttributeError.
                mod_str = ".%s" % token.read_type.value

            super().__init__("No space left for immediate 0x%x%s" % (token.value,
                                                                     mod_str))

    class PendingImmediateSlot:
        """
        Immediates collected for an instruction that have not been assigned
        to a real immediate slot yet (at most 64 bits worth)
        """
        def __init__(self):
            self.__contents = []

        def __iter__(self):
            return self.__contents.__iter__()

        def __len__(self):
            return self.__contents.__len__()

        def __repr__(self):
            return '<PendingImmediateSlot at 0x%x; contents=%s>' % (
                id(self), self.__contents)

        @property
        def contents(self):
            """ The list of pending immediate tokens """
            return self.__contents

        @property
        def bitlen(self):
            """ Total bit length of all pending tokens """
            return sum(t.bitlen for t in self.contents)

        def add_immediate(self, token):
            """
            Add token unless it is already pending. Raises ParsingException
            if the slot would grow beyond 64 bits
            """
            if token in self.__contents:
                return

            if self.bitlen + token.bitlen > 64:
                raise ParsingException('Too many constants for one instruction cycle')

            self.__contents.append(token)

    def __init__(self, first):
        self.first = first
        self.reg_file = RegisterFile()
        self.fma = None
        self.add = None
        # Register written by each stage, keyed by stage
        self.writes = {fma: None, add: None}

    def __repr__(self):
        return '<Instruction at 0x%x; fma=%s, add=%s, first=%s, writes=%s, reg_file=%s>' % (
            id(self), self.fma, self.add, self.first, self.writes,
            self.reg_file
        )

    def add_pending_immediate(self, token):
        """
        Add a pending immediate token to this instruction and check that we
        will still be able to encode this instruction.
        """
        pending_slot = self.reg_file.const_port
        if not self.has_pending_immediates():
            pending_slot = self.PendingImmediateSlot()
            self.reg_file.const_port = pending_slot

        pending_slot.add_immediate(token)

    def resolve_immediates(self, clause, slot=None):
        """
        Put the given slot on the const port (asking the clause for one when
        none is given) and replace every ImmediateToken source of both
        stages with the matching source from that slot
        """
        if not slot:
            slot = clause._get_immediate_slot(self.reg_file.const_port.contents)

        self.reg_file.const_port = slot

        # Replace ImmediateTokens in our src list with proper immediate
        # sources
        for stage in self.stages:
            for idx, src in enumerate(stage.srcs):
                if isinstance(src, ImmediateToken):
                    stage.srcs[idx] = slot.get_src(src)

    def has_pending_immediates(self):
        """ True while the const port holds a PendingImmediateSlot """
        return isinstance(self.reg_file.const_port,
                          self.PendingImmediateSlot)

    def add_uniform(self, token):
        """
        Make sure the const port holds a Uniform (creating one from the
        token if needed) and return the source reading the token from it
        """
        const_port = self.reg_file.const_port
        if not isinstance(const_port, Uniform):
            const_port = Uniform(token.idx)
            self.reg_file.const_port = const_port

        return const_port.get_src(token)

    @property
    def pending_stage(self):
        """ The next stage to fill in: fma, then add, then None """
        if self.fma is None:
            return fma
        elif self.add is None:
            return add
        return None

    @property
    def stages(self):
        """ Both stage ops as a (fma, add) tuple """
        return (self.fma, self.add)

    def encode(self):
        """ Concatenate the encoded ADD stage, FMA stage and register file """
        return (  self.add.encode()
                + self.fma.encode()
                + self.reg_file.encode(self))
class Clause:
MAX_CONSTS_ALLOWED = 5  # FIXME: maybe make this 6 later?

# Lookup table keyed by (instructions, constants), giving the 'pos' value
QUADWORD_FORMATS = {
    (1, 0): 0x0,
    (2, 0): 0x1,
    (4, 0): 0x2,
    (3, 1): 0x3,
    (5, 1): 0x4,
    (4, 2): 0x5,
    (7, 0): 0x6,
    (6, 1): 0x7,
    (5, 3): 0x8,
    (8, 1): 0x9,
    (7, 2): 0xa,
    (6, 3): 0xb,
    (8, 3): 0xc,
    (7, 4): 0xd,
}
def _header_flag(func):
    """
    Decorator for header flag parsers that need an argument: the wrapped
    parser raises ParsingException when the value is None
    """
    def with_required_value(self, value):
        if value is None:
            raise ParsingException("Missing arguments for flag")
        func(self, value)

    return with_required_value
def _bool_header_flag(func):
    """
    Decorator for header flag parsers that take no argument: the wrapped
    parser raises ParsingException when a value is supplied
    """
    def without_value(self, unused):
        if unused is not None:
            raise ParsingException("This flag takes no arguments")
        func(self)

    return without_value
class ClauseIdError(ParsingException):
    """ Raised for an invalid clause id, with an optional explanation """
    def __init__(self, id_str, msg=None):
        detail = ': %s' % msg if msg else ''
        super().__init__("Invalid clause id '%s'%s" % (id_str, detail))
@_header_flag
def _parse_header_id(self, id_):
    """
    Parse the clause id flag, written as a number from 0 to 5 followed by
    'u', and store it as the scoreboard entry. Raises ClauseIdError for
    anything else
    """
    # endswith() also rejects an empty argument cleanly, where id_[-1]
    # would have raised IndexError
    if not id_.endswith('u'):
        raise self.ClauseIdError(id_)

    try:
        id_ = int(id_[:-1])
    except ValueError as e:
        raise self.ClauseIdError(id_) from e

    if id_ < 0 or id_ > 5:
        raise self.ClauseIdError(id_, "ID must be between 0 and 5")

    self.scoreboard_entry = id_
@_header_flag
def _parse_header_next_wait(self, next_wait):
    """
    Parse a comma separated list of ids (each 0 to 7) and append them to
    the scoreboard dependencies. Raises ClauseIdError on a bad entry
    """
    for field in next_wait.split(','):
        try:
            dep = int(field.strip())
        except ValueError as e:
            raise self.ClauseIdError(field) from e

        if not 0 <= dep <= 7:
            raise self.ClauseIdError(dep, "ID must be between 0 and 7")

        self.scoreboard_deps.append(dep)
@_bool_header_flag
def _parse_header_data_reg_barrier(self):
    """ Argument-less flag: turn on data_reg_write_barrier """
    self.data_reg_write_barrier = True
@_bool_header_flag
def _parse_header_eos(self):
    """ Argument-less flag: mark this clause as the end of the shader """
    self.end_of_shader = True
@_bool_header_flag
def _parse_header_nbb(self):
    """ Argument-less flag: turn off back_to_back """
    self.back_to_back = False
def _validate_header_nbb(self):
    """
    Validator for nbb: one of the branch flags must have set
    branch_conditional, otherwise ParsingException is raised
    """
    if hasattr(self, 'branch_conditional'):
        return
    raise ParsingException(
        'Neither branch-cond or branch-uncond specified')
@_bool_header_flag
def _parse_header_we(self):
    """ Argument-less flag: turn on elide_writes """
    self.elide_writes = True
class BackToBackError(ParsingException):
    """ Raised by the branch flag validators while back_to_back is still set """
    def __init__(self):
        message = "Flag isn't applicable without nbb"
        super().__init__(message)
@_bool_header_flag
def _parse_header_branch_cond(self):
    """ Argument-less flag: set branch_conditional to True """
    self.branch_conditional = True
def _validate_header_branch_cond(self):
    """
    Validator for branch-cond: raises BackToBackError while back_to_back is
    set, and ParsingException if branch_conditional ended up False
    """
    if self.back_to_back:
        raise self.BackToBackError()
    if self.branch_conditional:
        return
    raise ParsingException("Can't use this flag with branch-uncond")
@_bool_header_flag
def _parse_header_branch_uncond(self):
    """ Argument-less flag: set branch_conditional to False """
    self.branch_conditional = False
def _validate_header_branch_uncond(self):
    """
    Validator for branch-uncond: raises BackToBackError while back_to_back
    is set, and ParsingException if branch_conditional ended up True
    """
    if self.back_to_back:
        raise self.BackToBackError()
    if not self.branch_conditional:
        return
    raise ParsingException("Can't use this flag with branch-cond")
@_bool_header_flag
def _parse_header_unk0(self):
    """ Argument-less flag: set unk0 """
    self.unk0 = True
@_header_flag
def _parse_header_unk1(self, value):
    """
    Parse the unk1 flag, an integer that has to fit the 2 bit wide header
    field (0-3). Raises ParsingException otherwise
    """
    try:
        value = int(value)
    except ValueError as e:
        raise ParsingException("Must be a value between 0-3") from e

    # Explicit range check: bit_length() alone let -1..-3 through, which
    # cannot be packed as an unsigned 2 bit field
    if not 0 <= value <= 3:
        raise ParsingException("Invalid value (known bitlength is 2)")

    self.unk1 = value
@_bool_header_flag
def _parse_header_unk3(self):
    """ Argument-less flag: set unk3 """
    self.unk3 = True
@_header_flag
def _parse_header_unk4(self, value):
    """
    Placeholder for the unk4 flag, which is not supported yet. Always
    raises NotImplementedError when given a value
    """
    # Fixed: the _header_flag wrapper passes the value through, so it must
    # be accepted here, and NotImplemented is a constant rather than an
    # exception class (calling it raised TypeError)
    raise NotImplementedError("it's 11 bits, the disasm doesn't have a format for this yet")
def _parse_header_flag(self, flag, value):
    """
    Try looking up the parser for a given header flag and parse it.

    The parser is the method named _parse_header_<flag>, with dashes in the
    flag replaced by underscores. Raises ParsingException if there is no
    such parser, or if the parser rejects the value.
    """
    func_name = '_parse_header_%s' % flag.replace('-', '_')

    # Only the lookup is guarded against AttributeError, so that an
    # AttributeError raised inside a parser is not misreported as an
    # unknown flag
    try:
        parser = getattr(self, func_name)
    except AttributeError as e:
        raise ParsingException("Unknown flag '%s' in clause header" % flag) from e

    try:
        parser(value)
    except ParsingException as e:
        raise ParsingException("Invalid value for flag '%s': %s" % (flag, e.msg)) from e
def _validate_header_flag(self, flag):
    """
    Run the validator for a header flag, if one exists. Validators are
    methods named _validate_header_<flag> (dashes replaced by underscores)
    and check that the flag and its value are still valid once all of the
    other flags have been parsed
    """
    validator = getattr(
        self, '_validate_header_%s' % flag.replace('-', '_'), None)
    if validator is None:
        return

    try:
        validator()
    except ParsingException as e:
        raise ParsingException("Invalid use of flag '%s': %s" % (flag, e.msg)) from e
def _parse_header(self, header):
    """
    Parse every flag/value pair of the header mapping, then run the
    validators for all flags that were seen
    """
    seen_flags = set()
    for name, arg in header.items():
        seen_flags.add(name)
        self._parse_header_flag(name, arg)

    for name in seen_flags:
        self._validate_header_flag(name)
def encode_scoreboard_deps(self):
    """ Return the scoreboard dependencies as an 8 bit mask, one bit per id """
    mask = 0
    for dep_id in self.scoreboard_deps:
        mask = mask | (1 << dep_id)

    return Bits(uint=mask, length=8)
def encode_header(self):
    """
    Pack the clause header fields into a Bits object, in the field order
    given by the format string
    """
    header_fmt = """
bool=unk4,
uint:4=next_clause_type,
bool=unk3,
uint:4=clause_type,
uint:3=scoreboard_index,
bits:8=scoreboard_deps,
uint:6=data_reg,
bool=data_reg_write_barrier,
bool=branch_cond,
bool=elide_writes,
uint:2=unk1,
bool=no_end_of_shader,
bool=back_to_back,
uint:11=unk0
"""
    fields = {
        'unk4': self.unk4,
        'next_clause_type': self.next_clause_type,
        'unk3': self.unk3,
        'clause_type': self.clause_type,
        # A missing scoreboard entry or data register encodes as 0
        'scoreboard_index': self.scoreboard_entry or 0,
        'scoreboard_deps': self.encode_scoreboard_deps(),
        'data_reg': self.data_reg or 0,
        'data_reg_write_barrier': self.data_reg_write_barrier,
        # branch_conditional is only consulted when not back-to-back
        'branch_cond': True if self.back_to_back else self.branch_conditional,
        'elide_writes': self.elide_writes,
        'unk1': self.unk1,
        'no_end_of_shader': not self.end_of_shader,
        'back_to_back': self.back_to_back,
        'unk0': 0,  # TODO
    }
    return Bits(bitstring.pack(header_fmt, **fields))
@property
def data_reg(self):
    """ The clause's data register, or None if none has been set """
    return self.__data_reg

@data_reg.setter
def data_reg(self, val):
    """
    Set the data register. Raises ParsingException when a different
    register has already been set
    """
    current = self.__data_reg
    if current is not None and val != current:
        raise ParsingException(("Only one data register allowed (already "
                                "had R%d)") % current)
    self.__data_reg = val
@property
def clause_type(self):
    """ The instruction type currently recorded for this clause """
    return self.__clause_type

@clause_type.setter
def clause_type(self, value):
    """
    Set the clause's instruction type. Raises ParsingException when the
    clause already has a different type other than ClauseType.NONE
    """
    assert isinstance(self.__clause_type, ClauseType)
    if (self.__clause_type is not ClauseType.NONE and
        self.__clause_type != value):
        # Fixed: the arguments were swapped. The requested type is
        # `value`; the type the clause already has is the stored one.
        raise ParsingException(
            ("Would need the clause's instruction type to be %s, but the"
             " instruction type is already %s") % (
                 value.name, self.__clause_type.name)
        )

    self.__clause_type = value
def is_first_instruction(self):
    """ True while no instruction has been added to this clause yet """
    return not self.instructions
def is_finished(self):
    """
    True when there is no partially built instruction pending.

    The previous hasattr(self, '__pending_inst') check used the unmangled
    attribute name, so it never found the attribute and always reported
    the clause as finished.
    """
    try:
        return not self.__pending_inst
    except AttributeError:
        # No pending instruction has ever been stored
        return True
def _get_immediate_slot(self, inst_reads):
    """
    Find an immediate slot that fulfills the reads of the given
    instruction. If there isn't one, create one, append it to the clause's
    immediate slots and return it.

    inst_reads is the list of immediate tokens the instruction reads; it
    is not modified.
    """
    for slot in self.immediate_slots:
        if inst_reads in slot:
            return slot

    new_slot = None
    if inst_reads[0].bitlen == 32 and len(inst_reads) == 2:
        # No slot shares the exact same contents as ours. We can still
        # however, reuse the immediate slot of another instruction if the
        # slot is still pending, and its bottom 32 bits contain one of the
        # two 32 bit immediates in our own slots
        #
        # Fixed: iterate over this clause's instructions; the previous
        # code referenced an undefined name `clause`.
        for inst in self.instructions:
            if not inst.has_pending_immediates():
                continue

            pending_slot = inst.reg_file.const_port
            bottom_half = pending_slot.contents[0]
            assert bottom_half.bitlen != 64

            if bottom_half not in inst_reads:
                continue

            # Work on a copy so the caller's list is left untouched
            remaining = list(inst_reads)
            remaining.remove(bottom_half)
            new_slot = ImmediateSlot((bottom_half, remaining[0]),
                                     len(self.immediate_slots))
            inst.resolve_immediates(self, new_slot)
            break

    if not new_slot:
        new_slot = ImmediateSlot(inst_reads, len(self.immediate_slots))

    self.immediate_slots.append(new_slot)
    return new_slot
def get_pending_instruction(self):
    """
    Return the instruction currently being built, starting a new one when
    none is pending
    """
    if self.__pending_inst:
        return self.__pending_inst

    self.__pending_inst = Instruction(self.is_first_instruction())
    return self.__pending_inst
def add_instruction_stage(self, op, dst, srcs):
"""
Begin/finish building an instruction based off the the given tokens
"""
if len(self.instructions) > 8:
raise ParsingException("Clauses can only contain up to 8 instructions")
inst = self.__pending_inst
for i, src in enumerate(srcs):
if isinstance(src, RegisterToken):
srcs[i] = inst.reg_file.add_reg_read(src.idx)
elif isinstance(src, ImmediateToken):
inst.add_pending_immediate(src)
elif isinstance(src, UniformToken):
srcs[i] = inst.add_uniform(src)
stage = inst.pending_stage
parser = stage.OP_MAP[op]
if stage is fma:
# Each instruction contains the writes for the previous instruction
# in its register file, so add any pending writes from the previous
# instruction that we have into our register file
for prev_stage, write in self.__pending_writes.items():
if write:
inst.writes[prev_stage] = \
inst.reg_file.add_reg_write(write.idx, prev_stage)
self.__pending_writes = {fma: None, add: None}
inst.fma = parser.parse_op(self, inst.reg_file, srcs)
else:
inst.add = parser.parse_op(self, inst.reg_file, srcs)
# We attempt to match what the compiler does here. Basically: If
# this instruction uses enough immediates to fill an entire
# immediate slot, we resolve its immediates immediately. If
# there's space for one more immediate, we wait until the end of
# the clause to resolve the immediates.
# The only exception we make to this is if we can use the special
# immediate zero slot.
if inst.has_pending_immediates():
pending = inst.reg_file.const_port
# FIXME: it looks like the compiler makes some interesting
# decisions regarding where it puts dummy clauses, for some
# reason consts 8/9 are swapped with consts 10/11 when the
# last instruction is the one reusing the same constant twice