Commit e67e0726 authored by Alyssa Rosenzweig's avatar Alyssa Rosenzweig 💜
Browse files

panfrost: Implement Midgard shader toolchain



This patch implements the free Midgard shader toolchain: the assembler,
the disassembler, and the NIR-based compiler. The assembler is a
standalone inaccessible Python script for reference purposes. The
disassembler and the compiler are implemented in C, accessible via the
standalone `midgard_compiler` binary. Later patches will use these
interfaces from the driver for online compilation.
Signed-off-by: Alyssa Rosenzweig's avatarAlyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Jason Ekstrand's avatarJason Ekstrand <jason@jlekstrand.net>
Acked-by: Rob Clark's avatarRob Clark <robdclark@gmail.com>
Acked-by: Emma Anholt's avatarEric Anholt <eric@anholt.net>
Acked-by: Emil Velikov's avatarEmil Velikov <emil.velikov@collabora.com>
parent 61d3ae6e
......@@ -23,6 +23,10 @@ files_panfrost = files(
'pan_public.h',
'pan_screen.c',
'pan_screen.h',
'midgard/midgard_compile.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
)
inc_panfrost = [
......@@ -32,12 +36,25 @@ inc_panfrost = [
inc_drm_uapi,
inc_include,
inc_src,
include_directories('include')
include_directories('include'),
include_directories('midgard'),
]
midgard_nir_algebraic_c = custom_target(
'midgard_nir_algebraic.c',
input : 'midgard/midgard_nir_algebraic.py',
output : 'midgard_nir_algebraic.c',
command : [
prog_python, '@INPUT@',
'-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
],
capture : true,
depend_files : nir_algebraic_py,
)
libpanfrost = static_library(
'panfrost',
[files_panfrost],
[files_panfrost, midgard_nir_algebraic_c],
dependencies: [
dep_thread,
idep_nir
......@@ -50,3 +67,26 @@ driver_panfrost = declare_dependency(
compile_args : ['-DGALLIUM_PANFROST', '-Wno-pointer-arith'],
link_with : [libpanfrost, libpanfrostwinsys],
)
files_midgard = files(
'midgard/midgard_compile.c',
'midgard/cppwrap.cpp',
'midgard/disassemble.c',
'midgard/cmdline.c',
)
midgard_compiler = executable(
'midgard_compiler',
[files_midgard, midgard_nir_algebraic_c],
include_directories : inc_panfrost,
dependencies : [
dep_thread,
idep_nir
],
link_with : [
libgallium,
libglsl_standalone,
libmesa_util
],
build_by_default : true
)
"""
Copyright (C) 2018 Alyssa Rosenzweig
Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import sys
import pprint
import struct
program = []
# Definitions from cwabbott's tools
t6xx_alu_ops = {
"fadd": 0x10,
"fmul": 0x14,
"fmin": 0x28,
"fmax": 0x2C,
"fmov": 0x30,
"ffloor": 0x36,
"fceil": 0x37,
"fdot3": 0x3C,
"fdot3r": 0x3D,
"fdot4": 0x3E,
"freduce": 0x3F,
"iadd": 0x40,
"isub": 0x46,
"imul": 0x58,
"imov": 0x7B,
"feq": 0x80,
"fne": 0x81,
"flt": 0x82,
"fle": 0x83,
"f2i": 0x99,
"f2u8": 0x9C,
"u2f": 0xBC,
"ieq": 0xA0,
"ine": 0xA1,
"ilt": 0xA4,
"ile": 0xA5,
"iand": 0x70,
"ior": 0x71,
"inot": 0x72,
"iandnot": 0x74,
"ixor": 0x76,
"ball": 0xA9,
"bany": 0xB1,
"i2f": 0xB8,
"csel": 0xC5,
"fatan_pt2": 0xE8,
"frcp": 0xF0,
"frsqrt": 0xF2,
"fsqrt": 0xF3,
"fexp2": 0xF4,
"flog2": 0xF5,
"fsin": 0xF6,
"fcos": 0xF7,
"fatan2_pt1": 0xF9,
}
t6xx_alu_bits = {
"vmul": 17,
"sadd": 19,
"vadd": 21,
"smul": 23,
"lut": 25,
"br": 26,
"branch": 27,
"constants": 32
}
t6xx_alu_size_bits = {
"vmul": 48,
"sadd": 32,
"vadd": 48,
"smul": 32,
"lut": 48,
"br": 16,
"branch": 48
}
t6xx_outmod = {
"none": 0,
"pos": 1,
"int": 2,
"sat": 3
}
t6xx_reg_mode = {
"quarter": 0,
"half": 1,
"full": 2,
"double": 3
}
t6xx_dest_override = {
"lower": 0,
"upper": 1,
"none": 2
}
t6xx_load_store_ops = {
"ld_st_noop": 0x03,
"ld_attr_16": 0x95,
"ld_attr_32": 0x94,
"ld_vary_16": 0x99,
"ld_vary_32": 0x98,
"ld_uniform_16": 0xAC,
"ld_uniform_32": 0xB0,
"st_vary_16": 0xD5,
"st_vary_32": 0xD4,
"ld_color_buffer_8": 0xBA
}
t6xx_tag = {
"texture": 0x3,
"load_store": 0x5,
"alu4": 0x8,
"alu8": 0x9,
"alu12": 0xA,
"alu16": 0xB,
}
def is_tag_alu(tag):
return (tag >= t6xx_tag["alu4"]) and (tag <= t6xx_tag["alu16"])
# Just an enum
ALU = 0
LDST = 1
TEXTURE = 2
# Constant types supported, mapping the constant prefix to the Python format
# string and the coercion function
constant_types = {
"f": ("f", float),
"h": ("e", float),
"i": ("i", int),
"s": ("h", int)
}
compact_branch_op = {
"jump": 1,
"branch": 2,
"discard": 4,
"write": 7
}
branch_condition = {
"false": 1,
"true": 2,
"always": 3,
}
# TODO: What else?
texture_op = {
"normal": 0x11,
"texelfetch": 0x14
}
texture_fmt = {
"2d": 0x02,
"3d": 0x03
}
with open(sys.argv[1], "r") as f:
for ln in f:
space = ln.strip().split(" ")
instruction = space[0]
rest = " ".join(space[1:])
arguments = [s.strip() for s in rest.split(",")]
program += [(instruction, arguments)]
swizzle_component = {
"x": 0,
"y": 1,
"z": 2,
"w": 3
}
def decode_reg_name(reg_name):
ireg = 0
upper = False
half = False
if reg_name[0] == 'r':
ireg = int(reg_name[1:])
elif reg_name[0] == 'h':
rreg = int(reg_name[2:])
# Decode half-register into its full register's half
ireg = rreg >> 1
upper = rreg & 1
half = True
else:
# Special case for load/store addresses
ireg = int(reg_name)
return (ireg, half, upper)
def standard_swizzle_from_parts(swizzle_parts):
swizzle_s = swizzle_parts[1] if len(swizzle_parts) > 1 else "xyzw"
swizzle = 0
for (i, c) in enumerate(swizzle_s):
swizzle |= swizzle_component[c] << (2 * i)
return swizzle
def mask_from_parts(mask_parts, large_mask):
mask_s = mask_parts[1] if len(mask_parts) > 1 else "xyzw"
if large_mask:
mask = sum([(3 << (2*swizzle_component[c]) if c in mask_s else 0) for c in "xyzw"])
else:
mask = sum([(1 << swizzle_component[c] if c in mask_s else 0) for c in "xyzw"])
return (mask, mask_s)
def decode_reg(reg):
if reg[0] == "#":
# Not actually a register, instead an immediate float
return (True, struct.unpack("H", struct.pack("e", float(reg[1:])))[0], 0, 0, 0, 0)
# Function call syntax used in abs() modifier
if reg[-1] == ')':
reg = reg[:-1]
swizzle_parts = reg.split(".")
reg_name = swizzle_parts[0]
modifiers = 0
if reg_name[0] == '-':
modifiers |= 2
reg_name = reg_name[1:]
if reg_name[0] == 'a':
modifiers |= 1
reg_name = reg_name[len("abs("):]
(ireg, half, upper) = decode_reg_name(reg_name)
return (False, ireg, standard_swizzle_from_parts(swizzle_parts), half, upper, modifiers)
def decode_masked_reg(reg, large_mask):
mask_parts = reg.split(".")
reg_name = mask_parts[0]
(ireg, half, upper) = decode_reg_name(reg_name)
(mask, mask_s) = mask_from_parts(mask_parts, large_mask)
component = max([0] + [swizzle_component[c] for c in "xyzw" if c in mask_s])
return (ireg, mask, component, half, upper)
# TODO: Fill these in XXX
# Texture pipeline registers in r28-r29
TEXTURE_BASE = 28
def decode_texture_reg_number(reg):
r = reg.split(".")[0]
if r[0] == "r":
return (True, int(r[1:]) - TEXTURE_BASE, 0)
else:
no = int(r[2:])
return (False, (no >> 1) - TEXTURE_BASE, no & 1)
def decode_texture_reg(reg):
(full, select, upper) = decode_texture_reg_number(reg)
# Swizzle mandatory for texture registers, afaict
swizzle = reg.split(".")[1]
swizzleL = swizzle_component[swizzle[0]]
swizzleR = swizzle_component[swizzle[1]]
return (full, select, upper, swizzleR, swizzleL)
def decode_texture_out_reg(reg):
(full, select, upper) = decode_texture_reg_number(reg)
(mask, _) = mask_from_parts(reg.split("."), False)
return (full, select, upper, mask)
instruction_stream = []
for p in program:
ins = p[0]
arguments = p[1]
family = ins_mod = ins.split(".")[0]
ins_op = (ins + ".").split(".")[1]
ins_outmod = (ins + "." + ".").split(".")[2]
try:
out_mod = t6xx_outmod[ins_outmod]
except:
out_mod = 0
if ins in t6xx_load_store_ops:
op = t6xx_load_store_ops[ins]
(reg, mask, component, half, upper) = decode_masked_reg(p[1][0], False)
(immediate, address, swizzle, half, upper, modifiers) = decode_reg(p[1][1])
unknown = int(p[1][2], 16)
b = (op << 0) | (reg << 8) | (mask << 13) | (swizzle << 17) | (unknown << 25) | (address << 51)
instruction_stream += [(LDST, b)]
elif ins_op in t6xx_alu_ops:
op = t6xx_alu_ops[ins_op]
(reg_out, mask, out_component, half0, upper0) = decode_masked_reg(p[1][0], True)
(_, reg_in1, swizzle1, half1, upper1, mod1) = decode_reg(p[1][1])
(immediate, reg_in2, swizzle2, half2, upper2, mod2) = decode_reg(p[1][2])
if immediate:
register_word = (reg_in1 << 0) | ((reg_in2 >> 11) << 5) | (reg_out << 10) | (1 << 15)
else:
register_word = (reg_in1 << 0) | (reg_in2 << 5) | (reg_out << 10)
if ins_mod in ["vadd", "vmul", "lut"]:
io_mode = t6xx_reg_mode["half" if half0 else "full"]
repsel = 0
i1half = half1
i2block = 0
output_override = 2 # NORMAL, TODO
wr_mask = 0
if (ins_outmod == "quarter"):
io_mode = t6xx_reg_mode["quarter"]
if half0:
# TODO: half actually
repsel = 2 * upper1
else:
repsel = upper1
if half0:
# Rare case...
(_, halfmask, _, _, _) = decode_masked_reg(p[1][0], False)
wr_mask = halfmask
else:
wr_mask = mask
if immediate:
# Inline constant: lower 11 bits
i2block = ((reg_in2 & 0xFF) << 3) | ((reg_in2 >> 8) & 0x7)
else:
if half0:
# TODO: replicate input 2 if half
pass
else:
# TODO: half selection
i2block = upper2 | (half2 << 2)
i2block |= swizzle2 << 3
# Extra modifier for some special cased stuff
try:
special = ins.split(".")[3]
if special == "low":
output_override = 0 # low
elif special == "fulllow":
# TODO: Not really a special case, just a bug?
io_mode = t6xx_reg_mode["full"]
output_override = 0 #low
wr_mask = 0xFF
except:
pass
instruction_word = (op << 0) | (io_mode << 8) | (mod1 << 10) | (repsel << 12) | (i1half << 14) | (swizzle1 << 15) | (mod2 << 23) | (i2block << 25) | (output_override << 36) | (out_mod << 38) | (wr_mask << 40)
elif ins_mod in ["sadd", "smul"]:
# TODO: What are these?
unknown2 = 0
unknown3 = 0
i1comp_block = 0
if half1:
i1comp_block = swizzle1 | (upper1 << 2)
else:
i1comp_block = swizzle1 << 1
i2block = 0
if immediate:
# Inline constant is splattered in a... bizarre way
i2block = (((reg_in2 >> 9) & 3) << 0) | (((reg_in2 >> 8) & 1) << 2) | (((reg_in2 >> 5) & 7) << 3) | (((reg_in2 >> 0) & 15) << 6)
else:
# TODO: half register
swizzle2 = (swizzle2 << 1) & 0x1F
i2block = (mod2 << 0) | ((not half2) << 2) | (swizzle2 << 3) | (unknown2 << 5)
outcomp_block = 0
if True:
outcomp_block = out_component << 1
else:
# TODO: half register
pass
instruction_word = (op << 0) | (mod1 << 8) | ((not half1) << 10) | (i1comp_block << 11) | (i2block << 14) | (unknown3 << 25) | (out_mod << 26) | ((not half0) << 28) | (outcomp_block) << 29
else:
instruction_word = op
instruction_stream += [(ALU, ins_mod, register_word, instruction_word)]
elif family == "texture":
# Texture ops use long series of modifiers to describe their needed
# capabilities, seperated by dots. Decode them here
parts = ins.split(".")
# First few modifiers are fixed, like an instruction name
tex_op = parts[1]
tex_fmt = parts[2]
# The remaining are variable, but strictly ordered
parts = parts[3:]
op = texture_op[tex_op]
# Some bits are defined directly in the modifier list
shadow = "shadow" in parts
cont = "cont" in parts
last = "last" in parts
has_filter = "raw" not in parts
# The remaining need order preserved since they have their own arguments
argument_parts = [part for part in parts if part not in ["shadow", "cont", "last", "raw"]]
bias_lod = 0
for argument, part in zip(argument_parts, arguments[4:]):
if argument == "bias":
bias_lod = int(float(part) * 256)
else:
print("Unknown argument: " + str(argument))
fmt = texture_fmt[tex_fmt]
has_offset = 0
magic1 = 1 # IDEK
magic2 = 2 # Where did this even come from?!
texture_handle = int(arguments[1][len("texture"):])
sampler_parts = arguments[2].split(".")
sampler_handle = int(sampler_parts[0][len("sampler"):])
swizzle0 = standard_swizzle_from_parts(sampler_parts)
(full0, select0, upper0, mask0) = decode_texture_out_reg(arguments[0])
(full1, select1, upper1, swizzleR1, swizzleL1) = decode_texture_reg(arguments[3])
tex = (op << 0) | (shadow << 6) | (cont << 8) | (last << 9) | (fmt << 10) | (has_offset << 15) | (has_filter << 16) | (select1 << 17) | (upper1 << 18) | (swizzleL1 << 19) | (swizzleR1 << 21) | (0 << 23) | (magic2 << 25) | (full0 << 29) | (magic1 << 30) | (select0 << 32) | (upper0 << 33) | (mask0 << 34) | (swizzle0 << 40) | (bias_lod << 72) | (texture_handle << 88) | (sampler_handle << 104)
instruction_stream += [(TEXTURE, tex)]
elif family == "br":
cond = ins.split(".")[2]
condition = branch_condition[cond]
bop = compact_branch_op[ins_op]
offset = int(arguments[0].split("->")[0])
# 2's complement and chill
if offset < 0:
offset = (1 << 7) - abs(offset)
# Find where we're going
dest_tag = int(arguments[0].split("->")[1])
br = (bop << 0) | (dest_tag << 3) | (offset << 7) | (condition << 14)
# TODO: Unconditional branch encoding
instruction_stream += [(ALU, "br", None, br)]
elif ins[1:] == "constants":
if ins[0] not in constant_types:
print("Unknown constant type " + str(constant_type))
break
(fmt, cast) = constant_types[ins[0]]
encoded = [struct.pack(fmt, cast(f)) for f in p[1]]
consts = bytearray()
for c in encoded:
consts += c
# consts must be exactly 4 quadwords, so pad with zeroes if necessary
consts += bytes(4*4 - len(consts))
instruction_stream += [(ALU, "constants", consts)]
# Emit from instruction stream
instructions = []
index = 0
while index < len(instruction_stream):
output_stream = bytearray()
ins = instruction_stream[index]
tag = ins[0]