Commit a170fbb3 authored by Connor Abbott

bifrost/asm: Fix problem with the constant quadword format

It turns out that when using the constant quadword format, we sometimes
need to swap the first and second constants in the quadword, or else
some of the bits are corrupted when loading either constant. Doing this
is tricky, since it has to happen before encoding the instructions
(because we need to know which slot to load constants from) and hence we
have to duplicate some of the logic for encoding immediates to figure
out ahead of time which constants get grouped together. We also have to
insert dummy constants earlier, since a dummy constant may have to be
swapped with a non-dummy one.
parent ce76d73e
......@@ -967,9 +967,9 @@ class ImmediateSlot:
IDX_MAP = (4, 5, 6, 7, 2, 3)
def __init__(self, contents, idx):
def __init__(self, contents):
assert len(contents) <= 2
self.idx = idx
self.idx = None
self.contents = contents
def __contains__(self, item):
......@@ -992,8 +992,12 @@ class ImmediateSlot:
return encoded
def __repr__(self):
return "<ImmediateSlot #%d at 0x%x; contents=%s>" % (
self.idx, id(self), self.contents)
if self.idx is None:
return "<ImmediateSlot at 0x%x; contents=%s>" % (
id(self), self.contents)
else:
return "<ImmediateSlot #%d at 0x%x; contents=%s>" % (
self.idx, id(self), self.contents)
class Uniform:
def __init__(self, idx):
......@@ -1404,13 +1408,12 @@ class Clause:
inst_reads.remove(bottom_half)
new_slot = ImmediateSlot((bottom_half, inst_reads[0]),
len(self.immediate_slots))
new_slot = ImmediateSlot((bottom_half, inst_reads[0]))
inst.resolve_immediates(self, new_slot)
break
if not new_slot:
new_slot = ImmediateSlot(inst_reads, len(self.immediate_slots))
new_slot = ImmediateSlot(inst_reads)
self.immediate_slots.append(new_slot)
return new_slot
......@@ -1464,11 +1467,6 @@ class Clause:
# immediate zero slot.
if inst.has_pending_immediates():
pending = inst.reg_file.const_port
# FIXME: it looks like the compiler makes some interesting
# decisions regarding where it puts dummy clauses, for some
# reason consts 8/9 are swapped with consts 10/11 when the
# last instruction is the one reusing the same constant twice
# (and thus only requiring that half the slot be filled)
if all(t.value == 0 for t in pending):
inst.resolve_immediates(self, ImmediateZeroSlot)
elif pending.bitlen == 64:
......@@ -1507,17 +1505,45 @@ class Clause:
pending_slot.add_immediate(
ImmediateToken(0, ImmediateToken.ReadType.FULL32))
slot = ImmediateSlot(pending_slot.contents,
len(self.immediate_slots))
slot = ImmediateSlot(pending_slot.contents)
inst.resolve_immediates(self, slot)
self.immediate_slots.append(slot)
@staticmethod
def _next_immediate_to_encode(immediates):
if immediates:
return immediates.pop(0)
def _dummy_immediate():
return ImmediateSlot((ImmediateToken(0, ImmediateToken.ReadType.FULL32),
ImmediateToken(0, ImmediateToken.ReadType.FULL32)))
def _finalize_immediates(self):
if (len(self.instructions), 0) in self.QUADWORD_FORMATS:
immediate_quadwords_start = 0
else:
return Bits(64)
assert (len(self.instructions), 1) in self.QUADWORD_FORMATS
immediate_quadwords_start = 1
# insert dummy constant for the first immediate if necessary
if immediate_quadwords_start == 1 and len(self.immediate_slots) == 0:
self.immediate_slots.append(self._dummy_immediate())
# Step through each of the constant quadwords
for i in range(immediate_quadwords_start, len(self.immediate_slots), 2):
if len(self.immediate_slots) <= i + 1:
# Insert dummy constant to round out the quadword. We do this
# now since it may have to be swapped with the actual constant
# below.
self.immediate_slots.append(self._dummy_immediate())
# Work around a weird issue, where if the high 4 bits of the first
# constant are greater than the high 4 bits of the second constant,
# we get garbage.
if (self.immediate_slots[i].encode_contents()[0:4].uint >
self.immediate_slots[i+1].encode_contents()[0:4].uint):
self.immediate_slots[i], self.immediate_slots[i+1] = \
self.immediate_slots[i+1], self.immediate_slots[i]
# Finally, resolve indices
for i, slot in enumerate(self.immediate_slots):
slot.idx = i
def _encode_quadwords(self, immediates):
instructions = [i.encode() for i in self.instructions]
......@@ -1573,7 +1599,7 @@ class Clause:
if not i3:
# Format 3.1
next_immediate = self._next_immediate_to_encode(immediates)
next_immediate = immediates.pop(0)
if immediates:
tag = 0b00000100
else:
......@@ -1585,7 +1611,7 @@ class Clause:
quadword[120:128] = tag
elif len(instructions) == 1:
# Format 3.2
next_immediate = self._next_immediate_to_encode(immediates)
next_immediate = immediates.pop(0)
tag = 0b10
quadword[0:15] = next_immediate[45:60]
......@@ -1648,7 +1674,7 @@ class Clause:
if not i6:
# Format 5.1
next_immediate = self._next_immediate_to_encode(immediates)
next_immediate = immediates.pop(0)
if immediates:
tag = 0b01000110
else:
......@@ -1672,7 +1698,7 @@ class Clause:
quadword[120:128] = tag
else:
# Format 5.3
next_immediate = self._next_immediate_to_encode(immediates)
next_immediate = immediates.pop(0)
tag = 0b11
quadword[0:15] = next_immediate[45:60]
......@@ -1703,6 +1729,7 @@ class Clause:
return encoded
def encode(self):
self._finalize_immediates()
immediates = [s.encode_contents() for s in self.immediate_slots]
encoded = BitStream()
......@@ -1726,8 +1753,7 @@ class Clause:
tag = 0b0111
quadword[60:120] = immediates.pop(0)[0:60]
if immediates:
quadword[0:60] = immediates.pop(0)[0:60]
quadword[0:60] = immediates.pop(0)[0:60]
quadword[120:124] = tag
quadword[124:128] = self.QUADWORD_FORMATS[instruction_count,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment