Commit 30895017 authored by Alyssa Rosenzweig
Browse files

pan/bi: Rewrite to fit dest = src constraint



Needed for TEXC as well as atomics.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <mesa/mesa!7081>
parent 93840234
......@@ -84,28 +84,6 @@ bi_combine_sel16(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigne
bi_emit_before(ctx, parent, sel);
}
/* Replaces every source operand equal to `old` with `new`, across all
 * instructions visited by bi_foreach_instr_global. For each rewritten
 * source, swizzle entries 0..15 are offset by (newc - oldc) so component
 * selection follows the move from component base `oldc` to `newc`.
 *
 * Every call walks the whole program, so invoking it once per combine
 * makes the pass effectively O(n^2). Better bookkeeping could bring this
 * down to linear if that ever becomes an issue. */
static void
bi_rewrite_uses(bi_context *ctx,
                unsigned old, unsigned oldc,
                unsigned new, unsigned newc)
{
        /* Unsigned difference; wraps when newc < oldc, exactly as the
         * in-place addition below expects. */
        const unsigned delta = newc - oldc;

        bi_foreach_instr_global(ctx, ins) {
                bi_foreach_src(ins, s) {
                        if (ins->src[s] == old) {
                                for (unsigned c = 0; c < 16; ++c) {
                                        ins->swizzle[s][c] += delta;
                                }

                                ins->src[s] = new;
                        }
                }
        }
}
/* Copies result of combine from the temp R to the instruction destination,
* given a bitsize sz */
......
......@@ -173,6 +173,19 @@ bi_register_allocate(bi_context *ctx)
struct lcra_state *l = NULL;
bool success = false;
/* For instructions that both read and write from a data register, it's
* the *same* data register. We enforce that constraint by just doing a
* quick rewrite. TODO: are there cases where this causes RA to have no
* solutions due to copyprop? */
bi_foreach_instr_global(ctx, ins) {
unsigned props = bi_class_props[ins->type];
unsigned both = BI_DATA_REG_SRC | BI_DATA_REG_DEST;
if ((props & both) != both) continue;
bi_rewrite_uses(ctx, ins->dest, 0, ins->src[0], 0);
ins->dest = ins->src[0];
}
do {
if (l) {
lcra_free(l);
......
......@@ -55,7 +55,7 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_TABLE] = BI_SCHED_ADD,
[BI_SELECT] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_TEXS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_TEXC] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_TEXC] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_SRC | BI_DATA_REG_DEST,
[BI_TEXC_DUAL] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_ROUND] = BI_ROUNDMODE | BI_SCHED_ALL,
[BI_IMUL] = BI_SCHED_FMA,
......
......@@ -180,3 +180,26 @@ bi_writemask(bi_instruction *ins)
unsigned shift = ins->dest_offset * 4; /* 32-bit words */
return (mask << shift);
}
/* Replaces every source operand equal to `old` with `new`, across all
 * instructions visited by bi_foreach_instr_global. For each rewritten
 * source, swizzle entries 0..15 are offset by (newc - oldc) so component
 * selection follows the move from component base `oldc` to `newc`.
 *
 * This is O(nc) to the program and number of uses, so combine lowering is
 * effectively O(n^2). Better bookkeeping would bring this down to linear
 * if that ever becomes an issue. */
void
bi_rewrite_uses(bi_context *ctx,
                unsigned old, unsigned oldc,
                unsigned new, unsigned newc)
{
        /* Unsigned difference; wraps when newc < oldc, exactly as the
         * in-place addition below expects. */
        const unsigned delta = newc - oldc;

        bi_foreach_instr_global(ctx, ins) {
                bi_foreach_src(ins, s) {
                        if (ins->src[s] == old) {
                                for (unsigned c = 0; c < 16; ++c) {
                                        ins->swizzle[s][c] += delta;
                                }

                                ins->src[s] = new;
                        }
                }
        }
}
......@@ -118,7 +118,7 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
#define BI_VECTOR (1 << 8)
/* Use a data register for src0/dest respectively, bypassing the usual
* register accessor. Mutually exclusive. */
* register accessor. */
#define BI_DATA_REG_SRC (1 << 9)
#define BI_DATA_REG_DEST (1 << 10)
......@@ -624,6 +624,7 @@ uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node);
uint64_t bi_get_immediate(bi_instruction *ins, unsigned index);
bool bi_writes_component(bi_instruction *ins, unsigned comp);
unsigned bi_writemask(bi_instruction *ins);
void bi_rewrite_uses(bi_context *ctx, unsigned old, unsigned oldc, unsigned new, unsigned newc);
/* BIR passes */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment