Commit e1d95339, authored by Alyssa Rosenzweig and committed by Marge Bot
Browse files

pan/bi: Fix vector handling of readmasks



The issue was interfering with liveness analysis. On Midgard, we look at
the writemask to decide how the instruction behaves. Here, since our ALU
is scalar (except for subdivision, which doesn't have proper writemasks
anyway), we just look at the component count directly: either 4 for
vector instructions (essentially -- for smaller loads we can replicate
manually without much burden), or 1 for scalar.
Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <mesa/mesa!4158>
parent c63105f9
......@@ -28,10 +28,10 @@
unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_ADD] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_ATEST] = BI_SCHED_HI_LATENCY,
[BI_ATEST] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_BRANCH] = BI_SCHED_HI_LATENCY,
[BI_CMP] = BI_GENERIC | BI_MODS | BI_SCHED_ALL,
[BI_BLEND] = BI_SCHED_HI_LATENCY,
[BI_BLEND] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_BITWISE] = BI_GENERIC | BI_SCHED_ALL,
[BI_CONVERT] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_CSEL] = BI_SCHED_FMA,
......@@ -39,18 +39,18 @@ unsigned bi_class_props[BI_NUM_CLASSES] = {
[BI_FMA] = BI_ROUNDMODE | BI_SCHED_FMA,
[BI_FREXP] = BI_SCHED_ALL,
[BI_ISUB] = BI_GENERIC | BI_SCHED_ALL,
[BI_LOAD] = BI_SCHED_HI_LATENCY,
[BI_LOAD_UNIFORM] = BI_SCHED_HI_LATENCY,
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY,
[BI_LOAD] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_UNIFORM] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY,
[BI_MINMAX] = BI_GENERIC | BI_SCHED_ALL,
[BI_MOV] = BI_MODS | BI_SCHED_ALL,
[BI_SHIFT] = BI_SCHED_ALL,
[BI_STORE] = BI_SCHED_HI_LATENCY,
[BI_STORE_VAR] = BI_SCHED_HI_LATENCY,
[BI_STORE] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_STORE_VAR] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_SPECIAL] = BI_SCHED_ADD | BI_SCHED_SLOW,
[BI_SWIZZLE] = BI_SCHED_ALL | BI_SWIZZLABLE,
[BI_TEX] = BI_SCHED_HI_LATENCY,
[BI_TEX] = BI_SCHED_HI_LATENCY | BI_VECTOR,
[BI_ROUND] = BI_GENERIC | BI_ROUNDMODE | BI_SCHED_ALL,
};
......@@ -75,10 +75,41 @@ bi_has_arg(bi_instruction *ins, unsigned arg)
return false;
}
/* Collapses a 16-bit per-byte mask into a per-component mask, where each
 * component spans `bytes` consecutive bytes: bit d of the result is set iff
 * the bytes of component d are set in `bytemask`.
 *
 * Every component must be either fully set or fully clear; a partially set
 * component trips an assertion. `bytes` must be nonzero, since it is the
 * stride of the walk over the mask. */

uint16_t
bi_from_bytemask(uint16_t bytemask, unsigned bytes)
{
        /* A zero stride would never advance the loop below */
        assert(bytes != 0);

        unsigned value = 0;

        for (unsigned c = 0, d = 0; c < 16; c += bytes, ++d) {
                bool a = (bytemask & (1 << c)) != 0;

                /* Check every byte belonging to *this* component. The bound
                 * is relative to c (not absolute), and clamped to the mask
                 * width so a trailing short component is not compared
                 * against bits that do not exist. */
                for (unsigned q = c; q < c + bytes && q < 16; ++q)
                        assert(((bytemask & (1 << q)) != 0) == a);

                value |= (a << d);
        }

        return value;
}
/* Returns how many components an instruction operates on: always 4 for
 * classes flagged BI_VECTOR, otherwise 32 divided by the destination type
 * size (with sizes below 8 treated as 8). */

unsigned
bi_get_component_count(bi_instruction *ins)
{
        bool is_vector = (bi_class_props[ins->type] & BI_VECTOR) != 0;

        if (is_vector)
                return 4;

        /* Stores imply VECTOR, so an instruction that reaches this point is
         * asserted to have a destination type to size from */
        assert(ins->dest_type);

        /* NOTE(review): the type size is presumably in bits (it divides 32
         * here) -- confirm against nir_alu_type_get_type_size */
        unsigned size = nir_alu_type_get_type_size(ins->dest_type);

        return 32 / MAX2(size, 8);
}
uint16_t
bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
{
uint16_t mask = 0x0;
unsigned component_count = bi_get_component_count(ins);
bi_foreach_src(ins, s) {
if (ins->src[s] != node) continue;
......@@ -87,7 +118,7 @@ bi_bytemask_of_read_components(bi_instruction *ins, unsigned node)
unsigned bytes = (MAX2(size, 8) / 8);
unsigned cmask = (1 << bytes) - 1;
for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[s]); ++i) {
for (unsigned i = 0; i < component_count; ++i) {
unsigned c = ins->swizzle[s][i];
mask |= (cmask << (c * bytes));
}
......
......@@ -110,6 +110,9 @@ extern unsigned bi_class_props[BI_NUM_CLASSES];
* the end of a clause. Implies ADD */
#define BI_SCHED_HI_LATENCY ((1 << 7) | BI_SCHED_ADD)
/* Intrinsic is vectorized and should read 4 components regardless of
 * writemask. bi_get_component_count() returns 4 for any class whose
 * bi_class_props entry carries this flag. */
#define BI_VECTOR (1 << 8)
/* It can't get any worse than csel4... can it? */
#define BIR_SRC_COUNT 4
......@@ -497,6 +500,8 @@ bool bi_has_outmod(bi_instruction *ins);
bool bi_has_source_mods(bi_instruction *ins);
bool bi_is_src_swizzled(bi_instruction *ins, unsigned s);
bool bi_has_arg(bi_instruction *ins, unsigned arg);
/* Converts a per-byte mask into a per-component mask, with `bytes` bytes per
 * component */
uint16_t bi_from_bytemask(uint16_t bytemask, unsigned bytes);
/* Component count of an instruction: 4 for BI_VECTOR classes, otherwise
 * derived from the destination type size */
unsigned bi_get_component_count(bi_instruction *ins);
/* Accumulates a byte mask over the sources of `ins` that equal `node`, one
 * swizzle entry per component (presumably the bytes of `node` that `ins`
 * reads -- confirm against the definition) */
uint16_t bi_bytemask_of_read_components(bi_instruction *ins, unsigned node);
/* BIR passes */
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment