Commit 38f0dd81 authored by Icecream95's avatar Icecream95
Browse files

panfrost: Push constant range combining

Makes panfrost_emit_const_buf about twice as fast.
parent 5abe82b1
Pipeline #307611 waiting for manual action with stages
......@@ -1168,7 +1168,9 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
uint32_t *push_cpu = (uint32_t *) push_transfer.cpu;
*push_constants = push_transfer.gpu;
for (unsigned i = 0; i < ss->info.push.count; ++i) {
unsigned dst_offset = 0;
for (unsigned i = 0; i < ss->info.push.num_ranges; ++i) {
struct panfrost_ubo_range src = ss->info.push.ranges[i];
/* Map the UBO, this should be cheap. However this is reading
......@@ -1179,8 +1181,9 @@ panfrost_emit_const_buf(struct panfrost_batch *batch,
const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu :
panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo);
/* TODO: Is there any benefit to combining ranges */
memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4);
memcpy(push_cpu + dst_offset, (uint8_t *) mapped_ubo + src.offset, src.size * 4);
dst_offset += src.size;
}
buf->dirty_mask = 0;
......
......@@ -1102,8 +1102,15 @@ get_push_uniforms(struct pan_pool *pool,
uint32_t *out = push_consts_buf.cpu;
uint8_t *in = (uint8_t *)inputs;
for (unsigned i = 0; i < shader->push.count; ++i)
memcpy(out + i, in + shader->push.ranges[i].offset, 4);
unsigned dst_offset = 0;
for (unsigned i = 0; i < shader->push.count; ++i) {
struct panfrost_ubo_range src = shader->push.ranges[i];
memcpy(out + dst_offset, in + src.offset, src.size * 4);
dst_offset += src.size;
}
return push_consts_buf.gpu;
}
......
......@@ -979,6 +979,9 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4;
assert(idx < ctx->info->push.count);
struct panfrost_ubo_range word =
pan_index_pushed_ubo(&ctx->info->push, idx);
midgard_instruction ld = {
.type = TAG_LOAD_STORE_4,
.mask = 0xF,
......@@ -988,10 +991,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff)
.swizzle = SWIZZLE_IDENTITY_4,
.op = midgard_op_ld_ubo_u128,
.load_store = {
.arg_1 = ctx->info->push.ranges[idx].ubo,
.arg_1 = word.ubo,
.arg_2 = 0x1E,
},
.constants.u32[0] = ctx->info->push.ranges[idx].offset
.constants.u32[0] = word.offset
};
mir_insert_instruction_before_scheduled(ctx, block, before, ld);
......
......@@ -135,14 +135,21 @@ pan_print_alu_type(nir_alu_type t, FILE *fp)
unsigned
pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs)
{
struct panfrost_ubo_range range = {
.ubo = ubo,
.offset = offs
};
unsigned count = 0;
for (unsigned i = 0; i < push->num_ranges; ++i) {
struct panfrost_ubo_range range = push->ranges[i];
count += range.size;
if (range.ubo != ubo)
continue;
for (unsigned i = 0; i < push->count; ++i) {
if (memcmp(push->ranges + i, &range, sizeof(range)) == 0)
return i;
unsigned start = range.offset;
unsigned end = range.offset + (range.size * 4);
if (offs >= start && offs < end)
return count - (end - offs) / 4;
}
unreachable("UBO not pushed");
......@@ -152,10 +159,45 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off
/* Record that the 32-bit word at byte offset `offs` of UBO `ubo` is pushed.
 *
 * push->count is incremented once per call, so it tracks the total number of
 * pushed words. If the word directly follows the last recorded range (same
 * UBO, contiguous byte offset), that range is grown by one word; otherwise a
 * new single-word range is appended and push->num_ranges is incremented. */
void
pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs)
{
        ++push->count;

        if (push->num_ranges) {
                struct panfrost_ubo_range *prev =
                        &push->ranges[push->num_ranges - 1];

                /* size is counted in 32-bit words, offset in bytes */
                if (prev->ubo == ubo &&
                    prev->offset + prev->size * 4 == offs) {
                        ++prev->size;
                        return;
                }
        }

        struct panfrost_ubo_range range = {
                .ubo = ubo,
                .size = 1,
                .offset = offs,
        };

        /* Ranges are indexed by num_ranges, not by the word count: storing at
         * push->count would skip slots and bump count a second time. */
        push->ranges[push->num_ranges++] = range;
}
struct panfrost_ubo_range
pan_index_pushed_ubo(struct panfrost_ubo_push *push, unsigned push_word)
{
assert(push_word < push->count);
for (unsigned i = 0; i < push->num_ranges; ++i) {
struct panfrost_ubo_range range = push->ranges[i];
if (range.size > push_word) {
return (struct panfrost_ubo_range){
.ubo = range.ubo,
.offset = range.offset + push_word,
};
}
push_word -= range.size;
}
unreachable("Invalid panfrost_ubo_push state");
}
......@@ -97,6 +97,7 @@ struct panfrost_ubo_range {
/* Set of UBO words pushed to the shader, stored as ranges of contiguous
 * words. */
struct panfrost_ubo_push {
        /* Total number of pushed 32-bit words; incremented once per
         * pan_add_pushed_ubo call */
        unsigned count;
        /* Number of valid entries in ranges[] */
        unsigned num_ranges;
        /* Each entry describes `size` consecutive words starting at byte
         * `offset` of UBO `ubo` */
        struct panfrost_ubo_range ranges[PAN_MAX_PUSH];
};
......@@ -109,6 +110,12 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off
void
pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs);
/* Get a panfrost_ubo_range struct (with size == 1) corresponding to the
 * index in the push constants buffer. */
struct panfrost_ubo_range
pan_index_pushed_ubo(struct panfrost_ubo_push *push, unsigned push_word);
struct hash_table_u64 *
panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment