From 21dff176f16e2c538d29c0666afeee99068d75f2 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Fri, 23 Jul 2021 12:14:32 +1200 Subject: [PATCH 01/18] pan/mdg: Use the correct swizzle for condition moves Fixes: 70072a20e00 ("pan/midgard: Refactor swizzles") --- src/panfrost/midgard/midgard_schedule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index f987b7f17fd4..f1b78ccba285 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -978,7 +978,7 @@ mir_schedule_condition(compiler_context *ctx, midgard_instruction *cond = mir_schedule_comparison( ctx, instructions, predicate, worklist, count, last->src[condition_index], - vector, last->swizzle[2], last); + vector, last->swizzle[condition_index], last); /* We have exclusive reign over this (possibly move) conditional * instruction. We can rewrite into a pipeline conditional register */ -- GitLab From 0dc960a55f90f172c094d7c07fb0a75c60f560f4 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Thu, 22 Jul 2021 22:15:30 +1200 Subject: [PATCH 02/18] pan/mdg: Unaligned uniform pushing --- src/panfrost/midgard/mir_promote_uniforms.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 5d7045a9c799..1d7bc29c3869 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -46,8 +46,14 @@ mir_is_ubo(midgard_instruction *ins) static bool mir_is_direct_aligned_ubo(midgard_instruction *ins) { - return mir_is_ubo(ins) && - !(ins->constants.u32[0] & 0xF) && + if (!mir_is_ubo(ins)) + return false; + + unsigned offset = ins->constants.u32[0]; + unsigned size = util_logbase2_ceil(mir_bytemask(ins)); + + return !(offset & 0x3) && + ((offset & 0xf) + size <= 16) && (ins->src[1] == ~0) && (ins->src[2] == ~0); } @@ -290,6 +296,7 @@ midgard_promote_uniforms(compiler_context *ctx) unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store); unsigned qword = ins->constants.u32[0] / 16; + unsigned offset = ins->constants.u32[0] % 16; if (!mir_is_direct_aligned_ubo(ins)) { if (ins->src[1] == ~0) @@ -318,6 +325,13 @@ midgard_promote_uniforms(compiler_context *ctx) assert(address < promoted_count); unsigned promoted = SSA_FIXED_REGISTER(uniform_reg); + unsigned comps = mir_components_for_type(ins->dest_type); + unsigned swz_offset = offset * comps / 16; + + unsigned swizzle[MIR_VEC_COMPONENTS]; + for (unsigned i = 0; i < comps - swz_offset; ++i) + swizzle[i] = i + swz_offset; + /* We do need the move for safety for a non-SSA dest, or if * we're being fed into a special class */ @@ -331,12 +345,13 @@ midgard_promote_uniforms(compiler_context *ctx) midgard_instruction mov = v_mov(promoted, ins->dest); mov.dest_type = nir_type_uint | type_size; mov.src_types[1] = mov.dest_type; + memcpy(mov.swizzle[1], swizzle, sizeof(swizzle)); uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size); mir_set_bytemask(&mov, rounded); mir_insert_instruction_before(ctx, ins, mov); } else { - mir_rewrite_index_src(ctx, ins->dest, promoted); + mir_rewrite_index_src_swizzle(ctx, ins->dest, promoted, swizzle); } mir_remove_instruction(ins); -- GitLab From 453314e3f3399a9fcc5d4b5025d251757896a806 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Tue, 11 May 2021 23:06:38 +1200 Subject: [PATCH 03/18] pan/mdg: Make sure the sysval push range is first --- src/panfrost/midgard/mir_promote_uniforms.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 1d7bc29c3869..d8f8fc37b332 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -102,10 +102,13 @@ mir_analyze_ranges(compiler_context *ctx) * sophisticated. Select from the last UBO first to prioritize sysvals. */ static void -mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis, unsigned max_qwords) +mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis, unsigned max_qwords, unsigned sysval_ubo) { unsigned max_words = MIN2(PAN_MAX_PUSH, max_qwords * 4); + /* The sysval push range must be first */ + assert(sysval_ubo == analysis->nr_blocks - 1); + for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { struct mir_ubo_block *block = &analysis->blocks[ubo]; @@ -282,8 +285,11 @@ midgard_promote_uniforms(compiler_context *ctx) unsigned work_count = mir_work_heuristic(ctx, &analysis); unsigned promoted_count = 24 - work_count; + unsigned sysval_ubo = + MAX2(ctx->inputs->sysval_ubo, ctx->nir->info.num_ubos); + /* Ensure we are 16 byte aligned to avoid underallocations */ - mir_pick_ubo(&ctx->info->push, &analysis, promoted_count); + mir_pick_ubo(&ctx->info->push, &analysis, promoted_count, sysval_ubo); ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4); /* First, figure out special indices a priori so we don't recompute a lot */ -- GitLab From 2e36a9cb545e95dffaa1838e4c7e7083438e06d3 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Wed, 26 May 2021 15:17:31 +1200 Subject: [PATCH 04/18] pan/bi: Don't use unaligned sysval loads Unaligned loads are problematic for uniform pushing code, instead use larger loads and then move the values to the right place. --- src/panfrost/bifrost/bifrost_compile.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c index b3b7192eb4fe..28f7427d426e 100644 --- a/src/panfrost/bifrost/bifrost_compile.c +++ b/src/panfrost/bifrost/bifrost_compile.c @@ -378,7 +378,7 @@ bi_make_vec_to(bi_builder *b, bi_index final_dst, } } -static bi_instr * +static void bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval, unsigned nr_components, unsigned offset) { @@ -388,11 +388,18 @@ bi_load_sysval_to(bi_builder *b, bi_index dest, int sysval, pan_lookup_sysval(b->shader->sysval_to_id, &b->shader->info->sysvals, sysval); - unsigned idx = (uniform * 16) + offset; + unsigned idx = uniform * 16; + + unsigned offset_words = offset / 4; + + bi_index load_dest = bi_temp(b->shader); + bi_load_to(b, (nr_components + offset_words) * 32, load_dest, + bi_imm_u32(idx), + bi_imm_u32(sysval_ubo), BI_SEG_UBO); - return bi_load_to(b, nr_components * 32, dest, - bi_imm_u32(idx), - bi_imm_u32(sysval_ubo), BI_SEG_UBO); + for (unsigned i = 0; i < nr_components; ++i) + bi_mov_i32_to(b, bi_word(dest, i), + bi_word(load_dest, i + offset_words)); } static void -- GitLab From b07a240a6c46ecd44e530a4f6c0305693897edfb Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 8 May 2021 21:58:08 +1200 Subject: [PATCH 05/18] pan/bi: Make sure sysvals are pushed in a single range --- src/panfrost/bifrost/bi_opt_push_ubo.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c index caaa5ee872fd..25e68706c777 100644 --- a/src/panfrost/bifrost/bi_opt_push_ubo.c +++ b/src/panfrost/bifrost/bi_opt_push_ubo.c @@ -91,8 +91,12 @@ bi_analyze_ranges(bi_context *ctx) * sophisticated. Select from the last UBO first to prioritize sysvals. */ static void -bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis) +bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis, + unsigned sysval_ubo) { + /* The sysval push range must be first */ + assert(sysval_ubo == analysis->nr_blocks - 1); + for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) { struct bi_ubo_block *block = &analysis->blocks[ubo]; @@ -102,6 +106,11 @@ bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis) /* Don't push something we don't access */ if (range == 0) continue; + /* We want a single push range for sysvals, pretend + * there are no holes between sysvals */ + if (ubo == sysval_ubo) + range = 4; + /* Don't push more than possible */ if (push->count > PAN_MAX_PUSH - range) return; @@ -133,8 +142,11 @@ bi_opt_push_ubo(bi_context *ctx) /* This pass only runs once */ assert(ctx->info->push.count == 0); + unsigned sysval_ubo = + MAX2(ctx->inputs->sysval_ubo, ctx->nir->info.num_ubos); + struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); - bi_pick_ubo(&ctx->info->push, &analysis); + bi_pick_ubo(&ctx->info->push, &analysis, sysval_ubo); ctx->ubo_mask = 0; -- GitLab From 1fbfb9110c62e4783ef56d0fc0d14d001c7ee1d1 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Mon, 12 Apr 2021 23:45:44 +1200 Subject: [PATCH 06/18] panfrost: Deduplicate adding to the push uniform list The logic will be expanded to deal with ranges, so first move it to a separate function pan_add_pushed_ubo. Reviewed-by: Alyssa Rosenzweig --- src/panfrost/bifrost/bi_opt_push_ubo.c | 10 ++-------- src/panfrost/midgard/mir_promote_uniforms.c | 10 ++-------- src/panfrost/util/pan_ir.c | 11 +++++++++++ src/panfrost/util/pan_ir.h | 3 +++ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c index 25e68706c777..b377171e980d 100644 --- a/src/panfrost/bifrost/bi_opt_push_ubo.c +++ b/src/panfrost/bifrost/bi_opt_push_ubo.c @@ -115,14 +115,8 @@ bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis, if (push->count > PAN_MAX_PUSH - range) return; - for (unsigned offs = 0; offs < range; ++offs) { - struct panfrost_ubo_word word = { - .ubo = ubo, - .offset = (r + offs) * 4 - }; - - push->words[push->count++] = word; - } + for (unsigned offs = 0; offs < range; ++offs) + pan_add_pushed_ubo(push, ubo, (r + offs) * 4); /* Mark it as pushed so we can rewrite */ BITSET_SET(block->pushed, r); diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index d8f8fc37b332..5791b6ff998f 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -118,14 +118,8 @@ mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis, if (push->count > max_words - 4) return; - for (unsigned offs = 0; offs < 4; ++offs) { - struct panfrost_ubo_word word = { - .ubo = ubo, - .offset = (vec4 * 16) + (offs * 4) - }; - - push->words[push->count++] = word; - } + for (unsigned offs = 0; offs < 4; ++offs) + pan_add_pushed_ubo(push, ubo, (vec4 * 16) + (offs * 4)); /* Mark it as pushed so we can rewrite */ BITSET_SET(block->pushed, vec4); diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c index c469274933fb..56bc529bf19f 100644 --- a/src/panfrost/util/pan_ir.c +++ b/src/panfrost/util/pan_ir.c @@ -148,3 +148,14 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off unreachable("UBO not pushed"); } + +void +pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) +{ + struct panfrost_ubo_word word = { + .ubo = ubo, + .offset = offs, + }; + + push->words[push->count++] = word; +} diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 1ff240b30705..6e1bc18993e0 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -107,6 +107,9 @@ struct panfrost_ubo_push { unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); +void +pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); + struct hash_table_u64 * panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx); -- GitLab From e75eb627d2994bd5c6c4d37691f580ac6e345db1 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Fri, 23 Apr 2021 20:29:34 +1200 Subject: [PATCH 07/18] panfrost: Rename panfrost_ubo_word to panfrost_ubo_range --- src/gallium/drivers/panfrost/pan_cmdstream.c | 2 +- src/panfrost/lib/pan_indirect_dispatch.c | 2 +- src/panfrost/lib/pan_indirect_draw.c | 2 +- src/panfrost/midgard/midgard_ra.c | 6 +++--- src/panfrost/util/pan_ir.c | 8 ++++---- src/panfrost/util/pan_ir.h | 7 ++++--- 6 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 4ad732803203..add3886905ad 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1260,7 +1260,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, *push_constants = push_transfer.gpu; for (unsigned i = 0; i < ss->info.push.count; ++i) { - struct panfrost_ubo_word src = ss->info.push.words[i]; + struct panfrost_ubo_range src = ss->info.push.ranges[i]; if (src.ubo == sysval_ubo) { unsigned sysval_idx = src.offset / 16; diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index 79e055bece8b..a2df1e854f5d 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -106,7 +106,7 @@ get_push_uniforms(struct pan_pool *pool, uint8_t *in = (uint8_t *)inputs; for (unsigned i = 0; i < dev->indirect_dispatch.push.count; ++i) - memcpy(out + i, in + dev->indirect_dispatch.push.words[i].offset, 4); + memcpy(out + i, in + dev->indirect_dispatch.push.ranges[i].offset, 4); return push_consts_buf.gpu; } diff --git a/src/panfrost/lib/pan_indirect_draw.c b/src/panfrost/lib/pan_indirect_draw.c index 39eef07b0576..658404a0b932 100644 --- a/src/panfrost/lib/pan_indirect_draw.c +++ b/src/panfrost/lib/pan_indirect_draw.c @@ -1176,7 +1176,7 @@ get_push_uniforms(struct pan_pool *pool, uint8_t *in = (uint8_t *)inputs; for (unsigned i = 0; i < shader->push.count; ++i) - memcpy(out + i, in + shader->push.words[i].offset, 4); + memcpy(out + i, in + shader->push.ranges[i].offset, 4); return push_consts_buf.gpu; } diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 704cfc874662..b0c505f791d8 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -1042,7 +1042,7 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4; assert(idx < ctx->info->push.count); - ctx->ubo_mask |= BITSET_BIT(ctx->info->push.words[idx].ubo); + ctx->ubo_mask |= BITSET_BIT(ctx->info->push.ranges[idx].ubo); midgard_instruction ld = { .type = TAG_LOAD_STORE_4, @@ -1055,11 +1055,11 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .load_store = { .index_reg = REGISTER_LDST_ZERO, }, - .constants.u32[0] = ctx->info->push.words[idx].offset + .constants.u32[0] = ctx->info->push.ranges[idx].offset }; midgard_pack_ubo_index_imm(&ld.load_store, - ctx->info->push.words[idx].ubo); + ctx->info->push.ranges[idx].ubo); mir_insert_instruction_before_scheduled(ctx, block, before, ld); diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c index 56bc529bf19f..d3ce1a88636e 100644 --- a/src/panfrost/util/pan_ir.c +++ b/src/panfrost/util/pan_ir.c @@ -135,13 +135,13 @@ pan_print_alu_type(nir_alu_type t, FILE *fp) unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) { - struct panfrost_ubo_word word = { + struct panfrost_ubo_range range = { .ubo = ubo, .offset = offs }; for (unsigned i = 0; i < push->count; ++i) { - if (memcmp(push->words + i, &word, sizeof(word)) == 0) + if (memcmp(push->ranges + i, &range, sizeof(range)) == 0) return i; } @@ -152,10 +152,10 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off void pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) { - struct panfrost_ubo_word word = { + struct panfrost_ubo_range range = { .ubo = ubo, .offset = offs, }; - push->words[push->count++] = word; + push->ranges[push->count++] = range; } diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 6e1bc18993e0..f34e9bc72faa 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -91,14 +91,15 @@ struct panfrost_sysvals { /* Architectural invariants (Midgard and Bifrost): UBO must be <= 2^16 bytes so * an offset to a word must be < 2^16. There are less than 2^8 UBOs */ -struct panfrost_ubo_word { - uint16_t ubo; +struct panfrost_ubo_range { + uint8_t ubo; + uint8_t size; uint16_t offset; }; struct panfrost_ubo_push { unsigned count; - struct panfrost_ubo_word words[PAN_MAX_PUSH]; + struct panfrost_ubo_range ranges[PAN_MAX_PUSH]; }; /* Helper for searching the above. Note this is O(N) to the number of pushed -- GitLab From b7e198447a6224351b1196364b18ee4e881adcc4 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 8 May 2021 21:43:04 +1200 Subject: [PATCH 08/18] panfrost: Separate count of pushed and UBO sysvals We don't want to upload sysvals as both UBOs and push constants, split the count for each type. Don't change behaviour in this commit, the "UBO" sysvals will still be copied as push constants. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 7 ++++--- src/panfrost/lib/pan_indirect_dispatch.c | 3 ++- src/panfrost/lib/pan_indirect_draw.c | 3 ++- src/panfrost/lib/pan_shader.c | 2 +- src/panfrost/util/pan_ir.h | 6 ++++-- src/panfrost/util/pan_sysval.c | 5 +++-- src/panfrost/vulkan/panvk_cmd_buffer.c | 4 ++-- src/panfrost/vulkan/panvk_cs.c | 4 ++-- src/panfrost/vulkan/panvk_pipeline.c | 16 ++++++++-------- src/panfrost/vulkan/panvk_shader.c | 2 +- 10 files changed, 29 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index add3886905ad..cfd23f28d0b7 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1027,7 +1027,8 @@ panfrost_analyze_sysvals(struct panfrost_shader_state *ss) unsigned dirty_shader = PAN_DIRTY_STAGE_RENDERER | PAN_DIRTY_STAGE_CONST; - for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { + for (unsigned i = 0; i < ss->info.sysvals.push_count + + ss->info.sysvals.ubo_count; ++i) { switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) { case PAN_SYSVAL_VIEWPORT_SCALE: case PAN_SYSVAL_VIEWPORT_OFFSET: @@ -1083,7 +1084,7 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, { struct sysval_uniform *uniforms = ptr->cpu; - for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) { + for (unsigned i = 0; i < ss->info.sysvals.ubo_count; ++i) { int sysval = ss->info.sysvals.sysvals[i]; switch (PAN_SYSVAL_TYPE(sysval)) { @@ -1199,7 +1200,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, struct panfrost_shader_state *ss = &all->variants[all->active_variant]; /* Allocate room for the sysval and the uniforms */ - size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.sysval_count; + size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.ubo_count; struct panfrost_ptr transfer = pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index a2df1e854f5d..3135fd41877c 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -232,7 +232,8 @@ pan_indirect_dispatch_init(struct panfrost_device *dev) assert(!shader_info.tls_size); assert(!shader_info.wls_size); - assert(!shader_info.sysvals.sysval_count); + assert(!shader_info.sysvals.push_count); + assert(!shader_info.sysvals.ubo_count); dev->indirect_dispatch.bin = panfrost_bo_create(dev, binary.size, PAN_BO_EXECUTE, diff --git a/src/panfrost/lib/pan_indirect_draw.c b/src/panfrost/lib/pan_indirect_draw.c index 658404a0b932..fbce6dfee44b 100644 --- a/src/panfrost/lib/pan_indirect_draw.c +++ b/src/panfrost/lib/pan_indirect_draw.c @@ -1085,7 +1085,8 @@ create_indirect_draw_shader(struct panfrost_device *dev, assert(!shader_info.tls_size); assert(!shader_info.wls_size); - assert(!shader_info.sysvals.sysval_count); + assert(!shader_info.sysvals.push_count); + assert(!shader_info.sysvals.ubo_count); unsigned shader_id = get_shader_id(flags, index_size, index_min_max_search); struct pan_indirect_draw_shader *draw_shader = diff --git a/src/panfrost/lib/pan_shader.c b/src/panfrost/lib/pan_shader.c index f1d4d4402da5..a80d5ba586ff 100644 --- a/src/panfrost/lib/pan_shader.c +++ b/src/panfrost/lib/pan_shader.c @@ -277,7 +277,7 @@ pan_shader_compile(const struct panfrost_device *dev, info->outputs_written = s->info.outputs_written; /* Sysvals have dedicated UBO */ - if (info->sysvals.sysval_count) + if (info->sysvals.ubo_count) info->ubo_count = MAX2(s->info.num_ubos + 1, inputs->sysval_ubo + 1); else info->ubo_count = s->info.num_ubos; diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index f34e9bc72faa..1dbb5604f9aa 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -77,9 +77,11 @@ enum { }; struct panfrost_sysvals { - /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ + /* The mapping of sysvals to uniforms, and the count of pushed and ubo + * sysvals */ unsigned sysvals[MAX_SYSVAL_COUNT]; - unsigned sysval_count; + unsigned push_count; + unsigned ubo_count; }; /* Technically Midgard could go up to 92 in a pathological case but we don't diff --git a/src/panfrost/util/pan_sysval.c b/src/panfrost/util/pan_sysval.c index 80a509f5b8b0..4bdbceb1f446 100644 --- a/src/panfrost/util/pan_sysval.c +++ b/src/panfrost/util/pan_sysval.c @@ -146,7 +146,7 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, /* Else assign */ - unsigned id = sysvals->sysval_count++; + unsigned id = sysvals->ubo_count++; assert(id < MAX_SYSVAL_COUNT); _mesa_hash_table_u64_insert(sysval_to_id, sysval, (void *) ((uintptr_t) id + 1)); sysvals->sysvals[id] = sysval; @@ -157,6 +157,7 @@ pan_lookup_sysval(struct hash_table_u64 *sysval_to_id, struct hash_table_u64 * panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx) { - sysvals->sysval_count = 0; + sysvals->push_count = 0; + sysvals->ubo_count = 0; return _mesa_hash_table_u64_create(memctx); } diff --git a/src/panfrost/vulkan/panvk_cmd_buffer.c b/src/panfrost/vulkan/panvk_cmd_buffer.c index 3da978b4837b..544122441384 100644 --- a/src/panfrost/vulkan/panvk_cmd_buffer.c +++ b/src/panfrost/vulkan/panvk_cmd_buffer.c @@ -984,7 +984,7 @@ panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) return; for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sysvals); i++) { - unsigned sysval_count = pipeline->sysvals[i].ids.sysval_count; + unsigned sysval_count = pipeline->sysvals[i].ids.ubo_count; if (!sysval_count || (desc_state->sysvals[i] && !(cmdbuf->state.dirty & pipeline->sysvals[i].dirty_mask))) @@ -994,7 +994,7 @@ panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf) pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sysval_count * 16, 16); union panvk_sysval_data *data = sysvals.cpu; - for (unsigned s = 0; s < pipeline->sysvals[i].ids.sysval_count; s++) { + for (unsigned s = 0; s < pipeline->sysvals[i].ids.ubo_count; s++) { panvk_cmd_upload_sysval(cmdbuf, pipeline->sysvals[i].ids.sysvals[s], &data[s]); } diff --git a/src/panfrost/vulkan/panvk_cs.c b/src/panfrost/vulkan/panvk_cs.c index aa8dd5a286dd..495fc9846da9 100644 --- a/src/panfrost/vulkan/panvk_cs.c +++ b/src/panfrost/vulkan/panvk_cs.c @@ -264,13 +264,13 @@ panvk_emit_ubos(const struct panvk_pipeline *pipeline, } for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) { - if (!pipeline->sysvals[i].ids.sysval_count) + if (!pipeline->sysvals[i].ids.ubo_count) continue; pan_pack(&ubos[pipeline->sysvals[i].ubo_idx], UNIFORM_BUFFER, cfg) { cfg.pointer = pipeline->sysvals[i].ubo ? : state->sysvals[i]; - cfg.entries = pipeline->sysvals[i].ids.sysval_count; + cfg.entries = pipeline->sysvals[i].ids.ubo_count; } } } diff --git a/src/panfrost/vulkan/panvk_pipeline.c b/src/panfrost/vulkan/panvk_pipeline.c index e51e9ec94648..7e55a5666586 100644 --- a/src/panfrost/vulkan/panvk_pipeline.c +++ b/src/panfrost/vulkan/panvk_pipeline.c @@ -136,7 +136,7 @@ panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder, if (!shader) return VK_ERROR_OUT_OF_HOST_MEMORY; - if (shader->info.sysvals.sysval_count) + if (shader->info.sysvals.ubo_count) sysval_ubo++; builder->shaders[stage] = shader; @@ -219,11 +219,11 @@ panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *buil for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { const struct panvk_shader *shader = builder->shaders[i]; - if (!shader || !shader->info.sysvals.sysval_count) + if (!shader || !shader->info.sysvals.ubo_count) continue; bool static_sysvals = true; - for (unsigned s = 0; s < shader->info.sysvals.sysval_count; s++) { + for (unsigned s = 0; s < shader->info.sysvals.ubo_count; s++) { unsigned id = shader->info.sysvals.sysvals[i]; static_sysvals &= panvk_pipeline_static_sysval(pipeline, id); switch (PAN_SYSVAL_TYPE(id)) { @@ -243,7 +243,7 @@ panvk_pipeline_builder_alloc_static_state_bo(struct panvk_pipeline_builder *buil bo_size = ALIGN_POT(bo_size, 16); builder->stages[i].sysvals_offset = bo_size; - bo_size += shader->info.sysvals.sysval_count * 16; + bo_size += shader->info.sysvals.ubo_count * 16; } if (bo_size) { @@ -282,7 +282,7 @@ panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder, pipeline->sysvals[stage].ids = shader->info.sysvals; pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo; - if (!shader->info.sysvals.sysval_count || + if (!shader->info.sysvals.ubo_count || builder->stages[stage].sysvals_offset == ~0) return; @@ -292,7 +292,7 @@ panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder, pipeline->sysvals[stage].ubo = pipeline->state_bo->ptr.gpu + builder->stages[stage].sysvals_offset; - for (unsigned i = 0; i < shader->info.sysvals.sysval_count; i++) { + for (unsigned i = 0; i < shader->info.sysvals.ubo_count; i++) { unsigned id = shader->info.sysvals.sysvals[i]; panvk_pipeline_builder_upload_sysval(builder, @@ -347,13 +347,13 @@ panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder, pipeline->num_ubos = builder->layout->num_ubos; for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) { - if (pipeline->sysvals[i].ids.sysval_count) + if (pipeline->sysvals[i].ids.ubo_count) pipeline->num_ubos = MAX2(pipeline->num_ubos, pipeline->sysvals[i].ubo_idx + 1); } pipeline->num_sysvals = 0; for (unsigned i = 0; i < ARRAY_SIZE(pipeline->sysvals); i++) - pipeline->num_sysvals += pipeline->sysvals[i].ids.sysval_count; + pipeline->num_sysvals += pipeline->sysvals[i].ids.ubo_count; } diff --git a/src/panfrost/vulkan/panvk_shader.c b/src/panfrost/vulkan/panvk_shader.c index 756ad24e011a..4d7fcc4f51ab 100644 --- a/src/panfrost/vulkan/panvk_shader.c +++ b/src/panfrost/vulkan/panvk_shader.c @@ -402,7 +402,7 @@ panvk_shader_create(struct panvk_device *dev, /* Patch the descriptor count */ shader->info.ubo_count = - shader->info.sysvals.sysval_count ? sysval_ubo + 1 : layout->num_ubos; + shader->info.sysvals.ubo_count ? sysval_ubo + 1 : layout->num_ubos; shader->info.sampler_count = layout->num_samplers; shader->info.texture_count = layout->num_textures; -- GitLab From e6f11e5cb8849ed412aa7addf674e92db4a3e46a Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Thu, 29 Apr 2021 14:55:26 +1200 Subject: [PATCH 09/18] panfrost: Move uploading a single sysval to a separate function --- src/gallium/drivers/panfrost/pan_cmdstream.c | 153 ++++++++++--------- 1 file changed, 77 insertions(+), 76 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index cfd23f28d0b7..4c42167d2e62 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1076,6 +1076,81 @@ panfrost_analyze_sysvals(struct panfrost_shader_state *ss) ss->dirty_shader = dirty_shader; } +static void +panfrost_upload_sysval(struct panfrost_batch *batch, + enum pipe_shader_type st, + int sysval, + struct sysval_uniform *uniform, + mali_ptr gpu) +{ + switch (PAN_SYSVAL_TYPE(sysval)) { + case PAN_SYSVAL_VIEWPORT_SCALE: + panfrost_upload_viewport_scale_sysval(batch, uniform); + break; + case PAN_SYSVAL_VIEWPORT_OFFSET: + panfrost_upload_viewport_offset_sysval(batch, uniform); + break; + case PAN_SYSVAL_TEXTURE_SIZE: + panfrost_upload_txs_sysval(batch, st, + PAN_SYSVAL_ID(sysval), + uniform); + break; + case PAN_SYSVAL_SSBO: + panfrost_upload_ssbo_sysval(batch, st, + PAN_SYSVAL_ID(sysval), + uniform); + break; + case PAN_SYSVAL_NUM_WORK_GROUPS: + for (unsigned j = 0; j < 3; j++) + batch->num_wg_sysval[j] = gpu + (j * 4); + panfrost_upload_num_work_groups_sysval(batch, uniform); + break; + case PAN_SYSVAL_LOCAL_GROUP_SIZE: + panfrost_upload_local_group_size_sysval(batch, uniform); + break; + case PAN_SYSVAL_WORK_DIM: + panfrost_upload_work_dim_sysval(batch, uniform); + break; + case PAN_SYSVAL_SAMPLER: + panfrost_upload_sampler_sysval(batch, st, + PAN_SYSVAL_ID(sysval), + uniform); + break; + case PAN_SYSVAL_IMAGE_SIZE: + panfrost_upload_image_size_sysval(batch, st, + PAN_SYSVAL_ID(sysval), + uniform); + break; + case PAN_SYSVAL_SAMPLE_POSITIONS: + panfrost_upload_sample_positions_sysval(batch, uniform); + break; + case PAN_SYSVAL_MULTISAMPLED: + panfrost_upload_multisampled_sysval(batch, uniform); + break; + case PAN_SYSVAL_RT_CONVERSION: + panfrost_upload_rt_conversion_sysval(batch, + PAN_SYSVAL_ID(sysval), + uniform); + break; + case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: + batch->ctx->first_vertex_sysval_ptr = gpu; + batch->ctx->base_vertex_sysval_ptr = + batch->ctx->first_vertex_sysval_ptr + 4; + batch->ctx->base_instance_sysval_ptr = + batch->ctx->first_vertex_sysval_ptr + 8; + + uniform->u[0] = batch->ctx->offset_start; + uniform->u[1] = batch->ctx->base_vertex; + uniform->u[2] = batch->ctx->base_instance; + break; + case PAN_SYSVAL_DRAWID: + uniform->u[0] = batch->ctx->drawid; + break; + default: + assert(0); + } +} + static void panfrost_upload_sysvals(struct panfrost_batch *batch, const struct panfrost_ptr *ptr, @@ -1086,82 +1161,8 @@ panfrost_upload_sysvals(struct panfrost_batch *batch, for (unsigned i = 0; i < ss->info.sysvals.ubo_count; ++i) { int sysval = ss->info.sysvals.sysvals[i]; - - switch (PAN_SYSVAL_TYPE(sysval)) { - case PAN_SYSVAL_VIEWPORT_SCALE: - panfrost_upload_viewport_scale_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_VIEWPORT_OFFSET: - panfrost_upload_viewport_offset_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_TEXTURE_SIZE: - panfrost_upload_txs_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_SSBO: - panfrost_upload_ssbo_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_NUM_WORK_GROUPS: - for (unsigned j = 0; j < 3; j++) { - batch->num_wg_sysval[j] = - ptr->gpu + (i * sizeof(*uniforms)) + (j * 4); - } - panfrost_upload_num_work_groups_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_LOCAL_GROUP_SIZE: - panfrost_upload_local_group_size_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_WORK_DIM: - panfrost_upload_work_dim_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_SAMPLER: - panfrost_upload_sampler_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_IMAGE_SIZE: - panfrost_upload_image_size_sysval(batch, st, - PAN_SYSVAL_ID(sysval), - &uniforms[i]); - break; - case PAN_SYSVAL_SAMPLE_POSITIONS: - panfrost_upload_sample_positions_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_MULTISAMPLED: - panfrost_upload_multisampled_sysval(batch, - &uniforms[i]); - break; - case PAN_SYSVAL_RT_CONVERSION: - panfrost_upload_rt_conversion_sysval(batch, - PAN_SYSVAL_ID(sysval), &uniforms[i]); - break; - case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: - batch->ctx->first_vertex_sysval_ptr = - ptr->gpu + (i * sizeof(*uniforms)); - batch->ctx->base_vertex_sysval_ptr = - batch->ctx->first_vertex_sysval_ptr + 4; - batch->ctx->base_instance_sysval_ptr = - batch->ctx->first_vertex_sysval_ptr + 8; - - uniforms[i].u[0] = batch->ctx->offset_start; - uniforms[i].u[1] = batch->ctx->base_vertex; - uniforms[i].u[2] = batch->ctx->base_instance; - break; - case PAN_SYSVAL_DRAWID: - uniforms[i].u[0] = batch->ctx->drawid; - break; - default: - assert(0); - } + mali_ptr gpu = ptr->gpu + (i * sizeof(*uniforms)); + panfrost_upload_sysval(batch, st, sysval, &uniforms[i], gpu); } } -- GitLab From 455da24ff5cfd3878f75c11f583f4ad81799d66d Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 8 May 2021 22:18:18 +1200 Subject: [PATCH 10/18] panfrost: Upload pushed sysvals directly to the push constants ...though no sysvals are marked as pushed yet. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 22 +++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 4c42167d2e62..c1ce4d0e9d33 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1153,15 +1153,27 @@ panfrost_upload_sysval(struct panfrost_batch *batch, static void panfrost_upload_sysvals(struct panfrost_batch *batch, - const struct panfrost_ptr *ptr, + const struct panfrost_ptr *ptr_push, + const struct panfrost_ptr *ptr_ubo, struct panfrost_shader_state *ss, enum pipe_shader_type st) { - struct sysval_uniform *uniforms = ptr->cpu; + unsigned push_count = ss->info.sysvals.push_count; + unsigned ubo_count = ss->info.sysvals.ubo_count; - for (unsigned i = 0; i < ss->info.sysvals.ubo_count; ++i) { + struct sysval_uniform *uniforms = ptr_push->cpu; + + for (unsigned i = 0; i < push_count; ++i) { int sysval = ss->info.sysvals.sysvals[i]; - mali_ptr gpu = ptr->gpu + (i * sizeof(*uniforms)); + mali_ptr gpu = ptr_push->gpu + (i * sizeof(*uniforms)); + panfrost_upload_sysval(batch, st, sysval, &uniforms[i], gpu); + } + + uniforms = ptr_ubo->cpu; + + for (unsigned i = 0; i < ubo_count; ++i) { + int sysval = ss->info.sysvals.sysvals[push_count + i]; + mali_ptr gpu = ptr_ubo->gpu + (i * sizeof(*uniforms)); panfrost_upload_sysval(batch, st, sysval, &uniforms[i], gpu); } } @@ -1206,7 +1218,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); /* Upload sysvals requested by the shader */ - panfrost_upload_sysvals(batch, &transfer, ss, stage); + panfrost_upload_sysvals(batch, NULL, &transfer, ss, stage); /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage); -- GitLab From 293d72aa04ed15116ee06a0239f720bc1fc8adf4 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 19 Jun 2021 12:12:59 +1200 Subject: [PATCH 11/18] panfrost: Move sysval upload after push constant allocation --- src/gallium/drivers/panfrost/pan_cmdstream.c | 35 ++++++++++---------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index c1ce4d0e9d33..a87b9679a096 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1212,13 +1212,12 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage]; struct panfrost_shader_state *ss = &all->variants[all->active_variant]; - /* Allocate room for the sysval and the uniforms */ + /* Allocate room for the sysvals and the uniforms */ size_t sys_size = sizeof(float) * 4 * ss->info.sysvals.ubo_count; - struct panfrost_ptr transfer = - pan_pool_alloc_aligned(&batch->pool.base, sys_size, 16); - - /* Upload sysvals requested by the shader */ - panfrost_upload_sysvals(batch, NULL, &transfer, ss, stage); + struct panfrost_ptr sysval_ptr = {0}; + if (sys_size) + sysval_ptr = pan_pool_alloc_aligned(&batch->pool.base, + sys_size, 16); /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage); @@ -1232,16 +1231,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, uint64_t *ubo_ptr = (uint64_t *) ubos.cpu; - /* Upload sysval as a final UBO */ - - if (sys_size) { - pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) { - cfg.entries = DIV_ROUND_UP(sys_size, 16); - cfg.pointer = transfer.gpu; - } - } - - /* The rest are honest-to-goodness UBOs */ + /* Upload all used UBOs other than sysvals */ u_foreach_bit(ubo, ss->info.ubo_mask & buf->enabled_mask) { size_t usz = buf->cb[ubo].buffer_size; @@ -1273,6 +1263,17 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, uint32_t *push_cpu = (uint32_t *) push_transfer.cpu; *push_constants = push_transfer.gpu; + /* Upload sysvals requested by the shader */ + panfrost_upload_sysvals(batch, &push_transfer, &sysval_ptr, ss, stage); + + /* Upload sysval as a final UBO */ + if (sys_size) { + pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) { + cfg.entries = DIV_ROUND_UP(sys_size, 16); + cfg.pointer = sysval_ptr.gpu; + } + } + for (unsigned i = 0; i < ss->info.push.count; ++i) { struct panfrost_ubo_range src = ss->info.push.ranges[i]; @@ -1315,7 +1316,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, * off to upload sysvals to a staging buffer on the CPU on the * assumption sysvals will get pushed (TODO) */ - const void *mapped_ubo = (src.ubo == sysval_ubo) ? transfer.cpu : + const void *mapped_ubo = (src.ubo == sysval_ubo) ? sysval_ptr.cpu : panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); /* TODO: Is there any benefit to combining ranges */ -- GitLab From 49d720baa50a400c35eb0ad11019cb6dc5249ca4 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 19 Jun 2021 12:13:29 +1200 Subject: [PATCH 12/18] panfrost: Skip the sysval range in the main push upload loop They've already been uploaded with panfrost_upload_sysvals. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index a87b9679a096..554bde9b8cb6 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1274,7 +1274,10 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, } } - for (unsigned i = 0; i < ss->info.push.count; ++i) { + unsigned sys_push = ss->info.sysvals.push_count * 4; + push_cpu += sys_push; + + for (unsigned i = sys_push ? 1 : 0; i < ss->info.push.count; ++i) { struct panfrost_ubo_range src = ss->info.push.ranges[i]; if (src.ubo == sysval_ubo) { -- GitLab From ac9431d6b40bd6a0fd1045bea3b86a687526c354 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Tue, 25 May 2021 22:05:22 +1200 Subject: [PATCH 13/18] panfrost: Account for pushed sysvals when uploading sysval UBO Subtract the space for pushed sysvals from the pointer so that the offsets for UBO loads don't need to be adjusted. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 554bde9b8cb6..b300374f742d 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1268,9 +1268,10 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, /* Upload sysval as a final UBO */ if (sys_size) { + unsigned sysval_push = ss->info.sysvals.push_count; pan_pack(ubo_ptr + ubo_count, UNIFORM_BUFFER, cfg) { - cfg.entries = DIV_ROUND_UP(sys_size, 16); - cfg.pointer = sysval_ptr.gpu; + cfg.entries = DIV_ROUND_UP(sys_size, 16) + sysval_push; + cfg.pointer = sysval_ptr.gpu - sysval_push * 16; } } -- GitLab From 3a1279b299825ff7b175f64af742066b9d1579ee Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Thu, 13 May 2021 08:40:59 +1200 Subject: [PATCH 14/18] panfrost: Don't allocate push uniforms when there are none --- src/gallium/drivers/panfrost/pan_cmdstream.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index b300374f742d..042c9c994217 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1256,9 +1256,9 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, return ubos.gpu; /* Copy push constants required by the shader */ - struct panfrost_ptr push_transfer = - pan_pool_alloc_aligned(&batch->pool.base, - ss->info.push.count * 4, 16); + struct panfrost_ptr push_transfer = {0}; + if (ss->info.push.count) + push_transfer = pan_pool_alloc_aligned(&batch->pool.base, ss->info.push.count * 4, 16); uint32_t *push_cpu = (uint32_t *) push_transfer.cpu; *push_constants = push_transfer.gpu; -- GitLab From eb628f94dd5233543605715f2dc129545c104c4a Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Tue, 25 May 2021 22:04:36 +1200 Subject: [PATCH 15/18] Revert "panfrost: Don't upload empty push uniform table" Push constants are now only allocated when needed, and the commit will interfere with later changes to sysval upload. TODO: We could still return slightly early, is it worth it? This reverts commit df3edfc72994d26a87008ddd322ffbc38c2343b2. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 042c9c994217..21c809bea23f 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1252,9 +1252,6 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, } } - if (ss->info.push.count == 0) - return ubos.gpu; - /* Copy push constants required by the shader */ struct panfrost_ptr push_transfer = {0}; if (ss->info.push.count) -- GitLab From f9027db9ca49863329c9117e3535b248d44055a0 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Mon, 12 Apr 2021 23:57:25 +1200 Subject: [PATCH 16/18] panfrost: Push constant range combining Makes panfrost_emit_const_buf about twice as fast. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 7 ++- src/panfrost/lib/pan_indirect_dispatch.c | 7 ++- src/panfrost/lib/pan_indirect_draw.c | 7 ++- src/panfrost/midgard/midgard_ra.c | 10 ++-- src/panfrost/util/pan_ir.c | 59 +++++++++++++++++--- src/panfrost/util/pan_ir.h | 12 +++- 6 files changed, 82 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 21c809bea23f..5f2ae8036c6d 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1275,7 +1275,7 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, unsigned sys_push = ss->info.sysvals.push_count * 4; push_cpu += sys_push; - for (unsigned i = sys_push ? 1 : 0; i < ss->info.push.count; ++i) { + for (unsigned i = sys_push ? 1 : 0; i < ss->info.push.num_ranges; ++i) { struct panfrost_ubo_range src = ss->info.push.ranges[i]; if (src.ubo == sysval_ubo) { @@ -1320,8 +1320,9 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, const void *mapped_ubo = (src.ubo == sysval_ubo) ? sysval_ptr.cpu : panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); - /* TODO: Is there any benefit to combining ranges */ - memcpy(push_cpu + i, (uint8_t *) mapped_ubo + src.offset, 4); + memcpy(push_cpu, (uint8_t *) mapped_ubo + src.offset, src.size * 4); + + push_cpu += src.size; } return ubos.gpu; diff --git a/src/panfrost/lib/pan_indirect_dispatch.c b/src/panfrost/lib/pan_indirect_dispatch.c index 3135fd41877c..32bce5536229 100644 --- a/src/panfrost/lib/pan_indirect_dispatch.c +++ b/src/panfrost/lib/pan_indirect_dispatch.c @@ -105,8 +105,11 @@ get_push_uniforms(struct pan_pool *pool, uint32_t *out = push_consts_buf.cpu; uint8_t *in = (uint8_t *)inputs; - for (unsigned i = 0; i < dev->indirect_dispatch.push.count; ++i) - memcpy(out + i, in + dev->indirect_dispatch.push.ranges[i].offset, 4); + for (unsigned i = 0; i < dev->indirect_dispatch.push.num_ranges; ++i) { + struct panfrost_ubo_range src = dev->indirect_dispatch.push.ranges[i]; + memcpy(out, in + src.offset, src.size * 4); + out += src.size; + } return push_consts_buf.gpu; } diff --git a/src/panfrost/lib/pan_indirect_draw.c b/src/panfrost/lib/pan_indirect_draw.c index fbce6dfee44b..51dc09d61978 100644 --- a/src/panfrost/lib/pan_indirect_draw.c +++ b/src/panfrost/lib/pan_indirect_draw.c @@ -1176,8 +1176,11 @@ get_push_uniforms(struct pan_pool *pool, uint32_t *out = push_consts_buf.cpu; uint8_t *in = (uint8_t *)inputs; - for (unsigned i = 0; i < shader->push.count; ++i) - memcpy(out + i, in + shader->push.ranges[i].offset, 4); + for (unsigned i = 0; i < shader->push.num_ranges; ++i) { + struct panfrost_ubo_range src = shader->push.ranges[i]; + memcpy(out, in + src.offset, src.size * 4); + out += src.size; + } return push_consts_buf.gpu; } diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index b0c505f791d8..c6d2c7a6153a 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -1042,7 +1042,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) unsigned idx = (23 - SSA_REG_FROM_FIXED(ins->src[i])) * 4; assert(idx < ctx->info->push.count); - ctx->ubo_mask |= BITSET_BIT(ctx->info->push.ranges[idx].ubo); + struct panfrost_ubo_range word = + pan_index_pushed_ubo(&ctx->info->push, idx); + + ctx->ubo_mask |= BITSET_BIT(word.ubo); midgard_instruction ld = { .type = TAG_LOAD_STORE_4, @@ -1055,11 +1058,10 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) .load_store = { .index_reg = REGISTER_LDST_ZERO, }, - .constants.u32[0] = ctx->info->push.ranges[idx].offset + .constants.u32[0] = word.offset }; - midgard_pack_ubo_index_imm(&ld.load_store, - ctx->info->push.ranges[idx].ubo); + midgard_pack_ubo_index_imm(&ld.load_store, word.ubo); mir_insert_instruction_before_scheduled(ctx, block, before, ld); diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c index d3ce1a88636e..10e40665afdf 100644 --- a/src/panfrost/util/pan_ir.c +++ b/src/panfrost/util/pan_ir.c @@ -135,14 +135,21 @@ pan_print_alu_type(nir_alu_type t, FILE *fp) unsigned pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) { - struct panfrost_ubo_range range = { - .ubo = ubo, - .offset = offs - }; + unsigned count = 0; + + for (unsigned i = 0; i < push->num_ranges; ++i) { + struct panfrost_ubo_range range = push->ranges[i]; + + count += range.size; + + if (range.ubo != ubo) + continue; - for (unsigned i = 0; i < push->count; ++i) { - if (memcmp(push->ranges + i, &range, sizeof(range)) == 0) - return i; + unsigned start = range.offset; + unsigned end = range.offset + (range.size * 4); + + if (offs >= start && offs < end) + return count - (end - offs) / 4; } unreachable("UBO not pushed"); @@ -152,10 +159,46 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off void pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs) { + ++push->count; + + if (push->num_ranges) { + struct panfrost_ubo_range *prev; + prev = &push->ranges[push->num_ranges - 1]; + + if (prev->ubo == ubo && + prev->offset + prev->size * 4 == offs) { + ++prev->size; + return; + } + } + struct panfrost_ubo_range range = { .ubo = ubo, + .size = 1, .offset = offs, }; - push->ranges[push->count++] = range; + push->ranges[push->num_ranges++] = range; +} + +struct panfrost_ubo_range +pan_index_pushed_ubo(struct panfrost_ubo_push *push, unsigned push_word) +{ + assert(push_word < push->count); + + for (unsigned i = 0; i < push->num_ranges; ++i) { + struct panfrost_ubo_range range = push->ranges[i]; + + if (range.size > push_word) { + return (struct panfrost_ubo_range){ + .ubo = range.ubo, + .offset = range.offset + push_word * 4, + .index = i, + }; + } + + push_word -= range.size; + } + + unreachable("Invalid panfrost_ubo_push state"); } diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 1dbb5604f9aa..21e59adbd87b 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -95,12 +95,16 @@ struct panfrost_sysvals { struct panfrost_ubo_range { uint8_t ubo; - uint8_t size; + union { + uint8_t size; + uint8_t index; + }; uint16_t offset; }; struct panfrost_ubo_push { unsigned count; + unsigned num_ranges; struct panfrost_ubo_range ranges[PAN_MAX_PUSH]; }; @@ -113,6 +117,12 @@ pan_lookup_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned off void pan_add_pushed_ubo(struct panfrost_ubo_push *push, unsigned ubo, unsigned offs); +/* Get a panfrost_ubo_range struct corresponding to the index in the push + * contants buffer, with the range index pointing to the range. */ + +struct panfrost_ubo_range +pan_index_pushed_ubo(struct panfrost_ubo_push *push, unsigned push_word); + struct hash_table_u64 * panfrost_init_sysvals(struct panfrost_sysvals *sysvals, void *memctx); -- GitLab From 1b17888e2551dd5d00ce8612a6bddffcd1111259 Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 8 May 2021 22:28:41 +1200 Subject: [PATCH 17/18] panfrost: Switch to uploading pushed sysvals directly After a number of commits of preparation, actually start setting sysvals.push_count to use the new upload path. Uploading the rest of the push constants can now be simplified a bit. --- src/gallium/drivers/panfrost/pan_cmdstream.c | 42 ++------------------ src/panfrost/bifrost/bi_opt_push_ubo.c | 2 + src/panfrost/midgard/midgard_ra.c | 6 +++ src/panfrost/midgard/mir_promote_uniforms.c | 2 + src/panfrost/util/pan_ir.c | 17 ++++++++ src/panfrost/util/pan_ir.h | 2 + 6 files changed, 32 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c index 5f2ae8036c6d..e9df8c433d74 100644 --- a/src/gallium/drivers/panfrost/pan_cmdstream.c +++ b/src/gallium/drivers/panfrost/pan_cmdstream.c @@ -1222,7 +1222,6 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, /* Next up, attach UBOs. UBO count includes gaps but no sysval UBO */ struct panfrost_shader_state *shader = panfrost_get_shader_state(ctx, stage); unsigned ubo_count = shader->info.ubo_count - (sys_size ? 1 : 0); - unsigned sysval_ubo = sys_size ? ubo_count : ~0; struct panfrost_ptr ubos = pan_pool_alloc_desc_array(&batch->pool.base, @@ -1278,47 +1277,12 @@ panfrost_emit_const_buf(struct panfrost_batch *batch, for (unsigned i = sys_push ? 1 : 0; i < ss->info.push.num_ranges; ++i) { struct panfrost_ubo_range src = ss->info.push.ranges[i]; - if (src.ubo == sysval_ubo) { - unsigned sysval_idx = src.offset / 16; - unsigned sysval_comp = (src.offset % 16) / 4; - unsigned sysval_type = PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[sysval_idx]); - mali_ptr ptr = push_transfer.gpu + (4 * i); - - switch (sysval_type) { - case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS: - switch (sysval_comp) { - case 0: - batch->ctx->first_vertex_sysval_ptr = ptr; - break; - case 1: - batch->ctx->base_vertex_sysval_ptr = ptr; - break; - case 2: - batch->ctx->base_instance_sysval_ptr = ptr; - break; - case 3: - /* Spurious (Midgard doesn't pack) */ - break; - default: - unreachable("Invalid vertex/instance offset component\n"); - } - break; - - case PAN_SYSVAL_NUM_WORK_GROUPS: - batch->num_wg_sysval[sysval_comp] = ptr; - break; - - default: - break; - } - } /* Map the UBO, this should be cheap. However this is reading * from write-combine memory which is _very_ slow. It might pay - * off to upload sysvals to a staging buffer on the CPU on the - * assumption sysvals will get pushed (TODO) */ + * off to upload UBOs to a staging buffer on the CPU on the + * assumption they will get pushed. */ - const void *mapped_ubo = (src.ubo == sysval_ubo) ? sysval_ptr.cpu : - panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); + const void *mapped_ubo = panfrost_map_constant_buffer_cpu(ctx, buf, src.ubo); memcpy(push_cpu, (uint8_t *) mapped_ubo + src.offset, src.size * 4); diff --git a/src/panfrost/bifrost/bi_opt_push_ubo.c b/src/panfrost/bifrost/bi_opt_push_ubo.c index b377171e980d..57c4e576792f 100644 --- a/src/panfrost/bifrost/bi_opt_push_ubo.c +++ b/src/panfrost/bifrost/bi_opt_push_ubo.c @@ -142,6 +142,8 @@ bi_opt_push_ubo(bi_context *ctx) struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx); bi_pick_ubo(&ctx->info->push, &analysis, sysval_ubo); + pan_set_sysval_push(ctx->info, sysval_ubo); + ctx->ubo_mask = 0; bi_foreach_instr_global_safe(ctx, ins) { diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index c6d2c7a6153a..6f0fa70d783d 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -1071,6 +1071,12 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) } ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4); + + unsigned push_sysvals = ctx->info->sysvals.push_count; + if (push_sysvals > new_cutoff) { + ctx->info->sysvals.ubo_count += push_sysvals - new_cutoff; + ctx->info->sysvals.push_count = new_cutoff; + } } /* Run register allocation in a loop, spilling until we succeed */ diff --git a/src/panfrost/midgard/mir_promote_uniforms.c b/src/panfrost/midgard/mir_promote_uniforms.c index 5791b6ff998f..7fd1a08b37b8 100644 --- a/src/panfrost/midgard/mir_promote_uniforms.c +++ b/src/panfrost/midgard/mir_promote_uniforms.c @@ -286,6 +286,8 @@ midgard_promote_uniforms(compiler_context *ctx) mir_pick_ubo(&ctx->info->push, &analysis, promoted_count, sysval_ubo); ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4); + pan_set_sysval_push(ctx->info, sysval_ubo); + /* First, figure out special indices a priori so we don't recompute a lot */ BITSET_WORD *special = mir_special_indices(ctx); diff --git a/src/panfrost/util/pan_ir.c b/src/panfrost/util/pan_ir.c index 10e40665afdf..15dae46135bb 100644 --- a/src/panfrost/util/pan_ir.c +++ b/src/panfrost/util/pan_ir.c @@ -202,3 +202,20 @@ pan_index_pushed_ubo(struct panfrost_ubo_push *push, unsigned push_word) unreachable("Invalid panfrost_ubo_push state"); } + +void +pan_set_sysval_push(struct pan_shader_info *info, unsigned sysval_ubo) +{ + if (info->push.num_ranges && + info->push.ranges[0].ubo == sysval_ubo) { + + unsigned pushed_sysvals = info->push.ranges[0].size / 4; + + info->sysvals.ubo_count -= pushed_sysvals; + info->sysvals.push_count += pushed_sysvals; + + /* The sysval upload code can only handle a single range */ + assert(!(info->push.num_ranges > 1 && + info->push.ranges[1].ubo == sysval_ubo)); + } +} diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h index 21e59adbd87b..ef425e4c0d03 100644 --- a/src/panfrost/util/pan_ir.h +++ b/src/panfrost/util/pan_ir.h @@ -350,6 +350,8 @@ void pan_print_alu_type(nir_alu_type t, FILE *fp); bool pan_has_source_mod(nir_alu_src *src, nir_op op); bool pan_has_dest_mod(nir_dest **dest, nir_op op); +void pan_set_sysval_push(struct pan_shader_info *info, unsigned sysval_ubo); + /* NIR passes to do some backend-specific lowering */ #define PAN_WRITEOUT_C 1 -- GitLab From 643dc8f4636ee414bdf941f7cb7250a205b0d47a Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Tue, 11 May 2021 23:03:35 +1200 Subject: [PATCH 18/18] pan/mdg: Truncate the push ranges when demoting uniforms Avoid uploading unused push constants, by finding the last uniform still pushed and truncating the push ranges there. --- src/panfrost/midgard/midgard_ra.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 6f0fa70d783d..e4e706d647a2 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -1070,7 +1070,19 @@ mir_demote_uniforms(compiler_context *ctx, unsigned new_cutoff) } } - ctx->info->push.count = MIN2(ctx->info->push.count, new_cutoff * 4); + unsigned new_push_count = new_cutoff * 4; + if (new_push_count && new_push_count < ctx->info->push.count) { + struct panfrost_ubo_range last_pushed = + pan_index_pushed_ubo(&ctx->info->push, new_push_count - 1); + + ctx->info->push.num_ranges = last_pushed.index + 1; + + ctx->info->push.ranges[last_pushed.index].size = + (last_pushed.offset - + ctx->info->push.ranges[last_pushed.offset].offset) / 4 + 1; + } + + ctx->info->push.count = MIN2(ctx->info->push.count, new_push_count); unsigned push_sysvals = ctx->info->sysvals.push_count; if (push_sysvals > new_cutoff) { -- GitLab