From e8a0409d01e94ea03ae1b994e14e2c90bb236238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <timur.kristof@gmail.com> Date: Fri, 2 Oct 2020 14:31:40 +0200 Subject: [PATCH] aco/ngg: Use more efficient LDS layout to help reduce bank conflicts. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LLVM backend has a trick which helps reduce LDS bank conflicts by swizzling the LDS address where each vertex is emitted. This commit implements the same thing for ACO. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6964> --- src/amd/compiler/aco_instruction_selection.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 7af0e5a8fc334..f34b5691a781a 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -3904,6 +3904,15 @@ Temp wave_count_in_threadgroup(isel_context *ctx) Temp ngg_gs_vertex_lds_addr(isel_context *ctx, Temp vertex_idx) { Builder bld(ctx->program, ctx->block); + unsigned write_stride_2exp = ffs(ctx->shader->info.gs.vertices_out) - 1; /* ffs() is 1-based, so this is the 0-based index of the lowest set bit of vertices_out. NOTE(review): ffs(0) returns 0, so this would wrap around if vertices_out could be 0 - confirm it is always nonzero here. */ + + /* vertices_out = 2^(write_stride_2exp) * some odd number. When the exponent is nonzero (vertices_out is even), swizzle the vertex index before it is turned into a byte address: row = vertex_idx >> 5 (v_lshrrev_b32 takes the shift amount as its first source), swizzle = the low write_stride_2exp bits of row, and vertex_idx ^= swizzle. With an exponent of 0 the mask would be 0 and the XOR a no-op, so the instructions are not emitted. Per the commit message this mirrors the LLVM backend's trick to help reduce LDS bank conflicts. */ + if (write_stride_2exp) { + Temp row = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(5u), vertex_idx); + Temp swizzle = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand((1u << write_stride_2exp) - 1), row); + vertex_idx = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), vertex_idx, swizzle); + } + Temp vertex_idx_bytes = bld.v_mul24_imm(bld.def(v1), vertex_idx, ctx->ngg_gs_emit_vtx_bytes); return bld.vadd32(bld.def(v1), vertex_idx_bytes, Operand(ctx->ngg_gs_emit_addr)); } -- GitLab