diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f613184ba872fe33e28d72af924bb1692eabbb2f..52371f08ff30c7ee31202a27a7dd7f842b8b83c7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3894,6 +3894,28 @@ Temp thread_id_in_threadgroup(isel_context *ctx)
    return bld.vadd32(bld.def(v1), Operand(num_pre_threads), Operand(tid_in_wave));
 }
 
+Temp ngg_gs_vertex_lds_addr(isel_context *ctx, Temp vertex_idx)
+{ /* Builds ctx->ngg_gs_emit_addr + vertex_idx * ctx->ngg_gs_emit_vtx_bytes and returns it as a new v1 temporary. */
+   Builder bld(ctx->program, ctx->block);
+   Temp vertex_idx_bytes = bld.v_mul24_imm(bld.def(v1), vertex_idx, ctx->ngg_gs_emit_vtx_bytes); /* index -> byte offset; NOTE(review): presumably a 24-bit multiply, confirm vertex_idx range */
+   return bld.vadd32(bld.def(v1), vertex_idx_bytes, Operand(ctx->ngg_gs_emit_addr)); /* rebase onto the address stored in ngg_gs_emit_addr */
+}
+
+Temp ngg_gs_emit_vertex_lds_addr(isel_context *ctx, Temp emit_vertex_idx)
+{
+   /* Should be used by GS threads only (not by the NGG GS epilogue).
+    * Returns the LDS address of the given vertex index as emitted by the current GS thread:
+    * the slot index is thread_id_in_threadgroup(ctx) * gs.vertices_out + emit_vertex_idx.
+    */
+   Builder bld(ctx->program, ctx->block);
+
+   Temp thread_id_in_tg = thread_id_in_threadgroup(ctx);
+   Temp thread_vertices_addr = bld.v_mul24_imm(bld.def(v1), thread_id_in_tg, ctx->shader->info.gs.vertices_out); /* index of this thread's first vertex slot (an index, not a byte address) */
+   Temp vertex_idx = bld.vadd32(bld.def(v1), thread_vertices_addr, emit_vertex_idx);
+
+   return ngg_gs_vertex_lds_addr(ctx, vertex_idx); /* slot index -> byte address */
+}
+
 std::pair<Temp, unsigned> offset_add_from_nir(isel_context *ctx, const std::pair<Temp, unsigned> &base_offset, nir_src *off_src, unsigned stride = 1u)
 {
    Builder bld(ctx->program, ctx->block);
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 4e6a6b75d95a36abd161d262b5d36a0bbea0638a..69ef7809405c8144f4f60dd2978cbfbabaa68d92 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -95,6 +95,10 @@ struct isel_context {
    /* GS inputs */
    bool ngg_nogs_early_prim_export = false;
    Temp gs_wave_id;
+   unsigned ngg_gs_emit_addr = 0; /* base byte address of the emitted-vertex slots; setup_gs_variables sets it to the ESGS ring size */
+   unsigned ngg_gs_emit_vtx_bytes = 0; /* byte stride of one vertex slot: ngg_gs_primflags_offset + 4 */
+   unsigned ngg_gs_scratch_addr = 0; /* byte address right after the emit area: ngg_gs_emit_addr + ngg_emit_size * 4 */
+   unsigned ngg_gs_primflags_offset = 0; /* byte offset of the last 4 bytes of a vertex slot; equals gs.gsvs_vertex_size */
 
    /* VS output information */
    bool export_clip_dists;
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index f8e329fcadfc1afc89925f825780d2dc1185016f..a3030346c4e77e83e49a304ab9d33f44c0c53657 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -501,6 +501,21 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
       radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
       setup_vs_output_info(ctx, nir, false,
                            ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+      /* LDS byte layout set up below: [0, esgs_ring_bytes) | emit area (ngg_emit_bytes) | scratch (ngg_gs_scratch_bytes). */
+      unsigned ngg_gs_scratch_bytes = ctx->args->shader_info->so.num_outputs ? (44u * 4u) : (8u * 4u); /* 44 * 4 bytes when so.num_outputs is non-zero, otherwise 8 * 4 bytes */
+      unsigned ngg_emit_bytes = ctx->args->shader_info->ngg_info.ngg_emit_size * 4u; /* NOTE(review): assumes ngg_emit_size is in dwords - confirm */
+      unsigned esgs_ring_bytes = ctx->args->shader_info->ngg_info.esgs_ring_size; /* NOTE(review): used as a byte count as-is - confirm unit */
+      /* A vertex slot is gsvs_vertex_size bytes followed by 4 more bytes at ngg_gs_primflags_offset. */
+      ctx->ngg_gs_primflags_offset = ctx->args->shader_info->gs.gsvs_vertex_size;
+      ctx->ngg_gs_emit_vtx_bytes = ctx->ngg_gs_primflags_offset + 4u;
+      ctx->ngg_gs_emit_addr = esgs_ring_bytes;
+      ctx->ngg_gs_scratch_addr = ctx->ngg_gs_emit_addr + ngg_emit_bytes;
+      /* lds_size is stored in units of lds_alloc_granule, rounded up. */
+      unsigned total_lds_bytes = esgs_ring_bytes + ngg_emit_bytes + ngg_gs_scratch_bytes;
+      ctx->program->config->lds_size = (total_lds_bytes + ctx->program->lds_alloc_granule - 1) / ctx->program->lds_alloc_granule;
+
+      /* The emit area must be an exact multiple of one group of gs.vertices_out vertex slots (this checks divisibility, not capacity). */
+      assert((ngg_emit_bytes % (ctx->ngg_gs_emit_vtx_bytes * nir->info.gs.vertices_out)) == 0);
    }
 
    if (ctx->stage & sw_vs)