diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index f613184ba872fe33e28d72af924bb1692eabbb2f..52371f08ff30c7ee31202a27a7dd7f842b8b83c7 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -3894,6 +3894,28 @@ Temp thread_id_in_threadgroup(isel_context *ctx)
    return bld.vadd32(bld.def(v1), Operand(num_pre_threads), Operand(tid_in_wave));
 }
 
+Temp ngg_gs_vertex_lds_addr(isel_context *ctx, Temp vertex_idx)
+{
+   Builder bld(ctx->program, ctx->block); /* result = ngg_gs_emit_addr + vertex_idx * ngg_gs_emit_vtx_bytes */
+   Temp byte_offset = bld.v_mul24_imm(bld.def(v1), vertex_idx, ctx->ngg_gs_emit_vtx_bytes);
+   return bld.vadd32(bld.def(v1), byte_offset, Operand(ctx->ngg_gs_emit_addr));
+}
+
+Temp ngg_gs_emit_vertex_lds_addr(isel_context *ctx, Temp emit_vertex_idx)
+{
+   /* Gives the LDS address of vertex number emit_vertex_idx emitted by the current GS thread.
+    * Meant for GS threads only; the NGG GS epilogue must not use it.
+    *
+    * The vertex slot is thread_id_in_threadgroup * gs.vertices_out + emit_vertex_idx,
+    * which ngg_gs_vertex_lds_addr then turns into an address.
+    */
+   Builder bld(ctx->program, ctx->block);
+   Temp tid = thread_id_in_threadgroup(ctx);
+   Temp first_slot = bld.v_mul24_imm(bld.def(v1), tid, ctx->shader->info.gs.vertices_out);
+   Temp slot = bld.vadd32(bld.def(v1), first_slot, emit_vertex_idx);
+   return ngg_gs_vertex_lds_addr(ctx, slot);
+}
+
 std::pair<Temp, unsigned> offset_add_from_nir(isel_context *ctx, const std::pair<Temp, unsigned> &base_offset, nir_src *off_src, unsigned stride = 1u)
 {
    Builder bld(ctx->program, ctx->block);
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h
index 4e6a6b75d95a36abd161d262b5d36a0bbea0638a..69ef7809405c8144f4f60dd2978cbfbabaa68d92 100644
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -95,6 +95,10 @@ struct isel_context {
    /* GS inputs */
    bool ngg_nogs_early_prim_export = false;
    Temp gs_wave_id;
+   unsigned ngg_gs_emit_addr = 0; /* byte address where emitted GS vertices start (set to the ESGS ring size) */
+   unsigned ngg_gs_emit_vtx_bytes = 0; /* bytes per emitted vertex: ngg_gs_primflags_offset + 4 */
+   unsigned ngg_gs_scratch_addr = 0; /* byte address right after the emit area: ngg_gs_emit_addr + emit area size */
+   unsigned ngg_gs_primflags_offset = 0; /* set to gs.gsvs_vertex_size; presumably the primflags offset within a vertex */
 
    /* VS output information */
    bool export_clip_dists;
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index f8e329fcadfc1afc89925f825780d2dc1185016f..a3030346c4e77e83e49a304ab9d33f44c0c53657 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -501,6 +501,21 @@ void setup_gs_variables(isel_context *ctx, nir_shader *nir)
       radv_vs_output_info *outinfo = &ctx->program->info->vs.outinfo;
       setup_vs_output_info(ctx, nir, false,
                            ctx->options->key.vs_common_out.export_clip_dists, outinfo);
+      /* LDS is laid out in bytes as: ESGS ring, then the GS emit area, then the NGG GS scratch area. */
+      unsigned ngg_gs_scratch_bytes = ctx->args->shader_info->so.num_outputs ? (44u * 4u) : (8u * 4u);
+      unsigned ngg_emit_bytes = ctx->args->shader_info->ngg_info.ngg_emit_size * 4u;
+      unsigned esgs_ring_bytes = ctx->args->shader_info->ngg_info.esgs_ring_size;
+      /* An emitted vertex takes gsvs_vertex_size bytes plus 4 more (presumably the primflags - confirm). */
+      ctx->ngg_gs_primflags_offset = ctx->args->shader_info->gs.gsvs_vertex_size;
+      ctx->ngg_gs_emit_vtx_bytes = ctx->ngg_gs_primflags_offset + 4u;
+      ctx->ngg_gs_emit_addr = esgs_ring_bytes;
+      ctx->ngg_gs_scratch_addr = ctx->ngg_gs_emit_addr + ngg_emit_bytes;
+      /* lds_size counts allocation granules, so the byte total is rounded up to the next granule. */
+      unsigned total_lds_bytes = esgs_ring_bytes + ngg_emit_bytes + ngg_gs_scratch_bytes;
+      ctx->program->config->lds_size = (total_lds_bytes + ctx->program->lds_alloc_granule - 1) / ctx->program->lds_alloc_granule;
+      /* NOTE(review): the modulus below divides by zero if gs.vertices_out is 0 - confirm that cannot happen. */
+      /* The emit area must be a whole multiple of one thread's storage (vertices_out vertices of ngg_gs_emit_vtx_bytes). */
+      assert((ngg_emit_bytes % (ctx->ngg_gs_emit_vtx_bytes * nir->info.gs.vertices_out)) == 0);
    }
 
    if (ctx->stage & sw_vs)