Commit 4e9915b5 authored by Marek Olšák
Browse files

radeonsi/gfx11: enable NGG-only draw paths

parent 715d6f1c
This commit is part of merge request !16328. Comments created here will be created in the context of that merge request.
......@@ -103,7 +103,7 @@ radeonsi_include_dirs = [inc_src, inc_include, inc_gallium, inc_gallium_aux, inc
radeonsi_deps = [dep_llvm, dep_clock, dep_libdrm_radeon, idep_nir_headers, idep_amdgfxregs_h, idep_mesautil]
radeonsi_gfx_libs = []
foreach ver : ['6', '7', '8', '9', '10', '103']
foreach ver : ['6', '7', '8', '9', '10', '103', '11']
radeonsi_gfx_libs += static_library(
'radeonsi_gfx@0@'.format(ver),
['si_state_draw.cpp'],
......
......@@ -632,6 +632,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign
case GFX10_3:
si_init_draw_functions_GFX10_3(sctx);
break;
case GFX11:
si_init_draw_functions_GFX11(sctx);
break;
default:
unreachable("unhandled chip class");
}
......@@ -1270,15 +1273,23 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
sscreen->has_out_of_order_rast =
sscreen->info.has_out_of_order_rast && !(sscreen->debug_flags & DBG(NO_OUT_OF_ORDER));
sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) &&
sscreen->info.chip_class >= GFX10 &&
(sscreen->info.family != CHIP_NAVI14 ||
sscreen->info.is_pro_graphics);
sscreen->use_ngg_culling = sscreen->use_ngg &&
sscreen->info.max_render_backends >= 2 &&
!((sscreen->debug_flags & DBG(NO_NGG_CULLING)) ||
LLVM_VERSION_MAJOR <= 11 /* hangs on 11, see #4874 */);
sscreen->use_ngg_streamout = false;
if (sscreen->info.chip_class >= GFX11) {
sscreen->use_ngg = true;
sscreen->use_ngg_streamout = true;
/* TODO: Disable for now. Investigate if it helps. */
sscreen->use_ngg_culling = (sscreen->debug_flags & DBG(ALWAYS_NGG_CULLING_ALL)) &&
!(sscreen->debug_flags & DBG(NO_NGG_CULLING));
} else {
sscreen->use_ngg = !(sscreen->debug_flags & DBG(NO_NGG)) &&
sscreen->info.chip_class >= GFX10 &&
(sscreen->info.family != CHIP_NAVI14 ||
sscreen->info.is_pro_graphics);
sscreen->use_ngg_streamout = false;
sscreen->use_ngg_culling = sscreen->use_ngg &&
sscreen->info.max_render_backends >= 2 &&
!(sscreen->debug_flags & DBG(NO_NGG_CULLING)) &&
LLVM_VERSION_MAJOR >= 12; /* hangs on 11, see #4874 */
}
/* Only set this for the cases that are known to work, which are:
* - GFX9 if bpp >= 4 (in bytes)
......
......@@ -601,6 +601,7 @@ void si_init_draw_functions_GFX8(struct si_context *sctx);
void si_init_draw_functions_GFX9(struct si_context *sctx);
void si_init_draw_functions_GFX10(struct si_context *sctx);
void si_init_draw_functions_GFX10_3(struct si_context *sctx);
void si_init_draw_functions_GFX11(struct si_context *sctx);
void si_init_spi_map_functions(struct si_context *sctx);
/* si_state_msaa.c */
......
......@@ -42,6 +42,8 @@
#define GFX(name) name##GFX10
#elif (GFX_VER == 103)
#define GFX(name) name##GFX10_3
#elif (GFX_VER == 11)
#define GFX(name) name##GFX11
#else
#error "Unknown gfx version"
#endif
......@@ -425,8 +427,26 @@ static void si_prefetch_shaders(struct si_context *sctx)
return;
/* Prefetch shaders and VBO descriptors to TC L2. */
if (GFX_VERSION >= GFX9) {
/* Choose the right spot for the VBO prefetch. */
if (GFX_VERSION >= GFX11) {
if (HAS_TESS) {
if (mode != PREFETCH_AFTER_DRAW) {
if (mask & SI_PREFETCH_HS)
si_prefetch_shader_async(sctx, sctx->queued.named.hs);
if (mode == PREFETCH_BEFORE_DRAW)
return;
}
if (mask & SI_PREFETCH_GS)
si_prefetch_shader_async(sctx, sctx->queued.named.gs);
} else if (mode != PREFETCH_AFTER_DRAW) {
if (mask & SI_PREFETCH_GS)
si_prefetch_shader_async(sctx, sctx->queued.named.gs);
if (mode == PREFETCH_BEFORE_DRAW)
return;
}
} else if (GFX_VERSION >= GFX9) {
if (HAS_TESS) {
if (mode != PREFETCH_AFTER_DRAW) {
if (mask & SI_PREFETCH_HS)
......@@ -1735,6 +1755,9 @@ void si_set_vertex_buffer_descriptor(struct si_screen *sscreen, struct si_vertex
case GFX10_3:
si_set_vb_descriptor<GFX10_3>(velems, vb, element_index, out);
break;
case GFX11:
si_set_vb_descriptor<GFX11>(velems, vb, element_index, out);
break;
default:
unreachable("unhandled chip class");
}
......@@ -2572,6 +2595,9 @@ static void si_init_draw_vbo(struct si_context *sctx)
if (NGG && GFX_VERSION < GFX10)
return;
if (!NGG && GFX_VERSION >= GFX11)
return;
sctx->draw_vbo[HAS_TESS][HAS_GS][NGG] =
si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG>;
......
......@@ -999,6 +999,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
unsigned max_stream = util_last_bit(sel->info.base.gs.active_stream_mask);
unsigned offset;
assert(sscreen->info.chip_class < GFX11); /* gfx11 doesn't have the legacy pipeline */
pm4 = si_get_shader_pm4_state(shader);
if (!pm4)
return;
......@@ -3069,6 +3071,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
* - LDS usage is too high
*/
sel->tess_turns_off_ngg = sscreen->info.chip_class >= GFX10 &&
sscreen->info.chip_class <= GFX10_3 &&
(sel->info.base.gs.invocations * sel->info.base.gs.vertices_out > 256 ||
sel->info.base.gs.invocations * sel->info.base.gs.vertices_out *
(sel->info.num_outputs * 4 + 1) > 6500 /* max dw per GS primitive */);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment