From 52413a93afe408fe7841b641fcfa2ac9cae4c349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 28 Sep 2021 16:21:42 +0200 Subject: [PATCH] radv: Enable NGG culling by default on GFX10.3, add nonggc debug flag. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit enables NGG culling on all GFX10.3 GPUs by default. A new debug flag environment variable RADV_DEBUG=nonggc is added to disable this feature on GPUs where it is enabled by default. The previous perf test flag RADV_PERFTEST=nggc will not be needed on GFX10.3 anymore but it can still be used to enable the feature on GPUs where it isn't on by default. Totals from 58239 (45.27% of 128647) affected shaders: VGPRs: 1989752 -> 2049408 (+3.00%); split: -3.21%, +6.21% SpillSGPRs: 675 -> 883 (+30.81%); split: -78.07%, +108.89% CodeSize: 72205968 -> 153572764 (+112.69%) LDS: 0 -> 227125248 (+inf%) MaxWaves: 1614598 -> 1646934 (+2.00%); split: +3.08%, -1.08% Instrs: 14202239 -> 29654042 (+108.80%) Latency: 87986508 -> 136960419 (+55.66%); split: -0.23%, +55.89% InvThroughput: 14444832 -> 21141875 (+46.36%); split: -0.01%, +46.37% VClause: 340794 -> 493067 (+44.68%); split: -1.33%, +46.01% SClause: 520983 -> 738636 (+41.78%); split: -0.25%, +42.03% Copies: 775639 -> 2787382 (+259.37%) Branches: 296911 -> 1225431 (+312.73%) PreSGPRs: 1316896 -> 2057270 (+56.22%); split: -0.14%, +56.36% PreVGPRs: 1473558 -> 1658432 (+12.55%); split: -1.44%, +13.99% Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Part-of: --- docs/envvars.rst | 4 +++- docs/relnotes/new_features.txt | 1 + src/amd/vulkan/radv_cmd_buffer.c | 2 +- src/amd/vulkan/radv_debug.h | 1 + src/amd/vulkan/radv_device.c | 8 ++++++++ src/amd/vulkan/radv_pipeline.c | 4 ++-- src/amd/vulkan/radv_private.h | 5 ++++- src/amd/vulkan/radv_shader.c | 9 ++------- 8 files changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/envvars.rst b/docs/envvars.rst index be9096505edd..0a0a8e5877ab 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -657,6 +657,8 @@ RADV driver environment variables disable memory shaders cache ``nongg`` disable NGG for GFX10+ + ``nonggc`` + disable NGG culling on GPUs where it's enabled by default (GFX10.3+ only). ``nooutoforder`` disable out-of-order rasterization ``notccompatcmask`` @@ -712,7 +714,7 @@ RADV driver environment variables ``pswave32`` enable wave32 for pixel shaders (GFX10+) ``nggc`` - enable NGG culling on GFX10+ GPUs. + enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only). ``rt`` enable rt extensions whose implementation is still experimental. ``sam`` diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 3f0d07e81af5..cb5e9be7ec12 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -16,3 +16,4 @@ VK_KHR_shader_subgroup_extended_types on lavapipe VK_KHR_spirv_1_4 on lavapipe Experimental raytracing support on RADV VK_KHR_synchronization2 on Intel +NGG shader based culling is now enabled by default on GFX10.3 on RADV. \ No newline at end of file diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7027487b8e8d..ddc73bcd8a82 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5927,7 +5927,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) radv_emit_rbplus_state(cmd_buffer); - if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) && + if (cmd_buffer->device->physical_device->use_ngg_culling && cmd_buffer->state.pipeline->graphics.is_ngg) radv_emit_ngg_culling_state(cmd_buffer, info); diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 19dc22751461..0bfdd4889ceb 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -62,6 +62,7 @@ enum { RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 31, RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 32, RADV_DEBUG_NO_ATOC_DITHERING = 1ull << 33, + RADV_DEBUG_NO_NGGC = 1ull << 34, }; enum { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a1ff738fa1f2..362445be5aa4 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -705,6 +705,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->rad_info.family != CHIP_NAVI14 && !(device->instance->debug_flags & RADV_DEBUG_NO_NGG); + device->use_ngg_culling = + device->use_ngg && + device->rad_info.max_render_backends > 1 && + (device->rad_info.chip_class >= GFX10_3 || + (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) && + !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC); + device->use_ngg_streamout = false; /* Determine the number of threads per wave for all stages. */ @@ -841,6 +848,7 @@ static const struct debug_control radv_debug_options[] = { {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK}, {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING}, {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING}, + {"nonggc", RADV_DEBUG_NO_NGGC}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 69510fdbf537..581a77fa808d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -217,8 +217,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) { uint32_t hash_flags = 0; - if (device->instance->perftest_flags & RADV_PERFTEST_NGGC) - hash_flags |= RADV_HASH_SHADER_FORCE_NGG_CULLING; + if (device->physical_device->use_ngg_culling) + hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING; if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT) hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT; if (device->physical_device->cs_wave_size == 32) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 678d9d553dd5..f49d59ca8df5 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -262,6 +262,9 @@ struct radv_physical_device { /* Whether to enable NGG. */ bool use_ngg; + /* Whether to enable NGG culling. */ + bool use_ngg_culling; + /* Whether to enable NGG streamout. */ bool use_ngg_streamout; @@ -1649,7 +1652,7 @@ struct radv_event { #define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) #define RADV_HASH_SHADER_LLVM (1 << 4) #define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) -#define RADV_HASH_SHADER_FORCE_NGG_CULLING (1 << 13) +#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14) #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15) #define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 6c37a72c44b3..3069f2d02db1 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -899,10 +899,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir, if (nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK)) return false; - /* TODO: enable by default on GFX10.3 when we're confident about performance. */ - bool culling_enabled = device->instance->perftest_flags & RADV_PERFTEST_NGGC; - - if (!culling_enabled) + if (!device->physical_device->use_ngg_culling) return false; /* Shader based culling efficiency can depend on PS throughput. @@ -912,9 +909,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir, unsigned max_render_backends = device->physical_device->rad_info.max_render_backends; unsigned max_se = device->physical_device->rad_info.max_se; - if (max_render_backends < 2) - return false; /* Don't use NGG culling on 1 RB chips. */ - else if (max_render_backends / max_se == 4) + if (max_render_backends / max_se == 4) max_ps_params = 6; /* Sienna Cichlid and other GFX10.3 dGPUs. */ else max_ps_params = 4; /* Navi 1x. */ -- GitLab