diff --git a/docs/envvars.rst b/docs/envvars.rst index be9096505eddfea82ec5b54df2a5a52741255071..0a0a8e5877abbf17727cfe06489a0ef4e7763196 100644 --- a/docs/envvars.rst +++ b/docs/envvars.rst @@ -657,6 +657,8 @@ RADV driver environment variables disable memory shaders cache ``nongg`` disable NGG for GFX10+ + ``nonggc`` + disable NGG culling on GPUs where it's enabled by default (GFX10.3+ only). ``nooutoforder`` disable out-of-order rasterization ``notccompatcmask`` @@ -712,7 +714,7 @@ RADV driver environment variables ``pswave32`` enable wave32 for pixel shaders (GFX10+) ``nggc`` - enable NGG culling on GFX10+ GPUs. + enable NGG culling on GPUs where it's not enabled by default (GFX10.1 only). ``rt`` enable rt extensions whose implementation is still experimental. ``sam`` diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index 3f0d07e81af51dc020f16d369d4cfc9408b439fb..cb5e9be7ec12b833378bb1dc7b929640b010a351 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -16,3 +16,4 @@ VK_KHR_shader_subgroup_extended_types on lavapipe VK_KHR_spirv_1_4 on lavapipe Experimental raytracing support on RADV VK_KHR_synchronization2 on Intel +NGG shader based culling is now enabled by default on GFX10.3 on RADV. \ No newline at end of file diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7027487b8e8d3a79a3f3b7576feaa0d8aaa4e9ad..ddc73bcd8a82aeb82a798d99009f4ed562bc3efa 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -5927,7 +5927,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) radv_emit_rbplus_state(cmd_buffer); - if ((cmd_buffer->device->instance->perftest_flags & RADV_PERFTEST_NGGC) && + if (cmd_buffer->device->physical_device->use_ngg_culling && cmd_buffer->state.pipeline->graphics.is_ngg) radv_emit_ngg_culling_state(cmd_buffer, info); diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h index 19dc22751461ef4b69c50ee4b8cc6fb475eab4f3..0bfdd4889ceb105179d91c55c03e8e48d7845690 100644 --- a/src/amd/vulkan/radv_debug.h +++ b/src/amd/vulkan/radv_debug.h @@ -62,6 +62,7 @@ enum { RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 31, RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 32, RADV_DEBUG_NO_ATOC_DITHERING = 1ull << 33, + RADV_DEBUG_NO_NGGC = 1ull << 34, }; enum { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a1ff738fa1f29aa4db97085e2f6f0d066306f8fe..362445be5aa41655a8b323c9d661b6f85de05f4d 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -705,6 +705,13 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->rad_info.family != CHIP_NAVI14 && !(device->instance->debug_flags & RADV_DEBUG_NO_NGG); + device->use_ngg_culling = + device->use_ngg && + device->rad_info.max_render_backends > 1 && + (device->rad_info.chip_class >= GFX10_3 || + (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) && + !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC); + device->use_ngg_streamout = false; /* Determine the number of threads per wave for all stages. */ @@ -841,6 +848,7 @@ static const struct debug_control radv_debug_options[] = { {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK}, {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING}, {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING}, + {"nonggc", RADV_DEBUG_NO_NGGC}, {NULL, 0}}; const char * diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 69510fdbf537487797cb657d5671ac139ad7d275..581a77fa808d86e75c25d0acf62c308b838056b5 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -217,8 +217,8 @@ radv_get_hash_flags(const struct radv_device *device, bool stats) { uint32_t hash_flags = 0; - if (device->instance->perftest_flags & RADV_PERFTEST_NGGC) - hash_flags |= RADV_HASH_SHADER_FORCE_NGG_CULLING; + if (device->physical_device->use_ngg_culling) + hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING; if (device->instance->perftest_flags & RADV_PERFTEST_FORCE_EMULATE_RT) hash_flags |= RADV_HASH_SHADER_FORCE_EMULATE_RT; if (device->physical_device->cs_wave_size == 32) diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 678d9d553dd57cf09e6f08137f428d9a8927562d..f49d59ca8df50f8336011ddeb5f151d38e3b6b1a 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -262,6 +262,9 @@ struct radv_physical_device { /* Whether to enable NGG. */ bool use_ngg; + /* Whether to enable NGG culling. */ + bool use_ngg_culling; + /* Whether to enable NGG streamout. */ bool use_ngg_streamout; @@ -1649,7 +1652,7 @@ struct radv_event { #define RADV_HASH_SHADER_GE_WAVE32 (1 << 3) #define RADV_HASH_SHADER_LLVM (1 << 4) #define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8) -#define RADV_HASH_SHADER_FORCE_NGG_CULLING (1 << 13) +#define RADV_HASH_SHADER_USE_NGG_CULLING (1 << 13) #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS (1 << 14) #define RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2 (1 << 15) #define RADV_HASH_SHADER_FORCE_EMULATE_RT (1 << 16) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 6c37a72c44b3a9cdfa9d8347fdd3152fa58c1210..3069f2d02db1de79bc1ec5db976e9da48f0b6c59 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -899,10 +899,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir, if (nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK)) return false; - /* TODO: enable by default on GFX10.3 when we're confident about performance. */ - bool culling_enabled = device->instance->perftest_flags & RADV_PERFTEST_NGGC; - - if (!culling_enabled) + if (!device->physical_device->use_ngg_culling) return false; /* Shader based culling efficiency can depend on PS throughput. @@ -912,9 +909,7 @@ radv_consider_culling(struct radv_device *device, struct nir_shader *nir, unsigned max_render_backends = device->physical_device->rad_info.max_render_backends; unsigned max_se = device->physical_device->rad_info.max_se; - if (max_render_backends < 2) - return false; /* Don't use NGG culling on 1 RB chips. */ - else if (max_render_backends / max_se == 4) + if (max_render_backends / max_se == 4) max_ps_params = 6; /* Sienna Cichlid and other GFX10.3 dGPUs. */ else max_ps_params = 4; /* Navi 1x. */