Commit c923de68 authored by Samuel Pitoiset's avatar Samuel Pitoiset Committed by Marge Bot
Browse files

radv/gfx10: fix required ballot size with VK_EXT_subgroup_size_control

If compute shaders require a specific subgroup size (ie. Wave32),
we have to use the correct ballot size.

Fixes dEQP-VK.subgroups.ballot_other.compute.*_requiredsubgroupSize.

Fixes: fb07fd4e

 ("radv: implement VK_EXT_subgroup_size_control")
Signed-off-by: Samuel Pitoiset's avatarSamuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen's avatarBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tested-by: Marge Bot <mesa/mesa!4215>
Part-of: <mesa/mesa!4215>
parent 672d1061
...@@ -3925,7 +3925,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, ...@@ -3925,7 +3925,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
args->options->family, float_mode, args->options->family, float_mode,
args->shader_info->wave_size, 64); args->shader_info->wave_size,
args->shader_info->ballot_bit_size);
ctx.context = ctx.ac.context; ctx.context = ctx.ac.context;
ctx.max_workgroup_size = 0; ctx.max_workgroup_size = 0;
......
...@@ -2530,6 +2530,17 @@ radv_get_wave_size(struct radv_device *device, ...@@ -2530,6 +2530,17 @@ radv_get_wave_size(struct radv_device *device,
return device->physical_device->ge_wave_size; return device->physical_device->ge_wave_size;
} }
static uint8_t
radv_get_ballot_bit_size(struct radv_device *device,
const VkPipelineShaderStageCreateInfo *pStage,
gl_shader_stage stage,
const struct radv_shader_variant_key *key)
{
if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
return key->cs.subgroup_size;
return 64;
}
static void static void
radv_fill_shader_info(struct radv_pipeline *pipeline, radv_fill_shader_info(struct radv_pipeline *pipeline,
const VkPipelineShaderStageCreateInfo **pStages, const VkPipelineShaderStageCreateInfo **pStages,
...@@ -2642,10 +2653,15 @@ radv_fill_shader_info(struct radv_pipeline *pipeline, ...@@ -2642,10 +2653,15 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
} }
for (int i = 0; i < MESA_SHADER_STAGES; i++) { for (int i = 0; i < MESA_SHADER_STAGES; i++) {
if (nir[i]) if (nir[i]) {
infos[i].wave_size = infos[i].wave_size =
radv_get_wave_size(pipeline->device, pStages[i], radv_get_wave_size(pipeline->device, pStages[i],
i, &keys[i]); i, &keys[i]);
infos[i].ballot_bit_size =
radv_get_ballot_bit_size(pipeline->device,
pStages[i], i,
&keys[i]);
}
} }
} }
...@@ -2788,7 +2804,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, ...@@ -2788,7 +2804,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) { for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i]; const VkPipelineShaderStageCreateInfo *stage = pStages[i];
unsigned subgroup_size = 64; unsigned subgroup_size = 64, ballot_bit_size = 64;
if (!modules[i]) if (!modules[i])
continue; continue;
...@@ -2802,13 +2818,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline, ...@@ -2802,13 +2818,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
assert(device->physical_device->rad_info.chip_class >= GFX10 && assert(device->physical_device->rad_info.chip_class >= GFX10 &&
i == MESA_SHADER_COMPUTE); i == MESA_SHADER_COMPUTE);
subgroup_size = key->compute_subgroup_size; subgroup_size = key->compute_subgroup_size;
ballot_bit_size = key->compute_subgroup_size;
} }
nir[i] = radv_shader_compile_to_nir(device, modules[i], nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i, stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL, stage ? stage->pSpecializationInfo : NULL,
flags, pipeline->layout, flags, pipeline->layout,
subgroup_size); subgroup_size, ballot_bit_size);
/* We don't want to alter meta shaders IR directly so clone it /* We don't want to alter meta shaders IR directly so clone it
* first. * first.
...@@ -2888,6 +2905,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline, ...@@ -2888,6 +2905,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
pipeline->layout, &key, pipeline->layout, &key,
&info); &info);
info.wave_size = 64; /* Wave32 not supported. */ info.wave_size = 64; /* Wave32 not supported. */
info.ballot_bit_size = 64;
pipeline->gs_copy_shader = radv_create_gs_copy_shader( pipeline->gs_copy_shader = radv_create_gs_copy_shader(
device, nir[MESA_SHADER_GEOMETRY], &info, device, nir[MESA_SHADER_GEOMETRY], &info,
......
...@@ -293,7 +293,7 @@ radv_shader_compile_to_nir(struct radv_device *device, ...@@ -293,7 +293,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
const VkSpecializationInfo *spec_info, const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags, const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout, const struct radv_pipeline_layout *layout,
unsigned subgroup_size) unsigned subgroup_size, unsigned ballot_bit_size)
{ {
nir_shader *nir; nir_shader *nir;
const nir_shader_compiler_options *nir_options = const nir_shader_compiler_options *nir_options =
...@@ -483,7 +483,7 @@ radv_shader_compile_to_nir(struct radv_device *device, ...@@ -483,7 +483,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7; bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) { nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
.subgroup_size = subgroup_size, .subgroup_size = subgroup_size,
.ballot_bit_size = 64, .ballot_bit_size = ballot_bit_size,
.lower_to_scalar = 1, .lower_to_scalar = 1,
.lower_subgroup_masks = 1, .lower_subgroup_masks = 1,
.lower_shuffle = 1, .lower_shuffle = 1,
......
...@@ -236,6 +236,7 @@ struct radv_shader_info { ...@@ -236,6 +236,7 @@ struct radv_shader_info {
bool uses_invocation_id; bool uses_invocation_id;
bool uses_prim_id; bool uses_prim_id;
uint8_t wave_size; uint8_t wave_size;
uint8_t ballot_bit_size;
struct radv_userdata_locations user_sgprs_locs; struct radv_userdata_locations user_sgprs_locs;
unsigned num_user_sgprs; unsigned num_user_sgprs;
unsigned num_input_sgprs; unsigned num_input_sgprs;
...@@ -404,7 +405,7 @@ radv_shader_compile_to_nir(struct radv_device *device, ...@@ -404,7 +405,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
const VkSpecializationInfo *spec_info, const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags, const VkPipelineCreateFlags flags,
const struct radv_pipeline_layout *layout, const struct radv_pipeline_layout *layout,
unsigned subgroup_size); unsigned subgroup_size, unsigned ballot_bit_size);
void * void *
radv_alloc_shader_memory(struct radv_device *device, radv_alloc_shader_memory(struct radv_device *device,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment