...
 
Commits (1)
......@@ -585,6 +585,100 @@ static void radv_init_llvm_once(void)
call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
}
struct shader_stats {
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spilled_sgprs;
unsigned spilled_vgprs;
unsigned lds_size;
unsigned scratch_bytes_per_wave;
unsigned code_size;
unsigned private_mem_vgprs;
unsigned max_simd_waves;
};
static void
generate_shader_stats(struct radv_device *device,
struct radv_shader_variant *variant,
gl_shader_stage stage,
struct _mesa_string_buffer *buf,
struct shader_stats *ss)
{
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
unsigned lds_increment = chip_class >= CIK ? 512 : 256;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family);
conf = &variant->config;
if (stage == MESA_SHADER_FRAGMENT) {
lds_per_wave = conf->lds_size * lds_increment +
align(variant->info.fs.num_interp * 48,
lds_increment);
} else if (stage == MESA_SHADER_COMPUTE) {
unsigned max_workgroup_size =
radv_nir_get_max_workgroup_size(chip_class, variant->nir);
lds_per_wave = (conf->lds_size * lds_increment) /
DIV_ROUND_UP(max_workgroup_size, 64);
}
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
ac_get_num_physical_sgprs(chip_class) / conf->num_sgprs);
if (conf->num_vgprs)
max_simd_waves =
MIN2(max_simd_waves,
RADV_NUM_PHYSICAL_VGPRS / conf->num_vgprs);
/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
* that PS can use.
*/
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
if (buf && stage == MESA_SHADER_FRAGMENT) {
_mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
if (buf) {
_mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
"Spilled SGPRs: %d\n"
"Spilled VGPRs: %d\n"
"PrivMem VGPRS: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs,
variant->info.private_mem_vgprs, variant->code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
} else {
ss->num_sgprs = conf->num_sgprs;
ss->num_vgprs = conf->num_vgprs;
ss->spilled_sgprs = conf->spilled_sgprs;
ss->spilled_vgprs = conf->spilled_vgprs;
ss->private_mem_vgprs = variant->info.private_mem_vgprs;
ss->code_size = variant->code_size;
ss->lds_size = conf->lds_size;
ss->scratch_bytes_per_wave = conf->scratch_bytes_per_wave;
ss->max_simd_waves = max_simd_waves;
}
}
static struct radv_shader_variant *
shader_variant_create(struct radv_device *device,
struct radv_shader_module *module,
......@@ -641,6 +735,65 @@ shader_variant_create(struct radv_device *device,
radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
if (!gs_copy_shader) {
struct radv_shader_variant *sivariant;
sivariant = calloc(1, sizeof(struct radv_shader_variant));
struct shader_stats ss;
variant->nir = *shaders;
generate_shader_stats(device, variant, stage, NULL, &ss);
variant->nir = NULL;
if (!(device->instance->perftest_flags & RADV_PERFTEST_SISCHED))
tm_options |= AC_TM_SISCHED;
else
tm_options ^= AC_TM_SISCHED;
radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options);
struct ac_shader_binary sisched_binary;
radv_compile_nir_shader(&ac_llvm, &sisched_binary, &sivariant->config,
&sivariant->info, shaders, shader_count,
options);
struct shader_stats ss2;
sivariant->nir = *shaders;
generate_shader_stats(device, sivariant, stage, NULL, &ss2);
sivariant->nir = NULL;
if (ss2.max_simd_waves > ss.max_simd_waves ||
(ss2.max_simd_waves == ss.max_simd_waves &&
ss2.code_size < ss.code_size)) {
free(binary.code);
free(binary.config);
free(binary.rodata);
free(binary.global_symbol_offsets);
free(binary.relocs);
free(binary.disasm_string);
binary.code = sisched_binary.code;
binary.config = sisched_binary.config;
binary.rodata = sisched_binary.rodata;
binary.global_symbol_offsets = sisched_binary.global_symbol_offsets;
binary.relocs = sisched_binary.relocs;
binary.disasm_string = sisched_binary.disasm_string;
free(variant);
variant = sivariant;
} else {
free(sisched_binary.code);
free(sisched_binary.config);
free(sisched_binary.rodata);
free(sisched_binary.global_symbol_offsets);
free(sisched_binary.relocs);
free(sisched_binary.disasm_string);
}
}
// TODO: check stats then run GCM, sisched, alternate opt combos/order?
radv_fill_shader_variant(device, variant, &binary, stage);
if (code_out) {
......@@ -739,74 +892,6 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
};
}
static void
generate_shader_stats(struct radv_device *device,
struct radv_shader_variant *variant,
gl_shader_stage stage,
struct _mesa_string_buffer *buf)
{
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
unsigned lds_increment = chip_class >= CIK ? 512 : 256;
struct ac_shader_config *conf;
unsigned max_simd_waves;
unsigned lds_per_wave = 0;
max_simd_waves = ac_get_max_simd_waves(device->physical_device->rad_info.family);
conf = &variant->config;
if (stage == MESA_SHADER_FRAGMENT) {
lds_per_wave = conf->lds_size * lds_increment +
align(variant->info.fs.num_interp * 48,
lds_increment);
} else if (stage == MESA_SHADER_COMPUTE) {
unsigned max_workgroup_size =
radv_nir_get_max_workgroup_size(chip_class, variant->nir);
lds_per_wave = (conf->lds_size * lds_increment) /
DIV_ROUND_UP(max_workgroup_size, 64);
}
if (conf->num_sgprs)
max_simd_waves =
MIN2(max_simd_waves,
ac_get_num_physical_sgprs(chip_class) / conf->num_sgprs);
if (conf->num_vgprs)
max_simd_waves =
MIN2(max_simd_waves,
RADV_NUM_PHYSICAL_VGPRS / conf->num_vgprs);
/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
* that PS can use.
*/
if (lds_per_wave)
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
if (stage == MESA_SHADER_FRAGMENT) {
_mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
"SPI_PS_INPUT_ENA = 0x%04x\n",
conf->spi_ps_input_addr, conf->spi_ps_input_ena);
}
_mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
"SGPRS: %d\n"
"VGPRS: %d\n"
"Spilled SGPRs: %d\n"
"Spilled VGPRs: %d\n"
"PrivMem VGPRS: %d\n"
"Code Size: %d bytes\n"
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
"********************\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs,
variant->info.private_mem_vgprs, variant->code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
max_simd_waves);
}
void
radv_shader_dump_stats(struct radv_device *device,
struct radv_shader_variant *variant,
......@@ -815,7 +900,7 @@ radv_shader_dump_stats(struct radv_device *device,
{
struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
generate_shader_stats(device, variant, stage, buf);
generate_shader_stats(device, variant, stage, buf, NULL);
fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
fprintf(file, "%s", buf->buf);
......@@ -893,7 +978,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
_mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
_mesa_string_buffer_printf(buf, "%s\n\n", variant->llvm_ir_string);
_mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
generate_shader_stats(device, variant, stage, buf);
generate_shader_stats(device, variant, stage, buf, NULL);
/* Need to include the null terminator. */
size_t length = buf->length + 1;
......