Commit 4fca5c36 authored by Daniel Schürmann's avatar Daniel Schürmann
Browse files

radv: add radv_shader_nir_to_asm_stitched() to compile and stitch binaries

The use cases are:
 - function calls with resume shaders
 - prolog/epilog shaders
 - merged shaders
parent 7815faae
Pipeline #627155 waiting for manual action with stages
......@@ -2129,6 +2129,101 @@ radv_aco_build_shader_binary(void **bin,
*binary = (struct radv_shader_binary*)legacy_binary;
}
/* Append one debug string section (IR or disassembly): first the section of
 * the old binary, then the new string. When both are non-empty the old
 * terminating NUL is overwritten by the first byte of the new string so the
 * result reads as one string.
 *
 * Both cursors are advanced past what was consumed/written. Returns the size
 * in bytes of the merged section.
 *
 * NOTE(review): this assumes the sizes include the terminating NUL, as the
 * original "null_bytes" accounting implied -- confirm against the producer.
 */
static uint32_t
radv_aco_merge_string_section(uint8_t **dst, const uint8_t **src, uint32_t old_size,
                              const char *str, uint32_t size)
{
   const uint32_t overlap = (old_size && size) ? 1 : 0;

   if (old_size)
      memcpy(*dst, *src, old_size);
   *src += old_size;
   *dst += old_size - overlap;

   if (size)
      memcpy(*dst, str, size);
   *dst += size;

   return old_size + size - overlap;
}

/**
 * Binary-building callback that appends a freshly compiled shader part to the
 * legacy binary already stored in *bin.
 *
 * If *bin is NULL the call is forwarded to radv_aco_build_shader_binary().
 * Otherwise a new, larger binary is allocated in which
 *  - the header, statistics and existing code are copied from the old binary,
 *  - the new code is placed directly after the existing code,
 *  - the IR and disassembly strings are concatenated,
 *  - additive statistics are summed and the pre-scheduling register demand
 *    statistics take the maximum,
 *  - the register/LDS/scratch fields of the config take the maximum.
 * The old binary is freed and *bin is updated to point to the new one.
 *
 * If the allocation fails, the old binary is freed and *bin is set to NULL;
 * callers must treat a NULL binary as a compilation failure.
 *
 * \p stage and \p is_gs_copy_shader are only used when forwarding to
 * radv_aco_build_shader_binary(); an existing binary keeps its own header.
 */
static void
radv_aco_accumulate_binary(void **bin, gl_shader_stage stage, bool is_gs_copy_shader,
                           const struct ac_shader_config *config, const char *llvm_ir_str,
                           unsigned llvm_ir_size, const char *disasm_str, unsigned disasm_size,
                           uint32_t *statistics, uint32_t stats_size, uint32_t exec_size,
                           const uint32_t *code, uint32_t code_dw)
{
   struct radv_shader_binary **binary = (struct radv_shader_binary **)bin;
   struct radv_shader_binary_legacy *old_bin = (struct radv_shader_binary_legacy *)*binary;

   if (old_bin == NULL) {
      radv_aco_build_shader_binary(bin, stage, is_gs_copy_shader, config, llvm_ir_str, llvm_ir_size,
                                   disasm_str, disasm_size, statistics, stats_size, exec_size, code,
                                   code_dw);
      return;
   }

   const uint32_t code_size = code_dw * sizeof(uint32_t);

   /* One NUL is dropped per string section, but only where an old and a new
    * string are actually joined.
    */
   const uint32_t dropped_nuls = ((old_bin->ir_size && llvm_ir_size) ? 1 : 0) +
                                 ((old_bin->disasm_size && disasm_size) ? 1 : 0);
   const uint32_t total_size =
      old_bin->base.total_size + code_size + llvm_ir_size + disasm_size - dropped_nuls;

   struct radv_shader_binary_legacy *new_bin =
      (struct radv_shader_binary_legacy *)calloc(total_size, 1);
   if (new_bin == NULL) {
      free(old_bin);
      *binary = NULL;
      return;
   }

   /* All offsets below are in bytes, so the cursors must be byte pointers.
    * The payload offset is taken from the data member because that is where
    * the statistics are read from below.
    */
   const uint8_t *old_base = (const uint8_t *)old_bin;
   uint8_t *new_base = (uint8_t *)new_bin;
   const size_t data_offset = (size_t)((const uint8_t *)old_bin->data - old_base);
   const size_t code_end = data_offset + stats_size + old_bin->code_size;

   /* Copy the struct, stats and the existing code */
   memcpy(new_bin, old_bin, code_end);
   new_bin->base.total_size = total_size;

   /* Update the stats */
   if (stats_size) {
      uint32_t *stats_ptr = (uint32_t *)new_bin->data;
      stats_ptr[aco_statistic_hash] += statistics[aco_statistic_hash];
      stats_ptr[aco_statistic_instructions] += statistics[aco_statistic_instructions];
      stats_ptr[aco_statistic_copies] += statistics[aco_statistic_copies];
      stats_ptr[aco_statistic_branches] += statistics[aco_statistic_branches];
      stats_ptr[aco_statistic_latency] += statistics[aco_statistic_latency];
      stats_ptr[aco_statistic_inv_throughput] += statistics[aco_statistic_inv_throughput];
      stats_ptr[aco_statistic_vmem_clauses] += statistics[aco_statistic_vmem_clauses];
      stats_ptr[aco_statistic_smem_clauses] += statistics[aco_statistic_smem_clauses];
      stats_ptr[aco_statistic_sgpr_presched] =
         MAX2(stats_ptr[aco_statistic_sgpr_presched], statistics[aco_statistic_sgpr_presched]);
      stats_ptr[aco_statistic_vgpr_presched] =
         MAX2(stats_ptr[aco_statistic_vgpr_presched], statistics[aco_statistic_vgpr_presched]);
   }

   const uint8_t *src = old_base + code_end;
   uint8_t *dst = new_base + code_end;

   /* Copy the new code */
   if (code_size)
      memcpy(dst, code, code_size);
   dst += code_size;
   new_bin->code_size += code_size;
   new_bin->exec_size += exec_size;

   /* Merge the IR string */
   new_bin->ir_size =
      radv_aco_merge_string_section(&dst, &src, old_bin->ir_size, llvm_ir_str, llvm_ir_size);

   /* Merge the Disasm string */
   new_bin->disasm_size =
      radv_aco_merge_string_section(&dst, &src, old_bin->disasm_size, disasm_str, disasm_size);

   /* Both cursors must stay inside their allocation and leave the same
    * amount of unused tail (zero when the payload starts at sizeof(struct)).
    */
   assert(src <= old_base + old_bin->base.total_size);
   assert(dst <= new_base + total_size);
   assert((old_base + old_bin->base.total_size) - src == (new_base + total_size) - dst);

   free(old_bin);

   /* Update the config */
   struct ac_shader_config *new_config = &new_bin->base.config;
   new_config->num_sgprs = MAX2(new_config->num_sgprs, config->num_sgprs);
   new_config->num_vgprs = MAX2(new_config->num_vgprs, config->num_vgprs);
   new_config->num_shared_vgprs = MAX2(new_config->num_shared_vgprs, config->num_shared_vgprs);
   new_config->spilled_sgprs = MAX2(new_config->spilled_sgprs, config->spilled_sgprs);
   new_config->spilled_vgprs = MAX2(new_config->spilled_vgprs, config->spilled_vgprs);
   new_config->lds_size = MAX2(new_config->lds_size, config->lds_size);
   new_config->scratch_bytes_per_wave =
      MAX2(new_config->scratch_bytes_per_wave, config->scratch_bytes_per_wave);

   *binary = (struct radv_shader_binary *)new_bin;
}
static void
fill_radv_nir_compiler_options(struct radv_nir_compiler_options *options,
struct radv_device *device, const struct radv_pipeline_key *key,
......@@ -2233,6 +2328,67 @@ radv_shader_nir_to_asm(struct radv_device *device, struct radv_pipeline_stage *p
key, false, false, keep_shader_info, keep_statistic_info, binary_out);
}
/**
 * Compile several NIR shaders one at a time with ACO and stitch the results
 * into a single shader binary, from which one radv_shader is created.
 *
 * Every shader is compiled with the same options, shader info and arguments
 * (taken from \p pl_stage); the stage is taken from shaders[0]. ACO is always
 * used, even when LLVM is selected for the stage (a message is printed in
 * that case).
 *
 * \param shaders       Array of \p shader_count NIR shaders, compiled in order.
 * \param binary_out    On success, receives the stitched binary.
 * \return The created shader, or NULL if \p shader_count is not positive, if
 *         no binary was produced, or if the shader object could not be
 *         created. On failure *binary_out is left untouched.
 */
struct radv_shader *
radv_shader_nir_to_asm_stitched(struct radv_device *device, struct radv_pipeline_stage *pl_stage,
                                struct nir_shader *const *shaders, int shader_count,
                                const struct radv_pipeline_key *key, bool keep_shader_info,
                                bool keep_statistic_info, struct radv_shader_binary **binary_out)
{
   /* shaders[0] is dereferenced below to pick the stage and the options. */
   if (shader_count <= 0)
      return NULL;

   gl_shader_stage stage = shaders[0]->info.stage;
   struct radv_nir_compiler_options options = {0};

   fill_radv_nir_compiler_options(
      &options, device, key, radv_should_use_wgp_mode(device, stage, &pl_stage->info),
      radv_can_dump_shader(device, shaders[0], false), is_meta_shader(shaders[0]), keep_shader_info,
      keep_statistic_info);

   struct radv_shader_debug_data debug_data = {
      .device = device,
      .object = NULL,
   };

   options.debug.func = radv_compiler_debug;
   options.debug.private_data = &debug_data;

   struct radv_shader_binary *binary = NULL;

#ifdef LLVM_AVAILABLE
   if (radv_use_llvm_for_stage(device, stage))
      fprintf(stderr, "Partial compilation of %s not supported with LLVM. Using ACO instead.\n",
              radv_get_shader_name(&pl_stage->info, stage));
#endif

   struct aco_shader_info ac_info;
   struct aco_compiler_options ac_opts;
   radv_aco_convert_opts(&ac_opts, &options);
   radv_aco_convert_shader_info(&ac_info, &pl_stage->info);

   for (int i = 0; i < shader_count; i++) {
      aco_compile_shader(&ac_opts, &ac_info, 1, &shaders[i], &pl_stage->args,
                         &radv_aco_accumulate_binary, (void **)&binary);

      /* A NULL binary means the build callback produced no output; stop here
       * rather than stitching later parts onto nothing or handing NULL to
       * radv_shader_create().
       */
      if (!binary)
         return NULL;
   }

   struct radv_shader *shader =
      radv_shader_create(device, binary, keep_shader_info, false, &pl_stage->args);
   if (!shader) {
      free(binary);
      return NULL;
   }

   if (options.dump_shader) {
      fprintf(stderr, "%s", radv_get_shader_name(&pl_stage->info, stage));
      fprintf(stderr, "\ndisasm:\n%s\n", shader->disasm_string);
   }

   if (keep_shader_info) {
      shader->nir_string = radv_dump_nir_shaders(shaders, shader_count);
   }

   /* Copy the shader binary configuration to store it in the cache. */
   memcpy(&binary->config, &shader->config, sizeof(binary->config));

   *binary_out = binary;
   return shader;
}
struct radv_shader *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader,
struct radv_shader_info *info, const struct radv_shader_args *args,
......
......@@ -558,6 +558,11 @@ struct radv_shader *radv_shader_nir_to_asm(
struct radv_device *device, struct radv_pipeline_stage *stage, struct nir_shader *const *shaders,
int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info, bool keep_statistic_info,
struct radv_shader_binary **binary_out);
/**
 * Compile each of the shader_count NIR shaders separately with ACO and stitch
 * the results into a single binary, from which one radv_shader is created.
 *
 * Intended for function calls with resume shaders, prolog/epilog shaders and
 * merged shaders.
 *
 * Returns the created shader and stores the stitched binary in *binary_out,
 * or returns NULL if the shader could not be created (in which case
 * *binary_out is not written). When keep_shader_info is set, a dump of the
 * NIR shaders is attached to the returned shader.
 */
struct radv_shader *
radv_shader_nir_to_asm_stitched(struct radv_device *device, struct radv_pipeline_stage *stage,
struct nir_shader *const *shaders, int shader_count,
const struct radv_pipeline_key *key, bool keep_shader_info,
bool keep_statistic_info, struct radv_shader_binary **binary_out);
bool radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_binary *binary,
struct radv_shader *shader, void *dest_ptr);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment