Commit 60b53a0e authored by Daniel Schürmann
Browse files

aco: Format.

Manually adjusted some comments for more intuitive line breaks.
parent 088975d8
Pipeline #338597 waiting for manual action with stages
This diff is collapsed.
......@@ -40,7 +40,8 @@ struct dce_ctx {
std::vector<uint16_t> uses;
std::vector<std::vector<bool>> live;
dce_ctx(Program* program) : current_block(program->blocks.size() - 1), uses(program->peekAllocationId())
dce_ctx(Program* program)
: current_block(program->blocks.size() - 1), uses(program->peekAllocationId())
{
live.reserve(program->blocks.size());
for (Block& block : program->blocks)
......@@ -48,7 +49,8 @@ struct dce_ctx {
}
};
void process_block(dce_ctx& ctx, Block& block)
void
process_block(dce_ctx& ctx, Block& block)
{
std::vector<bool>& live = ctx.live[block.index];
assert(live.size() == block.instructions.size());
......@@ -72,23 +74,26 @@ void process_block(dce_ctx& ctx, Block& block)
if (process_predecessors) {
for (unsigned pred_idx : block.linear_preds)
ctx.current_block = std::max(ctx.current_block, (int) pred_idx);
ctx.current_block = std::max(ctx.current_block, (int)pred_idx);
}
}
} /* end namespace */
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr)
bool
is_dead(const std::vector<uint16_t>& uses, Instruction* instr)
{
if (instr->definitions.empty() || instr->isBranch())
return false;
if (std::any_of(instr->definitions.begin(), instr->definitions.end(),
[&uses] (const Definition& def) { return !def.isTemp() || uses[def.tempId()];}))
[&uses](const Definition& def) { return !def.isTemp() || uses[def.tempId()]; }))
return false;
return !(get_sync_info(instr).semantics & (semantic_volatile | semantic_acqrel));
}
std::vector<uint16_t> dead_code_analysis(Program *program) {
std::vector<uint16_t>
dead_code_analysis(Program* program)
{
dce_ctx ctx(program);
......@@ -105,5 +110,4 @@ std::vector<uint16_t> dead_code_analysis(Program *program) {
return ctx.uses;
}
}
} // namespace aco
......@@ -38,7 +38,8 @@
namespace aco {
void dominator_tree(Program* program)
void
dominator_tree(Program* program)
{
program->blocks[0].logical_idom = 0;
program->blocks[0].linear_idom = 0;
......@@ -48,7 +49,7 @@ void dominator_tree(Program* program)
int new_logical_idom = -1;
int new_linear_idom = -1;
for (unsigned pred_idx : block.logical_preds) {
if ((int) program->blocks[pred_idx].logical_idom == -1)
if ((int)program->blocks[pred_idx].logical_idom == -1)
continue;
if (new_logical_idom == -1) {
......@@ -56,16 +57,16 @@ void dominator_tree(Program* program)
continue;
}
while ((int) pred_idx != new_logical_idom) {
if ((int) pred_idx > new_logical_idom)
while ((int)pred_idx != new_logical_idom) {
if ((int)pred_idx > new_logical_idom)
pred_idx = program->blocks[pred_idx].logical_idom;
if ((int) pred_idx < new_logical_idom)
if ((int)pred_idx < new_logical_idom)
new_logical_idom = program->blocks[new_logical_idom].logical_idom;
}
}
for (unsigned pred_idx : block.linear_preds) {
if ((int) program->blocks[pred_idx].linear_idom == -1)
if ((int)program->blocks[pred_idx].linear_idom == -1)
continue;
if (new_linear_idom == -1) {
......@@ -73,10 +74,10 @@ void dominator_tree(Program* program)
continue;
}
while ((int) pred_idx != new_linear_idom) {
if ((int) pred_idx > new_linear_idom)
while ((int)pred_idx != new_linear_idom) {
if ((int)pred_idx > new_linear_idom)
pred_idx = program->blocks[pred_idx].linear_idom;
if ((int) pred_idx < new_linear_idom)
if ((int)pred_idx < new_linear_idom)
new_linear_idom = program->blocks[new_linear_idom].linear_idom;
}
}
......@@ -86,5 +87,5 @@ void dominator_tree(Program* program)
}
}
}
} // namespace aco
#endif
......@@ -31,15 +31,15 @@ namespace aco {
namespace {
/* there can also be LDS and VALU clauses, but I don't see how those are interesting */
enum clause_type
{
enum clause_type {
clause_vmem,
clause_flat,
clause_smem,
clause_other,
};
void emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction> *instrs)
void
emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction>* instrs)
{
unsigned start = 0;
......@@ -61,7 +61,8 @@ void emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction> *instrs
} /* end namespace */
void form_hard_clauses(Program *program)
void
form_hard_clauses(Program* program)
{
for (Block& block : program->blocks) {
unsigned num_instrs = 0;
......@@ -77,7 +78,8 @@ void form_hard_clauses(Program *program)
clause_type type = clause_other;
if (instr->isVMEM() && !instr->operands.empty()) {
if (program->chip_class == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
if (program->chip_class == GFX10 && instr->isMIMG() &&
get_mimg_nsa_dwords(instr.get()) > 0)
type = clause_other;
else
type = clause_vmem;
......@@ -109,4 +111,4 @@ void form_hard_clauses(Program *program)
block.instructions = std::move(new_instructions);
}
}
}
} // namespace aco
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -39,7 +39,8 @@ struct shader_io_state {
uint8_t mask[VARYING_SLOT_MAX];
Temp temps[VARYING_SLOT_MAX * 4u];
shader_io_state() {
shader_io_state()
{
memset(mask, 0, sizeof(mask));
std::fill_n(temps, VARYING_SLOT_MAX * 4u, Temp(0, RegClass::v1));
}
......@@ -57,14 +58,14 @@ enum resource_flags {
};
struct isel_context {
const struct radv_nir_compiler_options *options;
struct radv_shader_args *args;
Program *program;
nir_shader *shader;
const struct radv_nir_compiler_options* options;
struct radv_shader_args* args;
Program* program;
nir_shader* shader;
uint32_t constant_data_offset;
Block *block;
Block* block;
uint32_t first_temp_id;
std::unordered_map<unsigned, std::array<Temp,NIR_MAX_VEC_COMPONENTS>> allocated_vec;
std::unordered_map<unsigned, std::array<Temp, NIR_MAX_VEC_COMPONENTS>> allocated_vec;
Stage stage;
struct {
bool has_branch;
......@@ -77,7 +78,8 @@ struct isel_context {
struct {
bool is_divergent = false;
} parent_if;
bool exec_potentially_empty_discard = false; /* set to false when loop_nest_depth==0 && parent_if.is_divergent==false */
bool exec_potentially_empty_discard =
false; /* set to false when loop_nest_depth==0 && parent_if.is_divergent==false */
uint16_t exec_potentially_empty_break_depth = UINT16_MAX;
/* Set to false when loop_nest_depth==exec_potentially_empty_break_depth
* and parent_if.is_divergent==false. Called _break but it's also used for
......@@ -87,7 +89,7 @@ struct isel_context {
} cf_info;
/* NIR range analysis. */
struct hash_table *range_ht;
struct hash_table* range_ht;
nir_unsigned_upper_bound_config ub_config;
uint32_t resource_flag_offsets[MAX_SETS];
......@@ -116,14 +118,16 @@ struct isel_context {
shader_io_state outputs;
};
inline Temp get_arg(isel_context *ctx, struct ac_arg arg)
inline Temp
get_arg(isel_context* ctx, struct ac_arg arg)
{
assert(arg.used);
return ctx->arg_temps[arg.arg_index];
}
inline void get_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access,
uint8_t **flags, uint32_t *count)
inline void
get_buffer_resource_flags(isel_context* ctx, nir_ssa_def* def, unsigned access, uint8_t** flags,
uint32_t* count)
{
nir_binding binding = {0};
/* global resources (def=NULL) are considered aliasing with all other buffers and
......@@ -133,7 +137,7 @@ inline void get_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsig
binding = nir_chase_binding(nir_src_for_ssa(def));
if (binding.var) {
const glsl_type *type = binding.var->type->without_array();
const glsl_type* type = binding.var->type->without_array();
assert(type->is_image());
if (type->sampler_dimensionality != GLSL_SAMPLER_DIM_BUF) {
*flags = NULL;
......@@ -162,9 +166,10 @@ inline void get_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsig
*count = 1;
}
inline uint8_t get_all_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def, unsigned access)
inline uint8_t
get_all_buffer_resource_flags(isel_context* ctx, nir_ssa_def* def, unsigned access)
{
uint8_t *flags;
uint8_t* flags;
uint32_t count;
get_buffer_resource_flags(ctx, def, access, &flags, &count);
......@@ -174,16 +179,12 @@ inline uint8_t get_all_buffer_resource_flags(isel_context *ctx, nir_ssa_def *def
return res;
}
void init_context(isel_context *ctx, nir_shader *shader);
void cleanup_context(isel_context *ctx);
void init_context(isel_context* ctx, nir_shader* shader);
void cleanup_context(isel_context* ctx);
isel_context
setup_isel_context(Program* program,
unsigned shader_count,
struct nir_shader *const *shaders,
ac_shader_config* config,
struct radv_shader_args *args,
bool is_gs_copy_shader);
isel_context setup_isel_context(Program* program, unsigned shader_count,
struct nir_shader* const* shaders, ac_shader_config* config,
struct radv_shader_args* args, bool is_gs_copy_shader);
} // namespace aco
......
......@@ -23,6 +23,7 @@
*/
#include "aco_interface.h"
#include "aco_ir.h"
#include "vulkan/radv_shader.h"
......@@ -37,23 +38,33 @@
static const std::array<aco_compiler_statistic_info, aco::num_statistics> statistic_infos = []()
{
std::array<aco_compiler_statistic_info, aco::num_statistics> ret{};
ret[aco::statistic_hash] = aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
ret[aco::statistic_instructions] = aco_compiler_statistic_info{"Instructions", "Instruction count"};
ret[aco::statistic_copies] = aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
ret[aco::statistic_hash] =
aco_compiler_statistic_info{"Hash", "CRC32 hash of code and constant data"};
ret[aco::statistic_instructions] =
aco_compiler_statistic_info{"Instructions", "Instruction count"};
ret[aco::statistic_copies] =
aco_compiler_statistic_info{"Copies", "Copy instructions created for pseudo-instructions"};
ret[aco::statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"};
ret[aco::statistic_latency] = aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{"Inverse Throughput", "Estimated busy cycles to execute one wave"};
ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
ret[aco::statistic_sgpr_presched] = aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
ret[aco::statistic_vgpr_presched] = aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
ret[aco::statistic_latency] =
aco_compiler_statistic_info{"Latency", "Issue cycles plus stall cycles"};
ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{
"Inverse Throughput", "Estimated busy cycles to execute one wave"};
ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{
"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"};
ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{
"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"};
ret[aco::statistic_sgpr_presched] =
aco_compiler_statistic_info{"Pre-Sched SGPRs", "SGPR usage before scheduling"};
ret[aco::statistic_vgpr_presched] =
aco_compiler_statistic_info{"Pre-Sched VGPRs", "VGPR usage before scheduling"};
return ret;
}();
const unsigned aco_num_statistics = aco::num_statistics;
const aco_compiler_statistic_info *aco_statistic_infos = statistic_infos.data();
const aco_compiler_statistic_info* aco_statistic_infos = statistic_infos.data();
static void validate(aco::Program *program)
static void
validate(aco::Program* program)
{
if (!(aco::debug_flags & aco::DEBUG_VALIDATE_IR))
return;
......@@ -62,10 +73,9 @@ static void validate(aco::Program *program)
assert(is_valid);
}
void aco_compile_shader(unsigned shader_count,
struct nir_shader *const *shaders,
struct radv_shader_binary **binary,
struct radv_shader_args *args)
void
aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
struct radv_shader_binary** binary, struct radv_shader_args* args)
{
aco::init();
......@@ -116,11 +126,11 @@ void aco_compile_shader(unsigned shader_count,
std::string llvm_ir;
if (args->options->record_ir) {
char *data = NULL;
char* data = NULL;
size_t size = 0;
u_memstream mem;
if (u_memstream_open(&mem, &data, &size)) {
FILE *const memf = u_memstream_get(&mem);
FILE* const memf = u_memstream_get(&mem);
aco_print_program(program.get(), memf);
fputc(0, memf);
u_memstream_close(&mem);
......@@ -137,8 +147,7 @@ void aco_compile_shader(unsigned shader_count,
aco_print_program(program.get(), stderr, live_vars, aco::print_live_vars | aco::print_kill);
if (!args->is_trap_handler_shader) {
if (!args->options->disable_optimizations &&
!(aco::debug_flags & aco::DEBUG_NO_SCHED))
if (!args->options->disable_optimizations && !(aco::debug_flags & aco::DEBUG_NO_SCHED))
aco::schedule_program(program.get(), live_vars);
validate(program.get());
......@@ -189,11 +198,11 @@ void aco_compile_shader(unsigned shader_count,
std::string disasm;
if (get_disasm) {
char *data = NULL;
char* data = NULL;
size_t disasm_size = 0;
struct u_memstream mem;
if (u_memstream_open(&mem, &data, &disasm_size)) {
FILE *const memf = u_memstream_get(&mem);
FILE* const memf = u_memstream_get(&mem);
aco::print_asm(program.get(), code, exec_size / 4u, memf);
fputc(0, memf);
u_memstream_close(&mem);
......@@ -214,10 +223,10 @@ void aco_compile_shader(unsigned shader_count,
* directly for the disk cache. Uninitialized data can appear because of
* padding in the struct or because legacy_binary->data can be at an offset
* from the start less than sizeof(radv_shader_binary_legacy). */
radv_shader_binary_legacy* legacy_binary = (radv_shader_binary_legacy*) calloc(size, 1);
radv_shader_binary_legacy* legacy_binary = (radv_shader_binary_legacy*)calloc(size, 1);
legacy_binary->base.type = RADV_BINARY_TYPE_LEGACY;
legacy_binary->base.stage = shaders[shader_count-1]->info.stage;
legacy_binary->base.stage = shaders[shader_count - 1]->info.stage;
legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
legacy_binary->base.total_size = size;
......@@ -225,7 +234,8 @@ void aco_compile_shader(unsigned shader_count,
memcpy(legacy_binary->data, program->statistics, aco::num_statistics * sizeof(uint32_t));
legacy_binary->stats_size = stats_size;
memcpy(legacy_binary->data + legacy_binary->stats_size, code.data(), code.size() * sizeof(uint32_t));
memcpy(legacy_binary->data + legacy_binary->stats_size, code.data(),
code.size() * sizeof(uint32_t));
legacy_binary->exec_size = exec_size;
legacy_binary->code_size = code.size() * sizeof(uint32_t);
......@@ -233,12 +243,15 @@ void aco_compile_shader(unsigned shader_count,
legacy_binary->disasm_size = 0;
legacy_binary->ir_size = llvm_ir.size();
llvm_ir.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir.size());
llvm_ir.copy((char*)legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size,
llvm_ir.size());
if (get_disasm) {
disasm.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir.size(), disasm.size());
disasm.copy((char*)legacy_binary->data + legacy_binary->stats_size +
legacy_binary->code_size + llvm_ir.size(),
disasm.size());
legacy_binary->disasm_size = disasm.size();
}
*binary = (radv_shader_binary*) legacy_binary;
*binary = (radv_shader_binary*)legacy_binary;
}
......@@ -39,12 +39,10 @@ struct aco_compiler_statistic_info {
};
extern const unsigned aco_num_statistics;
extern const struct aco_compiler_statistic_info *aco_statistic_infos;
extern const struct aco_compiler_statistic_info* aco_statistic_infos;
void aco_compile_shader(unsigned shader_count,
struct nir_shader *const *shaders,
struct radv_shader_binary** binary,
struct radv_shader_args *args);
void aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
struct radv_shader_binary** binary, struct radv_shader_args* args);
#ifdef __cplusplus
}
......
......@@ -30,39 +30,40 @@ namespace aco {
uint64_t debug_flags = 0;
static const struct debug_control aco_debug_options[] = {
{"validateir", DEBUG_VALIDATE_IR},
{"validatera", DEBUG_VALIDATE_RA},
{"perfwarn", DEBUG_PERFWARN},
{"force-waitcnt", DEBUG_FORCE_WAITCNT},
{"novn", DEBUG_NO_VN},
{"noopt", DEBUG_NO_OPT},
{"nosched", DEBUG_NO_SCHED},
{"perfinfo", DEBUG_PERF_INFO},
{"liveinfo", DEBUG_LIVE_INFO},
{NULL, 0}
};
static const struct debug_control aco_debug_options[] = {{"validateir", DEBUG_VALIDATE_IR},
{"validatera", DEBUG_VALIDATE_RA},
{"perfwarn", DEBUG_PERFWARN},
{"force-waitcnt", DEBUG_FORCE_WAITCNT},
{"novn", DEBUG_NO_VN},
{"noopt", DEBUG_NO_OPT},
{"nosched", DEBUG_NO_SCHED},
{"perfinfo", DEBUG_PERF_INFO},
{"liveinfo", DEBUG_LIVE_INFO},
{NULL, 0}};
static once_flag init_once_flag = ONCE_FLAG_INIT;
static void init_once()
static void
init_once()
{
debug_flags = parse_debug_string(getenv("ACO_DEBUG"), aco_debug_options);
#ifndef NDEBUG
#ifndef NDEBUG
/* enable some flags by default on debug builds */
debug_flags |= aco::DEBUG_VALIDATE_IR;
#endif
#endif
}
void init()
void
init()
{
call_once(&init_once_flag, init_once);
}
void init_program(Program *program, Stage stage, struct radv_shader_info *info,
enum chip_class chip_class, enum radeon_family family,
bool wgp_mode, ac_shader_config *config)
void
init_program(Program* program, Stage stage, struct radv_shader_info* info,
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
ac_shader_config* config)
{
program->stage = stage;
program->config = config;
......@@ -70,24 +71,12 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->chip_class = chip_class;
if (family == CHIP_UNKNOWN) {
switch (chip_class) {
case GFX6:
program->family = CHIP_TAHITI;
break;
case GFX7:
program->family = CHIP_BONAIRE;
break;
case GFX8:
program->family = CHIP_POLARIS10;
break;
case GFX9:
program->family = CHIP_VEGA10;
break;
case GFX10:
program->family = CHIP_NAVI10;
break;
default:
program->family = CHIP_UNKNOWN;
break;
case GFX6: program->family = CHIP_TAHITI; break;
case GFX7: program->family = CHIP_BONAIRE; break;
case GFX8: program->family = CHIP_POLARIS10; break;
case GFX9: program->family = CHIP_VEGA10; break;
case GFX10: program->family = CHIP_NAVI10; break;
default: program->family = CHIP_UNKNOWN; break;
}
} else {
program->family = family;
......@@ -96,7 +85,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->lane_mask = program->wave_size == 32 ? s1 : s2;
program->dev.lds_encoding_granule = chip_class >= GFX7 ? 512 : 256;
program->dev.lds_alloc_granule = chip_class >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
program->dev.lds_alloc_granule =
chip_class >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
program->dev.lds_limit = chip_class >= GFX7 ? 65536 : 32768;
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
......@@ -109,7 +99,8 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */
program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
program->dev.sgpr_alloc_granule = 128;
program->dev.sgpr_limit = 108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
program->dev.sgpr_limit =
108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
if (chip_class >= GFX10_3)
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
else
......@@ -143,18 +134,14 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
/* GFX9 APUS */
case CHIP_RAVEN:
case CHIP_RAVEN2:
case CHIP_RENOIR:
program->dev.xnack_enabled = true;
break;
default:
break;
case CHIP_RENOIR: program->dev.xnack_enabled = true; break;
default: break;
}
program->dev.sram_ecc_enabled = program->family == CHIP_ARCTURUS;
/* apparently gfx702 also has fast v_fma_f32 but I can't find a family for that */
program->dev.has_fast_fma32 = program->chip_class >= GFX9;
if (program->family == CHIP_TAHITI ||
program->family == CHIP_CARRIZO ||
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
program->family == CHIP_HAWAII)
program->dev.has_fast_fma32 = true;
......@@ -174,29 +161,24 @@ void init_program(Program *program, Stage stage, struct radv_shader_info *info,
program->next_fp_mode.round32 = fp_round_ne;
}
memory_sync_info get_sync_info(const Instruction* instr)
memory_sync_info
get_sync_info(const Instruction* instr)
{
switch (instr->format) {
case Format::SMEM:
return instr->smem().sync;
case Format::MUBUF:
return instr->mubuf().sync;
case Format::MIMG:
return instr->mimg().sync;
case Format::MTBUF:
return instr->mtbuf().sync;
case Format::SMEM: return instr->smem().sync;