diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 361411d05e5d68c7db865c34cdba96ba0ac738f2..5dd1c7183e83f8e6e28017fdcd49791009537248 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -847,7 +847,7 @@ void mitigate_hazards(Program* program) { std::vector all_ctx(program->blocks.size()); - std::stack loop_header_indices; + std::stack> loop_header_indices; for (unsigned i = 0; i < program->blocks.size(); i++) { Block& block = program->blocks[i]; diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index d7fc87c126d6f3f2635346428c7a1109ec4ec277..cb6c2a60804c262199bb73bd37882ad5e20915c7 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -767,7 +767,7 @@ insert_wait_states(Program* program) std::vector in_ctx(program->blocks.size(), wait_ctx(program)); std::vector out_ctx(program->blocks.size(), wait_ctx(program)); - std::stack loop_header_indices; + std::stack> loop_header_indices; unsigned loop_progress = 0; for (unsigned i = 0; i < program->blocks.size();) { diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bd81c50083c9c98a1a8f73fa26eb9d367bec43a0..d4fd9794fdc964d8236f347904188f4e32c4baf4 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11811,7 +11811,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), get_arg(&ctx, ctx.args->ac.vertex_id)); - std::stack if_contexts; + std::stack> if_contexts; for (unsigned stream = 0; stream < 4; stream++) { if (stream_id.isConstant() && stream != stream_id.constantValue()) diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 205a687b7e4c266ce555547120071c25f5aae71f..96f3bb85061e51d9e47a0763c78d75928ba3fc3c 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -28,12 +28,26 @@ #include "common/sid.h" +#include +#include #include #include #include +#include #include #include +namespace std { +template <> struct hash { + size_t operator()(aco::Temp temp) const noexcept + { + uint32_t v; + std::memcpy(&v, &temp, sizeof(temp)); + return std::hash{}(v); + } +}; +} // namespace std + /* * Implements the spilling algorithm on SSA-form from * "Register Spilling and Live-Range Splitting for SSA-Form Programs" @@ -53,17 +67,19 @@ struct spill_ctx { Program* program; std::vector> register_demand; std::vector> renames; - std::vector> spills_entry; - std::vector> spills_exit; + std::vector> spills_entry; + std::vector> spills_exit; + std::vector processed; - std::stack loop_header; - std::vector>> next_use_distances_start; - std::vector>> next_use_distances_end; + std::stack> loop_header; + std::vector>> next_use_distances_start; + std::vector>> next_use_distances_end; + std::vector>> local_next_use_distance; /* Working buffer */ std::vector>> interferences; std::vector> affinities; std::vector is_reloaded; - std::map remat; - std::map remat_used; + std::unordered_map remat; + std::set unused_remats; unsigned wave_size; spill_ctx(const RegisterDemand target_pressure_, Program* program_, @@ -152,15 +168,17 @@ get_dominator(int idx_a, int idx_b, Program* program, bool is_linear) } void -next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set& worklist) +next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist) { Block* block = &ctx.program->blocks[block_idx]; - std::map> next_uses = ctx.next_use_distances_end[block_idx]; + ctx.next_use_distances_start[block_idx] = ctx.next_use_distances_end[block_idx]; + auto& next_use_distances_start = ctx.next_use_distances_start[block_idx]; /* to compute the next use distance at the beginning of the block, we have to add the block's * size */ - for (std::map>::iterator it = next_uses.begin(); - it != next_uses.end(); ++it) + for (std::unordered_map>::iterator it = + next_use_distances_start.begin(); + it != next_use_distances_start.end(); ++it) it->second.second = it->second.second + block->instructions.size(); int idx = block->instructions.size() - 1; @@ -172,7 +190,7 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set& work for (const Definition& def : instr->definitions) { if (def.isTemp()) - next_uses.erase(def.getTemp()); + next_use_distances_start.erase(def.getTemp()); } for (const Operand& op : instr->operands) { @@ -182,59 +200,67 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, std::set& work if (op.regClass().type() == RegType::vgpr && op.regClass().is_linear()) continue; if (op.isTemp()) - next_uses[op.getTemp()] = {block_idx, idx}; + next_use_distances_start[op.getTemp()] = {block_idx, idx}; } idx--; } - assert(block_idx != 0 || next_uses.empty()); - ctx.next_use_distances_start[block_idx] = next_uses; + assert(block_idx != 0 || next_use_distances_start.empty()); + std::unordered_set phi_defs; while (idx >= 0) { aco_ptr& instr = block->instructions[idx]; assert(instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi); std::pair distance{block_idx, 0}; - auto it = instr->definitions[0].isTemp() ? next_uses.find(instr->definitions[0].getTemp()) - : next_uses.end(); - if (it != next_uses.end()) { + auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp()) + : next_use_distances_start.end(); + if (it != next_use_distances_start.end() && + phi_defs.insert(instr->definitions[0].getTemp()).second) { distance = it->second; - next_uses.erase(it); } for (unsigned i = 0; i < instr->operands.size(); i++) { unsigned pred_idx = instr->opcode == aco_opcode::p_phi ? block->logical_preds[i] : block->linear_preds[i]; if (instr->operands[i].isTemp()) { - if (ctx.next_use_distances_end[pred_idx].find(instr->operands[i].getTemp()) == - ctx.next_use_distances_end[pred_idx].end() || - ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] != distance) - worklist.insert(pred_idx); - ctx.next_use_distances_end[pred_idx][instr->operands[i].getTemp()] = distance; + auto insert_result = ctx.next_use_distances_end[pred_idx].insert( + std::make_pair(instr->operands[i].getTemp(), distance)); + const bool inserted = insert_result.second; + std::pair& entry_distance = insert_result.first->second; + if (inserted || entry_distance != distance) + worklist = std::max(worklist, pred_idx + 1); + entry_distance = distance; } } idx--; } /* all remaining live vars must be live-out at the predecessors */ - for (std::pair> pair : next_uses) { + for (std::pair>& pair : next_use_distances_start) { Temp temp = pair.first; + if (phi_defs.count(temp)) { + continue; + } uint32_t distance = pair.second.second; uint32_t dom = pair.second.first; std::vector& preds = temp.is_linear() ? block->linear_preds : block->logical_preds; for (unsigned pred_idx : preds) { if (ctx.program->blocks[pred_idx].loop_nest_depth > block->loop_nest_depth) distance += 0xFFFF; - if (ctx.next_use_distances_end[pred_idx].find(temp) != - ctx.next_use_distances_end[pred_idx].end()) { - dom = get_dominator(dom, ctx.next_use_distances_end[pred_idx][temp].first, ctx.program, - temp.is_linear()); - distance = std::min(ctx.next_use_distances_end[pred_idx][temp].second, distance); + auto insert_result = ctx.next_use_distances_end[pred_idx].insert( + std::make_pair(temp, std::pair{})); + const bool inserted = insert_result.second; + std::pair& entry_distance = insert_result.first->second; + + if (!inserted) { + dom = get_dominator(dom, entry_distance.first, ctx.program, temp.is_linear()); + distance = std::min(entry_distance.second, distance); + } + if (entry_distance != std::pair{dom, distance}) { + worklist = std::max(worklist, pred_idx + 1); + entry_distance = {dom, distance}; } - if (ctx.next_use_distances_end[pred_idx][temp] != - std::pair{dom, distance}) - worklist.insert(pred_idx); - ctx.next_use_distances_end[pred_idx][temp] = {dom, distance}; } } } @@ -244,14 +270,10 @@ compute_global_next_uses(spill_ctx& ctx) { ctx.next_use_distances_start.resize(ctx.program->blocks.size()); ctx.next_use_distances_end.resize(ctx.program->blocks.size()); - std::set worklist; - for (Block& block : ctx.program->blocks) - worklist.insert(block.index); - - while (!worklist.empty()) { - std::set::reverse_iterator b_it = worklist.rbegin(); - unsigned block_idx = *b_it; - worklist.erase(block_idx); + + uint32_t worklist = ctx.program->blocks.size(); + while (worklist) { + unsigned block_idx = --worklist; next_uses_per_block(ctx, block_idx, worklist); } } @@ -287,7 +309,7 @@ should_rematerialize(aco_ptr& instr) aco_ptr do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id) { - std::map::iterator remat = ctx.remat.find(tmp); + std::unordered_map::iterator remat = ctx.remat.find(tmp); if (remat != ctx.remat.end()) { Instruction* instr = remat->second.instr; assert((instr->isVOP1() || instr->isSOP1() || instr->isPseudo() || instr->isSOPK()) && @@ -317,7 +339,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id) if (instr->operands[i].isTemp()) { assert(false && "unsupported"); if (ctx.remat.count(instr->operands[i].getTemp())) - ctx.remat_used[ctx.remat[instr->operands[i].getTemp()].instr] = true; + ctx.unused_remats.erase(ctx.remat[instr->operands[i].getTemp()].instr); } } res->definitions[0] = Definition(new_name); @@ -346,7 +368,7 @@ get_rematerialize_info(spill_ctx& ctx) for (const Definition& def : instr->definitions) { if (def.isTemp()) { ctx.remat[def.getTemp()] = remat_info{instr.get()}; - ctx.remat_used[instr.get()] = false; + ctx.unused_remats.insert(instr.get()); } } } @@ -354,15 +376,22 @@ get_rematerialize_info(spill_ctx& ctx) } } -std::vector> -local_next_uses(spill_ctx& ctx, Block* block) +void +update_local_next_uses(spill_ctx& ctx, Block* block, + std::vector>>& local_next_uses) { - std::vector> local_next_uses(block->instructions.size()); + if (local_next_uses.size() < block->instructions.size()) { + /* Allocate more next-use-maps. Note that by never reducing the vector size, we enable + * future calls to this function to re-use already allocated map memory. */ + local_next_uses.resize(block->instructions.size()); + } - std::map next_uses; - for (std::pair> pair : - ctx.next_use_distances_end[block->index]) - next_uses[pair.first] = pair.second.second + block->instructions.size(); + local_next_uses[block->instructions.size() - 1].clear(); + for (std::pair>& pair : + ctx.next_use_distances_end[block->index]) { + local_next_uses[block->instructions.size() - 1].push_back(std::make_pair( + (Temp)pair.first, pair.second.second + block->instructions.size())); + } for (int idx = block->instructions.size() - 1; idx >= 0; idx--) { aco_ptr& instr = block->instructions[idx]; @@ -371,21 +400,35 @@ local_next_uses(spill_ctx& ctx, Block* block) if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi) break; + if (idx != (int)block->instructions.size() - 1) { + local_next_uses[idx] = local_next_uses[idx + 1]; + } + for (const Operand& op : instr->operands) { if (op.isFixed() && op.physReg() == exec) continue; if (op.regClass().type() == RegType::vgpr && op.regClass().is_linear()) continue; - if (op.isTemp()) - next_uses[op.getTemp()] = idx; + if (op.isTemp()) { + auto it = std::find_if(local_next_uses[idx].begin(), local_next_uses[idx].end(), + [op](auto& pair) { return pair.first == op.getTemp(); }); + if (it == local_next_uses[idx].end()) { + local_next_uses[idx].push_back(std::make_pair(op.getTemp(), idx)); + } else { + it->second = idx; + } + } } for (const Definition& def : instr->definitions) { - if (def.isTemp()) - next_uses.erase(def.getTemp()); + if (def.isTemp()) { + auto it = std::find_if(local_next_uses[idx].begin(), local_next_uses[idx].end(), + [def](auto& pair) { return pair.first == def.getTemp(); }); + if (it != local_next_uses[idx].end()) { + local_next_uses[idx].erase(it); + } + } } - local_next_uses[idx] = next_uses; } - return local_next_uses; } RegisterDemand @@ -442,7 +485,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) return {0, 0}; /* next use distances at the beginning of the current block */ - auto& next_use_distances = ctx.next_use_distances_start[block_idx]; + const auto& next_use_distances = ctx.next_use_distances_start[block_idx]; /* loop header block */ if (block->loop_nest_depth > ctx.program->blocks[block_idx - 1].loop_nest_depth) { @@ -489,12 +532,12 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) unsigned distance = 0; Temp to_spill; - for (std::pair> pair : next_use_distances) { + for (const std::pair>& pair : + next_use_distances) { if (pair.first.type() == type && (pair.second.first >= loop_end || (ctx.remat.count(pair.first) && type == RegType::sgpr)) && - pair.second.second > distance && - ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) { + pair.second.second > distance && !ctx.spills_entry[block_idx].count(pair.first)) { to_spill = pair.first; distance = pair.second.second; } @@ -509,8 +552,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) } uint32_t spill_id; - if (ctx.spills_exit[block_idx - 1].find(to_spill) == - ctx.spills_exit[block_idx - 1].end()) { + if (!ctx.spills_exit[block_idx - 1].count(to_spill)) { spill_id = ctx.allocate_spill_id(to_spill.regClass()); } else { spill_id = ctx.spills_exit[block_idx - 1][to_spill]; @@ -533,9 +575,10 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) Temp to_spill; type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr; - for (std::pair> pair : next_use_distances) { + for (const std::pair>& pair : + next_use_distances) { if (pair.first.type() == type && pair.second.second > distance && - ctx.spills_entry[block_idx].find(pair.first) == ctx.spills_entry[block_idx].end()) { + !ctx.spills_entry[block_idx].count(pair.first)) { to_spill = pair.first; distance = pair.second.second; } @@ -554,9 +597,12 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) /* keep variables spilled if they are alive and not used in the current block */ unsigned pred_idx = block->linear_preds[0]; for (std::pair pair : ctx.spills_exit[pred_idx]) { - if (pair.first.type() == RegType::sgpr && - next_use_distances.find(pair.first) != next_use_distances.end() && - next_use_distances[pair.first].first != block_idx) { + if (pair.first.type() != RegType::sgpr) { + continue; + } + auto next_use_distance_it = next_use_distances.find(pair.first); + if (next_use_distance_it != next_use_distances.end() && + next_use_distance_it->second.first != block_idx) { ctx.spills_entry[block_idx].insert(pair); spilled_registers.sgpr += pair.first.size(); } @@ -564,9 +610,12 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) if (block->logical_preds.size() == 1) { pred_idx = block->logical_preds[0]; for (std::pair pair : ctx.spills_exit[pred_idx]) { - if (pair.first.type() == RegType::vgpr && - next_use_distances.find(pair.first) != next_use_distances.end() && - next_use_distances[pair.first].first != block_idx) { + if (pair.first.type() != RegType::vgpr) { + continue; + } + auto next_use_distance_it = next_use_distances.find(pair.first); + if (next_use_distance_it != next_use_distances.end() && + next_use_distance_it->second.first != block_idx) { ctx.spills_entry[block_idx].insert(pair); spilled_registers.vgpr += pair.first.size(); } @@ -578,8 +627,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) if (block->register_demand.sgpr - spilled_registers.sgpr > ctx.target_pressure.sgpr) { pred_idx = block->linear_preds[0]; for (std::pair pair : ctx.spills_exit[pred_idx]) { - if (pair.first.type() == RegType::sgpr && - next_use_distances.find(pair.first) != next_use_distances.end() && + if (pair.first.type() == RegType::sgpr && next_use_distances.count(pair.first) && ctx.spills_entry[block_idx].insert(pair).second) { spilled_registers.sgpr += pair.first.size(); } @@ -589,8 +637,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) block->logical_preds.size() == 1) { pred_idx = block->logical_preds[0]; for (std::pair pair : ctx.spills_exit[pred_idx]) { - if (pair.first.type() == RegType::vgpr && - next_use_distances.find(pair.first) != next_use_distances.end() && + if (pair.first.type() == RegType::vgpr && next_use_distances.count(pair.first) && ctx.spills_entry[block_idx].insert(pair).second) { spilled_registers.vgpr += pair.first.size(); } @@ -604,7 +651,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) std::set partial_spills; /* keep variables spilled on all incoming paths */ - for (std::pair> pair : next_use_distances) { + for (const std::pair>& pair : next_use_distances) { std::vector& preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds; /* If it can be rematerialized, keep the variable spilled if all predecessors do not reload @@ -618,12 +665,11 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) uint32_t spill_id = 0; for (unsigned pred_idx : preds) { /* variable is not even live at the predecessor: probably from a phi */ - if (ctx.next_use_distances_end[pred_idx].find(pair.first) == - ctx.next_use_distances_end[pred_idx].end()) { + if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) { spill = false; break; } - if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end()) { + if (!ctx.spills_exit[pred_idx].count(pair.first)) { if (!remat) spill = false; } else { @@ -660,8 +706,7 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) continue; } - if (ctx.spills_exit[preds[i]].find(phi->operands[i].getTemp()) == - ctx.spills_exit[preds[i]].end()) + if (!ctx.spills_exit[preds[i]].count(phi->operands[i].getTemp())) spill = false; else partial_spills.insert(phi->definitions[0].getTemp()); @@ -686,10 +731,10 @@ init_live_in_vars(spill_ctx& ctx, Block* block, unsigned block_idx) RegType type = reg_pressure.vgpr > ctx.target_pressure.vgpr ? RegType::vgpr : RegType::sgpr; while (it != partial_spills.end()) { - assert(ctx.spills_entry[block_idx].find(*it) == ctx.spills_entry[block_idx].end()); + assert(!ctx.spills_entry[block_idx].count(*it)); - if (it->type() == type && next_use_distances[*it].second > distance) { - distance = next_use_distances[*it].second; + if (it->type() == type && next_use_distances.at(*it).second > distance) { + distance = next_use_distances.at(*it).second; to_spill = *it; } ++it; @@ -722,18 +767,19 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) unsigned insert_idx = 0; RegisterDemand demand_before = get_demand_before(ctx, block_idx, 0); - for (std::pair> live : + for (std::pair>& live : ctx.next_use_distances_start[block_idx]) { const unsigned pred_idx = block->linear_preds[0]; if (!live.first.is_linear()) continue; /* still spilled */ - if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end()) + if (ctx.spills_entry[block_idx].count(live.first)) continue; /* in register at end of predecessor */ - if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) { + auto spills_exit_it = ctx.spills_exit[pred_idx].find(live.first); + if (spills_exit_it == ctx.spills_exit[pred_idx].end()) { std::map::iterator it = ctx.renames[pred_idx].find(live.first); if (it != ctx.renames[pred_idx].end()) ctx.renames[block_idx].insert(*it); @@ -742,8 +788,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* variable is spilled at predecessor and live at current block: create reload instruction */ Temp new_name = ctx.program->allocateTmp(live.first.regClass()); - aco_ptr reload = - do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]); + aco_ptr reload = do_reload(ctx, live.first, new_name, spills_exit_it->second); instructions.emplace_back(std::move(reload)); reg_demand.push_back(demand_before); ctx.renames[block_idx][live.first] = new_name; @@ -758,16 +803,17 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) } while (instructions.back()->opcode != aco_opcode::p_logical_start); unsigned pred_idx = block->logical_preds[0]; - for (std::pair> live : + for (std::pair>& live : ctx.next_use_distances_start[block_idx]) { if (live.first.is_linear()) continue; /* still spilled */ - if (ctx.spills_entry[block_idx].find(live.first) != ctx.spills_entry[block_idx].end()) + if (ctx.spills_entry[block_idx].count(live.first)) continue; /* in register at end of predecessor */ - if (ctx.spills_exit[pred_idx].find(live.first) == ctx.spills_exit[pred_idx].end()) { + auto spills_exit_it = ctx.spills_exit[pred_idx].find(live.first); + if (spills_exit_it == ctx.spills_exit[pred_idx].end()) { std::map::iterator it = ctx.renames[pred_idx].find(live.first); if (it != ctx.renames[pred_idx].end()) ctx.renames[block_idx].insert(*it); @@ -778,7 +824,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) * create reload instruction */ Temp new_name = ctx.program->allocateTmp(live.first.regClass()); aco_ptr reload = - do_reload(ctx, live.first, new_name, ctx.spills_exit[pred_idx][live.first]); + do_reload(ctx, live.first, new_name, spills_exit_it->second); instructions.emplace_back(std::move(reload)); reg_demand.emplace_back(reg_demand.back()); ctx.renames[block_idx][live.first] = new_name; @@ -812,8 +858,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* if the phi is not spilled, add to instructions */ if (!phi->definitions[0].isTemp() || - ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == - ctx.spills_entry[block_idx].end()) { + !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())) { instructions.emplace_back(std::move(phi)); continue; } @@ -836,10 +881,10 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) std::map::iterator rename_it = ctx.renames[pred_idx].find(var); /* prevent the definining instruction from being DCE'd if it could be rematerialized */ if (rename_it == ctx.renames[preds[i]].end() && ctx.remat.count(var)) - ctx.remat_used[ctx.remat[var].instr] = true; + ctx.unused_remats.erase(ctx.remat[var].instr); /* check if variable is already spilled at predecessor */ - std::map::iterator spilled = ctx.spills_exit[pred_idx].find(var); + auto spilled = ctx.spills_exit[pred_idx].find(var); if (spilled != ctx.spills_exit[pred_idx].end()) { if (spilled->second != def_spill_id) ctx.add_affinity(def_spill_id, spilled->second); @@ -889,7 +934,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) for (unsigned pred_idx : preds) { /* variable is already spilled at predecessor */ - std::map::iterator spilled = ctx.spills_exit[pred_idx].find(pair.first); + auto spilled = ctx.spills_exit[pred_idx].find(pair.first); if (spilled != ctx.spills_exit[pred_idx].end()) { if (spilled->second != pair.second) ctx.add_affinity(pair.second, spilled->second); @@ -897,8 +942,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) } /* variable is dead at predecessor, it must be from a phi: this works because of CSSA form */ - if (ctx.next_use_distances_end[pred_idx].find(pair.first) == - ctx.next_use_distances_end[pred_idx].end()) + if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) continue; /* add interferences between spilled variable and predecessors exit spills */ @@ -938,8 +982,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) for (aco_ptr& phi : instructions) { assert(phi->opcode == aco_opcode::p_phi || phi->opcode == aco_opcode::p_linear_phi); assert(!phi->definitions[0].isTemp() || - ctx.spills_entry[block_idx].find(phi->definitions[0].getTemp()) == - ctx.spills_entry[block_idx].end()); + !ctx.spills_entry[block_idx].count(phi->definitions[0].getTemp())); std::vector& preds = phi->opcode == aco_opcode::p_phi ? block->logical_preds : block->linear_preds; @@ -949,15 +992,18 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) unsigned pred_idx = preds[i]; /* if the operand was reloaded, rename */ - if (ctx.spills_exit[pred_idx].find(phi->operands[i].getTemp()) == - ctx.spills_exit[pred_idx].end()) { + if (!ctx.spills_exit[pred_idx].count(phi->operands[i].getTemp())) { std::map::iterator it = ctx.renames[pred_idx].find(phi->operands[i].getTemp()); - if (it != ctx.renames[pred_idx].end()) + if (it != ctx.renames[pred_idx].end()) { phi->operands[i].setTemp(it->second); /* prevent the definining instruction from being DCE'd if it could be rematerialized */ - else if (ctx.remat.count(phi->operands[i].getTemp())) - ctx.remat_used[ctx.remat[phi->operands[i].getTemp()].instr] = true; + } else { + auto remat_it = ctx.remat.find(phi->operands[i].getTemp()); + if (remat_it != ctx.remat.end()) { + ctx.unused_remats.erase(remat_it->second.instr); + } + } continue; } @@ -993,10 +1039,10 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* iterate live variables for which to reload */ // TODO: reload at current block if variable is spilled on all predecessors - for (std::pair> pair : + for (std::pair>& pair : ctx.next_use_distances_start[block_idx]) { /* skip spilled variables */ - if (ctx.spills_entry[block_idx].find(pair.first) != ctx.spills_entry[block_idx].end()) + if (ctx.spills_entry[block_idx].count(pair.first)) continue; std::vector preds = pair.first.is_linear() ? block->linear_preds : block->logical_preds; @@ -1004,15 +1050,14 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) /* variable is dead at predecessor, it must be from a phi */ bool is_dead = false; for (unsigned pred_idx : preds) { - if (ctx.next_use_distances_end[pred_idx].find(pair.first) == - ctx.next_use_distances_end[pred_idx].end()) + if (!ctx.next_use_distances_end[pred_idx].count(pair.first)) is_dead = true; } if (is_dead) continue; for (unsigned pred_idx : preds) { /* the variable is not spilled at the predecessor */ - if (ctx.spills_exit[pred_idx].find(pair.first) == ctx.spills_exit[pred_idx].end()) + if (!ctx.spills_exit[pred_idx].count(pair.first)) continue; /* variable is spilled at predecessor and has to be reloaded */ @@ -1038,7 +1083,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) Temp rename = Temp(); bool is_same = true; for (unsigned pred_idx : preds) { - if (ctx.renames[pred_idx].find(pair.first) == ctx.renames[pred_idx].end()) { + if (!ctx.renames[pred_idx].count(pair.first)) { if (rename == Temp()) rename = pair.first; else @@ -1062,7 +1107,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) rename = ctx.program->allocateTmp(pair.first.regClass()); for (unsigned i = 0; i < phi->operands.size(); i++) { Temp tmp; - if (ctx.renames[preds[i]].find(pair.first) != ctx.renames[preds[i]].end()) { + if (ctx.renames[preds[i]].count(pair.first)) { tmp = ctx.renames[preds[i]][pair.first]; } else if (preds[i] >= block_idx) { tmp = rename; @@ -1070,7 +1115,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) tmp = pair.first; /* prevent the definining instruction from being DCE'd if it could be rematerialized */ if (ctx.remat.count(tmp)) - ctx.remat_used[ctx.remat[tmp].instr] = true; + ctx.unused_remats.erase(ctx.remat[tmp].instr); } phi->operands[i] = Operand(tmp); } @@ -1106,12 +1151,10 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx) } void -process_block(spill_ctx& ctx, unsigned block_idx, Block* block, - std::map& current_spills, RegisterDemand spilled_registers) +process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand spilled_registers) { assert(!ctx.processed[block_idx]); - std::vector> local_next_use_distance; std::vector> instructions; unsigned idx = 0; @@ -1121,25 +1164,35 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, instructions.emplace_back(std::move(block->instructions[idx++])); } - if (block->register_demand.exceeds(ctx.target_pressure)) - local_next_use_distance = local_next_uses(ctx, block); + if (block->register_demand.exceeds(ctx.target_pressure)) { + update_local_next_uses(ctx, block, ctx.local_next_use_distance); + } else { + /* We won't use local_next_use_distance, so no initialization needed */ + } + + auto& current_spills = ctx.spills_exit[block_idx]; while (idx < block->instructions.size()) { aco_ptr& instr = block->instructions[idx]; std::map> reloads; - std::map spills; + /* rename and reload operands */ for (Operand& op : instr->operands) { if (!op.isTemp()) continue; - if (current_spills.find(op.getTemp()) == current_spills.end()) { + if (!current_spills.count(op.getTemp())) { /* the Operand is in register: check if it was renamed */ - if (ctx.renames[block_idx].find(op.getTemp()) != ctx.renames[block_idx].end()) - op.setTemp(ctx.renames[block_idx][op.getTemp()]); - /* prevent it's definining instruction from being DCE'd if it could be rematerialized */ - else if (ctx.remat.count(op.getTemp())) - ctx.remat_used[ctx.remat[op.getTemp()].instr] = true; + auto rename_it = ctx.renames[block_idx].find(op.getTemp()); + if (rename_it != ctx.renames[block_idx].end()) { + op.setTemp(rename_it->second); + } else { + /* prevent its definining instruction from being DCE'd if it could be rematerialized */ + auto remat_it = ctx.remat.find(op.getTemp()); + if (remat_it != ctx.remat.end()) { + ctx.unused_remats.erase(remat_it->second.instr); + } + } continue; } /* the Operand is spilled: add it to reloads */ @@ -1157,7 +1210,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, RegisterDemand new_demand = ctx.register_demand[block_idx][idx]; new_demand.update(get_demand_before(ctx, block_idx, idx)); - assert(!local_next_use_distance.empty()); + assert(!ctx.local_next_use_distance.empty()); /* if reg pressure is too high, spill variable with furthest next use */ while ((new_demand - spilled_registers).exceeds(ctx.target_pressure)) { @@ -1168,15 +1221,13 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, if (new_demand.vgpr - spilled_registers.vgpr > ctx.target_pressure.vgpr) type = RegType::vgpr; - for (std::pair pair : local_next_use_distance[idx]) { + for (std::pair pair : ctx.local_next_use_distance[idx]) { if (pair.first.type() != type) continue; bool can_rematerialize = ctx.remat.count(pair.first); if (((pair.second > distance && can_rematerialize == do_rematerialize) || (can_rematerialize && !do_rematerialize && pair.second > idx)) && - current_spills.find(pair.first) == current_spills.end() && - ctx.spills_exit[block_idx].find(pair.first) == - ctx.spills_exit[block_idx].end()) { + !current_spills.count(pair.first)) { to_spill = pair.first; distance = pair.second; do_rematerialize = can_rematerialize; @@ -1189,14 +1240,14 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, /* add interferences with currently spilled variables */ for (std::pair pair : current_spills) ctx.add_interference(spill_id, pair.second); - for (std::pair> pair : reloads) + for (std::pair>& pair : reloads) ctx.add_interference(spill_id, pair.second.second); current_spills[to_spill] = spill_id; spilled_registers += to_spill; /* rename if necessary */ - if (ctx.renames[block_idx].find(to_spill) != ctx.renames[block_idx].end()) { + if (ctx.renames[block_idx].count(to_spill)) { to_spill = ctx.renames[block_idx][to_spill]; } @@ -1210,7 +1261,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, } /* add reloads and instruction to new instructions */ - for (std::pair> pair : reloads) { + for (std::pair>& pair : reloads) { aco_ptr reload = do_reload(ctx, pair.second.first, pair.first, pair.second.second); instructions.emplace_back(std::move(reload)); @@ -1220,7 +1271,6 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block, } block->instructions = std::move(instructions); - ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end()); } void @@ -1244,21 +1294,22 @@ spill_block(spill_ctx& ctx, unsigned block_idx) add_coupling_code(ctx, block, block_idx); } - std::map current_spills = ctx.spills_entry[block_idx]; + const auto& current_spills = ctx.spills_entry[block_idx]; /* check conditions to process this block */ bool process = (block->register_demand - spilled_registers).exceeds(ctx.target_pressure) || - !ctx.renames[block_idx].empty() || ctx.remat_used.size(); + !ctx.renames[block_idx].empty() || ctx.unused_remats.size(); for (auto it = current_spills.begin(); !process && it != current_spills.end(); ++it) { - if (ctx.next_use_distances_start[block_idx][it->first].first == block_idx) + if (ctx.next_use_distances_start[block_idx].at(it->first).first == block_idx) process = true; } - if (process) - process_block(ctx, block_idx, block, current_spills, spilled_registers); - else - ctx.spills_exit[block_idx].insert(current_spills.begin(), current_spills.end()); + assert(ctx.spills_exit[block_idx].empty()); + ctx.spills_exit[block_idx] = current_spills; + if (process) { + process_block(ctx, block_idx, block, spilled_registers); + } ctx.processed[block_idx] = true; @@ -1754,7 +1805,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) reload->definitions[0] = (*it)->definitions[0]; instructions.emplace_back(aco_ptr(reload)); } - } else if (!ctx.remat_used.count(it->get()) || ctx.remat_used[it->get()]) { + } else if (!ctx.unused_remats.count(it->get())) { instructions.emplace_back(std::move(*it)); } }