Commit fef919e0 authored by Daniel Schürmann
Browse files

aco: use VSKIP instead of branches in case of few instructions

This patch relaxes the conditions under which
we remove branch instructions. Additionally, it
introduces the use of VSKIP for GCN.

Branches are now removed most notably
in these two situations:
- divergent breaks: the outer branch instruction can be removed
- optimized atomics: the branch condition is always true for one lane

Totals from 20846 (14.96% of 139391) affected shaders:
CodeSize: 156790560 -> 156294524 (-0.32%)
Instrs: 29870459 -> 29748936 (-0.41%)
Cycles: 1641405628 -> 1611934772 (-1.80%); split: -1.80%, +0.00%
VMEM: 4538623 -> 4536635 (-0.04%)
SMEM: 1318984 -> 1318787 (-0.01%)
Branches: 830286 -> 708791 (-14.63%)
parent fbf6511e
Pipeline #338743 waiting for manual action with stages
......@@ -519,7 +519,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
("sop2", [Format.SOP2], 'SOP2_instruction', itertools.product([1, 2], [2, 3])),
("sopk", [Format.SOPK], 'SOPK_instruction', itertools.product([0, 1, 2], [0, 1])),
("sopp", [Format.SOPP], 'SOPP_instruction', itertools.product([0, 1], [0, 1])),
("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2)]),
("sopc", [Format.SOPC], 'SOPC_instruction', [(1, 2), (0, 2)]),
("smem", [Format.SMEM], 'SMEM_instruction', [(0, 4), (0, 3), (1, 0), (1, 3), (1, 2), (0, 0)]),
("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (0, 3), (0, 4)]),
("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]),
......
......@@ -2096,46 +2096,110 @@ void lower_to_hw_instr(Program* program)
}
} else if (instr->isBranch()) {
Pseudo_branch_instruction* branch = &instr->branch();
uint32_t target = branch->target[0];
/* check if all blocks from current to target are empty */
/* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch */
bool can_remove = block->index < target;
const uint32_t target = branch->target[0];
const bool divergent_branch = branch->opcode == aco_opcode::p_cbranch_z &&
branch->operands[0].physReg() == exec;
/* Check if all blocks from current to target are empty
* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch
* In case there are <= 8 VALU/VMEM instructions, use vskip */
bool use_vskip = block->index < target;
unsigned num_scalar = 0;
unsigned num_vector = 0;
for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) {
/* uniform branches must not be ignored if they
* are about to jump over actual instructions */
if (!program->blocks[i].instructions.empty() &&
(branch->opcode != aco_opcode::p_cbranch_z ||
branch->operands[0].physReg() != exec)) {
can_remove = false;
break;
}
unsigned num_memory = 0;
bool has_sopp = false;
bool has_barrier = false;
/* check the instructions between branch and target */
for (unsigned i = block->index + 1; use_vskip && i < branch->target[0]; i++) {
for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
if (inst->isSOPP()) {
can_remove = false;
if (!divergent_branch) {
/* uniform conditional branches must not be ignored if they
* are about to jump over actual instructions */
use_vskip = false;
} else if (inst->isSOPP()) {
/* we allow at most one branch, and only if we remove
* the current branch and don't rely on setting vskip*/
if (has_sopp ||
inst->opcode == aco_opcode::s_endpgm ||
inst->opcode == aco_opcode::s_sendmsg ||
inst->opcode == aco_opcode::s_sendmsghalt ||
inst->opcode == aco_opcode::s_trap)
use_vskip = false;
has_sopp = true;
} else if (inst->isSALU()) {
num_scalar++;
} else if (inst->isVALU()) {
} else if (inst->isVALU() || inst->isVINTRP()) {
num_vector++;
/* VALU which writes SGPRs are always executed on GFX10+ */
if (ctx.program->chip_class >= GFX10) {
for (Definition& def : inst->definitions) {
if (def.regClass().type() == RegType::sgpr)
num_vector += 3;
}
}
} else if (inst->isVMEM() || inst->isFlatLike()) {
num_memory++;
} else if (inst->isDS()) {
num_memory++;
if (inst->ds().gds)
use_vskip = false;
} else if (inst->isEXP()) {
num_memory++;
/* Exports with empty exec mask cause hangs on GFX10 */
if (ctx.program->chip_class >= GFX10)
use_vskip = false;
} else if (inst->isSMEM()) {
/* SMEM are at least as expensive as branches */
use_vskip = false;
} else if (inst->isBarrier()) {
/* Assume that at least one lane will be active */
has_barrier = true;
} else {
can_remove = false;
use_vskip = false;
assert(false && "Pseudo instructions should be lowered by this point.");
}
if (num_scalar + num_vector * 2 > 4)
can_remove = false;
/* under these conditions, we shouldn't remove the branch */
if (has_barrier) {
/* In case of a barrier, always try to use vskip */
num_scalar = num_vector = num_memory = 0;
} else if (ctx.program->chip_class >= GFX10) {
/* GFX10 cannot entirely skip VMEM instructions. */
if (num_scalar * 2 + num_vector + num_memory * 4 > 8)
use_vskip = false;
} else {
if (num_scalar * 2 + num_vector + num_memory > 8 ||
(has_sopp && (num_vector > 2 || num_memory > 0)))
use_vskip = false;
}
if (!can_remove)
if (!use_vskip)
break;
}
}
if (can_remove)
continue;
if (use_vskip) {
/* GFX10+ automatically skips vector instructions if exec == 0 */
if (ctx.program->chip_class >= GFX10)
continue;
switch (instr->opcode) {
/* remove the branch entirely if executing
* the instructions is cheaper than skipping them */
if ((num_vector <= 2 && num_memory == 0) ||
(num_vector == 0 && num_memory <= 1))
continue;
/* enable vskip */
bld.sopc(aco_opcode::s_setvskip, Operand(execz, s1), Operand(0u));
/* disable vskip before target */
bld.reset(&program->blocks[target - 1]);
bld.sopc(aco_opcode::s_setvskip, Operand(0u), Operand(0u));
} else {
/* emit branch instruction */
switch (instr->opcode) {
case aco_opcode::p_branch:
assert(block->linear_succs[0] == target);
bld.sopp(aco_opcode::s_branch, branch->definitions[0], target);
......@@ -2164,6 +2228,7 @@ void lower_to_hw_instr(Program* program)
break;
default:
unreachable("Unknown Pseudo branch instruction!");
}
}
} else if (instr->isReduction()) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment