Commit a9f12474 authored by Rhys Perry
Browse files

aco: CSE iadd and uadd_carry



fossil-db (navi21):
Totals from 5 (0.00% of 135636) affected shaders:
Instrs: 7851 -> 7783 (-0.87%)
CodeSize: 43828 -> 43560 (-0.61%); split: -0.62%, +0.01%
Latency: 382660 -> 382416 (-0.06%); split: -0.07%, +0.00%
InvThroughput: 238335 -> 238173 (-0.07%); split: -0.07%, +0.00%
VClause: 271 -> 269 (-0.74%)

fossil-db (vega10):
Totals from 1 (0.00% of 134698) affected shaders:
CodeSize: 7900 -> 7904 (+0.05%)
Copies: 157 -> 156 (-0.64%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Closes: mesa/mesa#5545
parent c16feac5
Pipeline #675630 waiting for manual action with stages
......@@ -433,6 +433,9 @@ public:
if (!post_ra && (!b.op.hasRegClass() || b.op.regClass().type() == RegType::sgpr))
b = copy(def(v1), b);
/* create a carry-out definition to CSE with uadd_carry */
carry_out |= !post_ra;
if (!carry_in.op.isUndefined())
return vop2(aco_opcode::v_addc_co_u32, Definition(dst), def(lm), a, b, carry_in);
else if (program->gfx_level >= GFX10 && carry_out)
......
......@@ -2048,8 +2048,11 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
/* Don't remove label_extract if we can't apply the extract to
* neg/abs instructions because we'll likely combine it into another valu. */
if (!(ctx.info[instr->definitions[0].tempId()].label & (label_neg | label_abs)))
* neg/abs instructions because we'll likely combine it into another valu. VOP3 additions
* might be turned into VOP2.
*/
if (!(ctx.info[instr->definitions[0].tempId()].label &
(label_neg | label_abs | (ctx.program->gfx_level >= GFX9 ? label_add_sub : 0))))
check_sdwa_extract(ctx, instr);
}
......@@ -3796,6 +3799,26 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return;
if (instr->isVALU()) {
/* shrink addition instructions */
if (ctx.program->gfx_level >= GFX9 &&
(instr->opcode == aco_opcode::v_add_co_u32_e64 ||
instr->opcode == aco_opcode::v_add_co_u32) &&
ctx.uses[instr->definitions[1].tempId()] == 0) {
if (instr->opcode == aco_opcode::v_add_co_u32_e64)
instr->format = asVOP3(Format::VOP2);
instr->opcode = aco_opcode::v_add_u32;
instr->definitions.pop_back();
if (instr->usesModifiers()) {
/* leave the format alone */
} else if (instr->operands[1].isOfType(RegType::vgpr)) {
instr->format = Format::VOP2;
} else if (instr->operands[0].isOfType(RegType::vgpr)) {
std::swap(instr->operands[0], instr->operands[1]);
instr->format = Format::VOP2;
}
}
/* Apply SDWA. Do this after label_instruction() so it can remove
* label_extract if not all instructions can take SDWA. */
for (unsigned i = 0; i < instr->operands.size(); i++) {
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment