Commit 79aea322 authored by Daniel Schürmann
Browse files

aco/optimizer: combine extract into subdword SDWA instructions

This fixes an issue where an SDWA instruction had already selected
only part of a variable.

Totals from 194 (0.14% of 134913) affected shaders: (GFX10.3)
SpillVGPRs: 944 -> 946 (+0.21%); split: -1.17%, +1.38%
CodeSize: 997680 -> 991232 (-0.65%); split: -0.78%, +0.13%
Instrs: 169978 -> 169141 (-0.49%); split: -0.59%, +0.09%
Latency: 2932016 -> 2908588 (-0.80%)
InvThroughput: 1335218 -> 1323196 (-0.90%)
VClause: 3881 -> 3872 (-0.23%); split: -0.36%, +0.13%
Copies: 31058 -> 30211 (-2.73%); split: -3.08%, +0.36%
PreVGPRs: 4724 -> 4587 (-2.90%)
parent 2bad5d10
Pipeline #617048 waiting for manual action with stages
......@@ -1008,6 +1008,29 @@ parse_insert(Instruction* instr)
}
}
/**
 * Fold two nested sub-dword selections into a single one.
 *
 * `inner` is the selection that is applied first; `outer` is then applied to
 * the result of `inner`. The combined selection covers the smaller of the two
 * sizes, starting at the sum of both offsets.
 *
 * Returns a default-constructed SubdwordSel when the two selections cannot be
 * expressed as one, i.e. when `outer` starts beyond the bytes kept by `inner`,
 * or when a wider zero-extending `outer` would have to preserve the sign bits
 * produced by a sign-extending `inner`.
 */
SubdwordSel
match_subdword_selection(SubdwordSel inner, SubdwordSel outer)
{
   /* outer has to begin inside the byte range that inner extracted */
   if (outer.offset() >= inner.size())
      return SubdwordSel();

   const bool outer_is_wider = outer.size() > inner.size();

   bool combined_sext;
   if (outer.size() == 4 || (outer_is_wider && outer.sign_extend())) {
      /* outer keeps the full dword, or sign-extends from a bit that inner
       * already filled with its own extension: inner's extension decides.
       */
      combined_sext = inner.sign_extend();
   } else if (!outer_is_wider) {
      /* outer only looks at bytes that inner actually selected */
      combined_sext = outer.sign_extend();
   } else if (inner.sign_extend()) {
      /* a wider zero-extension on top of a sign-extension cannot be
       * represented by a single selection
       */
      return SubdwordSel();
   } else {
      /* both selections zero-extend */
      combined_sext = false;
   }

   const unsigned combined_size = std::min(inner.size(), outer.size());
   const unsigned combined_offset = inner.offset() + outer.offset();
   return SubdwordSel(combined_size, combined_offset, combined_sext);
}
bool
can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info& info)
{
......@@ -1025,8 +1048,10 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
return true;
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
if (instr->isSDWA() && instr->sdwa().sel[idx] != SubdwordSel::dword)
return false;
if (instr->isSDWA()) {
if (!match_subdword_selection(sel, instr->sdwa().sel[idx]))
return false;
}
return true;
} else if (instr->isVOP3() && sel.size() == 2 &&
can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
......@@ -1034,15 +1059,8 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
return true;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
/* the outer offset must be within extracted range */
if (instrSel.offset() >= sel.size())
return false;
/* don't remove the sign-extension when increasing the size further */
if (instrSel.size() > sel.size() && !instrSel.sign_extend() && sel.sign_extend())
if (!match_subdword_selection(sel, instrSel))
return false;
return true;
}
......@@ -1082,21 +1100,16 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
to_SDWA(ctx, instr);
sel = match_subdword_selection(sel, instr->sdwa().sel[idx]);
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
} else if (instr->isVOP3()) {
if (sel.offset())
instr->vop3().opsel |= 1 << idx;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
unsigned size = std::min(sel.size(), instrSel.size());
unsigned offset = sel.offset() + instrSel.offset();
unsigned sign_extend =
instrSel.sign_extend() && (sel.sign_extend() || instrSel.size() <= sel.size());
instr->operands[1] = Operand::c32(offset / size);
instr->operands[2] = Operand::c32(size * 8u);
instr->operands[3] = Operand::c32(sign_extend);
sel = match_subdword_selection(sel, parse_extract(instr.get()));
instr->operands[1] = Operand::c32(sel.offset() / sel.size());
instr->operands[2] = Operand::c32(sel.size() * 8u);
instr->operands[3] = Operand::c32(sel.sign_extend());
return;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment