Commit 173871df authored by Rob Clark

freedreno/ir3: lower immeds to const

Helps reduce register pressure and instruction counts for immediates
that would otherwise require a mov into gpr.

total instructions in shared programs:          4455332 -> 4369297 (-1.93%)
total dwords in shared programs:                8807872 -> 8614432 (-2.20%)
total full registers used in shared programs:   263062 -> 250846 (-4.64%)
total half registers used in shared programs:   9845 -> 9845 (0.00%)
total const registers used in shared programs:  1029735 -> 1466993 (42.46%)

                 half       full      const      instr     dwords
    helped           0       10415           0       17861        5912
      hurt           0        1157       21458         947          33
Signed-off-by: Rob Clark <robclark@freedesktop.org>
parent b15c7fc2
......@@ -659,8 +659,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
if (!emit->key.binning_pass)
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
/* mark clean after emitting consts: */
ctx->prog.dirty = 0;
/* mark clean after emitting consts.. a bit ugly, but since binning
* pass is emitted first, we want to do this only for main draw:
*/
if (!emit->key.binning_pass)
ctx->prog.dirty = 0;
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
......
......@@ -648,8 +648,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
ir3_emit_consts(vp, ring, ctx, emit->info, dirty);
if (!emit->key.binning_pass)
ir3_emit_consts(fp, ring, ctx, emit->info, dirty);
/* mark clean after emitting consts: */
ctx->prog.dirty = 0;
/* mark clean after emitting consts.. a bit ugly, but since binning
* pass is emitted first, we want to do this only for main draw:
*/
if (!emit->key.binning_pass)
ctx->prog.dirty = 0;
}
if ((dirty & FD_DIRTY_BLEND)) {
......
......@@ -29,13 +29,16 @@
#include "freedreno_util.h"
#include "ir3.h"
#include "ir3_shader.h"
/*
* Copy Propagate:
*/
/* State carried through the copy-propagate pass. */
struct ir3_cp_ctx {
/* IR being processed; handed to ir3_reg_clone() when a register is cloned */
struct ir3 *shader;
/* shader variant whose immediates[] table receives lowered immediates */
struct ir3_shader_variant *so;
/* number of scalar immediate slots handed out so far; slot i lives at
 * immediates[i / 4].val[i % 4]
 */
unsigned immediate_idx;
};
/* is it a type preserving mov, with ok flags? */
......@@ -233,6 +236,62 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
*dstflags &= ~IR3_REG_SABS;
}
/**
 * Turn an immediate source into a reference to a const slot.
 *
 * A clone of 'reg' is made and any (abs)/(neg) modifiers requested in
 * 'new_flags' are folded into the cloned immediate value.  The value is
 * then looked up in the variant's immediates table (four scalar slots per
 * entry); if it is not there yet, the next free slot is claimed for it.
 *
 * Returns the clone, flagged IR3_REG_CONST instead of IR3_REG_IMMED, with
 * 'num' set to the slot index offset by 4 * first_immediate.
 *
 * NOTE(review): nothing visible here bounds the slot index against the
 * capacity of so->immediates[] -- confirm the table cannot overflow.
 */
static struct ir3_register *
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
{
	struct ir3_register *lowered = ir3_reg_clone(ctx->shader, reg);
	unsigned slot = 0;

	/* there can be restrictions on combining (abs)/(neg) with a const
	 * src, so apply the modifiers to the value now and drop the flags:
	 */
	if (new_flags & IR3_REG_SABS) {
		lowered->iim_val = abs(lowered->iim_val);
		new_flags &= ~IR3_REG_SABS;
	}
	if (new_flags & IR3_REG_FABS) {
		lowered->fim_val = fabs(lowered->fim_val);
		new_flags &= ~IR3_REG_FABS;
	}
	if (new_flags & IR3_REG_SNEG) {
		lowered->iim_val = -lowered->iim_val;
		new_flags &= ~IR3_REG_SNEG;
	}
	if (new_flags & IR3_REG_FNEG) {
		lowered->fim_val = -lowered->fim_val;
		new_flags &= ~IR3_REG_FNEG;
	}

	/* reuse an existing slot if this exact bit pattern is already there: */
	while (slot < ctx->immediate_idx &&
	       ctx->so->immediates[slot / 4].val[slot % 4] != lowered->uim_val)
		slot++;

	if (slot == ctx->immediate_idx) {
		/* no match, so claim the next free slot: */
		ctx->so->immediates[slot / 4].val[slot % 4] = lowered->uim_val;
		ctx->so->immediates_count = slot / 4 + 1;
		ctx->immediate_idx++;
	}

	lowered->flags = (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST;
	lowered->num = slot + (4 * ctx->so->first_immediate);

	return lowered;
}
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immediate/const (which replace the src
......@@ -281,6 +340,13 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
combine_flags(&new_flags, src);
if (!valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
debug_assert(new_flags & IR3_REG_IMMED);
instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
return;
}
/* special case for "normal" mad instructions, we can
* try swapping the first two args if that fits better.
*
......@@ -378,6 +444,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
src_reg->flags = new_flags;
src_reg->iim_val = iim_val;
instr->regs[n+1] = src_reg;
} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
/* See if lowering an immediate to const would help. */
instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
}
return;
......@@ -484,6 +553,7 @@ void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
struct ir3_cp_ctx ctx = {
.shader = ir,
.so = so,
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment