Commit 23e7a344 authored by Rob Clark's avatar Rob Clark 💬

freedreno/ir3: consolidate const state

Combine the offsets of different parts of the constant space with (what
was formerly known as) ir3_driver_const_layout.  Bunch of churn, but no
functional change.
Signed-off-by: Rob Clark <robdclark@chromium.org>
parent ef3eecd6
......@@ -217,10 +217,11 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
/* to calculate the byte offset (yes, uggg) we need (up to) three
* const values to know the bytes per pixel, and y and z stride:
*/
unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
ctx->so->const_layout.image_dims.off[var->data.driver_location];
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];
debug_assert(ctx->so->const_layout.image_dims.mask &
debug_assert(const_state->image_dims.mask &
(1 << var->data.driver_location));
/* offset = coords.x * bytes_per_pixel: */
......
......@@ -107,7 +107,8 @@ create_driver_param(struct ir3_context *ctx, enum ir3_driver_param dp)
{
/* first four vec4 sysval's reserved for UBOs: */
/* NOTE: dp is in scalar, but there can be >4 dp components: */
unsigned n = ctx->so->constbase.driver_param;
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned n = const_state->offsets.driver_param;
unsigned r = regid(n + dp / 4, dp % 4);
return create_uniform(ctx->block, r);
}
......@@ -683,7 +684,8 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
/* UBO addresses are the first driver params, but subtract 2 here to
* account for nir_lower_uniforms_to_ubo rebasing the UBOs such that UBO 0
* is the uniforms: */
unsigned ubo = regid(ctx->so->constbase.ubo, 0) - 2;
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned ubo = regid(const_state->offsets.ubo, 0) - 2;
const unsigned ptrsz = ir3_pointer_size(ctx->compiler);
int off = 0;
......@@ -751,11 +753,12 @@ emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
/* SSBO size stored as a const starting at ssbo_sizes: */
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned blk_idx = nir_src_as_uint(intr->src[0]);
unsigned idx = regid(ctx->so->constbase.ssbo_sizes, 0) +
ctx->so->const_layout.ssbo_size.off[blk_idx];
unsigned idx = regid(const_state->offsets.ssbo_sizes, 0) +
const_state->ssbo_size.off[blk_idx];
debug_assert(ctx->so->const_layout.ssbo_size.mask & (1 << blk_idx));
debug_assert(const_state->ssbo_size.mask & (1 << blk_idx));
dst[0] = create_uniform(ctx->block, idx);
}
......@@ -1006,8 +1009,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
* bytes-per-pixel should have been emitted in 2nd slot of
* image_dims. See ir3_shader::emit_image_dims().
*/
unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
ctx->so->const_layout.image_dims.off[var->data.driver_location];
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned cb = regid(const_state->offsets.image_dims, 0) +
const_state->image_dims.off[var->data.driver_location];
struct ir3_instruction *aux = create_uniform(b, cb + 1);
tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
......@@ -2225,7 +2229,6 @@ emit_cf_list(struct ir3_context *ctx, struct exec_list *list)
static void
emit_stream_out(struct ir3_context *ctx)
{
struct ir3_shader_variant *v = ctx->so;
struct ir3 *ir = ctx->ir;
struct ir3_stream_output_info *strmout =
&ctx->so->shader->stream_output;
......@@ -2283,10 +2286,11 @@ emit_stream_out(struct ir3_context *ctx)
* stripped out in the backend.
*/
for (unsigned i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
struct ir3_const_state *const_state = &ctx->so->const_state;
unsigned stride = strmout->stride[i];
struct ir3_instruction *base, *off;
base = create_uniform(ctx->block, regid(v->constbase.tfbo, i));
base = create_uniform(ctx->block, regid(const_state->offsets.tfbo, i));
/* 24-bit should be enough: */
off = ir3_MUL_U(ctx->block, vtxcnt, 0,
......
......@@ -101,51 +101,34 @@ ir3_context_init(struct ir3_compiler *compiler,
nir_print_shader(ctx->s, stderr);
}
ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
so->num_uniforms = ctx->s->num_uniforms;
so->num_ubos = ctx->s->info.num_ubos;
struct ir3_const_state *const_state = &so->const_state;
memset(&const_state->offsets, ~0, sizeof(const_state->offsets));
ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);
ir3_nir_scan_driver_consts(ctx->s, const_state);
const_state->num_uniforms = ctx->s->num_uniforms;
const_state->num_ubos = ctx->s->info.num_ubos;
/* Layout of constant registers, each section aligned to vec4. Note
* that pointer size (ubo, etc) changes depending on generation.
*
* user consts
* UBO addresses
* SSBO sizes
* if (vertex shader) {
* driver params (IR3_DP_*)
* if (stream_output.num_outputs > 0)
* stream-out addresses
* }
* immediates
*
* Immediates go last mostly because they are inserted in the CP pass
* after the nir -> ir3 frontend.
*
* Note UBO size in bytes should be aligned to vec4
*/
debug_assert((ctx->so->shader->ubo_state.size % 16) == 0);
unsigned constoff = align(ctx->so->shader->ubo_state.size / 16, 4);
unsigned ptrsz = ir3_pointer_size(ctx->compiler);
memset(&so->constbase, ~0, sizeof(so->constbase));
if (so->num_ubos > 0) {
so->constbase.ubo = constoff;
if (const_state->num_ubos > 0) {
const_state->offsets.ubo = constoff;
constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
}
if (so->const_layout.ssbo_size.count > 0) {
unsigned cnt = so->const_layout.ssbo_size.count;
so->constbase.ssbo_sizes = constoff;
if (const_state->ssbo_size.count > 0) {
unsigned cnt = const_state->ssbo_size.count;
const_state->offsets.ssbo_sizes = constoff;
constoff += align(cnt, 4) / 4;
}
if (so->const_layout.image_dims.count > 0) {
unsigned cnt = so->const_layout.image_dims.count;
so->constbase.image_dims = constoff;
if (const_state->image_dims.count > 0) {
unsigned cnt = const_state->image_dims.count;
const_state->offsets.image_dims = constoff;
constoff += align(cnt, 4) / 4;
}
......@@ -156,17 +139,17 @@ ir3_context_init(struct ir3_compiler *compiler,
num_driver_params = IR3_DP_CS_COUNT;
}
so->constbase.driver_param = constoff;
const_state->offsets.driver_param = constoff;
constoff += align(num_driver_params, 4) / 4;
if ((so->type == MESA_SHADER_VERTEX) &&
(compiler->gpu_id < 500) &&
so->shader->stream_output.num_outputs > 0) {
so->constbase.tfbo = constoff;
const_state->offsets.tfbo = constoff;
constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
}
so->constbase.immediate = constoff;
const_state->offsets.immediate = constoff;
return ctx;
}
......
......@@ -323,10 +323,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
ctx->immediate_idx++;
}
struct ir3_const_state *const_state = &ctx->so->const_state;
new_flags &= ~IR3_REG_IMMED;
new_flags |= IR3_REG_CONST;
reg->flags = new_flags;
reg->num = i + (4 * ctx->so->constbase.immediate);
reg->num = i + (4 * const_state->offsets.immediate);
return reg;
}
......
......@@ -278,7 +278,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
void
ir3_nir_scan_driver_consts(nir_shader *shader,
struct ir3_driver_const_layout *layout)
struct ir3_const_state *layout)
{
nir_foreach_function(function, shader) {
if (!function->impl)
......
......@@ -33,7 +33,7 @@
#include "ir3_shader.h"
void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layout *layout);
void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_const_state *layout);
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);
bool ir3_nir_lower_tg4_to_tex(nir_shader *shader);
......
......@@ -350,8 +350,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)
(regid >> 2), "xyzw"[regid & 0x3], i);
}
struct ir3_const_state *const_state = &so->const_state;
for (i = 0; i < so->immediates_count; i++) {
fprintf(out, "@const(c%d.x)\t", so->constbase.immediate + i);
fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);
fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
so->immediates[i].val[0],
so->immediates[i].val[1],
......
......@@ -71,6 +71,14 @@ enum ir3_driver_param {
/**
* Describes the layout of shader consts. This includes:
* + Driver lowered UBO ranges
* + SSBO sizes
* + Image sizes/dimensions
* + Driver params (ie. IR3_DP_*)
* + TFBO addresses (for generations that do not have hardware streamout)
* + Lowered immediates
*
* For consts needed to pass internal values to shader which may or may not
* be required, rather than allocating worst-case const space, we scan the
* shader and allocate consts as-needed:
......@@ -80,8 +88,46 @@ enum ir3_driver_param {
*
* + Image dimensions: needed to calculate pixel offset, but only for
* images that have an image_store intrinsic
*
* Layout of constant registers, each section aligned to vec4. Note
* that pointer size (ubo, etc) changes depending on generation.
*
* user consts
* UBO addresses
* SSBO sizes
* if (vertex shader) {
* driver params (IR3_DP_*)
* if (stream_output.num_outputs > 0)
* stream-out addresses
* } else if (compute_shader) {
* driver params (IR3_DP_*)
* }
* immediates
*
* Immediates go last mostly because they are inserted in the CP pass
* after the nir -> ir3 frontend.
*
* Note UBO size in bytes should be aligned to vec4
*/
struct ir3_driver_const_layout {
struct ir3_const_state {
/* number of uniforms (in vec4), not including built-in compiler
* constants, etc.
*/
unsigned num_uniforms;
unsigned num_ubos;
struct {
/* user const start at zero */
unsigned ubo;
/* NOTE that a3xx might need a section for SSBO addresses too */
unsigned ssbo_sizes;
unsigned image_dims;
unsigned driver_param;
unsigned tfbo;
unsigned immediate;
} offsets;
struct {
uint32_t mask; /* bitmask of SSBOs that have get_buffer_size */
uint32_t count; /* number of consts allocated */
......@@ -340,7 +386,7 @@ struct ir3_shader_variant {
bool binning_pass;
struct ir3_shader_variant *binning;
struct ir3_driver_const_layout const_layout;
struct ir3_const_state const_state;
struct ir3_info info;
struct ir3 *ir;
......@@ -361,13 +407,6 @@ struct ir3_shader_variant {
*/
unsigned constlen;
/* number of uniforms (in vec4), not including built-in compiler
* constants, etc.
*/
unsigned num_uniforms;
unsigned num_ubos;
/* About Linkage:
* + Let the frag shader determine the position/compmask for the
* varyings, since it is the place where we know if the varying
......@@ -451,21 +490,6 @@ struct ir3_shader_variant {
bool per_samp;
/* Layout of constant registers, each section (in vec4). Pointer size
* is 32b (a3xx, a4xx), or 64b (a5xx+), which effects the size of the
* UBO and stream-out consts.
*/
struct {
/* user const start at zero */
unsigned ubo;
/* NOTE that a3xx might need a section for SSBO addresses too */
unsigned ssbo_sizes;
unsigned image_dims;
unsigned driver_param;
unsigned tfbo;
unsigned immediate;
} constbase;
unsigned immediates_count;
unsigned immediates_size;
struct {
......
......@@ -241,7 +241,8 @@ emit_user_consts(struct fd_context *ctx, const struct ir3_shader_variant *v,
* the user consts early to avoid HLSQ lockup caused by
* writing too many consts
*/
uint32_t max_const = MIN2(v->num_uniforms, v->constlen);
const struct ir3_const_state *const_state = &v->const_state;
uint32_t max_const = MIN2(const_state->num_uniforms, v->constlen);
/* and even if the start of the const buffer is before
* first_immediate, the end may not be:
......@@ -280,9 +281,10 @@ static void
emit_ubos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_constbuf_stateobj *constbuf)
{
uint32_t offset = v->constbase.ubo;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.ubo;
if (v->constlen > offset) {
uint32_t params = v->num_ubos;
uint32_t params = const_state->num_ubos;
uint32_t offsets[params];
struct pipe_resource *prscs[params];
......@@ -309,14 +311,15 @@ static void
emit_ssbo_sizes(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderbuf_stateobj *sb)
{
uint32_t offset = v->constbase.ssbo_sizes;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.ssbo_sizes;
if (v->constlen > offset) {
uint32_t sizes[align(v->const_layout.ssbo_size.count, 4)];
unsigned mask = v->const_layout.ssbo_size.mask;
uint32_t sizes[align(const_state->ssbo_size.count, 4)];
unsigned mask = const_state->ssbo_size.mask;
while (mask) {
unsigned index = u_bit_scan(&mask);
unsigned off = v->const_layout.ssbo_size.off[index];
unsigned off = const_state->ssbo_size.off[index];
sizes[off] = sb->sb[index].buffer_size;
}
......@@ -330,16 +333,17 @@ static void
emit_image_dims(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_shaderimg_stateobj *si)
{
uint32_t offset = v->constbase.image_dims;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.image_dims;
if (v->constlen > offset) {
uint32_t dims[align(v->const_layout.image_dims.count, 4)];
unsigned mask = v->const_layout.image_dims.mask;
uint32_t dims[align(const_state->image_dims.count, 4)];
unsigned mask = const_state->image_dims.mask;
while (mask) {
struct pipe_image_view *img;
struct fd_resource *rsc;
unsigned index = u_bit_scan(&mask);
unsigned off = v->const_layout.image_dims.off[index];
unsigned off = const_state->image_dims.off[index];
img = &si->si[index];
rsc = fd_resource(img->resource);
......@@ -382,8 +386,9 @@ static void
emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
const struct ir3_const_state *const_state = &v->const_state;
uint32_t base = const_state->offsets.immediate;
int size = v->immediates_count;
uint32_t base = v->constbase.immediate;
/* truncate size to avoid writing constants that shader
* does not use:
......@@ -407,7 +412,8 @@ emit_tfbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
/* streamout addresses after driver-params: */
uint32_t offset = v->constbase.tfbo;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.tfbo;
if (v->constlen > offset) {
struct fd_streamout_stateobj *so = &ctx->streamout;
struct ir3_stream_output_info *info = &v->shader->stream_output;
......@@ -534,7 +540,8 @@ ir3_emit_vs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
/* emit driver params every time: */
/* TODO skip emit if shader doesn't use driver params to avoid WFI.. */
if (info) {
uint32_t offset = v->constbase.driver_param;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
uint32_t vertex_params[IR3_DP_VS_COUNT] = {
[IR3_DP_VTXID_BASE] = info->index_size ?
......@@ -628,7 +635,8 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v, struct fd_ringbuffer *rin
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
/* emit compute-shader driver-params: */
uint32_t offset = v->constbase.driver_param;
const struct ir3_const_state *const_state = &v->const_state;
uint32_t offset = const_state->offsets.driver_param;
if (v->constlen > offset) {
ring_wfi(ctx->batch, ring);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment