Commit da4992b1 authored by Rob Clark
Browse files

WIP: freedreno/ir3: Add support for load_kernel_input

Used for function arguments to compute kernels (ie. OpenCL).
parent 725c6f57
......@@ -851,6 +851,41 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
}
}
/* Load a kernel param: src[] = { address }.
 *
 * The byte position of the parameter is nir_intrinsic_base(intr) plus the
 * value of src[0].  Kernel params are read from the const file, starting at
 * const_state->offsets.kernel_params (turned into a scalar const register
 * index via regid()).  Only a single 32b value is produced, in dst[0].
 *
 * Two cases are handled:
 *  - src[0] is a same-type mov of an immediate: the offset is folded at
 *    compile time and a direct const read is emitted.
 *  - otherwise: the dynamic byte offset is converted to dwords and used as
 *    the address register for a relative const read.
 *
 * In both cases the read must be relative to the start of the kernel-param
 * region, so the base 'p' is applied on both paths.
 */
static void
emit_intrinsic_load_kernel_input(struct ir3_context *ctx,
                                 nir_intrinsic_instr *intr,
                                 struct ir3_instruction **dst)
{
   const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
   struct ir3_block *b = ctx->block;
   unsigned offset = nir_intrinsic_base(intr);
   unsigned p = regid(const_state->offsets.kernel_params, 0);

   struct ir3_instruction *src0 = ir3_get_src(ctx, &intr->src[0])[0];

   if (is_same_type_mov(src0) && (src0->srcs[0]->flags & IR3_REG_IMMED)) {
      /* Constant address: fold it into the static offset. */
      offset += src0->srcs[0]->iim_val;

      /* kernel param position is in bytes, but constant space is 32b registers: */
      compile_assert(ctx, !(offset & 0x3));

      dst[0] = create_uniform(b, p + (offset / 4));
   } else {
      /* kernel param position is in bytes, but constant space is 32b registers: */
      compile_assert(ctx, !(offset & 0x3));

      /* TODO we should probably be lowering this in nir, and also handling
       * non-32b inputs..  Also we probably don't want to be using
       * SP_MODE_CONTROL.CONSTANT_DEMOTION_ENABLE for KERNEL shaders..
       */
      /* Dynamic address is in bytes too; shift it down to a dword index. */
      src0 = ir3_SHR_B(b, src0, 0, create_immed(b, 2), 0);

      /* The static part of the relative access must include the kernel
       * param base, exactly like the direct path above; without it the
       * read would be relative to the start of the const file.
       */
      dst[0] = create_uniform_indirect(b, p + (offset / 4), TYPE_U32,
                                       ir3_get_addr0(ctx, src0, 1));
   }
}
/* src[] = { block_index } */
static void
emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
......@@ -1777,6 +1812,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
case nir_intrinsic_load_input:
setup_input(ctx, intr);
break;
case nir_intrinsic_load_kernel_input:
emit_intrinsic_load_kernel_input(ctx, intr, dst);
break;
/* All SSBO intrinsics should have been lowered by 'lower_io_offsets'
* pass and replaced by an ir3-specific version that adds the
* dword-offset in the last source.
......
......@@ -883,6 +883,11 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
constoff += align(cnt, 4) / 4;
}
if (v->type == MESA_SHADER_KERNEL) {
const_state->offsets.kernel_params = constoff;
constoff += align(v->shader->cs.req_input_mem, 4) / 4;
}
if (const_state->num_driver_params > 0) {
/* num_driver_params in dwords. we only need to align to vec4s for the
* common case of immediate constant uploads, but for indirect dispatch
......
......@@ -146,12 +146,14 @@ struct ir3_ubo_analysis_state {
* user consts
* UBO addresses
* SSBO sizes
* image dimensions
* if (vertex shader) {
* driver params (IR3_DP_*)
* driver params (IR3_DP_VS_COUNT)
* if (stream_output.num_outputs > 0)
* stream-out addresses
* } else if (compute_shader) {
* driver params (IR3_DP_*)
* kernel params
* driver params (IR3_DP_CS_COUNT)
* }
* immediates
*
......@@ -171,6 +173,7 @@ struct ir3_const_state {
/* user const start at zero */
unsigned ubo;
unsigned image_dims;
unsigned kernel_params;
unsigned driver_param;
unsigned tfbo;
unsigned primitive_param;
......@@ -740,6 +743,14 @@ struct ir3_shader {
struct nir_shader *nir;
struct ir3_stream_output_info stream_output;
/* per shader stage specific info: */
union {
/* for compute shaders: */
struct {
unsigned req_input_mem; /* in dwords */
} cs;
};
struct ir3_shader_variant *variants;
mtx_t variants_lock;
......
......@@ -433,6 +433,22 @@ emit_common_consts(const struct ir3_shader_variant *v,
}
}
/* Emit kernel params.
 *
 * Uploads the kernel argument data supplied with the grid launch
 * (info->input) into the const slot reserved for kernel params in the
 * variant's const state.  Nothing is emitted when the variant's constlen
 * does not extend past the start of that slot.
 */
static inline void
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
                   struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
   assert_dt
{
   const struct ir3_const_state *state = ir3_const_state(v);
   const uint32_t base = state->offsets.kernel_params;

   /* Kernel-param region lies entirely beyond the consts the shader uses. */
   if (v->constlen <= base)
      return;

   ring_wfi(ctx->batch, ring);

   /* req_input_mem is kept in dwords; the upload size is that count rounded
    * up to a multiple of four.  The destination is the slot offset scaled
    * by four (presumably vec4 -> dword units; confirm against
    * emit_const_user()).
    */
   const uint32_t sizedwords = align(v->shader->cs.req_input_mem, 4);

   emit_const_user(ring, v, base * 4, sizedwords, info->input);
}
static inline void
ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring, struct fd_context *ctx,
......@@ -552,6 +568,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v,
debug_assert(gl_shader_stage_is_compute(v->type));
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
emit_kernel_params(ctx, v, ring, info);
/* emit compute-shader driver-params: */
const struct ir3_const_state *const_state = ir3_const_state(v);
......
......@@ -308,6 +308,8 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
}
struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL);
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
util_queue_fence_init(&hwcso->ready);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment