Commit 1752a058 authored by Qiang Yu
Browse files

radeonsi: replace llvm ls/hs interface lds ops with nir lowered ones



Use ac nir lower pass to generate these lds load/store ops explicitly.
Signed-off-by: Qiang Yu <yuq825@gmail.com>
parent 8f331ad4
Pipeline #586017 waiting for manual action with stages
in 7 seconds
......@@ -3459,17 +3459,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
bool vertex_index_is_invoc_id =
vertex_index_src &&
vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
nir_intrinsic_load_invocation_id;
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
vertex_index, indir_index,
base, component,
count, !is_output,
vertex_index_is_invoc_id);
count, !is_output);
if (instr->dest.ssa.bit_size == 16) {
result = ac_to_integer(&ctx->ac, result);
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
......
......@@ -67,8 +67,7 @@ struct ac_shader_abi {
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
unsigned num_components,
bool load_inputs, bool vertex_index_is_invoc_id);
unsigned num_components, bool load_inputs);
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
LLVMValueRef vertex_index, LLVMValueRef param_index,
......
......@@ -1490,6 +1490,31 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
return progress;
}
/* Map a varying semantic to its driver location: the unique LS/HS I/O slot
 * index from si_shader_io_get_unique_index(). Passed as the
 * map_io callback to the ac_nir LS/HS LDS lowering passes so they place
 * each varying at the same LDS offset the LLVM paths used.
 */
static unsigned si_map_io_driver_location(unsigned semantic)
{
   return si_shader_io_get_unique_index(semantic, false);
}
/* Lower shader I/O to explicit LDS memory operations for the merged
 * LS/HS pipeline: VS-as-LS outputs and TCS inputs are rewritten by the
 * ac_nir passes into load/store ops at driver-mapped LDS offsets.
 *
 * \param key                  shader key (as_ls / same_patch_vertices opts)
 * \param nir                  the shader to lower in place
 * \param tcs_vgpr_only_inputs bitmask of TCS inputs passed via VGPRs only
 * \return true when a lowering pass ran, so the caller knows to
 *         re-run the optimization loop (e.g. nir_opt_offsets).
 */
static bool si_lower_io_mem(const union si_shader_key *key,
                            nir_shader *nir,
                            uint64_t tcs_vgpr_only_inputs)
{
   const bool same_patch_vertices = key->ge.opt.same_patch_vertices;

   switch (nir->info.stage) {
   case MESA_SHADER_VERTEX:
      /* Only VS compiled as the LS part of a merged LS/HS shader
       * writes its outputs to LDS. */
      if (!key->ge.as_ls)
         return false;
      ac_nir_lower_ls_outputs_to_mem(nir, si_map_io_driver_location,
                                     same_patch_vertices,
                                     tcs_vgpr_only_inputs);
      return true;

   case MESA_SHADER_TESS_CTRL:
      ac_nir_lower_hs_inputs_to_mem(nir, si_map_io_driver_location,
                                    same_patch_vertices);
      return true;

   default:
      return false;
   }
}
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
const union si_shader_key *key,
bool *free_nir,
......@@ -1603,10 +1628,23 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
* this should be done after that.
*/
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
if (progress2)
bool opt_offsets = false;
opt_offsets |= si_lower_io_mem(key, nir, tcs_vgpr_only_inputs);
if (progress2 || opt_offsets)
si_nir_opts(sel->screen, nir, false);
if (progress || progress2)
if (opt_offsets) {
static const nir_opt_offsets_options offset_options = {
.uniform_max = 0,
.buffer_max = ~0,
.shared_max = ~0,
};
NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
}
if (progress || progress2 || opt_offsets)
si_nir_late_opts(nir);
/* This helps LLVM form VMEM clauses and thus get more GPU cache hits.
......
......@@ -39,7 +39,6 @@ struct si_shader_output_values {
struct si_shader_context {
struct ac_llvm_context ac;
struct si_shader *shader;
struct si_shader_selector *next_shader_sel;
struct si_screen *screen;
struct pipe_stream_output_info so;
......
......@@ -743,10 +743,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
}
case nir_intrinsic_load_tess_level_outer:
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false);
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true);
case nir_intrinsic_load_tess_level_inner:
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false);
return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true);
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default: {
......@@ -1240,9 +1240,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
parts[3] = ctx.main_fn;
/* VS as LS main part */
ctx.next_shader_sel = ctx.shader->selector;
struct si_shader shader_ls = {};
shader_ls.selector = ls;
shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
......
......@@ -384,44 +384,31 @@ void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
unsigned num_components, bool load_input,
bool vertex_index_is_invoc_id)
unsigned num_components, bool load_input)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
LLVMValueRef dw_addr, stride;
ubyte semantic;
LLVMValueRef value[4];
if (load_input) {
semantic = info->input[driver_location].semantic;
} else {
semantic = info->output_semantic[driver_location];
}
assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
/* Load the TCS input from a VGPR if possible. */
if (ctx->shader->key.ge.opt.same_patch_vertices &&
load_input && vertex_index_is_invoc_id && !param_index) {
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(semantic, false) * 4;
LLVMValueRef value[4];
/* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 + driver_location * 4;
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}
} else {
ubyte semantic = info->output_semantic[driver_location];
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
bool is_patch = vertex_index == NULL;
assert((semantic >= VARYING_SLOT_PATCH0 ||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
bool is_patch = vertex_index == NULL;
assert((semantic >= VARYING_SLOT_PATCH0 ||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
if (load_input) {
stride = si_get_tcs_in_vertex_dw_stride(ctx);
dw_addr = get_tcs_in_current_patch_offset(ctx);
} else {
LLVMValueRef dw_addr, stride;
if (is_patch) {
stride = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
......@@ -429,14 +416,13 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
}
}
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
semantic);
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
param_index, semantic);
LLVMValueRef value[4];
for (unsigned i = component; i < component + num_components; i++)
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
for (unsigned i = component; i < component + num_components; i++)
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
}
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
......@@ -444,8 +430,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
unsigned num_components,
bool load_input, bool vertex_index_is_invoc_id)
unsigned num_components, bool load_input)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
......@@ -877,58 +862,20 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct si_shader_info *info = &shader->selector->info;
unsigned i, chan;
LLVMValueRef vertex_id;
if (ctx->screen->info.gfx_level >= GFX11) {
vertex_id = ac_build_imad(&ctx->ac, si_unpack_param(ctx, ctx->args.tcs_wave_id, 0, 5),
LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0),
ac_get_thread_id(&ctx->ac));
} else {
vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
}
LLVMValueRef vertex_dw_stride = si_get_tcs_in_vertex_dw_stride(ctx);
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
LLVMValueRef *addrs = ctx->abi.outputs;
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
/* Write outputs to LDS. The next shader (TCS aka HS) will read
* its inputs from it. */
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
/* The ARB_shader_viewport_layer_array spec contains the
* following issue:
*
* 2) What happens if gl_ViewportIndex or gl_Layer is
* written in the vertex shader and a geometry shader is
* present?
*
* RESOLVED: The value written by the last vertex processing
* stage is used. If the last vertex processing stage
* (vertex, tessellation evaluation or geometry) does not
* statically assign to gl_ViewportIndex or gl_Layer, index
* or layer zero is assumed.
*
* So writes to those outputs in VS-as-LS are simply ignored.
*/
if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)
continue;
int param = si_shader_io_get_unique_index(semantic, false);
LLVMValueRef dw_addr =
LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)))
continue;
if (shader->key.ge.opt.same_patch_vertices) {
for (unsigned i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
int param = si_shader_io_get_unique_index(semantic, false);
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
for (unsigned chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)))
continue;
if (!shader->key.ge.opt.same_patch_vertices ||
!(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
lshs_lds_store(ctx, chan, dw_addr, value);
LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
if (shader->key.ge.opt.same_patch_vertices) {
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
value, ret_offset + param * 4 + chan, "");
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.