From 9ff01d724a0b1bc47d62d7f5a3290457f68adcf4 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 23 Feb 2021 11:24:35 -0500 Subject: [PATCH 01/14] zink: remove ntv streamout assert this was added during review, but it was never correct and just crashes valid cases like streamout from a mat3x4 type Fixes: b6f8f3a3ba4 ("zink: fix streamout for clipdistance") Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index a0f7e5e2fb85..52789ae6f0fe 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -1309,7 +1309,6 @@ emit_so_outputs(struct ntv_context *ctx, while (!output) output = ctx->outputs[location--]; location++; - assert(orig_location - location < 8); SpvId output_type = ctx->so_output_types[location]; const struct glsl_type *out_type = ctx->so_output_gl_types[location]; -- GitLab From 1b25e3a7019ca529c2ab665a35e06f21ec4b4ebb Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 23 Feb 2021 11:27:20 -0500 Subject: [PATCH 02/14] zink: fix streamout emission for super-enhanced layouts if we get some crazy matrix types in here then we need to ensure that we accurately unwrap them and copy the components fixes KHR-GL46.enhanced_layouts.xfb_stride Fixes: 1b130c42b8d ("zink: implement streamout and xfb handling in ntv") Reviewed-by: Dave Airlie Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 52789ae6f0fe..f73c4a7ee0ab 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -1343,12 +1343,18 @@ emit_so_outputs(struct ntv_context *ctx, * and re-pack them into the desired output type */ for (unsigned c = 0; c < so_output.num_components; c++) { - uint32_t member[] = { so_output.start_component + c }; - SpvId base_type = get_glsl_type(ctx, glsl_without_array(out_type)); + uint32_t member[2]; + unsigned member_idx = 0; + if (glsl_type_is_matrix(out_type)) { + member_idx = 1; + member[0] = so_output.register_index; + } + member[member_idx] = so_output.start_component + c; + SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type))); if (slot == VARYING_SLOT_CLIP_DIST1) - member[0] += 4; - components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1); + member[member_idx] += 4; + components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx); } result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); } -- GitLab From 96024a8dc965816513ef69d4b34a34a8c407b4ae Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 14:52:11 -0500 Subject: [PATCH 03/14] zink: fix slot mapping for fat io variables big types like dmat2x3 need multiple slots, and trying to jam them into single slots breaks everything Reviewed-by: Dave Airlie Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index f73c4a7ee0ab..14403adec86f 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -383,21 +383,24 @@ create_shared_block(struct ntv_context *ctx, unsigned shared_size) } static inline unsigned char -reserve_slot(struct ntv_context *ctx) +reserve_slot(struct ntv_context *ctx, unsigned num_slots) { /* TODO: this should actually be clamped to the limits value as in the table * in 14.1.4 of the vulkan spec, though there's not really any recourse * other than aborting if we do hit it... */ - assert(ctx->shader_slots_reserved < MAX_VARYING); - return ctx->shader_slots_reserved++; + assert(ctx->shader_slots_reserved + num_slots <= MAX_VARYING); + unsigned ret = ctx->shader_slots_reserved; + ctx->shader_slots_reserved += num_slots; + return ret; } static inline unsigned -handle_slot(struct ntv_context *ctx, unsigned slot) +handle_slot(struct ntv_context *ctx, unsigned slot, unsigned num_slots) { + assert(num_slots); if (ctx->shader_slot_map[slot] == SLOT_UNSET) - ctx->shader_slot_map[slot] = reserve_slot(ctx); + ctx->shader_slot_map[slot] = reserve_slot(ctx, num_slots); slot = ctx->shader_slot_map[slot]; assert(slot < MAX_VARYING); return slot; @@ -420,7 +423,7 @@ handle_handle_slot(struct ntv_context *ctx, struct nir_variable *var, bool outpu (!output && ctx->stage == MESA_SHADER_TESS_EVAL))) { return var->data.location - VARYING_SLOT_VAR0; } - return handle_slot(ctx, var->data.location); + return handle_slot(ctx, var->data.location, glsl_count_vec4_slots(var->type, false, false)); } static SpvId @@ -465,7 +468,7 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) HANDLE_EMIT_BUILTIN(FACE, FrontFacing); default: - slot = handle_slot(ctx, slot); + slot = handle_slot(ctx, slot, glsl_count_vec4_slots(var->type, false, false)); spirv_builder_emit_location(&ctx->builder, var_id, slot); } if (var->data.centroid) -- GitLab From 6d8b5e7f09cd941e74adec5d069edaed39b434c2 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 14:53:51 -0500 Subject: [PATCH 04/14] zink: fix location usage for explicit xfb outputs ensure that this accurately handles multi-slot emission Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 14403adec86f..be682c7d7341 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -26,6 +26,7 @@ #include "nir.h" #include "pipe/p_state.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/hash_table.h" @@ -1249,6 +1250,7 @@ get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_c static void emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) { + unsigned output = 0; for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { struct pipe_stream_output so_output = so_info->so_info.output[i]; unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; @@ -1260,7 +1262,7 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) SpvStorageClassOutput); char name[10]; - snprintf(name, 10, "xfb%d", i); + snprintf(name, 10, "xfb%d", output); spirv_builder_emit_name(&ctx->builder, var_id, name); spirv_builder_emit_offset(&ctx->builder, var_id, (so_output.dst_offset * 4)); spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, so_output.output_buffer); @@ -1272,8 +1274,8 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted * outputs. */ - uint32_t location = ctx->shader_slots_reserved + i; - assert(location < VARYING_SLOT_VAR0); + uint32_t location = ctx->shader_slots_reserved + output; + assert(location < VARYING_SLOT_VAR0 && ctx->shader_slot_map[location] == SLOT_UNSET); spirv_builder_emit_location(&ctx->builder, var_id, location); /* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here, @@ -1288,6 +1290,7 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; + output += align(so_output.num_components, 4) / 4; } } -- GitLab From 086262fc53ab7e1f68e7a4c4a188b1431c9c8554 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 14:55:24 -0500 Subject: [PATCH 05/14] zink: run more nir passes for tess shaders running nir_lower_io_arrays_to_elements_no_indirects for only some stages breaks location-setting for the stages which don't run it when e.g., dmat2x3 variables are sometimes split across locations and sometimes jammed into a single location (TCS I'm looking at you) Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 62ba11fd4a91..db42e2c73afb 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -547,6 +547,11 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, if (nir->info.stage == MESA_SHADER_VERTEX) create_vs_pushconst(nir); + else if (nir->info.stage == MESA_SHADER_TESS_CTRL || + nir->info.stage == MESA_SHADER_TESS_EVAL) { + NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out, UINT_MAX); + NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); + } /* only do uniforms -> ubo if we have uniforms, otherwise we're just * screwing with the bindings for no reason -- GitLab From 7cef91dd43671f8eada82778c94dbed7386cf0e3 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 14:57:03 -0500 Subject: [PATCH 06/14] zink: stop allocating xfb slot map this can just be inlined since it's a small static size Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 6 ++---- src/gallium/drivers/zink/zink_compiler.h | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index db42e2c73afb..620bc434618e 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -422,6 +422,7 @@ update_so_info(struct zink_shader *sh, /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ sh->streamout.so_info_slots[i] = reverse_map[output->register_index]; } + sh->streamout.have_xfb = true; } VkShaderModule @@ -434,7 +435,7 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, struct z /* TODO: use a separate mem ctx here for ralloc */ if (zs->nir->info.stage < MESA_SHADER_FRAGMENT) { if (zink_vs_key(key)->last_vertex_stage) { - if (zs->streamout.so_info_slots) + if (zs->streamout.have_xfb) streamout = &zs->streamout; if (!zink_vs_key(key)->clip_halfz) { @@ -664,8 +665,6 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ret->nir = nir; if (so_info) { memcpy(&ret->streamout.so_info, so_info, sizeof(struct pipe_stream_output_info)); - ret->streamout.so_info_slots = malloc(so_info->num_outputs * sizeof(unsigned int)); - assert(ret->streamout.so_info_slots); update_so_info(ret, nir->info.outputs_written, have_psiz); } @@ -693,7 +692,6 @@ zink_shader_free(struct zink_context *ctx, struct zink_shader *shader) } } _mesa_set_destroy(shader->programs, NULL); - free(shader->streamout.so_info_slots); ralloc_free(shader->nir); FREE(shader); } diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index 80b2b88fdf32..e61d8ae45b7e 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -50,7 +50,8 @@ struct set; struct tgsi_token; struct zink_so_info { struct pipe_stream_output_info so_info; - unsigned *so_info_slots; + unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; + bool have_xfb; }; -- GitLab From 6d40db84c9d507768f118fe20fc4437cbfa6be41 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 15:00:22 -0500 Subject: [PATCH 07/14] zink: handle direct xfb output from output variables if an entire variable is being dumped into an xfb buffer, there's no need to create an explicit xfb variable to copy the value into, and instead the xfb attributes can just be set normally on the variable this doesn't work for geometry shaders because outputs are per-vertex fixes all KHR-GL46.enhanced_layouts xfb tests Reviewed-by: Dave Airlie Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 12 ++++++++ src/gallium/drivers/zink/zink_compiler.c | 30 ++++++++++++++++++- src/gallium/drivers/zink/zink_compiler.h | 1 + 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index be682c7d7341..7bb5f5723d45 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -621,6 +621,14 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); + if (var->data.explicit_xfb_buffer) { + spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); + spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer); + spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); + if (var->data.stream) + spirv_builder_emit_stream(&ctx->builder, var_id, var->data.stream); + } + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); @@ -1252,6 +1260,8 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) { unsigned output = 0; for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { + if (so_info->skip[i]) + continue; struct pipe_stream_output so_output = so_info->so_info.output[i]; unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; SpvId out_type = get_output_type(ctx, slot, so_output.num_components); @@ -1299,6 +1309,8 @@ emit_so_outputs(struct ntv_context *ctx, const struct zink_so_info *so_info) { for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { + if (so_info->skip[i]) + continue; uint32_t components[NIR_MAX_VEC_COMPONENTS]; unsigned slot = so_info->so_info_slots[i]; struct pipe_stream_output so_output = so_info->so_info.output[i]; diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 620bc434618e..38e95c194a43 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -402,13 +402,13 @@ check_psiz(struct nir_shader *s) return false; } -/* semi-copied from iris */ static void update_so_info(struct zink_shader *sh, uint64_t outputs_written, bool have_psiz) { uint8_t reverse_map[64] = {}; unsigned slot = 0; + /* semi-copied from iris */ while (outputs_written) { int bit = u_bit_scan64(&outputs_written); /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */ @@ -417,8 +417,36 @@ update_so_info(struct zink_shader *sh, reverse_map[slot++] = bit; } + nir_foreach_shader_out_variable(var, sh->nir) + var->data.explicit_xfb_buffer = 0; + + bool inlined[64] = {0}; for (unsigned i = 0; i < sh->streamout.so_info.num_outputs; i++) { struct pipe_stream_output *output = &sh->streamout.so_info.output[i]; + unsigned slot = reverse_map[output->register_index]; + if ((sh->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(sh->nir->info.gs.active_stream_mask) == 1) && + !output->start_component) { + nir_variable *var = NULL; + while (!var) + var = nir_find_variable_with_location(sh->nir, nir_var_shader_out, slot--); + slot++; + if (inlined[slot]) { + sh->streamout.skip[i] = true; + continue; + } + assert(var && var->data.location == slot); + /* if this is the entire variable, try to blast it out during the initial declaration */ + if (glsl_get_components(var->type) == output->num_components) { + var->data.explicit_xfb_buffer = 1; + var->data.xfb.buffer = output->output_buffer; + var->data.xfb.stride = sh->streamout.so_info.stride[output->output_buffer] * 4; + var->data.offset = output->dst_offset * 4; + var->data.stream = output->stream; + sh->streamout.skip[i] = true; + inlined[slot] = true; + continue; + } + } /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ sh->streamout.so_info_slots[i] = reverse_map[output->register_index]; } diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index e61d8ae45b7e..bc2c57df257c 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -51,6 +51,7 @@ struct tgsi_token; struct zink_so_info { struct pipe_stream_output_info so_info; unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; + bool skip[PIPE_MAX_SO_OUTPUTS]; bool have_xfb; }; -- GitLab From 5c5e1abea24f44adaa904239031af71b4ff283c0 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 09:39:49 -0500 Subject: [PATCH 08/14] zink: evaluate existing slot map during program init and force new map as needed if the number of explicit xfb outputs or new varyings added to the existing size of the slot map would cause an overflow, we have to force a new slot map to ensure that everything fits this means iterating all the stages which can produce new varyings and calculating all the slots required in order to compare against the max size available Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_program.c | 55 ++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index 94203dd3f314..aa6ce49c3f9d 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -415,15 +415,66 @@ static void init_slot_map(struct zink_context *ctx, struct zink_gfx_program *prog) { unsigned existing_shaders = 0; + bool needs_new_map = false; - /* if there's a case where we'll be reusing any shaders, we need to reuse the slot map too */ + /* if there's a case where we'll be reusing any shaders, we need to (maybe) reuse the slot map too */ if (ctx->curr_program) { for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { if (ctx->curr_program->shaders[i]) existing_shaders |= 1 << i; } + /* if there's reserved slots, check whether we have enough remaining slots */ + if (ctx->curr_program->shader_slots_reserved) { + uint64_t max_outputs = 0; + uint32_t num_xfb_outputs = 0; + for (int i = 0; i < ZINK_SHADER_COUNT; ++i) { + if (i != PIPE_SHADER_TESS_CTRL && + i != PIPE_SHADER_FRAGMENT && + ctx->gfx_stages[i]) { + uint32_t user_outputs = ctx->gfx_stages[i]->nir->info.outputs_written >> 32; + uint32_t builtin_outputs = ctx->gfx_stages[i]->nir->info.outputs_written; + num_xfb_outputs = MAX2(num_xfb_outputs, ctx->gfx_stages[i]->streamout.so_info.num_outputs); + unsigned user_outputs_count = 0; + /* check builtins first */ + u_foreach_bit(slot, builtin_outputs) { + switch (slot) { + /* none of these require slot map entries */ + case VARYING_SLOT_POS: + case VARYING_SLOT_PSIZ: + case VARYING_SLOT_LAYER: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_CULL_DIST0: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_TESS_LEVEL_INNER: + case VARYING_SLOT_TESS_LEVEL_OUTER: + break; + default: + /* remaining legacy builtins only require 1 slot each */ + if (ctx->curr_program->shader_slot_map[slot] == -1) + user_outputs_count++; + break; + } + } + u_foreach_bit(slot, user_outputs) { + if (ctx->curr_program->shader_slot_map[slot] == -1) { + /* user variables can span multiple slots */ + nir_variable *var = nir_find_variable_with_location(ctx->gfx_stages[i]->nir, + nir_var_shader_out, slot); + assert(var); + user_outputs_count += glsl_count_vec4_slots(var->type, false, false); + } + } + max_outputs = MAX2(max_outputs, user_outputs_count); + } + } + /* slot map can only hold 32 entries, so dump this one if we'll exceed that */ + if (ctx->curr_program->shader_slots_reserved + max_outputs + num_xfb_outputs > 32) + needs_new_map = true; + } } - if (ctx->dirty_shader_stages == existing_shaders || !existing_shaders) { + + if (needs_new_map || ctx->dirty_shader_stages == existing_shaders || !existing_shaders) { /* all shaders are being recompiled: new slot map */ memset(prog->shader_slot_map, -1, sizeof(prog->shader_slot_map)); /* we need the slot map to match up, so we can't reuse the previous cache if we can't guarantee -- GitLab From eebd00329f39aaa0b681a76c281b3b8a07f98910 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 18:01:16 -0500 Subject: [PATCH 09/14] zink: rename variable in update_so_info() be more consistent Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 38e95c194a43..66bc7c8d2e68 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -403,7 +403,7 @@ check_psiz(struct nir_shader *s) } static void -update_so_info(struct zink_shader *sh, +update_so_info(struct zink_shader *zs, uint64_t outputs_written, bool have_psiz) { uint8_t reverse_map[64] = {}; @@ -417,21 +417,21 @@ update_so_info(struct zink_shader *sh, reverse_map[slot++] = bit; } - nir_foreach_shader_out_variable(var, sh->nir) + nir_foreach_shader_out_variable(var, zs->nir) var->data.explicit_xfb_buffer = 0; bool inlined[64] = {0}; - for (unsigned i = 0; i < sh->streamout.so_info.num_outputs; i++) { - struct pipe_stream_output *output = &sh->streamout.so_info.output[i]; + for (unsigned i = 0; i < zs->streamout.so_info.num_outputs; i++) { + struct pipe_stream_output *output = &zs->streamout.so_info.output[i]; unsigned slot = reverse_map[output->register_index]; - if ((sh->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(sh->nir->info.gs.active_stream_mask) == 1) && + if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) && !output->start_component) { nir_variable *var = NULL; while (!var) - var = nir_find_variable_with_location(sh->nir, nir_var_shader_out, slot--); + var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--); slot++; if (inlined[slot]) { - sh->streamout.skip[i] = true; + zs->streamout.skip[i] = true; continue; } assert(var && var->data.location == slot); @@ -439,18 +439,18 @@ update_so_info(struct zink_shader *sh, if (glsl_get_components(var->type) == output->num_components) { var->data.explicit_xfb_buffer = 1; var->data.xfb.buffer = output->output_buffer; - var->data.xfb.stride = sh->streamout.so_info.stride[output->output_buffer] * 4; + var->data.xfb.stride = zs->streamout.so_info.stride[output->output_buffer] * 4; var->data.offset = output->dst_offset * 4; var->data.stream = output->stream; - sh->streamout.skip[i] = true; + zs->streamout.skip[i] = true; inlined[slot] = true; continue; } } /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ - sh->streamout.so_info_slots[i] = reverse_map[output->register_index]; + zs->streamout.so_info_slots[i] = reverse_map[output->register_index]; } - sh->streamout.have_xfb = true; + zs->streamout.have_xfb = true; } VkShaderModule -- GitLab From 0d741b8dfe3ca4418ef7c2b780ca9c556ce34bb9 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 18:19:37 -0500 Subject: [PATCH 10/14] zink: use info.has_transform_feedback_varyings to determine xfb enablement Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 7bb5f5723d45..db43787842a3 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -3832,7 +3832,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, default: break; } - if (so_info && so_info->so_info.num_outputs) { + if (s->info.has_transform_feedback_varyings) { spirv_builder_emit_cap(&ctx.builder, SpvCapabilityTransformFeedback); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeXfb); -- GitLab From 0fb7680b2666601660a9f1fb3fd0245fe43e8d73 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 18:20:55 -0500 Subject: [PATCH 11/14] zink: pass so_info directly to update_so_info() Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 66bc7c8d2e68..229050121f93 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -403,7 +403,7 @@ check_psiz(struct nir_shader *s) } static void -update_so_info(struct zink_shader *zs, +update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info, uint64_t outputs_written, bool have_psiz) { uint8_t reverse_map[64] = {}; @@ -421,8 +421,8 @@ update_so_info(struct zink_shader *zs, var->data.explicit_xfb_buffer = 0; bool inlined[64] = {0}; - for (unsigned i = 0; i < zs->streamout.so_info.num_outputs; i++) { - struct pipe_stream_output *output = &zs->streamout.so_info.output[i]; + for (unsigned i = 0; i < so_info->num_outputs; i++) { + const struct pipe_stream_output *output = &so_info->output[i]; unsigned slot = reverse_map[output->register_index]; if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) && !output->start_component) { @@ -439,7 +439,7 @@ update_so_info(struct zink_shader *zs, if (glsl_get_components(var->type) == output->num_components) { var->data.explicit_xfb_buffer = 1; var->data.xfb.buffer = output->output_buffer; - var->data.xfb.stride = zs->streamout.so_info.stride[output->output_buffer] * 4; + var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; var->data.offset = output->dst_offset * 4; var->data.stream = output->stream; zs->streamout.skip[i] = true; @@ -693,9 +693,10 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, ret->nir = nir; if (so_info) { memcpy(&ret->streamout.so_info, so_info, sizeof(struct pipe_stream_output_info)); - update_so_info(ret, nir->info.outputs_written, have_psiz); + update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); } + return ret; } -- GitLab From 1f42ff77dffc75252019884519fe5577c691d710 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 18:23:43 -0500 Subject: [PATCH 12/14] zink: use slightly stricter check for update_so_info() callsite Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 229050121f93..de5ef7f44cf7 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -691,7 +691,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, } ret->nir = nir; - if (so_info) { + if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings) { memcpy(&ret->streamout.so_info, so_info, sizeof(struct pipe_stream_output_info)); update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); } -- GitLab From 7ed57e60fcbd7b0f0f71f4e4ebef0c1e81dff362 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 15:24:39 -0500 Subject: [PATCH 13/14] zink: only export necessary xfb outputs to ntv the full-variable outputs can be skipped, leaving only the varyings which actually need explicit emission due to packed layouts or whatever Reviewed-by: Dave Airlie Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 4 ---- src/gallium/drivers/zink/zink_compiler.c | 15 ++++++--------- src/gallium/drivers/zink/zink_compiler.h | 1 - 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index db43787842a3..5055d8dd5f4a 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -1260,8 +1260,6 @@ emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info) { unsigned output = 0; for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - if (so_info->skip[i]) - continue; struct pipe_stream_output so_output = so_info->so_info.output[i]; unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; SpvId out_type = get_output_type(ctx, slot, so_output.num_components); @@ -1309,8 +1307,6 @@ emit_so_outputs(struct ntv_context *ctx, const struct zink_so_info *so_info) { for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - if (so_info->skip[i]) - continue; uint32_t components[NIR_MAX_VEC_COMPONENTS]; unsigned slot = so_info->so_info_slots[i]; struct pipe_stream_output so_output = so_info->so_info.output[i]; diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index de5ef7f44cf7..1f56708cad90 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -424,16 +424,16 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ for (unsigned i = 0; i < so_info->num_outputs; i++) { const struct pipe_stream_output *output = &so_info->output[i]; unsigned slot = reverse_map[output->register_index]; + /* always set stride to be used during draw */ + zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer]; if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) && !output->start_component) { nir_variable *var = NULL; while (!var) var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--); slot++; - if (inlined[slot]) { - zs->streamout.skip[i] = true; + if (inlined[slot]) continue; - } assert(var && var->data.location == slot); /* if this is the entire variable, try to blast it out during the initial declaration */ if (glsl_get_components(var->type) == output->num_components) { @@ -442,13 +442,13 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ var->data.xfb.stride = so_info->stride[output->output_buffer] * 4; var->data.offset = output->dst_offset * 4; var->data.stream = output->stream; - zs->streamout.skip[i] = true; inlined[slot] = true; continue; } } + zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output; /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ - zs->streamout.so_info_slots[i] = reverse_map[output->register_index]; + zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index]; } zs->streamout.have_xfb = true; } @@ -691,11 +691,8 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, } ret->nir = nir; - if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings) { - memcpy(&ret->streamout.so_info, so_info, sizeof(struct pipe_stream_output_info)); + if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings) update_so_info(ret, so_info, nir->info.outputs_written, have_psiz); - } - return ret; } diff --git a/src/gallium/drivers/zink/zink_compiler.h b/src/gallium/drivers/zink/zink_compiler.h index bc2c57df257c..e61d8ae45b7e 100644 --- a/src/gallium/drivers/zink/zink_compiler.h +++ b/src/gallium/drivers/zink/zink_compiler.h @@ -51,7 +51,6 @@ struct tgsi_token; struct zink_so_info { struct pipe_stream_output_info so_info; unsigned so_info_slots[PIPE_MAX_SO_OUTPUTS]; - bool skip[PIPE_MAX_SO_OUTPUTS]; bool have_xfb; }; -- GitLab From 8937b5f2685a00d93c3b304ce4153489cb5f507b Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Wed, 24 Feb 2021 18:27:31 -0500 Subject: [PATCH 14/14] zink: don't pass so_info to ntv at all unless it's necessary this is only needed for explicit xfb outputs Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 1f56708cad90..6d3688b0163c 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -450,7 +450,7 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index]; } - zs->streamout.have_xfb = true; + zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs; } VkShaderModule -- GitLab