From 28cfd570a2653aa2d197696d285f186399b385b9 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 16:10:28 +0100 Subject: [PATCH 01/20] vrend: move fbfetch coherency support from key to shader config The property doesn't change, so it doesn't make sense to set it in the shader key. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 2 +- src/vrend_shader.c | 4 ++-- src/vrend_shader.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index aa76a9595..7816d9571 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3343,7 +3343,6 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, if (type == PIPE_SHADER_FRAGMENT && vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { key->fs_logicop_enabled = sub_ctx->blend_state.logicop_enable; key->fs_logicop_func = sub_ctx->blend_state.logicop_func; - key->fs_logicop_emulate_coherent = !has_feature(feat_framebuffer_fetch_non_coherent); } key->invert_fs_origin = !sub_ctx->inverted_fbo_content; @@ -6454,6 +6453,7 @@ struct vrend_context *vrend_create_context(int id, uint32_t nlen, const char *de grctx->shader_cfg.has_conservative_depth = has_feature(feat_conservative_depth); grctx->shader_cfg.use_integer = vrend_state.use_integer; grctx->shader_cfg.has_dual_src_blend = has_feature(feat_dual_src_blend); + grctx->shader_cfg.has_fbfetch_coherent = has_feature(feat_framebuffer_fetch); vrend_renderer_create_sub_ctx(grctx, 0); vrend_renderer_set_sub_ctx(grctx, 0); diff --git a/src/vrend_shader.c b/src/vrend_shader.c index f7ace6f8b..6ebdfe339 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -5473,7 +5473,7 @@ static void emit_header(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *g } if (logiop_require_inout(ctx->key)) { - if (ctx->key->fs_logicop_emulate_coherent) + if (ctx->cfg->has_fbfetch_coherent) emit_ext(glsl_strbufs, 
"EXT_shader_framebuffer_fetch", "require"); else emit_ext(glsl_strbufs, "EXT_shader_framebuffer_fetch_non_coherent", "require"); @@ -6398,7 +6398,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, emit_hdrf(glsl_strbufs, "%s fsout_tmp_c%d;\n", type, i); if (logiop_require_inout(ctx->key)) { - const char *noncoherent = ctx->key->fs_logicop_emulate_coherent ? "" : ", noncoherent"; + const char *noncoherent = ctx->cfg->has_fbfetch_coherent ? "" : ", noncoherent"; emit_hdrf(glsl_strbufs, "layout (location=%d%s) inout highp %s fsout_c%d;\n", i, noncoherent, type, i); } else emit_hdrf(glsl_strbufs, "layout (location=%d) out %s fsout_c%d;\n", i, diff --git a/src/vrend_shader.h b/src/vrend_shader.h index fd2356b7e..6b8503bfb 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -132,7 +132,6 @@ struct vrend_shader_key { bool flatshade; bool guest_sent_io_arrays; bool fs_logicop_enabled; - bool fs_logicop_emulate_coherent; enum pipe_logicop fs_logicop_func; uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; @@ -171,6 +170,7 @@ struct vrend_shader_cfg { bool has_conservative_depth; bool use_integer; bool has_dual_src_blend; + bool has_fbfetch_coherent; }; struct vrend_context; -- GitLab From c35df49c4243353872a6de956422e05b5be2e924 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 16:17:46 +0100 Subject: [PATCH 02/20] vrend: refactor shader key evaluation - don't write zeros, the memory is initialized to zero anyway - reorder evaluation to check whether it is a FS only once. 
Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 72 +++++++++++++++----------------------------- 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 7816d9571..06856e104 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3303,10 +3303,10 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, { unsigned type = sel->type; - if (vrend_state.use_core_profile == true) { + if (vrend_state.use_core_profile) { int i; bool add_alpha_test = true; - key->cbufs_are_a8_bitmask = 0; + // Only use integer info when drawing to avoid stale info. if (vrend_state.use_integer && sub_ctx->drawing) { key->attrib_signed_int_bitmask = sub_ctx->ve->signed_int_bitmask; @@ -3335,29 +3335,15 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->clip_plane_enable = sub_ctx->rs_state.clip_plane_enable; key->flatshade = sub_ctx->rs_state.flatshade ? true : false; - } else { - key->add_alpha_test = 0; - key->pstipple_tex = 0; - } - - if (type == PIPE_SHADER_FRAGMENT && vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { - key->fs_logicop_enabled = sub_ctx->blend_state.logicop_enable; - key->fs_logicop_func = sub_ctx->blend_state.logicop_func; } key->invert_fs_origin = !sub_ctx->inverted_fbo_content; - if (type == PIPE_SHADER_FRAGMENT) - key->fs_swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; - - if (sub_ctx->shaders[PIPE_SHADER_GEOMETRY]) - key->gs_present = true; - if (sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]) - key->tcs_present = true; - if (sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]) - key->tes_present = true; + key->gs_present = !!sub_ctx->shaders[PIPE_SHADER_GEOMETRY]; + key->tcs_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]; + key->tes_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]; - int prev_type = -1; + int prev_type = type != PIPE_SHADER_VERTEX ? 
PIPE_SHADER_VERTEX : -1; /* Gallium sends and binds the shaders in the reverse order, so if an * old shader is still bound we should ignore the "previous" (as in @@ -3368,25 +3354,16 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, case PIPE_SHADER_GEOMETRY: if (key->tcs_present || key->tes_present) prev_type = PIPE_SHADER_TESS_EVAL; - else - prev_type = PIPE_SHADER_VERTEX; break; case PIPE_SHADER_FRAGMENT: if (key->gs_present) prev_type = PIPE_SHADER_GEOMETRY; else if (key->tcs_present || key->tes_present) prev_type = PIPE_SHADER_TESS_EVAL; - else - prev_type = PIPE_SHADER_VERTEX; break; case PIPE_SHADER_TESS_EVAL: if (key->tcs_present) prev_type = PIPE_SHADER_TESS_CTRL; - else - prev_type = PIPE_SHADER_VERTEX; - break; - case PIPE_SHADER_TESS_CTRL: - prev_type = PIPE_SHADER_VERTEX; break; default: break; @@ -3407,9 +3384,17 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->force_invariant_inputs = sub_ctx->shaders[prev_type]->sinfo.invariant_outputs; } - // Only use coord_replace if frag shader receives GL_POINTS + int next_type = -1; + if (type == PIPE_SHADER_FRAGMENT) { + key->fs_swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; + if (vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { + key->fs_logicop_enabled = sub_ctx->blend_state.logicop_enable; + key->fs_logicop_func = sub_ctx->blend_state.logicop_func; + } int fs_prim_mode = sub_ctx->prim_mode; // inherit draw-call's mode + + // Only use coord_replace if frag shader receives GL_POINTS switch (prev_type) { case PIPE_SHADER_TESS_EVAL: if (sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]->sinfo.tes_point_mode) @@ -3424,9 +3409,16 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, && key->fs_prim_is_points ? 
sub_ctx->rs_state.sprite_coord_enable : 0x0; - } + } else { + if (sub_ctx->shaders[PIPE_SHADER_FRAGMENT]) { + struct vrend_shader *fs = + sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; + key->compiled_fs_uid = fs->uid; + key->fs_info = &fs->sel->sinfo; + next_type = PIPE_SHADER_FRAGMENT; + } + } - int next_type = -1; switch (type) { case PIPE_SHADER_VERTEX: if (key->tcs_present) @@ -3438,20 +3430,14 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, next_type = PIPE_SHADER_TESS_EVAL; else next_type = PIPE_SHADER_TESS_CTRL; - } else - next_type = PIPE_SHADER_FRAGMENT; + } break; case PIPE_SHADER_TESS_CTRL: next_type = PIPE_SHADER_TESS_EVAL; break; - case PIPE_SHADER_GEOMETRY: - next_type = PIPE_SHADER_FRAGMENT; - break; case PIPE_SHADER_TESS_EVAL: if (key->gs_present) next_type = PIPE_SHADER_GEOMETRY; - else - next_type = PIPE_SHADER_FRAGMENT; default: break; } @@ -3462,14 +3448,6 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->num_indirect_patch_outputs = sub_ctx->shaders[next_type]->sinfo.num_indirect_patch_inputs; key->generic_outputs_expected_mask = sub_ctx->shaders[next_type]->sinfo.generic_inputs_emitted_mask; } - - if (type != PIPE_SHADER_FRAGMENT && - sub_ctx->shaders[PIPE_SHADER_FRAGMENT]) { - struct vrend_shader *fs = - sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; - key->compiled_fs_uid = fs->uid; - key->fs_info = &fs->sel->sinfo; - } } static int vrend_shader_create(struct vrend_context *ctx, -- GitLab From 4663b0b1089fbbc92b5b3489cf04b38dcc716dfd Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 17:37:52 +0100 Subject: [PATCH 03/20] vrend: make shader interpinfo a fixed size array Use a bitfield to declare vrend_interp_info and make it a fixed size array. On the one hand this avoids all the hassles with allocating and freeing memory, and on the other hand it will make it possible to shrink the size of the data that is passed from the sinfo to the shader key. 
Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 1 - src/vrend_shader.c | 18 ++---------------- src/vrend_shader.h | 10 +++++----- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 06856e104..019e4dee6 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -1139,7 +1139,6 @@ static void vrend_destroy_shader_selector(struct vrend_shader_selector *sel) free(sel->sinfo.so_names[i]); free(sel->tmp_buf); free(sel->sinfo.so_names); - free(sel->sinfo.interpinfo); free(sel->sinfo.sampler_arrays); free(sel->sinfo.image_arrays); free(sel->tokens); diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 6ebdfe339..59f7dc519 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -6775,26 +6775,12 @@ static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vre static boolean fill_interpolants(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) { - boolean ret; - if (!ctx->num_interps) return true; if (ctx->prog_type == TGSI_PROCESSOR_VERTEX || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) return true; - free(sinfo->interpinfo); - sinfo->interpinfo = calloc(ctx->num_interps, sizeof(struct vrend_interp_info)); - if (!sinfo->interpinfo) - return false; - - ret = fill_fragment_interpolants(ctx, sinfo); - if (ret == false) - goto out_fail; - - return true; - out_fail: - free(sinfo->interpinfo); - return false; + return fill_fragment_interpolants(ctx, sinfo); } static boolean analyze_instruction(struct tgsi_iterate_context *iter, @@ -7128,7 +7114,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre if (!vs_info || !fs_info) return true; - if (!fs_info->interpinfo) + if (!fs_info->num_interps) return true; if (fs_info->has_sample_input) { diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 6b8503bfb..5ca624923 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -54,10 +54,10 @@ enum gl_advanced_blend_mode 
/* need to store patching info for interpolation */ struct vrend_interp_info { - int semantic_name; - int semantic_index; - int interpolate; - unsigned location; + unsigned semantic_name : 6; + unsigned semantic_index : 16; + unsigned interpolate : 3; + unsigned location : 3; }; struct vrend_array { @@ -112,7 +112,7 @@ struct vrend_shader_info { struct pipe_stream_output_info so_info; - struct vrend_interp_info *interpinfo; + struct vrend_interp_info interpinfo[PIPE_MAX_SHADER_INPUTS]; char **so_names; uint64_t invariant_outputs; }; -- GitLab From e91d27e146a07b2c1185226411645f5a609d1016 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 18:26:02 +0100 Subject: [PATCH 04/20] shader: Fix copying the generic and patch output layouts The info should not be overwritten by other outputs. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_shader.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 59f7dc519..20ed0da26 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -6867,13 +6867,13 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->guest_sent_io_arrays = ctx->guest_sent_io_arrays; sinfo->num_generic_and_patch_outputs = 0; for(unsigned i = 0; i < ctx->num_outputs; i++) { + if (ctx->outputs[i].name == TGSI_SEMANTIC_GENERIC || ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].name = ctx->outputs[i].name; sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].sid = ctx->outputs[i].sid; sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].location = ctx->outputs[i].layout_location; sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].array_id = ctx->outputs[i].array_id; sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].usage_mask = ctx->outputs[i].usage_mask; - if (ctx->outputs[i].name == 
TGSI_SEMANTIC_GENERIC || ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { - sinfo->num_generic_and_patch_outputs++; + sinfo->num_generic_and_patch_outputs++; } } -- GitLab From eb3114e9657341d724fe3398239dccf25df6a07a Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 18:28:55 +0100 Subject: [PATCH 05/20] shader: factor out the fs_info that is needed in the shader key Keeping all the information in one place might come in handy when we want to further refactor this. On the way also compress the structure. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 2 +- src/vrend_shader.c | 22 +++++++++++----------- src/vrend_shader.h | 13 +++++++++---- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 019e4dee6..b17d29356 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3413,7 +3413,7 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, struct vrend_shader *fs = sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; key->compiled_fs_uid = fs->uid; - key->fs_info = &fs->sel->sinfo; + key->fs_info = &fs->sel->sinfo.fs_info; next_type = PIPE_SHADER_FRAGMENT; } } diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 20ed0da26..9b797ff25 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -6748,7 +6748,7 @@ static int emit_ios(const struct dump_ctx *ctx, return glsl_ver_required; } -static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) +static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vrend_fs_shader_info *fs_info) { uint32_t i, index = 0; @@ -6764,10 +6764,10 @@ static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vre vrend_printf( "mismatch in number of interps %d %d\n", index, ctx->num_interps); return true; } - sinfo->interpinfo[index].semantic_name = ctx->inputs[i].name; - sinfo->interpinfo[index].semantic_index = 
ctx->inputs[i].sid; - sinfo->interpinfo[index].interpolate = ctx->inputs[i].interpolate; - sinfo->interpinfo[index].location = ctx->inputs[i].location; + fs_info->interpinfo[index].semantic_name = ctx->inputs[i].name; + fs_info->interpinfo[index].semantic_index = ctx->inputs[i].sid; + fs_info->interpinfo[index].interpolate = ctx->inputs[i].interpolate; + fs_info->interpinfo[index].location = ctx->inputs[i].location; index++; } return true; @@ -6780,7 +6780,7 @@ static boolean fill_interpolants(const struct dump_ctx *ctx, struct vrend_shader if (ctx->prog_type == TGSI_PROCESSOR_VERTEX || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) return true; - return fill_fragment_interpolants(ctx, sinfo); + return fill_fragment_interpolants(ctx, &sinfo->fs_info); } static boolean analyze_instruction(struct tgsi_iterate_context *iter, @@ -6819,7 +6819,7 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin { sinfo->num_ucp = ctx->key->clip_plane_enable ? 8 : 0; sinfo->has_pervertex_in = ctx->has_pervertex; - sinfo->has_sample_input = ctx->has_sample_input; + sinfo->fs_info.has_sample_input = ctx->has_sample_input; bool has_prop = (ctx->num_clip_dist_prop + ctx->num_cull_dist_prop) > 0; sinfo->num_clip_out = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? ctx->num_clip_dist : 8); sinfo->num_cull_out = has_prop ? 
ctx->num_cull_dist_prop : 0; @@ -6843,10 +6843,10 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->num_indirect_patch_outputs = ctx->patch_ios.output_range.io.last - ctx->patch_ios.output_range.io.sid + 1; sinfo->num_inputs = ctx->num_inputs; - sinfo->num_interps = ctx->num_interps; + sinfo->fs_info.num_interps = ctx->num_interps; sinfo->num_outputs = ctx->num_outputs; sinfo->shadow_samp_mask = ctx->shadow_samp_mask; - sinfo->glsl_ver = ctx->glsl_ver_required; + sinfo->fs_info.glsl_ver = ctx->glsl_ver_required; sinfo->gs_out_prim = ctx->gs_out_prim; sinfo->tes_prim = ctx->tes_prim_mode; sinfo->tes_point_mode = ctx->tes_point_mode; @@ -6927,7 +6927,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre const struct vrend_shader_cfg *cfg, struct vrend_strarray *prog_strings, const struct vrend_shader_info *vs_info, - const struct vrend_shader_info *fs_info, + const struct vrend_fs_shader_info *fs_info, const char *oprefix, bool flatshade); @@ -7105,7 +7105,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre const struct vrend_shader_cfg *cfg, struct vrend_strarray *prog_strings, const struct vrend_shader_info *vs_info, - const struct vrend_shader_info *fs_info, + const struct vrend_fs_shader_info *fs_info, const char *oprefix, bool flatshade) { int i; diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 5ca624923..f1a52a6af 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -73,6 +73,13 @@ struct vrend_layout_info { int usage_mask; }; +struct vrend_fs_shader_info { + int num_interps; + int glsl_ver; + bool has_sample_input; + struct vrend_interp_info interpinfo[PIPE_MAX_SHADER_INPUTS]; +}; + struct vrend_shader_info { uint32_t samplers_used_mask; uint32_t images_used_mask; @@ -84,7 +91,6 @@ struct vrend_shader_info { struct vrend_layout_info generic_outputs_layout[64]; int num_consts; int num_inputs; - int num_interps; int num_outputs; bool 
ubo_indirect; uint8_t num_indirect_generic_outputs; @@ -93,8 +99,6 @@ struct vrend_shader_info { uint8_t num_indirect_patch_inputs; uint32_t generic_inputs_emitted_mask; int num_ucp; - int glsl_ver; - bool has_sample_input; uint8_t num_clip_out; uint8_t num_cull_out; uint32_t shadow_samp_mask; @@ -112,7 +116,8 @@ struct vrend_shader_info { struct pipe_stream_output_info so_info; - struct vrend_interp_info interpinfo[PIPE_MAX_SHADER_INPUTS]; + struct vrend_fs_shader_info fs_info; + char **so_names; uint64_t invariant_outputs; }; -- GitLab From 15edd6cbcef020ee49581981553cc85aba183334 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 18:34:27 +0100 Subject: [PATCH 06/20] shader: compress the shader key members Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_shader.h | 63 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/vrend_shader.h b/src/vrend_shader.h index f1a52a6af..659d080ca 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -66,11 +66,11 @@ struct vrend_array { }; struct vrend_layout_info { - unsigned name; - int sid; - int location; - int array_id; - int usage_mask; + unsigned name : 6; + unsigned sid : 16 ; + unsigned location : 16 ; + unsigned array_id : 16 ; + unsigned usage_mask : 5; }; struct vrend_fs_shader_info { @@ -123,44 +123,45 @@ struct vrend_shader_info { }; struct vrend_shader_key { - bool fs_prim_is_points; - uint32_t coord_replace; - bool invert_fs_origin; - bool pstipple_tex; - bool add_alpha_test; - bool color_two_side; - uint8_t alpha_test; - uint8_t clip_plane_enable; - bool gs_present; - bool tcs_present; - bool tes_present; - bool flatshade; - bool guest_sent_io_arrays; - bool fs_logicop_enabled; - enum pipe_logicop fs_logicop_func; - uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; + uint64_t force_invariant_inputs; - uint32_t num_prev_generic_and_patch_outputs; - struct vrend_layout_info 
prev_stage_generic_and_patch_outputs_layout[64]; + struct vrend_fs_shader_info *fs_info; - uint8_t prev_stage_num_clip_out; - uint8_t prev_stage_num_cull_out; - bool next_stage_pervertex_in; + uint32_t coord_replace; + uint32_t num_prev_generic_and_patch_outputs; uint32_t cbufs_are_a8_bitmask; uint32_t cbufs_signed_int_bitmask; uint32_t cbufs_unsigned_int_bitmask; uint32_t attrib_signed_int_bitmask; uint32_t attrib_unsigned_int_bitmask; + uint32_t generic_outputs_expected_mask; + uint32_t compiled_fs_uid; + + uint32_t fs_prim_is_points : 1; + uint32_t invert_fs_origin : 1; + uint32_t pstipple_tex : 1; + uint32_t add_alpha_test : 1; + uint32_t color_two_side : 1; + uint32_t gs_present : 1; + uint32_t tcs_present : 1; + uint32_t tes_present : 1; + uint32_t flatshade : 1; + uint32_t guest_sent_io_arrays : 1; + uint32_t fs_logicop_enabled : 1; + uint32_t next_stage_pervertex_in : 1; + uint32_t fs_logicop_func : 4; + + uint8_t alpha_test; + uint8_t clip_plane_enable; + uint8_t prev_stage_num_clip_out; + uint8_t prev_stage_num_cull_out; uint8_t num_indirect_generic_outputs; uint8_t num_indirect_patch_outputs; uint8_t num_indirect_generic_inputs; uint8_t num_indirect_patch_inputs; - uint32_t generic_outputs_expected_mask; uint8_t fs_swizzle_output_rgb_to_bgr; - uint64_t force_invariant_inputs; - - uint32_t compiled_fs_uid; - struct vrend_shader_info *fs_info; + uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; + struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; }; struct vrend_shader_cfg { -- GitLab From ff3a93637a30a2e3cc18ebbc976ff44426cb12f6 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 18:34:54 +0100 Subject: [PATCH 07/20] vrend: copy only as many layout info fields as needed Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index b17d29356..53124a06a 100644 --- a/src/vrend_renderer.c +++ 
b/src/vrend_renderer.c @@ -3379,7 +3379,7 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, memcpy(key->prev_stage_generic_and_patch_outputs_layout, sub_ctx->shaders[prev_type]->sinfo.generic_outputs_layout, - 64 * sizeof (struct vrend_layout_info)); + sub_ctx->shaders[prev_type]->sinfo.num_generic_and_patch_outputs * sizeof (struct vrend_layout_info)); key->force_invariant_inputs = sub_ctx->shaders[prev_type]->sinfo.invariant_outputs; } -- GitLab From 47c899764a357e987ff37ed2e2bc652f8fe11d4b Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 18:45:19 +0100 Subject: [PATCH 08/20] shader: reorder shader info structure to improve alignment of elements Signed-off-by: Gert Wollny --- src/vrend_shader.c | 2 +- src/vrend_shader.h | 47 +++++++++++++++++++++++----------------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 9b797ff25..3de31c251 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -6830,7 +6830,7 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->ssbo_used_mask = ctx->ssbo_used_mask; - sinfo->ubo_indirect = ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT); + sinfo->ubo_indirect = !!(ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)); if (ctx->generic_ios.input_range.used) sinfo->num_indirect_generic_inputs = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 659d080ca..229b2b6cf 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -81,45 +81,44 @@ struct vrend_fs_shader_info { }; struct vrend_shader_info { + uint64_t invariant_outputs; + struct vrend_layout_info generic_outputs_layout[64]; + struct vrend_array *sampler_arrays; + struct vrend_array *image_arrays; + char **so_names; + struct vrend_fs_shader_info fs_info; + struct pipe_stream_output_info so_info; + uint32_t 
samplers_used_mask; uint32_t images_used_mask; uint32_t ubo_used_mask; uint32_t ssbo_used_mask; uint32_t num_generic_and_patch_outputs; - bool has_pervertex_in; - bool guest_sent_io_arrays; - struct vrend_layout_info generic_outputs_layout[64]; + uint32_t generic_inputs_emitted_mask; + uint32_t shadow_samp_mask; + uint32_t attrib_input_mask; + uint32_t fs_blend_equation_advanced; + int num_consts; int num_inputs; int num_outputs; - bool ubo_indirect; - uint8_t num_indirect_generic_outputs; - uint8_t num_indirect_patch_outputs; - uint8_t num_indirect_generic_inputs; - uint8_t num_indirect_patch_inputs; - uint32_t generic_inputs_emitted_mask; int num_ucp; - uint8_t num_clip_out; - uint8_t num_cull_out; - uint32_t shadow_samp_mask; int gs_out_prim; int tes_prim; - bool tes_point_mode; - uint32_t attrib_input_mask; - uint32_t fs_blend_equation_advanced; - - struct vrend_array *sampler_arrays; int num_sampler_arrays; - - struct vrend_array *image_arrays; int num_image_arrays; - struct pipe_stream_output_info so_info; - - struct vrend_fs_shader_info fs_info; + uint8_t num_indirect_generic_outputs; + uint8_t num_indirect_patch_outputs; + uint8_t num_indirect_generic_inputs; + uint8_t num_indirect_patch_inputs; + uint8_t num_clip_out; + uint8_t num_cull_out; - char **so_names; - uint64_t invariant_outputs; + uint8_t has_pervertex_in : 1; + uint8_t guest_sent_io_arrays : 1; + uint8_t ubo_indirect : 1; + uint8_t tes_point_mode : 1; }; struct vrend_shader_key { -- GitLab From 729a81927490c402ec02b2d49e950d4c5cb4b539 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 17 Feb 2021 19:32:02 +0100 Subject: [PATCH 09/20] vrend/shader: combine shader info to be passed to next into a structure In addition combine values into a bitfield so that the compiler can use a 64 bit move instead of individual moves. 
Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 16 +++----- src/vrend_shader.c | 88 ++++++++++++++++++++++---------------------- src/vrend_shader.h | 26 +++++++------ 3 files changed, 64 insertions(+), 66 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 53124a06a..360201dd3 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3369,18 +3369,14 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, } } - if (prev_type != -1 && sub_ctx->shaders[prev_type]) { - key->prev_stage_num_clip_out = sub_ctx->shaders[prev_type]->sinfo.num_clip_out; - key->prev_stage_num_cull_out = sub_ctx->shaders[prev_type]->sinfo.num_cull_out; - key->num_indirect_generic_inputs = sub_ctx->shaders[prev_type]->sinfo.num_indirect_generic_outputs; - key->num_indirect_patch_inputs = sub_ctx->shaders[prev_type]->sinfo.num_indirect_patch_outputs; - key->num_prev_generic_and_patch_outputs = sub_ctx->shaders[prev_type]->sinfo.num_generic_and_patch_outputs; - key->guest_sent_io_arrays = sub_ctx->shaders[prev_type]->sinfo.guest_sent_io_arrays; + struct vrend_shader_selector *prev = sub_ctx->shaders[prev_type]; + if (prev_type != -1 && prev) { + key->input = prev->sinfo.out; + key->force_invariant_inputs = prev->sinfo.invariant_outputs; memcpy(key->prev_stage_generic_and_patch_outputs_layout, - sub_ctx->shaders[prev_type]->sinfo.generic_outputs_layout, - sub_ctx->shaders[prev_type]->sinfo.num_generic_and_patch_outputs * sizeof (struct vrend_layout_info)); - key->force_invariant_inputs = sub_ctx->shaders[prev_type]->sinfo.invariant_outputs; + prev->sinfo.generic_outputs_layout, + prev->sinfo.out.num_generic_and_patch * sizeof (struct vrend_layout_info)); } int next_type = -1; diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 3de31c251..5c1dba9ac 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -2891,9 +2891,9 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, char clip_indirect[32] = ""; - 
bool has_prev_vals = (ctx->key->prev_stage_num_cull_out + ctx->key->prev_stage_num_clip_out) > 0; - int num_culls = has_prev_vals ? ctx->key->prev_stage_num_cull_out : 0; - int num_clips = has_prev_vals ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; + bool has_prev_vals = (ctx->key->input.num_cull + ctx->key->input.num_clip) > 0; + int num_culls = has_prev_vals ? ctx->key->input.num_cull : 0; + int num_clips = has_prev_vals ? ctx->key->input.num_clip : ctx->num_in_clip_dist; int base_idx = ctx->inputs[input_idx].sid * 4; /* With arrays enabled , but only when gl_ClipDistance or gl_CullDistance are emitted (>4) @@ -2920,12 +2920,12 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, idx -= num_clips; cc_name = "gl_CullDistance"; } - if (ctx->key->prev_stage_num_cull_out) - if (idx >= ctx->key->prev_stage_num_cull_out) + if (ctx->key->input.num_cull) + if (idx >= ctx->key->input.num_cull) idx = 0; } else { - if (ctx->key->prev_stage_num_clip_out) - if (idx >= ctx->key->prev_stage_num_clip_out) + if (ctx->key->input.num_clip) + if (idx >= ctx->key->input.num_clip) idx = 0; } if (gl_in) @@ -4368,8 +4368,8 @@ static void rewrite_io_ranged(struct dump_ctx *ctx) { if ((ctx->info.indirect_files & (1 << TGSI_FILE_INPUT)) || - ctx->key->num_indirect_generic_inputs || - ctx->key->num_indirect_patch_inputs) { + ctx->key->input.num_indirect_generic || + ctx->key->input.num_indirect_patch) { for (uint i = 0; i < ctx->num_inputs; ++i) { if (ctx->inputs[i].name == TGSI_SEMANTIC_PATCH) { @@ -4402,10 +4402,10 @@ void rewrite_io_ranged(struct dump_ctx *ctx) ctx->generic_ios.input_range.io.last = ctx->inputs[i].sid; } - if (ctx->key->num_indirect_generic_inputs > 0) - ctx->generic_ios.input_range.io.last = ctx->generic_ios.input_range.io.sid + ctx->key->num_indirect_generic_inputs - 1; - if (ctx->key->num_indirect_patch_inputs > 0) - ctx->patch_ios.input_range.io.last = ctx->patch_ios.input_range.io.sid + ctx->key->num_indirect_patch_inputs - 1; + if 
(ctx->key->input.num_indirect_generic > 0) + ctx->generic_ios.input_range.io.last = ctx->generic_ios.input_range.io.sid + ctx->key->input.num_indirect_generic - 1; + if (ctx->key->input.num_indirect_patch > 0) + ctx->patch_ios.input_range.io.last = ctx->patch_ios.input_range.io.sid + ctx->key->input.num_indirect_patch - 1; } snprintf(ctx->patch_ios.input_range.io.glsl_name, 64, "%s_p%d", get_stage_input_name_prefix(ctx, ctx->prog_type), ctx->patch_ios.input_range.io.sid); @@ -4571,7 +4571,7 @@ void emit_fs_clipdistance_load(const struct dump_ctx *ctx, if (!ctx->fs_uses_clipdist_input) return; - int prev_num = ctx->key->prev_stage_num_clip_out + ctx->key->prev_stage_num_cull_out; + int prev_num = ctx->key->input.num_clip + ctx->key->input.num_cull; int ndists; const char *prefix=""; @@ -4595,12 +4595,12 @@ void emit_fs_clipdistance_load(const struct dump_ctx *ctx, } bool is_cull = false; if (prev_num > 0) { - if (i >= ctx->key->prev_stage_num_clip_out && i < prev_num) + if (i >= ctx->key->input.num_clip && i < prev_num) is_cull = true; } const char *clip_cull = is_cull ? "Cull" : "Clip"; emit_buff(glsl_strbufs, "clip_dist_temp[%d].%c = %sgl_%sDistance[%d];\n", clipidx, wm, prefix, clip_cull, - is_cull ? i - ctx->key->prev_stage_num_clip_out : i); + is_cull ? 
i - ctx->key->input.num_clip : i); } } @@ -4627,7 +4627,7 @@ static bool apply_prev_layout(const struct vrend_shader_key *key, if (io->name == TGSI_SEMANTIC_GENERIC || io->name == TGSI_SEMANTIC_PATCH) { const struct vrend_layout_info *layout = key->prev_stage_generic_and_patch_outputs_layout; - for (unsigned generic_index = 0; generic_index < key->num_prev_generic_and_patch_outputs; ++generic_index, ++layout) { + for (unsigned generic_index = 0; generic_index < key->input.num_generic_and_patch; ++generic_index, ++layout) { bool already_found_one = false; @@ -5524,7 +5524,7 @@ static void emit_header(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *g if (ctx->ubo_used_mask) emit_ext(glsl_strbufs, "ARB_uniform_buffer_object", "require"); - if (ctx->num_cull_dist_prop || ctx->key->prev_stage_num_cull_out) + if (ctx->num_cull_dist_prop || ctx->key->input.num_cull) emit_ext(glsl_strbufs, "ARB_cull_distance", "require"); if (ctx->ssbo_used_mask) emit_ext(glsl_strbufs, "ARB_shader_storage_buffer_object", "require"); @@ -6014,10 +6014,10 @@ static void emit_ios_indirect_generics_input(const struct dump_ctx *ctx, if (ctx->generic_ios.input_range.used) { int size = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; assert(size < 256 && size >= 0); - if (size < ctx->key->num_indirect_generic_inputs) { + if (size < ctx->key->input.num_indirect_generic) { VREND_DEBUG(dbg_shader, NULL, "WARNING: shader key indicates less indirect inputs" " (%d) then are actually used (%d)\n", - ctx->key->num_indirect_generic_inputs, size); + ctx->key->input.num_indirect_generic, size); } if (prefer_generic_io_block(ctx, io_in)) { @@ -6436,14 +6436,14 @@ static void emit_ios_fs(const struct dump_ctx *ctx, } if (ctx->num_in_clip_dist) { - if (ctx->key->prev_stage_num_clip_out) { - emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->key->prev_stage_num_clip_out); - } else if (ctx->num_in_clip_dist > 4 && !ctx->key->prev_stage_num_cull_out) { + if 
(ctx->key->input.num_clip) { + emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->key->input.num_clip); + } else if (ctx->num_in_clip_dist > 4 && !ctx->key->input.num_cull) { emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->num_in_clip_dist); } - if (ctx->key->prev_stage_num_cull_out) { - emit_hdrf(glsl_strbufs, "in float gl_CullDistance[%d];\n", ctx->key->prev_stage_num_cull_out); + if (ctx->key->input.num_cull) { + emit_hdrf(glsl_strbufs, "in float gl_CullDistance[%d];\n", ctx->key->input.num_cull); } if(ctx->fs_uses_clipdist_input) emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); @@ -6515,8 +6515,8 @@ static void emit_ios_geom(const struct dump_ctx *ctx, char clip_var[64] = ""; char cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6592,8 +6592,8 @@ static void emit_ios_tcs(const struct dump_ctx *ctx, int clip_dist, cull_dist; char clip_var[64] = "", cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6651,8 +6651,8 @@ static void emit_ios_tes(const struct dump_ctx *ctx, int clip_dist, cull_dist; char clip_var[64] = "", cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? 
ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6821,8 +6821,8 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->has_pervertex_in = ctx->has_pervertex; sinfo->fs_info.has_sample_input = ctx->has_sample_input; bool has_prop = (ctx->num_clip_dist_prop + ctx->num_cull_dist_prop) > 0; - sinfo->num_clip_out = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? ctx->num_clip_dist : 8); - sinfo->num_cull_out = has_prop ? ctx->num_cull_dist_prop : 0; + sinfo->out.num_clip = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? ctx->num_clip_dist : 8); + sinfo->out.num_cull = has_prop ? ctx->num_cull_dist_prop : 0; sinfo->samplers_used_mask = ctx->samplers_used; sinfo->images_used_mask = ctx->images_used_mask; sinfo->num_consts = ctx->num_consts; @@ -6838,9 +6838,9 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->num_indirect_patch_inputs = ctx->patch_ios.input_range.io.last - ctx->patch_ios.input_range.io.sid + 1; if (ctx->generic_ios.output_range.used) - sinfo->num_indirect_generic_outputs = ctx->generic_ios.output_range.io.last - ctx->generic_ios.output_range.io.sid + 1; + sinfo->out.num_indirect_generic = ctx->generic_ios.output_range.io.last - ctx->generic_ios.output_range.io.sid + 1; if (ctx->patch_ios.output_range.used) - sinfo->num_indirect_patch_outputs = ctx->patch_ios.output_range.io.last - ctx->patch_ios.output_range.io.sid + 1; + sinfo->out.num_indirect_patch = ctx->patch_ios.output_range.io.last - ctx->patch_ios.output_range.io.sid + 1; sinfo->num_inputs = ctx->num_inputs; sinfo->fs_info.num_interps = ctx->num_interps; @@ -6864,16 +6864,16 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin * to the next shader stage. mesa/tgsi doesn't provide this information for * TCS, TES, and GEOM shaders. 
*/ - sinfo->guest_sent_io_arrays = ctx->guest_sent_io_arrays; - sinfo->num_generic_and_patch_outputs = 0; + sinfo->out.guest_sent_io_arrays = ctx->guest_sent_io_arrays; + sinfo->out.num_generic_and_patch = 0; for(unsigned i = 0; i < ctx->num_outputs; i++) { if (ctx->outputs[i].name == TGSI_SEMANTIC_GENERIC || ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].name = ctx->outputs[i].name; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].sid = ctx->outputs[i].sid; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].location = ctx->outputs[i].layout_location; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].array_id = ctx->outputs[i].array_id; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].usage_mask = ctx->outputs[i].usage_mask; - sinfo->num_generic_and_patch_outputs++; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].name = ctx->outputs[i].name; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].sid = ctx->outputs[i].sid; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].location = ctx->outputs[i].layout_location; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].array_id = ctx->outputs[i].array_id; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].usage_mask = ctx->outputs[i].usage_mask; + sinfo->out.num_generic_and_patch++; } } @@ -6971,7 +6971,7 @@ bool vrend_convert_shader(const struct vrend_context *rctx, ctx.ssbo_atomic_array_base = 0xffffffff; ctx.has_sample_input = false; ctx.req_local_mem = req_local_mem; - ctx.guest_sent_io_arrays = key->guest_sent_io_arrays; + ctx.guest_sent_io_arrays = key->input.guest_sent_io_arrays; ctx.generic_ios.outputs_expected_mask = key->generic_outputs_expected_mask; tgsi_scan_shader(tokens, &ctx.info); diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 229b2b6cf..b858563e4 100644 --- 
a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -80,8 +80,19 @@ struct vrend_fs_shader_info { struct vrend_interp_info interpinfo[PIPE_MAX_SHADER_INPUTS]; }; +struct vrend_shader_info_out { + uint64_t num_clip : 8; + uint64_t num_cull : 8; + uint64_t num_indirect_generic : 8; + uint64_t num_indirect_patch : 8; + uint64_t num_generic_and_patch : 8; + uint64_t guest_sent_io_arrays : 1; +}; + struct vrend_shader_info { uint64_t invariant_outputs; + struct vrend_shader_info_out out; + struct vrend_layout_info generic_outputs_layout[64]; struct vrend_array *sampler_arrays; struct vrend_array *image_arrays; @@ -93,7 +104,6 @@ struct vrend_shader_info { uint32_t images_used_mask; uint32_t ubo_used_mask; uint32_t ssbo_used_mask; - uint32_t num_generic_and_patch_outputs; uint32_t generic_inputs_emitted_mask; uint32_t shadow_samp_mask; uint32_t attrib_input_mask; @@ -108,15 +118,12 @@ struct vrend_shader_info { int num_sampler_arrays; int num_image_arrays; - uint8_t num_indirect_generic_outputs; - uint8_t num_indirect_patch_outputs; + uint8_t num_indirect_generic_inputs; uint8_t num_indirect_patch_inputs; - uint8_t num_clip_out; - uint8_t num_cull_out; uint8_t has_pervertex_in : 1; - uint8_t guest_sent_io_arrays : 1; + uint8_t ubo_indirect : 1; uint8_t tes_point_mode : 1; }; @@ -125,9 +132,9 @@ struct vrend_shader_key { uint64_t force_invariant_inputs; struct vrend_fs_shader_info *fs_info; + struct vrend_shader_info_out input; uint32_t coord_replace; - uint32_t num_prev_generic_and_patch_outputs; uint32_t cbufs_are_a8_bitmask; uint32_t cbufs_signed_int_bitmask; uint32_t cbufs_unsigned_int_bitmask; @@ -145,19 +152,14 @@ struct vrend_shader_key { uint32_t tcs_present : 1; uint32_t tes_present : 1; uint32_t flatshade : 1; - uint32_t guest_sent_io_arrays : 1; uint32_t fs_logicop_enabled : 1; uint32_t next_stage_pervertex_in : 1; uint32_t fs_logicop_func : 4; uint8_t alpha_test; uint8_t clip_plane_enable; - uint8_t prev_stage_num_clip_out; - uint8_t prev_stage_num_cull_out; 
uint8_t num_indirect_generic_outputs; uint8_t num_indirect_patch_outputs; - uint8_t num_indirect_generic_inputs; - uint8_t num_indirect_patch_inputs; uint8_t fs_swizzle_output_rgb_to_bgr; uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; -- GitLab From 1cc53f715cdf9d5df9021f41e5c2baa4a68ddd6f Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 10:07:40 +0100 Subject: [PATCH 10/20] shader: move output interface info into struct Compress the structure so that it fits into a 64 bit value. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 10 +++------- src/vrend_shader.c | 34 ++++++++++++++++++---------------- src/vrend_shader.h | 23 +++++++++++------------ 3 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 360201dd3..d25bd2b06 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3400,7 +3400,7 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, break; } key->fs_prim_is_points = (fs_prim_mode == PIPE_PRIM_POINTS); - key->coord_replace = sub_ctx->rs_state.point_quad_rasterization + key->fs_coord_replace = sub_ctx->rs_state.point_quad_rasterization && key->fs_prim_is_points ? 
sub_ctx->rs_state.sprite_coord_enable : 0x0; @@ -3437,12 +3437,8 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, break; } - if (next_type != -1 && sub_ctx->shaders[next_type]) { - key->next_stage_pervertex_in = sub_ctx->shaders[next_type]->sinfo.has_pervertex_in; - key->num_indirect_generic_outputs = sub_ctx->shaders[next_type]->sinfo.num_indirect_generic_inputs; - key->num_indirect_patch_outputs = sub_ctx->shaders[next_type]->sinfo.num_indirect_patch_inputs; - key->generic_outputs_expected_mask = sub_ctx->shaders[next_type]->sinfo.generic_inputs_emitted_mask; - } + if (next_type != -1 && sub_ctx->shaders[next_type]) + key->output = sub_ctx->shaders[next_type]->sinfo.in; } static int vrend_shader_create(struct vrend_context *ctx, diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 5c1dba9ac..731766b00 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -1171,7 +1171,7 @@ iter_declaration(struct tgsi_iterate_context *iter, case TGSI_SEMANTIC_PATCH: case TGSI_SEMANTIC_GENERIC: if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - if (ctx->key->coord_replace & (1 << ctx->inputs[i].sid)) { + if (ctx->key->fs_coord_replace & (1 << ctx->inputs[i].sid)) { if (ctx->cfg->use_gles) name_prefix = "vec4(gl_PointCoord.x, mix(1.0 - gl_PointCoord.y, gl_PointCoord.y, clamp(winsys_adjust_y, 0.0, 1.0)), 0.0, 1.0)"; else @@ -4425,8 +4425,8 @@ void rewrite_io_ranged(struct dump_ctx *ctx) } if ((ctx->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) || - ctx->key->num_indirect_generic_outputs || - ctx->key->num_indirect_patch_outputs) { + ctx->key->output.num_indirect_generic || + ctx->key->output.num_indirect_patch) { for (uint i = 0; i < ctx->num_outputs; ++i) { if (ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { @@ -4763,7 +4763,7 @@ static void handle_io_arrays(struct dump_ctx *ctx) if (ctx->num_inputs > 0) if (evaluate_layout_overlays(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - 
ctx->key->coord_replace)) { + ctx->key->fs_coord_replace)) { require_enhanced_layouts = true; } @@ -4779,7 +4779,7 @@ static void handle_io_arrays(struct dump_ctx *ctx) rewrite_io_ranged(ctx); rewrite_components(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - ctx->key->coord_replace, true); + ctx->key->fs_coord_replace, true); rewrite_components(ctx->num_outputs, ctx->outputs, get_stage_output_name_prefix(ctx->prog_type), 0, true); @@ -6084,10 +6084,12 @@ emit_ios_generic(const struct dump_ctx *ctx, postfix); if (io->name == TGSI_SEMANTIC_GENERIC) { - if (iot == io_in) + assert(io->sid < 32); + if (iot == io_in) { generic_ios->inputs_emitted_mask |= 1 << io->sid; - else + } else { generic_ios->outputs_emitted_mask |= 1 << io->sid; + } } } else { @@ -6293,7 +6295,7 @@ static void emit_ios_vs(const struct dump_ctx *ctx, if (ctx->key->clip_plane_enable) { emit_hdr(glsl_strbufs, "uniform vec4 clipp[8];\n"); } - if ((ctx->key->gs_present || ctx->key->tes_present) && ctx->key->next_stage_pervertex_in) { + if ((ctx->key->gs_present || ctx->key->tes_present) && ctx->key->output.use_pervertex) { emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n %s%s};\n", clip_buf, cull_buf); } else { emit_hdrf(glsl_strbufs, "%s%s", clip_buf, cull_buf); @@ -6362,7 +6364,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, } if (ctx->cfg->use_gles && !ctx->winsys_adjust_y_emitted && - (ctx->key->coord_replace & (1 << ctx->inputs[i].sid))) { + (ctx->key->fs_coord_replace & (1 << ctx->inputs[i].sid))) { *winsys_adjust_y_emitted = true; emit_hdr(glsl_strbufs, "uniform float winsys_adjust_y;\n"); } @@ -6603,7 +6605,7 @@ static void emit_ios_tcs(const struct dump_ctx *ctx, *has_pervertex = true; emit_hdrf(glsl_strbufs, "in gl_PerVertex {\n vec4 gl_Position; \n %s%s} gl_in[];\n", clip_var, cull_var); } - if (ctx->num_clip_dist && ctx->key->next_stage_pervertex_in) { + if (ctx->num_clip_dist && ctx->key->output.use_pervertex) { 
emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n float gl_ClipDistance[%d];\n} gl_out[];\n", ctx->num_clip_dist); emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); } @@ -6662,7 +6664,7 @@ static void emit_ios_tes(const struct dump_ctx *ctx, *has_pervertex = true; emit_hdrf(glsl_strbufs, "in gl_PerVertex {\n vec4 gl_Position; \n %s%s} gl_in[];\n", clip_var, cull_var); } - if (ctx->num_clip_dist && ctx->key->next_stage_pervertex_in) { + if (ctx->num_clip_dist && ctx->key->output.use_pervertex) { emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n float gl_ClipDistance[%d];\n} gl_out[];\n", ctx->num_clip_dist); emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); } @@ -6818,7 +6820,7 @@ static boolean analyze_instruction(struct tgsi_iterate_context *iter, static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) { sinfo->num_ucp = ctx->key->clip_plane_enable ? 8 : 0; - sinfo->has_pervertex_in = ctx->has_pervertex; + sinfo->in.use_pervertex = ctx->has_pervertex; sinfo->fs_info.has_sample_input = ctx->has_sample_input; bool has_prop = (ctx->num_clip_dist_prop + ctx->num_cull_dist_prop) > 0; sinfo->out.num_clip = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? 
ctx->num_clip_dist : 8); @@ -6833,9 +6835,9 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->ubo_indirect = !!(ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)); if (ctx->generic_ios.input_range.used) - sinfo->num_indirect_generic_inputs = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; + sinfo->in.num_indirect_generic = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; if (ctx->patch_ios.input_range.used) - sinfo->num_indirect_patch_inputs = ctx->patch_ios.input_range.io.last - ctx->patch_ios.input_range.io.sid + 1; + sinfo->in.num_indirect_patch = ctx->patch_ios.input_range.io.last - ctx->patch_ios.input_range.io.sid + 1; if (ctx->generic_ios.output_range.used) sinfo->out.num_indirect_generic = ctx->generic_ios.output_range.io.last - ctx->generic_ios.output_range.io.sid + 1; @@ -6887,7 +6889,7 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin free(sinfo->image_arrays); sinfo->image_arrays = ctx->image_arrays; sinfo->num_image_arrays = ctx->num_image_arrays; - sinfo->generic_inputs_emitted_mask = ctx->generic_ios.inputs_emitted_mask; + sinfo->in.generic_emitted_mask = ctx->generic_ios.inputs_emitted_mask; for (unsigned i = 0; i < ctx->num_outputs; ++i) { if (ctx->outputs[i].invariant) @@ -6972,7 +6974,7 @@ bool vrend_convert_shader(const struct vrend_context *rctx, ctx.has_sample_input = false; ctx.req_local_mem = req_local_mem; ctx.guest_sent_io_arrays = key->input.guest_sent_io_arrays; - ctx.generic_ios.outputs_expected_mask = key->generic_outputs_expected_mask; + ctx.generic_ios.outputs_expected_mask = key->output.generic_emitted_mask; tgsi_scan_shader(tokens, &ctx.info); /* if we are in core profile mode we should use GLSL 1.40 */ diff --git a/src/vrend_shader.h b/src/vrend_shader.h index b858563e4..b26ed3100 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -89,9 +89,18 @@ struct 
vrend_shader_info_out { uint64_t guest_sent_io_arrays : 1; }; +struct vrend_shader_info_in { + uint64_t generic_emitted_mask : 32; + uint64_t num_indirect_generic : 8; + uint64_t num_indirect_patch : 8; + uint64_t use_pervertex : 1; +}; + + struct vrend_shader_info { uint64_t invariant_outputs; struct vrend_shader_info_out out; + struct vrend_shader_info_in in; struct vrend_layout_info generic_outputs_layout[64]; struct vrend_array *sampler_arrays; @@ -104,7 +113,6 @@ struct vrend_shader_info { uint32_t images_used_mask; uint32_t ubo_used_mask; uint32_t ssbo_used_mask; - uint32_t generic_inputs_emitted_mask; uint32_t shadow_samp_mask; uint32_t attrib_input_mask; uint32_t fs_blend_equation_advanced; @@ -118,12 +126,6 @@ struct vrend_shader_info { int num_sampler_arrays; int num_image_arrays; - - uint8_t num_indirect_generic_inputs; - uint8_t num_indirect_patch_inputs; - - uint8_t has_pervertex_in : 1; - uint8_t ubo_indirect : 1; uint8_t tes_point_mode : 1; }; @@ -133,14 +135,14 @@ struct vrend_shader_key { struct vrend_fs_shader_info *fs_info; struct vrend_shader_info_out input; + struct vrend_shader_info_in output; - uint32_t coord_replace; + uint32_t fs_coord_replace; uint32_t cbufs_are_a8_bitmask; uint32_t cbufs_signed_int_bitmask; uint32_t cbufs_unsigned_int_bitmask; uint32_t attrib_signed_int_bitmask; uint32_t attrib_unsigned_int_bitmask; - uint32_t generic_outputs_expected_mask; uint32_t compiled_fs_uid; uint32_t fs_prim_is_points : 1; @@ -153,13 +155,10 @@ struct vrend_shader_key { uint32_t tes_present : 1; uint32_t flatshade : 1; uint32_t fs_logicop_enabled : 1; - uint32_t next_stage_pervertex_in : 1; uint32_t fs_logicop_func : 4; uint8_t alpha_test; uint8_t clip_plane_enable; - uint8_t num_indirect_generic_outputs; - uint8_t num_indirect_patch_outputs; uint8_t fs_swizzle_output_rgb_to_bgr; uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; -- GitLab From 
0f6af022225114c1d25bce6156f72cc812d27959 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 10:15:44 +0100 Subject: [PATCH 11/20] shader: move FS specific key values into sub-structure Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 14 +++++++------- src/vrend_shader.c | 32 ++++++++++++++++---------------- src/vrend_shader.h | 16 ++++++++++------ 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index d25bd2b06..3196eb20b 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3336,7 +3336,7 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->flatshade = sub_ctx->rs_state.flatshade ? true : false; } - key->invert_fs_origin = !sub_ctx->inverted_fbo_content; + key->fs.invert_origin = !sub_ctx->inverted_fbo_content; key->gs_present = !!sub_ctx->shaders[PIPE_SHADER_GEOMETRY]; key->tcs_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]; @@ -3382,10 +3382,10 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, int next_type = -1; if (type == PIPE_SHADER_FRAGMENT) { - key->fs_swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; + key->fs.swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; if (vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { - key->fs_logicop_enabled = sub_ctx->blend_state.logicop_enable; - key->fs_logicop_func = sub_ctx->blend_state.logicop_func; + key->fs.logicop_enabled = sub_ctx->blend_state.logicop_enable; + key->fs.logicop_func = sub_ctx->blend_state.logicop_func; } int fs_prim_mode = sub_ctx->prim_mode; // inherit draw-call's mode @@ -3399,9 +3399,9 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, fs_prim_mode = sub_ctx->shaders[PIPE_SHADER_GEOMETRY]->sinfo.gs_out_prim; break; } - key->fs_prim_is_points = (fs_prim_mode == PIPE_PRIM_POINTS); - key->fs_coord_replace = 
sub_ctx->rs_state.point_quad_rasterization - && key->fs_prim_is_points + key->fs.prim_is_points = (fs_prim_mode == PIPE_PRIM_POINTS); + key->fs.coord_replace = sub_ctx->rs_state.point_quad_rasterization + && key->fs.prim_is_points ? sub_ctx->rs_state.sprite_coord_enable : 0x0; } else { diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 731766b00..758732b33 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -533,7 +533,7 @@ static inline bool fs_emit_layout(const struct dump_ctx *ctx) if coord_origin is 0 and invert is 1 - emit nothing (lower) if coord origin is 1 and invert is 0 - emit nothing (lower) if coord_origin is 1 and invert is 1 - emit origin upper left */ - if (!(ctx->fs_coord_origin ^ ctx->key->invert_fs_origin)) + if (!(ctx->fs_coord_origin ^ ctx->key->fs.invert_origin)) return true; return false; } @@ -919,10 +919,10 @@ iter_inputs(struct tgsi_iterate_context *iter, static bool logiop_require_inout(const struct vrend_shader_key *key) { - if (!key->fs_logicop_enabled) + if (!key->fs.logicop_enabled) return false; - switch (key->fs_logicop_func) { + switch (key->fs.logicop_func) { case PIPE_LOGICOP_CLEAR: case PIPE_LOGICOP_SET: case PIPE_LOGICOP_COPY: @@ -1171,7 +1171,7 @@ iter_declaration(struct tgsi_iterate_context *iter, case TGSI_SEMANTIC_PATCH: case TGSI_SEMANTIC_GENERIC: if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - if (ctx->key->fs_coord_replace & (1 << ctx->inputs[i].sid)) { + if (ctx->key->fs.coord_replace & (1 << ctx->inputs[i].sid)) { if (ctx->cfg->use_gles) name_prefix = "vec4(gl_PointCoord.x, mix(1.0 - gl_PointCoord.y, gl_PointCoord.y, clamp(winsys_adjust_y, 0.0, 1.0)), 0.0, 1.0)"; else @@ -1344,7 +1344,7 @@ iter_declaration(struct tgsi_iterate_context *iter, name_prefix = "ex"; break; } else if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && - ctx->key->fs_logicop_enabled) { + ctx->key->fs.logicop_enabled) { name_prefix = "fsout_tmp"; break; } @@ -2060,7 +2060,7 @@ static void 
emit_fragment_logicop(const struct dump_ctx *ctx, for (unsigned i = 0; i < ctx->num_outputs; i++) { mask[i] = (1 << ctx->key->surface_component_bits[i]) - 1; scale[i] = mask[i]; - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_INVERT: snprintf(src_fb[i], ARRAY_SIZE(src_fb[i]), "ivec4(%f * fsout_c%d + 0.5)", scale[i], i); @@ -2091,7 +2091,7 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, } for (unsigned i = 0; i < ctx->num_outputs; i++) { - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_CLEAR: snprintf(full_op[i], ARRAY_SIZE(full_op[i]), "%s", "vec4(0)"); @@ -2159,7 +2159,7 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, } for (unsigned i = 0; i < ctx->num_outputs; i++) { - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_NOOP: break; case PIPE_LOGICOP_COPY: @@ -2177,7 +2177,7 @@ static void emit_cbuf_swizzle(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *glsl_strbufs) { for (uint i = 0; i < ctx->num_outputs; i++) { - if (ctx->key->fs_swizzle_output_rgb_to_bgr & (1 << i)) { + if (ctx->key->fs.swizzle_output_rgb_to_bgr & (1 << i)) { emit_buff(glsl_strbufs, "fsout_c%d = fsout_c%d.zyxw;\n", i, i); } } @@ -2196,10 +2196,10 @@ static void handle_fragment_proc_exit(const struct dump_ctx *ctx, emit_alpha_test(ctx, glsl_strbufs); - if (ctx->key->fs_logicop_enabled) + if (ctx->key->fs.logicop_enabled) emit_fragment_logicop(ctx, glsl_strbufs); - if (ctx->key->fs_swizzle_output_rgb_to_bgr) + if (ctx->key->fs.swizzle_output_rgb_to_bgr) emit_cbuf_swizzle(ctx, glsl_strbufs); if (ctx->write_all_cbufs) @@ -4763,7 +4763,7 @@ static void handle_io_arrays(struct dump_ctx *ctx) if (ctx->num_inputs > 0) if (evaluate_layout_overlays(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - ctx->key->fs_coord_replace)) { + ctx->key->fs.coord_replace)) { require_enhanced_layouts = true; } 
@@ -4779,7 +4779,7 @@ static void handle_io_arrays(struct dump_ctx *ctx) rewrite_io_ranged(ctx); rewrite_components(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - ctx->key->fs_coord_replace, true); + ctx->key->fs.coord_replace, true); rewrite_components(ctx->num_outputs, ctx->outputs, get_stage_output_name_prefix(ctx->prog_type), 0, true); @@ -6328,7 +6328,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, uint32_t i; if (fs_emit_layout(ctx)) { - bool upper_left = !(ctx->fs_coord_origin ^ ctx->key->invert_fs_origin); + bool upper_left = !(ctx->fs_coord_origin ^ ctx->key->fs.invert_origin); char comma = (upper_left && ctx->fs_pixel_center) ? ',' : ' '; if (!ctx->cfg->use_gles) @@ -6364,7 +6364,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, } if (ctx->cfg->use_gles && !ctx->winsys_adjust_y_emitted && - (ctx->key->fs_coord_replace & (1 << ctx->inputs[i].sid))) { + (ctx->key->fs.coord_replace & (1 << ctx->inputs[i].sid))) { *winsys_adjust_y_emitted = true; emit_hdr(glsl_strbufs, "uniform float winsys_adjust_y;\n"); } @@ -6396,7 +6396,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, for (i = 0; i < (uint32_t)ctx->cfg->max_draw_buffers; i++) { if (ctx->cfg->use_gles) { - if (ctx->key->fs_logicop_enabled) + if (ctx->key->fs.logicop_enabled) emit_hdrf(glsl_strbufs, "%s fsout_tmp_c%d;\n", type, i); if (logiop_require_inout(ctx->key)) { diff --git a/src/vrend_shader.h b/src/vrend_shader.h index b26ed3100..68196620d 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -137,7 +137,15 @@ struct vrend_shader_key { struct vrend_shader_info_out input; struct vrend_shader_info_in output; - uint32_t fs_coord_replace; + struct { + uint32_t swizzle_output_rgb_to_bgr : 8; + uint32_t logicop_func : 4; + uint32_t logicop_enabled : 1; + uint32_t prim_is_points : 1; + uint32_t invert_origin : 1; + uint32_t coord_replace; + } fs; + uint32_t cbufs_are_a8_bitmask; uint32_t cbufs_signed_int_bitmask; uint32_t 
cbufs_unsigned_int_bitmask; @@ -145,8 +153,6 @@ struct vrend_shader_key { uint32_t attrib_unsigned_int_bitmask; uint32_t compiled_fs_uid; - uint32_t fs_prim_is_points : 1; - uint32_t invert_fs_origin : 1; uint32_t pstipple_tex : 1; uint32_t add_alpha_test : 1; uint32_t color_two_side : 1; @@ -154,12 +160,10 @@ struct vrend_shader_key { uint32_t tcs_present : 1; uint32_t tes_present : 1; uint32_t flatshade : 1; - uint32_t fs_logicop_enabled : 1; - uint32_t fs_logicop_func : 4; uint8_t alpha_test; uint8_t clip_plane_enable; - uint8_t fs_swizzle_output_rgb_to_bgr; + uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; }; -- GitLab From 60f932152b1bce63cea45a11f7888a3a5ae455ed Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 14:16:16 +0100 Subject: [PATCH 12/20] vrend: make UPDATE_INT_SIGN_MASK a macro to be able to pass different masks Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 3196eb20b..af760cd93 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -2720,17 +2720,14 @@ void vrend_set_viewport_states(struct vrend_context *ctx, } } -static void update_int_sign_masks(enum pipe_format fmt, int i, - uint32_t *signed_mask, - uint32_t *unsigned_mask) { - if (vrend_state.use_integer && - util_format_is_pure_integer(fmt)) { - if (util_format_is_pure_uint(fmt)) - (*unsigned_mask) |= (1 << i); - else - (*signed_mask) |= (1 << i); +#define UPDATE_INT_SIGN_MASK(fmt, i, signed_mask, unsigned_mask) \ + if (vrend_state.use_integer && \ + util_format_is_pure_integer(fmt)) { \ + if (util_format_is_pure_uint(fmt)) \ + unsigned_mask |= (1 << i); \ + else \ + signed_mask |= (1 << i); \ } -} int vrend_create_vertex_elements_state(struct vrend_context *ctx, uint32_t handle, @@ -2828,9 +2825,9 @@ int 
vrend_create_vertex_elements_state(struct vrend_context *ctx, struct vrend_vertex_element *ve = &v->elements[i]; if (util_format_is_pure_integer(ve->base.src_format)) { - update_int_sign_masks(ve->base.src_format, i, - &v->signed_int_bitmask, - &v->unsigned_int_bitmask); + UPDATE_INT_SIGN_MASK(ve->base.src_format, i, + v->signed_int_bitmask, + v->unsigned_int_bitmask); glVertexAttribIFormat(i, ve->nr_chan, ve->type, ve->base.src_offset); } else @@ -3318,9 +3315,9 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->cbufs_are_a8_bitmask |= (1 << i); if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { add_alpha_test = false; - update_int_sign_masks(sub_ctx->surf[i]->format, i, - &key->cbufs_signed_int_bitmask, - &key->cbufs_unsigned_int_bitmask); + UPDATE_INT_SIGN_MASK(sub_ctx->surf[i]->format, i, + key->cbufs_signed_int_bitmask, + key->cbufs_unsigned_int_bitmask); } key->surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); } -- GitLab From 8f7010371762dd36bcd018a6b801dac43832b1bc Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 14:35:13 +0100 Subject: [PATCH 13/20] vrend/shader: make cbuf masks 8 bit and move them into fs specific section Mesa clamps the number of cbufs to eight, so we can use uint8_t bitmask for cbufs states. Be safe for the future by only reporting support for at most eight cbufs. 
Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 12 +++++++++--- src/vrend_shader.c | 10 +++++----- src/vrend_shader.h | 8 ++++---- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index af760cd93..56cb8df18 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3312,12 +3312,12 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, if (!sub_ctx->surf[i]) continue; if (vrend_format_is_emulated_alpha(sub_ctx->surf[i]->format)) - key->cbufs_are_a8_bitmask |= (1 << i); + key->fs.cbufs_are_a8_bitmask |= (1 << i); if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { add_alpha_test = false; UPDATE_INT_SIGN_MASK(sub_ctx->surf[i]->format, i, - key->cbufs_signed_int_bitmask, - key->cbufs_unsigned_int_bitmask); + key->fs.cbufs_signed_int_bitmask, + key->fs.cbufs_unsigned_int_bitmask); } key->surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); } @@ -6186,6 +6186,12 @@ int vrend_renderer_init(const struct vrend_if_cbs *cbs, uint32_t flags) glGetIntegerv(GL_MAX_DRAW_BUFFERS, (GLint *) &vrend_state.max_draw_buffers); + /* Mesa clamps this value to 8 anyway, so just make sure that this side + * doesn't exceed the number to be on the safe side when using 8-bit masks + * for the color buffers */ + if (vrend_state.max_draw_buffers > 8) + vrend_state.max_draw_buffers = 8; + if (!has_feature(feat_arb_robustness) && !has_feature(feat_gles_khr_robustness)) { vrend_printf("WARNING: running without ARB/KHR robustness in place may crash\n"); diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 758732b33..810647b71 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -1328,8 +1328,8 @@ iter_declaration(struct tgsi_iterate_context *iter, break; case TGSI_SEMANTIC_COLOR: if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - ctx->outputs[i].type = 
get_type(ctx->key->cbufs_signed_int_bitmask, - ctx->key->cbufs_unsigned_int_bitmask, + ctx->outputs[i].type = get_type(ctx->key->fs.cbufs_signed_int_bitmask, + ctx->key->fs.cbufs_unsigned_int_bitmask, ctx->outputs[i].sid); } @@ -2189,7 +2189,7 @@ static void handle_fragment_proc_exit(const struct dump_ctx *ctx, if (ctx->key->pstipple_tex) emit_pstipple_pass(glsl_strbufs); - if (ctx->key->cbufs_are_a8_bitmask) + if (ctx->key->fs.cbufs_are_a8_bitmask) emit_a8_swizzle(glsl_strbufs); if (ctx->key->add_alpha_test) @@ -6389,9 +6389,9 @@ static void emit_ios_fs(const struct dump_ctx *ctx, if (ctx->write_all_cbufs) { const char* type = "vec4"; - if (ctx->key->cbufs_unsigned_int_bitmask) + if (ctx->key->fs.cbufs_unsigned_int_bitmask) type = "uvec4"; - else if (ctx->key->cbufs_signed_int_bitmask) + else if (ctx->key->fs.cbufs_signed_int_bitmask) type = "ivec4"; for (i = 0; i < (uint32_t)ctx->cfg->max_draw_buffers; i++) { diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 68196620d..6f86437ec 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -138,7 +138,10 @@ struct vrend_shader_key { struct vrend_shader_info_in output; struct { - uint32_t swizzle_output_rgb_to_bgr : 8; + uint8_t swizzle_output_rgb_to_bgr; + uint8_t cbufs_are_a8_bitmask; + uint8_t cbufs_signed_int_bitmask; + uint8_t cbufs_unsigned_int_bitmask; uint32_t logicop_func : 4; uint32_t logicop_enabled : 1; uint32_t prim_is_points : 1; @@ -146,9 +149,6 @@ struct vrend_shader_key { uint32_t coord_replace; } fs; - uint32_t cbufs_are_a8_bitmask; - uint32_t cbufs_signed_int_bitmask; - uint32_t cbufs_unsigned_int_bitmask; uint32_t attrib_signed_int_bitmask; uint32_t attrib_unsigned_int_bitmask; uint32_t compiled_fs_uid; -- GitLab From 4233c35791d2c6ab10cdec6c88f3c9e0e167a4c2 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 14:35:54 +0100 Subject: [PATCH 14/20] vrend: refactor vrender_get_glsl_version to return the value There is no reason why this value should be obtained using a return 
parameter. Also fix the version to return zero when the version string doesn't follow the standard. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 56cb8df18..b28eaa755 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -757,7 +757,7 @@ static void vrend_destroy_query_object(void *obj_ptr); static void vrend_finish_context_switch(struct vrend_context *ctx); static void vrend_patch_blend_state(struct vrend_sub_context *sub_ctx); static void vrend_update_frontface_state(struct vrend_sub_context *ctx); -static void vrender_get_glsl_version(int *glsl_version); +static int vrender_get_glsl_version(void); static void vrend_destroy_program(struct vrend_linked_shader_program *ent); static void vrend_apply_sampler_state(struct vrend_sub_context *sub_ctx, struct vrend_resource *res, @@ -6430,7 +6430,7 @@ struct vrend_context *vrend_create_context(int id, uint32_t nlen, const char *de vrend_renderer_create_sub_ctx(grctx, 0); vrend_renderer_set_sub_ctx(grctx, 0); - vrender_get_glsl_version(&grctx->shader_cfg.glsl_version); + grctx->shader_cfg.glsl_version = vrender_get_glsl_version(); if (!grctx->ctx_id) grctx->fence_retire = vrend_clicbs->ctx0_fence_retire; @@ -9883,12 +9883,11 @@ int vrend_create_so_target(struct vrend_context *ctx, return 0; } -static void vrender_get_glsl_version(int *glsl_version) +static int vrender_get_glsl_version(void) { - int major_local, minor_local; + int major_local = 0, minor_local = 0; const GLubyte *version_str; MAYBE_UNUSED int c; - int version; version_str = glGetString(GL_SHADING_LANGUAGE_VERSION); if (vrend_state.use_gles) { @@ -9902,9 +9901,7 @@ static void vrender_get_glsl_version(int *glsl_version) assert(c == 2); } - version = (major_local * 100) + minor_local; - if (glsl_version) - *glsl_version = version; + return (major_local * 100) + minor_local; } static void 
vrend_fill_caps_glsl_version(int gl_ver, int gles_ver, -- GitLab From cf039b3af3df54b6071d71542127aed8bc57f0e8 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 11 Mar 2021 18:53:36 +0100 Subject: [PATCH 15/20] shader: reorder more shader key components Separate more elements into the shader stages that they are used in and evaluate them only for the stage they are relevant for. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 36 +++++++++++---------- src/vrend_shader.c | 6 ++-- src/vrend_shader.h | 76 +++++++++++++++++++++++--------------------- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index b28eaa755..806f18395 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -3304,26 +3304,29 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, bool add_alpha_test = true; // Only use integer info when drawing to avoid stale info. - if (vrend_state.use_integer && sub_ctx->drawing) { - key->attrib_signed_int_bitmask = sub_ctx->ve->signed_int_bitmask; - key->attrib_unsigned_int_bitmask = sub_ctx->ve->unsigned_int_bitmask; - } - for (i = 0; i < sub_ctx->nr_cbufs; i++) { - if (!sub_ctx->surf[i]) - continue; - if (vrend_format_is_emulated_alpha(sub_ctx->surf[i]->format)) - key->fs.cbufs_are_a8_bitmask |= (1 << i); - if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { + if (vrend_state.use_integer && sub_ctx->drawing && + type == PIPE_SHADER_VERTEX) { + key->vs.attrib_signed_int_bitmask = sub_ctx->ve->signed_int_bitmask; + key->vs.attrib_unsigned_int_bitmask = sub_ctx->ve->unsigned_int_bitmask; + } + if (type == PIPE_SHADER_FRAGMENT) { + for (i = 0; i < sub_ctx->nr_cbufs; i++) { + if (!sub_ctx->surf[i]) + continue; + if (vrend_format_is_emulated_alpha(sub_ctx->surf[i]->format)) + key->fs.cbufs_are_a8_bitmask |= (1 << i); + if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { add_alpha_test = false; 
UPDATE_INT_SIGN_MASK(sub_ctx->surf[i]->format, i, key->fs.cbufs_signed_int_bitmask, key->fs.cbufs_unsigned_int_bitmask); + } + key->fs.surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); + } + if (add_alpha_test) { + key->add_alpha_test = sub_ctx->dsa_state.alpha.enabled; + key->alpha_test = sub_ctx->dsa_state.alpha.func; } - key->surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); - } - if (add_alpha_test) { - key->add_alpha_test = sub_ctx->dsa_state.alpha.enabled; - key->alpha_test = sub_ctx->dsa_state.alpha.func; } key->pstipple_tex = sub_ctx->rs_state.poly_stipple_enable; @@ -3333,8 +3336,6 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->flatshade = sub_ctx->rs_state.flatshade ? true : false; } - key->fs.invert_origin = !sub_ctx->inverted_fbo_content; - key->gs_present = !!sub_ctx->shaders[PIPE_SHADER_GEOMETRY]; key->tcs_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]; key->tes_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]; @@ -3379,6 +3380,7 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, int next_type = -1; if (type == PIPE_SHADER_FRAGMENT) { + key->fs.invert_origin = !sub_ctx->inverted_fbo_content; key->fs.swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; if (vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { key->fs.logicop_enabled = sub_ctx->blend_state.logicop_enable; diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 810647b71..4cc6eefbc 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -974,8 +974,8 @@ iter_declaration(struct tgsi_iterate_context *iter, } if (iter->processor.Processor == TGSI_PROCESSOR_VERTEX) { ctx->attrib_input_mask |= (1 << decl->Range.First); - ctx->inputs[i].type = get_type(ctx->key->attrib_signed_int_bitmask, - ctx->key->attrib_unsigned_int_bitmask, + 
ctx->inputs[i].type = get_type(ctx->key->vs.attrib_signed_int_bitmask, + ctx->key->vs.attrib_unsigned_int_bitmask, decl->Range.First); } ctx->inputs[i].name = decl->Semantic.Name; @@ -2058,7 +2058,7 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, char full_op[PIPE_MAX_COLOR_BUFS][128 + 8]; for (unsigned i = 0; i < ctx->num_outputs; i++) { - mask[i] = (1 << ctx->key->surface_component_bits[i]) - 1; + mask[i] = (1 << ctx->key->fs.surface_component_bits[i]) - 1; scale[i] = mask[i]; switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_INVERT: diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 6f86437ec..68f710a97 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -136,51 +136,55 @@ struct vrend_shader_key { struct vrend_fs_shader_info *fs_info; struct vrend_shader_info_out input; struct vrend_shader_info_in output; + struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; - struct { - uint8_t swizzle_output_rgb_to_bgr; - uint8_t cbufs_are_a8_bitmask; - uint8_t cbufs_signed_int_bitmask; - uint8_t cbufs_unsigned_int_bitmask; - uint32_t logicop_func : 4; - uint32_t logicop_enabled : 1; - uint32_t prim_is_points : 1; - uint32_t invert_origin : 1; - uint32_t coord_replace; - } fs; - - uint32_t attrib_signed_int_bitmask; - uint32_t attrib_unsigned_int_bitmask; - uint32_t compiled_fs_uid; + union { + struct { + uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; + uint32_t coord_replace; + uint8_t swizzle_output_rgb_to_bgr; + uint8_t cbufs_are_a8_bitmask; + uint8_t cbufs_signed_int_bitmask; + uint8_t cbufs_unsigned_int_bitmask; + uint32_t logicop_func : 4; + uint32_t logicop_enabled : 1; + uint32_t prim_is_points : 1; + uint32_t invert_origin : 1; + } fs; + + struct { + uint32_t attrib_signed_int_bitmask; + uint32_t attrib_unsigned_int_bitmask; + } vs; + }; - uint32_t pstipple_tex : 1; - uint32_t add_alpha_test : 1; - uint32_t color_two_side : 1; - uint32_t gs_present : 1; - uint32_t tcs_present : 1; - uint32_t 
tes_present : 1; - uint32_t flatshade : 1; + uint32_t compiled_fs_uid; uint8_t alpha_test; uint8_t clip_plane_enable; + uint8_t pstipple_tex : 1; + uint8_t add_alpha_test : 1; + uint8_t color_two_side : 1; + uint8_t gs_present : 1; + uint8_t tcs_present : 1; + uint8_t tes_present : 1; + uint8_t flatshade : 1; - uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; - struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; }; struct vrend_shader_cfg { - int glsl_version; - int max_draw_buffers; - bool use_gles; - bool use_core_profile; - bool use_explicit_locations; - bool has_arrays_of_arrays; - bool has_gpu_shader5; - bool has_es31_compat; - bool has_conservative_depth; - bool use_integer; - bool has_dual_src_blend; - bool has_fbfetch_coherent; + uint32_t glsl_version : 12; + uint32_t max_draw_buffers : 4; + uint32_t use_gles : 1; + uint32_t use_core_profile : 1; + uint32_t use_explicit_locations : 1; + uint32_t has_arrays_of_arrays : 1; + uint32_t has_gpu_shader5 : 1; + uint32_t has_es31_compat : 1; + uint32_t has_conservative_depth : 1; + uint32_t use_integer : 1; + uint32_t has_dual_src_blend : 1; + uint32_t has_fbfetch_coherent : 1; }; struct vrend_context; -- GitLab From 31178f23272243cce1cdac7739fbed69229d144f Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 15:01:58 +0100 Subject: [PATCH 16/20] vrend: reorder and compress vrend_state structure Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_renderer.c | 64 +++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 806f18395..8e1d972a5 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -307,54 +307,52 @@ static const struct { }; struct global_renderer_state { - int gl_major_ver; - int gl_minor_ver; - + struct vrend_context *ctx0; struct vrend_context *current_ctx; struct vrend_context *current_hw_ctx; - struct list_head waiting_query_list; - - bool 
finishing; - bool use_gles; - bool use_core_profile; - bool use_external_blob; - bool use_integer; -#ifdef HAVE_EPOXY_EGL_H - bool use_egl_fence; -#endif - - bool features[feat_last]; - /* these appeared broken on at least one driver */ - bool use_explicit_locations; - uint32_t max_draw_buffers; - uint32_t max_texture_2d_size; - uint32_t max_texture_3d_size; - uint32_t max_texture_cube_size; - - /* threaded sync */ - bool stop_sync_thread; - int eventfd; - - pipe_mutex fence_mutex; - /* a fence is always on either of the lists, or is pointed to by - * fence_waiting - */ + struct list_head waiting_query_list; struct list_head fence_list; struct list_head fence_wait_list; struct vrend_fence *fence_waiting; - pipe_condvar fence_cond; - struct vrend_context *ctx0; + int gl_major_ver; + int gl_minor_ver; + pipe_mutex fence_mutex; pipe_thread sync_thread; virgl_gl_context sync_context; - /* Needed on GLES to inject a TCS */ + pipe_condvar fence_cond; + float tess_factors[6]; + int eventfd; + + uint32_t max_draw_buffers; + uint32_t max_texture_2d_size; + uint32_t max_texture_3d_size; + uint32_t max_texture_cube_size; /* inferred GL caching type */ uint32_t inferred_gl_caching_type; + + bool features[feat_last]; + + uint32_t finishing : 1; + uint32_t use_gles : 1; + uint32_t use_core_profile : 1; + uint32_t use_external_blob : 1; + uint32_t use_integer : 1; + /* these appeared broken on at least one driver */ + uint32_t use_explicit_locations : 1; + /* threaded sync */ + uint32_t stop_sync_thread : 1; + /* Needed on GLES to inject a TCS */ + uint32_t bgra_srgb_emulation_loaded : 1; + +#ifdef HAVE_EPOXY_EGL_H + uint32_t use_egl_fence : 1; +#endif }; static struct global_renderer_state vrend_state; -- GitLab From a2a20e1a1c15db6eca3a792e984497c8c11d64a8 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 17:51:29 +0100 Subject: [PATCH 17/20] vrend: use a bitfield array for the feature list Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- 
src/vrend_renderer.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 8e1d972a5..426a73912 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -336,7 +336,7 @@ struct global_renderer_state { /* inferred GL caching type */ uint32_t inferred_gl_caching_type; - bool features[feat_last]; + uint64_t features[feat_last / 64 + 1]; uint32_t finishing : 1; uint32_t use_gles : 1; @@ -359,17 +359,31 @@ static struct global_renderer_state vrend_state; static inline bool has_feature(enum features_id feature_id) { + int slot = feature_id / 64; + uint64_t mask = 1ull << (feature_id & 63); + bool retval = vrend_state.features[slot] & mask ? true : false; VREND_DEBUG(dbg_feature_use, NULL, "Try using feature %s:%d\n", feature_list[feature_id].log_name, - vrend_state.features[feature_id]); - return vrend_state.features[feature_id]; + retval); + return retval; } + static inline void set_feature(enum features_id feature_id) { - vrend_state.features[feature_id] = true; + int slot = feature_id / 64; + uint64_t mask = 1ull << (feature_id & 63); + vrend_state.features[slot] |= mask; } +static inline void clear_feature(enum features_id feature_id) +{ + int slot = feature_id / 64; + uint64_t mask = 1ull << (feature_id & 63); + vrend_state.features[slot] &= ~mask; +} + + struct vrend_linked_shader_program { struct list_head head; struct list_head sl[PIPE_SHADER_TYPES]; @@ -6182,7 +6196,8 @@ int vrend_renderer_init(const struct vrend_if_cbs *cbs, uint32_t flags) init_features(gles ? 0 : gl_ver, gles ? 
gl_ver : 0); - vrend_state.features[feat_srgb_write_control] &= vrend_winsys_has_gl_colorspace(); + if (!vrend_winsys_has_gl_colorspace()) + clear_feature(feat_srgb_write_control) ; glGetIntegerv(GL_MAX_DRAW_BUFFERS, (GLint *) &vrend_state.max_draw_buffers); -- GitLab From e853d88d882458a7fd79baed71b9756cfe19a804 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 17:55:54 +0100 Subject: [PATCH 18/20] shader: factor out evaluating the swizzle offset and num components This is needed to reduce the size occupied by these fields. Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_shader.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 4cc6eefbc..7eaa4115a 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -945,6 +945,15 @@ static enum vec_type get_type(uint32_t signed_int_mask, return VEC_FLOAT; } +static void get_swizzle_offset_and_num_components(struct vrend_shader_io *io) +{ + unsigned mask_temp = io->usage_mask; + int start, num_comp; + u_bit_scan_consecutive_range(&mask_temp, &start, &num_comp); + io->swizzle_offset = start; + io->num_components = num_comp; +} + static boolean iter_declaration(struct tgsi_iterate_context *iter, struct tgsi_full_declaration *decl) @@ -987,7 +996,7 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->inputs[i].last = decl->Range.Last; ctx->inputs[i].array_id = decl->Declaration.Array ? decl->Array.ArrayID : 0; ctx->inputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->inputs[i].swizzle_offset, &ctx->inputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->inputs[i]); ctx->inputs[i].glsl_predefined_no_emit = false; ctx->inputs[i].glsl_no_index = false; @@ -1254,7 +1263,7 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->outputs[i].layout_location = 0; ctx->outputs[i].array_id = decl->Declaration.Array ? 
decl->Array.ArrayID : 0; ctx->outputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->outputs[i].swizzle_offset, &ctx->outputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->outputs[i]); ctx->outputs[i].glsl_predefined_no_emit = false; ctx->outputs[i].glsl_no_index = false; ctx->outputs[i].override_no_wm = ctx->outputs[i].num_components == 1; @@ -4633,7 +4642,6 @@ static bool apply_prev_layout(const struct vrend_shader_key *key, /* Identify by sid and arrays_id */ if (io->sid == layout->sid && (io->array_id == layout->array_id)) { - unsigned new_mask = io->usage_mask; /* We have already one IO with the same SID and arrays ID, so we need to duplicate it */ if (already_found_one) { @@ -4649,11 +4657,11 @@ static bool apply_prev_layout(const struct vrend_shader_key *key, } if (already_found_one) { - new_mask = io->usage_mask = (uint8_t)layout->usage_mask; + io->usage_mask = (uint8_t)layout->usage_mask; io->layout_location = layout->location; io->array_id = layout->array_id; - u_bit_scan_consecutive_range(&new_mask, &io->swizzle_offset, &io->num_components); + get_swizzle_offset_and_num_components(io); require_enhanced_layouts |= io->swizzle_offset > 0; if (io->num_components == 1) io->override_no_wm = true; @@ -7204,7 +7212,7 @@ iter_vs_declaration(struct tgsi_iterate_context *iter, ctx->inputs[i].last = decl->Range.Last; ctx->inputs[i].array_id = decl->Declaration.Array ? 
decl->Array.ArrayID : 0; ctx->inputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->inputs[i].swizzle_offset, &ctx->inputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->inputs[i]); ctx->inputs[i].glsl_predefined_no_emit = false; ctx->inputs[i].glsl_no_index = false; -- GitLab From a9e9aff91dee8a924bd3d0246576f66464ca9e31 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 17:56:13 +0100 Subject: [PATCH 19/20] shader: compress the size of the shader io info Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_shader.c | 47 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 7eaa4115a..1d4205668 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -87,30 +87,31 @@ enum vec_type { }; struct vrend_shader_io { - unsigned name; - unsigned gpr; - unsigned done; - int sid; - unsigned interpolate; - int first; - int last; - int array_id; - uint8_t usage_mask; - int swizzle_offset; - int num_components; - int layout_location; - unsigned location; - bool invariant; - bool precise; - bool glsl_predefined_no_emit; - bool glsl_no_index; - bool glsl_gl_block; - bool override_no_wm; - bool is_int; - enum vec_type type; - bool fbfetch_used; char glsl_name[128]; - unsigned stream; + + unsigned sid : 16; + unsigned first : 16; + unsigned last : 16; + unsigned array_id : 10; + unsigned interpolate : 4; + unsigned location : 2; + + unsigned name : 8; + unsigned stream : 2; + unsigned usage_mask : 4; + unsigned type : 2; + unsigned num_components : 3; + unsigned swizzle_offset : 3; + + unsigned layout_location : 1; + unsigned invariant : 1; + unsigned precise : 1; + unsigned glsl_predefined_no_emit : 1; + unsigned glsl_no_index : 1; + unsigned glsl_gl_block : 1; + unsigned override_no_wm : 1; + unsigned is_int : 1; + unsigned fbfetch_used : 1; }; struct vrend_shader_sampler 
{ -- GitLab From 49edf4eda6470e5d976f4205e08f7847de7a2b93 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 18 Feb 2021 18:55:10 +0100 Subject: [PATCH 20/20] shader: Fix string truncation warning Signed-off-by: Gert Wollny Reviewed-by: Rohan Garg --- src/vrend_shader.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 1d4205668..63858f8b9 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -2897,7 +2897,7 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, const char *prefix, const char *arrayname, int offset) { - char clipdistvec[4][64] = { 0, }; + char clipdistvec[4][80] = { 0, }; char clip_indirect[32] = ""; @@ -2939,9 +2939,9 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, idx = 0; } if (gl_in) - snprintf(clipdistvec[cc], 64, "%sgl_in%s.%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); + snprintf(clipdistvec[cc], 80, "%sgl_in%s.%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); else - snprintf(clipdistvec[cc], 64, "%s%s%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); + snprintf(clipdistvec[cc], 80, "%s%s%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); } strbuf_fmt(result, "%s(vec4(%s,%s,%s,%s))", stypeprefix, clipdistvec[0], clipdistvec[1], clipdistvec[2], clipdistvec[3]); } -- GitLab