diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index 3337f75a93aa1c7dc3e5ac4f5b9930cf15d69f13..d40e80ba2c97c9acb68fe433d47319313895cc7b 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -342,6 +342,7 @@ struct global_renderer_state { uint32_t max_texture_2d_size; uint32_t max_texture_3d_size; uint32_t max_texture_cube_size; + uint32_t max_shader_patch_varyings; /* inferred GL caching type */ uint32_t inferred_gl_caching_type; @@ -451,6 +452,7 @@ struct vrend_shader { struct vrend_strarray glsl_strings; GLuint id; GLuint program_id; /* only used for separable shaders */ + GLuint last_pipeline_id; uint32_t uid; bool is_compiled; bool is_linked; /* only used for separable shaders */ @@ -1266,6 +1268,7 @@ static bool vrend_compile_shader(struct vrend_sub_context *sub_ctx, if (shader->sel->sinfo.separable_program) { shader->program_id = glCreateProgram(); + shader->last_pipeline_id = 0xffffffff; glProgramParameteri(shader->program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); glAttachShader(shader->program_id, shader->id); } @@ -1649,9 +1652,10 @@ static void bind_virgl_block_loc(struct vrend_linked_shader_program *sprog, } } -static void rebind_ubo_locs(struct vrend_linked_shader_program *sprog, - enum pipe_shader_type last_shader) +static void rebind_ubo_and_sampler_locs(struct vrend_linked_shader_program *sprog, + enum pipe_shader_type last_shader) { + int next_sampler_id = 0; int next_ubo_id = 0; for (enum pipe_shader_type shader_type = PIPE_SHADER_VERTEX; @@ -1660,7 +1664,11 @@ static void rebind_ubo_locs(struct vrend_linked_shader_program *sprog, if (!sprog->ss[shader_type]) continue; + next_sampler_id = bind_sampler_locs(sprog, shader_type, next_sampler_id); next_ubo_id = bind_ubo_locs(sprog, shader_type, next_ubo_id); + + if (sprog->is_pipeline) + sprog->ss[shader_type]->last_pipeline_id = sprog->id.pipeline; } /* Now `next_ubo_id` is the last ubo id, which is used for the VirglBlock. 
*/ @@ -2020,19 +2028,17 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_sub_c vrend_use_program(sprog); - int next_sampler_id = 0; for (enum pipe_shader_type shader_type = PIPE_SHADER_VERTEX; shader_type <= last_shader; shader_type++) { if (!sprog->ss[shader_type]) continue; - next_sampler_id = bind_sampler_locs(sprog, shader_type, next_sampler_id); bind_const_locs(sprog, shader_type); bind_image_locs(sprog, shader_type); bind_ssbo_locs(sprog, shader_type); } - rebind_ubo_locs(sprog, last_shader); + rebind_ubo_and_sampler_locs(sprog, last_shader); if (!has_feature(feat_gles31_vertex_attrib_binding)) { if (vs->sel->sinfo.num_inputs) { @@ -3811,27 +3817,27 @@ static inline void vrend_sync_shader_io(struct vrend_sub_context *sub_ctx, key->require_output_arrays = next->sinfo.has_input_arrays; key->out_generic_expected_mask = next->sinfo.in_generic_emitted_mask; key->out_texcoord_expected_mask = next->sinfo.in_texcoord_emitted_mask; - } - /* FS gets the clip/cull info in the key from this shader, so - * we can avoid re-translating this shader by not updating the - * info in the key */ - if (next_type != PIPE_SHADER_FRAGMENT) { - key->num_out_clip = sub_ctx->shaders[next_type]->current->var_sinfo.num_in_clip; - key->num_out_cull = sub_ctx->shaders[next_type]->current->var_sinfo.num_in_cull; - } - - if (next_type == PIPE_SHADER_FRAGMENT) { - struct vrend_shader *fs = - sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; - key->fs_info = fs->var_sinfo.fs_info; - if (type == PIPE_SHADER_VERTEX && sub_ctx->shaders[type]) { - uint32_t fog_input = sub_ctx->shaders[next_type]->sinfo.fog_input_mask; - uint32_t fog_output = sub_ctx->shaders[type]->sinfo.fog_output_mask; + /* FS gets the clip/cull info in the key from this shader, so + * we can avoid re-translating this shader by not updating the + * info in the key */ + if (next_type != PIPE_SHADER_FRAGMENT) { + key->num_out_clip = sub_ctx->shaders[next_type]->current->var_sinfo.num_in_clip; + 
key->num_out_cull = sub_ctx->shaders[next_type]->current->var_sinfo.num_in_cull; + } - // We only want to issue the fixup for inputs not fed by - // the outputs of the previous stage - key->vs.fog_fixup_mask = (fog_input ^ fog_output) & fog_input; + if (next_type == PIPE_SHADER_FRAGMENT) { + struct vrend_shader *fs = + sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; + key->fs_info = fs->var_sinfo.fs_info; + if (type == PIPE_SHADER_VERTEX && sub_ctx->shaders[type]) { + uint32_t fog_input = sub_ctx->shaders[next_type]->sinfo.fog_input_mask; + uint32_t fog_output = sub_ctx->shaders[type]->sinfo.fog_output_mask; + + // We only want to issue the fixup for inputs not fed by + // the outputs of the previous stage + key->vs.fog_fixup_mask = (fog_input ^ fog_output) & fog_input; + } } } } @@ -4018,15 +4024,13 @@ static int vrend_finish_shader(struct vrend_context *ctx, struct vrend_shader_selector *sel, const struct tgsi_token *tokens) { - int r; - sel->tokens = tgsi_dup_tokens(tokens); - r = vrend_shader_select(ctx->sub, sel, NULL); - if (r) { - return EINVAL; - } - return 0; + if (!ctx->shader_cfg.use_gles && sel->type != PIPE_SHADER_COMPUTE) + sel->sinfo.separable_program = + vrend_shader_query_separable_program(sel->tokens, &ctx->shader_cfg); + + return vrend_shader_select(ctx->sub, sel, NULL) ? EINVAL : 0; } int vrend_create_shader(struct vrend_context *ctx, @@ -5188,10 +5192,23 @@ vrend_select_program(struct vrend_sub_context *sub_ctx, ubyte vertices_per_patch * because it's shared across multiple pipelines and some things like * transform feedback require relinking, so we have to make sure the * blocks are bound. */ - int last_shader = tes_id ? PIPE_SHADER_TESS_EVAL : - (gs_id ? PIPE_SHADER_GEOMETRY : - PIPE_SHADER_FRAGMENT); - rebind_ubo_locs(prog, last_shader); + enum pipe_shader_type last_shader = tes_id ? PIPE_SHADER_TESS_EVAL : + (gs_id ? 
PIPE_SHADER_GEOMETRY : + PIPE_SHADER_FRAGMENT); + bool need_rebind = false; + + for (enum pipe_shader_type shader_type = PIPE_SHADER_VERTEX; + shader_type <= last_shader && !need_rebind; + shader_type++) { + if (!prog->ss[shader_type]) + continue; + need_rebind |= prog->ss[shader_type]->last_pipeline_id != prog->id.pipeline; + } + + if (need_rebind) { + vrend_use_program(prog); + rebind_ubo_and_sampler_locs(prog, last_shader); + } } sub_ctx->last_shader_idx = sub_ctx->shaders[PIPE_SHADER_TESS_EVAL] ? PIPE_SHADER_TESS_EVAL : (sub_ctx->shaders[PIPE_SHADER_GEOMETRY] ? PIPE_SHADER_GEOMETRY : PIPE_SHADER_FRAGMENT); @@ -7227,6 +7244,7 @@ struct vrend_context *vrend_create_context(int id, uint32_t nlen, const char *de grctx->res_hash = vrend_ctx_resource_init_table(); list_inithead(&grctx->untyped_resources); + grctx->shader_cfg.max_shader_patch_varyings = vrend_state.max_shader_patch_varyings; grctx->shader_cfg.use_gles = vrend_state.use_gles; grctx->shader_cfg.use_core_profile = vrend_state.use_core_profile; grctx->shader_cfg.use_explicit_locations = vrend_state.use_explicit_locations; @@ -11208,6 +11226,8 @@ static void vrend_renderer_fill_caps_v2(int gl_ver, int gles_ver, union virgl_c } else caps->v2.max_shader_patch_varyings = 0; + vrend_state.max_shader_patch_varyings = caps->v2.max_shader_patch_varyings; + if (has_feature(feat_texture_gather)) { glGetIntegerv(GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET, &caps->v2.min_texture_gather_offset); glGetIntegerv(GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET, &caps->v2.max_texture_gather_offset); @@ -11531,6 +11551,9 @@ static void vrend_renderer_fill_caps_v2(int gl_ver, int gles_ver, union virgl_c glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, &max); caps->v2.max_const_buffer_size[PIPE_SHADER_COMPUTE] = max * 4; } + + if (has_feature(feat_separate_shader_objects)) + caps->v2.capability_bits_v2 |= VIRGL_CAP_V2_SSO; } void vrend_renderer_fill_caps(uint32_t set, uint32_t version, diff --git a/src/vrend_shader.c b/src/vrend_shader.c index 
d7f95f2e807af77345f5bd74025162e4d82ba8bb..2f744e81bcc4adbf4126161ffa17b1926c4d9c78 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -80,6 +80,8 @@ #define FRONT_COLOR_EMITTED (1 << 0) #define BACK_COLOR_EMITTED (1 << 1); +#define MAX_VARYING 32 + enum vrend_sysval_uniform { UNIFORM_WINSYS_ADJUST_Y, UNIFORM_CLIP_PLANE, @@ -3180,13 +3182,14 @@ static void translate_tex(struct dump_ctx *ctx, } } + char buf[255]; + const char *new_srcs[4] = { buf, srcs[1], srcs[2], srcs[3] }; + /* We have to unnormalize the coordinate for all but the texel fetch instruction */ if (inst->Instruction.Opcode != TGSI_OPCODE_TXF && vrend_shader_sampler_views_mask_get(ctx->key->sampler_views_emulated_rect_mask, sinfo->sreg_index)) { - char buf[255]; const char *bias = ""; - const char *new_srcs[4] = { buf, srcs[1], srcs[2], srcs[3] }; /* No LOD for these texture types, but on GLES we emulate RECT by using * a normal 2D texture, so we have to give LOD 0 */ @@ -6474,13 +6477,13 @@ emit_ios_generic(const struct dump_ctx *ctx, if (ctx->separable_program && io->name == TGSI_SEMANTIC_GENERIC && !(ctx->prog_type == TGSI_PROCESSOR_FRAGMENT && strcmp(inout, "in") != 0)) { - snprintf(layout, sizeof(layout), "layout(location = %d)\n", io->sid); + snprintf(layout, sizeof(layout), "layout(location = %d) ", 31 - io->sid); } if (io->first == io->last) { emit_hdr(glsl_strbufs, layout); /* ugly leave spaces to patch interp in later */ - emit_hdrf(glsl_strbufs, "%s%s\n%s %s %s %s%s;\n", + emit_hdrf(glsl_strbufs, "%s%s %s %s %s %s%s;\n", io->precise ? "precise" : "", io->invariant ? "invariant" : "", prefix, @@ -6669,7 +6672,7 @@ emit_ios_patch(struct vrend_glsl_strbufs *glsl_strbufs, /* We start these locations from 32 and proceed downwards, to avoid * conflicting with generic IO locations. 
*/ if (emit_location) - emit_hdrf(glsl_strbufs, "layout(location = %d) ", 32 - io->sid); + emit_hdrf(glsl_strbufs, "layout(location = %d) ", io->sid); if (io->last == io->first) { emit_hdrf(glsl_strbufs, "%s %s vec4 %s;\n", prefix, inout, io->glsl_name); @@ -7532,6 +7535,102 @@ static int compare_sid(const void *lhs, const void *rhs) return l->sid - r->sid; } +struct sso_scan_ctx { + struct tgsi_iterate_context iter; + const struct vrend_shader_cfg *cfg; + uint8_t max_generic_in_sid; + uint8_t max_patch_in_sid; + uint8_t max_generic_out_sid; + uint8_t max_patch_out_sid; + bool separable_program; + bool unsupported_io; +}; + +static boolean +iter_prop_for_separable(struct tgsi_iterate_context *iter, + struct tgsi_full_property *prop) +{ + struct sso_scan_ctx *ctx = (struct sso_scan_ctx *) iter; + + if (prop->Property.PropertyName == TGSI_PROPERTY_SEPARABLE_PROGRAM) + ctx->separable_program = prop->u[0].Data != 0; + return true; +} + +static boolean +iter_decl_for_overlap(struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl) +{ + struct sso_scan_ctx *ctx = (struct sso_scan_ctx *) iter; + + /* VS inputs and FS outputs are of no interest + * when it comes to IO matching */ + if (decl->Declaration.File == TGSI_FILE_INPUT && + iter->processor.Processor == TGSI_PROCESSOR_VERTEX) + return true; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) + return true; + + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_PATCH: + if (decl->Declaration.File == TGSI_FILE_INPUT) { + if (ctx->max_patch_in_sid < decl->Semantic.Index) + ctx->max_patch_in_sid = decl->Semantic.Index; + } else { + if (ctx->max_patch_out_sid < decl->Semantic.Index) + ctx->max_patch_out_sid = decl->Semantic.Index; + } + break; + case TGSI_SEMANTIC_GENERIC: + if (decl->Declaration.File == TGSI_FILE_INPUT) { + if (ctx->max_generic_in_sid < decl->Semantic.Index) + ctx->max_generic_in_sid = decl->Semantic.Index; + } else { + if 
(ctx->max_generic_out_sid < decl->Semantic.Index) + ctx->max_generic_out_sid = decl->Semantic.Index; + } + break; + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_CLIPVERTEX: + case TGSI_SEMANTIC_BCOLOR: + case TGSI_SEMANTIC_TEXCOORD: + case TGSI_SEMANTIC_FOG: + /* These are semantics that need to be matched by name and since we can't + * guarantee that they exist in all the stages of separable shaders + * we can't emit the shader as SSO */ + ctx->unsupported_io = true; + break; + default: + ; + } + return true; +} + + +bool vrend_shader_query_separable_program(const struct tgsi_token *tokens, + const struct vrend_shader_cfg *cfg) +{ + struct sso_scan_ctx ctx = {0}; + ctx.cfg = cfg; + ctx.iter.iterate_property = iter_prop_for_separable; + ctx.iter.iterate_declaration = iter_decl_for_overlap; + tgsi_iterate_shader(tokens, &ctx.iter); + + /* Since we have to match by location, and have to handle generics and patches + * in the limited range of 32 locations, we have to make sure that + * the generics range and the patch range don't overlap. In addition, to + * work around radeonsi not supporting patch locations above 30, we have + * to check that limit too. */ + bool supports_separable = !ctx.unsupported_io && + (ctx.max_generic_in_sid + ctx.max_patch_in_sid < MAX_VARYING) && + (ctx.max_generic_out_sid + ctx.max_patch_out_sid < MAX_VARYING) && + (ctx.max_patch_in_sid < ctx.cfg->max_shader_patch_varyings) && + (ctx.max_patch_out_sid < ctx.cfg->max_shader_patch_varyings); + return ctx.separable_program && supports_separable; +} + bool vrend_convert_shader(const struct vrend_context *rctx, const struct vrend_shader_cfg *cfg, const struct tgsi_token *tokens, @@ -7545,6 +7644,7 @@ bool vrend_convert_shader(const struct vrend_context *rctx, boolean bret; memset(&ctx, 0, sizeof(struct dump_ctx)); + ctx.cfg = cfg; /* First pass to deal with edge cases. 
*/ ctx.iter.iterate_declaration = iter_decls; diff --git a/src/vrend_shader.h b/src/vrend_shader.h index 4a4d715a293e74538fac43e16e814536fa49d4df..05bfbd9f5eaaafad286678f93729cc664211c5bc 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -220,6 +220,7 @@ struct vrend_shader_key { struct vrend_shader_cfg { uint32_t glsl_version : 12; uint32_t max_draw_buffers : 4; + uint32_t max_shader_patch_varyings : 6; uint32_t use_gles : 1; uint32_t use_core_profile : 1; uint32_t use_explicit_locations : 1; @@ -265,6 +266,9 @@ bool vrend_shader_create_passthrough_tcs(const struct vrend_context *ctx, bool vrend_shader_needs_alpha_func(const struct vrend_shader_key *key); +bool vrend_shader_query_separable_program(const struct tgsi_token *tokens, + const struct vrend_shader_cfg *cfg); + static inline bool vrend_shader_sampler_views_mask_get( const uint64_t mask[static VREND_SHADER_SAMPLER_VIEWS_MASK_LENGTH], int index)