diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c index aa76a9595ed7c5a3d28d87fc46568b5d0755e0de..426a7391221b39df4cd0379d752090914cba924b 100644 --- a/src/vrend_renderer.c +++ b/src/vrend_renderer.c @@ -307,71 +307,83 @@ static const struct { }; struct global_renderer_state { - int gl_major_ver; - int gl_minor_ver; - + struct vrend_context *ctx0; struct vrend_context *current_ctx; struct vrend_context *current_hw_ctx; - struct list_head waiting_query_list; - bool finishing; - bool use_gles; - bool use_core_profile; - bool use_external_blob; - bool use_integer; -#ifdef HAVE_EPOXY_EGL_H - bool use_egl_fence; -#endif - - bool features[feat_last]; - - /* these appeared broken on at least one driver */ - bool use_explicit_locations; - uint32_t max_draw_buffers; - uint32_t max_texture_2d_size; - uint32_t max_texture_3d_size; - uint32_t max_texture_cube_size; - - /* threaded sync */ - bool stop_sync_thread; - int eventfd; - - pipe_mutex fence_mutex; - /* a fence is always on either of the lists, or is pointed to by - * fence_waiting - */ + struct list_head waiting_query_list; struct list_head fence_list; struct list_head fence_wait_list; struct vrend_fence *fence_waiting; - pipe_condvar fence_cond; - struct vrend_context *ctx0; + int gl_major_ver; + int gl_minor_ver; + pipe_mutex fence_mutex; pipe_thread sync_thread; virgl_gl_context sync_context; - /* Needed on GLES to inject a TCS */ + pipe_condvar fence_cond; + /* Needed on GLES to inject a TCS */ float tess_factors[6]; + int eventfd; + + uint32_t max_draw_buffers; + uint32_t max_texture_2d_size; + uint32_t max_texture_3d_size; + uint32_t max_texture_cube_size; /* inferred GL caching type */ uint32_t inferred_gl_caching_type; + + uint64_t features[feat_last / 64 + 1]; /* bitset, one bit per enum features_id value */ + + uint32_t finishing : 1; + uint32_t use_gles : 1; + uint32_t use_core_profile : 1; + uint32_t use_external_blob : 1; + uint32_t use_integer : 1; + /* these appeared broken on at least one driver */ + uint32_t use_explicit_locations : 1; + /* threaded sync */ + uint32_t 
stop_sync_thread : 1; + /* NOTE(review): presumably tracks whether the BGRA sRGB emulation has been loaded; confirm at the use site */ + uint32_t bgra_srgb_emulation_loaded : 1; + +#ifdef HAVE_EPOXY_EGL_H + uint32_t use_egl_fence : 1; +#endif }; static struct global_renderer_state vrend_state; static inline bool has_feature(enum features_id feature_id) { + int slot = feature_id / 64; /* features[] packs 64 feature flags per uint64_t word */ + uint64_t mask = 1ull << (feature_id & 63); /* bit position inside that word */ + bool retval = vrend_state.features[slot] & mask ? true : false; VREND_DEBUG(dbg_feature_use, NULL, "Try using feature %s:%d\n", feature_list[feature_id].log_name, - vrend_state.features[feature_id]); - return vrend_state.features[feature_id]; + retval); + return retval; } + static inline void set_feature(enum features_id feature_id) { - vrend_state.features[feature_id] = true; + int slot = feature_id / 64; + uint64_t mask = 1ull << (feature_id & 63); + vrend_state.features[slot] |= mask; /* turn the feature bit on */ +} + +static inline void clear_feature(enum features_id feature_id) +{ + int slot = feature_id / 64; + uint64_t mask = 1ull << (feature_id & 63); + vrend_state.features[slot] &= ~mask; /* turn the feature bit off, leaving the other 63 untouched */ } + struct vrend_linked_shader_program { struct list_head head; struct list_head sl[PIPE_SHADER_TYPES]; @@ -757,7 +769,7 @@ static void vrend_destroy_query_object(void *obj_ptr); static void vrend_finish_context_switch(struct vrend_context *ctx); static void vrend_patch_blend_state(struct vrend_sub_context *sub_ctx); static void vrend_update_frontface_state(struct vrend_sub_context *ctx); -static void vrender_get_glsl_version(int *glsl_version); +static int vrender_get_glsl_version(void); static void vrend_destroy_program(struct vrend_linked_shader_program *ent); static void vrend_apply_sampler_state(struct vrend_sub_context *sub_ctx, struct vrend_resource *res, @@ -1139,7 +1151,6 @@ static void vrend_destroy_shader_selector(struct vrend_shader_selector *sel) free(sel->sinfo.so_names[i]); free(sel->tmp_buf); free(sel->sinfo.so_names); - free(sel->sinfo.interpinfo); free(sel->sinfo.sampler_arrays); free(sel->sinfo.image_arrays); free(sel->tokens); 
@@ -2721,17 +2732,14 @@ void vrend_set_viewport_states(struct vrend_context *ctx, } } -static void update_int_sign_masks(enum pipe_format fmt, int i, - uint32_t *signed_mask, - uint32_t *unsigned_mask) { - if (vrend_state.use_integer && - util_format_is_pure_integer(fmt)) { - if (util_format_is_pure_uint(fmt)) - (*unsigned_mask) |= (1 << i); - else - (*signed_mask) |= (1 << i); - } -} +#define UPDATE_INT_SIGN_MASK(fmt, i, signed_mask, unsigned_mask) /* set bit i in the mask matching a pure-integer fmt */ \ + do { if (vrend_state.use_integer && /* do/while (0): expands to exactly one statement */ \ + util_format_is_pure_integer(fmt)) { \ + if (util_format_is_pure_uint(fmt)) \ + (unsigned_mask) |= (1u << (i)); \ + else \ + (signed_mask) |= (1u << (i)); \ + } } while (0) int vrend_create_vertex_elements_state(struct vrend_context *ctx, uint32_t handle, @@ -2829,9 +2837,9 @@ int vrend_create_vertex_elements_state(struct vrend_context *ctx, struct vrend_vertex_element *ve = &v->elements[i]; if (util_format_is_pure_integer(ve->base.src_format)) { - update_int_sign_masks(ve->base.src_format, i, - &v->signed_int_bitmask, - &v->unsigned_int_bitmask); + UPDATE_INT_SIGN_MASK(ve->base.src_format, i, + v->signed_int_bitmask, + v->unsigned_int_bitmask); glVertexAttribIFormat(i, ve->nr_chan, ve->type, ve->base.src_offset); } else @@ -3303,31 +3311,34 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, { unsigned type = sel->type; - if (vrend_state.use_core_profile == true) { + if (vrend_state.use_core_profile) { int i; bool add_alpha_test = true; - key->cbufs_are_a8_bitmask = 0; + // Only use integer info when drawing to avoid stale info. 
- if (vrend_state.use_integer && sub_ctx->drawing) { - key->attrib_signed_int_bitmask = sub_ctx->ve->signed_int_bitmask; - key->attrib_unsigned_int_bitmask = sub_ctx->ve->unsigned_int_bitmask; - } - for (i = 0; i < sub_ctx->nr_cbufs; i++) { - if (!sub_ctx->surf[i]) - continue; - if (vrend_format_is_emulated_alpha(sub_ctx->surf[i]->format)) - key->cbufs_are_a8_bitmask |= (1 << i); - if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { + if (vrend_state.use_integer && sub_ctx->drawing && + type == PIPE_SHADER_VERTEX) { + key->vs.attrib_signed_int_bitmask = sub_ctx->ve->signed_int_bitmask; + key->vs.attrib_unsigned_int_bitmask = sub_ctx->ve->unsigned_int_bitmask; + } + if (type == PIPE_SHADER_FRAGMENT) { + for (i = 0; i < sub_ctx->nr_cbufs; i++) { + if (!sub_ctx->surf[i]) + continue; + if (vrend_format_is_emulated_alpha(sub_ctx->surf[i]->format)) + key->fs.cbufs_are_a8_bitmask |= (1 << i); + if (util_format_is_pure_integer(sub_ctx->surf[i]->format)) { add_alpha_test = false; - update_int_sign_masks(sub_ctx->surf[i]->format, i, - &key->cbufs_signed_int_bitmask, - &key->cbufs_unsigned_int_bitmask); + UPDATE_INT_SIGN_MASK(sub_ctx->surf[i]->format, i, + key->fs.cbufs_signed_int_bitmask, + key->fs.cbufs_unsigned_int_bitmask); + } + key->fs.surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); + } + if (add_alpha_test) { + key->add_alpha_test = sub_ctx->dsa_state.alpha.enabled; + key->alpha_test = sub_ctx->dsa_state.alpha.func; } - key->surface_component_bits[i] = util_format_get_component_bits(sub_ctx->surf[i]->format, UTIL_FORMAT_COLORSPACE_RGB, 0); - } - if (add_alpha_test) { - key->add_alpha_test = sub_ctx->dsa_state.alpha.enabled; - key->alpha_test = sub_ctx->dsa_state.alpha.func; } key->pstipple_tex = sub_ctx->rs_state.poly_stipple_enable; @@ -3335,30 +3346,13 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, key->clip_plane_enable = sub_ctx->rs_state.clip_plane_enable; 
key->flatshade = sub_ctx->rs_state.flatshade ? true : false; - } else { - key->add_alpha_test = 0; - key->pstipple_tex = 0; } - if (type == PIPE_SHADER_FRAGMENT && vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { - key->fs_logicop_enabled = sub_ctx->blend_state.logicop_enable; - key->fs_logicop_func = sub_ctx->blend_state.logicop_func; - key->fs_logicop_emulate_coherent = !has_feature(feat_framebuffer_fetch_non_coherent); - } - - key->invert_fs_origin = !sub_ctx->inverted_fbo_content; - - if (type == PIPE_SHADER_FRAGMENT) - key->fs_swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; + key->gs_present = !!sub_ctx->shaders[PIPE_SHADER_GEOMETRY]; + key->tcs_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]; + key->tes_present = !!sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]; - if (sub_ctx->shaders[PIPE_SHADER_GEOMETRY]) - key->gs_present = true; - if (sub_ctx->shaders[PIPE_SHADER_TESS_CTRL]) - key->tcs_present = true; - if (sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]) - key->tes_present = true; - - int prev_type = -1; + int prev_type = type != PIPE_SHADER_VERTEX ? 
PIPE_SHADER_VERTEX : -1; /* Gallium sends and binds the shaders in the reverse order, so if an * old shader is still bound we should ignore the "previous" (as in @@ -3369,48 +3363,44 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, case PIPE_SHADER_GEOMETRY: if (key->tcs_present || key->tes_present) prev_type = PIPE_SHADER_TESS_EVAL; - else - prev_type = PIPE_SHADER_VERTEX; break; case PIPE_SHADER_FRAGMENT: if (key->gs_present) prev_type = PIPE_SHADER_GEOMETRY; else if (key->tcs_present || key->tes_present) prev_type = PIPE_SHADER_TESS_EVAL; - else - prev_type = PIPE_SHADER_VERTEX; break; case PIPE_SHADER_TESS_EVAL: if (key->tcs_present) prev_type = PIPE_SHADER_TESS_CTRL; - else - prev_type = PIPE_SHADER_VERTEX; - break; - case PIPE_SHADER_TESS_CTRL: - prev_type = PIPE_SHADER_VERTEX; break; default: break; } } - if (prev_type != -1 && sub_ctx->shaders[prev_type]) { - key->prev_stage_num_clip_out = sub_ctx->shaders[prev_type]->sinfo.num_clip_out; - key->prev_stage_num_cull_out = sub_ctx->shaders[prev_type]->sinfo.num_cull_out; - key->num_indirect_generic_inputs = sub_ctx->shaders[prev_type]->sinfo.num_indirect_generic_outputs; - key->num_indirect_patch_inputs = sub_ctx->shaders[prev_type]->sinfo.num_indirect_patch_outputs; - key->num_prev_generic_and_patch_outputs = sub_ctx->shaders[prev_type]->sinfo.num_generic_and_patch_outputs; - key->guest_sent_io_arrays = sub_ctx->shaders[prev_type]->sinfo.guest_sent_io_arrays; + struct vrend_shader_selector *prev = prev_type != -1 ? sub_ctx->shaders[prev_type] : NULL; /* prev_type is -1 for the vertex stage: shaders[] must not be indexed with it */ + if (prev) { + key->input = prev->sinfo.out; + key->force_invariant_inputs = prev->sinfo.invariant_outputs; memcpy(key->prev_stage_generic_and_patch_outputs_layout, - sub_ctx->shaders[prev_type]->sinfo.generic_outputs_layout, - 64 * sizeof (struct vrend_layout_info)); - key->force_invariant_inputs = sub_ctx->shaders[prev_type]->sinfo.invariant_outputs; + prev->sinfo.generic_outputs_layout, + prev->sinfo.out.num_generic_and_patch * sizeof 
(struct vrend_layout_info)); } - // Only use coord_replace if frag shader receives GL_POINTS + int next_type = -1; + if (type == PIPE_SHADER_FRAGMENT) { + key->fs.invert_origin = !sub_ctx->inverted_fbo_content; + key->fs.swizzle_output_rgb_to_bgr = sub_ctx->swizzle_output_rgb_to_bgr; + if (vrend_state.use_gles && can_emulate_logicop(sub_ctx->blend_state.logicop_func)) { + key->fs.logicop_enabled = sub_ctx->blend_state.logicop_enable; + key->fs.logicop_func = sub_ctx->blend_state.logicop_func; + } int fs_prim_mode = sub_ctx->prim_mode; // inherit draw-call's mode + + // Only use coord_replace if frag shader receives GL_POINTS switch (prev_type) { case PIPE_SHADER_TESS_EVAL: if (sub_ctx->shaders[PIPE_SHADER_TESS_EVAL]->sinfo.tes_point_mode) @@ -3420,14 +3410,21 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, fs_prim_mode = sub_ctx->shaders[PIPE_SHADER_GEOMETRY]->sinfo.gs_out_prim; break; } - key->fs_prim_is_points = (fs_prim_mode == PIPE_PRIM_POINTS); - key->coord_replace = sub_ctx->rs_state.point_quad_rasterization - && key->fs_prim_is_points + key->fs.prim_is_points = (fs_prim_mode == PIPE_PRIM_POINTS); + key->fs.coord_replace = sub_ctx->rs_state.point_quad_rasterization + && key->fs.prim_is_points ? 
sub_ctx->rs_state.sprite_coord_enable : 0x0; - } + } else { + if (sub_ctx->shaders[PIPE_SHADER_FRAGMENT]) { + struct vrend_shader *fs = + sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; + key->compiled_fs_uid = fs->uid; + key->fs_info = &fs->sel->sinfo.fs_info; + next_type = PIPE_SHADER_FRAGMENT; + } + } - int next_type = -1; switch (type) { case PIPE_SHADER_VERTEX: if (key->tcs_present) @@ -3439,38 +3436,20 @@ static inline void vrend_fill_shader_key(struct vrend_sub_context *sub_ctx, next_type = PIPE_SHADER_TESS_EVAL; else next_type = PIPE_SHADER_TESS_CTRL; - } else - next_type = PIPE_SHADER_FRAGMENT; + } break; case PIPE_SHADER_TESS_CTRL: next_type = PIPE_SHADER_TESS_EVAL; break; - case PIPE_SHADER_GEOMETRY: - next_type = PIPE_SHADER_FRAGMENT; - break; case PIPE_SHADER_TESS_EVAL: if (key->gs_present) next_type = PIPE_SHADER_GEOMETRY; - else - next_type = PIPE_SHADER_FRAGMENT; default: break; } - if (next_type != -1 && sub_ctx->shaders[next_type]) { - key->next_stage_pervertex_in = sub_ctx->shaders[next_type]->sinfo.has_pervertex_in; - key->num_indirect_generic_outputs = sub_ctx->shaders[next_type]->sinfo.num_indirect_generic_inputs; - key->num_indirect_patch_outputs = sub_ctx->shaders[next_type]->sinfo.num_indirect_patch_inputs; - key->generic_outputs_expected_mask = sub_ctx->shaders[next_type]->sinfo.generic_inputs_emitted_mask; - } - - if (type != PIPE_SHADER_FRAGMENT && - sub_ctx->shaders[PIPE_SHADER_FRAGMENT]) { - struct vrend_shader *fs = - sub_ctx->shaders[PIPE_SHADER_FRAGMENT]->current; - key->compiled_fs_uid = fs->uid; - key->fs_info = &fs->sel->sinfo; - } + if (next_type != -1 && sub_ctx->shaders[next_type]) + key->output = sub_ctx->shaders[next_type]->sinfo.in; } static int vrend_shader_create(struct vrend_context *ctx, @@ -6217,10 +6196,17 @@ int vrend_renderer_init(const struct vrend_if_cbs *cbs, uint32_t flags) init_features(gles ? 0 : gl_ver, gles ? 
gl_ver : 0); - vrend_state.features[feat_srgb_write_control] &= vrend_winsys_has_gl_colorspace(); + if (!vrend_winsys_has_gl_colorspace()) + clear_feature(feat_srgb_write_control) ; glGetIntegerv(GL_MAX_DRAW_BUFFERS, (GLint *) &vrend_state.max_draw_buffers); + /* Mesa clamps this value to 8 anyway, so just make sure that this side + * doesn't exceed the number to be on the safe side when using 8-bit masks + * for the color buffers */ + if (vrend_state.max_draw_buffers > 8) + vrend_state.max_draw_buffers = 8; + if (!has_feature(feat_arb_robustness) && !has_feature(feat_gles_khr_robustness)) { vrend_printf("WARNING: running without ARB/KHR robustness in place may crash\n"); @@ -6454,11 +6440,12 @@ struct vrend_context *vrend_create_context(int id, uint32_t nlen, const char *de grctx->shader_cfg.has_conservative_depth = has_feature(feat_conservative_depth); grctx->shader_cfg.use_integer = vrend_state.use_integer; grctx->shader_cfg.has_dual_src_blend = has_feature(feat_dual_src_blend); + grctx->shader_cfg.has_fbfetch_coherent = has_feature(feat_framebuffer_fetch); vrend_renderer_create_sub_ctx(grctx, 0); vrend_renderer_set_sub_ctx(grctx, 0); - vrender_get_glsl_version(&grctx->shader_cfg.glsl_version); + grctx->shader_cfg.glsl_version = vrender_get_glsl_version(); if (!grctx->ctx_id) grctx->fence_retire = vrend_clicbs->ctx0_fence_retire; @@ -9911,12 +9898,11 @@ int vrend_create_so_target(struct vrend_context *ctx, return 0; } -static void vrender_get_glsl_version(int *glsl_version) +static int vrender_get_glsl_version(void) { - int major_local, minor_local; + int major_local = 0, minor_local = 0; const GLubyte *version_str; MAYBE_UNUSED int c; - int version; version_str = glGetString(GL_SHADING_LANGUAGE_VERSION); if (vrend_state.use_gles) { @@ -9930,9 +9916,7 @@ static void vrender_get_glsl_version(int *glsl_version) assert(c == 2); } - version = (major_local * 100) + minor_local; - if (glsl_version) - *glsl_version = version; + return (major_local * 100) + minor_local; 
} static void vrend_fill_caps_glsl_version(int gl_ver, int gles_ver, diff --git a/src/vrend_shader.c b/src/vrend_shader.c index f7ace6f8bd6816046bdedc640f2c07df1bbc7fa2..63858f8b94ffa647decfd08b1c26fac06f6e2ab7 100644 --- a/src/vrend_shader.c +++ b/src/vrend_shader.c @@ -87,30 +87,31 @@ enum vec_type { }; struct vrend_shader_io { - unsigned name; - unsigned gpr; - unsigned done; - int sid; - unsigned interpolate; - int first; - int last; - int array_id; - uint8_t usage_mask; - int swizzle_offset; - int num_components; - int layout_location; - unsigned location; - bool invariant; - bool precise; - bool glsl_predefined_no_emit; - bool glsl_no_index; - bool glsl_gl_block; - bool override_no_wm; - bool is_int; - enum vec_type type; - bool fbfetch_used; char glsl_name[128]; - unsigned stream; + + unsigned sid : 16; + unsigned first : 16; + unsigned last : 16; + unsigned array_id : 10; + unsigned interpolate : 4; + unsigned location : 2; + + unsigned name : 8; + unsigned stream : 2; + unsigned usage_mask : 4; + unsigned type : 2; + unsigned num_components : 3; + unsigned swizzle_offset : 3; + + unsigned layout_location : 1; + unsigned invariant : 1; + unsigned precise : 1; + unsigned glsl_predefined_no_emit : 1; + unsigned glsl_no_index : 1; + unsigned glsl_gl_block : 1; + unsigned override_no_wm : 1; + unsigned is_int : 1; + unsigned fbfetch_used : 1; }; struct vrend_shader_sampler { @@ -533,7 +534,7 @@ static inline bool fs_emit_layout(const struct dump_ctx *ctx) if coord_origin is 0 and invert is 1 - emit nothing (lower) if coord origin is 1 and invert is 0 - emit nothing (lower) if coord_origin is 1 and invert is 1 - emit origin upper left */ - if (!(ctx->fs_coord_origin ^ ctx->key->invert_fs_origin)) + if (!(ctx->fs_coord_origin ^ ctx->key->fs.invert_origin)) return true; return false; } @@ -919,10 +920,10 @@ iter_inputs(struct tgsi_iterate_context *iter, static bool logiop_require_inout(const struct vrend_shader_key *key) { - if (!key->fs_logicop_enabled) + if 
(!key->fs.logicop_enabled) return false; - switch (key->fs_logicop_func) { + switch (key->fs.logicop_func) { case PIPE_LOGICOP_CLEAR: case PIPE_LOGICOP_SET: case PIPE_LOGICOP_COPY: @@ -945,6 +946,15 @@ static enum vec_type get_type(uint32_t signed_int_mask, return VEC_FLOAT; } +static void get_swizzle_offset_and_num_components(struct vrend_shader_io *io) +{ + unsigned mask_temp = io->usage_mask; + int start, num_comp; + u_bit_scan_consecutive_range(&mask_temp, &start, &num_comp); + io->swizzle_offset = start; + io->num_components = num_comp; +} + static boolean iter_declaration(struct tgsi_iterate_context *iter, struct tgsi_full_declaration *decl) @@ -974,8 +984,8 @@ iter_declaration(struct tgsi_iterate_context *iter, } if (iter->processor.Processor == TGSI_PROCESSOR_VERTEX) { ctx->attrib_input_mask |= (1 << decl->Range.First); - ctx->inputs[i].type = get_type(ctx->key->attrib_signed_int_bitmask, - ctx->key->attrib_unsigned_int_bitmask, + ctx->inputs[i].type = get_type(ctx->key->vs.attrib_signed_int_bitmask, + ctx->key->vs.attrib_unsigned_int_bitmask, decl->Range.First); } ctx->inputs[i].name = decl->Semantic.Name; @@ -987,7 +997,7 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->inputs[i].last = decl->Range.Last; ctx->inputs[i].array_id = decl->Declaration.Array ? 
decl->Array.ArrayID : 0; ctx->inputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->inputs[i].swizzle_offset, &ctx->inputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->inputs[i]); ctx->inputs[i].glsl_predefined_no_emit = false; ctx->inputs[i].glsl_no_index = false; @@ -1171,7 +1181,7 @@ iter_declaration(struct tgsi_iterate_context *iter, case TGSI_SEMANTIC_PATCH: case TGSI_SEMANTIC_GENERIC: if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - if (ctx->key->coord_replace & (1 << ctx->inputs[i].sid)) { + if (ctx->key->fs.coord_replace & (1 << ctx->inputs[i].sid)) { if (ctx->cfg->use_gles) name_prefix = "vec4(gl_PointCoord.x, mix(1.0 - gl_PointCoord.y, gl_PointCoord.y, clamp(winsys_adjust_y, 0.0, 1.0)), 0.0, 1.0)"; else @@ -1254,7 +1264,7 @@ iter_declaration(struct tgsi_iterate_context *iter, ctx->outputs[i].layout_location = 0; ctx->outputs[i].array_id = decl->Declaration.Array ? decl->Array.ArrayID : 0; ctx->outputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->outputs[i].swizzle_offset, &ctx->outputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->outputs[i]); ctx->outputs[i].glsl_predefined_no_emit = false; ctx->outputs[i].glsl_no_index = false; ctx->outputs[i].override_no_wm = ctx->outputs[i].num_components == 1; @@ -1328,8 +1338,8 @@ iter_declaration(struct tgsi_iterate_context *iter, break; case TGSI_SEMANTIC_COLOR: if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - ctx->outputs[i].type = get_type(ctx->key->cbufs_signed_int_bitmask, - ctx->key->cbufs_unsigned_int_bitmask, + ctx->outputs[i].type = get_type(ctx->key->fs.cbufs_signed_int_bitmask, + ctx->key->fs.cbufs_unsigned_int_bitmask, ctx->outputs[i].sid); } @@ -1344,7 +1354,7 @@ iter_declaration(struct tgsi_iterate_context *iter, name_prefix = "ex"; break; } else if (iter->processor.Processor == TGSI_PROCESSOR_FRAGMENT && - 
ctx->key->fs_logicop_enabled) { + ctx->key->fs.logicop_enabled) { name_prefix = "fsout_tmp"; break; } @@ -2058,9 +2068,9 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, char full_op[PIPE_MAX_COLOR_BUFS][128 + 8]; for (unsigned i = 0; i < ctx->num_outputs; i++) { - mask[i] = (1 << ctx->key->surface_component_bits[i]) - 1; + mask[i] = (1 << ctx->key->fs.surface_component_bits[i]) - 1; scale[i] = mask[i]; - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_INVERT: snprintf(src_fb[i], ARRAY_SIZE(src_fb[i]), "ivec4(%f * fsout_c%d + 0.5)", scale[i], i); @@ -2091,7 +2101,7 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, } for (unsigned i = 0; i < ctx->num_outputs; i++) { - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_CLEAR: snprintf(full_op[i], ARRAY_SIZE(full_op[i]), "%s", "vec4(0)"); @@ -2159,7 +2169,7 @@ static void emit_fragment_logicop(const struct dump_ctx *ctx, } for (unsigned i = 0; i < ctx->num_outputs; i++) { - switch (ctx->key->fs_logicop_func) { + switch (ctx->key->fs.logicop_func) { case PIPE_LOGICOP_NOOP: break; case PIPE_LOGICOP_COPY: @@ -2177,7 +2187,7 @@ static void emit_cbuf_swizzle(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *glsl_strbufs) { for (uint i = 0; i < ctx->num_outputs; i++) { - if (ctx->key->fs_swizzle_output_rgb_to_bgr & (1 << i)) { + if (ctx->key->fs.swizzle_output_rgb_to_bgr & (1 << i)) { emit_buff(glsl_strbufs, "fsout_c%d = fsout_c%d.zyxw;\n", i, i); } } @@ -2189,17 +2199,17 @@ static void handle_fragment_proc_exit(const struct dump_ctx *ctx, if (ctx->key->pstipple_tex) emit_pstipple_pass(glsl_strbufs); - if (ctx->key->cbufs_are_a8_bitmask) + if (ctx->key->fs.cbufs_are_a8_bitmask) emit_a8_swizzle(glsl_strbufs); if (ctx->key->add_alpha_test) emit_alpha_test(ctx, glsl_strbufs); - if (ctx->key->fs_logicop_enabled) + if (ctx->key->fs.logicop_enabled) emit_fragment_logicop(ctx, glsl_strbufs); - if 
(ctx->key->fs_swizzle_output_rgb_to_bgr) + if (ctx->key->fs.swizzle_output_rgb_to_bgr) emit_cbuf_swizzle(ctx, glsl_strbufs); if (ctx->write_all_cbufs) @@ -2887,13 +2897,13 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, const char *prefix, const char *arrayname, int offset) { - char clipdistvec[4][64] = { 0, }; + char clipdistvec[4][80] = { 0, }; char clip_indirect[32] = ""; - bool has_prev_vals = (ctx->key->prev_stage_num_cull_out + ctx->key->prev_stage_num_clip_out) > 0; - int num_culls = has_prev_vals ? ctx->key->prev_stage_num_cull_out : 0; - int num_clips = has_prev_vals ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; + bool has_prev_vals = (ctx->key->input.num_cull + ctx->key->input.num_clip) > 0; + int num_culls = has_prev_vals ? ctx->key->input.num_cull : 0; + int num_clips = has_prev_vals ? ctx->key->input.num_clip : ctx->num_in_clip_dist; int base_idx = ctx->inputs[input_idx].sid * 4; /* With arrays enabled , but only when gl_ClipDistance or gl_CullDistance are emitted (>4) @@ -2920,18 +2930,18 @@ create_swizzled_clipdist(const struct dump_ctx *ctx, idx -= num_clips; cc_name = "gl_CullDistance"; } - if (ctx->key->prev_stage_num_cull_out) - if (idx >= ctx->key->prev_stage_num_cull_out) + if (ctx->key->input.num_cull) + if (idx >= ctx->key->input.num_cull) idx = 0; } else { - if (ctx->key->prev_stage_num_clip_out) - if (idx >= ctx->key->prev_stage_num_clip_out) + if (ctx->key->input.num_clip) + if (idx >= ctx->key->input.num_clip) idx = 0; } if (gl_in) - snprintf(clipdistvec[cc], 64, "%sgl_in%s.%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); + snprintf(clipdistvec[cc], 80, "%sgl_in%s.%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); else - snprintf(clipdistvec[cc], 64, "%s%s%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); + snprintf(clipdistvec[cc], 80, "%s%s%s[%s %d]", prefix, arrayname, cc_name, clip_indirect, idx); } strbuf_fmt(result, "%s(vec4(%s,%s,%s,%s))", stypeprefix, clipdistvec[0], 
clipdistvec[1], clipdistvec[2], clipdistvec[3]); } @@ -4368,8 +4378,8 @@ static void rewrite_io_ranged(struct dump_ctx *ctx) { if ((ctx->info.indirect_files & (1 << TGSI_FILE_INPUT)) || - ctx->key->num_indirect_generic_inputs || - ctx->key->num_indirect_patch_inputs) { + ctx->key->input.num_indirect_generic || + ctx->key->input.num_indirect_patch) { for (uint i = 0; i < ctx->num_inputs; ++i) { if (ctx->inputs[i].name == TGSI_SEMANTIC_PATCH) { @@ -4402,10 +4412,10 @@ void rewrite_io_ranged(struct dump_ctx *ctx) ctx->generic_ios.input_range.io.last = ctx->inputs[i].sid; } - if (ctx->key->num_indirect_generic_inputs > 0) - ctx->generic_ios.input_range.io.last = ctx->generic_ios.input_range.io.sid + ctx->key->num_indirect_generic_inputs - 1; - if (ctx->key->num_indirect_patch_inputs > 0) - ctx->patch_ios.input_range.io.last = ctx->patch_ios.input_range.io.sid + ctx->key->num_indirect_patch_inputs - 1; + if (ctx->key->input.num_indirect_generic > 0) + ctx->generic_ios.input_range.io.last = ctx->generic_ios.input_range.io.sid + ctx->key->input.num_indirect_generic - 1; + if (ctx->key->input.num_indirect_patch > 0) + ctx->patch_ios.input_range.io.last = ctx->patch_ios.input_range.io.sid + ctx->key->input.num_indirect_patch - 1; } snprintf(ctx->patch_ios.input_range.io.glsl_name, 64, "%s_p%d", get_stage_input_name_prefix(ctx, ctx->prog_type), ctx->patch_ios.input_range.io.sid); @@ -4425,8 +4435,8 @@ void rewrite_io_ranged(struct dump_ctx *ctx) } if ((ctx->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) || - ctx->key->num_indirect_generic_outputs || - ctx->key->num_indirect_patch_outputs) { + ctx->key->output.num_indirect_generic || + ctx->key->output.num_indirect_patch) { for (uint i = 0; i < ctx->num_outputs; ++i) { if (ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { @@ -4571,7 +4581,7 @@ void emit_fs_clipdistance_load(const struct dump_ctx *ctx, if (!ctx->fs_uses_clipdist_input) return; - int prev_num = ctx->key->prev_stage_num_clip_out + ctx->key->prev_stage_num_cull_out; 
+ int prev_num = ctx->key->input.num_clip + ctx->key->input.num_cull; int ndists; const char *prefix=""; @@ -4595,12 +4605,12 @@ void emit_fs_clipdistance_load(const struct dump_ctx *ctx, } bool is_cull = false; if (prev_num > 0) { - if (i >= ctx->key->prev_stage_num_clip_out && i < prev_num) + if (i >= ctx->key->input.num_clip && i < prev_num) is_cull = true; } const char *clip_cull = is_cull ? "Cull" : "Clip"; emit_buff(glsl_strbufs, "clip_dist_temp[%d].%c = %sgl_%sDistance[%d];\n", clipidx, wm, prefix, clip_cull, - is_cull ? i - ctx->key->prev_stage_num_clip_out : i); + is_cull ? i - ctx->key->input.num_clip : i); } } @@ -4627,13 +4637,12 @@ static bool apply_prev_layout(const struct vrend_shader_key *key, if (io->name == TGSI_SEMANTIC_GENERIC || io->name == TGSI_SEMANTIC_PATCH) { const struct vrend_layout_info *layout = key->prev_stage_generic_and_patch_outputs_layout; - for (unsigned generic_index = 0; generic_index < key->num_prev_generic_and_patch_outputs; ++generic_index, ++layout) { + for (unsigned generic_index = 0; generic_index < key->input.num_generic_and_patch; ++generic_index, ++layout) { bool already_found_one = false; /* Identify by sid and arrays_id */ if (io->sid == layout->sid && (io->array_id == layout->array_id)) { - unsigned new_mask = io->usage_mask; /* We have already one IO with the same SID and arrays ID, so we need to duplicate it */ if (already_found_one) { @@ -4649,11 +4658,11 @@ static bool apply_prev_layout(const struct vrend_shader_key *key, } if (already_found_one) { - new_mask = io->usage_mask = (uint8_t)layout->usage_mask; + io->usage_mask = (uint8_t)layout->usage_mask; io->layout_location = layout->location; io->array_id = layout->array_id; - u_bit_scan_consecutive_range(&new_mask, &io->swizzle_offset, &io->num_components); + get_swizzle_offset_and_num_components(io); require_enhanced_layouts |= io->swizzle_offset > 0; if (io->num_components == 1) io->override_no_wm = true; @@ -4763,7 +4772,7 @@ static void 
handle_io_arrays(struct dump_ctx *ctx) if (ctx->num_inputs > 0) if (evaluate_layout_overlays(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - ctx->key->coord_replace)) { + ctx->key->fs.coord_replace)) { require_enhanced_layouts = true; } @@ -4779,7 +4788,7 @@ static void handle_io_arrays(struct dump_ctx *ctx) rewrite_io_ranged(ctx); rewrite_components(ctx->num_inputs, ctx->inputs, get_stage_input_name_prefix(ctx, ctx->prog_type), - ctx->key->coord_replace, true); + ctx->key->fs.coord_replace, true); rewrite_components(ctx->num_outputs, ctx->outputs, get_stage_output_name_prefix(ctx->prog_type), 0, true); @@ -5473,7 +5482,7 @@ static void emit_header(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *g } if (logiop_require_inout(ctx->key)) { - if (ctx->key->fs_logicop_emulate_coherent) + if (ctx->cfg->has_fbfetch_coherent) emit_ext(glsl_strbufs, "EXT_shader_framebuffer_fetch", "require"); else emit_ext(glsl_strbufs, "EXT_shader_framebuffer_fetch_non_coherent", "require"); @@ -5524,7 +5533,7 @@ static void emit_header(const struct dump_ctx *ctx, struct vrend_glsl_strbufs *g if (ctx->ubo_used_mask) emit_ext(glsl_strbufs, "ARB_uniform_buffer_object", "require"); - if (ctx->num_cull_dist_prop || ctx->key->prev_stage_num_cull_out) + if (ctx->num_cull_dist_prop || ctx->key->input.num_cull) emit_ext(glsl_strbufs, "ARB_cull_distance", "require"); if (ctx->ssbo_used_mask) emit_ext(glsl_strbufs, "ARB_shader_storage_buffer_object", "require"); @@ -6014,10 +6023,10 @@ static void emit_ios_indirect_generics_input(const struct dump_ctx *ctx, if (ctx->generic_ios.input_range.used) { int size = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; assert(size < 256 && size >= 0); - if (size < ctx->key->num_indirect_generic_inputs) { + if (size < ctx->key->input.num_indirect_generic) { VREND_DEBUG(dbg_shader, NULL, "WARNING: shader key indicates less indirect inputs" " (%d) then are actually used (%d)\n", - 
ctx->key->input.num_indirect_generic, size); } if (prefer_generic_io_block(ctx, io_in)) { @@ -6084,10 +6093,12 @@ emit_ios_generic(const struct dump_ctx *ctx, postfix); if (io->name == TGSI_SEMANTIC_GENERIC) { - if (iot == io_in) - generic_ios->inputs_emitted_mask |= 1 << io->sid; - else - generic_ios->outputs_emitted_mask |= 1 << io->sid; + assert(io->sid < 32); /* sid may be 31, so shift an unsigned 1: 1 << 31 overflows int */ + if (iot == io_in) { + generic_ios->inputs_emitted_mask |= 1u << io->sid; + } else { + generic_ios->outputs_emitted_mask |= 1u << io->sid; + } } } else { @@ -6293,7 +6304,7 @@ static void emit_ios_vs(const struct dump_ctx *ctx, if (ctx->key->clip_plane_enable) { emit_hdr(glsl_strbufs, "uniform vec4 clipp[8];\n"); } - if ((ctx->key->gs_present || ctx->key->tes_present) && ctx->key->next_stage_pervertex_in) { + if ((ctx->key->gs_present || ctx->key->tes_present) && ctx->key->output.use_pervertex) { emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n %s%s};\n", clip_buf, cull_buf); } else { emit_hdrf(glsl_strbufs, "%s%s", clip_buf, cull_buf); @@ -6326,7 +6337,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, uint32_t i; if (fs_emit_layout(ctx)) { - bool upper_left = !(ctx->fs_coord_origin ^ ctx->key->invert_fs_origin); + bool upper_left = !(ctx->fs_coord_origin ^ ctx->key->fs.invert_origin); char comma = (upper_left && ctx->fs_pixel_center) ? 
',' : ' '; if (!ctx->cfg->use_gles) @@ -6362,7 +6373,7 @@ static void emit_ios_fs(const struct dump_ctx *ctx, } if (ctx->cfg->use_gles && !ctx->winsys_adjust_y_emitted && - (ctx->key->coord_replace & (1 << ctx->inputs[i].sid))) { + (ctx->key->fs.coord_replace & (1 << ctx->inputs[i].sid))) { *winsys_adjust_y_emitted = true; emit_hdr(glsl_strbufs, "uniform float winsys_adjust_y;\n"); } @@ -6387,18 +6398,18 @@ static void emit_ios_fs(const struct dump_ctx *ctx, if (ctx->write_all_cbufs) { const char* type = "vec4"; - if (ctx->key->cbufs_unsigned_int_bitmask) + if (ctx->key->fs.cbufs_unsigned_int_bitmask) type = "uvec4"; - else if (ctx->key->cbufs_signed_int_bitmask) + else if (ctx->key->fs.cbufs_signed_int_bitmask) type = "ivec4"; for (i = 0; i < (uint32_t)ctx->cfg->max_draw_buffers; i++) { if (ctx->cfg->use_gles) { - if (ctx->key->fs_logicop_enabled) + if (ctx->key->fs.logicop_enabled) emit_hdrf(glsl_strbufs, "%s fsout_tmp_c%d;\n", type, i); if (logiop_require_inout(ctx->key)) { - const char *noncoherent = ctx->key->fs_logicop_emulate_coherent ? "" : ", noncoherent"; + const char *noncoherent = ctx->cfg->has_fbfetch_coherent ? 
"" : ", noncoherent"; emit_hdrf(glsl_strbufs, "layout (location=%d%s) inout highp %s fsout_c%d;\n", i, noncoherent, type, i); } else emit_hdrf(glsl_strbufs, "layout (location=%d) out %s fsout_c%d;\n", i, @@ -6436,14 +6447,14 @@ static void emit_ios_fs(const struct dump_ctx *ctx, } if (ctx->num_in_clip_dist) { - if (ctx->key->prev_stage_num_clip_out) { - emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->key->prev_stage_num_clip_out); - } else if (ctx->num_in_clip_dist > 4 && !ctx->key->prev_stage_num_cull_out) { + if (ctx->key->input.num_clip) { + emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->key->input.num_clip); + } else if (ctx->num_in_clip_dist > 4 && !ctx->key->input.num_cull) { emit_hdrf(glsl_strbufs, "in float gl_ClipDistance[%d];\n", ctx->num_in_clip_dist); } - if (ctx->key->prev_stage_num_cull_out) { - emit_hdrf(glsl_strbufs, "in float gl_CullDistance[%d];\n", ctx->key->prev_stage_num_cull_out); + if (ctx->key->input.num_cull) { + emit_hdrf(glsl_strbufs, "in float gl_CullDistance[%d];\n", ctx->key->input.num_cull); } if(ctx->fs_uses_clipdist_input) emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); @@ -6515,8 +6526,8 @@ static void emit_ios_geom(const struct dump_ctx *ctx, char clip_var[64] = ""; char cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6592,8 +6603,8 @@ static void emit_ios_tcs(const struct dump_ctx *ctx, int clip_dist, cull_dist; char clip_var[64] = "", cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? 
ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6603,7 +6614,7 @@ static void emit_ios_tcs(const struct dump_ctx *ctx, *has_pervertex = true; emit_hdrf(glsl_strbufs, "in gl_PerVertex {\n vec4 gl_Position; \n %s%s} gl_in[];\n", clip_var, cull_var); } - if (ctx->num_clip_dist && ctx->key->next_stage_pervertex_in) { + if (ctx->num_clip_dist && ctx->key->output.use_pervertex) { emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n float gl_ClipDistance[%d];\n} gl_out[];\n", ctx->num_clip_dist); emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); } @@ -6651,8 +6662,8 @@ static void emit_ios_tes(const struct dump_ctx *ctx, int clip_dist, cull_dist; char clip_var[64] = "", cull_var[64] = ""; - clip_dist = ctx->key->prev_stage_num_clip_out ? ctx->key->prev_stage_num_clip_out : ctx->num_in_clip_dist; - cull_dist = ctx->key->prev_stage_num_cull_out; + clip_dist = ctx->key->input.num_clip ? 
ctx->key->input.num_clip : ctx->num_in_clip_dist; + cull_dist = ctx->key->input.num_cull; if (clip_dist) snprintf(clip_var, 64, "float gl_ClipDistance[%d];\n", clip_dist); @@ -6662,7 +6673,7 @@ static void emit_ios_tes(const struct dump_ctx *ctx, *has_pervertex = true; emit_hdrf(glsl_strbufs, "in gl_PerVertex {\n vec4 gl_Position; \n %s%s} gl_in[];\n", clip_var, cull_var); } - if (ctx->num_clip_dist && ctx->key->next_stage_pervertex_in) { + if (ctx->num_clip_dist && ctx->key->output.use_pervertex) { emit_hdrf(glsl_strbufs, "out gl_PerVertex {\n vec4 gl_Position;\n float gl_ClipDistance[%d];\n} gl_out[];\n", ctx->num_clip_dist); emit_hdr(glsl_strbufs, "vec4 clip_dist_temp[2];\n"); } @@ -6748,7 +6759,7 @@ static int emit_ios(const struct dump_ctx *ctx, return glsl_ver_required; } -static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) +static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vrend_fs_shader_info *fs_info) { uint32_t i, index = 0; @@ -6764,10 +6775,10 @@ static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vre vrend_printf( "mismatch in number of interps %d %d\n", index, ctx->num_interps); return true; } - sinfo->interpinfo[index].semantic_name = ctx->inputs[i].name; - sinfo->interpinfo[index].semantic_index = ctx->inputs[i].sid; - sinfo->interpinfo[index].interpolate = ctx->inputs[i].interpolate; - sinfo->interpinfo[index].location = ctx->inputs[i].location; + fs_info->interpinfo[index].semantic_name = ctx->inputs[i].name; + fs_info->interpinfo[index].semantic_index = ctx->inputs[i].sid; + fs_info->interpinfo[index].interpolate = ctx->inputs[i].interpolate; + fs_info->interpinfo[index].location = ctx->inputs[i].location; index++; } return true; @@ -6775,26 +6786,12 @@ static boolean fill_fragment_interpolants(const struct dump_ctx *ctx, struct vre static boolean fill_interpolants(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) { - boolean ret; - 
if (!ctx->num_interps) return true; if (ctx->prog_type == TGSI_PROCESSOR_VERTEX || ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) return true; - free(sinfo->interpinfo); - sinfo->interpinfo = calloc(ctx->num_interps, sizeof(struct vrend_interp_info)); - if (!sinfo->interpinfo) - return false; - - ret = fill_fragment_interpolants(ctx, sinfo); - if (ret == false) - goto out_fail; - - return true; - out_fail: - free(sinfo->interpinfo); - return false; + return fill_fragment_interpolants(ctx, &sinfo->fs_info); } static boolean analyze_instruction(struct tgsi_iterate_context *iter, @@ -6832,11 +6829,11 @@ static boolean analyze_instruction(struct tgsi_iterate_context *iter, static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sinfo) { sinfo->num_ucp = ctx->key->clip_plane_enable ? 8 : 0; - sinfo->has_pervertex_in = ctx->has_pervertex; - sinfo->has_sample_input = ctx->has_sample_input; + sinfo->in.use_pervertex = ctx->has_pervertex; + sinfo->fs_info.has_sample_input = ctx->has_sample_input; bool has_prop = (ctx->num_clip_dist_prop + ctx->num_cull_dist_prop) > 0; - sinfo->num_clip_out = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? ctx->num_clip_dist : 8); - sinfo->num_cull_out = has_prop ? ctx->num_cull_dist_prop : 0; + sinfo->out.num_clip = has_prop ? ctx->num_clip_dist_prop : (ctx->num_clip_dist ? ctx->num_clip_dist : 8); + sinfo->out.num_cull = has_prop ? 
ctx->num_cull_dist_prop : 0; sinfo->samplers_used_mask = ctx->samplers_used; sinfo->images_used_mask = ctx->images_used_mask; sinfo->num_consts = ctx->num_consts; @@ -6844,23 +6841,23 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin sinfo->ssbo_used_mask = ctx->ssbo_used_mask; - sinfo->ubo_indirect = ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT); + sinfo->ubo_indirect = !!(ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)); if (ctx->generic_ios.input_range.used) - sinfo->num_indirect_generic_inputs = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; + sinfo->in.num_indirect_generic = ctx->generic_ios.input_range.io.last - ctx->generic_ios.input_range.io.sid + 1; if (ctx->patch_ios.input_range.used) - sinfo->num_indirect_patch_inputs = ctx->patch_ios.input_range.io.last - ctx->patch_ios.input_range.io.sid + 1; + sinfo->in.num_indirect_patch = ctx->patch_ios.input_range.io.last - ctx->patch_ios.input_range.io.sid + 1; if (ctx->generic_ios.output_range.used) - sinfo->num_indirect_generic_outputs = ctx->generic_ios.output_range.io.last - ctx->generic_ios.output_range.io.sid + 1; + sinfo->out.num_indirect_generic = ctx->generic_ios.output_range.io.last - ctx->generic_ios.output_range.io.sid + 1; if (ctx->patch_ios.output_range.used) - sinfo->num_indirect_patch_outputs = ctx->patch_ios.output_range.io.last - ctx->patch_ios.output_range.io.sid + 1; + sinfo->out.num_indirect_patch = ctx->patch_ios.output_range.io.last - ctx->patch_ios.output_range.io.sid + 1; sinfo->num_inputs = ctx->num_inputs; - sinfo->num_interps = ctx->num_interps; + sinfo->fs_info.num_interps = ctx->num_interps; sinfo->num_outputs = ctx->num_outputs; sinfo->shadow_samp_mask = ctx->shadow_samp_mask; - sinfo->glsl_ver = ctx->glsl_ver_required; + sinfo->fs_info.glsl_ver = ctx->glsl_ver_required; sinfo->gs_out_prim = ctx->gs_out_prim; sinfo->tes_prim = ctx->tes_prim_mode; sinfo->tes_point_mode = 
ctx->tes_point_mode; @@ -6878,16 +6875,16 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin * to the next shader stage. mesa/tgsi doesn't provide this information for * TCS, TES, and GEOM shaders. */ - sinfo->guest_sent_io_arrays = ctx->guest_sent_io_arrays; - sinfo->num_generic_and_patch_outputs = 0; + sinfo->out.guest_sent_io_arrays = ctx->guest_sent_io_arrays; + sinfo->out.num_generic_and_patch = 0; for(unsigned i = 0; i < ctx->num_outputs; i++) { - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].name = ctx->outputs[i].name; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].sid = ctx->outputs[i].sid; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].location = ctx->outputs[i].layout_location; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].array_id = ctx->outputs[i].array_id; - sinfo->generic_outputs_layout[sinfo->num_generic_and_patch_outputs].usage_mask = ctx->outputs[i].usage_mask; - if (ctx->outputs[i].name == TGSI_SEMANTIC_GENERIC || ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { - sinfo->num_generic_and_patch_outputs++; + if (ctx->outputs[i].name == TGSI_SEMANTIC_GENERIC || ctx->outputs[i].name == TGSI_SEMANTIC_PATCH) { + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].name = ctx->outputs[i].name; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].sid = ctx->outputs[i].sid; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].location = ctx->outputs[i].layout_location; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].array_id = ctx->outputs[i].array_id; + sinfo->generic_outputs_layout[sinfo->out.num_generic_and_patch].usage_mask = ctx->outputs[i].usage_mask; + sinfo->out.num_generic_and_patch++; } } @@ -6901,7 +6898,7 @@ static void fill_sinfo(const struct dump_ctx *ctx, struct vrend_shader_info *sin free(sinfo->image_arrays); sinfo->image_arrays = ctx->image_arrays; 
sinfo->num_image_arrays = ctx->num_image_arrays; - sinfo->generic_inputs_emitted_mask = ctx->generic_ios.inputs_emitted_mask; + sinfo->in.generic_emitted_mask = ctx->generic_ios.inputs_emitted_mask; for (unsigned i = 0; i < ctx->num_outputs; ++i) { if (ctx->outputs[i].invariant) @@ -6941,7 +6938,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre const struct vrend_shader_cfg *cfg, struct vrend_strarray *prog_strings, const struct vrend_shader_info *vs_info, - const struct vrend_shader_info *fs_info, + const struct vrend_fs_shader_info *fs_info, const char *oprefix, bool flatshade); @@ -6985,8 +6982,8 @@ bool vrend_convert_shader(const struct vrend_context *rctx, ctx.ssbo_atomic_array_base = 0xffffffff; ctx.has_sample_input = false; ctx.req_local_mem = req_local_mem; - ctx.guest_sent_io_arrays = key->guest_sent_io_arrays; - ctx.generic_ios.outputs_expected_mask = key->generic_outputs_expected_mask; + ctx.guest_sent_io_arrays = key->input.guest_sent_io_arrays; + ctx.generic_ios.outputs_expected_mask = key->output.generic_emitted_mask; tgsi_scan_shader(tokens, &ctx.info); /* if we are in core profile mode we should use GLSL 1.40 */ @@ -7119,7 +7116,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre const struct vrend_shader_cfg *cfg, struct vrend_strarray *prog_strings, const struct vrend_shader_info *vs_info, - const struct vrend_shader_info *fs_info, + const struct vrend_fs_shader_info *fs_info, const char *oprefix, bool flatshade) { int i; @@ -7128,7 +7125,7 @@ static bool vrend_patch_vertex_shader_interpolants(MAYBE_UNUSED const struct vre if (!vs_info || !fs_info) return true; - if (!fs_info->interpinfo) + if (!fs_info->num_interps) return true; if (fs_info->has_sample_input) { @@ -7216,7 +7213,7 @@ iter_vs_declaration(struct tgsi_iterate_context *iter, ctx->inputs[i].last = decl->Range.Last; ctx->inputs[i].array_id = decl->Declaration.Array ? 
decl->Array.ArrayID : 0; ctx->inputs[i].usage_mask = mask_temp = decl->Declaration.UsageMask; - u_bit_scan_consecutive_range(&mask_temp, &ctx->inputs[i].swizzle_offset, &ctx->inputs[i].num_components); + get_swizzle_offset_and_num_components(&ctx->inputs[i]); ctx->inputs[i].glsl_predefined_no_emit = false; ctx->inputs[i].glsl_no_index = false; diff --git a/src/vrend_shader.h b/src/vrend_shader.h index fd2356b7e33152820f8333f1ba4c241baebeb047..68f710a97d4f15e7b8526e954d4608b50f59c132 100644 --- a/src/vrend_shader.h +++ b/src/vrend_shader.h @@ -54,10 +54,10 @@ enum gl_advanced_blend_mode /* need to store patching info for interpolation */ struct vrend_interp_info { - int semantic_name; - int semantic_index; - int interpolate; - unsigned location; + unsigned semantic_name : 6; + unsigned semantic_index : 16; + unsigned interpolate : 3; + unsigned location : 3; }; struct vrend_array { @@ -66,111 +66,125 @@ struct vrend_array { }; struct vrend_layout_info { - unsigned name; - int sid; - int location; - int array_id; - int usage_mask; + unsigned name : 6; + unsigned sid : 16 ; + unsigned location : 16 ; + unsigned array_id : 16 ; + unsigned usage_mask : 5; }; +struct vrend_fs_shader_info { + int num_interps; + int glsl_ver; + bool has_sample_input; + struct vrend_interp_info interpinfo[PIPE_MAX_SHADER_INPUTS]; +}; + +struct vrend_shader_info_out { + uint64_t num_clip : 8; + uint64_t num_cull : 8; + uint64_t num_indirect_generic : 8; + uint64_t num_indirect_patch : 8; + uint64_t num_generic_and_patch : 8; + uint64_t guest_sent_io_arrays : 1; +}; + +struct vrend_shader_info_in { + uint64_t generic_emitted_mask : 32; + uint64_t num_indirect_generic : 8; + uint64_t num_indirect_patch : 8; + uint64_t use_pervertex : 1; +}; + + struct vrend_shader_info { + uint64_t invariant_outputs; + struct vrend_shader_info_out out; + struct vrend_shader_info_in in; + + struct vrend_layout_info generic_outputs_layout[64]; + struct vrend_array *sampler_arrays; + struct vrend_array 
*image_arrays; + char **so_names; + struct vrend_fs_shader_info fs_info; + struct pipe_stream_output_info so_info; + uint32_t samplers_used_mask; uint32_t images_used_mask; uint32_t ubo_used_mask; uint32_t ssbo_used_mask; - uint32_t num_generic_and_patch_outputs; - bool has_pervertex_in; - bool guest_sent_io_arrays; - struct vrend_layout_info generic_outputs_layout[64]; + uint32_t shadow_samp_mask; + uint32_t attrib_input_mask; + uint32_t fs_blend_equation_advanced; + int num_consts; int num_inputs; - int num_interps; int num_outputs; - bool ubo_indirect; - uint8_t num_indirect_generic_outputs; - uint8_t num_indirect_patch_outputs; - uint8_t num_indirect_generic_inputs; - uint8_t num_indirect_patch_inputs; - uint32_t generic_inputs_emitted_mask; int num_ucp; - int glsl_ver; - bool has_sample_input; - uint8_t num_clip_out; - uint8_t num_cull_out; - uint32_t shadow_samp_mask; int gs_out_prim; int tes_prim; - bool tes_point_mode; - uint32_t attrib_input_mask; - uint32_t fs_blend_equation_advanced; - - struct vrend_array *sampler_arrays; int num_sampler_arrays; - - struct vrend_array *image_arrays; int num_image_arrays; - struct pipe_stream_output_info so_info; - - struct vrend_interp_info *interpinfo; - char **so_names; - uint64_t invariant_outputs; + uint8_t ubo_indirect : 1; + uint8_t tes_point_mode : 1; }; struct vrend_shader_key { - bool fs_prim_is_points; - uint32_t coord_replace; - bool invert_fs_origin; - bool pstipple_tex; - bool add_alpha_test; - bool color_two_side; - uint8_t alpha_test; - uint8_t clip_plane_enable; - bool gs_present; - bool tcs_present; - bool tes_present; - bool flatshade; - bool guest_sent_io_arrays; - bool fs_logicop_enabled; - bool fs_logicop_emulate_coherent; - enum pipe_logicop fs_logicop_func; - uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; - - uint32_t num_prev_generic_and_patch_outputs; + uint64_t force_invariant_inputs; + + struct vrend_fs_shader_info *fs_info; + struct vrend_shader_info_out input; + struct 
vrend_shader_info_in output; struct vrend_layout_info prev_stage_generic_and_patch_outputs_layout[64]; - uint8_t prev_stage_num_clip_out; - uint8_t prev_stage_num_cull_out; - bool next_stage_pervertex_in; - uint32_t cbufs_are_a8_bitmask; - uint32_t cbufs_signed_int_bitmask; - uint32_t cbufs_unsigned_int_bitmask; - uint32_t attrib_signed_int_bitmask; - uint32_t attrib_unsigned_int_bitmask; - uint8_t num_indirect_generic_outputs; - uint8_t num_indirect_patch_outputs; - uint8_t num_indirect_generic_inputs; - uint8_t num_indirect_patch_inputs; - uint32_t generic_outputs_expected_mask; - uint8_t fs_swizzle_output_rgb_to_bgr; - uint64_t force_invariant_inputs; + union { + struct { + uint8_t surface_component_bits[PIPE_MAX_COLOR_BUFS]; + uint32_t coord_replace; + uint8_t swizzle_output_rgb_to_bgr; + uint8_t cbufs_are_a8_bitmask; + uint8_t cbufs_signed_int_bitmask; + uint8_t cbufs_unsigned_int_bitmask; + uint32_t logicop_func : 4; + uint32_t logicop_enabled : 1; + uint32_t prim_is_points : 1; + uint32_t invert_origin : 1; + } fs; + + struct { + uint32_t attrib_signed_int_bitmask; + uint32_t attrib_unsigned_int_bitmask; + } vs; + }; uint32_t compiled_fs_uid; - struct vrend_shader_info *fs_info; + + uint8_t alpha_test; + uint8_t clip_plane_enable; + uint8_t pstipple_tex : 1; + uint8_t add_alpha_test : 1; + uint8_t color_two_side : 1; + uint8_t gs_present : 1; + uint8_t tcs_present : 1; + uint8_t tes_present : 1; + uint8_t flatshade : 1; + }; struct vrend_shader_cfg { - int glsl_version; - int max_draw_buffers; - bool use_gles; - bool use_core_profile; - bool use_explicit_locations; - bool has_arrays_of_arrays; - bool has_gpu_shader5; - bool has_es31_compat; - bool has_conservative_depth; - bool use_integer; - bool has_dual_src_blend; + uint32_t glsl_version : 12; + uint32_t max_draw_buffers : 4; + uint32_t use_gles : 1; + uint32_t use_core_profile : 1; + uint32_t use_explicit_locations : 1; + uint32_t has_arrays_of_arrays : 1; + uint32_t has_gpu_shader5 : 1; + uint32_t 
has_es31_compat : 1; + uint32_t has_conservative_depth : 1; + uint32_t use_integer : 1; + uint32_t has_dual_src_blend : 1; + uint32_t has_fbfetch_coherent : 1; }; struct vrend_context;