Commit c3432ad8 authored by Marek Olšák, committed by Marge Bot

radeonsi: add an option to enable 2x2 coarse shading for non-GUI elements

This is for experiments with VRS (variable rate shading).
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <!7646>
parent c13370e8
Pipeline #230378 waiting for manual action with stages
in 20 seconds
......@@ -12,5 +12,6 @@ OPT_BOOL(no_infinite_interp, false, "Kill PS with infinite interp coeff")
OPT_BOOL(clamp_div_by_zero, false, "Clamp div by zero (x / 0 becomes FLT_MAX instead of NaN)")
OPT_BOOL(no_trunc_coord, false, "Always set TRUNC_COORD=0")
OPT_BOOL(shader_culling, false, "Cull primitives in shaders when benefical (without tess and GS)")
OPT_BOOL(vrs2x2, false, "Enable 2x2 coarse shading for non-GUI elements")
#undef OPT_BOOL
......@@ -347,6 +347,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
......
......@@ -1080,6 +1080,9 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
#include "si_debug_options.h"
}
if (sscreen->info.chip_class < GFX10_3)
sscreen->options.vrs2x2 = false;
si_disk_cache_create(sscreen);
/* Determine the number of shader compiler threads. */
......
......@@ -605,12 +605,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
bool writes_vrs = ctx->screen->options.vrs2x2;
/* Write the misc vector (point size, edgeflag, layer, viewport). */
if (writes_psize || pos_writes_edgeflag ||
if (writes_psize || pos_writes_edgeflag || writes_vrs ||
shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
pos_args[1].enabled_channels = writes_psize |
(pos_writes_edgeflag << 1) |
((pos_writes_edgeflag | writes_vrs) << 1) |
(shader->selector->info.writes_layer << 2);
pos_args[1].valid_mask = 0; /* EXEC mask */
......@@ -635,6 +636,32 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
}
if (writes_vrs) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
* The range is [-2, 1]. Values:
* 1: 2x coarser shading rate in that direction.
* 0: normal shading rate
* -1: 2x finer shading rate (sample shading, not directional)
* -2: 4x finer shading rate (sample shading, not directional)
*
* Sample shading can't go above 8 samples, so both numbers can't be -2
* at the same time.
*/
LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0);
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
rates = LLVMBuildSelect(ctx->ac.builder,
LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
pos_args[0].out[3], ctx->ac.f32_1, ""),
rates, ctx->ac.i32_0, "");
LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]);
v = LLVMBuildOr(ctx->ac.builder, v, rates, "");
pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
}
if (ctx->screen->info.chip_class >= GFX9) {
/* GFX9 has the layer in out.z[10:0] and the viewport
* index in out.z[19:16].
......
......@@ -751,7 +751,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((vs_out_mask & 0x0F) != 0) |
S_02881C_VS_OUT_CCDIST1_VEC_ENA((vs_out_mask & 0xF0) != 0) |
S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3 &&
!sctx->screen->options.vrs2x2) |
S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
clipdist_mask | (culldist_mask << 8);
......@@ -1407,6 +1408,21 @@ static void si_emit_db_render_state(struct si_context *sctx)
radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL,
db_shader_control);
if (sctx->screen->options.vrs2x2) {
/* If the shader is using discard, turn off coarse shading because
* discard at 2x2 pixel granularity degrades quality too much.
*
* MIN allows sample shading but not coarse shading.
*/
unsigned mode = G_02880C_KILL_ENABLE(db_shader_control) ? V_028064_VRS_COMB_MODE_MIN
: V_028064_VRS_COMB_MODE_PASSTHRU;
radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
S_028064_VRS_OVERRIDE_RATE_X(0) |
S_028064_VRS_OVERRIDE_RATE_Y(0));
}
if (initial_cdw != sctx->gfx_cs->current.cdw)
sctx->context_roll = true;
}
......@@ -5366,9 +5382,18 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
if (sctx->chip_class >= GFX10_3) {
si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
/* This allows sample shading. */
/* The rate combiners have no effect if they are disabled like this:
* VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1
* PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1
* HTILE_RATE: VRS_HTILE_ENCODING = 0
* SAMPLE_ITER: PS_ITER_SAMPLE = 0
*
* Use OVERRIDE, which will ignore results from previous combiners.
* (e.g. enabled sample shading overrides the vertex rate)
*/
si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL,
S_028848_SAMPLE_ITER_COMBINER_MODE(1));
S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
}
sctx->cs_preamble_state = pm4;
......
......@@ -250,7 +250,8 @@ struct si_shader_data {
#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
(S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) | \
S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1))
S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | \
S_02881C_USE_VTX_VRS_RATE(1))
/* The list of registers whose emitted values are remembered by si_context. */
enum si_tracked_reg
......@@ -283,6 +284,7 @@ enum si_tracked_reg
SI_TRACKED_PA_SC_BINNER_CNTL_0,
SI_TRACKED_DB_DFSM_CONTROL,
SI_TRACKED_DB_VRS_OVERRIDE_CNTL,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
......
......@@ -89,6 +89,12 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
shader_variant_flags |= 1 << 8;
if (sel->screen->debug_flags & DBG(GISEL))
shader_variant_flags |= 1 << 9;
if ((sel->info.stage == MESA_SHADER_VERTEX ||
sel->info.stage == MESA_SHADER_TESS_EVAL ||
sel->info.stage == MESA_SHADER_GEOMETRY) &&
!es &&
sel->screen->options.vrs2x2)
shader_variant_flags |= 1 << 10;
struct mesa_sha1 ctx;
_mesa_sha1_init(&ctx);
......@@ -1056,9 +1062,11 @@ static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
writes_psize &= !shader->key.opt.kill_pointsize;
bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
sel->screen->options.vrs2x2 ||
sel->info.writes_layer || sel->info.writes_viewport_index;
return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
S_02881C_USE_VTX_VRS_RATE(sel->screen->options.vrs2x2) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment