Commit 04122532 authored by Marek Olšák

radeonsi: invalidate caches at the beginning of the prim discard compute IB

Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
parent 9f505ce2
......@@ -1196,6 +1196,17 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
}
/* 2) IB initialization. */
/* This needs to be done at the beginning of IBs due to possible
* TTM buffer moves in the kernel.
*/
si_emit_surface_sync(sctx, cs,
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8) |
S_0085F0_SH_ICACHE_ACTION_ENA(1) |
S_0085F0_SH_KCACHE_ACTION_ENA(1));
/* Restore the GDS prim restart counter if needed. */
if (sctx->preserve_prim_restart_gds_at_flush) {
si_cp_copy_data(sctx, cs,
......
......@@ -604,6 +604,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
struct si_vs_prolog_bits *prolog_key);
/* si_state_draw.c */
void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
unsigned cp_coher_cntl);
void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);
void si_emit_cache_flush(struct si_context *sctx);
void si_trace_emit(struct si_context *sctx);
......
......@@ -889,12 +889,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
}
}
static void si_emit_surface_sync(struct si_context *sctx,
unsigned cp_coher_cntl)
void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
unsigned cp_coher_cntl)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
bool compute_ib = !sctx->has_graphics ||
cs == sctx->prim_discard_compute_cs;
if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
if (sctx->chip_class >= GFX9 || compute_ib) {
/* Flush caches and wait for the caches to assert idle. */
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
......@@ -914,7 +915,7 @@ static void si_emit_surface_sync(struct si_context *sctx,
/* ACQUIRE_MEM has an implicit context roll if the current context
* is busy. */
if (sctx->has_graphics)
if (!compute_ib)
sctx->context_roll = true;
}
......@@ -1162,7 +1163,7 @@ void si_emit_cache_flush(struct si_context *sctx)
/* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
* WB must be set on GFX8+ when TC_ACTION is set.
*/
si_emit_surface_sync(sctx, cp_coher_cntl |
si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
......@@ -1179,7 +1180,7 @@ void si_emit_cache_flush(struct si_context *sctx)
*
* WB doesn't work without NC.
*/
si_emit_surface_sync(sctx, cp_coher_cntl |
si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
......@@ -1187,7 +1188,7 @@ void si_emit_cache_flush(struct si_context *sctx)
}
if (flags & SI_CONTEXT_INV_VMEM_L1) {
/* Invalidate per-CU VMEM L1. */
si_emit_surface_sync(sctx, cp_coher_cntl |
si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
}
......@@ -1195,7 +1196,7 @@ void si_emit_cache_flush(struct si_context *sctx)
/* If TC flushes haven't cleared this... */
if (cp_coher_cntl)
si_emit_surface_sync(sctx, cp_coher_cntl);
si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl);
if (is_barrier)
si_prim_discard_signal_next_compute_ib_start(sctx);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment