Commit 1d6e358c authored by Marek Olšák

radeonsi: rename and re-document cache flush flags

SMEM and VMEM caches are L0 on gfx10.
Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent aa8d6e05
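
For reference, the rename only touches the four shader-cache flags; the bit values and the remaining flags are unchanged. A summary of the mapping (an illustrative restatement of the si_pipe.h hunk below, not part of the commit itself):

#define SI_CONTEXT_INV_SCACHE (1 << 4)  /* was SI_CONTEXT_INV_SMEM_L1 */
#define SI_CONTEXT_INV_VCACHE (1 << 5)  /* was SI_CONTEXT_INV_VMEM_L1 */
#define SI_CONTEXT_INV_L2     (1 << 6)  /* was SI_CONTEXT_INV_GLOBAL_L2 */
#define SI_CONTEXT_WB_L2      (1 << 7)  /* was SI_CONTEXT_WRITEBACK_GLOBAL_L2 */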
@@ -917,7 +917,7 @@ static void si_launch_grid(
     /* Indirect buffers use TC L2 on GFX9, but not older hw. */
     if (sctx->chip_class <= GFX8 &&
         si_resource(info->indirect)->TC_L2_dirty) {
-        sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+        sctx->flags |= SI_CONTEXT_WB_L2;
         si_resource(info->indirect)->TC_L2_dirty = false;
     }
 }
...
@@ -51,9 +51,9 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
     case SI_COHERENCY_CP:
         return 0;
     case SI_COHERENCY_SHADER:
-        return SI_CONTEXT_INV_SMEM_L1 |
-               SI_CONTEXT_INV_VMEM_L1 |
-               (cache_policy == L2_BYPASS ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
+        return SI_CONTEXT_INV_SCACHE |
+               SI_CONTEXT_INV_VCACHE |
+               (cache_policy == L2_BYPASS ? SI_CONTEXT_INV_L2 : 0);
     case SI_COHERENCY_CB_META:
         return SI_CONTEXT_FLUSH_AND_INV_CB;
     }
@@ -172,7 +172,7 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
     enum si_cache_policy cache_policy = get_cache_policy(sctx, coher, size);
     sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-               (cache_policy == L2_BYPASS ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0);
+               (cache_policy == L2_BYPASS ? SI_CONTEXT_WB_L2 : 0);
     if (cache_policy != L2_BYPASS)
         si_resource(dst)->TC_L2_dirty = true;
@@ -418,7 +418,7 @@ void si_compute_copy_image(struct si_context *sctx,
     ctx->launch_grid(ctx, &info);
     sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-               (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+               (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
                si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
     ctx->bind_compute_state(ctx, saved_cs);
     ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
@@ -597,7 +597,7 @@ void si_compute_clear_render_target(struct pipe_context *ctx,
     ctx->launch_grid(ctx, &info);
     sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
-               (sctx->chip_class <= GFX8 ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) |
+               (sctx->chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0) |
                si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
     ctx->bind_compute_state(ctx, saved_cs);
     ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
...
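
The compute-blit hunks above all follow one invariant: a compute store that went through L2 (L2_STREAM) defers the writeback by marking the buffer TC_L2_dirty, while an L2-bypassing store (L2_BYPASS) requests SI_CONTEXT_WB_L2 immediately; fixed-function consumers on pre-GFX9 chips then flush on demand. A minimal standalone sketch of that invariant, with hypothetical stand-in types (struct ctx/struct res, the enum values, and the CS_PARTIAL_FLUSH bit position are assumptions; only the flag logic mirrors the diff):

#include <stdbool.h>

/* Assumed stand-ins for the driver types (illustrative only). */
enum { GFX6 = 6, GFX7, GFX8, GFX9 };
#define SI_CONTEXT_WB_L2            (1 << 7)
#define SI_CONTEXT_CS_PARTIAL_FLUSH (1 << 13) /* bit position assumed */
enum si_cache_policy { L2_BYPASS, L2_STREAM, L2_LRU };
struct res { bool TC_L2_dirty; };
struct ctx { unsigned flags; int chip_class; };

/* After a compute clear/copy: flush now only if L2 was bypassed. */
static void finish_compute_store(struct ctx *sctx, struct res *dst,
                                 enum si_cache_policy policy)
{
    sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
    if (policy == L2_BYPASS)
        sctx->flags |= SI_CONTEXT_WB_L2; /* stores bypassed L2: flush now */
    else
        dst->TC_L2_dirty = true;         /* defer the writeback to the reader */
}

/* Before an index/indirect-buffer read, which goes through TC L2 only on GFX9+. */
static void before_fixed_function_read(struct ctx *sctx, struct res *buf)
{
    if (sctx->chip_class <= GFX8 && buf->TC_L2_dirty) {
        sctx->flags |= SI_CONTEXT_WB_L2;
        buf->TC_L2_dirty = false;
    }
}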
@@ -1881,7 +1881,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
     }
     /* Invalidate L1 because it doesn't know that L2 changed. */
-    sctx->flags |= SI_CONTEXT_INV_SMEM_L1;
+    sctx->flags |= SI_CONTEXT_INV_SCACHE;
     si_emit_cache_flush(sctx);
     sctx->bindless_descriptors_dirty = false;
...
@@ -83,7 +83,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
     if (!ctx->screen->info.kernel_flushes_tc_l2_after_ib) {
         wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                       SI_CONTEXT_CS_PARTIAL_FLUSH |
-                      SI_CONTEXT_INV_GLOBAL_L2;
+                      SI_CONTEXT_INV_L2;
     } else if (ctx->chip_class == GFX6) {
         /* The kernel flushes L2 before shaders are finished. */
         wait_flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
@@ -304,9 +304,9 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
      * TODO: Do we also need to invalidate CB & DB caches?
      */
     ctx->flags |= SI_CONTEXT_INV_ICACHE |
-                  SI_CONTEXT_INV_SMEM_L1 |
-                  SI_CONTEXT_INV_VMEM_L1 |
-                  SI_CONTEXT_INV_GLOBAL_L2 |
+                  SI_CONTEXT_INV_SCACHE |
+                  SI_CONTEXT_INV_VCACHE |
+                  SI_CONTEXT_INV_L2 |
                   SI_CONTEXT_START_PIPELINE_STATS;
     ctx->cs_shader_state.initialized = false;
...
@@ -1155,11 +1155,11 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
     sscreen->use_monolithic_shaders =
         (sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
-    sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
-                                      SI_CONTEXT_INV_VMEM_L1;
+    sscreen->barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SCACHE |
+                                      SI_CONTEXT_INV_VCACHE;
     if (sscreen->info.chip_class <= GFX8) {
-        sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
-        sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+        sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_L2;
+        sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WB_L2;
     }
     if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
...
@@ -65,16 +65,18 @@
 #define SI_CONTEXT_FLUSH_FOR_RENDER_COND (1 << 2)
 /* Instruction cache. */
 #define SI_CONTEXT_INV_ICACHE           (1 << 3)
-/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
-#define SI_CONTEXT_INV_SMEM_L1          (1 << 4)
-/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
-#define SI_CONTEXT_INV_VMEM_L1          (1 << 5)
-/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
-#define SI_CONTEXT_INV_GLOBAL_L2        (1 << 6)
-/* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
- * invalidate L2. GFX6-GFX7 can't do it, so they will do complete invalidation. */
-#define SI_CONTEXT_WRITEBACK_GLOBAL_L2  (1 << 7)
-/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+/* Scalar L1 cache. */
+#define SI_CONTEXT_INV_SCACHE           (1 << 4)
+/* Vector L1 cache. */
+#define SI_CONTEXT_INV_VCACHE           (1 << 5)
+/* L2 cache + L2 metadata cache writeback & invalidate.
+ * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+#define SI_CONTEXT_INV_L2               (1 << 6)
+/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+ * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+ * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+#define SI_CONTEXT_WB_L2                (1 << 7)
+/* Writeback & invalidate the L2 metadata cache only. It can only be coupled with
  * a CB or DB flush. */
 #define SI_CONTEXT_INV_L2_METADATA      (1 << 8)
 /* Framebuffer caches. */
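
Stated as code, the re-documented semantics boil down to two coherency directions that the rest of the diff keeps applying: making CP writes visible to shaders, and making shader stores visible to non-L2 clients. A hedged sketch (helper names are mine; the flag choices mirror the barrier_flags setup in the radeonsi_screen_create_impl hunk above; uses the SI_CONTEXT_* bits defined above and assumed GFX enum values):

enum { GFX6 = 6, GFX7, GFX8, GFX9 };

/* CP writes bypass L2 on GFX6-8 only, so shaders must always invalidate
 * their L1s, and additionally L2 on the older chips. */
static unsigned cp_to_shader_flags(int chip_class)
{
    unsigned flags = SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;
    if (chip_class <= GFX8)
        flags |= SI_CONTEXT_INV_L2;
    return flags;
}

/* Shader stores land in L2; CB, DB, and CP read around L2 on GFX6-8 only,
 * so only those chips need a writeback before such clients read. */
static unsigned shader_to_cp_flags(int chip_class)
{
    return chip_class <= GFX8 ? SI_CONTEXT_WB_L2 : 0;
}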
@@ -1646,7 +1648,7 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
                            bool shaders_read_metadata, bool dcc_pipe_aligned)
 {
     sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                   SI_CONTEXT_INV_VMEM_L1;
+                   SI_CONTEXT_INV_VCACHE;
     if (sctx->chip_class >= GFX9) {
         /* Single-sample color is coherent with shaders on GFX9, but
@@ -1655,12 +1657,12 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
          */
         if (num_samples >= 2 ||
             (shaders_read_metadata && !dcc_pipe_aligned))
-            sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
+            sctx->flags |= SI_CONTEXT_INV_L2;
         else if (shaders_read_metadata)
             sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
     } else {
         /* GFX6-GFX8 */
-        sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
+        sctx->flags |= SI_CONTEXT_INV_L2;
     }
 }
@@ -1669,7 +1671,7 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
                            bool include_stencil, bool shaders_read_metadata)
 {
     sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
-                   SI_CONTEXT_INV_VMEM_L1;
+                   SI_CONTEXT_INV_VCACHE;
     if (sctx->chip_class >= GFX9) {
         /* Single-sample depth (not stencil) is coherent with shaders
@@ -1677,12 +1679,12 @@ si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
          * metadata.
          */
         if (num_samples >= 2 || include_stencil)
-            sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
+            sctx->flags |= SI_CONTEXT_INV_L2;
         else if (shaders_read_metadata)
             sctx->flags |= SI_CONTEXT_INV_L2_METADATA;
     } else {
         /* GFX6-GFX8 */
-        sctx->flags |= SI_CONTEXT_INV_GLOBAL_L2;
+        sctx->flags |= SI_CONTEXT_INV_L2;
     }
 }
...
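
The two helpers above encode the same three-way GFX9 decision: a full L2 invalidate when MSAA or pipe-unaligned DCC (or stencil, for DB) can leave stale data, a metadata-only invalidate when shaders merely read compressed metadata, and nothing beyond the vector L1 otherwise. Restated as a standalone sketch of the CB case (the free-function shape is mine; the branch structure is copied from the hunk, using the flag bits and GFX values as above):

static unsigned cb_shader_coherency_flags(int chip_class, unsigned num_samples,
                                          bool shaders_read_metadata,
                                          bool dcc_pipe_aligned)
{
    unsigned flags = SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;

    if (chip_class >= GFX9) {
        /* Single-sample color is already coherent with shaders on GFX9,
         * except for the MSAA and unaligned-DCC cases. */
        if (num_samples >= 2 || (shaders_read_metadata && !dcc_pipe_aligned))
            flags |= SI_CONTEXT_INV_L2;          /* L2 + L2 metadata */
        else if (shaders_read_metadata)
            flags |= SI_CONTEXT_INV_L2_METADATA; /* metadata cache only */
    } else {
        flags |= SI_CONTEXT_INV_L2;              /* GFX6-8: CB bypasses L2 */
    }
    return flags;
}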
@@ -4792,11 +4792,11 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
     /* Subsequent commands must wait for all shader invocations to
      * complete. */
     sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                    SI_CONTEXT_CS_PARTIAL_FLUSH;
     if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
-        sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
-                       SI_CONTEXT_INV_VMEM_L1;
+        sctx->flags |= SI_CONTEXT_INV_SCACHE |
+                       SI_CONTEXT_INV_VCACHE;
     if (flags & (PIPE_BARRIER_VERTEX_BUFFER |
                  PIPE_BARRIER_SHADER_BUFFER |
@@ -4807,7 +4807,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
         /* As far as I can tell, L1 contents are written back to L2
          * automatically at end of shader, but the contents of other
          * L1 caches might still be stale. */
-        sctx->flags |= SI_CONTEXT_INV_VMEM_L1;
+        sctx->flags |= SI_CONTEXT_INV_VCACHE;
     }
     if (flags & PIPE_BARRIER_INDEX_BUFFER) {
@@ -4815,7 +4815,7 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
          * L1 isn't used.
          */
         if (sctx->screen->info.chip_class <= GFX7)
-            sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+            sctx->flags |= SI_CONTEXT_WB_L2;
     }
     /* MSAA color, any depth and any stencil are flushed in
@@ -4826,13 +4826,13 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
         sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
         if (sctx->chip_class <= GFX8)
-            sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+            sctx->flags |= SI_CONTEXT_WB_L2;
     }
     /* Indirect buffers use TC L2 on GFX9, but not older hw. */
     if (sctx->screen->info.chip_class <= GFX8 &&
         flags & PIPE_BARRIER_INDIRECT_BUFFER)
-        sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+        sctx->flags |= SI_CONTEXT_WB_L2;
 }
 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
...
@@ -959,10 +959,10 @@ void si_emit_cache_flush(struct si_context *sctx)
     if (!sctx->has_graphics) {
         /* Only process compute flags. */
         flags &= SI_CONTEXT_INV_ICACHE |
-                 SI_CONTEXT_INV_SMEM_L1 |
-                 SI_CONTEXT_INV_VMEM_L1 |
-                 SI_CONTEXT_INV_GLOBAL_L2 |
-                 SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
+                 SI_CONTEXT_INV_SCACHE |
+                 SI_CONTEXT_INV_VCACHE |
+                 SI_CONTEXT_INV_L2 |
+                 SI_CONTEXT_WB_L2 |
                  SI_CONTEXT_INV_L2_METADATA |
                  SI_CONTEXT_CS_PARTIAL_FLUSH;
     }
@@ -996,7 +996,7 @@ void si_emit_cache_flush(struct si_context *sctx)
     if (flags & SI_CONTEXT_INV_ICACHE)
         cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-    if (flags & SI_CONTEXT_INV_SMEM_L1)
+    if (flags & SI_CONTEXT_INV_SCACHE)
         cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
     if (sctx->chip_class <= GFX8) {
@@ -1114,15 +1114,15 @@ void si_emit_cache_flush(struct si_context *sctx)
     }
     /* Ideally flush TC together with CB/DB. */
-    if (flags & SI_CONTEXT_INV_GLOBAL_L2) {
+    if (flags & SI_CONTEXT_INV_L2) {
         /* Writeback and invalidate everything in L2 & L1. */
         tc_flags = EVENT_TC_ACTION_ENA |
                    EVENT_TC_WB_ACTION_ENA;
         /* Clear the flags. */
-        flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 |
-                   SI_CONTEXT_WRITEBACK_GLOBAL_L2 |
-                   SI_CONTEXT_INV_VMEM_L1);
+        flags &= ~(SI_CONTEXT_INV_L2 |
+                   SI_CONTEXT_WB_L2 |
+                   SI_CONTEXT_INV_VCACHE);
         sctx->num_L2_invalidates++;
     }
@@ -1146,9 +1146,9 @@ void si_emit_cache_flush(struct si_context *sctx)
     if (sctx->has_graphics &&
         (cp_coher_cntl ||
          (flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
-                   SI_CONTEXT_INV_VMEM_L1 |
-                   SI_CONTEXT_INV_GLOBAL_L2 |
-                   SI_CONTEXT_WRITEBACK_GLOBAL_L2)))) {
+                   SI_CONTEXT_INV_VCACHE |
+                   SI_CONTEXT_INV_L2 |
+                   SI_CONTEXT_WB_L2)))) {
         radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
         radeon_emit(cs, 0);
     }
@@ -1162,9 +1162,9 @@ void si_emit_cache_flush(struct si_context *sctx)
      *
      * GFX6-GFX7 don't support L2 write-back.
      */
-    if (flags & SI_CONTEXT_INV_GLOBAL_L2 ||
+    if (flags & SI_CONTEXT_INV_L2 ||
         (sctx->chip_class <= GFX7 &&
-         (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
+         (flags & SI_CONTEXT_WB_L2))) {
         /* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
          * WB must be set on GFX8+ when TC_ACTION is set.
          */
@@ -1178,7 +1178,7 @@ void si_emit_cache_flush(struct si_context *sctx)
         /* L1 invalidation and L2 writeback must be done separately,
          * because both operations can't be done together.
          */
-        if (flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2) {
+        if (flags & SI_CONTEXT_WB_L2) {
             /* WB = write-back
              * NC = apply to non-coherent MTYPEs
              *      (i.e. MTYPE <= 1, which is what we use everywhere)
@@ -1191,7 +1191,7 @@ void si_emit_cache_flush(struct si_context *sctx)
             cp_coher_cntl = 0;
             sctx->num_L2_writebacks++;
         }
-        if (flags & SI_CONTEXT_INV_VMEM_L1) {
+        if (flags & SI_CONTEXT_INV_VCACHE) {
            /* Invalidate per-CU VMEM L1. */
            si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
                                 S_0085F0_TCL1_ACTION_ENA(1));
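
One subtlety in the si_emit_cache_flush hunks above: the stronger operations subsume the weaker ones. When SI_CONTEXT_INV_L2 is emitted (writeback plus invalidate of everything), WB_L2 and INV_VCACHE are cleared because they are already covered, and on GFX6-7, which can't do a plain writeback, a requested WB_L2 is promoted to the full invalidate. A compressed standalone sketch of that flag resolution, minus the actual packet emission (the function shape and the merging of the two emit paths are mine):

/* Decide which single L2 operation to emit; mirrors the flag handling
 * in si_emit_cache_flush() without the PM4 emission. */
static void resolve_l2_flags(unsigned *flags, int chip_class)
{
    /* GFX6-7 can't write back without invalidating: promote WB to INV. */
    if (chip_class <= GFX7 && (*flags & SI_CONTEXT_WB_L2))
        *flags |= SI_CONTEXT_INV_L2;

    if (*flags & SI_CONTEXT_INV_L2) {
        /* Emit TC_ACTION_ENA | TC_WB_ACTION_ENA: a full writeback+invalidate
         * also covers the vector L1, so drop the weaker bits. */
        *flags &= ~(SI_CONTEXT_INV_L2 | SI_CONTEXT_WB_L2 |
                    SI_CONTEXT_INV_VCACHE);
    } else if (*flags & SI_CONTEXT_WB_L2) {
        /* Emit the write-back-only form (WB + NC, GFX8+ per the diff). */
        *flags &= ~SI_CONTEXT_WB_L2;
    }
    /* Any remaining SI_CONTEXT_INV_VCACHE is emitted separately via
     * SURFACE_SYNC with TCL1_ACTION_ENA, as the last hunk shows. */
}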
@@ -1588,7 +1588,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
             si_resource(indexbuf)->TC_L2_dirty) {
             /* GFX8 reads index buffers through TC L2, so it doesn't
              * need this. */
-            sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+            sctx->flags |= SI_CONTEXT_WB_L2;
             si_resource(indexbuf)->TC_L2_dirty = false;
         }
     }
@@ -1607,13 +1607,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
         /* Indirect buffers use TC L2 on GFX9, but not older hw. */
         if (sctx->chip_class <= GFX8) {
             if (si_resource(indirect->buffer)->TC_L2_dirty) {
-                sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+                sctx->flags |= SI_CONTEXT_WB_L2;
                 si_resource(indirect->buffer)->TC_L2_dirty = false;
             }
             if (indirect->indirect_draw_count &&
                 si_resource(indirect->indirect_draw_count)->TC_L2_dirty) {
-                sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+                sctx->flags |= SI_CONTEXT_WB_L2;
                 si_resource(indirect->indirect_draw_count)->TC_L2_dirty = false;
             }
         }
...
@@ -121,9 +121,9 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
          * VS_PARTIAL_FLUSH is required if the buffers are going to be
          * used as an input immediately.
          */
-        sctx->flags |= SI_CONTEXT_INV_SMEM_L1 |
-                       SI_CONTEXT_INV_VMEM_L1 |
+        sctx->flags |= SI_CONTEXT_INV_SCACHE |
+                       SI_CONTEXT_INV_VCACHE |
                        SI_CONTEXT_VS_PARTIAL_FLUSH;
     }
     /* All readers of the streamout targets need to be finished before we can
...
@@ -233,8 +233,8 @@ void si_test_dma_perf(struct si_screen *sscreen)
         sctx->cs_user_data[i] = clear_value;
     }
-    sctx->flags |= SI_CONTEXT_INV_VMEM_L1 |
-                   SI_CONTEXT_INV_SMEM_L1;
+    sctx->flags |= SI_CONTEXT_INV_VCACHE |
+                   SI_CONTEXT_INV_SCACHE;
     ctx->set_shader_buffers(ctx, PIPE_SHADER_COMPUTE, 0,
                             is_copy ? 2 : 1, sb, 0x1);
@@ -252,7 +252,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
     /* Flush L2, so that we don't just test L2 cache performance. */
     if (!test_sdma) {
-        sctx->flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+        sctx->flags |= SI_CONTEXT_WB_L2;
         si_emit_cache_flush(sctx);
     }
...