Commit c85ea824 authored by Felix Degrood, committed by Marge Bot
Browse files

iris: reduce redundant tile cache flushes



We are flushing the tile cache more often than is necessary. In
unified cache mode, tile cache flushing is expensive, evicting all
depth/pixel data from the L3$. This is only needed for a handful of
cases, such as: making CPU or GPU changes globally visible
(e.g. map), fast color clears, or slow depth clears. Tile cache
flushing is a gen12+ feature.

Remove blanket flushing of tile cache on all depth/RT flushes.
Replace with selective tile cache flushing.

Improves performance in several workloads:
AztecRuins.ogl-high-offscreen-1440p 1%
UnigineValley.ogl-g2                1%
Dota 2 (replay Jul 2020).ogl-g2     1%
Counter-Strike GO.ogl-g2            1%
Manhattan.ogl-Off-19x10             2%
CarChase.ogl-Off-19x10              1%
Bioshock Infinite.ogl-g2            1%
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <!10217>
parent bfe2c5f6
......@@ -295,7 +295,8 @@ fast_clear_color(struct iris_context *ice,
*/
iris_emit_end_of_pipe_sync(batch,
"fast clear: pre-flush",
PIPE_CONTROL_RENDER_TARGET_FLUSH);
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH);
iris_batch_sync_region_start(batch);
......@@ -493,6 +494,8 @@ fast_clear_depth(struct iris_context *ice,
ISL_AUX_OP_FULL_RESOLVE, false);
iris_resource_set_aux_state(ice, res, res_level, layer, 1,
ISL_AUX_STATE_RESOLVED);
iris_emit_pipe_control_flush(batch, "hiz op: post depth resolve",
PIPE_CONTROL_TILE_CACHE_FLUSH);
}
}
const union isl_color_value clear_value = { .f32 = {depth, } };
......@@ -607,7 +610,8 @@ clear_depth_stencil(struct iris_context *ice,
blorp_batch_finish(&blorp_batch);
iris_batch_sync_region_end(batch);
iris_flush_and_dirty_for_history(ice, batch, res, 0,
iris_flush_and_dirty_for_history(ice, batch, res,
PIPE_CONTROL_TILE_CACHE_FLUSH,
"cache history: post slow ZS clear");
if (clear_depth && z_res) {
......
......@@ -336,6 +336,7 @@ enum pipe_control_flags
#define PIPE_CONTROL_CACHE_FLUSH_BITS \
(PIPE_CONTROL_DEPTH_CACHE_FLUSH | \
PIPE_CONTROL_DATA_CACHE_FLUSH | \
PIPE_CONTROL_TILE_CACHE_FLUSH | \
PIPE_CONTROL_RENDER_TARGET_FLUSH)
#define PIPE_CONTROL_CACHE_INVALIDATE_BITS \
......
......@@ -66,6 +66,7 @@ iris_fine_fence_new(struct iris_batch *batch, unsigned flags)
} else {
pc = PIPE_CONTROL_WRITE_IMMEDIATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DATA_CACHE_FLUSH;
}
......
......@@ -292,6 +292,7 @@ iris_flush_all_caches(struct iris_batch *batch)
PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_VF_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
......
......@@ -366,6 +366,7 @@ iris_cache_flush_for_render(struct iris_batch *batch,
iris_emit_pipe_control_flush(batch,
"cache tracker: aux usage mismatch",
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
entry->data = v_aux_usage;
}
......
......@@ -1540,6 +1540,7 @@ iris_map_copy_region(struct iris_transfer *map)
iris_emit_pipe_control_flush(map->batch,
"transfer read: flush before mapping",
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
}
......@@ -2051,7 +2052,8 @@ iris_transfer_flush_region(struct pipe_context *ctx,
if (res->base.b.target == PIPE_BUFFER) {
if (map->staging)
history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
history_flush |= PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_TILE_CACHE_FLUSH;
if (map->dest_had_defined_contents)
history_flush |= iris_flush_bits_for_history(ice, res);
......
......@@ -7616,23 +7616,6 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
flags |= PIPE_CONTROL_CS_STALL;
}
if (GFX_VER >= 12 && ((flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) ||
(flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH))) {
/* From the PIPE_CONTROL instruction table, bit 28 (Tile Cache Flush
* Enable):
*
* Unified Cache (Tile Cache Disabled):
*
* When the Color and Depth (Z) streams are enabled to be cached in
* the DC space of L2, Software must use "Render Target Cache Flush
* Enable" and "Depth Cache Flush Enable" along with "Tile Cache
* Flush" for getting the color and depth (Z) write data to be
* globally observable. In this mode of operation it is not required
* to set "CS Stall" upon setting "Tile Cache Flush" bit.
*/
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
}
if (GFX_VER == 9 && devinfo->gt == 4) {
/* TODO: The big Skylake GT4 post sync op workaround */
}
......@@ -7737,7 +7720,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
if (INTEL_DEBUG & DEBUG_PIPE_CONTROL) {
fprintf(stderr,
" PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
" PC [%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%"PRIx64"]: %s\n",
(flags & PIPE_CONTROL_FLUSH_ENABLE) ? "PipeCon " : "",
(flags & PIPE_CONTROL_CS_STALL) ? "CS " : "",
(flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) ? "Scoreboard " : "",
......@@ -7747,6 +7730,7 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
(flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE) ? "TC " : "",
(flags & PIPE_CONTROL_DATA_CACHE_FLUSH) ? "DC " : "",
(flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH) ? "ZFlush " : "",
(flags & PIPE_CONTROL_TILE_CACHE_FLUSH) ? "Tile " : "",
(flags & PIPE_CONTROL_DEPTH_STALL) ? "ZStall " : "",
(flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE) ? "State " : "",
(flags & PIPE_CONTROL_TLB_INVALIDATE) ? "TLB " : "",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment