Commit 8ea7ee15 authored by Samuel Pitoiset

radv: rename and re-document cache flush flags

SMEM and VMEM caches are L0 on gfx10. Ported from RadeonSI.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent 5411f470
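Note for readers: the rename itself is mechanical. A hypothetical set of compatibility aliases (not part of this patch) summarizes the mapping between the old and new flag names used throughout the hunks below:
/* Hypothetical aliases, for illustration only; this commit renames the flags
 * outright and does not add these defines. */
#define RADV_CMD_FLAG_INV_SMEM_L1         RADV_CMD_FLAG_INV_SCACHE /* scalar L1 (KCACHE) */
#define RADV_CMD_FLAG_INV_VMEM_L1         RADV_CMD_FLAG_INV_VCACHE /* vector L1 (TC L1) */
#define RADV_CMD_FLAG_INV_GLOBAL_L2       RADV_CMD_FLAG_INV_L2     /* L2 writeback + invalidate */
#define RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 RADV_CMD_FLAG_WB_L2      /* L2 writeback only */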
@@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_SHADER_WRITE_BIT:
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
-flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+flush_bits |= RADV_CMD_FLAG_WB_L2;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
@@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_TRANSFER_WRITE_BIT:
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-RADV_CMD_FLAG_INV_GLOBAL_L2;
+RADV_CMD_FLAG_INV_L2;
if (flush_CB_meta)
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
@@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
break;
case VK_ACCESS_UNIFORM_READ_BIT:
-flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
break;
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_INV_GLOBAL_L2;
+flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_INV_L2;
break;
case VK_ACCESS_SHADER_READ_BIT:
-flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
if (!image_is_coherent)
-flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+flush_bits |= RADV_CMD_FLAG_INV_L2;
break;
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
if (flush_CB)
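The two hunks above decide which of the renamed bits a barrier accumulates from its source and destination access masks. A minimal sketch (assumptions: a transfer write followed by a shader read of an image that is not L2-coherent, with CB metadata flushing enabled), using only the cases shown above:
/* Illustrative sketch, not part of the patch. */
enum radv_cmd_flush_bits flush_bits = 0;
/* srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT */
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
              RADV_CMD_FLAG_FLUSH_AND_INV_DB |
              RADV_CMD_FLAG_INV_L2 |
              RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; /* flush_CB_meta assumed true */
/* dstAccessMask = VK_ACCESS_SHADER_READ_BIT, image assumed not L2-coherent */
flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
              RADV_CMD_FLAG_INV_L2;
cmd_buffer->state.flush_bits |= flush_bits;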
@@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer(
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
-cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
/* Make sure to sync all pending active queries at the end of
* command buffer.
@@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->device->physical_device->rad_info.chip_class >= GFX7,
(queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE |
-RADV_CMD_FLAG_INV_SMEM_L1 |
-RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_INV_GLOBAL_L2 |
+RADV_CMD_FLAG_INV_SCACHE |
+RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
@@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
queue->queue_family_index == RING_COMPUTE &&
queue->device->physical_device->rad_info.chip_class >= GFX7,
RADV_CMD_FLAG_INV_ICACHE |
-RADV_CMD_FLAG_INV_SMEM_L1 |
-RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_INV_GLOBAL_L2 |
+RADV_CMD_FLAG_INV_SCACHE |
+RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
}
@@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
fill_buffer_shader(cmd_buffer, bo, offset, size, value);
flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_WB_L2;
} else if (size) {
uint64_t va = radv_buffer_get_va(bo);
va += offset;
@@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_WB_L2;
}
static uint32_t
@@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1;
+RADV_CMD_FLAG_INV_VCACHE;
/* Initialize the DCC metadata as "fully expanded". */
@@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
radv_meta_restore(&saved_state, cmd_buffer);
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_GLOBAL_L2;
+RADV_CMD_FLAG_INV_L2;
/* Re-initialize FMASK in fully expanded mode. */
radv_initialize_fmask(cmd_buffer, image, subresourceRange);
@@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1;
+RADV_CMD_FLAG_INV_VCACHE;
}
void
@@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
}
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1;
+RADV_CMD_FLAG_INV_VCACHE;
if (radv_image_has_htile(dst_image)) {
if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
@@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits {
};
enum radv_cmd_flush_bits {
-RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
-/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
-RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
-/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
-RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
-/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
-RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
-/* Same as above, but only writes back and doesn't invalidate */
-RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
+/* Instruction cache. */
+RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
+/* Scalar L1 cache. */
+RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
+/* Vector L1 cache. */
+RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
+/* L2 cache + L2 metadata cache writeback & invalidate.
+ * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+RADV_CMD_FLAG_INV_L2 = 1 << 3,
+/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+ * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+ * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+RADV_CMD_FLAG_WB_L2 = 1 << 4,
/* Framebuffer caches */
-RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
-RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
-RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
-RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
+RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
+RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
+RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
+RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
+/* Engine synchronization. */
-RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
-RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
-RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
-RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
+RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
+RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
+RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
+RADV_CMD_FLAG_VGT_FLUSH = 1 << 12,
+/* Pipeline query controls. */
-RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
-RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
-RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
+RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
+RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
+RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
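The re-documented WB_L2 vs. INV_L2 split is easiest to see in the compute paths changed earlier in this patch (radv_fill_buffer, clear_htile_mask): a compute-shader write that the CP or CB/DB will consume on GFX6-8 needs an L2 writeback for those non-L2 clients, while shader-to-shader visibility only needs the vector-cache invalidation. A rough sketch of that pattern, using the new names:
/* Illustrative sketch, matching the radv_fill_buffer hunk above. */
cmd_buffer->state.flush_bits |=
        RADV_CMD_FLAG_CS_PARTIAL_FLUSH | /* wait for the compute shader */
        RADV_CMD_FLAG_INV_VCACHE |       /* vector L1 may hold stale lines */
        RADV_CMD_FLAG_WB_L2;             /* write dirty L2 lines back for
                                          * non-L2 clients (CB/DB/CP on GFX6-8) */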
@@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
&push_constants);
-cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
-RADV_CMD_FLAG_INV_VMEM_L1;
+cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
+RADV_CMD_FLAG_INV_VCACHE;
if (flags & VK_QUERY_RESULT_WAIT_BIT)
cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
@@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_GLOBAL_L2 |
-RADV_CMD_FLAG_INV_VMEM_L1;
+RADV_CMD_FLAG_INV_L2 |
+RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB;
@@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (chip_class <= GFX8) {
@@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
EVENT_TC_MD_ACTION_ENA;
/* Ideally flush TC together with CB/DB. */
-if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+if (flush_bits & RADV_CMD_FLAG_INV_L2) {
/* Writeback and invalidate everything in L2 & L1. */
tc_flags = EVENT_TC_ACTION_ENA |
EVENT_TC_WB_ACTION_ENA;
/* Clear the flags. */
-flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
-RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
-RADV_CMD_FLAG_INV_VMEM_L1);
+flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
+RADV_CMD_FLAG_WB_L2 |
+RADV_CMD_FLAG_INV_VCACHE);
}
assert(flush_cnt);
(*flush_cnt)++;
@@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
*/
if ((cp_coher_cntl ||
(flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-RADV_CMD_FLAG_INV_VMEM_L1 |
-RADV_CMD_FLAG_INV_GLOBAL_L2 |
-RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
+RADV_CMD_FLAG_INV_VCACHE |
+RADV_CMD_FLAG_INV_L2 |
+RADV_CMD_FLAG_WB_L2))) &&
!is_mec) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
-if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
-(chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+(chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
@@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
cp_coher_cntl = 0;
} else {
-if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+if(flush_bits & RADV_CMD_FLAG_WB_L2) {
/* WB = write-back
* NC = apply to non-coherent MTYPEs
* (i.e. MTYPE <= 1, which is what we use everywhere)
@@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
}
-if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
+if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
si_emit_acquire_mem(cs, is_mec,
chip_class >= GFX9,
cp_coher_cntl |
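On the GFX6-8 path above, the renamed bits still map onto the same CP_COHER_CNTL fields; only the names change. A condensed sketch of that mapping (the INV_VCACHE branch is truncated in the hunk above, so the TCL1 field shown for it is an assumption):
/* Condensed, illustrative mapping; not a verbatim excerpt from the patch. */
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
        cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
        cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_VCACHE)
        cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); /* assumed; cut off in the hunk */
/* INV_L2 requests a full L2 writeback + invalidate via ACQUIRE_MEM; WB_L2 alone
 * falls back to the same full invalidation on GFX6-7, where a pure writeback
 * is not supported. */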