Skip to content
Snippets Groups Projects
Commit 424b60dc authored by Lionel Landwerlin's avatar Lionel Landwerlin Committed by Juan A. Suárez
Browse files

anv: rework queries writes to ensure ordering memory writes


We use a mix of MI & PIPE_CONTROL commands to write our queries' data
(results & availability). Those commands' memory write order is not
guaranteed with regard to their order in the command stream, unless CS
stalls are inserted between them. This is problematic for 2 reasons :

   1. We copy results from the device using MI commands even though
      the values are generated from PIPE_CONTROL, meaning we could
      copy unlanded values into the results and then copy the
      availability that is inconsistent with the values.

   2. We allow the user to poll on the availability values of the
      query pool from the CPU. If the availability lands in memory
      before the values then we could return invalid values.

This change does 2 things to address this problem :

      - We use either PIPE_CONTROL or MI commands to write both
        queries values and availability, so that the ordering of the
        memory writes guarantees that if availability is visible,
        results are also visible.

      - For the occlusion & timestamp queries we apply a CS stall
        before copying the results on the device, to ensure copying
        with MI commands see the correct values of previous
        PIPE_CONTROL writes of availability (required by the Vulkan
        spec).

Signed-off-by: default avatarLionel Landwerlin <lionel.g.landwerlin@intel.com>
Reported-by: default avatarIago Toral Quiroga <itoral@igalia.com>
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: default avatarJason Ekstrand <jason@jlekstrand.net>
(cherry picked from commit a07d06f1)
parent 9d610c1c
No related branches found
No related tags found
Loading
......@@ -346,14 +346,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
}
static void
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr)
emit_query_mi_availability(struct gen_mi_builder *b,
struct anv_address addr,
bool available)
{
gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available));
}
static void
emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr,
bool available)
{
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DestinationAddressType = DAT_PPGTT;
pc.PostSyncOperation = WriteImmediateData;
pc.Address = addr;
pc.ImmediateData = 1;
pc.ImmediateData = available;
}
}
......@@ -366,11 +375,39 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b, struct anv_query_pool *pool,
uint32_t first_index, uint32_t num_queries)
{
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
emit_query_availability(cmd_buffer, slot_addr);
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
/* These queries are written with a PIPE_CONTROL so clear them using the
* PIPE_CONTROL as well so we don't have to synchronize between 2 types
* of operations.
*/
assert((pool->stride % 8) == 0);
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) {
emit_query_pc_availability(cmd_buffer,
anv_address_add(slot_addr, qword * 8),
false);
}
emit_query_pc_availability(cmd_buffer, slot_addr, true);
}
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
emit_query_mi_availability(b, slot_addr, true);
}
break;
default:
unreachable("Unsupported query type");
}
}
......@@ -383,11 +420,28 @@ void genX(CmdResetQueryPool)(
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
for (uint32_t i = 0; i < queryCount; i++) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
sdm.Address = anv_query_address(pool, firstQuery + i);
sdm.ImmediateData = 0;
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
case VK_QUERY_TYPE_TIMESTAMP:
for (uint32_t i = 0; i < queryCount; i++) {
emit_query_pc_availability(cmd_buffer,
anv_query_address(pool, firstQuery + i),
false);
}
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
for (uint32_t i = 0; i < queryCount; i++)
emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
break;
}
default:
unreachable("Unsupported query type");
}
}
......@@ -525,7 +579,7 @@ void genX(CmdEndQueryIndexedEXT)(
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
emit_query_availability(cmd_buffer, query_addr);
emit_query_pc_availability(cmd_buffer, query_addr, true);
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
......@@ -543,7 +597,7 @@ void genX(CmdEndQueryIndexedEXT)(
offset += 16;
}
emit_query_availability(cmd_buffer, query_addr);
emit_query_mi_availability(&b, query_addr, true);
break;
}
......@@ -554,7 +608,7 @@ void genX(CmdEndQueryIndexedEXT)(
}
emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
emit_query_availability(cmd_buffer, query_addr);
emit_query_mi_availability(&b, query_addr, true);
break;
default:
......@@ -613,7 +667,7 @@ void genX(CmdWriteTimestamp)(
break;
}
emit_query_availability(cmd_buffer, query_addr);
emit_query_pc_availability(cmd_buffer, query_addr, true);
/* When multiview is active the spec requires that N consecutive query
* indices are used, where N is the number of active views in the subpass.
......@@ -684,7 +738,20 @@ void genX(CmdCopyQueryPoolResults)(
}
if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
(cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) {
(cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
/* Occlusion & timestamp queries are written using a PIPE_CONTROL and
* because we're about to copy values from MI commands, we need to
* stall the command streamer to make sure the PIPE_CONTROL values have
* landed, otherwise we could see inconsistent values & availability.
*
* From the vulkan spec:
*
* "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
* previous uses of vkCmdResetQueryPool in the same queue, without
* any additional synchronization."
*/
pool->type == VK_QUERY_TYPE_OCCLUSION ||
pool->type == VK_QUERY_TYPE_TIMESTAMP) {
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment