Commit d6fc72e2 authored by Mark Janes
Browse files

intel: support secondary command buffers in INTEL_MEASURE



When a secondary command buffer is encountered, insert an event that
links to the new batch.

This commit leaves intel_measure timestamp buffer objects mmapped,
which is more efficient than mapping/unmapping several times.  With
the BOs mapped at all times, timestamp buffers can be managed directly
by intel_measure, where it will iterate over timestamps of linked
secondary buffers.

With timestamp buffers managed by intel_measure, a more efficient and
accurate check for render completion can be moved into intel_measure
from anv/iris.
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <!7354>
parent 9eacbfaf
......@@ -105,32 +105,25 @@ iris_init_batch_measure(struct iris_context *ice, struct iris_batch *batch)
memset(batch->measure, 0, batch_bytes);
struct iris_measure_batch *measure = batch->measure;
measure->bo =
iris_bo_alloc_tiled(bufmgr, "measure",
config->batch_size * sizeof(uint64_t),
1, /* alignment */
IRIS_MEMZONE_OTHER,
I915_TILING_NONE,
0, /* pitch */
BO_ALLOC_ZEROED);
measure->bo = iris_bo_alloc_tiled(bufmgr, "measure",
config->batch_size * sizeof(uint64_t),
1, /* alignment */
IRIS_MEMZONE_OTHER,
I915_TILING_NONE,
0, /* pitch */
BO_ALLOC_ZEROED);
measure->base.timestamps = iris_bo_map(NULL, measure->bo, MAP_READ);
measure->base.framebuffer =
(uintptr_t)util_hash_crc32(&ice->state.framebuffer,
sizeof(ice->state.framebuffer));
}
/* Report whether the GPU has finished with this measurement batch.
 *
 * Returns true once the timestamp buffer object is no longer busy,
 * i.e. the batch that writes timestamps into it has completed.
 * NOTE(review): a busy check only says the BO is idle; it presumably
 * implies all timestamps were written — confirm against iris_bo_busy
 * semantics.
 */
static bool
iris_measure_ready(struct iris_measure_batch *measure)
{
return !iris_bo_busy(measure->bo);
}
void
iris_destroy_batch_measure(struct iris_measure_batch *batch)
{
if (!batch)
return;
iris_bo_unmap(batch->bo);
iris_bo_unreference(batch->bo);
batch->bo = NULL;
free(batch);
......@@ -347,21 +340,8 @@ iris_measure_gather(struct iris_context *ice)
list_first_entry(&measure_device->queued_snapshots,
struct iris_measure_batch, link);
if (!iris_measure_ready(measure)) {
/* command buffer has begun execution on the gpu, but has not
* completed.
*/
break;
}
/* iris_bo_wait returns immediately if the batch has been submitted but
* not started execution. The first timestamp will be non-zero if the
* buffer object is ready.
*/
uint64_t *map = iris_bo_map(NULL, measure->bo, MAP_READ);
if (map[0] == 0) {
/* The command buffer has not begun execution on the gpu. */
iris_bo_unmap(measure->bo);
if (!intel_measure_ready(&measure->base)) {
/* batch has not completed execution */
break;
}
......@@ -369,9 +349,9 @@ iris_measure_gather(struct iris_context *ice)
assert(measure->bo);
assert(measure->base.index % 2 == 0);
intel_measure_push_result(measure_device, &measure->base, map);
intel_measure_push_result(measure_device, &measure->base);
iris_bo_unmap(measure->bo);
/* iris_bo_unmap(measure->bo); */
measure->base.index = 0;
measure->base.frame = 0;
iris_destroy_batch_measure(measure);
......
......@@ -233,6 +233,7 @@ intel_measure_snapshot_string(enum intel_measure_snapshot_type type)
[INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE] = "mcs partial resolve",
[INTEL_SNAPSHOT_SLOW_COLOR_CLEAR] = "slow color clear",
[INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR] = "slow depth clear",
[INTEL_SNAPSHOT_SECONDARY_BATCH] = "secondary command buffer",
[INTEL_SNAPSHOT_END] = "end",
};
assert(type < ARRAY_SIZE(names));
......@@ -368,6 +369,19 @@ raw_timestamp_delta(uint64_t time0, uint64_t time1)
}
}
/**
 * Check whether rendering has completed for the batch.
 *
 * Completion is detected by inspecting the final timestamp slot:
 * rendering is done once that slot holds a non-zero value.
 */
bool
intel_measure_ready(struct intel_measure_batch *batch)
{
   assert(batch->timestamps);
   assert(batch->index > 1);

   /* The last recorded snapshot writes the last timestamp; a zero value
    * means it has not landed yet.
    */
   const uint64_t last_timestamp = batch->timestamps[batch->index - 1];
   return last_timestamp != 0;
}
/**
* Submit completed snapshots for buffering.
*
......@@ -377,16 +391,27 @@ raw_timestamp_delta(uint64_t time0, uint64_t time1)
*/
void
intel_measure_push_result(struct intel_measure_device *device,
struct intel_measure_batch *batch,
uint64_t *timestamps)
struct intel_measure_batch *batch)
{
struct intel_measure_ringbuffer *rb = device->ringbuffer;
uint64_t *timestamps = batch->timestamps;
assert(timestamps != NULL);
assert(timestamps[0] != 0);
for (int i = 0; i < batch->index; i += 2) {
const struct intel_measure_snapshot *begin = &batch->snapshots[i];
const struct intel_measure_snapshot *end = &batch->snapshots[i+1];
assert (end->type == INTEL_SNAPSHOT_END);
if (begin->type == INTEL_SNAPSHOT_SECONDARY_BATCH) {
assert(begin->secondary != NULL);
begin->secondary->batch_count = batch->batch_count;
intel_measure_push_result(device, begin->secondary);
continue;
}
const uint64_t prev_end_ts = rb->results[rb->head].end_ts;
/* advance ring buffer */
......@@ -402,7 +427,7 @@ intel_measure_push_result(struct intel_measure_device *device,
config.buffer_size);
warned = true;
}
return;
break;
}
struct intel_measure_buffered_result *buffered_result =
......
......@@ -48,6 +48,7 @@ enum intel_measure_snapshot_type {
INTEL_SNAPSHOT_MCS_PARTIAL_RESOLVE,
INTEL_SNAPSHOT_SLOW_COLOR_CLEAR,
INTEL_SNAPSHOT_SLOW_DEPTH_CLEAR,
INTEL_SNAPSHOT_SECONDARY_BATCH,
INTEL_SNAPSHOT_END,
};
......@@ -101,11 +102,15 @@ struct intel_measure_config {
bool enabled;
};
struct intel_measure_batch;
struct intel_measure_snapshot {
enum intel_measure_snapshot_type type;
unsigned count, event_count;
const char* event_name;
uintptr_t framebuffer, vs, tcs, tes, gs, fs, cs;
/* for vulkan secondary command buffers */
struct intel_measure_batch *secondary;
};
struct intel_measure_buffered_result {
......@@ -139,6 +144,7 @@ struct intel_measure_batch {
unsigned index;
unsigned frame, batch_count, event_count;
uintptr_t framebuffer;
uint64_t *timestamps;
struct intel_measure_snapshot snapshots[0];
};
......@@ -148,9 +154,11 @@ bool intel_measure_state_changed(const struct intel_measure_batch *batch,
uintptr_t vs, uintptr_t tcs, uintptr_t tes,
uintptr_t gs, uintptr_t fs, uintptr_t cs);
void intel_measure_frame_transition(unsigned frame);
bool intel_measure_ready(struct intel_measure_batch *batch);
void intel_measure_push_result(struct intel_measure_device *device,
struct intel_measure_batch *batch,
uint64_t *timestamps);
struct intel_measure_batch *batch);
struct gen_device_info;
void intel_measure_print(struct intel_measure_device *device,
......
......@@ -28,6 +28,7 @@
#include <fcntl.h>
#include "anv_private.h"
#include "anv_measure.h"
#include "genxml/gen8_pack.h"
#include "genxml/genX_bits.h"
......@@ -1013,6 +1014,7 @@ void
anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary)
{
anv_measure_add_secondary(primary, secondary);
switch (secondary->exec_mode) {
case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
anv_batch_emit_batch(&primary->batch, &secondary->batch);
......
......@@ -124,17 +124,13 @@ anv_measure_init(struct anv_cmd_buffer *cmd_buffer)
config->batch_size * sizeof(uint64_t),
ANV_BO_ALLOC_MAPPED,
0,
&measure->bo);
(struct anv_bo**)&measure->bo);
measure->base.timestamps = measure->bo->map;
assert(result == VK_SUCCESS);
cmd_buffer->measure = measure;
}
static bool
anv_measure_ready(struct anv_device *device,
struct anv_measure_batch *measure)
{
/* anv_device_bo_busy returns VK_NOT_READY if the bo is busy */
return(VK_SUCCESS == anv_device_bo_busy(device, measure->bo));
list_inithead(&measure->link);
cmd_buffer->measure = measure;
}
/**
......@@ -154,32 +150,18 @@ anv_measure_gather(struct anv_device *device)
list_first_entry(&measure_device->queued_snapshots,
struct anv_measure_batch, link);
if (!anv_measure_ready(device, measure)) {
if (!intel_measure_ready(&measure->base)) {
/* command buffer has begun execution on the gpu, but has not
* completed.
*/
break;
}
uint64_t *map = anv_gem_mmap(device, measure->bo->gem_handle, 0,
measure->base.index * sizeof(uint64_t), 0);
if (map[0] == 0) {
/* The first timestamp is still zero. The Command buffer has not
* begun execution on the gpu. It was recently submitted, perhaps by
* another thread.
*/
anv_gem_munmap(device, map, measure->base.index * sizeof(uint64_t));
break;
}
list_del(&measure->link);
assert(measure->bo);
assert(measure->base.index % 2 == 0);
intel_measure_push_result(measure_device, &measure->base, map);
intel_measure_push_result(measure_device, &measure->base);
anv_gem_munmap(device, map, measure->base.index * sizeof(uint64_t));
measure->base.index = 0;
measure->base.frame = 0;
}
......@@ -209,6 +191,11 @@ anv_measure_start_snapshot(struct anv_cmd_buffer *cmd_buffer,
uintptr_t framebuffer = (uintptr_t)cmd_buffer->state.framebuffer;
if (!measure->base.framebuffer &&
cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
/* secondary command buffer inherited the framebuffer from the primary */
measure->base.framebuffer = framebuffer;
/* verify framebuffer has been properly tracked */
assert(type == INTEL_SNAPSHOT_END ||
framebuffer == measure->base.framebuffer ||
......@@ -266,6 +253,11 @@ state_changed(struct anv_cmd_buffer *cmd_buffer,
enum intel_measure_snapshot_type type)
{
uintptr_t vs=0, tcs=0, tes=0, gs=0, fs=0, cs=0;
if (cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)
/* can't record timestamps in this mode */
return false;
if (type == INTEL_SNAPSHOT_COMPUTE) {
const struct anv_compute_pipeline *cs_pipe =
cmd_buffer->state.compute.pipeline;
......@@ -375,6 +367,7 @@ anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
measure->base.framebuffer = 0;
measure->base.frame = 0;
measure->base.event_count = 0;
list_inithead(&measure->link);
anv_device_release_bo(device, measure->bo);
VkResult result =
......@@ -382,7 +375,8 @@ anv_measure_reset(struct anv_cmd_buffer *cmd_buffer)
config->batch_size * sizeof(uint64_t),
ANV_BO_ALLOC_MAPPED,
0,
&measure->bo);
(struct anv_bo**)&measure->bo);
measure->base.timestamps = measure->bo->map;
assert(result == VK_SUCCESS);
}
......@@ -526,3 +520,36 @@ _anv_measure_beginrenderpass(struct anv_cmd_buffer *cmd_buffer)
measure->base.framebuffer = (uintptr_t) cmd_buffer->state.framebuffer;
}
/**
 * Link a secondary command buffer's measurements into the primary.
 *
 * Inserts an INTEL_SNAPSHOT_SECONDARY_BATCH snapshot into the primary's
 * measurement batch and points it at the secondary's measurement state,
 * so intel_measure_push_result() can recurse into the secondary's
 * timestamps when results are gathered.
 *
 * No-op when measurement is disabled, when per-batch/per-frame timing
 * already covers the secondary, or when the secondary was recorded with
 * SIMULTANEOUS_USE (its timestamps cannot be attributed to one replay).
 */
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
                           struct anv_cmd_buffer *secondary)
{
   struct intel_measure_config *config = config_from_command_buffer(primary);
   struct anv_measure_batch *measure = primary->measure;

   if (!config)
      return;
   if (measure == NULL)
      return;

   if (config->flags & (INTEL_MEASURE_BATCH | INTEL_MEASURE_FRAME))
      /* secondary timing will be contained within the primary */
      return;

   if (secondary->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT) {
      static bool warned = false;
      if (unlikely(!warned)) {
         fprintf(config->file,
                 "WARNING: INTEL_MEASURE cannot capture timings of commands "
                 "in secondary command buffers with "
                 "VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT set.\n");
         /* BUGFIX: the original never set the flag, so the "warn once"
          * guard was ineffective and the warning printed on every call.
          */
         warned = true;
      }
      return;
   }

   /* Close any open begin/end snapshot pair before inserting the link. */
   if (measure->base.index % 2 == 1)
      anv_measure_end_snapshot(primary, measure->base.event_count);

   /* Capture the slot that _anv_measure_snapshot() will fill next, so the
    * secondary batch can be attached to it after the snapshot is emitted.
    */
   struct intel_measure_snapshot *snapshot =
      &(measure->base.snapshots[measure->base.index]);

   _anv_measure_snapshot(primary, INTEL_SNAPSHOT_SECONDARY_BATCH, NULL, 0);

   snapshot->secondary = &secondary->measure->base;
}
......@@ -51,6 +51,10 @@ void anv_measure_acquire(struct anv_device *device);
/* should be combined with endcommandbuffer */
void _anv_measure_submit(struct anv_cmd_buffer *cmd_buffer);
void
_anv_measure_add_secondary(struct anv_cmd_buffer *primary,
struct anv_cmd_buffer *secondary);
#define anv_measure_snapshot(cmd_buffer, type, event_name, count) \
if (unlikely(cmd_buffer->measure)) \
_anv_measure_snapshot(cmd_buffer, type, event_name, count)
......@@ -67,5 +71,8 @@ void _anv_measure_submit(struct anv_cmd_buffer *cmd_buffer);
if (unlikely(cmd_buffer->measure)) \
_anv_measure_submit(cmd_buffer)
#define anv_measure_add_secondary(primary, secondary) \
if (unlikely(primary->measure)) \
_anv_measure_add_secondary(primary, secondary)
#endif /* ANV_MEASURE_H */
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment