Commit 74f6cdf2 authored by Lionel Landwerlin

anv: implement VK_KHR_synchronization2


Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
parent 9c562c43
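For context on the change below: VK_KHR_synchronization2 replaces the original barrier, event, and queue-submit entry points with "2KHR" variants that take extensible structs (VkDependencyInfoKHR, VkSubmitInfo2KHR) and 64-bit VkPipelineStageFlags2KHR/VkAccessFlags2KHR masks. The following is an illustrative application-side use of the new barrier entry point the driver implements here; it is not part of the commit, and the cmd_buf and image handles are assumed to exist.

   /* Illustrative only: record a transfer-write -> fragment-shader-read
    * image barrier through the synchronization2 entry point. */
   VkImageMemoryBarrier2KHR img_barrier = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR,
      .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT_KHR,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR,
      .dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR,
      .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
      .newLayout = VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .image = image,                              /* assumed handle */
      .subresourceRange = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .levelCount = 1,
         .layerCount = 1,
      },
   };
   VkDependencyInfoKHR dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR,
      .imageMemoryBarrierCount = 1,
      .pImageMemoryBarriers = &img_barrier,
   };
   vkCmdPipelineBarrier2KHR(cmd_buf, &dep_info);   /* cmd_buf assumed */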
......@@ -248,6 +248,7 @@ get_device_extensions(const struct anv_physical_device *device,
.KHR_swapchain = true,
.KHR_swapchain_mutable_format = true,
#endif
.KHR_synchronization2 = true,
.KHR_timeline_semaphore = true,
.KHR_uniform_buffer_standard_layout = true,
.KHR_variable_pointers = true,
......@@ -1624,6 +1625,13 @@ void anv_GetPhysicalDeviceFeatures2(
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: {
VkPhysicalDeviceSynchronization2FeaturesKHR *features =
(VkPhysicalDeviceSynchronization2FeaturesKHR *)ext;
features->synchronization2 = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR: {
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
(VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
......
......@@ -112,6 +112,7 @@ EXTENSIONS = [
Extension('VK_KHR_surface_protected_capabilities', 1, 'ANV_HAS_SURFACE'),
Extension('VK_KHR_swapchain', 70, 'ANV_HAS_SURFACE'),
Extension('VK_KHR_swapchain_mutable_format', 1, 'ANV_HAS_SURFACE'),
Extension('VK_KHR_synchronization2', 1, True),
Extension('VK_KHR_timeline_semaphore', 2, True),
Extension('VK_KHR_uniform_buffer_standard_layout', 1, True),
Extension('VK_KHR_variable_pointers', 1, True),
......
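Advertising the extension above is only half of the opt-in; an application still has to query and enable the synchronization2 feature bit that the new VkPhysicalDeviceSynchronization2FeaturesKHR case reports. A hedged application-side sketch (not driver code; physical_device and the queue-create info are assumed):

   VkPhysicalDeviceSynchronization2FeaturesKHR sync2_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &sync2_features,
   };
   vkGetPhysicalDeviceFeatures2(physical_device, &features2);  /* assumed handle */

   /* If sync2_features.synchronization2 is VK_TRUE, chain the same struct into
    * VkDeviceCreateInfo and enable the extension by name. */
   const char *extensions[] = { VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME };
   VkDeviceCreateInfo device_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
      .pNext = &sync2_features,
      .enabledExtensionCount = 1,
      .ppEnabledExtensionNames = extensions,
      /* pQueueCreateInfos etc. omitted for brevity */
   };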
......@@ -1314,6 +1314,19 @@ vk_image_layout_to_usage_flags(VkImageLayout layout,
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
return vk_image_layout_to_usage_flags(VK_IMAGE_LAYOUT_GENERAL, aspect);
case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR:
return VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR:
if (aspect == VK_IMAGE_ASPECT_DEPTH_BIT ||
aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
return VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
} else {
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
return VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
}
case VK_IMAGE_LAYOUT_SHADING_RATE_OPTIMAL_NV:
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
return VK_IMAGE_USAGE_SHADING_RATE_IMAGE_BIT_NV;
......@@ -1347,6 +1360,7 @@ vk_image_layout_is_read_only(VkImageLayout layout,
case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR:
return false;
case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
......@@ -1357,6 +1371,7 @@ vk_image_layout_is_read_only(VkImageLayout layout,
case VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT:
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL:
case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR:
return true;
case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL:
......
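The two hunks above teach the layout helpers about the unified synchronization2 layouts: the new VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR resolves to color or depth/stencil attachment usage depending on the aspect queried, and VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL_KHR is treated as read-only. As a hypothetical self-check of that mapping (reading the (layout, aspect) signature off the recursive call in the first hunk; not part of the commit):

   assert(vk_image_layout_to_usage_flags(VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
                                         VK_IMAGE_ASPECT_COLOR_BIT) ==
          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
   assert(vk_image_layout_to_usage_flags(VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL_KHR,
                                         VK_IMAGE_ASPECT_DEPTH_BIT) ==
          VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);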
......@@ -2440,11 +2440,11 @@ enum anv_pipe_bits {
enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags);
VkAccessFlags2KHR flags);
enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags);
VkAccessFlags2KHR flags);
#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
VK_IMAGE_ASPECT_COLOR_BIT | \
......
......@@ -1180,11 +1180,9 @@ anv_post_queue_fence_update(struct anv_device *device, VkFence _fence)
static VkResult
anv_queue_submit(struct anv_queue *queue,
struct anv_cmd_buffer *cmd_buffer,
const VkSemaphore *in_semaphores,
const uint64_t *in_values,
const VkSemaphoreSubmitInfoKHR *in_semaphores,
uint32_t num_in_semaphores,
const VkSemaphore *out_semaphores,
const uint64_t *out_values,
const VkSemaphoreSubmitInfoKHR *out_semaphores,
uint32_t num_out_semaphores,
struct anv_bo *wsi_signal_bo,
VkFence fence,
......@@ -1201,16 +1199,16 @@ anv_queue_submit(struct anv_queue *queue,
VkResult result = VK_SUCCESS;
for (uint32_t i = 0; i < num_in_semaphores; i++) {
result = anv_queue_submit_add_in_semaphore(submit, device,
in_semaphores[i],
in_values ? in_values[i] : 0);
in_semaphores[i].semaphore,
in_semaphores[i].value);
if (result != VK_SUCCESS)
goto error;
}
for (uint32_t i = 0; i < num_out_semaphores; i++) {
result = anv_queue_submit_add_out_semaphore(submit, device,
out_semaphores[i],
out_values ? out_values[i] : 0);
out_semaphores[i].semaphore,
out_semaphores[i].value);
if (result != VK_SUCCESS)
goto error;
}
......@@ -1238,10 +1236,10 @@ anv_queue_submit(struct anv_queue *queue,
return result;
}
VkResult anv_QueueSubmit(
VkResult anv_QueueSubmit2KHR(
VkQueue _queue,
uint32_t submitCount,
const VkSubmitInfo* pSubmits,
const VkSubmitInfo2KHR* pSubmits,
VkFence fence)
{
ANV_FROM_HANDLE(anv_queue, queue, _queue);
......@@ -1266,8 +1264,11 @@ VkResult anv_QueueSubmit(
* come up with something more efficient but this shouldn't be a
* common case.
*/
result = anv_queue_submit(queue, NULL, NULL, NULL, 0, NULL, NULL, 0,
NULL, fence, -1);
result = anv_queue_submit(queue, NULL /* cmd_buffer */,
NULL /* in_semaphores */, 0,
NULL /* out_semaphores */, 0,
NULL /* wsi_signal_bo */,
fence, -1);
goto out;
}
......@@ -1282,32 +1283,21 @@ VkResult anv_QueueSubmit(
mem_signal_info && mem_signal_info->memory != VK_NULL_HANDLE ?
anv_device_memory_from_handle(mem_signal_info->memory)->bo : NULL;
const VkTimelineSemaphoreSubmitInfoKHR *timeline_info =
vk_find_struct_const(pSubmits[i].pNext,
TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR);
const VkPerformanceQuerySubmitInfoKHR *perf_info =
vk_find_struct_const(pSubmits[i].pNext,
PERFORMANCE_QUERY_SUBMIT_INFO_KHR);
const uint64_t *wait_values =
timeline_info && timeline_info->waitSemaphoreValueCount ?
timeline_info->pWaitSemaphoreValues : NULL;
const uint64_t *signal_values =
timeline_info && timeline_info->signalSemaphoreValueCount ?
timeline_info->pSignalSemaphoreValues : NULL;
if (pSubmits[i].commandBufferCount == 0) {
if (pSubmits[i].commandBufferInfoCount == 0) {
/* If we don't have any command buffers, we need to submit a dummy
* batch to give GEM something to wait on. We could, potentially,
* come up with something more efficient but this shouldn't be a
* common case.
*/
result = anv_queue_submit(queue, NULL,
pSubmits[i].pWaitSemaphores,
wait_values,
pSubmits[i].waitSemaphoreCount,
pSubmits[i].pSignalSemaphores,
signal_values,
pSubmits[i].signalSemaphoreCount,
pSubmits[i].pWaitSemaphoreInfos,
pSubmits[i].waitSemaphoreInfoCount,
pSubmits[i].pSignalSemaphoreInfos,
pSubmits[i].signalSemaphoreInfoCount,
wsi_signal_bo,
submit_fence,
-1);
......@@ -1317,41 +1307,35 @@ VkResult anv_QueueSubmit(
continue;
}
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
for (uint32_t j = 0; j < pSubmits[i].commandBufferInfoCount; j++) {
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
pSubmits[i].pCommandBuffers[j]);
pSubmits[i].pCommandBufferInfos[j].commandBuffer);
assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
assert(!anv_batch_has_error(&cmd_buffer->batch));
anv_measure_submit(cmd_buffer);
/* Fence for this execbuf. NULL for all but the last one */
VkFence execbuf_fence =
(j == pSubmits[i].commandBufferCount - 1) ?
(j == pSubmits[i].commandBufferInfoCount - 1) ?
submit_fence : VK_NULL_HANDLE;
const VkSemaphore *in_semaphores = NULL, *out_semaphores = NULL;
const uint64_t *in_values = NULL, *out_values = NULL;
const VkSemaphoreSubmitInfoKHR *in_semaphores = NULL, *out_semaphores = NULL;
uint32_t num_in_semaphores = 0, num_out_semaphores = 0;
if (j == 0) {
/* Only the first batch gets the in semaphores */
in_semaphores = pSubmits[i].pWaitSemaphores;
in_values = wait_values;
num_in_semaphores = pSubmits[i].waitSemaphoreCount;
in_semaphores = pSubmits[i].pWaitSemaphoreInfos;
num_in_semaphores = pSubmits[i].waitSemaphoreInfoCount;
}
const bool is_last_cmd_buffer = j == pSubmits[i].commandBufferCount - 1;
if (is_last_cmd_buffer) {
if (j == pSubmits[i].commandBufferInfoCount - 1) {
/* Only the last batch gets the out semaphores */
out_semaphores = pSubmits[i].pSignalSemaphores;
out_values = signal_values;
num_out_semaphores = pSubmits[i].signalSemaphoreCount;
out_semaphores = pSubmits[i].pSignalSemaphoreInfos;
num_out_semaphores = pSubmits[i].signalSemaphoreInfoCount;
}
result = anv_queue_submit(queue, cmd_buffer,
in_semaphores, in_values, num_in_semaphores,
out_semaphores, out_values, num_out_semaphores,
is_last_cmd_buffer ? wsi_signal_bo : NULL,
execbuf_fence,
in_semaphores, num_in_semaphores,
out_semaphores, num_out_semaphores,
wsi_signal_bo, execbuf_fence,
perf_info ? perf_info->counterPassIndex : 0);
if (result != VK_SUCCESS)
goto out;
......@@ -1376,7 +1360,7 @@ out:
* anv_device_set_lost() would have been called already by a callee of
* anv_queue_submit().
*/
result = anv_device_set_lost(queue->device, "vkQueueSubmit() failed");
result = anv_device_set_lost(queue->device, "vkQueueSubmit2KHR() failed");
}
return result;
......
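On the submission side, the rework above reflects that VkSubmitInfo2KHR carries per-semaphore and per-command-buffer structs rather than parallel arrays, and that timeline values now live directly in VkSemaphoreSubmitInfoKHR, which is why the VkTimelineSemaphoreSubmitInfoKHR lookup is dropped. An illustrative application-side submit, with all handles assumed to exist:

   VkSemaphoreSubmitInfoKHR wait_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = acquire_semaphore,                      /* assumed */
      .stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR,
   };
   VkSemaphoreSubmitInfoKHR signal_info = {
      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR,
      .semaphore = timeline_semaphore,                     /* assumed */
      .value = 42,            /* only meaningful for timeline semaphores */
   };
   VkCommandBufferSubmitInfoKHR cmd_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR,
      .commandBuffer = cmd_buf,                            /* assumed */
   };
   VkSubmitInfo2KHR submit = {
      .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR,
      .waitSemaphoreInfoCount = 1,
      .pWaitSemaphoreInfos = &wait_info,
      .commandBufferInfoCount = 1,
      .pCommandBufferInfos = &cmd_info,
      .signalSemaphoreInfoCount = 1,
      .pSignalSemaphoreInfos = &signal_info,
   };
   vkQueueSubmit2KHR(queue, 1, &submit, fence);            /* queue, fence assumed */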
......@@ -116,35 +116,36 @@ __vk_errorf(struct anv_instance *instance,
enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags)
VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
unsigned b;
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_SHADER_WRITE_BIT:
switch ((VkAccessFlags2KHR)(1 << b)) {
case MESA_VK_ACCESS_2_SHADER_WRITE_BIT_KHR:
case MESA_VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as write
* destination through the data port. To make its content available
* to future operations, flush the data cache.
*/
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
case MESA_VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as render
* target. To make its content available to future operations, flush
* the render target cache.
*/
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
case MESA_VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as depth
* buffer. To make its content available to future operations, flush
* the depth cache.
*/
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_TRANSFER_WRITE_BIT:
case MESA_VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR:
/* We're transitioning a buffer that was previously used as a
* transfer write destination. Generic write operations include color
* & depth operations as well as buffer operations like :
......@@ -161,14 +162,14 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
case MESA_VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* We're transitioning a buffer for generic write operations. Flush
* all the caches.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
case MESA_VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
case MESA_VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
/* We're transitioning a buffer written either from VS stage or from
* the command streamer (see CmdEndTransformFeedbackEXT), we just
* need to stall the CS.
......@@ -185,14 +186,14 @@ anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
VkAccessFlags flags)
VkAccessFlags2KHR flags)
{
enum anv_pipe_bits pipe_bits = 0;
unsigned b;
for_each_bit(b, flags) {
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
switch ((VkAccessFlags2KHR)(1 << b)) {
case MESA_VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR:
/* Indirect draw commands take a buffer as input that we're going to
* read from the command streamer to load some of the HW registers
* (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
......@@ -209,15 +210,15 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
*/
pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case MESA_VK_ACCESS_2_INDEX_READ_BIT_KHR:
case MESA_VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR:
/* We're transitioning a buffer to be used as input for vkCmdDraw*
* commands, so we invalidate the VF cache to make sure there is no
* stale data when we start rendering.
*/
pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_UNIFORM_READ_BIT:
case MESA_VK_ACCESS_2_UNIFORM_READ_BIT_KHR:
/* We're transitioning a buffer to be used as uniform data. Because
* uniform is accessed through the data port & sampler, we need to
* invalidate the texture cache (sampler) & constant cache (data
......@@ -229,28 +230,28 @@ anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
else
pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
break;
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case MESA_VK_ACCESS_2_SHADER_READ_BIT_KHR:
case MESA_VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR:
case MESA_VK_ACCESS_2_TRANSFER_READ_BIT_KHR:
/* Transitioning a buffer to be read through the sampler, so
* invalidate the texture cache, we don't want any stale data.
*/
pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
break;
case VK_ACCESS_MEMORY_READ_BIT:
case MESA_VK_ACCESS_2_MEMORY_READ_BIT_KHR:
/* Transitioning a buffer for generic read, invalidate all the
* caches.
*/
pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
break;
case VK_ACCESS_MEMORY_WRITE_BIT:
case MESA_VK_ACCESS_2_MEMORY_WRITE_BIT_KHR:
/* Generic write, make sure all previously written things land in
* memory.
*/
pipe_bits |= ANV_PIPE_FLUSH_BITS;
break;
case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
case MESA_VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT:
case MESA_VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT:
/* Transitioning a buffer for conditional rendering or transform
* feedback. We'll load the content of this buffer into HW registers
* using the command streamer, so we need to stall the command
......
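A worked example of what the two helpers above compute, reading the results off the cases visible in the hunks (hypothetical illustration, assuming a valid anv_device pointer; not part of the commit): a transfer-write to shader-read dependency flushes the render-target and depth caches on the source side and invalidates the texture cache on the destination side.

   enum anv_pipe_bits flush =
      anv_pipe_flush_bits_for_access_flags(device,
                                           MESA_VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR);
   /* flush contains ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
    *                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT (per the case above) */

   enum anv_pipe_bits invalidate =
      anv_pipe_invalidate_bits_for_access_flags(device,
                                                MESA_VK_ACCESS_2_SHADER_READ_BIT_KHR);
   /* invalidate contains ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT */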
......@@ -2362,43 +2362,41 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.pending_pipe_bits = bits;
}
void genX(CmdPipelineBarrier)(
VkCommandBuffer commandBuffer,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
VkBool32 byRegion,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
/* Return the stages the barrier operation is waiting on. */
static VkPipelineStageFlags2KHR
genX(cmd_buffer_barrier)(struct anv_cmd_buffer *cmd_buffer,
const VkDependencyInfoKHR *dep_info)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
/* XXX: Right now, we're really dumb and just flush whatever categories
* the app asks for. One of these days we may make this a bit better
* but right now that's all the hardware allows for in most areas.
*/
VkAccessFlags src_flags = 0;
VkAccessFlags dst_flags = 0;
VkAccessFlags2KHR src_flags = 0;
VkAccessFlags2KHR dst_flags = 0;
VkPipelineStageFlags2KHR src_stages = 0;
for (uint32_t i = 0; i < memoryBarrierCount; i++) {
src_flags |= pMemoryBarriers[i].srcAccessMask;
dst_flags |= pMemoryBarriers[i].dstAccessMask;
for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
src_stages |= dep_info->pMemoryBarriers[i].srcStageMask;
src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
src_flags |= pBufferMemoryBarriers[i].srcAccessMask;
dst_flags |= pBufferMemoryBarriers[i].dstAccessMask;
for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
src_stages |= dep_info->pBufferMemoryBarriers[i].srcStageMask;
src_flags |= dep_info->pBufferMemoryBarriers[i].srcAccessMask;
dst_flags |= dep_info->pBufferMemoryBarriers[i].dstAccessMask;
}
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
src_flags |= pImageMemoryBarriers[i].srcAccessMask;
dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
ANV_FROM_HANDLE(anv_image, image, pImageMemoryBarriers[i].image);
const VkImageSubresourceRange *range =
&pImageMemoryBarriers[i].subresourceRange;
for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
const VkImageMemoryBarrier2KHR *img_barrier =
&dep_info->pImageMemoryBarriers[i];
src_stages |= img_barrier->srcStageMask;
src_flags |= img_barrier->srcAccessMask;
dst_flags |= img_barrier->dstAccessMask;
ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
uint32_t base_layer, layer_count;
if (image->type == VK_IMAGE_TYPE_3D) {
......@@ -2412,8 +2410,8 @@ void genX(CmdPipelineBarrier)(
if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
transition_depth_buffer(cmd_buffer, image,
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
img_barrier->oldLayout,
img_barrier->newLayout,
false /* will_full_fast_clear */);
}
......@@ -2422,8 +2420,8 @@ void genX(CmdPipelineBarrier)(
range->baseMipLevel,
anv_get_levelCount(image, range),
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
img_barrier->oldLayout,
img_barrier->newLayout,
false /* will_full_fast_clear */);
}
......@@ -2436,8 +2434,8 @@ void genX(CmdPipelineBarrier)(
range->baseMipLevel,
anv_get_levelCount(image, range),
base_layer, layer_count,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
img_barrier->oldLayout,
img_barrier->newLayout,
false /* will_full_fast_clear */);
}
}
......@@ -2446,6 +2444,17 @@ void genX(CmdPipelineBarrier)(
cmd_buffer->state.pending_pipe_bits |=
anv_pipe_flush_bits_for_access_flags(cmd_buffer->device, src_flags) |
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer->device, dst_flags);
return src_stages;
}
void genX(CmdPipelineBarrier2KHR)(
VkCommandBuffer commandBuffer,
const VkDependencyInfoKHR* pDependencyInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
genX(cmd_buffer_barrier)(cmd_buffer, pDependencyInfo);
}
static void
......@@ -6289,19 +6298,44 @@ void genX(CmdEndConditionalRenderingEXT)(
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT | \
VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)
void genX(CmdSetEvent)(
static VkPipelineStageFlags2KHR anv_pipeline_stage_pipelined_bits =
VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT_KHR |
VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR |
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR |
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR |
VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR |
VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR |
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR |
VK_PIPELINE_STAGE_2_COPY_BIT_KHR |
VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR |
VK_PIPELINE_STAGE_2_BLIT_BIT_KHR |
VK_PIPELINE_STAGE_2_CLEAR_BIT_KHR |
VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT;
void genX(CmdSetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
const VkDependencyInfoKHR* pDependencyInfo)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
VkPipelineStageFlags2KHR stageMask =
genX(cmd_buffer_barrier)(cmd_buffer, pDependencyInfo);
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
if (stageMask & anv_pipeline_stage_pipelined_bits) {
pc.StallAtPixelScoreboard = true;
pc.CommandStreamerStallEnable = true;
}
......@@ -6316,10 +6350,10 @@ void genX(CmdSetEvent)(
}
}
void genX(CmdResetEvent)(
void genX(CmdResetEvent2KHR)(
VkCommandBuffer commandBuffer,
VkEvent _event,
VkPipelineStageFlags stageMask)
VkPipelineStageFlags2KHR stageMask)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
ANV_FROM_HANDLE(anv_event, event, _event);
......@@ -6328,13 +6362,13 @@ void genX(CmdResetEvent)(
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
if (stageMask & anv_pipeline_stage_pipelined_bits) {
pc.StallAtPixelScoreboard = true;
pc.CommandStreamerStallEnable = true;
}
pc.DestinationAddressType = DAT_PPGTT;
pc.PostSyncOperation = WriteImmediateData;
pc.DestinationAddressType = DAT_PPGTT,
pc.PostSyncOperation = WriteImmediateData,
pc.Address = (struct anv_address) {
cmd_buffer->device->dynamic_state_pool.block_pool.bo,
event->state.offset
......@@ -6343,22 +6377,15 @@ void genX(CmdResetEvent)(
}
}
void genX(CmdWaitEvents)(
void genX(CmdWaitEvents2KHR)(
VkCommandBuffer commandBuffer,
uint32_t eventCount,
const VkEvent* pEvents,
VkPipelineStageFlags srcStageMask,
VkPipelineStageFlags destStageMask,
uint32_t memoryBarrierCount,
const VkMemoryBarrier* pMemoryBarriers,
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
const VkImageMemoryBarrier* pImageMemoryBarriers)
const VkDependencyInfoKHR* pDependencyInfos)
{
#if GEN_GEN >= 8
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
#if GEN_GEN >= 8
for (uint32_t i = 0; i < eventCount; i++) {
ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
......@@ -6376,11 +6403,7 @@ void genX(CmdWaitEvents)(
anv_finishme("Implement events on gen7");
#endif
genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
false, /* byRegion */
memoryBarrierCount, pMemoryBarriers,
bufferMemoryBarrierCount, pBufferMemoryBarriers,
imageMemoryBarrierCount, pImageMemoryBarriers);