......@@ -444,16 +444,38 @@ Vulkan 1.1 -- all DONE: anv, radv
VK_KHR_storage_buffer_storage_class DONE (anv, radv)
VK_KHR_variable_pointers DONE (anv, radv)
Khronos extensions that are not part of any Vulkan version:
Vulkan 1.2 -- all DONE: anv
VK_KHR_8bit_storage DONE (anv/gen8+, radv)
VK_KHR_android_surface not started
VK_KHR_buffer_device_address DONE (anv/gen8+, radv)
VK_KHR_create_renderpass2 DONE (anv, radv)
VK_KHR_depth_stencil_resolve DONE (anv, radv)
VK_KHR_display DONE (anv, radv)
VK_KHR_display_swapchain not started
VK_KHR_draw_indirect_count DONE (anv, radv)
VK_KHR_driver_properties DONE (anv, radv)
VK_KHR_image_format_list DONE (anv, radv)
VK_KHR_imageless_framebuffer DONE (anv, radv)
VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, radv)
VK_KHR_separate_depth_stencil_layouts DONE (anv, radv)
VK_KHR_shader_atomic_int64 DONE (anv, radv)
VK_KHR_shader_float16_int8 DONE (anv/gen8+, radv)
VK_KHR_shader_float_controls DONE (anv/gen8+, radv)
VK_KHR_shader_subgroup_extended_types DONE (anv/gen8+, radv)
VK_KHR_spirv_1_4 DONE (anv, radv)
VK_KHR_timeline_semaphore DONE (anv, radv)
VK_KHR_uniform_buffer_standard_layout DONE (anv, radv)
VK_KHR_vulkan_memory_model DONE (anv, radv)
VK_EXT_descriptor_indexing DONE (anv/gen9+, radv)
VK_EXT_host_query_reset DONE (anv, radv)
VK_EXT_sampler_filter_minmax DONE (anv/gen9+, radv)
VK_EXT_scalar_block_layout DONE (anv, radv/gfx7+)
VK_EXT_separate_stencil_usage DONE (anv)
VK_EXT_shader_viewport_index_layer DONE (anv, radv)
Khronos extensions that are not part of any Vulkan version:
VK_KHR_android_surface not started
VK_KHR_display DONE (anv, radv)
VK_KHR_display_swapchain not started
VK_KHR_external_fence_fd DONE (anv, radv)
VK_KHR_external_fence_win32 not started
VK_KHR_external_memory_fd DONE (anv, radv, v3dv)
......@@ -462,24 +484,18 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_external_semaphore_win32 not started
VK_KHR_get_display_properties2 DONE (anv, radv)
VK_KHR_get_surface_capabilities2 DONE (anv, radv)
VK_KHR_image_format_list DONE (anv, radv)
VK_KHR_imageless_framebuffer DONE (anv, radv)
VK_KHR_incremental_present DONE (anv, radv)
VK_KHR_mir_surface not started
VK_KHR_performance_query DONE (anv/gen8+)
VK_KHR_pipeline_executable_properties DONE (anv, radv)
VK_KHR_push_descriptor DONE (anv, radv)
VK_KHR_sampler_mirror_clamp_to_edge DONE (anv, radv)
VK_KHR_shader_atomic_int64 DONE (anv, radv)
VK_KHR_shader_float16_int8 DONE (anv/gen8+, radv)
VK_KHR_shader_float_controls DONE (anv/gen8+, radv)
VK_KHR_shader_subgroup_extended_types DONE (radv)
VK_KHR_shader_clock DONE (anv, radv)
VK_KHR_shader_non_semantic_info DONE (anv, radv)
VK_KHR_shared_presentable_image not started
VK_KHR_surface DONE (anv, radv)
VK_KHR_surface_protected_capabilities DONE (anv, radv)
VK_KHR_swapchain DONE (anv, radv)
VK_KHR_swapchain_mutable_format DONE (anv, radv)
VK_KHR_uniform_buffer_standard_layout DONE (anv, radv)
VK_KHR_vulkan_memory_model not started
VK_KHR_wayland_surface DONE (anv, radv)
VK_KHR_win32_keyed_mutex not started
VK_KHR_win32_surface not started
......
......@@ -592,6 +592,7 @@ The integer capabilities:
* ``PIPE_CAP_NIR_ATOMICS_AS_DEREF``: Whether NIR atomic instructions should reference atomics as NIR derefs instead of by indices.
* ``PIPE_CAP_NO_CLIP_ON_COPY_TEX``: Driver doesn't want x/y/width/height clipped based on src size when doing a copy texture operation (e.g. may want out-of-bounds reads that produce 0 instead of leaving the texture content undefined)
* ``PIPE_CAP_MAX_TEXTURE_MB``: Maximum texture size in MB (default is 1024)
* ``PIPE_CAP_DEVICE_PROTECTED_CONTENT``: Whether the device supports protected / encrypted content.
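For illustration only (not part of this series), a gallium driver would advertise the new capability from its get_param screen callback. The driver name below is made up and the sketch leans on u_pipe_screen_get_param_defaults() for everything it does not handle explicitly:

#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "util/u_screen.h"

/* Hypothetical driver: advertise protected-content support. */
static int
example_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
   switch (param) {
   case PIPE_CAP_DEVICE_PROTECTED_CONTENT:
      return 1;   /* this device can allocate protected/encrypted buffers */
   default:
      return u_pipe_screen_get_param_defaults(pscreen, param);
   }
}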
.. _pipe_capf:
......
......@@ -1319,7 +1319,7 @@ struct __DRIdri2ExtensionRec {
* extensions.
*/
#define __DRI_IMAGE "DRI_IMAGE"
#define __DRI_IMAGE_VERSION 17
#define __DRI_IMAGE_VERSION 18
/**
* These formats correspond to the similarly named MESA_FORMAT_*
......@@ -1367,6 +1367,7 @@ struct __DRIdri2ExtensionRec {
* could be read after a flush."
*/
#define __DRI_IMAGE_USE_BACKBUFFER 0x0010
#define __DRI_IMAGE_USE_PROTECTED 0x0020
#define __DRI_IMAGE_TRANSFER_READ 0x1
......@@ -1486,6 +1487,11 @@ enum __DRIChromaSiting {
#define __BLIT_FLAG_FLUSH 0x0001
#define __BLIT_FLAG_FINISH 0x0002
/**
* Flags for createImageFromDmaBufs3
*/
#define __DRI_IMAGE_PROTECTED_CONTENT_FLAG 0x00000001
/**
* queryDmaBufFormatModifierAttribs attributes
*/
......@@ -1768,6 +1774,26 @@ struct __DRIimageExtensionRec {
int renderbuffer,
void *loaderPrivate,
unsigned *error);
/*
* Like createImageFromDmaBufs2, but with an added flags parameter.
*
* See __DRI_IMAGE_*_FLAG for valid definitions of flags.
*
* \since 18
*/
__DRIimage *(*createImageFromDmaBufs3)(__DRIscreen *screen,
int width, int height, int fourcc,
uint64_t modifier,
int *fds, int num_fds,
int *strides, int *offsets,
enum __DRIYUVColorSpace color_space,
enum __DRISampleRange sample_range,
enum __DRIChromaSiting horiz_siting,
enum __DRIChromaSiting vert_siting,
uint32_t flags,
unsigned *error,
void *loaderPrivate);
};
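As a rough sketch of how a loader might use the new entry point to import a protected dma-buf (not taken from this change; screen, fd, stride, dimensions and loader_private are placeholders):

/* Hypothetical loader-side call importing a protected dma-buf. */
unsigned error;
int fds[1]     = { dmabuf_fd };
int strides[1] = { dmabuf_stride };
int offsets[1] = { 0 };
__DRIimage *img =
   image_ext->createImageFromDmaBufs3(screen, width, height,
                                      DRM_FORMAT_ARGB8888,
                                      DRM_FORMAT_MOD_LINEAR,
                                      fds, 1, strides, offsets,
                                      __DRI_YUV_COLOR_SPACE_UNDEFINED,
                                      __DRI_YUV_RANGE_UNDEFINED,
                                      __DRI_YUV_CHROMA_SITING_UNDEFINED,
                                      __DRI_YUV_CHROMA_SITING_UNDEFINED,
                                      __DRI_IMAGE_PROTECTED_CONTENT_FLAG,
                                      &error, loader_private);
if (!img) {
   /* error holds one of the __DRI_IMAGE_ERROR_* codes */
}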
......@@ -1932,6 +1958,8 @@ typedef struct __DRIDriverVtableExtensionRec {
#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_MEDIUM (1 << 1)
#define __DRI2_RENDERER_HAS_CONTEXT_PRIORITY_HIGH (1 << 2)
#define __DRI2_RENDERER_HAS_PROTECTED_CONTENT 0x000e
typedef struct __DRI2rendererQueryExtensionRec __DRI2rendererQueryExtension;
struct __DRI2rendererQueryExtensionRec {
__DRIextension base;
......
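A loader could consume the new renderer query roughly as follows; this is a sketch, not code from the series: renderer_query and screen are placeholders, and it assumes the usual zero-on-success convention of the queryInteger entry point.

/* Hypothetical loader-side check for protected-content support. */
unsigned int protected_supported = 0;
if (renderer_query->queryInteger(screen,
                                 __DRI2_RENDERER_HAS_PROTECTED_CONTENT,
                                 &protected_supported) == 0 &&
    protected_supported) {
   /* the driver can handle protected/encrypted buffers, so
    * __DRI_IMAGE_USE_PROTECTED or the dma-buf import flag may be used */
}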
......@@ -6797,25 +6797,20 @@ void visit_load_sample_mask_in(isel_context *ctx, nir_intrinsic_instr *instr) {
log2_ps_iter_samples = ctx->options->key.fs.log2_ps_iter_samples;
}
/* The bit pattern matches that used by fixed function fragment
* processing. */
static const unsigned ps_iter_masks[] = {
0xffff, /* not used */
0x5555,
0x1111,
0x0101,
0x0001,
};
assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks));
Builder bld(ctx->program, ctx->block);
Temp sample_id = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
get_arg(ctx, ctx->args->ac.ancillary), Operand(8u), Operand(4u));
Temp ps_iter_mask = bld.copy(bld.def(v1), Operand(ps_iter_masks[log2_ps_iter_samples]));
Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sample_id, ps_iter_mask);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
bld.vop2(aco_opcode::v_and_b32, Definition(dst), mask, get_arg(ctx, ctx->args->ac.sample_coverage));
if (log2_ps_iter_samples) {
/* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
Temp sample_id = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
get_arg(ctx, ctx->args->ac.ancillary), Operand(8u), Operand(4u));
Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sample_id,
bld.copy(bld.def(v1), Operand(1u)));
bld.vop2(aco_opcode::v_and_b32, Definition(dst), mask, get_arg(ctx, ctx->args->ac.sample_coverage));
} else {
bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.sample_coverage));
}
}
unsigned gs_outprim_vertices(unsigned outprim)
......
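The ACO change above and the LLVM change below apply the same simplification. A scalar sketch of the resulting gl_SampleMaskIn[0] value (illustrative C, not driver code):

/* With per-sample dispatch an invocation covers exactly one sample, so
 * only the bit for its own gl_SampleID has to survive; without per-sample
 * dispatch the raw sample coverage is returned unchanged. */
static uint32_t
sample_mask_in(uint32_t sample_coverage, uint32_t sample_id,
               unsigned log2_ps_iter_samples)
{
   if (log2_ps_iter_samples)
      return sample_coverage & (1u << sample_id);
   return sample_coverage;
}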
......@@ -744,24 +744,17 @@ static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
log2_ps_iter_samples = ctx->args->options->key.fs.log2_ps_iter_samples;
}
/* The bit pattern matches that used by fixed function fragment
* processing. */
static const uint16_t ps_iter_masks[] = {
0xffff, /* not used */
0x5555,
0x1111,
0x0101,
0x0001,
};
assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks));
uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples];
LLVMValueRef result, sample_id;
sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, "");
result = LLVMBuildAnd(ctx->ac.builder, sample_id,
ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
if (log2_ps_iter_samples) {
/* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 1, false), sample_id, "");
result = LLVMBuildAnd(ctx->ac.builder, sample_id,
ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
} else {
result = ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage);
}
return result;
}
......
......@@ -820,6 +820,12 @@ v3dv_job_init(struct v3dv_job *job,
*/
cmd_buffer->state.dirty = ~0;
/* Honor inheritance of occlusion queries in secondaries if requested */
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
cmd_buffer->state.inheritance.occlusion_query_enable) {
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
/* Keep track of the first subpass that we are recording in this new job.
* We will use this when we emit the RCL to decide how to emit our loads
* and stores.
......@@ -957,6 +963,17 @@ v3dv_DestroyCommandPool(VkDevice _device,
vk_free2(&device->alloc, pAllocator, pool);
}
void
v3dv_TrimCommandPool(VkDevice device,
VkCommandPool commandPool,
VkCommandPoolTrimFlags flags)
{
/* We don't need to do anything here, our command pools never hold on to
* any resources from command buffers that are freed or reset.
*/
}
static void
cmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer)
{
......@@ -1058,10 +1075,15 @@ cmd_buffer_begin_render_pass_secondary(
cmd_buffer->state.framebuffer =
v3dv_framebuffer_from_handle(inheritance_info->framebuffer);
assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
cmd_buffer->state.subpass_idx = inheritance_info->subpass;
cmd_buffer->state.inheritance.occlusion_query_enable =
inheritance_info->occlusionQueryEnable;
/* Secondaries that execute inside a render pass won't start subpasses
* so we want to create a job for them here.
*/
assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count);
struct v3dv_job *job =
v3dv_cmd_buffer_start_job(cmd_buffer, inheritance_info->subpass,
V3DV_JOB_TYPE_GPU_CL_SECONDARY);
......@@ -1070,8 +1092,6 @@ cmd_buffer_begin_render_pass_secondary(
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
cmd_buffer->state.subpass_idx = inheritance_info->subpass;
/* Secondary command buffers don't know about the render area, but our
* scissor setup accounts for it, so let's make sure we make it large
* enough that it doesn't actually constrain any rendering. This should
......@@ -1120,12 +1140,6 @@ v3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer,
if (result != VK_SUCCESS)
return result;
}
/* If the primary may have an active occlusion query we need to honor
* that in the secondary.
*/
if (pBeginInfo->pInheritanceInfo->occlusionQueryEnable)
cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY;
}
cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING;
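For context, this is the application-side pattern the inheritance tracking above serves; a minimal sketch with placeholder handles (not part of the change):

/* Recording a secondary that may execute while an occlusion query is
 * active on the primary. */
VkCommandBufferInheritanceInfo inheritance = {
   .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
   .renderPass = render_pass,
   .subpass = 0,
   .framebuffer = framebuffer,
   .occlusionQueryEnable = VK_TRUE,
};
VkCommandBufferBeginInfo begin_info = {
   .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   .flags = VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT,
   .pInheritanceInfo = &inheritance,
};
vkBeginCommandBuffer(secondary, &begin_info);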
......@@ -2532,7 +2546,12 @@ cmd_buffer_execute_inside_pass(struct v3dv_cmd_buffer *primary,
{
assert(primary->state.job);
if (primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY)
/* Emit occlusion query state if needed so the draw calls inside our
* secondaries update the counters.
*/
bool has_occlusion_query =
primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY;
if (has_occlusion_query)
emit_occlusion_query(primary);
/* FIXME: if our primary job tiling doesn't enable MSAA but any of the
......@@ -2581,6 +2600,12 @@ cmd_buffer_execute_inside_pass(struct v3dv_cmd_buffer *primary,
cmd_buffer_subpass_split_for_barrier(primary,
needs_bcl_barrier);
v3dv_return_if_oom(primary, NULL);
/* Since we have created a new primary we need to re-emit
* occlusion query state.
*/
if (has_occlusion_query)
emit_occlusion_query(primary);
}
/* Make sure our primary job has all required BO references */
......@@ -5010,7 +5035,30 @@ v3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query)
{
unreachable("Timestamp queries are not supported.");
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool);
/* If this is called inside a render pass we need to finish the current
* job here...
*/
if (cmd_buffer->state.pass)
v3dv_cmd_buffer_finish_job(cmd_buffer);
struct v3dv_job *job =
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
cmd_buffer, -1);
v3dv_return_if_oom(cmd_buffer, NULL);
job->cpu.query_timestamp.pool = query_pool;
job->cpu.query_timestamp.query = query;
list_addtail(&job->list_link, &cmd_buffer->jobs);
cmd_buffer->state.job = NULL;
/* ...and resume the subpass after the timestamp */
if (cmd_buffer->state.pass)
v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
}
static void
......
......@@ -814,6 +814,11 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
struct timespec clock_res;
clock_getres(CLOCK_MONOTONIC, &clock_res);
const float timestamp_period =
clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
/* FIXME: this will probably require an in-depth review */
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = 4096,
......@@ -923,8 +928,8 @@ v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
.sampledImageStencilSampleCounts = supported_sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 0.0f,
.timestampComputeAndGraphics = true,
.timestampPeriod = timestamp_period,
.maxClipDistances = 8,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 8,
......@@ -990,7 +995,7 @@ v3dv_queue_family_properties = {
VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.timestampValidBits = 0, /* FIXME */
.timestampValidBits = 64,
.minImageTransferGranularity = { 1, 1, 1 },
};
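To show how the values reported above are meant to be consumed, here is an application-side sketch (not driver code) that converts a timestamp delta to nanoseconds using timestampPeriod and timestampValidBits; parameter names are placeholders.

#include <stdint.h>

/* ticks come from vkGetQueryPoolResults with VK_QUERY_RESULT_64_BIT;
 * period is VkPhysicalDeviceLimits::timestampPeriod (ns per tick) and
 * valid_bits is VkQueueFamilyProperties::timestampValidBits (64 here). */
static double
timestamp_delta_ns(uint64_t start_ticks, uint64_t end_ticks,
                   uint32_t valid_bits, float period)
{
   uint64_t mask = valid_bits == 64 ? ~0ull : (1ull << valid_bits) - 1ull;
   return (double)((end_ticks - start_ticks) & mask) * period;
}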
......
......@@ -65,6 +65,7 @@ EXTENSIONS = [
Extension('VK_KHR_external_memory_fd', 1, True),
Extension('VK_KHR_get_physical_device_properties2', 1, True),
Extension('VK_KHR_get_surface_capabilities2', 1, 'V3DV_HAS_SURFACE'),
Extension('VK_KHR_maintenance1', 2, True),
Extension('VK_KHR_surface', 25, 'V3DV_HAS_SURFACE'),
Extension('VK_KHR_swapchain', 68, 'V3DV_HAS_SURFACE'),
Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
......
......@@ -327,7 +327,7 @@ v3dv_CreateImage(VkDevice _device,
image->array_size = pCreateInfo->arrayLayers;
image->samples = pCreateInfo->samples;
image->usage = pCreateInfo->usage;
image->create_flags = pCreateInfo->flags;
image->flags = pCreateInfo->flags;
image->drm_format_mod = modifier;
image->tiling = tiling;
......@@ -596,6 +596,13 @@ v3dv_CreateImageView(VkDevice _device,
case VK_IMAGE_TYPE_3D:
assert(range->baseArrayLayer + v3dv_layer_count(image, range) - 1
<= u_minify(image->extent.depth, range->baseMipLevel));
/* VK_KHR_maintenance1 */
assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D ||
((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) &&
range->levelCount == 1 && range->layerCount == 1));
assert(pCreateInfo->viewType != VK_IMAGE_VIEW_TYPE_2D_ARRAY ||
((image->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) &&
range->levelCount == 1));
break;
default:
unreachable("bad VkImageType");
......
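The asserts added above permit the VK_KHR_maintenance1 pattern of viewing a single slice of a 3D image as a 2D attachment. An application-side sketch with placeholder handles and an arbitrary format (illustrative, not part of the change):

/* 3D image created with the 2D-array-compatible flag... */
VkImageCreateInfo image_info = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
   .flags = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR,
   .imageType = VK_IMAGE_TYPE_3D,
   .format = VK_FORMAT_R8G8B8A8_UNORM,
   .extent = { 64, 64, 16 },
   .mipLevels = 1,
   .arrayLayers = 1,
   .samples = VK_SAMPLE_COUNT_1_BIT,
   .tiling = VK_IMAGE_TILING_OPTIMAL,
   .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
   .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
/* ...and a single-level, single-layer 2D view selecting slice 5. */
VkImageViewCreateInfo view_info = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
   .image = image_3d,                      /* placeholder handle */
   .viewType = VK_IMAGE_VIEW_TYPE_2D,
   .format = VK_FORMAT_R8G8B8A8_UNORM,
   .subresourceRange = {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .baseMipLevel = 0, .levelCount = 1,
      .baseArrayLayer = 5, .layerCount = 1,
   },
};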
......@@ -610,7 +610,7 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_buffer *buffer,
struct v3dv_image *image,
uint32_t layer,
uint32_t layer_offset,
const VkBufferImageCopy *region)
{
struct v3dv_cl *cl = &job->indirect;
......@@ -621,13 +621,19 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
layer < image->extent.depth);
/* Load image to TLB */
emit_image_load(cl, framebuffer, image, imgrsc->aspectMask,
imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
assert((image->type != VK_IMAGE_TYPE_3D &&
layer_offset < region->imageSubresource.layerCount) ||
layer_offset < image->extent.depth);
const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
region->imageSubresource.baseArrayLayer + layer_offset :
region->imageOffset.z + layer_offset;
emit_image_load(cl, framebuffer, image,
region->imageSubresource.aspectMask,
image_layer,
region->imageSubresource.mipLevel,
true, false);
cl_emit(cl, END_OF_LOADS, end);
......@@ -654,13 +660,15 @@ emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
* Vulkan spec states that the output buffer must have packed stencil
* values, where each stencil value is 1 byte.
*/
uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : image->cpp;
uint32_t cpp =
region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1 : image->cpp;
uint32_t buffer_stride = width * cpp;
uint32_t buffer_offset =
buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
height * buffer_stride * layer_offset;
uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
uint32_t format = choose_tlb_format(framebuffer,
region->imageSubresource.aspectMask,
true, true, false);
bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;
......@@ -1177,7 +1185,7 @@ emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *dst,
struct v3dv_image *src,
uint32_t layer,
uint32_t layer_offset,
const VkImageCopy *region)
{
struct v3dv_cl *cl = &job->indirect;
......@@ -1188,24 +1196,36 @@ emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *srcrsc = &region->srcSubresource;
assert((src->type != VK_IMAGE_TYPE_3D && layer < srcrsc->layerCount) ||
layer < src->extent.depth);
assert((src->type != VK_IMAGE_TYPE_3D &&
layer_offset < region->srcSubresource.layerCount) ||
layer_offset < src->extent.depth);
emit_image_load(cl, framebuffer, src, srcrsc->aspectMask,
srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.baseArrayLayer + layer_offset :
region->srcOffset.z + layer_offset;
emit_image_load(cl, framebuffer, src,
region->srcSubresource.aspectMask,
src_layer,
region->srcSubresource.mipLevel,
false, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
const VkImageSubresourceLayers *dstrsc = &region->dstSubresource;
assert((dst->type != VK_IMAGE_TYPE_3D && layer < dstrsc->layerCount) ||
layer < dst->extent.depth);
assert((dst->type != VK_IMAGE_TYPE_3D &&
layer_offset < region->dstSubresource.layerCount) ||
layer_offset < dst->extent.depth);
const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.baseArrayLayer + layer_offset :
region->dstOffset.z + layer_offset;
emit_image_store(cl, framebuffer, dst, dstrsc->aspectMask,
dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
emit_image_store(cl, framebuffer, dst,
region->dstSubresource.aspectMask,
dst_layer,
region->dstSubresource.mipLevel,
false, false, false);
cl_emit(cl, END_OF_TILE_MARKER, end);
......@@ -1275,12 +1295,16 @@ copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
region->dstSubresource.aspectMask,
&internal_type, &internal_bpp);
/* From the Vulkan spec, VkImageCopy valid usage:
/* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
*
* "The layerCount member of srcSubresource and dstSubresource must match"
* "The number of slices of the extent (for 3D) or layers of the
* srcSubresource (for non-3D) must match the number of slices of the
* extent (for 3D) or layers of the dstSubresource (for non-3D)."
*/
assert(region->srcSubresource.layerCount ==
region->dstSubresource.layerCount);
assert((src->type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.layerCount : region->extent.depth) ==
(dst->type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.layerCount : region->extent.depth));
uint32_t num_layers;
if (dst->type != VK_IMAGE_TYPE_3D)
num_layers = region->dstSubresource.layerCount;
......@@ -4457,7 +4481,7 @@ emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
struct framebuffer_data *framebuffer,
struct v3dv_image *dst,
struct v3dv_image *src,
uint32_t layer,
uint32_t layer_offset,
const VkImageResolve *region)
{
struct v3dv_cl *cl = &job->indirect;
......@@ -4468,24 +4492,36 @@ emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
const VkImageSubresourceLayers *srcrsc = &region->srcSubresource;
assert((src->type != VK_IMAGE_TYPE_3D && layer < srcrsc->layerCount) ||
layer < src->extent.depth);
assert((src->type != VK_IMAGE_TYPE_3D &&
layer_offset < region->srcSubresource.layerCount) ||
layer_offset < src->extent.depth);
const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
region->srcSubresource.baseArrayLayer + layer_offset :
region->srcOffset.z + layer_offset;
emit_image_load(cl, framebuffer, src, srcrsc->aspectMask,
srcrsc->baseArrayLayer + layer, srcrsc->mipLevel,
emit_image_load(cl, framebuffer, src,
region->srcSubresource.aspectMask,
src_layer,
region->srcSubresource.mipLevel,
false, false);
cl_emit(cl, END_OF_LOADS, end);
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
const VkImageSubresourceLayers *dstrsc = &region->dstSubresource;
assert((dst->type != VK_IMAGE_TYPE_3D && layer < dstrsc->layerCount) ||
layer < dst->extent.depth);
assert((dst->type != VK_IMAGE_TYPE_3D &&
layer_offset < region->dstSubresource.layerCount) ||
layer_offset < dst->extent.depth);
const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
region->dstSubresource.baseArrayLayer + layer_offset :
region->dstOffset.z + layer_offset;
emit_image_store(cl, framebuffer, dst, dstrsc->aspectMask,
dstrsc->baseArrayLayer + layer, dstrsc->mipLevel,
emit_image_store(cl, framebuffer, dst,
region->dstSubresource.aspectMask,
dst_layer,
region->dstSubresource.mipLevel,
false, false, true);
cl_emit(cl, END_OF_TILE_MARKER, end);
......
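The copy and resolve paths above all select the TLB layer the same way. An equivalent standalone sketch of that convention (illustrative, not shared driver code):

/* Layer selection used by the per-tile-list emitters: array images
 * iterate layers starting at baseArrayLayer, 3D images iterate depth
 * slices starting at the region's z offset. */
static uint32_t
copy_layer_index(VkImageType type,
                 const VkImageSubresourceLayers *subresource,
                 int32_t offset_z,
                 uint32_t layer_offset)
{
   return type != VK_IMAGE_TYPE_3D ?
          subresource->baseArrayLayer + layer_offset :
          (uint32_t)offset_z + layer_offset;
}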
......@@ -425,7 +425,7 @@ struct v3dv_image {
uint32_t array_size;
uint32_t samples;
VkImageUsageFlags usage;
VkImageCreateFlags create_flags;
VkImageCreateFlags flags;
VkImageTiling tiling;
VkFormat vk_format;
......@@ -744,6 +744,7 @@ enum v3dv_job_type {
V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
};
struct v3dv_reset_query_cpu_job_info {
......@@ -810,6 +811,11 @@ struct v3dv_csd_indirect_cpu_job_info {
bool needs_wg_uniform_rewrite;
};
struct v3dv_timestamp_query_cpu_job_info {
struct v3dv_query_pool *pool;
uint32_t query;
};
struct v3dv_job {
struct list_head list_link;
......@@ -881,6 +887,7 @@ struct v3dv_job {
struct v3dv_clear_attachments_cpu_job_info clear_attachments;
struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
struct v3dv_csd_indirect_cpu_job_info csd_indirect;
struct v3dv_timestamp_query_cpu_job_info query_timestamp;
} cpu;
/* Job specs for TFU jobs */
......@@ -976,6 +983,11 @@ struct v3dv_cmd_buffer_state {
bool has_barrier;
bool has_bcl_barrier;
/* Secondary command buffer state */
struct {
bool occlusion_query_enable;
} inheritance;
/* Command buffer state saved during a meta operation */
struct {
uint32_t subpass_idx;
......@@ -1084,10 +1096,14 @@ struct v3dv_resource {
struct v3dv_query {
bool maybe_available;
struct v3dv_bo *bo;
union {
struct v3dv_bo *bo; /* Used by GPU queries (occlusion) */
uint64_t value; /* Used by CPU queries (timestamp) */
};
};
struct v3dv_query_pool {
VkQueryType query_type;
uint32_t query_count;
struct v3dv_query *queries;
};
......
......@@ -31,12 +31,12 @@ v3dv_CreateQueryPool(VkDevice _device,
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);
assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION ||
pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP);
assert(pCreateInfo->queryCount > 0);
/* FIXME: the hw allows us to allocate up to 16 queries in a single block
* so we should try to use that.
* for occlusion queries so we should try to use that.
*/
struct v3dv_query_pool *pool =
vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
......@@ -44,6 +44,7 @@ v3dv_CreateQueryPool(VkDevice _device,
if (pool == NULL)
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
pool->query_type = pCreateInfo->queryType;
pool->query_count = pCreateInfo->queryCount;
VkResult result;
......@@ -59,16 +60,24 @@ v3dv_CreateQueryPool(VkDevice _device,
uint32_t i;
for (i = 0; i < pool->query_count; i++) {
pool->queries[i].maybe_available = false;
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
if (!pool->queries[i].bo) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
/* For occlusion queries we only need a 4-byte counter */
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
switch (pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION:
pool->queries[i].bo = v3dv_bo_alloc(device, 4096, "query", true);
if (!pool->queries[i].bo) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
/* For occlusion queries we only need a 4-byte counter */
if (!v3dv_bo_map(device, pool->queries[i].bo, 4)) {
result = vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail_alloc_bo;
}
break;
case VK_QUERY_TYPE_TIMESTAMP:
pool->queries[i].value = 0;
break;
default:
unreachable("Unsupported query type");
}
}
......@@ -98,21 +107,105 @@ v3dv_DestroyQueryPool(VkDevice _device,
if (!pool)
return;
for (uint32_t i = 0; i < pool->query_count; i++)
v3dv_bo_free(device, pool->queries[i].bo);
if (pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
for (uint32_t i = 0; i < pool->query_count; i++)
v3dv_bo_free(device, pool->queries[i].bo);
}
vk_free2(&device->alloc, pAllocator, pool->queries);
vk_free2(&device->alloc, pAllocator, pool);
}
static void
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint32_t value)
write_query_result(void *dst, uint32_t idx, bool do_64bit, uint64_t value)
{
if (do_64bit) {
uint64_t *dst64 = (uint64_t *) dst;
dst64[idx] = value;
} else {
uint32_t *dst32 = (uint32_t *) dst;
dst32[idx] = value;
dst32[idx] = (uint32_t) value;
}
}
static uint64_t
get_occlusion_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
bool *available)
{
assert(pool && pool->query_type == VK_QUERY_TYPE_OCCLUSION);
struct v3dv_query *q = &pool->queries[query];
assert(q->bo && q->bo->map);
if (do_wait) {
/* From the Vulkan 1.0 spec:
*
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
* become available in a finite amount of time (e.g. due to not
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
* error may occur."
*/
if (!q->maybe_available)
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
if (!v3dv_bo_wait(device, q->bo, 0xffffffffffffffffull))
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
*available = true;
} else {
*available = q->maybe_available && v3dv_bo_wait(device, q->bo, 0);
}
return (uint64_t) *((uint32_t *) q->bo->map);
}
static uint64_t
get_timestamp_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
bool *available)
{
assert(pool && pool->query_type == VK_QUERY_TYPE_TIMESTAMP);
struct v3dv_query *q = &pool->queries[query];
if (do_wait) {
/* From the Vulkan 1.0 spec:
*
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
* become available in a finite amount of time (e.g. due to not
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
* error may occur."
*/
if (!q->maybe_available)
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
*available = true;
} else {
*available = q->maybe_available;
}
return q->value;
}
static uint64_t
get_query_result(struct v3dv_device *device,
struct v3dv_query_pool *pool,
uint32_t query,
bool do_wait,
bool *available)
{
switch (pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION:
return get_occlusion_query_result(device, pool, query, do_wait, available);
case VK_QUERY_TYPE_TIMESTAMP:
return get_timestamp_query_result(device, pool, query, do_wait, available);
default:
unreachable("Unsupported query type");
}
}
......@@ -135,30 +228,8 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
VkResult result = VK_SUCCESS;
for (uint32_t i = first; i < first + count; i++) {
assert(pool->queries[i].bo && pool->queries[i].bo->map);
struct v3dv_bo *bo = pool->queries[i].bo;
const uint32_t *counter = (const uint32_t *) bo->map;
bool available;
if (do_wait) {
/* From the Vulkan 1.0 spec:
*
* "If VK_QUERY_RESULT_WAIT_BIT is set, (...) If the query does not
* become available in a finite amount of time (e.g. due to not
* issuing a query since the last reset), a VK_ERROR_DEVICE_LOST
* error may occur."
*/
if (!pool->queries[i].maybe_available)
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
if (!v3dv_bo_wait(device, bo, 0xffffffffffffffffull))
return vk_error(device->instance, VK_ERROR_DEVICE_LOST);
available = true;
} else {
available = pool->queries[i].maybe_available &&
v3dv_bo_wait(device, bo, 0);
}
uint64_t value = get_query_result(device, pool, i, do_wait, &available);
/**
* From the Vulkan 1.0 spec:
......@@ -174,7 +245,7 @@ v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
const bool write_result = available || do_partial;
if (write_result)
write_query_result(data, slot, do_64bit, *counter);
write_query_result(data, slot, do_64bit, value);
slot++;
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
......
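For reference, the application-side flow that exercises the new timestamp query path (placeholder handles, error handling omitted; this is not part of the change itself):

/* Create a small timestamp pool, bracket some work with two timestamps
 * and read the 64-bit results back. */
VkQueryPoolCreateInfo pool_info = {
   .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
   .queryType = VK_QUERY_TYPE_TIMESTAMP,
   .queryCount = 2,
};
VkQueryPool pool;
vkCreateQueryPool(device, &pool_info, NULL, &pool);

vkCmdResetQueryPool(cmd, pool, 0, 2);
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, pool, 0);
/* ... work to be timed ... */
vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, pool, 1);

uint64_t ticks[2];
vkGetQueryPoolResults(device, pool, 0, 2, sizeof(ticks), ticks,
                      sizeof(uint64_t),
                      VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);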
......@@ -154,22 +154,37 @@ static VkResult
handle_reset_query_cpu_job(struct v3dv_job *job)
{
/* We are about to reset query counters so we need to make sure that
* The GPU is not using them.
* The GPU is not using them. The exception is timestamp queries, since
* we handle those in the CPU.
*
* FIXME: we could avoid blocking the main thread for this if we use
* submission thread.
*/
VkResult result = gpu_queue_wait_idle(&job->device->queue);
if (result != VK_SUCCESS)
return result;
struct v3dv_reset_query_cpu_job_info *info = &job->cpu.query_reset;
assert(info->pool);
if (info->pool->query_type == VK_QUERY_TYPE_OCCLUSION) {
VkResult result = gpu_queue_wait_idle(&job->device->queue);
if (result != VK_SUCCESS)
return result;
}
for (uint32_t i = info->first; i < info->first + info->count; i++) {
assert(i < info->pool->query_count);
struct v3dv_query *query = &info->pool->queries[i];
query->maybe_available = false;
uint32_t *counter = (uint32_t *) query->bo->map;
*counter = 0;
switch (info->pool->query_type) {
case VK_QUERY_TYPE_OCCLUSION: {
uint32_t *counter = (uint32_t *) query->bo->map;
*counter = 0;
break;
}
case VK_QUERY_TYPE_TIMESTAMP:
query->value = 0;
break;
default:
unreachable("Unsupported query type");
}
}
return VK_SUCCESS;
......@@ -419,6 +434,26 @@ handle_copy_buffer_to_image_cpu_job(struct v3dv_job *job)
return VK_SUCCESS;
}
static VkResult
handle_timestamp_query_cpu_job(struct v3dv_job *job)
{
assert(job->type == V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY);
struct v3dv_timestamp_query_cpu_job_info *info = &job->cpu.query_timestamp;
/* Wait for completion of all work queued before the timestamp query */
v3dv_QueueWaitIdle(v3dv_queue_to_handle(&job->device->queue));
/* Compute timestamp */
struct timespec t;
clock_gettime(CLOCK_MONOTONIC, &t);
assert(info->query < info->pool->query_count);
struct v3dv_query *query = &info->pool->queries[info->query];
query->maybe_available = true;
query->value = t.tv_sec * 1000000000ull + t.tv_nsec;
return VK_SUCCESS;
}