Commit c520f4de authored by Jason Ekstrand
Browse files

anv: Add a concept of a descriptor buffer



This buffer goes alongside the CPU data structure and may contain
pointers, bindless handles, or any other descriptor information.
Currently, all descriptors are size zero and nothing goes in the buffer
but this commit sets up the framework we will need later.
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
parent 5c30fffe
......@@ -1001,6 +1001,37 @@ anv_cmd_buffer_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
set->buffer_view_count = layout->buffer_view_count;
set->buffer_views = (*push_set)->buffer_views;
if (layout->descriptor_buffer_size &&
((*push_set)->set_used_on_gpu ||
set->desc_mem.alloc_size < layout->descriptor_buffer_size)) {
/* The previous buffer is either actively used by some GPU command (so
* we can't modify it) or is too small. Allocate a new one.
*/
struct anv_state desc_mem =
anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
layout->descriptor_buffer_size, 32);
if (set->desc_mem.alloc_size) {
/* TODO: Do we really need to copy all the time? */
memcpy(desc_mem.map, set->desc_mem.map,
MIN2(desc_mem.alloc_size, set->desc_mem.alloc_size));
}
set->desc_mem = desc_mem;
struct anv_address addr = {
.bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo,
.offset = set->desc_mem.offset,
};
const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
set->desc_surface_state =
anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
isl_dev->ss.size, isl_dev->ss.align);
anv_fill_buffer_surface_state(cmd_buffer->device,
set->desc_surface_state,
ISL_FORMAT_R32G32B32A32_FLOAT,
addr, layout->descriptor_buffer_size, 1);
}
return set;
}
......
......@@ -82,6 +82,33 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
return data;
}
/** Returns the number of bytes of descriptor-buffer storage required by a
 * descriptor carrying the given descriptor data.
 *
 * For now every kind of descriptor data occupies zero bytes, so the argument
 * does not influence the result.
 */
static unsigned
anv_descriptor_data_size(enum anv_descriptor_data data)
{
   unsigned size = 0;

   return size;
}
/** Returns the size in bytes of each descriptor with the given layout */
unsigned
anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout)
{
return anv_descriptor_data_size(layout->data);
}
/** Returns the per-descriptor size, in bytes, for a Vulkan descriptor type
 *
 * Unlike anv_descriptor_size(), this variant cannot see the binding layout,
 * so it is only reliable for descriptor types whose size depends on nothing
 * but the type itself. Prefer anv_descriptor_size() whenever a binding
 * layout is available.
 */
unsigned
anv_descriptor_type_size(const struct anv_physical_device *pdevice,
                         VkDescriptorType type)
{
   const enum anv_descriptor_data type_data =
      anv_descriptor_data_for_type(pdevice, type);

   return anv_descriptor_data_size(type_data);
}
void anv_GetDescriptorSetLayoutSupport(
VkDevice device,
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
......@@ -198,6 +225,7 @@ VkResult anv_CreateDescriptorSetLayout(
uint32_t buffer_view_count = 0;
uint32_t dynamic_offset_count = 0;
uint32_t descriptor_buffer_size = 0;
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
......@@ -267,11 +295,16 @@ VkResult anv_CreateDescriptorSetLayout(
break;
}
set_layout->binding[b].descriptor_offset = descriptor_buffer_size;
descriptor_buffer_size += anv_descriptor_size(&set_layout->binding[b]) *
binding->descriptorCount;
set_layout->shader_stages |= binding->stageFlags;
}
set_layout->buffer_view_count = buffer_view_count;
set_layout->dynamic_offset_count = dynamic_offset_count;
set_layout->descriptor_buffer_size = descriptor_buffer_size;
*pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
......@@ -315,6 +348,7 @@ sha1_update_descriptor_set_binding_layout(struct mesa_sha1 *ctx,
SHA1_UPDATE_VALUE(ctx, layout->descriptor_index);
SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_index);
SHA1_UPDATE_VALUE(ctx, layout->buffer_view_index);
SHA1_UPDATE_VALUE(ctx, layout->descriptor_offset);
if (layout->immutable_samplers) {
for (uint16_t i = 0; i < layout->array_size; i++)
......@@ -331,6 +365,7 @@ sha1_update_descriptor_set_layout(struct mesa_sha1 *ctx,
SHA1_UPDATE_VALUE(ctx, layout->shader_stages);
SHA1_UPDATE_VALUE(ctx, layout->buffer_view_count);
SHA1_UPDATE_VALUE(ctx, layout->dynamic_offset_count);
SHA1_UPDATE_VALUE(ctx, layout->descriptor_buffer_size);
for (uint16_t i = 0; i < layout->binding_count; i++)
sha1_update_descriptor_set_binding_layout(ctx, &layout->binding[i]);
......@@ -420,6 +455,12 @@ void anv_DestroyPipelineLayout(
* and the free lists lets us recycle blocks for case 2).
*/
/* The vma heap reserves 0 to mean NULL; we have to offset by some amount to
 * ensure we can allocate the entire BO without hitting zero. The actual
 * amount doesn't matter.
 */
#define POOL_HEAP_OFFSET 64
#define EMPTY 1
VkResult anv_CreateDescriptorPool(
......@@ -433,6 +474,7 @@ VkResult anv_CreateDescriptorPool(
uint32_t descriptor_count = 0;
uint32_t buffer_view_count = 0;
uint32_t descriptor_bo_size = 0;
for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
enum anv_descriptor_data desc_data =
anv_descriptor_data_for_type(&device->instance->physicalDevice,
......@@ -441,8 +483,22 @@ VkResult anv_CreateDescriptorPool(
if (desc_data & ANV_DESCRIPTOR_BUFFER_VIEW)
buffer_view_count += pCreateInfo->pPoolSizes[i].descriptorCount;
unsigned desc_data_size = anv_descriptor_data_size(desc_data) *
pCreateInfo->pPoolSizes[i].descriptorCount;
descriptor_bo_size += desc_data_size;
descriptor_count += pCreateInfo->pPoolSizes[i].descriptorCount;
}
/* We have to align descriptor buffer allocations to 32B so that we can
* push descriptor buffers. This means that each descriptor buffer
* allocated may burn up to 32B of extra space to get the right alignment.
* (Technically, it's at most 28B because we're always going to start at
* least 4B aligned but we're being conservative here.) Allocate enough
* extra space that we can chop it into maxSets pieces and align each one
* of them to 32B.
*/
descriptor_bo_size += 32 * pCreateInfo->maxSets;
descriptor_bo_size = ALIGN(descriptor_bo_size, 4096);
const size_t pool_size =
pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
......@@ -459,6 +515,33 @@ VkResult anv_CreateDescriptorPool(
pool->next = 0;
pool->free_list = EMPTY;
if (descriptor_bo_size > 0) {
VkResult result = anv_bo_init_new(&pool->bo, device, descriptor_bo_size);
if (result != VK_SUCCESS) {
vk_free2(&device->alloc, pAllocator, pool);
return result;
}
anv_gem_set_caching(device, pool->bo.gem_handle, I915_CACHING_CACHED);
pool->bo.map = anv_gem_mmap(device, pool->bo.gem_handle, 0,
descriptor_bo_size, 0);
if (pool->bo.map == NULL) {
anv_gem_close(device, pool->bo.gem_handle);
vk_free2(&device->alloc, pAllocator, pool);
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
if (device->instance->physicalDevice.use_softpin) {
pool->bo.flags |= EXEC_OBJECT_PINNED;
anv_vma_alloc(device, &pool->bo);
}
util_vma_heap_init(&pool->bo_heap, POOL_HEAP_OFFSET, descriptor_bo_size);
} else {
pool->bo.size = 0;
}
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_pool, 4096);
pool->surface_state_free_list = NULL;
......@@ -479,6 +562,11 @@ void anv_DestroyDescriptorPool(
if (!pool)
return;
if (pool->bo.size) {
anv_gem_munmap(pool->bo.map, pool->bo.size);
anv_vma_free(device, &pool->bo);
anv_gem_close(device, pool->bo.gem_handle);
}
anv_state_stream_finish(&pool->surface_state_stream);
vk_free2(&device->alloc, pAllocator, pool);
}
......@@ -493,6 +581,12 @@ VkResult anv_ResetDescriptorPool(
pool->next = 0;
pool->free_list = EMPTY;
if (pool->bo.size) {
util_vma_heap_finish(&pool->bo_heap);
util_vma_heap_init(&pool->bo_heap, POOL_HEAP_OFFSET, pool->bo.size);
}
anv_state_stream_finish(&pool->surface_state_stream);
anv_state_stream_init(&pool->surface_state_stream,
&device->surface_state_pool, 4096);
......@@ -606,6 +700,37 @@ anv_descriptor_set_create(struct anv_device *device,
if (result != VK_SUCCESS)
return result;
if (layout->descriptor_buffer_size) {
   /* Round the size up to 32 so that alignment gaps don't cause extra holes
    * in the heap which can lead to bad performance.
    *
    * The rounded size is also what gets recorded in desc_mem.alloc_size:
    * anv_descriptor_set_destroy() passes alloc_size back to
    * util_vma_heap_free(), so it has to match the size that was requested
    * from util_vma_heap_alloc(). Recording the unrounded size would hand
    * back fewer bytes than were taken and slowly leak space from the pool.
    */
   const uint32_t set_buffer_size =
      ALIGN(layout->descriptor_buffer_size, 32);
   uint64_t pool_vma_offset =
      util_vma_heap_alloc(&pool->bo_heap, set_buffer_size, 32);
   if (pool_vma_offset == 0) {
      /* The heap uses offset 0 to report failure; give the CPU-side set
       * back to the pool before bailing out.
       */
      anv_descriptor_pool_free_set(pool, set);
      return vk_error(VK_ERROR_FRAGMENTED_POOL);
   }

   /* Heap offsets are biased by POOL_HEAP_OFFSET; remove the bias to get
    * the offset inside the pool BO.
    */
   assert(pool_vma_offset >= POOL_HEAP_OFFSET &&
          pool_vma_offset - POOL_HEAP_OFFSET <= INT32_MAX);
   set->desc_mem.offset = pool_vma_offset - POOL_HEAP_OFFSET;
   set->desc_mem.alloc_size = set_buffer_size;
   set->desc_mem.map = pool->bo.map + set->desc_mem.offset;

   /* The surface state still describes only the bytes the layout actually
    * uses, not the alignment padding.
    */
   set->desc_surface_state = anv_descriptor_pool_alloc_state(pool);
   anv_fill_buffer_surface_state(device, set->desc_surface_state,
                                 ISL_FORMAT_R32G32B32A32_FLOAT,
                                 (struct anv_address) {
                                    .bo = &pool->bo,
                                    .offset = set->desc_mem.offset,
                                 },
                                 layout->descriptor_buffer_size, 1);
} else {
   /* No descriptor buffer data for this layout. */
   set->desc_mem = ANV_STATE_NULL;
   set->desc_surface_state = ANV_STATE_NULL;
}
set->pool = pool;
set->layout = layout;
anv_descriptor_set_layout_ref(layout);
......@@ -656,6 +781,13 @@ anv_descriptor_set_destroy(struct anv_device *device,
{
anv_descriptor_set_layout_unref(device, set->layout);
if (set->desc_mem.alloc_size) {
util_vma_heap_free(&pool->bo_heap,
(uint64_t)set->desc_mem.offset + POOL_HEAP_OFFSET,
set->desc_mem.alloc_size);
anv_descriptor_pool_free_state(pool, set->desc_surface_state);
}
for (uint32_t b = 0; b < set->buffer_view_count; b++)
anv_descriptor_pool_free_state(pool, set->buffer_views[b].surface_state);
......@@ -925,6 +1057,16 @@ void anv_UpdateDescriptorSets(
for (uint32_t j = 0; j < copy->descriptorCount; j++)
dst_desc[j] = src_desc[j];
unsigned desc_size = anv_descriptor_size(src_layout);
if (desc_size > 0) {
assert(desc_size == anv_descriptor_size(dst_layout));
memcpy(dst->desc_mem.map + dst_layout->descriptor_offset +
copy->dstArrayElement * desc_size,
src->desc_mem.map + src_layout->descriptor_offset +
copy->srcArrayElement * desc_size,
copy->descriptorCount * desc_size);
}
}
}
......
......@@ -27,6 +27,8 @@
#include "compiler/brw_nir.h"
struct apply_pipeline_layout_state {
const struct anv_physical_device *pdevice;
nir_shader *shader;
nir_builder builder;
......@@ -38,6 +40,9 @@ struct apply_pipeline_layout_state {
bool uses_constants;
uint8_t constants_offset;
struct {
bool desc_buffer_used;
uint8_t desc_offset;
BITSET_WORD *used;
uint8_t *surface_offsets;
uint8_t *sampler_offsets;
......@@ -49,7 +54,17 @@ static void
add_binding(struct apply_pipeline_layout_state *state,
uint32_t set, uint32_t binding)
{
const struct anv_descriptor_set_binding_layout *bind_layout =
&state->layout->set[set].layout->binding[binding];
BITSET_SET(state->set[set].used, binding);
/* Only flag the descriptor buffer as used if there's actually data for
* this binding. This lets us be lazy and call this function constantly
* without worrying about unnecessarily enabling the buffer.
*/
if (anv_descriptor_size(bind_layout))
state->set[set].desc_buffer_used = true;
}
static void
......@@ -440,6 +455,7 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
struct anv_pipeline_bind_map *map)
{
struct apply_pipeline_layout_state state = {
.pdevice = pdevice,
.shader = shader,
.layout = layout,
.add_bounds_checks = robust_buffer_access,
......@@ -464,6 +480,18 @@ anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
get_used_bindings_block(block, &state);
}
for (unsigned s = 0; s < layout->num_sets; s++) {
if (state.set[s].desc_buffer_used) {
map->surface_to_descriptor[map->surface_count] =
(struct anv_pipeline_binding) {
.set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
.binding = s,
};
state.set[s].desc_offset = map->surface_count;
map->surface_count++;
}
}
if (state.uses_constants) {
state.constants_offset = map->surface_count;
map->surface_to_descriptor[map->surface_count].set =
......
......@@ -1530,10 +1530,18 @@ struct anv_descriptor_set_binding_layout {
/* Index into the descriptor set buffer views */
int16_t buffer_view_index;
/* Offset into the descriptor buffer where this descriptor lives */
uint32_t descriptor_offset;
/* Immutable samplers (or NULL if no immutable samplers) */
struct anv_sampler **immutable_samplers;
};
unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);
unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
VkDescriptorType type);
struct anv_descriptor_set_layout {
/* Descriptor set layouts can be destroyed at almost any time */
uint32_t ref_cnt;
......@@ -1553,6 +1561,9 @@ struct anv_descriptor_set_layout {
/* Number of dynamic offsets used by this descriptor set */
uint16_t dynamic_offset_count;
/* Size of the descriptor buffer for this descriptor set */
uint32_t descriptor_buffer_size;
/* Bindings in this descriptor set */
struct anv_descriptor_set_binding_layout binding[0];
};
......@@ -1594,8 +1605,15 @@ struct anv_descriptor {
};
struct anv_descriptor_set {
struct anv_descriptor_pool *pool;
struct anv_descriptor_set_layout *layout;
uint32_t size;
/* State relative to anv_descriptor_pool::bo */
struct anv_state desc_mem;
/* Surface state for the descriptor buffer */
struct anv_state desc_surface_state;
uint32_t buffer_view_count;
struct anv_buffer_view *buffer_views;
struct anv_descriptor descriptors[0];
......@@ -1620,6 +1638,12 @@ struct anv_push_descriptor_set {
/* Put this field right behind anv_descriptor_set so it fills up the
* descriptors[0] field. */
struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];
/** True if the descriptor set buffer has been referenced by a draw or
* dispatch command.
*/
bool set_used_on_gpu;
struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
};
......@@ -1628,6 +1652,9 @@ struct anv_descriptor_pool {
uint32_t next;
uint32_t free_list;
struct anv_bo bo;
struct util_vma_heap bo_heap;
struct anv_state_stream surface_state_stream;
void *surface_state_free_list;
......@@ -1724,6 +1751,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
struct anv_descriptor_pool *pool,
struct anv_descriptor_set *set);
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX
......
......@@ -2029,6 +2029,31 @@ dynamic_offset_for_binding(const struct anv_cmd_pipeline_state *pipe_state,
return pipe_state->dynamic_offsets[dynamic_offset_idx];
}
static struct anv_address
anv_descriptor_set_address(struct anv_cmd_buffer *cmd_buffer,
struct anv_descriptor_set *set)
{
if (set->pool) {
/* This is a normal descriptor set */
return (struct anv_address) {
.bo = &set->pool->bo,
.offset = set->desc_mem.offset,
};
} else {
/* This is a push descriptor set. We have to flag it as used on the GPU
* so that the next time we push descriptors, we grab a new memory.
*/
struct anv_push_descriptor_set *push_set =
(struct anv_push_descriptor_set *)set;
push_set->set_used_on_gpu = true;
return (struct anv_address) {
.bo = cmd_buffer->dynamic_state_stream.state_pool->block_pool.bo,
.offset = set->desc_mem.offset,
};
}
}
static VkResult
emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
gl_shader_stage stage,
......@@ -2149,6 +2174,18 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
add_surface_reloc(cmd_buffer, surface_state,
cmd_buffer->state.compute.num_workgroups);
continue;
} else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
/* This is a descriptor set buffer so the set index is actually
* given by binding->binding. (Yes, that's confusing.)
*/
struct anv_descriptor_set *set =
pipe_state->descriptors[binding->binding];
assert(set->desc_mem.alloc_size);
assert(set->desc_surface_state.alloc_size);
bt_map[s] = set->desc_surface_state.offset + state_offset;
add_surface_reloc(cmd_buffer, set->desc_surface_state,
anv_descriptor_set_address(cmd_buffer, set));
continue;
}
const struct anv_descriptor *desc =
......@@ -2518,6 +2555,21 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
DIV_ROUND_UP(constant_data_size, 32) - range->start);
read_addr = anv_address_add(constant_data,
range->start * 32);
} else if (binding->set == ANV_DESCRIPTOR_SET_DESCRIPTORS) {
/* This is a descriptor set buffer so the set index is
* actually given by binding->binding. (Yes, that's
* confusing.)
*/
struct anv_descriptor_set *set =
gfx_state->base.descriptors[binding->binding];
struct anv_address desc_buffer_addr =
anv_descriptor_set_address(cmd_buffer, set);
const unsigned desc_buffer_size = set->desc_mem.alloc_size;
read_len = MIN2(range->length,
DIV_ROUND_UP(desc_buffer_size, 32) - range->start);
read_addr = anv_address_add(desc_buffer_addr,
range->start * 32);
} else {
const struct anv_descriptor *desc =
anv_descriptor_for_binding(&gfx_state->base, binding);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment