Commit 983af3a6 authored by Dave Airlie's avatar Dave Airlie Committed by Bas Nieuwenhuizen
Browse files

radv: add a compute shader implementation for buffer to image



This implements the reverse of the current buffer->image path
and can be used when we need to do image transfer on compute queues

This just adds the code turned off as we don't support separate
computes queues yet, and we don't want to use this path on the GFX
queues for DCC reasons.
Reviewed-by: Bas Nieuwenhuizen's avatarBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent 35cf08ef
......@@ -166,6 +166,12 @@ void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects);
void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkImageSubresourceRange *subresourceRange);
......
......@@ -225,10 +225,206 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
}
}
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev)
{
nir_builder b;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
false,
false,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs");
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
offset->num_components = 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
nir_builder_instr_insert(&b, &stride->instr);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
tmp = nir_iadd(&b, tmp, pos_x);
nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(buf_coord);
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 1;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
nir_ssa_def *outval = &tex->dest.ssa;
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
store->src[0] = nir_src_for_ssa(img_coord);
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
store->src[2] = nir_src_for_ssa(outval);
store->variables[0] = nir_deref_var_create(store, output_img);
nir_builder_instr_insert(&b, &store->instr);
return b.shader;
}
/* Buffer to image - don't write use image accessors */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
zero(device->meta_state.btoi);
cs.nir = build_nir_btoi_compute_shader(device);
/*
* two descriptors one for the image being sampled
* one for the buffer being written.
*/
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&ds_create_info,
&device->meta_state.alloc,
&device->meta_state.btoi.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_create_info,
&device->meta_state.alloc,
&device->meta_state.btoi.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.btoi.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info, NULL,
&device->meta_state.btoi.pipeline);
if (result != VK_SUCCESS)
goto fail;
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
return result;
}
static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
if (device->meta_state.btoi.img_p_layout) {
radv_DestroyPipelineLayout(radv_device_to_handle(device),
device->meta_state.btoi.img_p_layout,
&device->meta_state.alloc);
}
if (device->meta_state.btoi.img_ds_layout) {
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
device->meta_state.btoi.img_ds_layout,
&device->meta_state.alloc);
}
if (device->meta_state.btoi.pipeline) {
radv_DestroyPipeline(radv_device_to_handle(device),
device->meta_state.btoi.pipeline,
&device->meta_state.alloc);
}
}
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
radv_device_finish_meta_itob_state(device);
radv_device_finish_meta_btoi_state(device);
}
VkResult
......@@ -239,6 +435,12 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)
result = radv_device_init_meta_itob_state(device);
if (result != VK_SUCCESS)
return result;
result = radv_device_init_meta_btoi_state(device);
if (result != VK_SUCCESS) {
radv_device_finish_meta_itob_state(device);
return result;
}
return VK_SUCCESS;
}
......@@ -351,7 +553,7 @@ itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
}
static void
bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
itob_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
VkPipeline pipeline =
cmd_buffer->device->meta_state.itob.pipeline;
......@@ -376,7 +578,7 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview);
itob_bind_descriptors(cmd_buffer, &temps);
bind_pipeline(cmd_buffer);
itob_bind_pipeline(cmd_buffer);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[3] = {
......@@ -393,3 +595,99 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
}
radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set);
}
struct btoi_temps {
struct radv_buffer_view src_bview;
struct radv_image_view dst_iview;
VkDescriptorSet set;
};
static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
struct btoi_temps *tmp)
{
struct radv_device *device = cmd_buffer->device;
VkDevice vk_device = radv_device_to_handle(cmd_buffer->device);
radv_temp_descriptor_set_create(device, cmd_buffer,
device->meta_state.btoi.img_ds_layout,
&tmp->set);
radv_UpdateDescriptorSets(vk_device,
2, /* writeCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = tmp->set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->src_bview) },
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = tmp->set,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = NULL,
.imageView = radv_image_view_to_handle(&tmp->dst_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
}
}, 0, NULL);
radv_CmdBindDescriptorSets(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.btoi.img_p_layout, 0, 1,
&tmp->set, 0, NULL);
}
static void
btoi_bind_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
VkPipeline pipeline =
cmd_buffer->device->meta_state.btoi.pipeline;
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
}
}
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
struct radv_device *device = cmd_buffer->device;
struct btoi_temps temps;
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview);
create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
btoi_bind_descriptors(cmd_buffer, &temps);
btoi_bind_pipeline(cmd_buffer);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[3] = {
rects[r].dst_x,
rects[r].dst_y,
src->pitch
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.btoi.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
radv_temp_descriptor_set_destroy(cmd_buffer->device, temps.set);
}
......@@ -100,6 +100,11 @@ blit_surf_for_image_level_layer(struct radv_image *image,
};
}
union meta_saved_state {
struct radv_meta_saved_state gfx;
struct radv_meta_saved_compute_state compute;
};
static void
meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer* buffer,
......@@ -107,14 +112,18 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t regionCount,
const VkBufferImageCopy* pRegions)
{
struct radv_meta_saved_state saved_state;
bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
union meta_saved_state saved_state;
/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
* VK_SAMPLE_COUNT_1_BIT."
*/
assert(image->samples == 1);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
if (cs)
radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
else
radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
......@@ -172,7 +181,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
/* Perform Blit */
radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
if (cs)
radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
else
radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
/* Once we've done the blit, all of the actual information about
* the image is embedded in the command buffer so we can just
......@@ -188,7 +200,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
slice_array++;
}
}
radv_meta_restore(&saved_state, cmd_buffer);
if (cs)
radv_meta_end_bufimage(cmd_buffer, &saved_state.compute);
else
radv_meta_restore(&saved_state.gfx, cmd_buffer);
}
void radv_CmdCopyBufferToImage(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment