Commit dfe18be0 authored by Jason Ekstrand

anv: Implement vkCmdDispatchBase

This is part of the device groups extension/feature but it's a decent
chunk of work in its own right so it's worth breaking into its own
patch.  The mechanism we use is fairly straightforward: we just push the
base work group id into the shader and add it to the work group id we
get from dispatch.
Reviewed-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
parent ff9db1a4
......@@ -226,6 +226,7 @@ VULKAN_FILES := \
vulkan/anv_image.c \
vulkan/anv_intel.c \
vulkan/anv_nir.h \
vulkan/anv_nir_add_base_work_group_id.c \
vulkan/anv_nir_apply_pipeline_layout.c \
vulkan/anv_nir_lower_input_attachments.c \
vulkan/anv_nir_lower_multiview.c \
......
......@@ -551,6 +551,9 @@ enum brw_param_builtin {
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X,
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y,
BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z,
BRW_PARAM_BUILTIN_SUBGROUP_ID,
};
......
......@@ -688,6 +688,12 @@ anv_push_constant_value(struct anv_push_constants *data, uint32_t param)
switch (param) {
case BRW_PARAM_BUILTIN_ZERO:
return 0;
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X:
return data->base_work_group_id[0];
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y:
return data->base_work_group_id[1];
case BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z:
return data->base_work_group_id[2];
default:
unreachable("Invalid param builtin");
}
......
......@@ -46,6 +46,9 @@ void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map);
/* Compute-shader NIR pass used to implement vkCmdDispatchBase: the base work
 * group id is pushed into the shader and added to the work group id the
 * shader gets from the dispatch.  Returns true if the shader was modified.
 */
bool anv_nir_add_base_work_group_id(nir_shader *shader,
struct brw_cs_prog_data *prog_data);
#ifdef __cplusplus
}
#endif
......
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "anv_nir.h"
#include "nir/nir_builder.h"
#include "compiler/brw_compiler.h"
/**
 * Offset every work group id read in a compute shader by a pushed base id.
 *
 * Each load_work_group_id intrinsic is followed by a load_uniform of the
 * three BASE_WORK_GROUP_ID params and an integer add; uses of the original
 * id that come after the add are redirected to the sum.  The three params
 * are appended to prog_data (and the shader's uniform storage is grown to
 * match) the first time a work group id load is encountered, so shaders
 * that never read the id get no extra params.
 *
 * \param shader     compute shader to rewrite (asserted to be
 *                   MESA_SHADER_COMPUTE)
 * \param prog_data  compute prog data that receives the new params
 * \return true if at least one work group id load was rewritten
 */
bool
anv_nir_add_base_work_group_id(nir_shader *shader,
                               struct brw_cs_prog_data *prog_data)
{
   assert(shader->info.stage == MESA_SHADER_COMPUTE);

   /* Size in bytes of the X, Y and Z components of the base id. */
   const unsigned base_id_size = 3 * sizeof(uint32_t);

   /* Uniform offset of the BASE_WORK_GROUP_ID params; stays negative until
    * the first work group id load forces them to be allocated.
    */
   int base_id_offset = -1;
   bool progress = false;
   nir_builder b;

   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            /* Only load_work_group_id intrinsics are of interest. */
            nir_intrinsic_instr *group_id =
               instr->type == nir_instr_type_intrinsic ?
               nir_instr_as_intrinsic(instr) : NULL;
            if (group_id == NULL ||
                group_id->intrinsic != nir_intrinsic_load_work_group_id)
               continue;

            /* Everything emitted below lands right after the id load. */
            b.cursor = nir_after_instr(&group_id->instr);

            if (base_id_offset < 0) {
               /* No BASE_WORK_GROUP_ID params exist yet: append them at the
                * end of the current uniform storage.
                */
               assert(shader->num_uniforms == prog_data->base.nr_params * 4);
               uint32_t *base_params =
                  brw_stage_prog_data_add_params(&prog_data->base, 3);
               base_params[0] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_X;
               base_params[1] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Y;
               base_params[2] = BRW_PARAM_BUILTIN_BASE_WORK_GROUP_ID_Z;

               base_id_offset = shader->num_uniforms;
               shader->num_uniforms += base_id_size;
            }

            /* Load the three-component base id from its uniform slot. */
            nir_intrinsic_instr *base_id =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_uniform);
            base_id->num_components = 3;
            nir_ssa_def *zero_offset = nir_imm_int(&b, 0);
            base_id->src[0] = nir_src_for_ssa(zero_offset);
            nir_ssa_dest_init(&base_id->instr, &base_id->dest, 3, 32, NULL);
            nir_intrinsic_set_base(base_id, base_id_offset);
            nir_intrinsic_set_range(base_id, base_id_size);
            nir_builder_instr_insert(&b, &base_id->instr);

            /* Redirect only the uses located after the add, so the add
             * itself keeps reading the id produced by the dispatch.
             */
            nir_ssa_def *global_id =
               nir_iadd(&b, &group_id->dest.ssa, &base_id->dest.ssa);
            nir_ssa_def_rewrite_uses_after(&group_id->dest.ssa,
                                           nir_src_for_ssa(global_id),
                                           global_id->parent_instr);

            progress = true;
         }
      }

      nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                            nir_metadata_dominance);
   }

   return progress;
}
......@@ -1042,6 +1042,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
}
NIR_PASS_V(nir, anv_nir_add_base_work_group_id, &prog_data);
anv_fill_binding_table(&prog_data.base, 1);
const unsigned *shader_code =
......
......@@ -1632,6 +1632,9 @@ struct anv_push_constants {
/* Push constant data provided by the client through vkPushConstants */
uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];
/* Used for vkCmdDispatchBase */
uint32_t base_work_group_id[3];
/* Image data for image_load_store on pre-SKL */
struct brw_image_param images[MAX_IMAGES];
};
......
......@@ -3032,16 +3032,61 @@ verify_cmd_parser(const struct anv_device *device,
#endif
/**
 * Record the base work group id in the compute push constants.
 *
 * Does nothing if the batch is already in an error state.  If the
 * base_work_group_id push constant field cannot be ensured, the failure is
 * stored in the batch status.  The compute push constants are flagged dirty
 * only when the stored base id actually changes.
 */
static void
anv_cmd_buffer_push_base_group_id(struct anv_cmd_buffer *cmd_buffer,
                                  uint32_t baseGroupX,
                                  uint32_t baseGroupY,
                                  uint32_t baseGroupZ)
{
   if (anv_batch_has_error(&cmd_buffer->batch))
      return;

   const VkResult ensure_result =
      anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, MESA_SHADER_COMPUTE,
                                                base_work_group_id);
   if (ensure_result != VK_SUCCESS) {
      cmd_buffer->batch.status = ensure_result;
      return;
   }

   uint32_t *stored_base =
      cmd_buffer->state.push_constants[MESA_SHADER_COMPUTE]->base_work_group_id;

   /* Nothing to re-upload when the requested base id is already stored. */
   const bool unchanged = stored_base[0] == baseGroupX &&
                          stored_base[1] == baseGroupY &&
                          stored_base[2] == baseGroupZ;
   if (unchanged)
      return;

   stored_base[0] = baseGroupX;
   stored_base[1] = baseGroupY;
   stored_base[2] = baseGroupZ;

   cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
}
/* A plain dispatch is forwarded to CmdDispatchBase with a base work group
 * id of (0, 0, 0); x, y and z are passed through as the group counts.
 */
void genX(CmdDispatch)(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   const uint32_t base_group[3] = { 0, 0, 0 };

   genX(CmdDispatchBase)(commandBuffer,
                         base_group[0], base_group[1], base_group[2],
                         x, y, z);
}
void genX(CmdDispatchBase)(
VkCommandBuffer commandBuffer,
uint32_t baseGroupX,
uint32_t baseGroupY,
uint32_t baseGroupZ,
uint32_t groupCountX,
uint32_t groupCountY,
uint32_t groupCountZ)
{
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
struct anv_pipeline *pipeline = cmd_buffer->state.compute.base.pipeline;
const struct brw_cs_prog_data *prog_data = get_cs_prog_data(pipeline);
anv_cmd_buffer_push_base_group_id(cmd_buffer, baseGroupX,
baseGroupY, baseGroupZ);
if (anv_batch_has_error(&cmd_buffer->batch))
return;
......@@ -3049,9 +3094,9 @@ void genX(CmdDispatch)(
struct anv_state state =
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, 12, 4);
uint32_t *sizes = state.map;
sizes[0] = x;
sizes[1] = y;
sizes[2] = z;
sizes[0] = groupCountX;
sizes[1] = groupCountY;
sizes[2] = groupCountZ;
anv_state_flush(cmd_buffer->device, state);
cmd_buffer->state.compute.num_workgroups = (struct anv_address) {
.bo = &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
......@@ -3066,9 +3111,9 @@ void genX(CmdDispatch)(
ggw.ThreadDepthCounterMaximum = 0;
ggw.ThreadHeightCounterMaximum = 0;
ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
ggw.ThreadGroupIDXDimension = x;
ggw.ThreadGroupIDYDimension = y;
ggw.ThreadGroupIDZDimension = z;
ggw.ThreadGroupIDXDimension = groupCountX;
ggw.ThreadGroupIDYDimension = groupCountY;
ggw.ThreadGroupIDZDimension = groupCountZ;
ggw.RightExecutionMask = pipeline->cs_right_mask;
ggw.BottomExecutionMask = 0xffffffff;
}
......@@ -3093,6 +3138,8 @@ void genX(CmdDispatchIndirect)(
uint32_t bo_offset = buffer->offset + offset;
struct anv_batch *batch = &cmd_buffer->batch;
anv_cmd_buffer_push_base_group_id(cmd_buffer, 0, 0, 0);
#if GEN_GEN == 7
/* Linux 4.4 added command parser version 5 which allows the GPGPU
* indirect dispatch registers to be written.
......
......@@ -131,6 +131,7 @@ libanv_files = files(
'anv_image.c',
'anv_intel.c',
'anv_nir.h',
'anv_nir_add_base_work_group_id.c',
'anv_nir_apply_pipeline_layout.c',
'anv_nir_lower_input_attachments.c',
'anv_nir_lower_multiview.c',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment