Commit b4eb0290 authored by Samuel Pitoiset

radv: implement VK_EXT_transform_feedback

This implementation should work and potential bugs can be
fixed during the release candidates window anyway.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
parent f8d03372
......@@ -119,6 +119,7 @@
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM 2
#define STRMOUT_OFFSET_NONE 3
#define STRMOUT_DATA_TYPE(x) (((unsigned)(x) & 0x1) << 7)
#define STRMOUT_SELECT_BUFFER(x) (((unsigned)(x) & 0x3) << 8)
#define PKT3_DRAW_INDEX_OFFSET_2 0x35
#define PKT3_WRITE_DATA 0x37
......
This diff is collapsed.
......@@ -840,6 +840,13 @@ void radv_GetPhysicalDeviceFeatures2(
features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
features->transformFeedback = true;
features->geometryStreams = true;
break;
}
default:
break;
}
......@@ -1213,6 +1220,21 @@ void radv_GetPhysicalDeviceProperties2(
};
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
properties->maxTransformFeedbackBufferSize = UINT32_MAX;
properties->maxTransformFeedbackStreamDataSize = 512;
properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
properties->maxTransformFeedbackBufferDataStride = 512;
properties->transformFeedbackQueries = true;
properties->transformFeedbackStreamsLinesTriangles = false;
properties->transformFeedbackRasterizationStreamSelect = false;
properties->transformFeedbackDraw = true;
break;
}
default:
break;
}
......
......@@ -109,6 +109,7 @@ EXTENSIONS = [
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
Extension('VK_EXT_shader_stencil_export', 1, True),
Extension('VK_EXT_transform_feedback', 1, True),
Extension('VK_EXT_vertex_attribute_divisor', 3, True),
Extension('VK_AMD_draw_indirect_count', 1, True),
Extension('VK_AMD_gcn_shader', 1, True),
......
......@@ -3482,6 +3482,22 @@ radv_compute_vertex_input_state(struct radv_pipeline *pipeline,
}
}
/* Return the last vertex-processing stage of the pipeline that writes
 * streamout outputs (i.e. has so.num_outputs > 0), scanning from
 * geometry down to vertex, or NULL when the pipeline does not use
 * transform feedback.
 */
static struct radv_shader_variant *
radv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline)
{
	int stage = MESA_SHADER_GEOMETRY;

	while (stage >= MESA_SHADER_VERTEX) {
		struct radv_shader_variant *variant =
			radv_get_shader(pipeline, stage);

		if (variant && variant->info.info.so.num_outputs > 0)
			return variant;

		stage--;
	}

	return NULL;
}
static VkResult
radv_pipeline_init(struct radv_pipeline *pipeline,
struct radv_device *device,
......@@ -3597,6 +3613,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
pipeline->graphics.vtx_emit_num = 2;
}
/* Find the last vertex shader stage that eventually uses streamout. */
pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
result = radv_pipeline_scratch_init(device, pipeline);
radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend, &tess, &gs, prim, gs_out);
......
......@@ -843,6 +843,7 @@ enum radv_cmd_dirty_bits {
RADV_CMD_DIRTY_INDEX_BUFFER = 1 << 11,
RADV_CMD_DIRTY_FRAMEBUFFER = 1 << 12,
RADV_CMD_DIRTY_VERTEX_BUFFER = 1 << 13,
RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1 << 14,
};
enum radv_cmd_flush_bits {
......@@ -868,6 +869,7 @@ enum radv_cmd_flush_bits {
/* Pipeline query controls. */
RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14,
RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15,
RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
......@@ -880,6 +882,29 @@ struct radv_vertex_binding {
VkDeviceSize offset;
};
/* One transform feedback buffer binding as recorded by
 * vkCmdBindTransformFeedbackBuffersEXT.
 */
struct radv_streamout_binding {
	struct radv_buffer *buffer; /* bound buffer object */
	VkDeviceSize offset;        /* byte offset into the buffer */
	VkDeviceSize size;          /* size in bytes of the bound range */
};
/* Per-command-buffer streamout (transform feedback) state. */
struct radv_streamout_state {
	/* Mask of bound streamout buffers. */
	uint8_t enabled_mask;

	/* External state that comes from the last vertex stage, it must be
	 * set explicitly when binding a new graphics pipeline.
	 */
	uint16_t stride_in_dw[MAX_SO_BUFFERS];
	uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */

	/* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
	uint32_t hw_enabled_mask;

	/* State of VGT_STRMOUT_(CONFIG|EN) */
	bool streamout_enabled;
};
struct radv_viewport_state {
uint32_t count;
VkViewport viewports[MAX_VIEWPORTS];
......@@ -987,6 +1012,7 @@ struct radv_cmd_state {
const struct radv_subpass * subpass;
struct radv_dynamic_state dynamic;
struct radv_attachment_state * attachments;
struct radv_streamout_state streamout;
VkRect2D render_area;
/* Index buffer */
......@@ -1056,6 +1082,7 @@ struct radv_cmd_buffer {
struct radeon_cmdbuf *cs;
struct radv_cmd_state state;
struct radv_vertex_binding vertex_bindings[MAX_VBS];
struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
uint32_t queue_family_index;
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
......@@ -1353,6 +1380,9 @@ struct radv_pipeline {
unsigned max_waves;
unsigned scratch_bytes_per_wave;
/* Not NULL if graphics pipeline uses streamout. */
struct radv_shader_variant *streamout_shader;
};
static inline bool radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
......
......@@ -789,6 +789,9 @@ VkResult radv_CreateQueryPool(
case VK_QUERY_TYPE_TIMESTAMP:
pool->stride = 8;
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
pool->stride = 32;
break;
default:
unreachable("creating unhandled query type");
}
......@@ -951,6 +954,44 @@ VkResult radv_GetQueryPoolResults(
}
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
volatile uint64_t const *src64 = (volatile uint64_t const *)src;
uint64_t num_primitives_written;
uint64_t primitive_storage_needed;
/* SAMPLE_STREAMOUTSTATS stores this structure:
* {
* u64 NumPrimitivesWritten;
* u64 PrimitiveStorageNeeded;
* }
*/
available = 1;
for (int j = 0; j < 4; j++) {
if (!(src64[j] & 0x8000000000000000UL))
available = 0;
}
if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
result = VK_NOT_READY;
break;
}
num_primitives_written = src64[3] - src64[1];
primitive_storage_needed = src64[2] - src64[0];
if (flags & VK_QUERY_RESULT_64_BIT) {
*(uint64_t *)dest = num_primitives_written;
dest += 8;
*(uint64_t *)dest = primitive_storage_needed;
dest += 8;
} else {
*(uint32_t *)dest = num_primitives_written;
dest += 4;
*(uint32_t *)dest = primitive_storage_needed;
dest += 4;
}
break;
}
default:
unreachable("trying to get results of unhandled query type");
}
......@@ -1109,10 +1150,22 @@ void radv_CmdResetQueryPool(
}
}
/* Map a transform feedback stream index to the matching
 * SAMPLE_STREAMOUTSTATS* event type. Any index other than 1-3
 * (including out-of-range values) selects the stream-0 event, matching
 * the original switch whose default fell through to case 0.
 */
static unsigned event_type_for_stream(unsigned stream)
{
	if (stream == 1)
		return V_028A90_SAMPLE_STREAMOUTSTATS1;
	if (stream == 2)
		return V_028A90_SAMPLE_STREAMOUTSTATS2;
	if (stream == 3)
		return V_028A90_SAMPLE_STREAMOUTSTATS3;

	return V_028A90_SAMPLE_STREAMOUTSTATS;
}
static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
uint64_t va,
VkQueryType query_type,
VkQueryControlFlags flags)
VkQueryControlFlags flags,
uint32_t index)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
......@@ -1161,6 +1214,16 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
radeon_check_space(cmd_buffer->device->ws, cs, 4);
assert(index < MAX_SO_STREAMS);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
break;
default:
unreachable("beginning unhandled query type");
}
......@@ -1169,7 +1232,7 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
uint64_t va, uint64_t avail_va,
VkQueryType query_type)
VkQueryType query_type, uint32_t index)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
......@@ -1215,16 +1278,27 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
avail_va, 0, 1,
cmd_buffer->gfx9_eop_bug_va);
break;
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
radeon_check_space(cmd_buffer->device->ws, cs, 4);
assert(index < MAX_SO_STREAMS);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
radeon_emit(cs, (va + 16));
radeon_emit(cs, (va + 16) >> 32);
break;
default:
unreachable("ending unhandled query type");
}
}
void radv_CmdBeginQuery(
void radv_CmdBeginQueryIndexedEXT(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query,
VkQueryControlFlags flags)
VkQueryControlFlags flags,
uint32_t index)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
......@@ -1247,14 +1321,23 @@ void radv_CmdBeginQuery(
va += pool->stride * query;
emit_begin_query(cmd_buffer, va, pool->type, flags);
emit_begin_query(cmd_buffer, va, pool->type, flags, index);
}
/* Core (non-indexed) begin-query entrypoint: equivalent to the EXT
 * indexed variant with index 0, so simply forward to it.
 */
void radv_CmdBeginQuery(
	VkCommandBuffer commandBuffer,
	VkQueryPool queryPool,
	uint32_t query,
	VkQueryControlFlags flags)
{
	radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
}
void radv_CmdEndQuery(
void radv_CmdEndQueryIndexedEXT(
VkCommandBuffer commandBuffer,
VkQueryPool queryPool,
uint32_t query)
uint32_t query,
uint32_t index)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
......@@ -1265,7 +1348,7 @@ void radv_CmdEndQuery(
/* Do not need to add the pool BO to the list because the query must
* currently be active, which means the BO is already in the list.
*/
emit_end_query(cmd_buffer, va, avail_va, pool->type);
emit_end_query(cmd_buffer, va, avail_va, pool->type, index);
/*
* For multiview we have to emit a query for each bit in the mask,
......@@ -1282,12 +1365,20 @@ void radv_CmdEndQuery(
for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
va += pool->stride;
avail_va += 4;
emit_begin_query(cmd_buffer, va, pool->type, 0);
emit_end_query(cmd_buffer, va, avail_va, pool->type);
emit_begin_query(cmd_buffer, va, pool->type, 0, 0);
emit_end_query(cmd_buffer, va, avail_va, pool->type, 0);
}
}
}
/* Core (non-indexed) end-query entrypoint: equivalent to the EXT
 * indexed variant with index 0, so simply forward to it.
 */
void radv_CmdEndQuery(
	VkCommandBuffer commandBuffer,
	VkQueryPool queryPool,
	uint32_t query)
{
	radv_CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0);
}
void radv_CmdWriteTimestamp(
VkCommandBuffer commandBuffer,
VkPipelineStageFlagBits pipelineStage,
......
......@@ -243,6 +243,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
.runtime_descriptor_array = true,
.stencil_export = true,
.storage_16bit = true,
.geometry_streams = true,
.transform_feedback = true,
},
};
entry_point = spirv_to_nir(spirv, module->size / 4,
......@@ -434,7 +436,12 @@ radv_fill_shader_variant(struct radv_device *device,
variant->code_size = radv_get_shader_binary_size(binary);
variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
S_00B12C_USER_SGPR_MSB(variant->info.num_user_sgprs >> 5) |
S_00B12C_SCRATCH_EN(scratch_enabled);
S_00B12C_SCRATCH_EN(scratch_enabled) |
S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
S_00B12C_SO_EN(!!info->so.num_outputs);
variant->rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) |
S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) |
......
......@@ -883,6 +883,12 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}
/* VGT streamout state sync */
if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
}
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment