Commit b157a5d0 authored by Connor Abbott's avatar Connor Abbott Committed by Marge Bot
Browse files

tu: Implement non-aligned multisample GMEM STORE_OP_STORE

We have to be a bit careful here when disabling draw states. This also
necessitates moving the actual recording of the stores to the end so
that we set the dirty flag correctly.

Closes: #4462
Part-of: <!12102>
parent 7948c4b0
Pipeline #372869 waiting for manual action with stages
......@@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail
dEQP-VK.compute.basic.max_local_size_x,Crash
dEQP-VK.compute.basic.max_local_size_y,Crash
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462
dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail
# https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019
# should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745
dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail
......
......@@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd,
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
}
/* Program the 3D-blit source as a texture that reads an attachment out of
 * GMEM.
 *
 * The descriptor is seeded from a copy of iview->descriptor and then
 * overridden: the format field is replaced with the texture format for
 * `format`, the swap field is cleared, the tile mode is forced to TILE6_2,
 * and the pitch/base-address dwords are rewritten so the fetch targets
 * gmem_base + gmem_offset. `cpp` scales the tile0 width into the pitch.
 */
static void
r3d_src_gmem(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs,
             const struct tu_image_view *iview,
             VkFormat format,
             uint32_t gmem_offset,
             uint32_t cpp)
{
   uint32_t tex_const[A6XX_TEX_CONST_DWORDS];
   memcpy(tex_const, iview->descriptor, sizeof(tex_const));

   uint32_t dw0 = tex_const[0];

   /* Override the format so depth/stencil are sampled with the right one. */
   dw0 = (dw0 & ~A6XX_TEX_CONST_0_FMT__MASK) |
         A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt);

   /* GMEM layout: drop the swap field and force the tile mode. */
   dw0 = (dw0 & ~(A6XX_TEX_CONST_0_SWAP__MASK |
                  A6XX_TEX_CONST_0_TILE_MODE__MASK)) |
         A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);

   tex_const[0] = dw0;

   /* Everything from dword 6 onward is cleared. */
   unsigned dw = A6XX_TEX_CONST_DWORDS;
   while (dw > 6)
      tex_const[--dw] = 0;

   tex_const[5] = A6XX_TEX_CONST_5_DEPTH(1);
   tex_const[4] = cmd->device->physical_device->gmem_base + gmem_offset;
   tex_const[3] = 0;
   tex_const[2] =
      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
      A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp);

   r3d_src_common(cmd, cs, tex_const, 0, 0, VK_FILTER_NEAREST);
}
static void
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
{
......@@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}
/* Store one attachment from GMEM to its image using the 3D (draw) blit path.
 *
 * The same render_area offset is passed as both destination and source
 * coordinates. `separate_stencil` picks the stencil destination setup
 * instead of the regular one. `gmem_offset` and `cpp` describe where the
 * attachment lives in GMEM and are forwarded to r3d_src_gmem().
 *
 * The sequence of emitted commands is: setup, coords, destination, source,
 * cache invalidate, draw, CCU color flush.
 */
static void
store_3d_blit(struct tu_cmd_buffer *cmd,
              struct tu_cs *cs,
              const struct tu_image_view *iview,
              uint32_t dst_samples,
              bool separate_stencil,
              VkFormat format,
              const VkRect2D *render_area,
              uint32_t gmem_offset,
              uint32_t cpp)
{
   r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
             iview->ubwc_enabled, dst_samples);

   r3d_coords(cs,
              &render_area->offset,
              &render_area->offset,
              &render_area->extent);

   if (!separate_stencil) {
      r3d_dst(cs, iview, 0);
   } else {
      r3d_dst_stencil(cs, iview, 0);
   }

   r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp);

   /* Make GMEM writes visible to the cache before the draw samples them. */
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

   r3d_run(cmd, cs);

   /* A draw lands in the CCU, whereas CP_EVENT_WRITE::BLIT goes straight to
    * sysmem. GMEM renderpasses are generally assumed to leave their results
    * in sysmem, so flush by hand. Depth images are written as a color RT on
    * this path, hence only the color flush is needed.
    */
   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
......@@ -2782,26 +2848,39 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
return;
}
if (dst->samples > 1) {
/* I guess we need to use shader path in this case?
* need a testcase which fails because of this
*/
tu_finishme("unaligned store of msaa attachment\n");
return;
}
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
VkFormat format = src->format;
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
format = VK_FORMAT_D32_SFLOAT;
if (dst->store) {
store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
src->gmem_offset, src->cpp);
}
if (dst->store_stencil) {
store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
src->gmem_offset_stencil, src->samples);
if (dst->samples > 1) {
/* If we hit this path, we have to disable draw states after every tile
* instead of once at the end of the renderpass, so that they aren't
* executed when calling CP_DRAW.
*
* TODO: store a flag somewhere so we don't do this more than once and
* don't do it after the renderpass when this happens.
*/
if (dst->store || dst->store_stencil)
tu_disable_draw_states(cmd, cs);
if (dst->store) {
store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format,
render_area, src->gmem_offset, src->cpp);
}
if (dst->store_stencil) {
store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT,
render_area, src->gmem_offset, src->samples);
}
} else {
r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
if (dst->store) {
store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
src->gmem_offset, src->cpp);
}
if (dst->store_stencil) {
store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
src->gmem_offset_stencil, src->samples);
}
}
}
......@@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
static void
void
tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
{
tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
......@@ -2913,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
cmd->state.framebuffer = fb;
cmd->state.render_area = pRenderPassBegin->renderArea;
tu6_emit_tile_store(cmd, &cmd->tile_store_cs);
/* Note: because this is external, any flushes will happen before draw_cs
* gets called. However deferred flushes could have to happen later as part
* of the subpass.
......@@ -4349,6 +4347,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
{
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
tu_cs_end(&cmd_buffer->draw_cs);
tu_cs_end(&cmd_buffer->tile_store_cs);
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
......@@ -4358,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
else
tu_cmd_render_tiles(cmd_buffer);
/* outside of renderpasses we assume all draw states are disabled
* we can do this in the main cs because no resolve/store commands
* should use a draw command (TODO: this will change if unaligned
* GMEM stores are supported)
/* Outside of renderpasses we assume all draw states are disabled. We do
* this outside the draw CS for the normal case where 3d gmem stores aren't
* used.
*/
tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs);
......
......@@ -1239,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_
void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);
void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
struct tu_pvtmem_config {
uint64_t iova;
uint32_t per_fiber_size;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment