diff --git a/src/freedreno/ci/deqp-freedreno-a630-fails.txt b/src/freedreno/ci/deqp-freedreno-a630-fails.txt index 463a09a07267c072a52b9c95de6d6f551ecf86f5..0de08181475548397edae96c549aa08d8e1c6448 100644 --- a/src/freedreno/ci/deqp-freedreno-a630-fails.txt +++ b/src/freedreno/ci/deqp-freedreno-a630-fails.txt @@ -17,9 +17,6 @@ dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail,Fail dEQP-VK.compute.basic.max_local_size_x,Crash dEQP-VK.compute.basic.max_local_size_y,Crash -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/4462 -dEQP-VK.pipeline.framebuffer_attachment.diff_attachments_2d_19x27_32x32_ms,Fail - # https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3019 # should be fixed by https://gerrit.khronos.org/c/vk-gl-cts/+/7745 dEQP-VK.renderpass.dedicated_allocation.attachment_allocation.input_output.7,Fail diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index fd8e75b13f2d7c978d585d8bf377e928a790481a..5ecf11ce530a2a98e3113176973b28b2e7c8cf67 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -894,6 +894,36 @@ r3d_src_buffer(struct tu_cmd_buffer *cmd, r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); } +static void +r3d_src_gmem(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + VkFormat format, + uint32_t gmem_offset, + uint32_t cpp) +{ + uint32_t desc[A6XX_TEX_CONST_DWORDS]; + memcpy(desc, iview->descriptor, sizeof(desc)); + + /* patch the format so that depth/stencil get the right format */ + desc[0] &= ~A6XX_TEX_CONST_0_FMT__MASK; + desc[0] |= A6XX_TEX_CONST_0_FMT(tu6_format_texture(format, TILE6_2).fmt); + + /* patched for gmem */ + desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); + desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); + desc[2] = + A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | + A6XX_TEX_CONST_2_PITCH(cmd->state.framebuffer->tile0.width * cpp); + desc[3] = 0; + desc[4] = cmd->device->physical_device->gmem_base + gmem_offset; + desc[5] = A6XX_TEX_CONST_5_DEPTH(1); + for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) + desc[i] = 0; + + r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); +} + static void r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) { @@ -2733,6 +2763,42 @@ store_cp_blit(struct tu_cmd_buffer *cmd, tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); } +static void +store_3d_blit(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t dst_samples, + bool separate_stencil, + VkFormat format, + const VkRect2D *render_area, + uint32_t gmem_offset, + uint32_t cpp) +{ + r3d_setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, + iview->ubwc_enabled, dst_samples); + + r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); + + if (separate_stencil) + r3d_dst_stencil(cs, iview, 0); + else + r3d_dst(cs, iview, 0); + + r3d_src_gmem(cmd, cs, iview, format, gmem_offset, cpp); + + /* sync GMEM writes with CACHE. */ + tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); + + r3d_run(cmd, cs); + + /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to + * sysmem, and we generally assume that GMEM renderpasses leave their + * results in sysmem, so we need to flush manually here. The 3d blit path + * writes to depth images as a color RT, so there's no need to flush depth. + */ + tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); +} + void tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, @@ -2782,26 +2848,39 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, return; } - if (dst->samples > 1) { - /* I guess we need to use shader path in this case? - * need a testcase which fails because of this - */ - tu_finishme("unaligned store of msaa attachment\n"); - return; - } - - r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - VkFormat format = src->format; if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) format = VK_FORMAT_D32_SFLOAT; - if (dst->store) { - store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format, - src->gmem_offset, src->cpp); - } - if (dst->store_stencil) { - store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, - src->gmem_offset_stencil, src->samples); + if (dst->samples > 1) { + /* If we hit this path, we have to disable draw states after every tile + * instead of once at the end of the renderpass, so that they aren't + * executed when calling CP_DRAW. + * + * TODO: store a flag somewhere so we don't do this more than once and + * don't do it after the renderpass when this happens. + */ + if (dst->store || dst->store_stencil) + tu_disable_draw_states(cmd, cs); + + if (dst->store) { + store_3d_blit(cmd, cs, iview, dst->samples, resolve_d32s8_s8, format, + render_area, src->gmem_offset, src->cpp); + } + if (dst->store_stencil) { + store_3d_blit(cmd, cs, iview, dst->samples, true, VK_FORMAT_S8_UINT, + render_area, src->gmem_offset, src->samples); + } + } else { + r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); + + if (dst->store) { + store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format, + src->gmem_offset, src->cpp); + } + if (dst->store_stencil) { + store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT, + src->gmem_offset_stencil, src->samples); + } } } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index f9bf3e14f61684da5fac9bb47b38e6d52a5fcc93..47aa21789fa2a60fcd640e0427bd497b04a9261e 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -684,7 +684,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } -static void +void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); @@ -1169,7 +1169,7 @@ static void tu6_sysmem_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) { /* Do any resolves of the last subpass. These are handled in the - * tile_store_ib in the gmem path. + * tile_store_cs in the gmem path. */ tu6_emit_sysmem_resolves(cmd, cs, cmd->state.subpass); @@ -1242,7 +1242,7 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS)); } - tu_cs_emit_ib(cs, &cmd->state.tile_store_ib); + tu_cs_emit_call(cs, &cmd->tile_store_cs); tu_cs_sanity_check(cs); } @@ -1299,25 +1299,6 @@ tu_cmd_render_sysmem(struct tu_cmd_buffer *cmd) tu6_sysmem_render_end(cmd, &cmd->cs); } -static void -tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd) -{ - const uint32_t tile_store_space = 7 + (35 * 2) * cmd->state.pass->attachment_count; - struct tu_cs sub_cs; - - VkResult result = - tu_cs_begin_sub_stream(&cmd->sub_cs, tile_store_space, &sub_cs); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - /* emit to tile-store sub_cs */ - tu6_emit_tile_store(cmd, &sub_cs); - - cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->sub_cs, &sub_cs); -} - static VkResult tu_create_cmd_buffer(struct tu_device *device, struct tu_cmd_pool *pool, @@ -1349,6 +1330,7 @@ tu_create_cmd_buffer(struct tu_device *device, tu_cs_init(&cmd_buffer->cs, device, TU_CS_MODE_GROW, 4096); tu_cs_init(&cmd_buffer->draw_cs, device, TU_CS_MODE_GROW, 4096); + tu_cs_init(&cmd_buffer->tile_store_cs, device, TU_CS_MODE_GROW, 2048); tu_cs_init(&cmd_buffer->draw_epilogue_cs, device, TU_CS_MODE_GROW, 4096); tu_cs_init(&cmd_buffer->sub_cs, device, TU_CS_MODE_SUB_STREAM, 2048); @@ -1364,6 +1346,7 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer) tu_cs_finish(&cmd_buffer->cs); tu_cs_finish(&cmd_buffer->draw_cs); + tu_cs_finish(&cmd_buffer->tile_store_cs); tu_cs_finish(&cmd_buffer->draw_epilogue_cs); tu_cs_finish(&cmd_buffer->sub_cs); @@ -1377,6 +1360,7 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer) tu_cs_reset(&cmd_buffer->cs); tu_cs_reset(&cmd_buffer->draw_cs); + tu_cs_reset(&cmd_buffer->tile_store_cs); tu_cs_reset(&cmd_buffer->draw_epilogue_cs); tu_cs_reset(&cmd_buffer->sub_cs); @@ -1510,6 +1494,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, tu_cs_begin(&cmd_buffer->cs); tu_cs_begin(&cmd_buffer->draw_cs); + tu_cs_begin(&cmd_buffer->tile_store_cs); tu_cs_begin(&cmd_buffer->draw_epilogue_cs); /* setup initial configuration into command buffer */ @@ -2051,6 +2036,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer) tu_cs_end(&cmd_buffer->cs); tu_cs_end(&cmd_buffer->draw_cs); + tu_cs_end(&cmd_buffer->tile_store_cs); tu_cs_end(&cmd_buffer->draw_epilogue_cs); cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE; @@ -2927,8 +2913,6 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, cmd->state.framebuffer = fb; cmd->state.render_area = pRenderPassBegin->renderArea; - tu_cmd_prepare_tile_store_ib(cmd); - /* Note: because this is external, any flushes will happen before draw_cs * gets called. However deferred flushes could have to happen later as part * of the subpass. @@ -4363,7 +4347,10 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer, { TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); + tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs); + tu_cs_end(&cmd_buffer->draw_cs); + tu_cs_end(&cmd_buffer->tile_store_cs); tu_cs_end(&cmd_buffer->draw_epilogue_cs); if (use_sysmem_rendering(cmd_buffer)) @@ -4371,10 +4358,9 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer, else tu_cmd_render_tiles(cmd_buffer); - /* outside of renderpasses we assume all draw states are disabled - * we can do this in the main cs because no resolve/store commands - * should use a draw command (TODO: this will change if unaligned - * GMEM stores are supported) + /* Outside of renderpasses we assume all draw states are disabled. We do + * this outside the draw CS for the normal case where 3d gmem stores aren't + * used. */ tu_disable_draw_states(cmd_buffer, &cmd_buffer->cs); @@ -4382,6 +4368,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer, rendered */ tu_cs_discard_entries(&cmd_buffer->draw_cs); tu_cs_begin(&cmd_buffer->draw_cs); + tu_cs_discard_entries(&cmd_buffer->tile_store_cs); + tu_cs_begin(&cmd_buffer->tile_store_cs); tu_cs_discard_entries(&cmd_buffer->draw_epilogue_cs); tu_cs_begin(&cmd_buffer->draw_epilogue_cs); diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index c0d4e8ef899afc4c6e615a1c951936246cdbf817..5400072c859c5157b711ba32ee37113d1b48b7ad 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -974,8 +974,6 @@ struct tu_cmd_state const struct tu_framebuffer *framebuffer; VkRect2D render_area; - struct tu_cs_entry tile_store_ib; - bool xfb_used; bool has_tess; bool has_subpass_predication; @@ -1034,6 +1032,7 @@ struct tu_cmd_buffer struct tu_cs cs; struct tu_cs draw_cs; + struct tu_cs tile_store_cs; struct tu_cs draw_epilogue_cs; struct tu_cs sub_cs; @@ -1240,6 +1239,8 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_ void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1); +void tu_disable_draw_states(struct tu_cmd_buffer *cmd, struct tu_cs *cs); + struct tu_pvtmem_config { uint64_t iova; uint32_t per_fiber_size;