Commit 824cfc1e authored by Samuel Pitoiset's avatar Samuel Pitoiset

radv: rework the TC-compat HTILE hardware bug with COND_EXEC

After investigating on this, it appears that COND_WRITE doesn't
work correctly in some situations. I don't know exactly why does
it fail to update DB_Z_INFO.ZRANGE_PRECISION, but as AMDVLK
also uses COND_EXEC I think there is a reason.

Now the driver stores a new metadata value in order to reflect
the last fast depth clear state. If a TC-compat HTILE is fast cleared
with 0.0f, we have to update ZRANGE_PRECISION to 0 in order to
work around that hardware bug.

This fixes rendering issues with The Forest and DXVK and doesn't
seem to introduce any regressions.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108914
Fixes: 68dead11 ("radv: update the ZRANGE_PRECISION value for the TC-compat bug")
Signed-off-by: Samuel Pitoiset's avatarSamuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen's avatarBas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
parent 2669dbf8
......@@ -1067,7 +1067,7 @@ static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
struct radv_ds_buffer_info *ds,
struct radv_image *image, VkImageLayout layout,
bool requires_cond_write)
bool requires_cond_exec)
{
uint32_t db_z_info = ds->db_z_info;
uint32_t db_z_info_reg;
......@@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
}
/* When we don't know the last fast clear value we need to emit a
* conditional packet, otherwise we can update DB_Z_INFO directly.
* conditional packet that will eventually skip the following
* SET_CONTEXT_REG packet.
*/
if (requires_cond_write) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
const uint32_t write_space = 0 << 8; /* register */
const uint32_t poll_space = 1 << 4; /* memory */
const uint32_t function = 3 << 0; /* equal to the reference */
const uint32_t options = write_space | poll_space | function;
radeon_emit(cmd_buffer->cs, options);
/* poll address - location of the depth clear value */
if (requires_cond_exec) {
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
/* In presence of stencil format, we have to adjust the base
* address because the first value is the stencil clear value.
*/
if (vk_format_is_stencil(image->vk_format))
va += 4;
va += image->offset + image->tc_compat_zrange_offset;
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
radeon_emit(cmd_buffer->cs, 0u); /* write address high */
radeon_emit(cmd_buffer->cs, db_z_info);
} else {
radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
radeon_emit(cmd_buffer->cs, 0);
radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
}
radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
static void
......@@ -1269,6 +1252,45 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, fui(ds_clear_value.depth));
}
/**
* Update the TC-compat metadata value for this image.
*/
static void
radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
uint32_t value)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->tc_compat_zrange_offset;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_PFP));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, value);
}
static void
radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
VkClearDepthStencilValue ds_clear_value)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->tc_compat_zrange_offset;
uint32_t cond_val;
/* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
* depth clear value is 0.0f.
*/
cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
radv_set_tc_compat_zrange_metadata(cmd_buffer, image, cond_val);
}
/**
* Update the clear depth/stencil values for this image.
*/
......@@ -1282,6 +1304,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects);
if (radv_image_is_tc_compat_htile(image) &&
(aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
radv_update_tc_compat_zrange_metadata(cmd_buffer, image,
ds_clear_value);
}
radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value,
aspects);
}
......@@ -4229,6 +4257,15 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
radv_set_ds_clear_metadata(cmd_buffer, image, value, aspects);
if (radv_image_is_tc_compat_htile(image)) {
/* Initialize the TC-compat metada value to 0 because by
* default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
* need have to conditionally update its value when performing
* a fast depth clear.
*/
radv_set_tc_compat_zrange_metadata(cmd_buffer, image, 0);
}
}
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
......
......@@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image)
/* + 8 for storing the clear values */
image->clear_value_offset = image->htile_offset + image->surface.htile_size;
image->size = image->clear_value_offset + 8;
if (radv_image_is_tc_compat_htile(image)) {
/* Metadata for the TC-compatible HTILE hardware bug which
* have to be fixed by updating ZRANGE_PRECISION when doing
* fast depth clears to 0.0f.
*/
image->tc_compat_zrange_offset = image->clear_value_offset + 8;
image->size = image->clear_value_offset + 16;
}
image->alignment = align64(image->alignment, image->surface.htile_alignment);
}
......@@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device,
/* Otherwise, try to enable HTILE for depth surfaces. */
if (radv_image_can_enable_htile(image) &&
!(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
radv_image_alloc_htile(image);
image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
radv_image_alloc_htile(image);
} else {
image->surface.htile_size = 0;
}
......
......@@ -1503,6 +1503,14 @@ struct radv_image {
uint64_t clear_value_offset;
uint64_t fce_pred_offset;
/*
* Metadata for the TC-compat zrange workaround. If the 32-bit value
* stored at this offset is UINT_MAX, the driver will emit
* DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
* SET_CONTEXT_REG packet.
*/
uint64_t tc_compat_zrange_offset;
/* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
VkDeviceMemory owned_memory;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment