Commit 912a9c8d, authored by Jonathan Marek, committed by Rob Clark

freedreno: a2xx: clear fixes and fast clear path

This fixes the depth/stencil clear on a20x, and adds a fast clear path.

The fast clear path is currently only used on a20x; performance tests on a22x are still needed.
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
parent cb2322c7
......@@ -54,6 +54,8 @@ create_solid_vertexbuf(struct pipe_context *pctx)
+0.000000, +0.000000,
+1.000000, +0.000000,
+0.000000, +1.000000,
/* SCREEN_SCISSOR_BR value (must be at 60 byte offset in page) */
0.0,
};
struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
......
......@@ -33,4 +33,11 @@
void fd2_draw_init(struct pipe_context *pctx);
/* Kinds of GMEM patch recorded in batch->gmem_patches (the kind is read
 * back from patch->val).  fd2_emit_tile_init() walks the list and fills in
 * the patched command-stream words for each entry.
 */
enum {
/* fast clear sized from the color buffer's bytes per pixel; patches the
 * scissor BR word (cs[0]) and the color/depth info words (cs[4], cs[5])
 */
GMEM_PATCH_FASTCLEAR_COLOR,
/* fast clear sized from the depth buffer's bytes per pixel; the color base
 * is pointed at the depth buffer's GMEM base (gmem->zsbuf_base[0])
 */
GMEM_PATCH_FASTCLEAR_DEPTH,
/* fast clear covering color and depth together; the line count is computed
 * from twice the color buffer's bytes per pixel
 */
GMEM_PATCH_FASTCLEAR_COLOR_DEPTH,
/* patches cs[0..2] with the bin width and the RB_COLOR_INFO / RB_DEPTH_INFO
 * values built from the framebuffer's color and depth formats
 */
GMEM_PATCH_RESTORE_INFO,
};
#endif /* FD2_DRAW_H_ */
......@@ -360,7 +360,7 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
OUT_RING(ring, blend ? zsa->rb_colorcontrol | blend->rb_colorcontrol : 0);
OUT_RING(ring, zsa->rb_colorcontrol | blend->rb_colorcontrol);
}
if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
......@@ -370,13 +370,13 @@ fd2_emit_state(struct fd_context *ctx, const enum fd_dirty_3d_state dirty)
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
OUT_RING(ring, blend ? blend->rb_blendcontrol_alpha |
OUT_RING(ring, blend->rb_blendcontrol_alpha |
COND(has_alpha, blend->rb_blendcontrol_rgb) |
COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb) : 0);
COND(!has_alpha, blend->rb_blendcontrol_no_alpha_rgb));
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
OUT_RING(ring, blend ? blend->rb_colormask : 0xf);
OUT_RING(ring, blend->rb_colormask);
}
if (dirty & FD_DIRTY_BLEND_COLOR) {
......
......@@ -39,6 +39,7 @@
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"
#include "fd2_draw.h"
#include "instr-a2xx.h"
static uint32_t fmt2swap(enum pipe_format format)
......@@ -473,6 +474,58 @@ fd2_emit_tile_init(struct fd_batch *batch)
reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
OUT_RING(ring, reg); /* RB_DEPTH_INFO */
/* fast clear patches */
int depth_size = -1;
int color_size = -1;
if (pfb->cbufs[0])
color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
if (pfb->zsbuf)
depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
uint32_t size, lines;
/* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
switch (patch->val) {
case GMEM_PATCH_FASTCLEAR_COLOR:
size = align(gmem->bin_w * gmem->bin_h * color_size, 0x4000);
lines = size / 1024;
depth_base = size / 2;
break;
case GMEM_PATCH_FASTCLEAR_DEPTH:
size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x4000);
lines = size / 1024;
color_base = depth_base;
depth_base = depth_base + size / 2;
break;
case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
lines = align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x4000) / 1024;
break;
case GMEM_PATCH_RESTORE_INFO:
patch->cs[0] = gmem->bin_w;
patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
if (pfb->zsbuf)
patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
continue;
default:
continue;
}
patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
}
util_dynarray_resize(&batch->gmem_patches, 0);
/* set to zero, for some reason hardware doesn't like certain values */
OUT_PKT3(ring, CP_SET_CONSTANT, 2);
OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
......@@ -607,6 +660,7 @@ static void
fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
{
struct fd_context *ctx = batch->ctx;
struct fd2_context *fd2_ctx = fd2_context(ctx);
struct fd_ringbuffer *ring = batch->gmem;
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
......@@ -624,6 +678,12 @@ fd2_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile)
OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
/* write SCISSOR_BR to memory so fast clear path can restore from it */
OUT_PKT3(ring, CP_MEM_WRITE, 2);
OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
/* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
if (is_a20x(batch->ctx->screen)) {
OUT_PKT3(ring, CP_SET_CONSTANT, 5);
......
......@@ -90,8 +90,10 @@ batch_init(struct fd_batch *batch)
util_dynarray_init(&batch->draw_patches, NULL);
if (is_a2xx(ctx->screen))
if (is_a2xx(ctx->screen)) {
util_dynarray_init(&batch->shader_patches, NULL);
util_dynarray_init(&batch->gmem_patches, NULL);
}
if (is_a3xx(ctx->screen))
util_dynarray_init(&batch->rbrc_patches, NULL);
......@@ -167,8 +169,10 @@ batch_fini(struct fd_batch *batch)
util_dynarray_fini(&batch->draw_patches);
if (is_a2xx(batch->ctx->screen))
if (is_a2xx(batch->ctx->screen)) {
util_dynarray_fini(&batch->shader_patches);
util_dynarray_fini(&batch->gmem_patches);
}
if (is_a3xx(batch->ctx->screen))
util_dynarray_fini(&batch->rbrc_patches);
......
......@@ -145,6 +145,11 @@ struct fd_batch {
*/
struct util_dynarray rbrc_patches;
/* Keep track of GMEM related values that need to be patched up once we
* know the gmem layout:
*/
struct util_dynarray gmem_patches;
/* Keep track of pointer to start of MEM exports for a20x binning shaders
*
* this is so the end of the shader can be cut off at the right point
......
......@@ -77,24 +77,25 @@ static uint32_t bin_width(struct fd_screen *screen)
static uint32_t
total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
uint32_t bin_w, uint32_t bin_h, uint32_t gmem_align,
struct fd_gmem_stateobj *gmem)
{
uint32_t total = 0, i;
for (i = 0; i < MAX_RENDER_TARGETS; i++) {
if (cbuf_cpp[i]) {
gmem->cbuf_base[i] = align(total, 0x4000);
gmem->cbuf_base[i] = align(total, gmem_align);
total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
}
}
if (zsbuf_cpp[0]) {
gmem->zsbuf_base[0] = align(total, 0x4000);
gmem->zsbuf_base[0] = align(total, gmem_align);
total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
}
if (zsbuf_cpp[1]) {
gmem->zsbuf_base[1] = align(total, 0x4000);
gmem->zsbuf_base[1] = align(total, gmem_align);
total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
}
......@@ -116,6 +117,7 @@ calculate_tiles(struct fd_batch *batch)
uint32_t minx, miny, width, height;
uint32_t nbins_x = 1, nbins_y = 1;
uint32_t bin_w, bin_h;
uint32_t gmem_align = 0x4000;
uint32_t max_width = bin_width(screen);
uint8_t cbuf_cpp[MAX_RENDER_TARGETS] = {0}, zsbuf_cpp[2] = {0};
uint32_t i, j, t, xoff, yoff;
......@@ -178,10 +180,18 @@ calculate_tiles(struct fd_batch *batch)
zsbuf_cpp[0], width, height);
}
if (is_a20x(screen) && batch->cleared) {
/* under normal circumstances buffer bases are aligned to 0x4000 (16K,
* the gmem_align default), but the fast clear path requires an
* alignment of 0x8000 (32K)
*/
gmem_align = 0x8000;
}
/* then find a bin width/height that satisfies the memory
* constraints:
*/
while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem_align, gmem) >
gmem_size) {
if (bin_w > bin_h) {
nbins_x++;
bin_w = align(width / nbins_x, gmem_alignw);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment