Commit 2f6d47a7 authored by Keith Whitwell, committed by Jose Fonseca

llvmpipe: use single swizzled tile

Use a single swizzled tile per colorbuf (and per thread) to avoid
accumulating large amounts of cached swizzled data.

Now that the SSE3 code has been merged to master, the performance delta
of this change is minimal; the main benefit is reduced memory usage
due to no longer keeping swizzled copies of render targets.

It's clear from the performance of the in-place version of this code
that there is still quite a bit of time being spent swizzling &
unswizzling, but it's not clear exactly how to reduce that.
parent b7fff13d
......@@ -29,32 +29,17 @@
#include "lp_limits.h"
#include "lp_memory.h"
/** 32bpp RGBA dummy tile to use in out of memory conditions */
static PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
static unsigned lp_out_of_memory = 0;
/**
 * Return the shared dummy tile for use in out-of-memory conditions,
 * printing a warning for the first few calls only.
 *
 * \return pointer to lp_dummy_tile
 */
uint8_t *
lp_get_dummy_tile(void)
{
   /* Maximum number of out-of-memory warnings to print. */
   enum { MAX_OOM_WARNINGS = 10 };

   /* Count only while below the cap.  Incrementing unconditionally would
    * let the unsigned counter wrap around after enough calls and start
    * printing the warning again.
    */
   if (lp_out_of_memory < MAX_OOM_WARNINGS) {
      lp_out_of_memory++;
      debug_printf("llvmpipe: out of memory. Using dummy tile memory.\n");
   }
   return lp_dummy_tile;
}
/**
 * Return the shared dummy tile without printing any warning.
 *
 * \return pointer to lp_dummy_tile
 */
uint8_t *
lp_get_dummy_tile_silent(void)
{
   uint8_t *dummy = lp_dummy_tile;

   return dummy;
}
/**
 * Test whether a tile pointer refers to the shared dummy tile.
 *
 * \param tile  tile pointer to test
 * \return non-zero if \p tile is the address of lp_dummy_tile
 */
boolean
lp_is_dummy_tile(void *tile)
{
   const void *dummy = lp_dummy_tile;

   return tile == dummy;
}
/**
* 32bpp RGBA swizzled tiles. One for each thread and each
* possible colorbuf. Adds up to quite a bit: 8*8*64*64*4 == 1MB.
* Several schemes exist to reduce this, such as scaling back the
* number of threads or using a smaller tilesize when multiple
* colorbuffers are bound.
*/
PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
/* A single dummy tile used in a couple of out-of-memory situations.
*/
PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
......@@ -30,16 +30,11 @@
#include "pipe/p_compiler.h"
#include "pipe/p_state.h"
#include "lp_limits.h"
extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
extern uint8_t *
lp_get_dummy_tile(void);
uint8_t *
lp_get_dummy_tile_silent(void);
extern boolean
lp_is_dummy_tile(void *tile);
extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
#endif /* LP_MEMORY_H */
......@@ -67,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
cbuf->level,
cbuf->zslice,
LP_TEX_USAGE_READ_WRITE,
LP_TEX_LAYOUT_NONE);
LP_TEX_LAYOUT_LINEAR);
}
if (fb->zsbuf) {
......@@ -271,11 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
dst = task->depth_tile;
if (lp_is_dummy_tile(dst))
return;
assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y));
switch (block_size) {
case 1:
memset(dst, (uint8_t) clear_value, height * width);
......@@ -375,10 +370,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task,
struct pipe_surface *cbuf = scene->fb.cbufs[buf];
const unsigned face = cbuf->face, level = cbuf->level;
struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
/* this will convert the tiled data to linear if needed */
(void) llvmpipe_get_texture_tile_linear(lpt, face, level,
LP_TEX_USAGE_READ,
task->x, task->y);
if (!task->color_tiles[buf])
continue;
llvmpipe_unswizzle_cbuf_tile(lpt,
face,
level,
task->x, task->y,
task->color_tiles[buf]);
}
}
......@@ -589,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
(void) outline_subtiles;
#endif
{
union lp_rast_cmd_arg dummy = {0};
lp_rast_store_linear_color(task, dummy);
}
/* debug */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
task->depth_tile = NULL;
......@@ -751,30 +756,8 @@ debug_bin( const struct cmd_bin *bin )
/**
 * Decide whether a command bin can be treated as empty.
 *
 * \param bin  the command bin to inspect
 * \return TRUE if the bin is considered empty, FALSE otherwise
 */
static boolean
is_empty_bin( const struct cmd_bin *bin )
{
const struct cmd_block *head = bin->commands.head;
int i;
/* debugging aid, disabled by the constant-false condition */
if (0)
debug_bin(bin);
/* We emit at most two load-tile commands at the start of the first
* command block. In addition we seem to emit a couple of
* set-state commands even in empty bins.
*
* As a heuristic, if a bin has more than 4 commands, consider it
* non-empty.
*/
if (head->next != NULL ||
head->count > 4) {
return FALSE;
}
/* any command other than a linear-color store makes the bin non-empty */
for (i = 0; i < head->count; i++)
if (head->cmd[i] != lp_rast_store_linear_color) {
return FALSE;
}
return TRUE;
/* NOTE(review): the two statements below follow an unconditional return
* and are unreachable as written.  They look like the replacement body
* from the diff (empty means the first command block has zero commands);
* confirm which version is intended.
*/
if (0) debug_bin(bin);
return bin->commands.head->count == 0;
}
......@@ -984,6 +967,10 @@ lp_rast_create( unsigned num_threads )
/* for synchronizing rasterization threads */
pipe_barrier_init( &rast->barrier, rast->num_threads );
memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
return rast;
}
......
......@@ -148,7 +148,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
* the oom warning, as this is most likely because there is no
* zsbuf.
*/
return lp_get_dummy_tile_silent();
return lp_dummy_tile;
}
depth = (rast->zsbuf.map +
......@@ -178,15 +178,14 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
struct llvmpipe_resource *lpt;
assert(cbuf);
lpt = llvmpipe_resource(cbuf->texture);
task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
cbuf->face + cbuf->zslice,
cbuf->level,
usage,
task->x,
task->y);
if (!task->color_tiles[buf]) {
/* out of memory - use dummy tile memory */
return lp_get_dummy_tile();
task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf];
if (usage != LP_TEX_USAGE_WRITE_ALL) {
llvmpipe_swizzle_cbuf_tile(lpt,
cbuf->face + cbuf->zslice,
cbuf->level,
task->x, task->y,
task->color_tiles[buf]);
}
}
......@@ -212,10 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
assert((y % TILE_VECTOR_HEIGHT) == 0);
color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
if (!color) {
/* out of memory - use dummy tile memory */
return lp_get_dummy_tile();
}
assert(color);
px = x % TILE_SIZE;
py = y % TILE_SIZE;
......
......@@ -280,20 +280,6 @@ lp_setup_flush( struct lp_setup_context *setup,
LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
if (setup->scene) {
struct lp_scene *scene = lp_setup_get_current_scene(setup);
union lp_rast_cmd_arg dummy = {0};
if (flags & (PIPE_FLUSH_SWAPBUFFERS |
PIPE_FLUSH_FRAME)) {
/* Store colors in the linear color buffer(s).
* If we don't do this here, we'll end up converting the tiled
* data to linear in the texture_unmap() function, which will
* not be a parallel/threaded operation as here.
*/
lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy);
}
if (fence) {
/* if we're going to flush the setup/rasterization modules, emit
* a fence.
......@@ -642,7 +628,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
if (!jit_tex->data[j]) {
/* out of memory - use dummy tile memory */
jit_tex->data[j] = lp_get_dummy_tile();
jit_tex->data[j] = lp_dummy_tile;
jit_tex->width = TILE_SIZE;
jit_tex->height = TILE_SIZE;
jit_tex->depth = 1;
......
......@@ -1208,6 +1208,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
}
/**
 * Convert a single swizzled (tiled) color tile into the resource's linear
 * image, then record that the tile position now has linear layout.
 *
 * If the linear image cannot be obtained (e.g. allocation failed), nothing
 * is written and the tile layout is left unchanged.
 *
 * \param lpr         resource whose linear image receives the data
 * \param face_slice  face/slice index used to locate the image
 * \param level       mipmap level
 * \param x           tile x position in pixels, a multiple of TILE_SIZE
 * \param y           tile y position in pixels, a multiple of TILE_SIZE
 * \param tile        the single swizzled tile to read from
 */
void
llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                             unsigned face_slice, unsigned level,
                             unsigned x, unsigned y,
                             uint8_t *tile)
{
   struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
   const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
   uint8_t *linear_image;
   unsigned tile_offset, byte_offset;

   assert(x % TILE_SIZE == 0);
   assert(y % TILE_SIZE == 0);

   if (!linear_img->data) {
      /* allocate memory for the linear image now */
      alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);

      /* NOTE(review): presumes alloc_image_data() fills in
       * linear_img->data on success -- confirm.  If it is still NULL
       * there is no destination to write into.
       */
      if (!linear_img->data)
         return;
   }

   /* compute address of the slice/face of the image that contains the tile */
   linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
                                                     LP_TEX_LAYOUT_LINEAR);

   /* Same guard as llvmpipe_swizzle_cbuf_tile(): never convert through a
    * NULL image address.
    */
   if (!linear_image)
      return;

   /* Note that lp_tiled_to_linear expects the tile parameter to
    * point at the first tile in a whole-image sized array.  In
    * this code, we have only a single tile and have to do some
    * pointer arithmetic to figure out where the "image" would have
    * started.
    */
   tile_offset = y / TILE_SIZE + x / TILE_SIZE;
   byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;

   lp_tiled_to_linear(tile - byte_offset, linear_image,
                      x, y, TILE_SIZE, TILE_SIZE,
                      lpr->base.format,
                      lpr->row_stride[level],
                      1);       /* tiles per row */

   llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty,
                                    LP_TEX_LAYOUT_LINEAR);
}
/**
 * Fill a single swizzled (tiled) color tile from the resource's linear
 * image.  Does nothing when the linear image address is NULL.
 *
 * \param lpr         resource whose linear image is read
 * \param face_slice  face/slice index used to locate the image
 * \param level       mipmap level
 * \param x           tile x position in pixels, a multiple of TILE_SIZE
 * \param y           tile y position in pixels, a multiple of TILE_SIZE
 * \param tile        the single swizzled tile to write into
 */
void
llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                           unsigned face_slice, unsigned level,
                           unsigned x, unsigned y,
                           uint8_t *tile)
{
   uint8_t *src;
   unsigned tile_index, skip_bytes;

   assert(x % TILE_SIZE == 0);
   assert(y % TILE_SIZE == 0);

   /* locate the slice/face of the linear image holding this tile */
   src = llvmpipe_get_texture_image_address(lpr, face_slice, level,
                                            LP_TEX_LAYOUT_LINEAR);
   if (!src)
      return;

   /* lp_linear_to_tiled wants a pointer to the first tile of a
    * whole-image sized array of tiles.  Only one tile exists here, so
    * step the pointer back to where such an array would have begun.
    */
   tile_index = y / TILE_SIZE + x / TILE_SIZE;
   skip_bytes = tile_index * TILE_SIZE * TILE_SIZE * 4;

   lp_linear_to_tiled(src, tile - skip_bytes,
                      x, y, TILE_SIZE, TILE_SIZE,
                      lpr->base.format,
                      lpr->row_stride[level],
                      1);       /* tiles per row */
}
/**
* Return size of resource in bytes
*/
......
......@@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
unsigned x, unsigned y);
void
llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
unsigned face_slice, unsigned level,
unsigned x, unsigned y,
uint8_t *tile);
void
llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
unsigned face_slice, unsigned level,
unsigned x, unsigned y,
uint8_t *tile);
extern void
llvmpipe_print_resources(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment