Commit c2f2c8e4 authored by Jason Ekstrand

anv: Use different BOs for different scratch sizes and stages

This solves a race condition where we can end up having different stages
stomp on each other because they're all trying to scratch in the same BO
but they have different views of its layout.
Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: "12.0" <mesa-stable@lists.freedesktop.org>
parent 45c0f609
......@@ -878,7 +878,7 @@ VkResult anv_CreateDevice(
anv_bo_init_new(&device->workaround_bo, device, 1024);
anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
anv_scratch_pool_init(device, &device->scratch_pool);
anv_queue_init(device, &device->queue);
......@@ -947,7 +947,7 @@ void anv_DestroyDevice(
anv_block_pool_finish(&device->instruction_block_pool);
anv_state_pool_finish(&device->surface_state_pool);
anv_block_pool_finish(&device->surface_state_block_pool);
anv_block_pool_finish(&device->scratch_block_pool);
anv_scratch_pool_finish(device, &device->scratch_pool);
close(device->fd);
......
......@@ -397,22 +397,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline,
const struct brw_stage_prog_data *prog_data,
struct anv_pipeline_bind_map *map)
{
struct brw_device_info *devinfo = &pipeline->device->info;
uint32_t max_threads[] = {
[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
[MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads,
[MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads,
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
[MESA_SHADER_COMPUTE] = devinfo->max_cs_threads,
};
pipeline->prog_data[stage] = prog_data;
pipeline->active_stages |= mesa_to_vk_shader_stage(stage);
pipeline->scratch_start[stage] = pipeline->total_scratch;
pipeline->total_scratch =
align_u32(pipeline->total_scratch, 1024) +
prog_data->total_scratch * max_threads[stage];
pipeline->bindings[stage] = *map;
}
......@@ -1176,7 +1162,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
pipeline->vs_simd8 = NO_KERNEL;
......@@ -1185,7 +1170,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
pipeline->ps_ksp0 = NO_KERNEL;
pipeline->active_stages = 0;
pipeline->total_scratch = 0;
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
......@@ -1278,10 +1262,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
if (extra && extra->use_rectlist)
pipeline->topology = _3DPRIM_RECTLIST;
while (anv_block_pool_size(&device->scratch_block_pool) <
pipeline->total_scratch)
anv_block_pool_alloc(&device->scratch_block_pool);
return VK_SUCCESS;
}
......
......@@ -711,7 +711,7 @@ struct anv_device {
struct anv_queue queue;
struct anv_block_pool scratch_block_pool;
struct anv_scratch_pool scratch_pool;
uint32_t default_mocs;
......@@ -1471,8 +1471,6 @@ struct anv_pipeline {
bool needs_data_cache;
const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES];
uint32_t scratch_start[MESA_SHADER_STAGES];
uint32_t total_scratch;
struct {
uint32_t start[MESA_SHADER_GEOMETRY + 1];
uint32_t size[MESA_SHADER_GEOMETRY + 1];
......
......@@ -252,8 +252,10 @@ genX(graphics_pipeline_create)(
vs.KernelStartPointer = pipeline->vs_vec4;
vs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_VERTEX,
vs_prog_data->base.base.total_scratch),
.offset = 0,
};
vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
......@@ -276,8 +278,10 @@ genX(graphics_pipeline_create)(
gs.KernelStartPointer = pipeline->gs_kernel;
gs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_GEOMETRY,
gs_prog_data->base.base.total_scratch),
.offset = 0,
};
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
......@@ -338,8 +342,10 @@ genX(graphics_pipeline_create)(
ps.KernelStartPointer0 = pipeline->ps_ksp0;
ps.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_FRAGMENT,
wm_prog_data->base.total_scratch),
.offset = 0,
};
ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
ps.MaximumNumberofThreads = device->info.max_wm_threads - 1;
......
......@@ -361,8 +361,10 @@ genX(graphics_pipeline_create)(
gs.ExpectedVertexCount = gs_prog_data->vertices_in;
gs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_GEOMETRY,
gs_prog_data->base.base.total_scratch),
.offset = 0,
};
gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base);
gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
......@@ -431,8 +433,10 @@ genX(graphics_pipeline_create)(
vs.SoftwareExceptionEnable = false;
vs.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_VERTEX],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_VERTEX,
vs_prog_data->base.base.total_scratch),
.offset = 0,
};
vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base);
......@@ -483,8 +487,10 @@ genX(graphics_pipeline_create)(
ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
ps.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_FRAGMENT,
wm_prog_data->base.total_scratch),
.offset = 0,
};
ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
......
......@@ -33,12 +33,6 @@ void
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;
struct anv_bo *scratch_bo = NULL;
cmd_buffer->state.scratch_size =
anv_block_pool_size(&device->scratch_block_pool);
if (cmd_buffer->state.scratch_size > 0)
scratch_bo = &device->scratch_block_pool.bo;
/* XXX: Do we need this on more than just BDW? */
#if (GEN_GEN >= 8)
......@@ -55,7 +49,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
#endif
anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 };
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
sba.GeneralStateMemoryObjectControlState = GENX(MOCS);
sba.GeneralStateBaseAddressModifyEnable = true;
......@@ -503,13 +497,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
cmd_buffer->state.vb_dirty &= ~vb_emit;
if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
/* If somebody compiled a pipeline after starting a command buffer the
* scratch bo may have grown since we started this cmd buffer (and
* emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
* reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
anv_cmd_buffer_emit_state_base_address(cmd_buffer);
anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
/* The exact descriptor layout is pulled from the pipeline, so we need
......
......@@ -64,7 +64,6 @@ genX(compute_pipeline_create)(
* of various prog_data pointers. Make them NULL by default.
*/
memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
memset(pipeline->bindings, 0, sizeof(pipeline->bindings));
pipeline->vs_simd8 = NO_KERNEL;
......@@ -72,7 +71,6 @@ genX(compute_pipeline_create)(
pipeline->gs_kernel = NO_KERNEL;
pipeline->active_stages = 0;
pipeline->total_scratch = 0;
pipeline->needs_data_cache = false;
......@@ -103,8 +101,10 @@ genX(compute_pipeline_create)(
anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
vfe.ScratchSpaceBasePointer = (struct anv_address) {
.bo = NULL,
.offset = pipeline->scratch_start[MESA_SHADER_COMPUTE],
.bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
MESA_SHADER_COMPUTE,
cs_prog_data->base.total_scratch),
.offset = 0,
};
vfe.PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048);
#if GEN_GEN > 7
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment