Commit 3963b614 authored by Ryan Houdek

Merge branch 'tomeu-perf' into 'master'

Performance improvements, etc

See merge request !16
parents 478c0600 151b63be
...@@ -1294,7 +1294,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp) ...@@ -1294,7 +1294,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
* Every hardware driver_name is set using strdup. Doing the same in * Every hardware driver_name is set using strdup. Doing the same in
* here will allow us to simply free the memory at dri2_terminate(). * here will allow us to simply free the memory at dri2_terminate().
*/ */
dri2_dpy->driver_name = strdup("panfrost"); dri2_dpy->driver_name = strdup("swrast");
if (!dri2_load_driver_swrast(disp)) if (!dri2_load_driver_swrast(disp))
goto cleanup; goto cleanup;
......
...@@ -45,10 +45,12 @@ ...@@ -45,10 +45,12 @@
* channels, but outputs to only a single output channel, like dot products. * channels, but outputs to only a single output channel, like dot products.
* For these, to determine the effective mask, this quirk can be set. We have * For these, to determine the effective mask, this quirk can be set. We have
* an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no * an intentional off-by-one (a la MALI_POSITIVE), since 0-channel makes no
* sense but we need to fit 4 channels in 2-bits */ * sense but we need to fit 4 channels in 2-bits. Similarly, 1-channel doesn't
* make sense (since then why are we quirked?), so that corresponds to "no
* count set" */
#define OP_CHANNEL_COUNT(c) ((c - 1) << 0) #define OP_CHANNEL_COUNT(c) ((c - 1) << 0)
#define GET_CHANNEL_COUNT(c) ((c & (0x3 << 0)) + 1) #define GET_CHANNEL_COUNT(c) ((c & (0x3 << 0)) ? ((c & (0x3 << 0)) + 1) : 0)
/* Vector-independent shorthands for the above; these numbers are arbitrary and /* Vector-independent shorthands for the above; these numbers are arbitrary and
* not from the ISA. Convert to the above with unit_enum_to_midgard */ * not from the ISA. Convert to the above with unit_enum_to_midgard */
......
...@@ -3237,7 +3237,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl ...@@ -3237,7 +3237,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
compiler_context *ctx = &ictx; compiler_context *ctx = &ictx;
/* TODO: Decide this at runtime */ /* TODO: Decide this at runtime */
ctx->uniform_cutoff = 12; ctx->uniform_cutoff = 8;
switch (ctx->stage) { switch (ctx->stage) {
case MESA_SHADER_VERTEX: case MESA_SHADER_VERTEX:
......
...@@ -499,7 +499,5 @@ panfrost_make_fixed_blend_mode(const struct pipe_rt_blend_state *blend, struct m ...@@ -499,7 +499,5 @@ panfrost_make_fixed_blend_mode(const struct pipe_rt_blend_state *blend, struct m
/* Gallium and Mali represent colour masks identically. XXX: Add a static assert to future-proof this */ /* Gallium and Mali represent colour masks identically. XXX: Add a static assert to future-proof this */
out->color_mask = colormask; out->color_mask = colormask;
panfrost_print_blend_equation(*out);
return true; return true;
} }
...@@ -390,6 +390,11 @@ panfrost_clear( ...@@ -390,6 +390,11 @@ panfrost_clear(
{ {
struct panfrost_context *ctx = panfrost_context(pipe); struct panfrost_context *ctx = panfrost_context(pipe);
if (!color) {
printf("Warning: clear color null?\n");
return;
}
/* Save settings for FBO switch */ /* Save settings for FBO switch */
ctx->last_clear.buffers = buffers; ctx->last_clear.buffers = buffers;
ctx->last_clear.color = color; ctx->last_clear.color = color;
...@@ -562,9 +567,13 @@ panfrost_viewport(struct panfrost_context *ctx, ...@@ -562,9 +567,13 @@ panfrost_viewport(struct panfrost_context *ctx,
/* Reset per-frame context, called on context initialisation as well as after /* Reset per-frame context, called on context initialisation as well as after
* flushing a frame */ * flushing a frame */
static int last_persistent_stack = 0;
static void static void
panfrost_invalidate_frame(struct panfrost_context *ctx) panfrost_invalidate_frame(struct panfrost_context *ctx)
{ {
printf("Uploaded transient %d bytes and persistent %d bytes, \n", ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset, ctx->cmdstream_persistent.stack_bottom - last_persistent_stack);
last_persistent_stack = ctx->cmdstream_persistent.stack_bottom;
/* Rotate cmdstream */ /* Rotate cmdstream */
if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0])))
ctx->cmdstream_i = 0; ctx->cmdstream_i = 0;
...@@ -948,9 +957,9 @@ panfrost_fragment_job(struct panfrost_context *ctx) ...@@ -948,9 +957,9 @@ panfrost_fragment_job(struct panfrost_context *ctx)
if (rsrc->has_checksum) { if (rsrc->has_checksum) {
//ctx->fragment_fbd.unk3 |= 0xa00000; //ctx->fragment_fbd.unk3 |= 0xa00000;
ctx->fragment_fbd.unk3 = 0xa02100; //ctx->fragment_fbd.unk3 = 0xa02100;
ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA; ctx->fragment_fbd.unk3 |= MALI_MFBD_EXTRA;
ctx->fragment_extra.unk = 0x420; ctx->fragment_extra.unk |= 0x420;
ctx->fragment_extra.checksum_stride = rsrc->checksum_stride; ctx->fragment_extra.checksum_stride = rsrc->checksum_stride;
ctx->fragment_extra.checksum = rsrc->gpu[0] + stride * rsrc->base.height0; ctx->fragment_extra.checksum = rsrc->gpu[0] + stride * rsrc->base.height0;
} }
...@@ -1017,13 +1026,23 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx) ...@@ -1017,13 +1026,23 @@ panfrost_emit_vertex_data(struct panfrost_context *ctx)
/* Offset vertex count by draw_start to make sure we upload enough */ /* Offset vertex count by draw_start to make sure we upload enough */
attrs[i].stride = buf->stride; attrs[i].stride = buf->stride;
//attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + ctx->vertex_count); attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + ctx->vertex_count);
/* TODO: The above calculation is wrong. Do it better. For now, force resources */ /* TODO: The above calculation is wrong. Do it better. For now, force resources */
assert(!buf->is_user_buffer); assert(!buf->is_user_buffer);
attrs[i].size = buf->buffer.resource->width0 - buf->buffer_offset; //attrs[i].size = buf->buffer.resource->width0 - buf->buffer_offset;
/* Vertex elements are -already- GPU-visible, at
* rsrc->gpu. However, attribute buffers must be 64-byte aligned. If
* the effective address is not, for now we have to duplicate the buffer. */
attrs[i].elements = panfrost_upload_transient(ctx, rsrc->cpu[0] + buf->buffer_offset, attrs[i].size) | 1; mali_ptr effective_address = (rsrc->gpu[0] + buf->buffer_offset);
if (effective_address & 0x3F) {
attrs[i].elements = panfrost_upload_transient(ctx, rsrc->cpu[0] + buf->buffer_offset, attrs[i].size) | 1;
} else {
attrs[i].elements = effective_address | 1;
}
} }
for (int i = 0; i < ctx->vs->varyings.varying_buffer_count; ++i) { for (int i = 0; i < ctx->vs->varyings.varying_buffer_count; ++i) {
...@@ -1198,6 +1217,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ...@@ -1198,6 +1217,8 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
/* Upload samplers back to back, no padding */ /* Upload samplers back to back, no padding */
for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
if (!ctx->sampler_count[t]) continue;
struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]);
struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu;
...@@ -1250,7 +1271,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) ...@@ -1250,7 +1271,10 @@ panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
/* Restore */ /* Restore */
ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s;
#ifdef T6XX
ctx->sampler_views[t][i]->hw.unknown3A = 0; ctx->sampler_views[t][i]->hw.unknown3A = 0;
#endif
} }
mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
...@@ -1480,8 +1504,9 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) ...@@ -1480,8 +1504,9 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate)
#ifndef DRY_RUN #ifndef DRY_RUN
/* XXX: flush_immediate was causing lock-ups wrt readpixels in dEQP. Investigate. */ /* XXX: flush_immediate was causing lock-ups wrt readpixels in dEQP. Investigate. */
struct pipe_surface *surf = ctx->pipe_framebuffer.cbufs[0];
base_external_resource framebuffer[] = { base_external_resource framebuffer[] = {
{.ext_resource = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->gpu[0] | (BASE_EXT_RES_ACCESS_EXCLUSIVE & LOCAL_PAGE_LSB)}, {.ext_resource = surf ? (((struct panfrost_resource *) surf->texture)->gpu[0] | (BASE_EXT_RES_ACCESS_EXCLUSIVE & LOCAL_PAGE_LSB)) : 0},
}; };
int vt_atom = allocate_atom(); int vt_atom = allocate_atom();
...@@ -1529,14 +1554,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) ...@@ -1529,14 +1554,14 @@ panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate)
/* If visual, we can stall a frame */ /* If visual, we can stall a frame */
if (!flush_immediate) if (panfrost_is_scanout(ctx) && !flush_immediate)
force_flush_fragment(ctx); force_flush_fragment(ctx);
last_fragment_id = atoms[1].atom_number; last_fragment_id = atoms[1].atom_number;
last_fragment_flushed = false; last_fragment_flushed = false;
/* If readback, flush now (hurts the pipelined performance) */ /* If readback, flush now (hurts the pipelined performance) */
if (flush_immediate) if (panfrost_is_scanout(ctx) && flush_immediate)
force_flush_fragment(ctx); force_flush_fragment(ctx);
#endif #endif
...@@ -2286,7 +2311,7 @@ panfrost_resource_create_front(struct pipe_screen *screen, ...@@ -2286,7 +2311,7 @@ panfrost_resource_create_front(struct pipe_screen *screen,
* zero-copy operation */ * zero-copy operation */
/* Tiling textures is almost always faster, unless we only use it once */ /* Tiling textures is almost always faster, unless we only use it once */
so->tiled = (template->usage != PIPE_USAGE_STREAM); so->tiled = (template->usage != PIPE_USAGE_STREAM) && (template->bind & PIPE_BIND_SAMPLER_VIEW);
if (so->tiled) { if (so->tiled) {
/* For tiled, we don't map directly, so just malloc any old buffer */ /* For tiled, we don't map directly, so just malloc any old buffer */
...@@ -2356,14 +2381,9 @@ panfrost_transfer_map(struct pipe_context *pctx, ...@@ -2356,14 +2381,9 @@ panfrost_transfer_map(struct pipe_context *pctx,
/* If non-zero level, it's a mipmapped resource and needs to be treated as such */ /* If non-zero level, it's a mipmapped resource and needs to be treated as such */
rsrc->is_mipmap |= transfer->level; rsrc->is_mipmap |= transfer->level;
if (transfer->usage & PIPE_TRANSFER_MAP_DIRECTLY) { if (transfer->usage & PIPE_TRANSFER_MAP_DIRECTLY && rsrc->tiled) {
/* We cannot directly map tiled textures */ /* We cannot directly map tiled textures */
return NULL;
if (rsrc->tiled)
return NULL;
/* Otherwise, we're good to go! */
rsrc->mapped_direct = true;
} }
if (resource->bind & PIPE_BIND_DISPLAY_TARGET || if (resource->bind & PIPE_BIND_DISPLAY_TARGET ||
...@@ -2439,7 +2459,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx, ...@@ -2439,7 +2459,7 @@ panfrost_set_framebuffer_state(struct pipe_context *pctx,
panfrost_enable_afbc(ctx, tex, false); panfrost_enable_afbc(ctx, tex, false);
} }
if (is_scanout && !tex->has_checksum && USE_TRANSACTION_ELIMINATION) { if (!is_scanout && !tex->has_checksum) {
/* Enable transaction elimination if we can */ /* Enable transaction elimination if we can */
panfrost_enable_checksum(ctx, tex); panfrost_enable_checksum(ctx, tex);
} }
...@@ -2724,10 +2744,6 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs ...@@ -2724,10 +2744,6 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs
int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */
/* If we're direct mapped, we're done; don't do any swizzling / copies / etc */
if (rsrc->mapped_direct)
return;
int width = rsrc->base.width0 >> level; int width = rsrc->base.width0 >> level;
int height = rsrc->base.height0 >> level; int height = rsrc->base.height0 >> level;
...@@ -2753,16 +2769,10 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs ...@@ -2753,16 +2769,10 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs
rsrc->entry[level] = p_entry; rsrc->entry[level] = p_entry;
rsrc->gpu[level] = backing->gpu + p_entry->offset; rsrc->gpu[level] = backing->gpu + p_entry->offset;
if (rsrc->tiled) { /* Run actual texture swizzle, writing directly to the mapped
/* Run actual texture swizzle, writing directly to the mapped * GPU chunk we allocated */
* GPU chunk we allocated */
panfrost_texture_swizzle(width, height, bytes_per_pixel, stride, rsrc->cpu[level], swizzled);
} else {
/* If indirect linear, just do a dumb copy */
memcpy(swizzled, rsrc->cpu[level], stride * height); panfrost_texture_swizzle(width, height, bytes_per_pixel, stride, rsrc->cpu[level], swizzled);
}
} }
static void static void
...@@ -2776,8 +2786,11 @@ panfrost_transfer_unmap(struct pipe_context *pctx, ...@@ -2776,8 +2786,11 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource; struct panfrost_resource *prsrc = (struct panfrost_resource *) transfer->resource;
/* Gallium thinks writeback happens here; instead, this is our cue to tile */ /* Gallium thinks writeback happens here; instead, this is our cue to tile */
assert(!prsrc->has_afbc); if (prsrc->has_afbc) {
panfrost_tile_texture(ctx, prsrc, transfer->level); printf("Warning: writes to afbc surface can't possibly work out well for you...\n");
} else if (prsrc->tiled) {
panfrost_tile_texture(ctx, prsrc, transfer->level);
}
} }
} }
...@@ -2903,13 +2916,13 @@ panfrost_allocate_slab(struct panfrost_context *ctx, ...@@ -2903,13 +2916,13 @@ panfrost_allocate_slab(struct panfrost_context *ctx,
static void static void
panfrost_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc) panfrost_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{ {
fprintf(stderr, "TODO %s\n", __func__); //fprintf(stderr, "TODO %s\n", __func__);
} }
static void static void
panfrost_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc) panfrost_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{ {
fprintf(stderr, "TODO %s\n", __func__); //fprintf(stderr, "TODO %s\n", __func__);
} }
static void static void
......
...@@ -342,9 +342,8 @@ struct panfrost_resource { ...@@ -342,9 +342,8 @@ struct panfrost_resource {
struct sw_displaytarget *dt; struct sw_displaytarget *dt;
/* Set for tiled, clear for linear. For linear, set if directly mapped and clear for memcpy */ /* Set for tiled, clear for linear. */
bool tiled; bool tiled;
bool mapped_direct;
/* If AFBC is enabled for this resource, we lug around an AFBC /* If AFBC is enabled for this resource, we lug around an AFBC
* metadata buffer as well. The actual AFBC resource is also in * metadata buffer as well. The actual AFBC resource is also in
......
...@@ -782,7 +782,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen, ...@@ -782,7 +782,7 @@ panfrost_resource_get_handle(struct pipe_screen *pscreen,
return TRUE; return TRUE;
} else { } else {
printf("Missed nonrenderonly KMS handle\n"); printf("Missed nonscanout FD handle\n");
assert(0); assert(0);
return FALSE; return FALSE;
} }
......
...@@ -43,7 +43,7 @@ sha1_h = custom_target( ...@@ -43,7 +43,7 @@ sha1_h = custom_target(
'git_sha1.h', 'git_sha1.h',
output : 'git_sha1.h', output : 'git_sha1.h',
command : [prog_python, git_sha1_gen_py, '--output', '@OUTPUT@'], command : [prog_python, git_sha1_gen_py, '--output', '@OUTPUT@'],
build_always : true, # commit sha1 can change without having touched these files # build_always : true, # commit sha1 can change without having touched these files
) )
subdir('gtest') subdir('gtest')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment