Commit fdbda905 authored by Alyssa Rosenzweig's avatar Alyssa Rosenzweig

Merge branch 'perf-cnt-v3' into 'master'

Performance counter dumping; GL_QUADS and friends

See merge request !33
parents f328260e aa0e742c
......@@ -57,32 +57,38 @@ enum mali_job_type {
JOB_TYPE_CACHE_FLUSH = 3,
JOB_TYPE_COMPUTE = 4,
JOB_TYPE_VERTEX = 5,
JOB_TYPE_GEOMETRY = 6,
JOB_TYPE_TILER = 7,
JOB_TYPE_FUSED = 8,
JOB_TYPE_FRAGMENT = 9,
};
enum mali_gl_mode {
MALI_GL_NONE = 0x0,
MALI_GL_POINTS = 0x1,
MALI_GL_LINES = 0x2,
MALI_GL_LINE_STRIP = 0x4,
MALI_GL_LINE_LOOP = 0x6,
MALI_GL_TRIANGLES = 0x8,
MALI_GL_TRIANGLE_STRIP = 0xA,
MALI_GL_TRIANGLE_FAN = 0xC,
enum mali_draw_mode {
MALI_DRAW_NONE = 0x0,
MALI_POINTS = 0x1,
MALI_LINES = 0x2,
MALI_LINE_STRIP = 0x4,
MALI_LINE_LOOP = 0x6,
MALI_TRIANGLES = 0x8,
MALI_TRIANGLE_STRIP = 0xA,
MALI_TRIANGLE_FAN = 0xC,
MALI_POLYGON = 0xD,
MALI_QUADS = 0xE,
MALI_QUAD_STRIP = 0xF,
/* All other modes invalid */
};
/* Applies to tiler_gl_enables */
#define MALI_GL_CULL_FACE_BACK 0x80
#define MALI_GL_CULL_FACE_FRONT 0x40
#define MALI_CULL_FACE_BACK 0x80
#define MALI_CULL_FACE_FRONT 0x40
#define MALI_GL_FRONT_FACE(v) (v << 5)
#define MALI_GL_CCW (0)
#define MALI_GL_CW (1)
#define MALI_FRONT_FACE(v) (v << 5)
#define MALI_CCW (0)
#define MALI_CW (1)
#define MALI_GL_OCCLUSION_BOOLEAN 0x8
#define MALI_OCCLUSION_BOOLEAN 0x8
/* TODO: Might this actually be a finer bitfield? */
#define MALI_DEPTH_STENCIL_ENABLE 0x6400
......@@ -1129,19 +1135,19 @@ struct mali_texture_descriptor {
/* Used as part of filter_mode */
#define MALI_GL_LINEAR 0
#define MALI_GL_NEAREST 1
#define MALI_GL_MIP_LINEAR (0x18)
#define MALI_LINEAR 0
#define MALI_NEAREST 1
#define MALI_MIP_LINEAR (0x18)
/* Used to construct low bits of filter_mode */
#define MALI_GL_TEX_MAG(mode) (((mode) & 1) << 0)
#define MALI_GL_TEX_MIN(mode) (((mode) & 1) << 1)
#define MALI_TEX_MAG(mode) (((mode) & 1) << 0)
#define MALI_TEX_MIN(mode) (((mode) & 1) << 1)
#define MALI_GL_TEX_MAG_MASK (1)
#define MALI_GL_TEX_MIN_MASK (2)
#define MALI_TEX_MAG_MASK (1)
#define MALI_TEX_MIN_MASK (2)
#define MALI_FILTER_NAME(filter) (filter ? "MALI_GL_NEAREST" : "MALI_GL_LINEAR")
#define MALI_FILTER_NAME(filter) (filter ? "MALI_NEAREST" : "MALI_LINEAR")
/* Used for lod encoding. Thanks @urjaman for pointing out these routines can
* be cleaned up a lot. */
......
......@@ -40,7 +40,10 @@ panfrost_allocate_chunk(struct panfrost_context *ctx, size_t size, unsigned heap
{
size = ALIGN(size, ALIGNMENT);
struct pb_slab_entry *entry = pb_slab_alloc(&ctx->slabs, size, heap_id);
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = panfrost_screen(gallium->screen);
struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, size, heap_id);
struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
......@@ -77,7 +80,9 @@ panfrost_allocate_transient(struct panfrost_context *ctx, size_t sz)
if (pool->entry_index >= pool->entry_count) {
/* Don't overflow the pool -- allocate a new one */
struct pb_slab_entry *entry = pb_slab_alloc(&ctx->slabs, pool->entry_size, HEAP_TRANSIENT);
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = panfrost_screen(gallium->screen);
struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, pool->entry_size, HEAP_TRANSIENT);
pool->entry_count++;
pool->entries[pool->entry_index] = (struct panfrost_memory_entry *) entry;
......
This diff is collapsed.
......@@ -220,9 +220,6 @@ struct panfrost_context {
struct pipe_blend_color blend_color;
struct pipe_depth_stencil_alpha_state *depth_stencil;
struct pipe_stencil_ref stencil_ref;
/* Memory management is based on subdividing slabs with AMD's allocator */
struct pb_slabs slabs;
};
/* Corresponds to the CSO */
......
......@@ -111,7 +111,7 @@ panfrost_nondrm_create_bo(struct panfrost_screen *screen, const struct pipe_reso
/* Allocate the framebuffer as its own slab of GPU-accessible memory */
struct panfrost_memory slab;
screen->driver->allocate_slab(screen->any_context, &slab, (sz / 4096) + 1, false, 0, 0, 0);
screen->driver->allocate_slab(screen, &slab, (sz / 4096) + 1, false, 0, 0, 0);
/* Make the resource out of the slab */
bo->base.cpu[0] = slab.cpu;
......@@ -133,7 +133,7 @@ panfrost_nondrm_create_bo(struct panfrost_screen *screen, const struct pipe_reso
} else {
/* But for linear, we can! */
struct pb_slab_entry *entry = pb_slab_alloc(&screen->any_context->slabs, sz, HEAP_TEXTURE);
struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, sz, HEAP_TEXTURE);
struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
bo->base.entry[0] = p_entry;
......@@ -220,7 +220,7 @@ panfrost_nondrm_map_bo(struct panfrost_context *ctx, struct pipe_transfer *trans
}
static void
panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rsrc, int level)
panfrost_tile_texture(struct panfrost_screen *screen, struct panfrost_resource *rsrc, int level)
{
struct panfrost_nondrm_bo *bo = (struct panfrost_nondrm_bo *)rsrc->bo;
int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
......@@ -235,7 +235,7 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs
int swizzled_sz = panfrost_swizzled_size(width, height, bytes_per_pixel);
/* Allocate the transfer given that known size but do not copy */
struct pb_slab_entry *entry = pb_slab_alloc(&ctx->slabs, swizzled_sz, HEAP_TEXTURE);
struct pb_slab_entry *entry = pb_slab_alloc(&screen->slabs, swizzled_sz, HEAP_TEXTURE);
struct panfrost_memory_entry *p_entry = (struct panfrost_memory_entry *) entry;
struct panfrost_memory *backing = (struct panfrost_memory *) entry->slab;
uint8_t *swizzled = backing->cpu + p_entry->offset;
......@@ -245,7 +245,7 @@ panfrost_tile_texture(struct panfrost_context *ctx, struct panfrost_resource *rs
if (bo->base.entry[level] != NULL) {
bo->base.entry[level]->freed = true;
pb_slab_free(&ctx->slabs, &bo->base.entry[level]->base);
pb_slab_free(&screen->slabs, &bo->base.entry[level]->base);
}
bo->base.entry[level] = p_entry;
......@@ -271,7 +271,9 @@ panfrost_nondrm_unmap_bo(struct panfrost_context *ctx,
if (bo->base.has_afbc) {
printf("Warning: writes to afbc surface can't possibly work out well for you...\n");
} else if (bo->base.tiled) {
panfrost_tile_texture(ctx, prsrc, transfer->level);
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = pan_screen(gallium->screen);
panfrost_tile_texture(screen, prsrc, transfer->level);
}
}
}
......@@ -280,7 +282,6 @@ panfrost_nondrm_unmap_bo(struct panfrost_context *ctx,
static void
panfrost_nondrm_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *pbo)
{
struct panfrost_context *ctx = screen->any_context;
struct panfrost_nondrm_bo *bo = (struct panfrost_nondrm_bo *)pbo;
if (bo->base.tiled) {
......@@ -291,7 +292,7 @@ panfrost_nondrm_destroy_bo(struct panfrost_screen *screen, struct panfrost_bo *p
}
} else if (bo->base.entry[0] != NULL) {
bo->base.entry[0]->freed = true;
pb_slab_free(&ctx->slabs, &bo->base.entry[0]->base);
pb_slab_free(&screen->slabs, &bo->base.entry[0]->base);
} else {
printf("--leaking main allocation--\n");
}
......@@ -322,6 +323,14 @@ panfrost_nondrm_submit_job(struct panfrost_context *ctx, mali_ptr addr, int nr_a
if (pandev_ioctl(nondrm->fd, KBASE_IOCTL_JOB_SUBMIT, &submit))
printf("Error submitting\n");
#ifdef DUMP_PERFORMANCE_COUNTERS
/* Dump the performance counters as soon as we submit work */
if (pandev_ioctl(nondrm->fd, KBASE_IOCTL_HWCNT_DUMP, NULL)) {
fprintf(stderr, "Error dumping counters\n");
return;
}
#endif
}
/* Forces a flush, to make sure everything is consistent.
......@@ -356,7 +365,7 @@ panfrost_nondrm_force_flush_fragment(struct panfrost_context *ctx)
}
static void
panfrost_nondrm_allocate_slab(struct panfrost_context *ctx,
panfrost_nondrm_allocate_slab(struct panfrost_screen *screen,
struct panfrost_memory *mem,
size_t pages,
bool same_va,
......@@ -364,8 +373,6 @@ panfrost_nondrm_allocate_slab(struct panfrost_context *ctx,
int commit_count,
int extent)
{
struct pipe_context *gallium = (struct pipe_context *) ctx;
struct panfrost_screen *screen = panfrost_screen(gallium->screen);
struct panfrost_nondrm *nondrm = (struct panfrost_nondrm *)screen->driver;
int flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
......@@ -406,6 +413,25 @@ panfrost_nondrm_allocate_slab(struct panfrost_context *ctx,
mem->stack_bottom = 0;
}
static void
panfrost_nondrm_enable_counters(struct panfrost_screen *screen)
{
        struct panfrost_nondrm *nondrm = (struct panfrost_nondrm *) screen->driver;

        /* Point the hardware-counter dump at the perf_counters slab and
         * request every counter block: job manager, shader cores, tiler,
         * and MMU/L2. Zero-init then fill explicitly. */
        struct kbase_ioctl_hwcnt_enable enable_flags = { 0 };
        enable_flags.dump_buffer = screen->perf_counters.gpu;
        enable_flags.jm_bm = ~0;
        enable_flags.shader_bm = ~0;
        enable_flags.tiler_bm = ~0;
        enable_flags.mmu_l2_bm = ~0;

        int err = pandev_ioctl(nondrm->fd, KBASE_IOCTL_HWCNT_ENABLE, &enable_flags);
        if (err)
                fprintf(stderr, "Error enabling performance counters\n");
}
struct panfrost_driver *
panfrost_create_nondrm_driver(int fd)
{
......@@ -424,6 +450,7 @@ panfrost_create_nondrm_driver(int fd)
driver->base.submit_job = panfrost_nondrm_submit_job;
driver->base.force_flush_fragment = panfrost_nondrm_force_flush_fragment;
driver->base.allocate_slab = panfrost_nondrm_allocate_slab;
driver->base.enable_counters = panfrost_nondrm_enable_counters;
ret = ioctl(fd, KBASE_IOCTL_VERSION_CHECK, &version);
if (ret != 0) {
......
......@@ -322,6 +322,53 @@ panfrost_transfer_unmap(struct pipe_context *pctx,
free(transfer);
}
static struct pb_slab *
panfrost_slab_alloc(void *priv, unsigned heap, unsigned entry_size, unsigned group_index)
{
        /* pb_slabs callback: carve a fresh GPU-backed slab into fixed-size
         * entries and hand it to the allocator. Returns NULL on allocation
         * failure (the pb_slabs machinery treats that as alloc failure). */
        struct panfrost_screen *screen = (struct panfrost_screen *) priv;
        struct panfrost_memory *mem = CALLOC_STRUCT(panfrost_memory);

        /* FIX: CALLOC_STRUCT can fail; the original dereferenced mem
         * unconditionally */
        if (!mem)
                return NULL;

        size_t slab_size = (1 << (MAX_SLAB_ENTRY_SIZE + 1));

        mem->slab.num_entries = slab_size / entry_size;
        mem->slab.num_free = mem->slab.num_entries;

        LIST_INITHEAD(&mem->slab.free);

        for (unsigned i = 0; i < mem->slab.num_entries; ++i) {
                /* Create a slab entry */
                struct panfrost_memory_entry *entry = CALLOC_STRUCT(panfrost_memory_entry);

                if (!entry) {
                        /* FIX: on OOM, shrink the slab to the entries we
                         * actually created instead of crashing */
                        mem->slab.num_entries = i;
                        mem->slab.num_free = i;
                        break;
                }

                entry->offset = entry_size * i;

                entry->base.slab = &mem->slab;
                entry->base.group_index = group_index;

                LIST_ADDTAIL(&entry->base.head, &mem->slab.free);
        }

        /* Actually allocate the memory from kernel-space. Mapped, same_va, no
         * special flags */
        screen->driver->allocate_slab(screen, mem, slab_size / 4096, true, 0, 0, 0);

        return &mem->slab;
}
static bool
panfrost_slab_can_reclaim(void *priv, struct pb_slab_entry *entry)
{
        /* pb_slabs callback: an entry may be reclaimed iff its owner has
         * already marked it freed */
        return ((struct panfrost_memory_entry *) entry)->freed;
}
static void
panfrost_slab_free(void *priv, struct pb_slab *slab)
{
        /* STUB: slab backing memory is never returned to the kernel yet;
         * we only log that a free was requested */
        printf("stub: Tried to free slab\n");
}
static void
panfrost_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
{
......@@ -351,6 +398,18 @@ panfrost_resource_screen_init(struct panfrost_screen *pscreen)
pscreen->base.transfer_helper = u_transfer_helper_create(&transfer_vtbl,
true, true,
true, true);
pb_slabs_init(&pscreen->slabs,
MIN_SLAB_ENTRY_SIZE,
MAX_SLAB_ENTRY_SIZE,
3, /* Number of heaps */
pscreen,
panfrost_slab_can_reclaim,
panfrost_slab_alloc,
panfrost_slab_free);
}
void
......
......@@ -691,6 +691,11 @@ panfrost_create_screen(int fd, struct renderonly *ro, bool is_drm)
else
screen->driver = panfrost_create_nondrm_driver(fd);
#ifdef DUMP_PERFORMANCE_COUNTERS
screen->driver->allocate_slab(screen, &screen->perf_counters, 64, true, 0, 0, 0);
screen->driver->enable_counters(screen);
#endif
screen->base.destroy = panfrost_destroy_screen;
screen->base.get_name = panfrost_get_name;
......
......@@ -42,6 +42,8 @@ struct panfrost_context;
struct panfrost_resource;
struct panfrost_screen;
//#define DUMP_PERFORMANCE_COUNTERS
struct panfrost_driver {
struct panfrost_bo * (*create_bo) (struct panfrost_screen *screen, const struct pipe_resource *template);
struct panfrost_bo * (*import_bo) (struct panfrost_screen *screen, struct winsys_handle *whandle);
......@@ -51,13 +53,14 @@ struct panfrost_driver {
void (*submit_job) (struct panfrost_context *ctx, mali_ptr addr, int nr_atoms);
void (*force_flush_fragment) (struct panfrost_context *ctx);
void (*allocate_slab) (struct panfrost_context *ctx,
void (*allocate_slab) (struct panfrost_screen *screen,
struct panfrost_memory *mem,
size_t pages,
bool same_va,
int extra_flags,
int commit_count,
int extent);
void (*enable_counters) (struct panfrost_screen *screen);
};
struct panfrost_screen {
......@@ -66,7 +69,10 @@ struct panfrost_screen {
struct renderonly *ro;
struct panfrost_driver *driver;
struct panfrost_context *any_context;
struct panfrost_memory perf_counters;
/* Memory management is based on subdividing slabs with AMD's allocator */
struct pb_slabs slabs;
/* TODO: Where? */
struct panfrost_resource *display_target;
......
......@@ -146,7 +146,7 @@ panfrost_draw_wallpaper(struct pipe_context *pipe)
* be a little more DRY */
ctx->payload_tiler.draw_start = 0;
ctx->payload_tiler.prefix.draw_mode = MALI_GL_TRIANGLE_STRIP;
ctx->payload_tiler.prefix.draw_mode = MALI_TRIANGLE_STRIP;
ctx->vertex_count = 4;
ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(4);
ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000);
......
......@@ -46,7 +46,7 @@
panwrap_prop("%s = %s_%d_p", #p, #p, no); \
}
#define FLAG_INFO(flag) { MALI_GL_##flag, "MALI_GL_" #flag }
#define FLAG_INFO(flag) { MALI_##flag, "MALI_" #flag }
static const struct panwrap_flag_info gl_enable_flag_info[] = {
FLAG_INFO(CULL_FACE_FRONT),
FLAG_INFO(CULL_FACE_BACK),
......@@ -133,22 +133,25 @@ panwrap_job_type_name(enum mali_job_type type)
}
static char *
panwrap_gl_mode_name(enum mali_gl_mode mode)
panwrap_draw_mode_name(enum mali_draw_mode mode)
{
#define DEFINE_CASE(name) case MALI_ ## name: return "MALI_" #name
switch (mode) {
DEFINE_CASE(GL_NONE);
DEFINE_CASE(GL_POINTS);
DEFINE_CASE(GL_LINES);
DEFINE_CASE(GL_TRIANGLES);
DEFINE_CASE(GL_TRIANGLE_STRIP);
DEFINE_CASE(GL_TRIANGLE_FAN);
DEFINE_CASE(GL_LINE_STRIP);
DEFINE_CASE(GL_LINE_LOOP);
DEFINE_CASE(DRAW_NONE);
DEFINE_CASE(POINTS);
DEFINE_CASE(LINES);
DEFINE_CASE(TRIANGLES);
DEFINE_CASE(TRIANGLE_STRIP);
DEFINE_CASE(TRIANGLE_FAN);
DEFINE_CASE(LINE_STRIP);
DEFINE_CASE(LINE_LOOP);
DEFINE_CASE(POLYGON);
DEFINE_CASE(QUADS);
DEFINE_CASE(QUAD_STRIP);
default:
return "MALI_GL_TRIANGLES /* XXX: Unknown GL mode, check dump */";
return "MALI_TRIANGLES /* XXX: Unknown GL mode, check dump */";
}
#undef DEFINE_CASE
......@@ -920,7 +923,7 @@ panwrap_replay_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_n
panwrap_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
panwrap_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
panwrap_prop("draw_mode = %s", panwrap_gl_mode_name(p->draw_mode));
panwrap_prop("draw_mode = %s", panwrap_draw_mode_name(p->draw_mode));
/* Index count only exists for tiler jobs anyway */
......@@ -1466,9 +1469,9 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
panwrap_indent++;
/* Only the lower two bits are understood right now; the rest we display as hex */
panwrap_log(".filter_mode = MALI_GL_TEX_MIN(%s) | MALI_GL_TEX_MAG(%s) | 0x%" PRIx32",\n",
MALI_FILTER_NAME(s->filter_mode & MALI_GL_TEX_MIN_MASK),
MALI_FILTER_NAME(s->filter_mode & MALI_GL_TEX_MAG_MASK),
panwrap_log(".filter_mode = MALI_TEX_MIN(%s) | MALI_TEX_MAG(%s) | 0x%" PRIx32",\n",
MALI_FILTER_NAME(s->filter_mode & MALI_TEX_MIN_MASK),
MALI_FILTER_NAME(s->filter_mode & MALI_TEX_MAG_MASK),
s->filter_mode & ~3);
panwrap_prop("min_lod = FIXED_16(%f)", DECODE_FIXED_16(s->min_lod));
......@@ -1651,10 +1654,10 @@ panwrap_replay_gl_enables(uint32_t gl_enables, int job_type)
panwrap_log(".gl_enables = ");
if (job_type == JOB_TYPE_TILER) {
panwrap_log_cont("MALI_GL_FRONT_FACE(MALI_GL_%s) | ",
gl_enables & MALI_GL_FRONT_FACE(MALI_GL_CW) ? "CW" : "CCW");
panwrap_log_cont("MALI_FRONT_FACE(MALI_%s) | ",
gl_enables & MALI_FRONT_FACE(MALI_CW) ? "CW" : "CCW");
gl_enables &= ~(MALI_GL_FRONT_FACE(1));
gl_enables &= ~(MALI_FRONT_FACE(1));
}
panwrap_log_decoded_flags(gl_enable_flag_info, gl_enables);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment