Commit fc7bcee8 authored by Alyssa Rosenzweig's avatar Alyssa Rosenzweig 💜

panfrost: Replace pantrace with direct decoding

History lesson! In the early days of a Panfrost, we had a library
independent of the driver called `panwrap` which would be LD_PRELOAD'ed
into a driver to decode its cmdstream in real-time. When upstreaming
Panfrost, we realized that we would much rather have this decode
functionality maintained in-tree to avoid divergence, but that we could
not upstream panwrap because of its use with the legacy API. So we
instead dumped GPU memory to the filesystem with an out-of-tree panwrap,
and decoded that with the in-tree pandecode module. When we migrated to
the new kernel, we just added support for doing this memory dump
directly from the driver (via a module "pantrace").

This works, but dumping memory every frame is sloooooooooooooow and
error-prone. I figured if we have pandecode in-tree, we might as well
link to it directly in the driver, allowing us to decode Panfrost's
command streams without dumping memory to the filesystem first. This
cleans up the code *substantially* and improves dumping performance by a
HUGE margin. I'm talking "several seconds per frame" to "dumping in
real-time" kind of jump.

Note to users: this removes the environmental option "PANTRACE_BASE".
Instead, for equivalent functionality set "PAN_MESA_DEBUG=trace" and
redirect stdout to the file of your choosing.

This should be debugging Panfrost much more pleasant.
Signed-off-by: Alyssa Rosenzweig's avatarAlyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
parent 845ec857
......@@ -42,11 +42,13 @@ files_panfrost = files(
'bifrost/disassemble.c',
'pandecode/common.c',
'pandecode/decode.c',
'pan_context.c',
'pan_afbc.c',
'pan_blit.c',
'pan_job.c',
'pan_trace.c',
'pan_drm.c',
'pan_allocate.c',
'pan_assemble.c',
......@@ -135,6 +137,7 @@ bifrost_compiler = executable(
files_pandecode = files(
'pandecode/cmdline.c',
'pandecode/common.c',
'pandecode/decode.c',
'pan_pretty_print.c',
......
......@@ -36,7 +36,8 @@
#include "pan_resource.h"
#include "pan_context.h"
#include "pan_drm.h"
#include "pan_trace.h"
#include "pan_util.h"
#include "pandecode/decode.h"
struct panfrost_drm {
struct panfrost_driver base;
......@@ -91,8 +92,8 @@ panfrost_drm_allocate_slab(struct panfrost_screen *screen,
}
/* Record the mmap if we're tracing */
if (!(extra_flags & PAN_ALLOCATE_GROWABLE))
pantrace_mmap(mem->gpu, mem->cpu, mem->size, NULL);
if (pan_debug & PAN_DBG_TRACE)
pandecode_inject_mmap(mem->gpu, mem->cpu, mem->size, NULL);
}
static void
......@@ -159,7 +160,8 @@ panfrost_drm_import_bo(struct panfrost_screen *screen, struct winsys_handle *wha
}
/* Record the mmap if we're tracing */
pantrace_mmap(bo->gpu, bo->cpu, bo->size, NULL);
if (pan_debug & PAN_DBG_TRACE)
pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
return bo;
}
......@@ -237,17 +239,15 @@ panfrost_drm_submit_job(struct panfrost_context *ctx, u64 job_desc, int reqs, st
bo_handles[submit.bo_handle_count++] = ctx->misc_0.gem_handle;
submit.bo_handles = (u64) (uintptr_t) bo_handles;
/* Dump memory _before_ submitting so we're not corrupted with actual GPU results */
pantrace_dump_memory();
if (drmIoctl(drm->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit)) {
fprintf(stderr, "Error submitting: %m\n");
return errno;
}
/* Trace the job if we're doing that and do a memory dump. We may
* want to adjust this logic once we're ready to trace FBOs */
pantrace_submit_job(submit.jc, submit.requirements, FALSE);
/* Trace the job if we're doing that */
if (pan_debug & PAN_DBG_TRACE)
pandecode_replay_jc(submit.jc, FALSE);
return 0;
}
......
......@@ -3,6 +3,7 @@
* Copyright 2008 VMware, Inc.
* Copyright 2014 Broadcom
* Copyright 2018 Alyssa Rosenzweig
* Copyright 2019 Collabora
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
......@@ -48,12 +49,14 @@
#include "pan_resource.h"
#include "pan_public.h"
#include "pan_util.h"
#include "pandecode/decode.h"
#include "pan_context.h"
#include "midgard/midgard_compile.h"
static const struct debug_named_value debug_options[] = {
{"msgs", PAN_DBG_MSGS, "Print debug messages"},
{"trace", PAN_DBG_TRACE, "Trace the command stream"},
DEBUG_NAMED_VALUE_END
};
......@@ -584,19 +587,17 @@ panfrost_create_screen(int fd, struct renderonly *ro)
screen->driver = panfrost_create_drm_driver(fd);
/* Dump memory and/or performance counters iff asked for in the environment */
const char *pantrace_base = getenv("PANTRACE_BASE");
/* Dump performance counters iff asked for in the environment */
pan_counters_base = getenv("PANCOUNTERS_BASE");
if (pantrace_base) {
pantrace_initialize(pantrace_base);
}
if (pan_counters_base) {
screen->driver->allocate_slab(screen, &screen->perf_counters, 64, true, 0, 0, 0);
screen->driver->enable_counters(screen);
}
if (pan_debug & PAN_DBG_TRACE)
pandecode_initialize();
screen->base.destroy = panfrost_destroy_screen;
screen->base.get_name = panfrost_get_name;
......
......@@ -35,7 +35,6 @@
#include <panfrost-misc.h>
#include "pan_allocate.h"
#include "pan_trace.h"
struct panfrost_context;
struct panfrost_resource;
......
/*
* Copyright (C) 2019 Alyssa Rosenzweig
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <panfrost-job.h>
#include "pan_trace.h"
#include "util/list.h"
/* The pandecode utility is capable of parsing a command stream trace and
* disassembling any referenced shaders. Traces themselves are glorified memory
* dumps, a directory consisting of .bin's for each memory segment, and a
* simple plain-text description of the interesting kernel activity.
* Historically, these dumps have been produced via panwrap, an LD_PRELOAD shim
* sitting between the driver and the kernel. However, for modern Panfrost, we
* can just produce the dumps ourselves, which is rather less fragile. This
* file (pantrace) implements this functionality. */
static FILE *pan_control_log;
static const char *pan_control_base;
/* Represent the abstraction for a single mmap chunk */
static unsigned pantrace_memory_count = 0;
struct pantrace_memory {
struct list_head node;
mali_ptr gpu;
void *cpu;
size_t sz;
char *full_filename;
};
static struct pantrace_memory mmaps;
void
pantrace_initialize(const char *base)
{
/* Open the control.log */
char fn[128];
snprintf(fn, 128, "%s/control.log", base);
pan_control_log = fopen(fn, "w+");
assert(pan_control_log);
/* Save the base for later */
pan_control_base = base;
/* Initialize the mmap list */
list_inithead(&mmaps.node);
}
static bool
pantrace_is_initialized(void)
{
return pan_control_log && pan_control_base;
}
/* Traces a submitted job with a given job chain, core requirements, and
* platform */
void
pantrace_submit_job(mali_ptr jc, unsigned core_req, unsigned is_bifrost)
{
if (!pantrace_is_initialized())
return;
fprintf(pan_control_log, "JS %" PRIx64 " %x %x\n",
jc, core_req, is_bifrost);
fflush(pan_control_log);
}
/* Dumps a given mapped memory buffer with the given label. If no label
* is given (label == NULL), one is created */
void
pantrace_mmap(mali_ptr gpu, void *cpu, size_t sz, char *label)
{
if (!pantrace_is_initialized())
return;
char *filename = NULL;
char *full_filename = NULL;
/* Create a filename based on the label or count */
if (label) {
asprintf(&filename, "%s.bin", label);
} else {
asprintf(&filename, "memory_%d.bin", pantrace_memory_count++);
}
/* Emit an mmap for it */
fprintf(pan_control_log, "MMAP %" PRIx64 " %s\n", gpu, filename);
fflush(pan_control_log);
/* Dump the memory itself */
asprintf(&full_filename, "%s/%s", pan_control_base, filename);
free(filename);
struct pantrace_memory *mem = malloc(sizeof(*mem));
list_inithead(&mem->node);
mem->gpu = gpu;
mem->cpu = cpu;
mem->sz = sz;
mem->full_filename = full_filename;
list_add(&mem->node, &mmaps.node);
}
/* Dump all memory at once, once everything has been written */
void
pantrace_dump_memory(void)
{
if (!pantrace_is_initialized())
return;
list_for_each_entry(struct pantrace_memory, pos, &mmaps.node, node) {
/* Save the mapping */
FILE *fp = fopen(pos->full_filename, "wb");
fwrite(pos->cpu, 1, pos->sz, fp);
fclose(fp);
}
}
......@@ -29,6 +29,7 @@
#define PAN_UTIL_H
#define PAN_DBG_MSGS 0x0001
#define PAN_DBG_TRACE 0x0002
extern int pan_debug;
......
/*
* Copyright (C) 2019 Alyssa Rosenzweig
* Copyright (C) 2017-2018 Lyude Paul
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
......@@ -28,45 +27,7 @@
#include <stdint.h>
#include <string.h>
#include "mmap.h"
int pandecode_replay_jc(mali_ptr jc_gpu_va, bool bifrost);
/* Memory handling */
static struct pandecode_mapped_memory mmaps;
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(mali_ptr addr)
{
list_for_each_entry(struct pandecode_mapped_memory, pos, &mmaps.node, node) {
if (addr >= pos->gpu_va && addr < pos->gpu_va + pos->length)
return pos;
}
return NULL;
}
char *
pointer_as_memory_reference(mali_ptr ptr)
{
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
/* Try to find the corresponding mapped zone */
mapped = pandecode_find_mapped_gpu_mem_containing(ptr);
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va));
return out;
}
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, MALI_PTR_FMT, ptr);
return out;
}
#include "decode.h"
/* Parsing */
......@@ -101,21 +62,7 @@ pandecode_read_memory(const char *base, const char *name, mali_ptr gpu_va)
fread(buf, 1, sz, fp);
fclose(fp);
/* Now that we have the memory loaded in, create a mmap entry for it so
* we remember it later */
struct pandecode_mapped_memory *mapped_mem = NULL;
mapped_mem = malloc(sizeof(*mapped_mem));
list_inithead(&mapped_mem->node);
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = buf;
memcpy(mapped_mem->name, name, strlen(name));
list_add(&mapped_mem->node, &mmaps.node);
pandecode_inject_mmap(gpu_va, buf, sz, name);
}
static void
......@@ -141,6 +88,8 @@ pandecode_read_job_submit(const char *base, const char *line)
pandecode_replay_jc(addr, is_bifrost);
}
/* Reads the control file, processing as it goes. */
static void
......@@ -180,10 +129,7 @@ main(int argc, char **argv)
fprintf(stderr, "Usage: pandecode [directory]\n");
exit(1);
}
/* Initialize */
list_inithead(&mmaps.node);
/* Let's go! */
pandecode_initialize();
pandecode_read_control(argv[1]);
}
/*
* Copyright (C) 2019 Alyssa Rosenzweig
* Copyright (C) 2017-2018 Lyude Paul
* Copyright (C) 2019 Collabora
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
......@@ -19,15 +21,82 @@
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#ifndef __PAN_TRACE_H__
#define __PAN_TRACE_H__
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "decode.h"
#include "util/macros.h"
/* Memory handling */
static struct pandecode_mapped_memory mmaps;
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(mali_ptr addr)
{
list_for_each_entry(struct pandecode_mapped_memory, pos, &mmaps.node, node) {
if (addr >= pos->gpu_va && addr < pos->gpu_va + pos->length)
return pos;
}
return NULL;
}
void
pandecode_inject_mmap(mali_ptr gpu_va, void *cpu, unsigned sz, const char *name)
{
struct pandecode_mapped_memory *mapped_mem = NULL;
mapped_mem = malloc(sizeof(*mapped_mem));
list_inithead(&mapped_mem->node);
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
if (!name) {
/* If we don't have a name, assign one */
snprintf(mapped_mem->name, ARRAY_SIZE(mapped_mem->name) - 1,
"memory_%" PRIx64, gpu_va);
} else {
assert(strlen(name) < ARRAY_SIZE(mapped_mem->name));
memcpy(mapped_mem->name, name, strlen(name));
}
list_add(&mapped_mem->node, &mmaps.node);
}
char *
pointer_as_memory_reference(mali_ptr ptr)
{
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
/* Try to find the corresponding mapped zone */
mapped = pandecode_find_mapped_gpu_mem_containing(ptr);
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va));
return out;
}
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, MALI_PTR_FMT, ptr);
return out;
}
void pantrace_initialize(const char *base);
void pantrace_submit_job(mali_ptr jc, unsigned core_req, unsigned is_bifrost);
void pantrace_mmap(mali_ptr gpu, void *cpu, size_t sz, char *label);
void pantrace_dump_memory(void);
void
pandecode_initialize(void)
{
list_inithead(&mmaps.node);
#endif
}
......@@ -28,7 +28,7 @@
#include <memory.h>
#include <stdbool.h>
#include <stdarg.h>
#include "mmap.h"
#include "decode.h"
#include "util/u_math.h"
#include "../pan_pretty_print.h"
......
......@@ -23,8 +23,8 @@
*
*/
#ifndef __MMAP_TRACE_H__
#define __MMAP_TRACE_H__
#ifndef __PAN_DECODE_H__
#define __PAN_DECODE_H__
#include <stdlib.h>
#include <stddef.h>
......@@ -42,10 +42,15 @@ struct pandecode_mapped_memory {
char name[32];
};
void pandecode_initialize(void);
char *pointer_as_memory_reference(mali_ptr ptr);
struct pandecode_mapped_memory *pandecode_find_mapped_gpu_mem_containing(mali_ptr addr);
void
pandecode_inject_mmap(mali_ptr gpu_va, void *cpu, unsigned sz, const char *name);
static inline void *
__pandecode_fetch_gpu_mem(const struct pandecode_mapped_memory *mem,
mali_ptr gpu_va, size_t size,
......@@ -75,4 +80,7 @@ __pandecode_fetch_gpu_mem(const struct pandecode_mapped_memory *mem,
name = __pandecode_fetch_gpu_mem(mem, gpu_va, sizeof(*name), \
__LINE__, __FILE__)
/* Common entrypoint */
int pandecode_replay_jc(mali_ptr jc_gpu_va, bool bifrost);
#endif /* __MMAP_TRACE_H__ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment