Commit 82537009 authored by Adrian Larumbe, committed by Adrian Larumbe
Browse files

panfrost: Add userspace crash dump decoder and analyser



Introduces a binary dump analyser and BO decoder whose format was partially
borrowed from etnaviv's kernel driver. It leverages the pandecode library to
analyse the BOs attached to the GPU job that caused the crash.

The binary dump should be generated by a component of Panfrost's kernel driver.
Signed-off-by: Adrián Larumbe <adrian.larumbe@collabora.com>
parent 9c722a06
Pipeline #467624 waiting for manual action with stages
in 13 seconds
......@@ -224,6 +224,37 @@ struct drm_panfrost_madvise {
__u32 retained; /* out, whether backing store still exists */
};
/* Definitions for coredump decoding in user space */
/*
 * Kind of object described by a struct panfrost_dump_object_header.
 * The numeric values are stored in the dump file, so they are spelled out.
 */
enum panfrost_dump_object_header_type {
	/* Register dump; this header also carries jc, gpu_id and nbos */
	PANFROSTDUMP_BUF_REG = 0,
	/* BO map; its payload holds the page address lists of the BOs */
	PANFROSTDUMP_BUF_BOMAP = 1,
	/* Presumably one header per buffer object -- TODO confirm with the producer */
	PANFROSTDUMP_BUF_BO = 2,
	/* Terminates the list of headers */
	PANFROSTDUMP_BUF_END = 3,
};
#define PANFROSTDUMP_VERSION_1 1
#define PANFROSTDUMP_MAGIC 0xCAFECAFE
/*
 * Header describing one object of the dump. Every field is stored in
 * little-endian byte order. Field order and sizes define the file layout
 * and must not change.
 */
struct panfrost_dump_object_header {
__le32 magic; /* must equal PANFROSTDUMP_MAGIC */
__le32 type; /* one of enum panfrost_dump_object_header_type */
__le64 version; /* presumably PANFROSTDUMP_VERSION_1 -- TODO confirm, the decoder does not check it */
__le64 nbos; /* number of BOs; the decoder reads it from the register header */
__le64 jc; /* value handed to pandecode_jc(); presumably the job chain address -- TODO confirm */
__le32 file_offset; /* offset of this object's payload from the start of the dump file */
__le32 file_size; /* size of the payload in bytes */
__le64 iova; /* address the decoder prints as "VA" and maps the BO at in pandecode */
__le32 gpu_id; /* GPU identifier handed to pandecode_jc() */
__le32 valid; /* non-zero when the BO can be decoded; the decoder skips the BO otherwise */
__le32 data[2]; /* data[0]: offset of the BO's page address list inside the BO map payload; data[1] is not read by the decoder */
};
/*
 * Registers object: the payload of the register header is an array of
 * these pairs, each field stored in little-endian byte order.
 */
struct panfrost_dump_registers {
__le32 reg; /* printed by the decoder as the register offset */
__le32 value; /* value of that register */
};
#if defined(__cplusplus)
}
#endif
......
......@@ -35,6 +35,7 @@ subdir('bifrost')
if with_gallium_panfrost or with_panfrost_vk
subdir('lib')
subdir('perf')
subdir('tools')
endif
files_bifrost = files(
......
# Copyright © 2021 Collabora
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# panfrostdump: stand-alone tool built from panfrostdump.c and linked
# against libpanfrost_dep.
coredumpdec = executable(
'panfrostdump',
files('panfrostdump.c'),
c_args : [c_msvc_compat_args, no_override_init_args],
gnu_symbol_visibility : 'hidden',
include_directories : [inc_include, inc_src, inc_mesa],
dependencies: [libpanfrost_dep],
# Built by default only when with_tools contains 'panfrost'.
# NOTE(review): install is unconditional while build_by_default is not --
# confirm this combination is intended.
build_by_default : with_tools.contains('panfrost'),
install: true
)
/*
* Copyright (C) 2021 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
/*
* Debug dump analyser for panfrost. In case of a gpu crash/hang,
* the coredump should be found in:
*
* /sys/class/devcoredump/devcd<n>/data
*
* The crashdump will hang around for 5min; it can be cleared by writing to
* the file, i.e.:
*
* echo 1 > /sys/class/devcoredump/devcd<n>/data
*
* (the driver won't log any new crashdumps until the previous one is cleared
* or times out after 5min)
*/
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <stdint.h>
#include <endian.h>
#include "decode.h"
#include <drm-uapi/panfrost_drm.h>
#define MAX_BODUMP_FILENAME 32
#define PAGE_SIZE 4096
static int
read_header(FILE * fp, struct panfrost_dump_object_header *pdoh)
{
/* Fields in the coredump file header structures
* are found in little-endian order
*/
struct panfrost_dump_object_header doh_le;
size_t nr;
nr = fread(&doh_le, 1, sizeof(struct panfrost_dump_object_header), fp);
if (nr < sizeof(struct panfrost_dump_object_header)) {
fprintf(stderr, "Wrong header read\n");
return 0;
}
/* Convert from little-endian to host byte order */
pdoh->magic = le32toh(doh_le.magic);
if (pdoh->magic != PANFROSTDUMP_MAGIC) {
fprintf(stderr, "Wrong header magic\n");
return 0;
}
pdoh->type = le32toh(doh_le.type);
pdoh->jc = le64toh(doh_le.jc);
pdoh->version = le64toh(doh_le.version);
pdoh->nbos = le64toh(doh_le.nbos);
pdoh->file_offset = le32toh(doh_le.file_offset);
pdoh->file_size = le32toh(doh_le.file_size);
pdoh->iova = le64toh(doh_le.iova);
pdoh->gpu_id = le32toh(doh_le.gpu_id);
pdoh->valid = le32toh(doh_le.valid);
pdoh->data[0] = le32toh(doh_le.data[0]);
pdoh->data[1] = le32toh(doh_le.data[1]);
return 1;
}
static int
read_register(uint32_t * ro, uint32_t * rv, FILE * fp)
{
/* Register pair we read form memory is
* laid out in little-endian order
*/
struct panfrost_dump_registers reg_le;
size_t nr;
nr = fread(&reg_le, 1, sizeof(reg_le), fp);
if (nr < sizeof(reg_le)) {
fprintf(stderr, "Wrong register read\n");
return 0;
}
*ro = le32toh(reg_le.reg);
*rv = le32toh(reg_le.value);
return 1;
}
/*
 * Read one 64-bit page address from fp.
 *
 * The address is stored in the dump in little-endian order and is returned
 * through *phys_page in host byte order. Returns 1 on success. On a short
 * read a message is printed to stderr, 0 is returned and *phys_page is left
 * untouched.
 *
 * A short read only happens at end of file or on a read error, so there is
 * nothing further to skip: the stream is left exactly where fread() stopped,
 * with its end-of-file/error indicators intact for the caller to inspect.
 */
static int
read_page_addr(uint64_t * phys_page, FILE * fp)
{
   uint64_t phys_addr_le;

   if (fread(&phys_addr_le, 1, sizeof(phys_addr_le), fp) < sizeof(phys_addr_le)) {
      fprintf(stderr, "Wrong page address read\n");
      return 0;
   }

   *phys_page = le64toh(phys_addr_le);
   return 1;
}
/* State shared between main() and the atexit() handler below */
static FILE *hdr_fp;    /* stream used to walk the object headers */
static FILE *data_fp;   /* second stream on the same file, used for payloads */
static char **bos;      /* bo_num pointers to BO contents, entries may be NULL */
static uint32_t bo_num; /* number of entries in bos */

/*
 * Release everything recorded in the file-scope state: close both streams
 * and free every BO buffer together with the pointer array.
 *
 * main() registers this with atexit(). Each resource is reset after being
 * released, so calling the function more than once is harmless.
 */
static void
cleanup(void)
{
   if (hdr_fp != NULL) {
      fclose(hdr_fp);
      hdr_fp = NULL;
   }

   if (data_fp != NULL) {
      fclose(data_fp);
      data_fp = NULL;
   }

   if (bos != NULL) {
      /* free(NULL) is a no-op, so unused slots need no special casing */
      for (uint32_t k = 0; k < bo_num; k++)
         free(bos[k]);
      free(bos);
      bos = NULL;
   }
   bo_num = 0;
}
int
main(int argc, const char **argv)
{
struct panfrost_dump_object_header doh;
uint32_t gpu_id;
uint64_t jc;
size_t nbytes;
int i, j, k;
if ((argc != 2) || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "Usage: %s coredump_file\n", argv[0]);
return EXIT_FAILURE;
}
i = j = k = 0;
atexit(cleanup);
pandecode_initialize(false);
hdr_fp = fopen(argv[1], "r");
if (!hdr_fp) {
perror("failed to open file");
return EXIT_FAILURE;
}
data_fp = fopen(argv[1], "r");
if (!data_fp) {
perror("failed to open file");
return EXIT_FAILURE;
}
/* Read register header */
if (!read_header(hdr_fp, &doh))
return EXIT_FAILURE;
if (fseek(data_fp, doh.file_offset, SEEK_SET)) {
perror("fseek error");
return EXIT_FAILURE;
}
if (doh.type == PANFROSTDUMP_BUF_REG) {
jc = doh.jc;
gpu_id = doh.gpu_id;
bo_num = doh.nbos;
bos = calloc(sizeof(char *), bo_num);
if (!bos) {
fprintf(stderr, "Failed to allocate memory for BO pointer array\n");
return EXIT_FAILURE;
}
printf("JC: %#lX, GPU_ID: %#X\n", jc, gpu_id);
puts("GPU registers:");
for (i = 0;
i < (doh.file_size / sizeof(struct panfrost_dump_registers));
i++) {
uint32_t reg_offset;
uint32_t reg_val;
if (read_register(&reg_offset, &reg_val, data_fp))
printf("0x%04X : 0x%08X\n", reg_offset, reg_val);
}
}
if (!read_header(hdr_fp, &doh))
return EXIT_FAILURE;
if (doh.type == PANFROSTDUMP_BUF_BOMAP) {
uint32_t bomap_offset = doh.file_offset;
if (!read_header(hdr_fp, &doh))
return EXIT_FAILURE;
while (doh.type != PANFROSTDUMP_BUF_END) {
if (doh.valid) {
if (fseek(data_fp, bomap_offset + doh.data[0], SEEK_SET)) {
perror("fseek error");
return EXIT_FAILURE;
}
printf("BO(%u) VA(%#llx) SZ(%#x) page addresses:\n",
j, doh.iova, doh.file_size);
for (k = 0; k < (doh.file_size / PAGE_SIZE); k++) {
uint64_t phys_addr;
if (!read_page_addr(&phys_addr, data_fp))
continue;
printf("%u: %#lX\n", k, phys_addr);
}
/* Copy the BO into external file */
char bodump_filename[MAX_BODUMP_FILENAME];
FILE *bodump;
snprintf(bodump_filename, MAX_BODUMP_FILENAME - 1,
"bodump-%u.dump", j);
if ((bodump = fopen(bodump_filename, "wb"))) {
if (fseek(data_fp, doh.file_offset, SEEK_SET)) {
perror("fseek error");
return EXIT_FAILURE;
}
bos[j] = malloc(doh.file_size);
if (!bos[j]) {
fprintf(stderr, "Failed to allocate memory for BO\n");
return EXIT_FAILURE;
}
fseek(data_fp, doh.file_offset, SEEK_SET);
nbytes = fread(bos[j], 1, doh.file_size, data_fp);
if (nbytes < doh.file_size) {
fprintf(stderr, "Read less than BO size: %u\n", errno);
return EXIT_FAILURE;
}
nbytes = fwrite(bos[j], 1, doh.file_size, bodump);
if (nbytes < doh.file_size) {
fprintf(stderr,
"Failed to write BO contents into file: %u\n",
errno);
return EXIT_FAILURE;
}
fclose(bodump);
pandecode_inject_mmap(doh.iova, bos[j], doh.file_size, NULL);
}
else
perror("failed to open BO dump file");
}
else
fprintf(stderr, "BO(%u) isn't valid\n", j);
if (!read_header(hdr_fp, &doh))
return EXIT_FAILURE;
j++;
}
}
else {
if (!read_header(hdr_fp, &doh))
return EXIT_FAILURE;
}
if (doh.type != PANFROSTDUMP_BUF_END)
fprintf(stderr, "Trailing header isn't right\n");
pandecode_jc(jc, gpu_id);
pandecode_close();
fclose(data_fp);
fclose(hdr_fp);
data_fp = hdr_fp = NULL;
return EXIT_SUCCESS;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment