From 7e070e51a7dfeada2029a5031a136f85b7e90728 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Sat, 8 Jan 2022 20:55:06 -0500
Subject: [PATCH] WIP: dummy job workaround

---
 drivers/gpu/drm/panfrost/Makefile             |   1 +
 drivers/gpu/drm/panfrost/panfrost_drv.c       |   6 +
 drivers/gpu/drm/panfrost/panfrost_dummy_job.c | 327 ++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_dummy_job.h |  10 +
 4 files changed, 344 insertions(+)
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dummy_job.c
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dummy_job.h

diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index b719358624179..3112696672ed4 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -4,6 +4,7 @@ panfrost-y := \
 	panfrost_drv.o \
 	panfrost_device.o \
 	panfrost_devfreq.o \
+	panfrost_dummy_job.o \
 	panfrost_gem.o \
 	panfrost_gem_shrinker.o \
 	panfrost_gpu.o \
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 44172c4fdd6aa..9b9f2b7806cc1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -19,6 +19,7 @@
 #include "panfrost_job.h"
 #include "panfrost_gpu.h"
 #include "panfrost_perfcnt.h"
+#include "panfrost_dummy_job.h"
 
 static bool unstable_ioctls;
 module_param_unsafe(unstable_ioctls, bool, 0600);
@@ -580,6 +581,11 @@ static int panfrost_probe(struct platform_device *pdev)
 		goto err_out0;
 	}
 
+	/* XXX is this too late? */
+	err = panfrost_run_dummy_job(pfdev);
+	if (err)
+		goto err_out1;
+
 	pm_runtime_set_active(pfdev->dev);
 	pm_runtime_mark_last_busy(pfdev->dev);
 	pm_runtime_enable(pfdev->dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_dummy_job.c b/drivers/gpu/drm/panfrost/panfrost_dummy_job.c
new file mode 100644
index 0000000000000..07d726ed92be9
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dummy_job.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Collabora Ltd */
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_client.h>
+#include <drm/panfrost_drm.h>
+#include <linux/iopoll.h>
+
+#include "panfrost_dummy_job.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+#include "panfrost_regs.h"
+
+#define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
+#define job_read(dev, reg) readl(dev->iomem + (reg))
+
+#define MALI_JOB_TYPE_NULL 1
+#define MALI_JOB_TYPE_WRITE_VALUE 2
+#define MALI_JOB_TYPE_FRAGMENT 9
+
+/* Size of the scratch BO that holds the dummy job and its descriptor */
+#define DUMMY_JOB_BO_SIZE SZ_4K
+/* Byte offset of the framebuffer descriptor inside the scratch BO */
+#define DUMMY_JOB_FBD_OFFSET 0x80
+/* Sleep interval and timeout, in microseconds, for register polling */
+#define DUMMY_JOB_POLL_US 10
+#define DUMMY_JOB_TIMEOUT_US (500 * 1000)
+
+struct mali_job_header {
+	u32 padding_1[4];
+
+	u8 size : 1;
+	u8 type : 7;
+	u8 padding_2 : 8;
+	u16 index : 16;
+
+	u32 padding_3[3];
+} __attribute__((packed));
+
+struct mali_write_value_job {
+	struct mali_job_header header;
+	u64 address;
+	uint32_t write_value_type;
+	uint32_t padding;
+	u64 immediate;
+} __attribute__((packed));
+
+/*
+ * Hexdump of the buffer panfrost_emit_dummy_job() produces for
+ * gpu_va = 0x100000000. As in hexdump -C output, a lone "*" stands for
+ * elided lines identical to the preceding one.
+ *
+ * 00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+ * 00000010  12 00 01 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+ * 00000020  00 00 00 00 3f 00 01 00  81 00 00 00 01 00 00 00  |....?...........|
+ * 00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+ * *
+ * 000000a0  ff 03 1f 00 00 00 00 00  ff 03 1f 00 00 d0 00 10  |................|
+ * 000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+ * *
+ * 00000100  00 00 00 04 98 00 88 06  00 00 00 00 00 00 00 00  |................|
+ * 00000110  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+ */
+struct mali_render_target {
+	u32 padding_1 : 26;
+	u32 internal_format : 6;
+	u32 cfg;
+} __attribute__((packed));
+
+struct mali_framebuffer_descriptor {
+	u32 padding_1[8];
+	u16 width;
+	u16 height;
+	u16 bound_min_x;
+	u16 bound_min_y;
+	u16 bound_max_x;
+	u16 bound_max_y;
+	u32 cfg;
+	u32 padding_2[20];
+	struct mali_render_target rt;
+} __attribute__((packed));
+
+struct mali_fragment_job {
+	struct mali_job_header header;
+
+	uint16_t bound_min_x;
+	uint16_t bound_min_y;
+	uint16_t bound_max_x;
+	uint16_t bound_max_y;
+
+	uint64_t framebuffer_descriptor;
+} __attribute__((packed));
+
+/*
+ * Write a minimal fragment job at offset 0 of @buffer and the framebuffer
+ * descriptor it points to at DUMMY_JOB_FBD_OFFSET.
+ *
+ * @buffer: CPU mapping of the scratch BO, DUMMY_JOB_BO_SIZE bytes long
+ * @gpu_va: GPU virtual address at which the same BO is mapped
+ */
+static void
+panfrost_emit_dummy_job(void *buffer, uint64_t gpu_va)
+{
+	struct mali_fragment_job job = {
+		.header.type = MALI_JOB_TYPE_FRAGMENT,
+		.header.index = 1,
+		.bound_max_x = 64 - 1,
+		.bound_max_y = 2 - 1,
+		.framebuffer_descriptor = (gpu_va + DUMMY_JOB_FBD_OFFSET) | 1
+	};
+	struct mali_framebuffer_descriptor fbd = {
+		.width = 1024 - 1,
+		.height = 32 - 1,
+		.bound_max_x = 1024 - 1,
+		.bound_max_y = 32 - 1,
+		.cfg = 0x1000d000,
+		.rt.internal_format = 1, // RGBA8
+		.rt.cfg = 0x06880098,
+	};
+
+	/* Both structures must fit where they are copied below */
+	BUILD_BUG_ON(sizeof(job) > DUMMY_JOB_FBD_OFFSET);
+	BUILD_BUG_ON(DUMMY_JOB_FBD_OFFSET + sizeof(fbd) > DUMMY_JOB_BO_SIZE);
+
+	memset(buffer, 0, DUMMY_JOB_BO_SIZE);
+	memcpy(buffer, &job, sizeof(job));
+	memcpy(buffer + DUMMY_JOB_FBD_OFFSET, &fbd, sizeof(fbd));
+
+	print_hex_dump(KERN_DEBUG, "job", DUMP_PREFIX_OFFSET, 16, 1, buffer, 128,
+		       false);
+}
+
+/*
+ * Power-cycle the shader cores selected by @affinity, then start the job
+ * chain at GPU address @jc on job slot @js using address space @as and
+ * poll until the slot raises an interrupt. Job and GPU interrupts are
+ * masked while the job runs and restored afterwards.
+ *
+ * Returns 0 if the job completed, -EIO if the slot signalled anything
+ * other than completion, or the readl_poll_timeout() error if the cores
+ * did not power up or the job never signalled.
+ */
+int
+panfrost_run_existing_dummy_job(struct panfrost_device *pfdev,
+				int js, int as,
+				u64 jc, u64 affinity)
+{
+	/* readl() returns 32 bits, so only the low half can be compared */
+	const u32 affinity_lo = lower_32_bits(affinity);
+	u32 saved_job_mask, saved_gpu_mask;
+	u32 interrupt;
+	u32 state;
+	int ret;
+
+	u32 cfg = as |
+		JS_CONFIG_THREAD_PRI(8) |
+		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
+		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	dev_dbg(pfdev->dev, "Powering off..\n");
+	job_write(pfdev, SHADER_PWROFF_LO, lower_32_bits(affinity));
+	job_write(pfdev, SHADER_PWROFF_HI, upper_32_bits(affinity));
+
+	ret = readl_poll_timeout(pfdev->iomem + SHADER_READY_LO, state,
+				 state == 0,
+				 DUMMY_JOB_POLL_US, DUMMY_JOB_TIMEOUT_US);
+	if (ret)
+		dev_warn(pfdev->dev, "Shader cores didn't power off during dummy workaround\n");
+
+	dev_dbg(pfdev->dev, "Powering on..\n");
+	job_write(pfdev, SHADER_PWRON_LO, lower_32_bits(affinity));
+	job_write(pfdev, SHADER_PWRON_HI, upper_32_bits(affinity));
+
+	ret = readl_poll_timeout(pfdev->iomem + SHADER_READY_LO, state,
+				 state == affinity_lo,
+				 DUMMY_JOB_POLL_US, DUMMY_JOB_TIMEOUT_US);
+	if (ret) {
+		dev_err(pfdev->dev, "Shader cores didn't come up during dummy workaround\n");
+		return ret;
+	}
+
+	dev_dbg(pfdev->dev, "JS0 features %X\n", pfdev->features.js_features[0]);
+	dev_dbg(pfdev->dev, "JS1 features %X\n", pfdev->features.js_features[1]);
+	dev_dbg(pfdev->dev, "JS2 features %X\n", pfdev->features.js_features[2]);
+
+	dev_dbg(pfdev->dev, "js_head %X\n", job_read(pfdev, JS_COMMAND_NEXT(js)));
+
+	/* Mask out IRQs so we don't try to dequeue the dummy job when it finishes */
+	saved_gpu_mask = job_read(pfdev, GPU_INT_MASK);
+	saved_job_mask = job_read(pfdev, JOB_INT_MASK);
+
+	job_write(pfdev, JOB_INT_MASK, 0);
+	job_write(pfdev, GPU_INT_MASK, 0);
+
+	dev_dbg(pfdev->dev, "dummy job jc=%llx, affinity=%llx, as=%d, js=%d\n",
+		jc, affinity, as, js);
+
+	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc));
+	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc));
+
+	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), lower_32_bits(affinity));
+	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
+
+	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
+	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
+
+	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_RAWSTAT, state,
+				 state & MK_JS_MASK(js),
+				 DUMMY_JOB_POLL_US, DUMMY_JOB_TIMEOUT_US);
+
+	interrupt = job_read(pfdev, JOB_INT_RAWSTAT) & MK_JS_MASK(js);
+
+	if (ret)
+		dev_err(pfdev->dev, "Dummy job hung\n");
+
+	if (interrupt != JOB_INT_MASK_DONE(js)) {
+		dev_err(pfdev->dev, "Interrupt %X\n", interrupt);
+		dev_err(pfdev->dev, "Dummy job failed, status=%X\n",
+			job_read(pfdev, JS_STATUS(js)));
+		dev_err(pfdev->dev, "fault status=%X\n",
+			job_read(pfdev, GPU_FAULT_STATUS));
+		/* Zero-pad both halves so the 64-bit address is unambiguous */
+		dev_err(pfdev->dev, "fault address=%08X%08X\n",
+			job_read(pfdev, GPU_FAULT_ADDRESS_HI),
+			job_read(pfdev, GPU_FAULT_ADDRESS_LO));
+		if (!ret)
+			ret = -EIO;
+	}
+
+	dev_dbg(pfdev->dev, "Dummy jobbed\n");
+	job_write(pfdev, JOB_INT_CLEAR, interrupt);
+
+	/* Restore IRQs */
+	job_write(pfdev, GPU_INT_MASK, saved_gpu_mask);
+	job_write(pfdev, JOB_INT_MASK, saved_job_mask);
+	return ret;
+}
+
+/*
+ * Build a dummy fragment job in a temporary BO, mapped through a temporary
+ * in-kernel DRM client, run it once on job slot 0 and tear everything down.
+ *
+ * Returns 0 on success (and sets pfdev->ran_dummy_job), a negative errno
+ * otherwise.
+ */
+int
+panfrost_run_dummy_job(struct panfrost_device *pfdev)
+{
+	const struct drm_driver *driver = pfdev->ddev->driver;
+	struct drm_client_dev client = {
+		.dev = pfdev->ddev
+	};
+	struct panfrost_gem_mapping *mapping;
+	struct drm_gem_shmem_object *bo;
+	struct dma_buf_map map;
+	struct drm_file *file;
+	u64 affinity;
+	u64 gpu_va;
+	int js = 0, as;
+	int ret;
+
+	dev_dbg(pfdev->dev, "working around\n");
+	ret = drm_client_open(&client);
+	if (ret)
+		return ret;
+
+	file = client.file;
+
+	/*
+	 * NOTE(review): unlike the later failures, this one does not close the
+	 * client: presumably drm_client_close() would run the driver's close
+	 * hooks on a file whose open just failed -- confirm, and also confirm
+	 * whether drm_client_open() already calls driver->open for us.
+	 */
+	ret = driver->open(pfdev->ddev, file);
+	if (ret)
+		return ret;
+
+	/* Hunk of code from panfrost_perfcnt */
+	bo = drm_gem_shmem_create(pfdev->ddev, DUMMY_JOB_BO_SIZE);
+	if (IS_ERR(bo)) {
+		ret = PTR_ERR(bo);
+		goto out_close_client;
+	}
+
+	/* Map the buf in the address space attached to file_priv. */
+	ret = panfrost_gem_open(&bo->base, file);
+	if (ret)
+		goto out_put_bo;
+
+	mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base), file->driver_priv);
+	if (!mapping) {
+		ret = -EINVAL;
+		goto out_close_bo;
+	}
+
+	ret = drm_gem_shmem_vmap(bo, &map);
+	if (ret)
+		goto out_put_mapping;
+
+	gpu_va = mapping->mmnode.start << PAGE_SHIFT;
+	panfrost_emit_dummy_job(map.vaddr, gpu_va);
+	drm_gem_shmem_vunmap(bo, &map);
+
+	/* Submit it */
+	as = panfrost_mmu_as_get(pfdev, mapping->mmu);
+	affinity = pfdev->features.shader_present;
+	ret = panfrost_run_existing_dummy_job(pfdev, js, as, gpu_va, affinity);
+	/* Drop the address space reference taken by panfrost_mmu_as_get() */
+	panfrost_mmu_as_put(pfdev, mapping->mmu);
+	if (!ret)
+		pfdev->ran_dummy_job = true;
+
+	/* Closing; the error paths above join the unwind at the right depth */
+out_put_mapping:
+	panfrost_gem_mapping_put(mapping);
+out_close_bo:
+	panfrost_gem_close(&bo->base, file);
+out_put_bo:
+	drm_gem_object_put(&bo->base);
+out_close_client:
+	drm_client_close(&client);
+	return ret;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_dummy_job.h b/drivers/gpu/drm/panfrost/panfrost_dummy_job.h
new file mode 100644
index 0000000000000..7a66e6f4f7ad8
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dummy_job.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2022 Collabora Ltd */
+#ifndef __PANFROST_DUMMY_JOB_H__
+#define __PANFROST_DUMMY_JOB_H__
+
+#include "panfrost_device.h"
+
+int panfrost_run_dummy_job(struct panfrost_device *pfdev);
+
+#endif
-- 
GitLab