From 7e070e51a7dfeada2029a5031a136f85b7e90728 Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Date: Sat, 8 Jan 2022 20:55:06 -0500
Subject: [PATCH] WIP: dummy job workaround

---
 drivers/gpu/drm/panfrost/Makefile             |   1 +
 drivers/gpu/drm/panfrost/panfrost_drv.c       |   6 +
 drivers/gpu/drm/panfrost/panfrost_dummy_job.c | 285 ++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_dummy_job.h |  10 +
 4 files changed, 302 insertions(+)
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dummy_job.c
 create mode 100644 drivers/gpu/drm/panfrost/panfrost_dummy_job.h

diff --git a/drivers/gpu/drm/panfrost/Makefile b/drivers/gpu/drm/panfrost/Makefile
index b719358624179..3112696672ed4 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -4,6 +4,7 @@ panfrost-y := \
 	panfrost_drv.o \
 	panfrost_device.o \
 	panfrost_devfreq.o \
+	panfrost_dummy_job.o \
 	panfrost_gem.o \
 	panfrost_gem_shrinker.o \
 	panfrost_gpu.o \
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 44172c4fdd6aa..9b9f2b7806cc1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -19,6 +19,7 @@
 #include "panfrost_job.h"
 #include "panfrost_gpu.h"
 #include "panfrost_perfcnt.h"
+#include "panfrost_dummy_job.h"
 
 static bool unstable_ioctls;
 module_param_unsafe(unstable_ioctls, bool, 0600);
@@ -580,6 +581,11 @@ static int panfrost_probe(struct platform_device *pdev)
 		goto err_out0;
 	}
 
+	/* XXX is this too late? */
+	err = panfrost_run_dummy_job(pfdev);
+	if (err)
+		goto err_out1;
+
 	pm_runtime_set_active(pfdev->dev);
 	pm_runtime_mark_last_busy(pfdev->dev);
 	pm_runtime_enable(pfdev->dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_dummy_job.c b/drivers/gpu/drm/panfrost/panfrost_dummy_job.c
new file mode 100644
index 0000000000000..07d726ed92be9
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dummy_job.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Collabora Ltd */
+
+#include <drm/drm_file.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_client.h>
+#include <drm/panfrost_drm.h>
+#include <linux/iopoll.h>
+
+#include "panfrost_dummy_job.h"
+#include "panfrost_gem.h"
+#include "panfrost_mmu.h"
+#include "panfrost_regs.h"
+
+#define job_write(dev, reg, data) writel(data, (dev)->iomem + (reg)) /* MMIO write */
+#define job_read(dev, reg) readl((dev)->iomem + (reg)) /* MMIO read */
+
+#define MALI_JOB_TYPE_NULL 1 /* values for the 7-bit mali_job_header.type field */
+#define MALI_JOB_TYPE_WRITE_VALUE 2
+#define MALI_JOB_TYPE_FRAGMENT 9
+
+struct mali_job_header { /* 32-byte header that starts every job descriptor below */
+	u32 padding_1[4]; /* bytes 0x00-0x0f, left zero by this file */
+
+	u8 size : 1; /* never set in this file, so always 0 */
+	u8 type : 7; /* one of MALI_JOB_TYPE_* */
+	u8 padding_2 : 8;
+	u16 index : 16; /* set to 1 for the dummy job */
+
+	u32 padding_3[3]; /* bytes 0x14-0x1f, left zero by this file */
+} __attribute__((packed));
+
+struct mali_write_value_job { /* payload layout for MALI_JOB_TYPE_WRITE_VALUE */
+	struct mali_job_header header;
+	u64 address; /* GPU virtual address operand */
+	u32 write_value_type;
+	u32 padding;
+	u64 immediate; /* immediate operand */
+} __attribute__((packed));
+
+/* Reference bytes of the job buffer: fragment job at 0x00, framebuffer descriptor at 0x80.
+ * 00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+   00000010  12 00 01 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+   00000020  00 00 00 00 3f 00 01 00  81 00 00 00 01 00 00 00  |....?...........|
+   00000030  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+   *
+   000000a0  ff 03 1f 00 00 00 00 00  ff 03 1f 00 00 d0 00 10  |................|
+   000000b0  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+   *
+   00000100  00 00 00 04 98 00 88 06  00 00 00 00 00 00 00 00  |................|
+   00000110  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
+*/
+struct mali_render_target { /* embedded at the end of mali_framebuffer_descriptor */
+	u32 padding_1 : 26;
+	u32 internal_format : 6; /* the dummy job uses 1, labelled RGBA8 at the use site */
+	u32 cfg; /* opaque word; the dummy job uses 0x06880098 */
+} __attribute__((packed));
+
+struct mali_framebuffer_descriptor { /* copied to offset 0x80 of the job buffer */
+	u32 padding_1[8]; /* 32 bytes, left zero */
+	u16 width; /* the dummy job stores 1024 - 1 */
+	u16 height; /* the dummy job stores 32 - 1 */
+	u16 bound_min_x;
+	u16 bound_min_y;
+	u16 bound_max_x; /* the dummy job stores 1024 - 1 */
+	u16 bound_max_y; /* the dummy job stores 32 - 1 */
+	u32 cfg; /* opaque word; the dummy job uses 0x1000d000 */
+	u32 padding_2[20]; /* 80 bytes, left zero */
+	struct mali_render_target rt;
+} __attribute__((packed));
+
+struct mali_fragment_job { /* payload layout for MALI_JOB_TYPE_FRAGMENT */
+	struct mali_job_header header;
+
+	u16 bound_min_x;
+	u16 bound_min_y;
+	u16 bound_max_x;
+	u16 bound_max_y;
+
+	u64 framebuffer_descriptor; /* GPU VA of the descriptor; the emitter ORs in bit 0 */
+} __attribute__((packed));
+
+/*
+ * Write the dummy fragment job and its framebuffer descriptor into @buffer.
+ *
+ * @buffer: CPU mapping of the job buffer; must be at least 4096 bytes, all
+ *          of which are overwritten.
+ * @gpu_va: GPU virtual address at which @buffer is mapped; used to point the
+ *          job at the framebuffer descriptor.
+ *
+ * Resulting layout:
+ *   0x00  struct mali_fragment_job
+ *   0x80  struct mali_framebuffer_descriptor
+ *   rest  zero
+ *
+ * The first 128 bytes of the result are dumped to the kernel log.
+ */
+static void
+panfrost_emit_dummy_job(void *buffer, uint64_t gpu_va)
+{
+	/* Declarations stay ahead of the first statement, per kernel C style. */
+	struct mali_fragment_job job = {
+		.header.type = MALI_JOB_TYPE_FRAGMENT,
+		.header.index = 1,
+		.bound_max_x = 64 - 1,
+		.bound_max_y = 2 - 1,
+		.framebuffer_descriptor = (gpu_va + 0x80) | 1
+	};
+
+	struct mali_framebuffer_descriptor fbd = {
+		.width = 1024 - 1,
+		.height = 32 - 1,
+		.bound_max_x = 1024 - 1,
+		.bound_max_y = 32 - 1,
+		.cfg = 0x1000d000,
+		.rt.internal_format = 1, // RGBA8
+		.rt.cfg = 0x06880098,
+	};
+
+	/* Zero the whole buffer so every byte not written below reads as 0. */
+	memset(buffer, 0, 4096);
+
+	memcpy(buffer, &job, sizeof(job));
+	memcpy(buffer + 0x80, &fbd, sizeof(fbd));
+
+	print_hex_dump(KERN_WARNING, "job", DUMP_PREFIX_OFFSET, 16, 1, buffer, 128,
+			false);
+}
+
+/*
+ * Power-cycle the shader cores in @affinity, then run the job chain at GPU
+ * address @jc on job slot @js with address space @as. Job and GPU interrupts
+ * are masked while the job runs and completion is detected by polling.
+ *
+ * Returns 0 if the job completed, -EFAULT if the cores did not power up,
+ * the readl_poll_timeout() error if the job never raised an interrupt, or
+ * -EIO if it finished with anything other than "done".
+ */
+int
+panfrost_run_existing_dummy_job(struct panfrost_device *pfdev,
+				int js, int as, u64 jc, u64 affinity)
+{
+	/* Only SHADER_READY_LO is polled, so compare against the low word. */
+	u32 affinity_lo = lower_32_bits(affinity);
+	u32 saved_job_mask, saved_gpu_mask;
+	u32 js_state, interrupt, state;
+	int ret;
+	u32 cfg = as |
+		JS_CONFIG_THREAD_PRI(8) |
+		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
+		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+
+	printk("Powering off..\n");
+	job_write(pfdev, SHADER_PWROFF_LO, affinity_lo);
+	job_write(pfdev, SHADER_PWROFF_HI, upper_32_bits(affinity));
+	readl_poll_timeout(pfdev->iomem + SHADER_READY_LO, state,
+				 state == 0, 10, 500 * 1000);
+
+	printk("Powering on..\n");
+	job_write(pfdev, SHADER_PWRON_LO, affinity_lo);
+	job_write(pfdev, SHADER_PWRON_HI, upper_32_bits(affinity));
+	readl_poll_timeout(pfdev->iomem + SHADER_READY_LO, state,
+				 state == affinity_lo, 10, 500 * 1000);
+
+	if (job_read(pfdev, SHADER_READY_LO) != affinity_lo) {
+		dev_err(pfdev->dev, "Shader cores didn't come up during dummy workaround\n");
+		return -EFAULT;
+	}
+
+	printk("JS0 features %X\n", pfdev->features.js_features[0]);
+	printk("JS1 features %X\n", pfdev->features.js_features[1]);
+	printk("JS2 features %X\n", pfdev->features.js_features[2]);
+	printk("js_head %X\n", job_read(pfdev, JS_COMMAND_NEXT(js)));
+
+	/* Mask out IRQs so we don't try to dequeue the dummy job when it finishes */
+	saved_gpu_mask = job_read(pfdev, GPU_INT_MASK);
+	saved_job_mask = job_read(pfdev, JOB_INT_MASK);
+	job_write(pfdev, JOB_INT_MASK, 0);
+	job_write(pfdev, GPU_INT_MASK, 0);
+
+	printk("dummy job jc=%llx, affinity=%llx, as=%d, js=%d\n",
+			jc, affinity, as, js);
+
+	job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc));
+	job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc));
+	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), affinity_lo);
+	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), upper_32_bits(affinity));
+	job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
+	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
+
+	ret = readl_poll_timeout(pfdev->iomem + JOB_INT_RAWSTAT, js_state,
+				 js_state & MK_JS_MASK(js), 10, 500 * 1000);
+	interrupt = job_read(pfdev, JOB_INT_RAWSTAT) & MK_JS_MASK(js);
+	if (ret)
+		dev_err(pfdev->dev, "Dummy job hung\n");
+
+	if (interrupt != JOB_INT_MASK_DONE(js)) {
+		uint32_t js_status = job_read(pfdev, JS_STATUS(js));
+		printk("Interrupt %X\n", interrupt);
+		dev_err(pfdev->dev, "Dummy job failed, status=%X\n", js_status);
+		dev_err(pfdev->dev, "fault status=%X\n",
+				job_read(pfdev, GPU_FAULT_STATUS));
+		dev_err(pfdev->dev, "fault address=%08X%08X\n",
+				job_read(pfdev, GPU_FAULT_ADDRESS_HI),
+				job_read(pfdev, GPU_FAULT_ADDRESS_LO));
+		if (!ret)
+			ret = -EIO;
+	}
+
+	printk("Dummy jobbed\n");
+	job_write(pfdev, JOB_INT_CLEAR, interrupt);
+	/* Restore IRQs */
+	job_write(pfdev, GPU_INT_MASK, saved_gpu_mask);
+	job_write(pfdev, JOB_INT_MASK, saved_job_mask);
+	return ret;
+}
+
+/*
+ * Build the dummy fragment job in a temporary 4 KiB BO and run it on job
+ * slot 0 with every present shader core. Returns 0 or a negative errno.
+ */
+int
+panfrost_run_dummy_job(struct panfrost_device *pfdev)
+{
+	struct drm_client_dev client = { .dev = pfdev->ddev };
+	struct drm_gem_shmem_object *bo;
+	struct panfrost_gem_mapping *mapping;
+	struct dma_buf_map map;
+	struct drm_file *file;
+	u64 gpu_va;
+	int ret, as;
+
+	printk("working around\n");
+	ret = drm_client_open(&client);
+	if (ret)
+		return ret;
+	file = client.file;
+
+	/* NOTE(review): confirm drm_client_open() has not already run ->open. */
+	ret = pfdev->ddev->driver->open(pfdev->ddev, file);
+	if (ret)
+		return ret; /* NOTE(review): client left open as before; is ->postclose safe after a failed ->open? */
+
+	bo = drm_gem_shmem_create(pfdev->ddev, SZ_4K);
+	ret = PTR_ERR_OR_ZERO(bo);
+	if (ret)
+		goto out_client;
+
+	/* Map the buf in the address space attached to file_priv. */
+	ret = panfrost_gem_open(&bo->base, file);
+	if (ret)
+		goto out_bo;
+
+	mapping = panfrost_gem_mapping_get(to_panfrost_bo(&bo->base), file->driver_priv);
+	if (!mapping) {
+		ret = -EINVAL;
+		goto out_gem;
+	}
+
+	ret = drm_gem_shmem_vmap(bo, &map);
+	if (ret)
+		goto out_mapping;
+	gpu_va = mapping->mmnode.start << PAGE_SHIFT;
+	panfrost_emit_dummy_job(map.vaddr, gpu_va);
+	drm_gem_shmem_vunmap(bo, &map);
+
+	/* Hold an address-space reference only for the duration of the run. */
+	as = panfrost_mmu_as_get(pfdev, mapping->mmu);
+	ret = panfrost_run_existing_dummy_job(pfdev, 0, as, gpu_va,
+					      pfdev->features.shader_present);
+	panfrost_mmu_as_put(pfdev, mapping->mmu);
+	pfdev->ran_dummy_job = true;
+
+out_mapping:
+	panfrost_gem_mapping_put(mapping);
+out_gem:
+	panfrost_gem_close(&bo->base, file);
+out_bo:
+	drm_gem_object_put(&bo->base);
+out_client:
+	drm_client_close(&client);
+	return ret;
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_dummy_job.h b/drivers/gpu/drm/panfrost/panfrost_dummy_job.h
new file mode 100644
index 0000000000000..7a66e6f4f7ad8
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_dummy_job.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2022 Collabora Ltd */
+#ifndef __PANFROST_DUMMY_JOB_H__
+#define __PANFROST_DUMMY_JOB_H__
+
+#include "panfrost_device.h"
+
+int panfrost_run_dummy_job(struct panfrost_device *pfdev); /* 0 on success, negative errno on failure */
+
+#endif
-- 
GitLab