From 1de8e0029f59be740b4a02a44e694fab06798081 Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso <tomeu@tomeuvizoso.net>
Date: Fri, 14 Feb 2025 17:55:09 +0100
Subject: [PATCH] fixup: per-core runtime PM, map BOs on all cores, drop debug printks

---
 drivers/accel/rocket/rocket_core.c   | 17 +++++-
 drivers/accel/rocket/rocket_device.h |  5 --
 drivers/accel/rocket/rocket_drv.c    | 80 +++++++---------------------
 drivers/accel/rocket/rocket_gem.c    | 70 +++++++++++++++---------
 drivers/accel/rocket/rocket_job.c    | 38 ++++---------
 drivers/accel/rocket/rocket_job.h    |  2 +-
 6 files changed, 89 insertions(+), 123 deletions(-)

diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c
index 37447f34dbb98..705ed78e24c55 100644
--- a/drivers/accel/rocket/rocket_core.c
+++ b/drivers/accel/rocket/rocket_core.c
@@ -63,10 +63,9 @@ int rocket_core_init(struct rocket_core *core)
 {
 	struct device *dev = core->dev;
 	struct platform_device *pdev = to_platform_device(dev);
+	uint32_t version;
 	int err = 0;
 
-	printk("*** %s: core %px %d\n", __func__, core, core->index);
-
 	err = rocket_clk_init(core);
 	if (err) {
 		dev_err(dev, "clk init failed %d\n", err);
@@ -87,6 +86,20 @@ int rocket_core_init(struct rocket_core *core)
 	if (err)
 		return err;
 
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 50); /* ~3 frames */
+	pm_runtime_enable(dev);
+
+	err = pm_runtime_get_sync(dev);
+
+	version = rocket_read(core, REG_PC_VERSION);
+	version += rocket_read(core, REG_PC_VERSION_NUM) & 0xffff;
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
+	dev_info(dev, "Rockchip NPU core %d version: %d\n", core->index, version);
+
 	return 0;
 }
 
diff --git a/drivers/accel/rocket/rocket_device.h b/drivers/accel/rocket/rocket_device.h
index 86da2484cc2db..f7e0bfd2a7a3c 100644
--- a/drivers/accel/rocket/rocket_device.h
+++ b/drivers/accel/rocket/rocket_device.h
@@ -22,9 +22,4 @@ int rocket_device_init(struct rocket_device *rdev);
 void rocket_device_fini(struct rocket_device *rdev);
 void rocket_device_reset(struct rocket_device *rdev);
 
-static inline struct device *get_top_core_device(struct rocket_device *rdev)
-{
-	return rdev->cores[0].dev;
-}
-
 #endif
diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c
index f0ca0417c96a1..e66150630e534 100644
--- a/drivers/accel/rocket/rocket_drv.c
+++ b/drivers/accel/rocket/rocket_drv.c
@@ -6,7 +6,6 @@
 #include "linux/clk.h"
 #include "linux/component.h"
 #include "linux/dma-mapping.h"
-#include "linux/printk.h"
 #include "rocket_registers.h"
 #include <linux/of.h>
 #include <linux/module.h>
@@ -146,45 +145,10 @@ static int rocket_drm_init(struct device *dev, bool with_components)
 			goto err_drm_dev;
 	}
 
-	pm_runtime_use_autosuspend(dev);
-	pm_runtime_set_autosuspend_delay(dev, 50); /* ~3 frames */
-	pm_runtime_enable(dev);
-
-	printk("*** %s: %d dev %s usage_count %d links_count %d\n", __func__, 98, dev_name(dev), atomic_read(&dev->power.usage_count), dev->power.links_count);
-
-	printk("*** %s: %d dev %s usage_count %d links_count %d\n", __func__, 99, dev_name(dev), atomic_read(&dev->power.usage_count), dev->power.links_count);
-	printk("*** %s: dev %s dev->power.runtime_error %d\n", __func__, dev_name(dev), dev->power.runtime_error);
-	printk("*** %s: dev %s dev->power.usage_count %d\n", __func__, dev_name(dev), atomic_read(&dev->power.usage_count));
-	printk("*** %s: dev %s dev->power.ignore_children %d\n", __func__, dev_name(dev), dev->power.ignore_children);
-	printk("*** %s: dev %s dev->power.child_count %d\n", __func__, dev_name(dev), atomic_read(&dev->power.child_count));
-	printk("*** %s: dev %s dev->power.deferred_resume %d\n", __func__, dev_name(dev), dev->power.deferred_resume);
-	printk("*** %s: dev %s dev->power.runtime_status %d\n", __func__, dev_name(dev), dev->power.runtime_status);
-	printk("*** %s: dev %s dev->power.request_pending %d\n", __func__, dev_name(dev), dev->power.request_pending);
-	printk("*** %s: dev %s dev->power.request %d\n", __func__, dev_name(dev), dev->power.request);
-
-	/*
-	 * Register the DRM device with the core and the connectors with
-	 * sysfs
-	 */
 	err = drm_dev_register(ddev, 0);
 	if (err < 0)
 		goto err_pm_runtime;
 
-	printk("*** %s: before reading\n", __func__);
-	err = pm_runtime_get_sync(dev);
-
-	for (unsigned int core = 0; core < rdev->num_cores; core++) {
-		uint32_t version;
-
-		version = rocket_read(&rdev->cores[core], REG_PC_VERSION);
-		version += rocket_read(&rdev->cores[core], REG_PC_VERSION_NUM) & 0xffff;
-
-		dev_info(dev, "Rockchip NPU core %d version: %d\n", core, version);
-	}
-
-	pm_runtime_mark_last_busy(dev);
-	pm_runtime_put_autosuspend(dev);
-
 	return 0;
 
 err_pm_runtime:
@@ -234,8 +198,8 @@ static int rocket_core_bind(struct device *dev, struct device *master, void *dat
 	rdev->cores[core].rdev = rdev;
 	rdev->cores[core].dev = dev;
 	rdev->cores[core].index = core;
-	// rdev->cores[core].link = device_link_add(dev, rdev->cores[0].dev,
-	// 					 DL_FLAG_STATELESS);
+	rdev->cores[core].link = device_link_add(dev, rdev->cores[0].dev,
+						 DL_FLAG_STATELESS);
 
 	rdev->num_cores++;
 
@@ -255,7 +219,7 @@ static void rocket_core_unbind(struct device *dev, struct device *master, void *
 	for (unsigned int core = 0; core < rdev->num_cores; core++) {
 		if (rdev->cores[core].dev == dev) {
 			rocket_core_fini(&rdev->cores[core]);
-			//device_link_del(rdev->cores[core].link);
+			device_link_del(rdev->cores[core].link);
 			break;
 		}
 	}
@@ -321,23 +285,19 @@ static int rocket_device_runtime_resume(struct device *dev)
 {
 	struct rocket_device *rdev = dev_get_drvdata(dev);
 
-	printk("*** %s: %d dev %s\n", __func__, 1, dev_name(dev));
-
-	clk_prepare_enable(rdev->clk_npu);
-	clk_prepare_enable(rdev->pclk);
-
 	for (unsigned int core = 0; core < rdev->num_cores; core++) {
-		printk("*** %s: %d core %d\n", __func__, 2, core);
+		if (dev != rdev->cores[core].dev)
+			continue;
 
-		struct iommu_domain *domain = iommu_get_domain_for_dev(rdev->cores[core].dev);
-		iommu_attach_device(domain, rdev->cores[core].dev);
+		if (core == 0) {
+			clk_prepare_enable(rdev->clk_npu);
+			clk_prepare_enable(rdev->pclk);
+		}
 
 		clk_prepare_enable(rdev->cores[core].a_clk);
 		clk_prepare_enable(rdev->cores[core].h_clk);
 	}
 
-	printk("*** %s: %d dev %s\n", __func__, 99, dev_name(dev));
-
 	return 0;
 }
 
@@ -345,26 +305,22 @@ static int rocket_device_runtime_suspend(struct device *dev)
 {
 	struct rocket_device *rdev = dev_get_drvdata(dev);
 
-	printk("*** %s: %d dev %s\n", __func__, 1, dev_name(dev));
-
-	if (!rocket_job_is_idle(rdev))
-		return -EBUSY;
-
 	for (unsigned int core = 0; core < rdev->num_cores; core++) {
-		printk("*** %s: %d core %d\n", __func__, 2, core);
+		if (dev != rdev->cores[core].dev)
+			continue;
+
+		if (!rocket_job_is_idle(&rdev->cores[core]))
+			return -EBUSY;
 
 		clk_disable_unprepare(rdev->cores[core].a_clk);
 		clk_disable_unprepare(rdev->cores[core].h_clk);
 
-		struct iommu_domain *domain = iommu_get_domain_for_dev(rdev->cores[core].dev);
-		iommu_detach_device(domain, rdev->cores[core].dev);
+		if (core == 0) {
+			clk_disable_unprepare(rdev->pclk);
+			clk_disable_unprepare(rdev->clk_npu);
+		}
 	}
 
-	clk_disable_unprepare(rdev->pclk);
-	clk_disable_unprepare(rdev->clk_npu);
-
-	printk("*** %s: %d dev %s\n", __func__, 99, dev_name(dev));
-
 	return 0;
 }
 
diff --git a/drivers/accel/rocket/rocket_gem.c b/drivers/accel/rocket/rocket_gem.c
index 41a94206bd5d0..b83dbe4c0e0d8 100644
--- a/drivers/accel/rocket/rocket_gem.c
+++ b/drivers/accel/rocket/rocket_gem.c
@@ -9,6 +9,34 @@
 #include "rocket_gem.h"
 #include "rocket_device.h"
 
+/* GEM .free hook: drop the extra per-core DMA mappings, then free the shmem BO. */
+static void rocket_gem_bo_free(struct drm_gem_object *obj)
+{
+	struct rocket_device *rdev = obj->dev->dev_private;
+	struct rocket_gem_object *bo = to_rocket_bo(obj);
+
+	drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+
+	/* Unmap from the IOMMUs of cores > 0; sgt is NULL if the BO was never mapped */
+	for (unsigned int core = 1; bo->base.sgt && core < rdev->num_cores; core++)
+		dma_unmap_sgtable(rdev->cores[core].dev, bo->base.sgt, DMA_BIDIRECTIONAL, 0);
+
+	/* This will unmap the pages from the IOMMU linked to core 0 */
+	drm_gem_shmem_free(&bo->base);
+}
+
+static const struct drm_gem_object_funcs rocket_gem_funcs = {
+	.free = rocket_gem_bo_free,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = drm_gem_shmem_object_mmap,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
 /**
  * rocket_gem_create_object - Implementation of driver->gem_create_object.
  * @dev: DRM device
@@ -25,6 +53,8 @@ struct drm_gem_object *rocket_gem_create_object(struct drm_device *dev, size_t s
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 
+	obj->base.base.funcs = &rocket_gem_funcs;
+
 	return &obj->base.base;
 }
 
@@ -38,8 +68,6 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 	struct sg_table *sgt;
 	int ret;
 
-	printk("*** %s: %d\n", __func__, 1);
-
 	shmem_obj = drm_gem_shmem_create(dev, args->size);
 	if (IS_ERR(shmem_obj))
 		return PTR_ERR(shmem_obj);
@@ -56,22 +84,23 @@ int rocket_ioctl_create_bo(struct drm_device *dev, void *data, struct drm_file *
 	if (ret)
 		goto err;
 
-	printk("*** %s: %d before drm_gem_shmem_get_pages_sgt\n", __func__, 1);
+	/* This will map the pages to the IOMMU linked to core 0 */
 	sgt = drm_gem_shmem_get_pages_sgt(shmem_obj);
 	if (IS_ERR(sgt)) {
 		ret = PTR_ERR(sgt);
 		goto err;
 	}
 
-	ret = dma_map_sgtable(rdev->cores[1].dev, sgt, DMA_BIDIRECTIONAL, 0);
-	ret = dma_map_sgtable(rdev->cores[2].dev, sgt, DMA_BIDIRECTIONAL, 0);
-	printk("*** %s: %d after drm_gem_shmem_get_pages_sgt\n", __func__, 1);
+	/* Map the pages to the IOMMUs linked to the other cores, so all cores can access this BO */
+	for (unsigned int core = 1; core < rdev->num_cores; core++) {
+		ret = dma_map_sgtable(rdev->cores[core].dev, sgt, DMA_BIDIRECTIONAL, 0);
+		if (ret)
+			goto err; /* TODO: Cleanup? */
+	}
 
 	args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
 	args->dma_address = sg_dma_address(shmem_obj->sgt->sgl);
 
-	printk("*** %s: %d\n", __func__, 99);
-
 	return 0;
 
 err:
@@ -100,8 +129,6 @@ int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *fi
 	bool write = !!(args->op & ROCKET_PREP_WRITE);
 	long ret = 0;
 
-	printk("*** %s: %d\n", __func__, 1);
-
 	if (args->op & ~(ROCKET_PREP_READ | ROCKET_PREP_WRITE))
 		return -EINVAL;
 
@@ -116,16 +143,15 @@ int rocket_ioctl_prep_bo(struct drm_device *dev, void *data, struct drm_file *fi
 
 	shmem_obj = &to_rocket_bo(gem_obj)->base;
 
-	dma_sync_sgtable_for_cpu(rdev->cores[0].dev, shmem_obj->sgt, rocket_op_to_dma_dir(args->op));
-	dma_sync_sgtable_for_cpu(rdev->cores[1].dev, shmem_obj->sgt, rocket_op_to_dma_dir(args->op));
-	dma_sync_sgtable_for_cpu(rdev->cores[2].dev, shmem_obj->sgt, rocket_op_to_dma_dir(args->op));
+	for (unsigned int core = 1; core < rdev->num_cores; core++) {
+		dma_sync_sgtable_for_cpu(rdev->cores[core].dev, shmem_obj->sgt,
+					 rocket_op_to_dma_dir(args->op));
+	}
 
 	to_rocket_bo(gem_obj)->last_cpu_prep_op = args->op;
 
 	drm_gem_object_put(gem_obj);
 
-	printk("*** %s: %d\n", __func__, 99);
-
 	return ret;
 }
 
@@ -137,8 +163,6 @@ int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *fi
 	struct drm_gem_shmem_object *shmem_obj;
 	struct rocket_device *rdev = dev->dev_private;
 
-	printk("*** %s: %d\n", __func__, 1);
-
 	gem_obj = drm_gem_object_lookup(file, args->handle);
 	if (!gem_obj)
 		return -ENOENT;
@@ -148,18 +172,14 @@ int rocket_ioctl_fini_bo(struct drm_device *dev, void *data, struct drm_file *fi
 
 	WARN_ON(rkt_obj->last_cpu_prep_op == 0);
 
-	dma_sync_sgtable_for_device(rdev->cores[0].dev, shmem_obj->sgt,
-				    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
-	dma_sync_sgtable_for_device(rdev->cores[1].dev, shmem_obj->sgt,
-				    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
-	dma_sync_sgtable_for_device(rdev->cores[2].dev, shmem_obj->sgt,
-				    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
+	for (unsigned int core = 1; core < rdev->num_cores; core++) {
+		dma_sync_sgtable_for_device(rdev->cores[core].dev, shmem_obj->sgt,
+					    rocket_op_to_dma_dir(rkt_obj->last_cpu_prep_op));
+	}
 
 	rkt_obj->last_cpu_prep_op = 0;
 
 	drm_gem_object_put(gem_obj);
 
-	printk("*** %s: %d\n", __func__, 99);
-
 	return 0;
 }
diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c
index d37668fa590f4..817886c5026f5 100644
--- a/drivers/accel/rocket/rocket_job.c
+++ b/drivers/accel/rocket/rocket_job.c
@@ -304,8 +304,6 @@ static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
 	struct dma_fence *fence = NULL;
 	int ret;
 
-	printk("*** %s: %d dev %s job %px\n", __func__, 1, dev_name(core->dev), job);
-
 	if (unlikely(job->base.s_fence->finished.error))
 		return NULL;
 
@@ -323,7 +321,7 @@ static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
 		dma_fence_put(job->done_fence);
 	job->done_fence = dma_fence_get(fence);
 
-	ret = pm_runtime_get_sync(get_top_core_device(rdev));
+	ret = pm_runtime_get_sync(core->dev);
 	if (ret < 0)
 		return fence;
 
@@ -340,10 +338,6 @@ static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job)
 static void rocket_job_handle_done(struct rocket_core *core,
 				   struct rocket_job *job)
 {
-	struct rocket_device *rdev = core->rdev;
-
-	printk("*** %s: %d dev %s job %px\n", __func__, 1, dev_name(core->dev), job);
-
 	if (job->next_task_idx < job->task_count) {
 		rocket_job_hw_submit(core, job);
 		return;
@@ -351,16 +345,14 @@ static void rocket_job_handle_done(struct rocket_core *core,
 
 	core->in_flight_job = NULL;
 	dma_fence_signal_locked(job->done_fence);
-	pm_runtime_put_autosuspend(get_top_core_device(rdev));
+	pm_runtime_put_autosuspend(core->dev);
 }
 
 static void rocket_job_handle_irq(struct rocket_core *core)
 {
 	uint32_t status, raw_status;
 
-	printk("*** %s: %d dev %s\n", __func__, 1, dev_name(core->dev));
-
-	pm_runtime_mark_last_busy(get_top_core_device(core->rdev));
+	pm_runtime_mark_last_busy(core->dev);
 
 	status = rocket_read(core, REG_PC_INTERRUPT_STATUS);
 	raw_status = rocket_read(core, REG_PC_INTERRUPT_RAW_STATUS);
@@ -422,14 +414,14 @@ rocket_reset(struct rocket_core *core, struct drm_sched_job *bad)
 	 */
 	spin_lock(&core->job_lock);
 	if (core->in_flight_job)
-		pm_runtime_put_noidle(get_top_core_device(core->rdev));
+		pm_runtime_put_noidle(core->dev);
 
 	core->in_flight_job = NULL;
 	spin_unlock(&core->job_lock);
 
 	/* Proceed with reset now. */
-	pm_runtime_force_suspend(get_top_core_device(core->rdev));
-	pm_runtime_force_resume(get_top_core_device(core->rdev));
+	pm_runtime_force_suspend(core->dev);
+	pm_runtime_force_resume(core->dev);
 
 	/* GPU has been reset, we can clear the reset pending bit. */
 	atomic_set(&core->reset.pending, 0);
@@ -457,8 +449,6 @@ static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_j
 	struct rocket_device *rdev = job->rdev;
 	struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
 
-	printk("*** %s: %d dev %s job %px\n", __func__, 1, dev_name(core->dev), job);
-
 	/*
 	 * If the GPU managed to complete this jobs fence, the timeout is
 	 * spurious. Bail out.
@@ -538,7 +528,6 @@ int rocket_job_init(struct rocket_core *core)
 	spin_lock_init(&core->job_lock);
 
 	core->irq = platform_get_irq_byname(to_platform_device(core->dev), "npu_irq");
-	printk("*** %s: %d dev %s core->irq %d\n", __func__, 1, dev_name(core->dev), core->irq);
 	if (core->irq < 0)
 		return core->irq;
 
@@ -615,15 +604,11 @@ void rocket_job_close(struct rocket_file_priv *rocket_priv)
 	drm_sched_entity_destroy(entity);
 }
 
-int rocket_job_is_idle(struct rocket_device *rdev)
+int rocket_job_is_idle(struct rocket_core *core)
 {
-	unsigned int core;
-
-	for (core = 0; core < rdev->num_cores; core++) {
-		/* If there are any jobs in any HW queue, we're not idle */
-		if (atomic_read(&rdev->cores[core].sched.credit_count))
-			return false;
-	}
+	/* If there are any jobs in this HW queue, we're not idle */
+	if (atomic_read(&core->sched.credit_count))
+		return false;
 
 	return true;
 }
@@ -635,9 +620,6 @@ static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file
 	struct rocket_job *rjob = NULL;
 	int ret = 0;
 
-	udelay(1000);
-	printk("*** %s: %d dev %s\n", __func__, 1, dev_name(dev->dev));
-
 	if (job->task_count == 0)
 		return -EINVAL;
 
diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h
index 0c3c90e47d391..2302d376f4761 100644
--- a/drivers/accel/rocket/rocket_job.h
+++ b/drivers/accel/rocket/rocket_job.h
@@ -44,6 +44,6 @@ int rocket_job_init(struct rocket_core *core);
 void rocket_job_fini(struct rocket_core *core);
 int rocket_job_open(struct rocket_file_priv *rocket_priv);
 void rocket_job_close(struct rocket_file_priv *rocket_priv);
-int rocket_job_is_idle(struct rocket_device *rdev);
+int rocket_job_is_idle(struct rocket_core *core);
 
 #endif
\ No newline at end of file
-- 
GitLab