From c3180ff11fcbdcd7da2809bad57bb8d9bfdfd44e Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Date: Wed, 27 Feb 2019 14:58:05 +0100
Subject: [PATCH] drm/panfrost: Implement implicit out fences

For each submitted job, look up the BOs referenced by the atom, take
their reservation locks with the ww_mutex acquire protocol while the
job is pushed to the scheduler, and attach the job's render-done fence
as the exclusive fence on each BO's reservation object so that other
users of those buffers synchronize with the GPU work. Imported
dma-bufs reuse the exporter's reservation object, exposed via
gem_prime_res_obj.

Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_device.c |   2 +
 drivers/gpu/drm/panfrost/panfrost_device.h |   2 +
 drivers/gpu/drm/panfrost/panfrost_drv.c    |  84 ++++++++++++-
 drivers/gpu/drm/panfrost/panfrost_gem.c    |  17 ++-
 drivers/gpu/drm/panfrost/panfrost_gem.h    |   4 +
 drivers/gpu/drm/panfrost/panfrost_job.c    | 133 ++++++++++++++++++++-
 drivers/gpu/drm/panfrost/panfrost_job.h    |   8 ++
 include/uapi/drm/panfrost_drm.h            |   2 +
 8 files changed, 244 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 3efedb23a01d7..34c3bccc621a5 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -68,6 +68,8 @@ int panfrost_device_init(struct panfrost_device *pfdev)
 	int err;
 	struct resource *res;
 
+	mutex_init(&pfdev->sched_lock);
+
 	err = panfrost_clk_init(pfdev);
 	if (err) {
 		dev_err(pfdev->dev, "clk init failed %d\n", err);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 82d2a0ac4d4e5..3aca60fa60a8c 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -66,6 +66,8 @@ struct panfrost_device {
 	struct panfrost_job_slot *js;
 
 	struct panfrost_job *jobs[3];
+
+	struct mutex sched_lock;
 };
 
 struct panfrost_file_priv {
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 19821f15b98d6..3791c06a894bc 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -129,13 +129,86 @@ static int panfrost_ioctl_gem_info(struct drm_device *dev, void *data,
 	return drm_gem_dumb_map_offset(file, dev, args->handle, &args->offset);
 }
 
+/**
+ * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @job: job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list.  This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at panfrost_job_cleanup() time.
+ */
+static int
+panfrost_lookup_bos(struct drm_device *dev,
+		  struct drm_file *file_priv,
+		  struct drm_panfrost_gem_submit_atom *atom,
+		  struct panfrost_job *job)
+{
+	u32 *handles;
+	int ret = 0;
+	int i;
+
+	job->bo_count = atom->bo_handle_count;
+
+	if (!job->bo_count)
+		return 0;
+
+	job->bos = kvmalloc_array(job->bo_count,
+				  sizeof(struct panfrost_gem_object *),
+				  GFP_KERNEL | __GFP_ZERO);
+	if (!job->bos) {
+		DRM_DEBUG("Failed to allocate validated BO pointers\n");
+		return -ENOMEM;
+	}
+
+	handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
+	if (!handles) {
+		ret = -ENOMEM;
+		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
+		goto fail;
+	}
+
+	if (copy_from_user(handles,
+			   (void __user *)(uintptr_t)atom->bo_handles,
+			   job->bo_count * sizeof(u32))) {
+		ret = -EFAULT;
+		DRM_DEBUG("Failed to copy in GEM handles\n");
+		goto fail;
+	}
+
+	spin_lock(&file_priv->table_lock);
+	for (i = 0; i < job->bo_count; i++) {
+		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+						     handles[i]);
+		if (!bo) {
+			DRM_DEBUG("Failed to look up GEM BO %d: %u\n",
+				  i, handles[i]);
+			ret = -ENOENT;
+			spin_unlock(&file_priv->table_lock);
+			goto fail;
+		}
+		drm_gem_object_get(bo);
+		job->bos[i] = to_panfrost_bo(bo);
+	}
+	spin_unlock(&file_priv->table_lock);
+
+fail:
+	kvfree(handles);
+	return ret;
+}
+
 static int panfrost_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
-	struct drm_panfrost_gem_submit *args = data;
 	struct panfrost_device *pfdev = dev->dev_private;
-	struct panfrost_job *job;
+	struct drm_panfrost_gem_submit *args = data;
 	struct drm_panfrost_gem_submit_atom *atoms;
+	struct panfrost_job *job;
 	u32 latest_flush_id;
 	int i, ret = 0;
 
@@ -163,6 +236,8 @@ static int panfrost_ioctl_gem_submit(struct drm_device *dev, void *data,
 		if (!job)
 			return -ENOMEM;
 
+		kref_init(&job->refcount);
+
 		job->pfdev = pfdev;
 		job->file_priv = file->driver_priv;
 		job->jc = atoms[i].jc;
@@ -170,6 +245,10 @@ static int panfrost_ioctl_gem_submit(struct drm_device *dev, void *data,
 		job->requirements = atoms[i].requirements;
 		job->flush_id = latest_flush_id;
 
+		ret = panfrost_lookup_bos(dev, file, &atoms[i], job);
+		if (ret)
+			goto err_free_atoms;
+
 		ret = panfrost_job_push(job);
 		if (ret)
 			goto err_free_atoms;
@@ -261,6 +340,7 @@ static struct drm_driver panfrost_drm_driver = {
 	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
 	.gem_prime_import_sg_table = panfrost_gem_prime_import_sg_table,
 	.gem_prime_mmap		= drm_gem_prime_mmap,
+	.gem_prime_res_obj	= panfrost_prime_res_obj,
 };
 
 static int panfrost_pdev_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index 6ca69a4966f89..9f74b4d063da7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -6,6 +6,7 @@
 #include <linux/err.h>
 #include <linux/ktime.h>
 #include <linux/slab.h>
+#include <linux/dma-buf.h>
 
 #include <drm/panfrost_drm.h>
 #include "panfrost_device.h"
@@ -79,6 +80,9 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
 
 	obj->base.base.funcs = &panfrost_gem_funcs;
 
+	obj->resv = &obj->_resv;
+	reservation_object_init(obj->resv);
+
 	spin_lock(&pfdev->mm_lock);
 	ret = drm_mm_insert_node(&pfdev->mm, &obj->node,
 				 roundup(size, PAGE_SIZE) >> PAGE_SHIFT);
@@ -99,14 +103,29 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
 				   struct sg_table *sgt)
 {
 	struct drm_gem_object *obj;
+	struct panfrost_gem_object *pobj;
 
 	obj = drm_gem_shmem_prime_import_sg_table(dev, attach, sgt);
+	if (IS_ERR(obj))
+		return obj;
+
+	pobj = to_panfrost_bo(obj);
+
+	/* imported dma-bufs share the exporter's reservation object */
+	pobj->resv = attach->dmabuf->resv;
 
-	panfrost_mmu_map(to_panfrost_bo(obj));
+	panfrost_mmu_map(pobj);
 
 	return obj;
 }
 
+struct reservation_object *panfrost_prime_res_obj(struct drm_gem_object *obj)
+{
+	struct panfrost_gem_object *bo = to_panfrost_bo(obj);
+
+	return bo->resv;
+}
+
 int panfrost_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout)
 {
 	bool write = !!(op & ETNA_PREP_WRITE);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index e90a39a060b53..5c56617040798 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -32,6 +32,9 @@ struct panfrost_gem_object {
 	/* cache maintenance */
 	u32 last_cpu_prep_op;
 
+	/* normally (resv == &_resv) except for imported bo's */
+	struct reservation_object *resv;
+	struct reservation_object _resv;
 };
 
 static inline
@@ -84,6 +87,7 @@ struct drm_gem_object *
 panfrost_gem_prime_import_sg_table(struct drm_device *dev,
 				   struct dma_buf_attachment *attach,
 				   struct sg_table *sgt);
+struct reservation_object *panfrost_prime_res_obj(struct drm_gem_object *obj);
 
 void panfrost_submit_put(struct panfrost_gem_submit * submit);
 
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 88c0353ee9bc3..fb098f1947a1f 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier:	GPL-2.0
 /* Copyright 2018 Panfrost Team */
 
+#include <linux/reservation.h>
 #include <drm/gpu_scheduler.h>
 #include <drm/panfrost_drm.h>
 
@@ -8,6 +9,7 @@
 #include "panfrost_job.h"
 #include "panfrost_regs.h"
 #include "panfrost_features.h"
+#include "panfrost_gem.h"
 
 #define JOB_BASE 0x1000
 
@@ -85,7 +87,7 @@ to_panfrost_job(struct drm_sched_job *sched_job)
 struct panfrost_fence {
 	struct dma_fence base;
 	struct drm_device *dev;
-	/* v3d seqno for signaled() test */
+	/* panfrost seqno for signaled() test */
 	u64 seqno;
 	int queue;
 };
@@ -265,19 +267,140 @@ static void kbase_job_hw_submit(struct panfrost_job *job,
 	job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
 }
 
+
+static void
+panfrost_unlock_bo_reservations(struct panfrost_gem_object **bos,
+			   int bo_count,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < bo_count; i++)
+		ww_mutex_unlock(&bos[i]->resv->lock);
+
+	ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs referenced by the job
+ * (using the ww_mutex acquire protocol, backing off and retrying on
+ * deadlock), so that the reservations can be updated when the job is
+ * pushed to the scheduler.  Also reserves a shared-fence slot on each
+ * BO before the job is committed to the hardware.  On failure, every
+ * lock taken so far is dropped before returning.
+ */
+static int
+panfrost_lock_bo_reservations(struct panfrost_gem_object **bos,
+			 int bo_count,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended_lock = -1;
+	int i, ret;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+	if (contended_lock != -1) {
+		struct panfrost_gem_object *bo = bos[contended_lock];
+
+		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < bo_count; i++) {
+		if (i == contended_lock)
+			continue;
+
+		ret = ww_mutex_lock_interruptible(&bos[i]->resv->lock,
+						  acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++)
+				ww_mutex_unlock(&bos[j]->resv->lock);
+
+			if (contended_lock != -1 && contended_lock >= i) {
+				struct panfrost_gem_object *bo = bos[contended_lock];
+
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended_lock = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	ww_acquire_done(acquire_ctx);
+
+	/* Reserve space for our shared (read-only) fence references,
+	 * before we commit the job to the hardware.
+	 */
+	for (i = 0; i < bo_count; i++) {
+		ret = reservation_object_reserve_shared(bos[i]->resv, 1);
+		if (ret) {
+			panfrost_unlock_bo_reservations(bos, bo_count,
+						   acquire_ctx);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void
+panfrost_attach_object_fences(struct panfrost_gem_object **bos, int bo_count,
+			 struct dma_fence *fence)
+{
+	int i;
+
+	for (i = 0; i < bo_count; i++) {
+		/* XXX: Use shared fences for read-only objects. */
+		reservation_object_add_excl_fence(bos[i]->resv, fence);
+	}
+}
+
 int panfrost_job_push(struct panfrost_job *job)
 {
+	struct panfrost_device *pfdev = job->pfdev;
 	int slot = panfrost_job_get_slot(job);
 	struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot];
-	int ret;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret = 0;
 
-	ret = drm_sched_job_init(&job->base, entity, NULL);
+	ret = panfrost_lock_bo_reservations(job->bos, job->bo_count,
+					    &acquire_ctx);
+
+	/* lock_bo_reservations() drops every lock it took on failure */
 	if (ret)
 		return ret;
 
+	mutex_lock(&pfdev->sched_lock);
+	ret = drm_sched_job_init(&job->base, entity, NULL);
+	if (ret) {
+		mutex_unlock(&pfdev->sched_lock);
+		goto unlock;
+	}
+	job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
+	kref_get(&job->refcount); /* put by scheduler job completion */
 	drm_sched_entity_push_job(&job->base, entity);
 
-	return 0;
+	mutex_unlock(&pfdev->sched_lock);
+
+	panfrost_attach_object_fences(job->bos, job->bo_count,
+				      job->render_done_fence);
+
+unlock:
+	panfrost_unlock_bo_reservations(job->bos, job->bo_count, &acquire_ctx);
+
+	return ret;
 }
 
 #endif
@@ -290,7 +413,7 @@ panfrost_job_free(struct drm_sched_job *sched_job)
 
 /**
  * Returns the fences that the bin or render job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
+ * panfrost_job_run() won't be called until all of them have been signaled.
  */
 static struct dma_fence *
 panfrost_job_dependency(struct drm_sched_job *sched_job,
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index b5c8835e187c0..4a80fb11c09a7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -9,11 +9,14 @@
 #define NUM_JOB_SLOTS	2	/* Don't need 3rd one until we have compute support */
 
 struct panfrost_device;
+struct panfrost_gem_object;
 struct panfrost_file_priv;
 
 struct panfrost_job {
 	struct drm_sched_job base;
 
+	struct kref refcount;
+
 	struct panfrost_device *pfdev;
 	struct panfrost_file_priv *file_priv;
 
@@ -23,6 +26,11 @@ struct panfrost_job {
 	__u32 atom_nr;
 	__u32 requirements;
 	__u32 flush_id;
+
+	struct panfrost_gem_object **bos;
+	u32 bo_count;
+
+	struct dma_fence *render_done_fence;
 };
 
 int panfrost_job_init(struct panfrost_device *pfdev);
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index cd3bb30175f8d..bb4c83b1065b1 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -84,6 +84,9 @@ struct drm_panfrost_gem_submit_atom {
 	__u64 jc;           /* in, address to GPU mapping of job descriptor */
 	__u32 atom_nr;      /* in, job ID */
 	__u32 requirements; /* in, a combination of PANFROST_JD_REQ_* */
+	__u64 bo_handles;   /* in, pointer to u32 array of BO handles */
+	__u32 bo_handle_count; /* in, number of BO handles */
+	__u32 pad;          /* pad to 64 bits; keep zeroed */
 };
 
 struct drm_panfrost_gem_submit {
-- 
GitLab