From bf0257ae834f48aaf34425dda0fd0ab5bb71bd19 Mon Sep 17 00:00:00 2001
From: Christoph Manszewski <christoph.manszewski@intel.com>
Date: Wed, 29 Jan 2025 18:12:04 +0100
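Subject: [PATCH] drm/xe: Implement SR-IOV and eudebug exclusivity

Make eudebug and SR-IOV mutually exclusive.

Replace the boolean xe->eudebug.enable with an explicit state
(not available, not supported, supported, enabled):

 - On an SR-IOV VF eudebug is marked as not available.
 - On a PF, enabling VFs switches eudebug support off and fails with
   -EPERM if eudebug is currently enabled. While support is off,
   enabling eudebug fails with -EPERM. Support is switched back on
   when VFs are disabled or when enabling them fails.

xe_eudebug_init() now checks IS_SRIOV_VF() and is called from
xe_device_probe_early(), after sriov_update_device_info(), instead of
from xe_device_create().

Signed-off-by: Christoph Manszewski <christoph.manszewski@intel.com>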
---
 drivers/gpu/drm/xe/tests/xe_eudebug.c |  8 +++-
 drivers/gpu/drm/xe/xe_device.c        |  4 +-
 drivers/gpu/drm/xe/xe_device_types.h  |  5 ++-
 drivers/gpu/drm/xe/xe_eudebug.c       | 65 ++++++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_eudebug.h       |  8 ++++
 drivers/gpu/drm/xe/xe_eudebug_types.h | 15 +++++++
 drivers/gpu/drm/xe/xe_exec_queue.c    |  2 +-
 drivers/gpu/drm/xe/xe_gt.c            |  3 +-
 drivers/gpu/drm/xe/xe_pci_sriov.c     |  9 ++++
 9 files changed, 106 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_eudebug.c b/drivers/gpu/drm/xe/tests/xe_eudebug.c
index 297c26ca57b01..8d157e46768ef 100644
--- a/drivers/gpu/drm/xe/tests/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/tests/xe_eudebug.c
@@ -146,7 +146,13 @@ static void check_regs(struct xe_device *xe, bool enable_eudebug)
 static int toggle_reg_value(struct xe_device *xe)
 {
 	struct kunit *test = kunit_get_current_test();
-	bool enable_eudebug = xe->eudebug.enable;
+	bool enable_eudebug = xe->eudebug.state == XE_EUDEBUG_ENABLED;
+
+	if (IS_SRIOV_VF(xe))
+		kunit_skip(test, "eudebug not available in SR-IOV VF mode\n");
+
+	if (xe->eudebug.state == XE_EUDEBUG_NOT_SUPPORTED)
+		kunit_skip(test, "eudebug not supported\n");
 
 	kunit_printk(KERN_DEBUG, test, "Test eudebug WAs for graphics version: %u\n",
 		     GRAPHICS_VERx100(xe));
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 71d91d6641ded..4d65cc388ee4e 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -479,8 +479,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
 	INIT_LIST_HEAD(&xe->pinned.external_vram);
 	INIT_LIST_HEAD(&xe->pinned.evicted);
 
-	xe_eudebug_init(xe);
-
 	xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq",
 						       WQ_MEM_RECLAIM);
 	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
@@ -714,6 +712,8 @@ int xe_device_probe_early(struct xe_device *xe)
 
 	sriov_update_device_info(xe);
 
+	xe_eudebug_init(xe);
+
 	err = xe_pcode_probe_early(xe);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index f680633bd616d..1aa9b2dbc82a8 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -13,6 +13,7 @@
 #include <drm/ttm/ttm_device.h>
 
 #include "xe_devcoredump_types.h"
+#include "xe_eudebug_types.h"
 #include "xe_heci_gsc.h"
 #include "xe_lmtt_types.h"
 #include "xe_memirq_types.h"
@@ -565,8 +566,8 @@ struct xe_device {
 		/** discovery_lock: used for discovery to block xe ioctls */
 		struct rw_semaphore discovery_lock;
 
-		/** @enable: is the debugging functionality enabled */
-		bool enable;
+		/** @state: debugging functionality state */
+		enum xe_eudebug_state state;
 
 		/** @attention_scan: attention scan worker */
 		struct delayed_work attention_scan;
diff --git a/drivers/gpu/drm/xe/xe_eudebug.c b/drivers/gpu/drm/xe/xe_eudebug.c
index f80e07822fc05..297a8267be4f4 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.c
+++ b/drivers/gpu/drm/xe/xe_eudebug.c
@@ -2320,12 +2320,50 @@ int xe_eudebug_connect_ioctl(struct drm_device *dev,
 
 	lockdep_assert_held(&xe->eudebug.discovery_lock);
 
-	if (!xe->eudebug.enable)
+	if (!xe_eudebug_is_enabled(xe))
 		return -ENODEV;
 
 	return xe_eudebug_connect(xe, param);
 }
 
+bool xe_eudebug_is_enabled(struct xe_device *xe)
+{
+	/* 0 is not a valid xe_eudebug_state: complain about an unset state. */
+	XE_WARN_ON(!xe->eudebug.state);
+
+	return xe->eudebug.state == XE_EUDEBUG_ENABLED;
+}
+
+/*
+ * Switch eudebug support on or off while holding discovery_lock. A device in
+ * XE_EUDEBUG_NOT_AVAILABLE has nothing to toggle and is left untouched.
+ */
+static int __xe_eudebug_toggle_support(struct xe_device *xe, bool enable)
+{
+	int ret = 0;
+
+	down_write(&xe->eudebug.discovery_lock);
+	if (XE_WARN_ON(!xe->eudebug.state))
+		ret = -EINVAL;	/* state was never initialized */
+	else if (!enable && xe->eudebug.state == XE_EUDEBUG_ENABLED)
+		ret = -EPERM;	/* eudebug is currently enabled */
+	else if (xe->eudebug.state != XE_EUDEBUG_NOT_AVAILABLE)
+		xe->eudebug.state = enable ? XE_EUDEBUG_SUPPORTED : XE_EUDEBUG_NOT_SUPPORTED;
+	up_write(&xe->eudebug.discovery_lock);
+
+	return ret;
+}
+
+void xe_eudebug_support_enable(struct xe_device *xe)
+{
+	__xe_eudebug_toggle_support(xe, true); /* toggle result is dropped */
+}
+
+int xe_eudebug_support_disable(struct xe_device *xe)
+{
+	return __xe_eudebug_toggle_support(xe, false); /* -EPERM while enabled */
+}
+
 static void add_sr_entry(struct xe_hw_engine *hwe,
 			 struct xe_reg_mcr mcr_reg,
 			 u32 mask, bool enable)
@@ -2381,12 +2419,17 @@ static int xe_eudebug_enable(struct xe_device *xe, bool enable)
 	 */
 	down_write(&xe->eudebug.discovery_lock);
 
+	if (xe->eudebug.state == XE_EUDEBUG_NOT_SUPPORTED) {
+		up_write(&xe->eudebug.discovery_lock);
+		return -EPERM;
+	}
+
 	if (!enable && !list_empty(&xe->eudebug.list)) {
 		up_write(&xe->eudebug.discovery_lock);
 		return -EBUSY;
 	}
 
-	if (enable == xe->eudebug.enable) {
+	if (enable == xe_eudebug_is_enabled(xe)) {
 		up_write(&xe->eudebug.discovery_lock);
 		return 0;
 	}
@@ -2403,7 +2446,7 @@ static int xe_eudebug_enable(struct xe_device *xe, bool enable)
 		flush_work(&gt->reset.worker);
 	}
 
-	xe->eudebug.enable = enable;
+	xe->eudebug.state = enable ? XE_EUDEBUG_ENABLED : XE_EUDEBUG_SUPPORTED;
 	up_write(&xe->eudebug.discovery_lock);
 
 	if (enable)
@@ -2418,7 +2461,7 @@ static ssize_t enable_eudebug_show(struct device *dev, struct device_attribute *
 {
 	struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
 
-	return sysfs_emit(buf, "%u\n", xe->eudebug.enable);
+	return sysfs_emit(buf, "%u\n", xe->eudebug.state == XE_EUDEBUG_ENABLED);
 }
 
 static ssize_t enable_eudebug_store(struct device *dev, struct device_attribute *attr,
@@ -2461,17 +2504,27 @@ void xe_eudebug_init(struct xe_device *xe)
 	init_rwsem(&xe->eudebug.discovery_lock);
 	INIT_DELAYED_WORK(&xe->eudebug.attention_scan, attention_scan_fn);
 
+	if (IS_SRIOV_VF(xe)) {
+		drm_info(&xe->drm, "eudebug not available in SR-IOV VF mode\n");
+		xe->eudebug.state = XE_EUDEBUG_NOT_AVAILABLE;
+		return;
+	}
+
 	xe->eudebug.ordered_wq = alloc_ordered_workqueue("xe-eudebug-ordered-wq", 0);
 	if (!xe->eudebug.ordered_wq) {
 		drm_warn(&xe->drm, "eudebug ordered workqueue alloc failed\n");
+		xe->eudebug.state = XE_EUDEBUG_NOT_AVAILABLE;
 		return;
 	}
 
 	ret = sysfs_create_file(&xe->drm.dev->kobj, &dev_attr_enable_eudebug.attr);
-	if (ret)
+	if (ret) {
 		drm_warn(&xe->drm, "eudebug sysfs init failed: %d, debugger unavailable\n", ret);
-	else
+		xe->eudebug.state = XE_EUDEBUG_NOT_AVAILABLE;
+	} else {
 		devm_add_action_or_reset(dev, xe_eudebug_sysfs_fini, xe);
+		xe->eudebug.state = XE_EUDEBUG_SUPPORTED;
+	}
 }
 
 void xe_eudebug_fini(struct xe_device *xe)
diff --git a/drivers/gpu/drm/xe/xe_eudebug.h b/drivers/gpu/drm/xe/xe_eudebug.h
index 8fadabb336bf9..2aa3bff0eafc8 100644
--- a/drivers/gpu/drm/xe/xe_eudebug.h
+++ b/drivers/gpu/drm/xe/xe_eudebug.h
@@ -28,6 +28,10 @@ int xe_eudebug_connect_ioctl(struct drm_device *dev,
 			     void *data,
 			     struct drm_file *file);
 
+void xe_eudebug_support_enable(struct xe_device *xe);
+int xe_eudebug_support_disable(struct xe_device *xe);
+bool xe_eudebug_is_enabled(struct xe_device *xe);
+
 void xe_eudebug_init(struct xe_device *xe);
 void xe_eudebug_fini(struct xe_device *xe);
 
@@ -69,6 +73,10 @@ static inline int xe_eudebug_connect_ioctl(struct drm_device *dev,
 					   void *data,
 					   struct drm_file *file) { return 0; }
 
+static inline void xe_eudebug_support_enable(struct xe_device *xe) { }
+static inline int xe_eudebug_support_disable(struct xe_device *xe) { return 0; }
+static inline bool xe_eudebug_is_enabled(struct xe_device *xe) { return false; }
+
 static inline void xe_eudebug_init(struct xe_device *xe) { }
 static inline void xe_eudebug_fini(struct xe_device *xe) { }
 
diff --git a/drivers/gpu/drm/xe/xe_eudebug_types.h b/drivers/gpu/drm/xe/xe_eudebug_types.h
index bf250735c37e8..2c1b9e287e697 100644
--- a/drivers/gpu/drm/xe/xe_eudebug_types.h
+++ b/drivers/gpu/drm/xe/xe_eudebug_types.h
@@ -176,6 +176,21 @@ struct xe_eudebug {
 	struct dma_fence __rcu *pf_fence;
 };
 
+/**
+ * enum xe_eudebug_state - eudebug capability state
+ *
+ * @XE_EUDEBUG_NOT_AVAILABLE: eudebug cannot be used (SR-IOV VF or eudebug init failure)
+ * @XE_EUDEBUG_NOT_SUPPORTED: eudebug support switched off, enabling is refused
+ * @XE_EUDEBUG_SUPPORTED: eudebug feature supported but disabled
+ * @XE_EUDEBUG_ENABLED: eudebug enabled
+ */
+enum xe_eudebug_state {
+	XE_EUDEBUG_NOT_AVAILABLE = 1,	/* 0 is not a valid state */
+	XE_EUDEBUG_NOT_SUPPORTED,
+	XE_EUDEBUG_SUPPORTED,
+	XE_EUDEBUG_ENABLED,
+};
+
 /**
  * struct xe_eudebug_event - Internal base event struct for eudebug
  */
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 16736f2b4c399..46baad9ad8a83 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -489,7 +489,7 @@ static int exec_queue_set_eudebug(struct xe_device *xe, struct xe_exec_queue *q,
 		return -EINVAL;
 
 #if IS_ENABLED(CONFIG_DRM_XE_EUDEBUG)
-	if (XE_IOCTL_DBG(xe, !xe->eudebug.enable))
+	if (XE_IOCTL_DBG(xe, xe->eudebug.state != XE_EUDEBUG_ENABLED))
 		return -EPERM;
 #endif
 
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 52b150da5abe6..b8dbc92cc7108 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -20,6 +20,7 @@
 #include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
+#include "xe_eudebug.h"
 #include "xe_exec_queue.h"
 #include "xe_execlist.h"
 #include "xe_force_wake.h"
@@ -707,7 +708,7 @@ static int do_gt_reset(struct xe_gt *gt)
 
 	xe_gsc_wa_14015076503(gt, true);
 
-	if (xe->eudebug.enable)
+	if (xe_eudebug_is_enabled(xe))
 		do_render_reset(gt);
 
 	xe_mmio_write32(&gt->mmio, GDRST, GRDOM_FULL);
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index aaceee748287e..13f56851c048c 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -5,6 +5,7 @@
 
 #include "xe_assert.h"
 #include "xe_device.h"
+#include "xe_eudebug.h"
 #include "xe_gt_sriov_pf_config.h"
 #include "xe_gt_sriov_pf_control.h"
 #include "xe_pci_sriov.h"
@@ -73,6 +74,10 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
 	xe_assert(xe, num_vfs <= total_vfs);
 	xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs));
 
+	err = xe_eudebug_support_disable(xe);
+	if (err < 0)
+		goto failed_eudebug;
+
 	/*
 	 * We must hold additional reference to the runtime PM to keep PF in D0
 	 * during VFs lifetime, as our VFs do not implement the PM capability.
@@ -99,7 +104,9 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
 failed:
 	pf_unprovision_vfs(xe, num_vfs);
 	xe_pm_runtime_put(xe);
+	xe_eudebug_support_enable(xe);
 
+failed_eudebug:
 	xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n",
 			num_vfs, str_plural(num_vfs), ERR_PTR(err));
 	return err;
@@ -126,6 +133,8 @@ static int pf_disable_vfs(struct xe_device *xe)
 	/* not needed anymore - see pf_enable_vfs() */
 	xe_pm_runtime_put(xe);
 
+	xe_eudebug_support_enable(xe);
+
 	xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs));
 	return 0;
 }
-- 
GitLab