Compare revisions

Commits on Source (12)
Showing 159 additions and 48 deletions
......@@ -227,6 +227,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
{
int ret = 0;
if (!amdgpu_sriov_vf(adev))
ret = sysfs_create_file(&adev->dev->kobj,
&dev_attr_pcie_replay_count.attr);
return ret;
}
static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
{
if (!amdgpu_sriov_vf(adev))
sysfs_remove_file(&adev->dev->kobj,
&dev_attr_pcie_replay_count.attr);
}
static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t ppos, size_t count)
......@@ -4166,11 +4184,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
}
#endif
static const struct attribute *amdgpu_dev_attributes[] = {
&dev_attr_pcie_replay_count.attr,
NULL
};
static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
{
if (amdgpu_mcbp == 1)
......@@ -4276,6 +4289,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
mutex_init(&adev->gfx.kfd_sch_mutex);
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
amdgpu_device_init_apu_flags(adev);
......@@ -4616,7 +4631,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
} else
adev->ucode_sysfs_en = true;
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
r = amdgpu_device_attr_sysfs_init(adev);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
......@@ -4753,7 +4768,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_pm_sysfs_fini(adev);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
amdgpu_device_attr_sysfs_fini(adev);
amdgpu_fru_sysfs_fini(adev);
amdgpu_reg_state_sysfs_fini(adev);
......@@ -6902,18 +6917,26 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
{
struct dma_fence *old = NULL;
dma_fence_get(gang);
do {
dma_fence_put(old);
old = amdgpu_device_get_gang(adev);
if (old == gang)
break;
if (!dma_fence_is_signaled(old))
if (!dma_fence_is_signaled(old)) {
dma_fence_put(gang);
return old;
}
} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
old, gang) != old);
/*
* Drop it once for the exchanged reference in adev and once for the
* thread local reference acquired in amdgpu_device_get_gang().
*/
dma_fence_put(old);
dma_fence_put(old);
return NULL;
}
......
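The comment added at the end of amdgpu_device_switch_gang() is the subtle part of this hunk. A minimal userspace sketch of the same reference-counting rule follows, using C11 atomics; toy_fence, toy_get() and toy_put() are illustrative stand-ins, not the kernel's dma_fence API.

/* Sketch only: take an extra reference on the incoming fence, loop on a
 * compare-and-exchange, and on success drop the displaced fence twice, once for
 * the reference that lived in the shared slot and once for the local reference
 * taken while reading it. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct toy_fence {
	atomic_int refcount;
	bool signaled;
};

static struct toy_fence *toy_get(struct toy_fence *f)
{
	if (f)
		atomic_fetch_add(&f->refcount, 1);
	return f;
}

static void toy_put(struct toy_fence *f)
{
	if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
		free(f);
}

static _Atomic(struct toy_fence *) gang_submit;

/* Returns NULL when the switch happened, or the still-running old gang
 * (with a reference the caller must put) when it did not. */
static struct toy_fence *switch_gang(struct toy_fence *gang)
{
	struct toy_fence *old = NULL, *expected;

	toy_get(gang);			/* reference that will live in gang_submit */
	do {
		toy_put(old);
		old = toy_get(atomic_load(&gang_submit));
		if (old == gang)
			break;
		if (old && !old->signaled) {
			toy_put(gang);	/* nothing was exchanged, give the ref back */
			return old;
		}
		expected = old;
	} while (!atomic_compare_exchange_strong(&gang_submit, &expected, gang));

	/* Drop once for the reference that was stored in gang_submit and once
	 * for the thread-local reference taken in the loop above. */
	toy_put(old);
	toy_put(old);
	return NULL;
}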
......@@ -141,6 +141,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
......@@ -1063,6 +1064,11 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
* limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* - 0x4: Disable GPU soft recovery, always do a full reset
* - 0x8: Use VRAM for firmware loading
* - 0x10: Enable ACA based RAS logging
* - 0x20: Enable experimental resets
* - 0x40: Disable ring resets
* - 0x80: Use VRAM for SMU pool
*/
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
......@@ -2240,6 +2246,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: ring reset disabled\n");
adev->debug_disable_gpu_ring_reset = true;
}
if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
pr_info("debug: use vram for smu pool\n");
adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
}
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
......
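The new 0x80 entry follows the existing debug_mask convention: one bit per option, tested once at init time. Below is a hedged sketch of that convention in plain C; the bit values mirror the enum in this hunk, while parse_debug_mask() and its output strings are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define BIT(n)				(1u << (n))
#define DEBUG_ENABLE_RAS_ACA		BIT(4)	/* 0x10: ACA based RAS logging */
#define DEBUG_ENABLE_EXP_RESETS		BIT(5)	/* 0x20: experimental resets */
#define DEBUG_DISABLE_GPU_RING_RESET	BIT(6)	/* 0x40: disable ring resets */
#define DEBUG_SMU_POOL			BIT(7)	/* 0x80: use VRAM for the SMU pool */

static void parse_debug_mask(uint32_t mask)
{
	if (mask & DEBUG_SMU_POOL)
		printf("debug: use vram for smu pool\n");
	if (mask & DEBUG_DISABLE_GPU_RING_RESET)
		printf("debug: ring reset disabled\n");
}

int main(void)
{
	/* e.g. loading with debug_mask=0xc0 sets bits 6 and 7 */
	parse_debug_mask(0xc0);
	return 0;
}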
......@@ -2160,11 +2160,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
r = amdgpu_dpm_switch_power_profile(adev, profile, false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
mutex_lock(&adev->gfx.workload_profile_mutex);
if (adev->gfx.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, profile, false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
adev->gfx.workload_profile_active = false;
}
mutex_unlock(&adev->gfx.workload_profile_mutex);
} else {
schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
}
......@@ -2183,13 +2188,25 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&adev->gfx.total_submission_cnt);
if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {
cancel_delayed_work_sync(&adev->gfx.idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->gfx.workload_profile_active)
return;
mutex_lock(&adev->gfx.workload_profile_mutex);
if (!adev->gfx.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, profile, true);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
adev->gfx.workload_profile_active = true;
}
mutex_unlock(&adev->gfx.workload_profile_mutex);
}
void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
......
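The scheme introduced here keeps every transition of workload_profile_active under the new workload_profile_mutex, and the begin-use path may peek at the flag without the lock only because the idle work was just cancelled, so nothing can clear it concurrently. The pthreads sketch below models that flow with hypothetical names; it is not driver code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t profile_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool profile_active;

static int switch_power_profile(bool enable)
{
	printf("power profile %s\n", enable ? "enabled" : "disabled");
	return 0;			/* pretend the DPM call succeeded */
}

static void ring_begin_use(void)
{
	/* idle work already cancelled at this point, so the unlocked read is safe */
	if (profile_active)
		return;

	pthread_mutex_lock(&profile_mutex);
	if (!profile_active) {
		if (switch_power_profile(true))
			fprintf(stderr, "failed to enable power profile\n");
		profile_active = true;
	}
	pthread_mutex_unlock(&profile_mutex);
}

static void idle_work_handler(void)
{
	pthread_mutex_lock(&profile_mutex);
	if (profile_active) {
		if (switch_power_profile(false))
			fprintf(stderr, "failed to disable power profile\n");
		profile_active = false;
	}
	pthread_mutex_unlock(&profile_mutex);
}

int main(void)
{
	ring_begin_use();	/* first submission enables the profile */
	ring_begin_use();	/* second one is a no-op */
	idle_work_handler();	/* idle timeout disables it again */
	return 0;
}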
......@@ -483,6 +483,8 @@ struct amdgpu_gfx {
atomic_t total_submission_cnt;
struct delayed_work idle_work;
bool workload_profile_active;
struct mutex workload_profile_mutex;
};
struct amdgpu_gfx_ras_reg_entry {
......
......@@ -1861,6 +1861,7 @@ int psp_ras_initialize(struct psp_context *psp)
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
ras_cmd->ras_in_message.init_flags.nps_mode =
adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
ret = psp_ta_load(psp, &psp->ras_context.context);
......
......@@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return (adev->gmc.is_app_apu) ? false : true;
default:
......@@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
......
......@@ -438,10 +438,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -455,13 +460,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&vcn_inst->total_submission_cnt);
if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) {
cancel_delayed_work_sync(&vcn_inst->idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&vcn_inst->vcn_pg_lock);
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
......
......@@ -358,6 +358,9 @@ struct amdgpu_vcn {
bool per_inst_fw;
unsigned fw_version;
bool workload_profile_active;
struct mutex workload_profile_mutex;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
......
......@@ -151,6 +151,7 @@ struct ta_ras_init_flags {
uint16_t xcc_mask;
uint8_t channel_dis_num;
uint8_t nps_mode;
uint32_t active_umc_mask;
};
struct ta_ras_mca_addr {
......
......@@ -147,10 +147,15 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -164,13 +169,26 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
......
......@@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
!pm->pmf->config_dequeue_wait_counts_size)
return 0;
if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
return 0;
size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
......@@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
cmd, value);
if (!retval)
if (!retval) {
retval = kq_submit_packet(pm->priv_queue);
else
/* If default value is modified, cache that in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
} else {
kq_rollback_packet(pm->priv_queue);
}
}
/* If default value is modified, cache that value in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
out:
mutex_unlock(&pm->lock);
return retval;
......
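The restructuring above moves the wait-time caching inside the success branch, so the cache is refreshed only when the packet was both built and submitted. A condensed model of that flow, with hypothetical stand-ins for the packet-manager callbacks:

#include <stdbool.h>
#include <stdio.h>

enum wait_cmd { WAIT_INIT, WAIT_RESET };

static int build_packet(enum wait_cmd cmd)	{ (void)cmd; return 0; }  /* 0 on success */
static int submit_packet(void)			{ return 0; }
static void rollback_packet(void)		{ puts("rollback"); }
static void cache_wait_times(void)		{ puts("wait times cached"); }

static int config_dequeue_wait_counts(enum wait_cmd cmd)
{
	int ret = build_packet(cmd);

	if (!ret) {
		ret = submit_packet();
		/* cache the (possibly modified) defaults only on full success */
		if (!ret && cmd == WAIT_INIT)
			cache_wait_times();
	} else {
		rollback_packet();
	}
	return ret;
}

int main(void)
{
	return config_dequeue_wait_counts(WAIT_INIT);
}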
......@@ -310,6 +310,13 @@ static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manage
reg_data);
}
/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
* register/value for configuring dequeue wait counts
*
* @return: -ve for failure and 0 for success and buffer is
* filled in with packet
*
**/
static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
enum kfd_config_dequeue_wait_counts_cmd cmd,
......@@ -321,24 +328,25 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
switch (cmd) {
case KFD_DEQUEUE_WAIT_INIT: {
uint32_t sch_wave = 0, que_sleep = 0;
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
uint32_t sch_wave = 0, que_sleep = 1;
/* For all gfx9 ASICs > gfx941,
* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
* On a 1GHz machine this is roughly 1 microsecond, which is
* about how long it takes to load data out of memory during
* queue connect
* QUE_SLEEP: Wait Count for Dequeue Retry.
*
* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
*/
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
que_sleep = 1;
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
} else {
return 0;
}
if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
return -EPERM;
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
&reg_offset, &reg_data);
......
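Both kfd hunks gate on the same GC IP version window: only gfx 9.4.1 and newer gfx9 parts program these wait counts, the caller returns 0 for everything else, and the v9 builder now returns -EPERM as a backstop. The sketch below shows that range check; the version packing macro is an assumption for illustration, not the driver's definition.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define IPVER(maj, min, rev)	(((uint32_t)(maj) << 16) | ((min) << 8) | (rev))

static int config_dequeue_wait_counts_v9(uint32_t gc_ver)
{
	/* only gfx 9.4.1 up to (not including) 10.0.0 programs these counts */
	if (gc_ver < IPVER(9, 4, 1) || gc_ver >= IPVER(10, 0, 0))
		return -EPERM;

	printf("programming CP_IQ_WAIT_TIME2.QUE_SLEEP for gfx %u.%u.%u\n",
	       (unsigned)(gc_ver >> 16), (unsigned)((gc_ver >> 8) & 0xff),
	       (unsigned)(gc_ver & 0xff));
	return 0;
}

int main(void)
{
	config_dequeue_wait_counts_v9(IPVER(9, 4, 3));	/* programmed */
	config_dequeue_wait_counts_v9(IPVER(9, 0, 1));	/* -EPERM, skipped by caller */
	return 0;
}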
......@@ -295,7 +295,8 @@ enum ip_power_state {
};
/* Used to mask smu debug modes */
#define SMU_DEBUG_HALT_ON_ERROR 0x1
#define SMU_DEBUG_HALT_ON_ERROR BIT(0)
#define SMU_DEBUG_POOL_USE_VRAM BIT(1)
#define MAX_SMU_I2C_BUSES 2
......
......@@ -1027,7 +1027,10 @@ static int smu_alloc_memory_pool(struct smu_context *smu)
memory_pool->size = pool_size;
memory_pool->align = PAGE_SIZE;
memory_pool->domain = AMDGPU_GEM_DOMAIN_GTT;
memory_pool->domain =
(adev->pm.smu_debug_mask & SMU_DEBUG_POOL_USE_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM :
AMDGPU_GEM_DOMAIN_GTT;
switch (pool_size) {
case SMU_MEMORY_POOL_SIZE_256_MB:
......
......@@ -961,7 +961,7 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
unsigned optimal_score = ~0;
/* loop through vco from low to high */
vco_min = max(max(vco_min, vclk), dclk);
vco_min = max3(vco_min, vclk, dclk);
for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
......
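The radeon change is a pure cleanup: max3() is the kernel helper for the nested max(max(a, b), c) form. A tiny self-check of the equivalence, with plain C macros standing in for the kernel ones:

#include <assert.h>

#define max(a, b)	((a) > (b) ? (a) : (b))
#define max3(a, b, c)	max(max((a), (b)), (c))

int main(void)
{
	unsigned vco_min = 600000, vclk = 540000, dclk = 700000;

	assert(max3(vco_min, vclk, dclk) == max(max(vco_min, vclk), dclk));
	assert(max3(vco_min, vclk, dclk) == 700000);
	return 0;
}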