Compare revisions

Commits on Source (12)
Showing 159 additions and 48 deletions
......@@ -227,6 +227,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
static DEVICE_ATTR(pcie_replay_count, 0444,
amdgpu_device_get_pcie_replay_count, NULL);
static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
{
int ret = 0;
if (!amdgpu_sriov_vf(adev))
ret = sysfs_create_file(&adev->dev->kobj,
&dev_attr_pcie_replay_count.attr);
return ret;
}
static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
{
if (!amdgpu_sriov_vf(adev))
sysfs_remove_file(&adev->dev->kobj,
&dev_attr_pcie_replay_count.attr);
}
static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t ppos, size_t count)
......@@ -4166,11 +4184,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
}
#endif
static const struct attribute *amdgpu_dev_attributes[] = {
&dev_attr_pcie_replay_count.attr,
NULL
};
static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
{
if (amdgpu_mcbp == 1)
......@@ -4276,6 +4289,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
mutex_init(&adev->gfx.kfd_sch_mutex);
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
amdgpu_device_init_apu_flags(adev);
......@@ -4616,7 +4631,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
} else
adev->ucode_sysfs_en = true;
r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
r = amdgpu_device_attr_sysfs_init(adev);
if (r)
dev_err(adev->dev, "Could not create amdgpu device attr\n");
......@@ -4753,7 +4768,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
amdgpu_pm_sysfs_fini(adev);
if (adev->ucode_sysfs_en)
amdgpu_ucode_sysfs_fini(adev);
sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
amdgpu_device_attr_sysfs_fini(adev);
amdgpu_fru_sysfs_fini(adev);
amdgpu_reg_state_sysfs_fini(adev);
......@@ -6902,18 +6917,26 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
{
struct dma_fence *old = NULL;
dma_fence_get(gang);
do {
dma_fence_put(old);
old = amdgpu_device_get_gang(adev);
if (old == gang)
break;
if (!dma_fence_is_signaled(old))
if (!dma_fence_is_signaled(old)) {
dma_fence_put(gang);
return old;
}
} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
old, gang) != old);
/*
* Drop it once for the exchanged reference in adev and once for the
* thread local reference acquired in amdgpu_device_get_gang().
*/
dma_fence_put(old);
dma_fence_put(old);
return NULL;
}
......
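The comment added at the end of amdgpu_device_switch_gang() is the subtle part of this hunk. A minimal userspace sketch of the same reference-counting rule follows, using C11 atomics; toy_fence, toy_get() and toy_put() are illustrative stand-ins, not the kernel's dma_fence API.

/* Sketch only: take an extra reference on the incoming fence, loop on a
 * compare-and-exchange, and on success drop the displaced fence twice, once for
 * the reference that lived in the shared slot and once for the local reference
 * taken while reading it. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct toy_fence {
	atomic_int refcount;
	bool signaled;
};

static struct toy_fence *toy_get(struct toy_fence *f)
{
	if (f)
		atomic_fetch_add(&f->refcount, 1);
	return f;
}

static void toy_put(struct toy_fence *f)
{
	if (f && atomic_fetch_sub(&f->refcount, 1) == 1)
		free(f);
}

static _Atomic(struct toy_fence *) gang_submit;

/* Returns NULL when the switch happened, or the still-running old gang
 * (with a reference the caller must put) when it did not. */
static struct toy_fence *switch_gang(struct toy_fence *gang)
{
	struct toy_fence *old = NULL, *expected;

	toy_get(gang);			/* reference that will live in gang_submit */
	do {
		toy_put(old);
		old = toy_get(atomic_load(&gang_submit));
		if (old == gang)
			break;
		if (old && !old->signaled) {
			toy_put(gang);	/* nothing was exchanged, give the ref back */
			return old;
		}
		expected = old;
	} while (!atomic_compare_exchange_strong(&gang_submit, &expected, gang));

	/* Drop once for the reference that was stored in gang_submit and once
	 * for the thread-local reference taken in the loop above. */
	toy_put(old);
	toy_put(old);
	return NULL;
}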
......@@ -141,6 +141,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
......@@ -1063,6 +1064,11 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
* limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB.
* - 0x4: Disable GPU soft recovery, always do a full reset
* - 0x8: Use VRAM for firmware loading
* - 0x10: Enable ACA based RAS logging
* - 0x20: Enable experimental resets
* - 0x40: Disable ring resets
* - 0x80: Use VRAM for SMU pool
*/
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
......@@ -2240,6 +2246,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: ring reset disabled\n");
adev->debug_disable_gpu_ring_reset = true;
}
if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
pr_info("debug: use vram for smu pool\n");
adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
}
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
......
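The new 0x80 entry follows the existing debug_mask convention: one bit per option, tested once at init time. Below is a hedged sketch of that convention in plain C; the bit values mirror the enum in this hunk, while parse_debug_mask() and its output strings are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define BIT(n)				(1u << (n))
#define DEBUG_ENABLE_RAS_ACA		BIT(4)	/* 0x10: ACA based RAS logging */
#define DEBUG_ENABLE_EXP_RESETS		BIT(5)	/* 0x20: experimental resets */
#define DEBUG_DISABLE_GPU_RING_RESET	BIT(6)	/* 0x40: disable ring resets */
#define DEBUG_SMU_POOL			BIT(7)	/* 0x80: use VRAM for the SMU pool */

static void parse_debug_mask(uint32_t mask)
{
	if (mask & DEBUG_SMU_POOL)
		printf("debug: use vram for smu pool\n");
	if (mask & DEBUG_DISABLE_GPU_RING_RESET)
		printf("debug: ring reset disabled\n");
}

int main(void)
{
	/* e.g. loading with debug_mask=0xc0 sets bits 6 and 7 */
	parse_debug_mask(0xc0);
	return 0;
}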
......@@ -2160,11 +2160,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
r = amdgpu_dpm_switch_power_profile(adev, profile, false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
mutex_lock(&adev->gfx.workload_profile_mutex);
if (adev->gfx.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, profile, false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
adev->gfx.workload_profile_active = false;
}
mutex_unlock(&adev->gfx.workload_profile_mutex);
} else {
schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
}
......@@ -2183,13 +2188,25 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&adev->gfx.total_submission_cnt);
if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {
cancel_delayed_work_sync(&adev->gfx.idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->gfx.workload_profile_active)
return;
mutex_lock(&adev->gfx.workload_profile_mutex);
if (!adev->gfx.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, profile, true);
if (r)
dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
"fullscreen 3D" : "compute");
adev->gfx.workload_profile_active = true;
}
mutex_unlock(&adev->gfx.workload_profile_mutex);
}
void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
......
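The scheme introduced here keeps every transition of workload_profile_active under the new workload_profile_mutex, and the begin-use path may peek at the flag without the lock only because the idle work was just cancelled, so nothing can clear it concurrently. The pthreads sketch below models that flow with hypothetical names; it is not driver code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t profile_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool profile_active;

static int switch_power_profile(bool enable)
{
	printf("power profile %s\n", enable ? "enabled" : "disabled");
	return 0;			/* pretend the DPM call succeeded */
}

static void ring_begin_use(void)
{
	/* idle work already cancelled at this point, so the unlocked read is safe */
	if (profile_active)
		return;

	pthread_mutex_lock(&profile_mutex);
	if (!profile_active) {
		if (switch_power_profile(true))
			fprintf(stderr, "failed to enable power profile\n");
		profile_active = true;
	}
	pthread_mutex_unlock(&profile_mutex);
}

static void idle_work_handler(void)
{
	pthread_mutex_lock(&profile_mutex);
	if (profile_active) {
		if (switch_power_profile(false))
			fprintf(stderr, "failed to disable power profile\n");
		profile_active = false;
	}
	pthread_mutex_unlock(&profile_mutex);
}

int main(void)
{
	ring_begin_use();	/* first submission enables the profile */
	ring_begin_use();	/* second one is a no-op */
	idle_work_handler();	/* idle timeout disables it again */
	return 0;
}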
......@@ -483,6 +483,8 @@ struct amdgpu_gfx {
atomic_t total_submission_cnt;
struct delayed_work idle_work;
bool workload_profile_active;
struct mutex workload_profile_mutex;
};
struct amdgpu_gfx_ras_reg_entry {
......
......@@ -1861,6 +1861,7 @@ int psp_ras_initialize(struct psp_context *psp)
if (adev->gmc.gmc_funcs->query_mem_partition_mode)
ras_cmd->ras_in_message.init_flags.nps_mode =
adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
ret = psp_ta_load(psp, &psp->ras_context.context);
......
......@@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
return (adev->gmc.is_app_apu) ? false : true;
default:
......@@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 12):
case IP_VERSION(13, 0, 14):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
......
......@@ -438,10 +438,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -455,13 +460,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&vcn_inst->total_submission_cnt);
if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) {
cancel_delayed_work_sync(&vcn_inst->idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&vcn_inst->vcn_pg_lock);
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
......
......@@ -358,6 +358,9 @@ struct amdgpu_vcn {
bool per_inst_fw;
unsigned fw_version;
bool workload_profile_active;
struct mutex workload_profile_mutex;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
......
......@@ -151,6 +151,7 @@ struct ta_ras_init_flags {
uint16_t xcc_mask;
uint8_t channel_dis_num;
uint8_t nps_mode;
uint32_t active_umc_mask;
};
struct ta_ras_mca_addr {
......
......@@ -147,10 +147,15 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -164,13 +169,26 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
......
......@@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
!pm->pmf->config_dequeue_wait_counts_size)
return 0;
if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
return 0;
size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
......@@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
cmd, value);
if (!retval)
if (!retval) {
retval = kq_submit_packet(pm->priv_queue);
else
/* If default value is modified, cache that in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
} else {
kq_rollback_packet(pm->priv_queue);
}
}
/* If default value is modified, cache that value in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
out:
mutex_unlock(&pm->lock);
return retval;
......
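The restructuring above moves the wait-time caching inside the success branch, so the cache is refreshed only when the packet was both built and submitted. A condensed model of that flow, with hypothetical stand-ins for the packet-manager callbacks:

#include <stdbool.h>
#include <stdio.h>

enum wait_cmd { WAIT_INIT, WAIT_RESET };

static int build_packet(enum wait_cmd cmd)	{ (void)cmd; return 0; }  /* 0 on success */
static int submit_packet(void)			{ return 0; }
static void rollback_packet(void)		{ puts("rollback"); }
static void cache_wait_times(void)		{ puts("wait times cached"); }

static int config_dequeue_wait_counts(enum wait_cmd cmd)
{
	int ret = build_packet(cmd);

	if (!ret) {
		ret = submit_packet();
		/* cache the (possibly modified) defaults only on full success */
		if (!ret && cmd == WAIT_INIT)
			cache_wait_times();
	} else {
		rollback_packet();
	}
	return ret;
}

int main(void)
{
	return config_dequeue_wait_counts(WAIT_INIT);
}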
......@@ -310,6 +310,13 @@ static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manage
reg_data);
}
/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with
* register/value for configuring dequeue wait counts
*
* @return: -ve for failure and 0 for success and buffer is
* filled in with packet
*
**/
static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
enum kfd_config_dequeue_wait_counts_cmd cmd,
......@@ -321,24 +328,25 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
switch (cmd) {
case KFD_DEQUEUE_WAIT_INIT: {
uint32_t sch_wave = 0, que_sleep = 0;
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
uint32_t sch_wave = 0, que_sleep = 1;
/* For all gfx9 ASICs > gfx941,
* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
* On a 1GHz machine this is roughly 1 microsecond, which is
* about how long it takes to load data out of memory during
* queue connect
* QUE_SLEEP: Wait Count for Dequeue Retry.
*
* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
*/
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
que_sleep = 1;
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
} else {
return 0;
}
if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
return -EPERM;
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
&reg_offset, &reg_data);
......
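Both kfd hunks gate on the same GC IP version window: only gfx 9.4.1 and newer gfx9 parts program these wait counts, the caller returns 0 for everything else, and the v9 builder now returns -EPERM as a backstop. The sketch below shows that range check; the version packing macro is an assumption for illustration, not the driver's definition.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define IPVER(maj, min, rev)	(((uint32_t)(maj) << 16) | ((min) << 8) | (rev))

static int config_dequeue_wait_counts_v9(uint32_t gc_ver)
{
	/* only gfx 9.4.1 up to (not including) 10.0.0 programs these counts */
	if (gc_ver < IPVER(9, 4, 1) || gc_ver >= IPVER(10, 0, 0))
		return -EPERM;

	printf("programming CP_IQ_WAIT_TIME2.QUE_SLEEP for gfx %u.%u.%u\n",
	       (unsigned)(gc_ver >> 16), (unsigned)((gc_ver >> 8) & 0xff),
	       (unsigned)(gc_ver & 0xff));
	return 0;
}

int main(void)
{
	config_dequeue_wait_counts_v9(IPVER(9, 4, 3));	/* programmed */
	config_dequeue_wait_counts_v9(IPVER(9, 0, 1));	/* -EPERM, skipped by caller */
	return 0;
}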
......@@ -295,7 +295,8 @@ enum ip_power_state {
};
/* Used to mask smu debug modes */
#define SMU_DEBUG_HALT_ON_ERROR 0x1
#define SMU_DEBUG_HALT_ON_ERROR BIT(0)
#define SMU_DEBUG_POOL_USE_VRAM BIT(1)
#define MAX_SMU_I2C_BUSES 2
......
......@@ -1027,7 +1027,10 @@ static int smu_alloc_memory_pool(struct smu_context *smu)
memory_pool->size = pool_size;
memory_pool->align = PAGE_SIZE;
memory_pool->domain = AMDGPU_GEM_DOMAIN_GTT;
memory_pool->domain =
(adev->pm.smu_debug_mask & SMU_DEBUG_POOL_USE_VRAM) ?
AMDGPU_GEM_DOMAIN_VRAM :
AMDGPU_GEM_DOMAIN_GTT;
switch (pool_size) {
case SMU_MEMORY_POOL_SIZE_256_MB:
......
......@@ -961,7 +961,7 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
unsigned optimal_score = ~0;
/* loop through vco from low to high */
vco_min = max(max(vco_min, vclk), dclk);
vco_min = max3(vco_min, vclk, dclk);
for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) {
uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
......
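The radeon change is a pure cleanup: max3() is the kernel helper for the nested max(max(a, b), c) form. A tiny self-check of the equivalence, with plain C macros standing in for the kernel ones:

#include <assert.h>

#define max(a, b)	((a) > (b) ? (a) : (b))
#define max3(a, b, c)	max(max((a), (b)), (c))

int main(void)
{
	unsigned vco_min = 600000, vclk = 540000, dclk = 700000;

	assert(max3(vco_min, vclk, dclk) == max(max(vco_min, vclk), dclk));
	assert(max3(vco_min, vclk, dclk) == 700000);
	return 0;
}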