diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a9a41f4e7485e2cccc4f182384f7deaeb476efa..6d83ccfa42eeb0b00ca18635a8c1225de88a06e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1194,9 +1194,15 @@ struct amdgpu_device {
 	bool	debug_exp_resets;
 	bool	debug_disable_gpu_ring_reset;
 
-	bool	enforce_isolation[MAX_XCP];
-	/* Added this mutex for cleaner shader isolation between GFX and compute processes */
+	/* Protection for the following isolation structure */
 	struct mutex	enforce_isolation_mutex;
+	bool	enforce_isolation[MAX_XCP];
+	struct amdgpu_isolation {
+		void			*owner;
+		struct dma_fence	*spearhead;
+		struct amdgpu_sync	active;
+		struct amdgpu_sync	prev;
+	} isolation[MAX_XCP];
 
 	struct amdgpu_init_level *init_lvl;
 
@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						  struct amdgpu_ring *ring,
+						  struct amdgpu_job *job);
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index ffd4c64e123c7baa45ba8cd56ba20a2054e1ed19..dc47f5fd4ea15ee6e5de59126ba33b7e68071c91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
 {
 	struct aca_bank_node *node;
 	struct aca_bank *bank;
+	int r;
 
 	if (!adev->cper.enabled)
 		return;
@@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
 
 	/* UEs must be encoded into separate CPER entries */
 	if (type == ACA_SMU_TYPE_UE) {
+		struct aca_banks de_banks;
+
+		aca_banks_init(&de_banks);
 		list_for_each_entry(node, &banks->list, node) {
 			bank = &node->bank;
-			if (amdgpu_cper_generate_ue_record(adev, bank))
-				dev_warn(adev->dev, "fail to generate ue cper records\n");
+			if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+				r = aca_banks_add_bank(&de_banks, bank);
+				if (r)
+					dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+			} else {
+				if (amdgpu_cper_generate_ue_record(adev, bank))
+					dev_warn(adev->dev, "fail to generate ue cper records\n");
+			}
+		}
+
+		if (!list_empty(&de_banks.list)) {
+			if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+				dev_warn(adev->dev, "fail to generate de cper records\n");
 		}
+
+		aca_banks_release(&de_banks);
 	} else {
 		/*
 		 * SMU_TYPE_CE banks are combined into 1 CPER entries,
@@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
 	if (ret)
 		return ret;
 
+	/* DEs may be contained in CEs or UEs */
+	if (type != ACA_ERROR_TYPE_DEFERRED)
+		aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
 	return aca_log_aca_error(handle, type, err_data);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 6f62e5d80ed6b2d6a1106d250ae276f01ba103e4..6b180f1b33fda9f84e818dcbe30d7ca537552a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -76,11 +76,17 @@ struct ras_query_context;
 #define mmSMNAID_XCD1_MCA_SMU	0x38430400	/* SMN AID XCD1 */
 #define mmSMNXCD_XCD0_MCA_SMU	0x40430400	/* SMN XCD XCD0 */
 
-#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
-	((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
-	  ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \
-	 ACA_ERROR_TYPE_DEFERRED : \
-	 ACA_ERROR_TYPE_CE)
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+	(ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+	 ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
+#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
+	(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
+	 ACA_ERROR_TYPE_CE)
+
+#define ACA_BANK_ERR_UE_DE_DECODE(bank) \
+	(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
+	 ACA_ERROR_TYPE_UE)
 
 enum aca_reg_idx {
 	ACA_REG_IDX_CTL = 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2ac6d4fa0601983f1b1881bc12de4f93d4cdd101..d2ec4130a316ddb13fe4c59f1f84fefc337cbc81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	if (ret)
 		return ret;
 
-	return amdgpu_sync_fence(sync, vm->last_update);
+	return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
 }
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -1249,7 +1249,7 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
 	(void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-	(void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
+	(void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
 
 	return 0;
 }
@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 	}
 
-	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+	return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
 }
 
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -2913,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 	}
 	dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, fence) {
-		ret = amdgpu_sync_fence(&sync_obj, fence);
+		ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
 		if (ret) {
 			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
 			goto validate_map_fail;
Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 3f291b30b79f617eb78eb8ca91dc788bc0cc6672..360e07a5c7c1fb574a321c091bcb47aafabfd740 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -455,10 +455,10 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) return umin(rec_len, chunk); } -void amdgpu_cper_ring_write(struct amdgpu_ring *ring, - void *src, int count) +void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -485,6 +485,9 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, s += chunk; } + if (ring->count_dw < rec_cnt_dw) + ring->count_dw = 0; + /* the buffer is overflow, adjust rptr */ if (((wptr_old < rptr) && (rptr <= ring->wptr)) || ((ring->wptr < wptr_old) && (wptr_old < rptr)) || @@ -501,12 +504,10 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, pos = rptr; } while (!amdgpu_cper_is_hdr(ring, rptr)); } - mutex_unlock(&ring->adev->cper.ring_lock); - if (ring->count_dw >= (count >> 2)) - ring->count_dw -= (count >> 2); - else - ring->count_dw = 0; + if (ring->count_dw >= rec_cnt_dw) + ring->count_dw -= rec_cnt_dw; + mutex_unlock(&ring->adev->cper.ring_lock); } static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5cc5f59e30184ff956823d8ff44040e211ec93eb..82df06a72ee025d5dc22fb0b954d38f394fb5c5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); if (r) return r; @@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); return r; } @@ -1111,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) return -EINVAL; } } @@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, + GFP_KERNEL); if (r) return r; @@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, vm->last_update); + r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL); if (r) return r; @@ 
@@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 			continue;
 		}
 
-		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+				      GFP_KERNEL);
 		dma_fence_put(fence);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 17ce1677378e2bd57c8038bc08b3abd6742df3c9..6ebf6179064b72b3a938b53ef59010a2922be0fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -227,6 +227,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 static DEVICE_ATTR(pcie_replay_count, 0444,
 		   amdgpu_device_get_pcie_replay_count, NULL);
 
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+	int ret = 0;
+
+	if (!amdgpu_sriov_vf(adev))
+		ret = sysfs_create_file(&adev->dev->kobj,
+					&dev_attr_pcie_replay_count.attr);
+
+	return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		sysfs_remove_file(&adev->dev->kobj,
+				  &dev_attr_pcie_replay_count.attr);
+}
+
 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
 					  struct bin_attribute *attr, char *buf,
 					  loff_t ppos, size_t count)
@@ -4172,11 +4190,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
 }
 #endif
 
-static const struct attribute *amdgpu_dev_attributes[] = {
-	&dev_attr_pcie_replay_count.attr,
-	NULL
-};
-
 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 {
 	if (amdgpu_mcbp == 1)
@@ -4281,7 +4294,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->gfx.reset_sem_mutex);
 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
 	mutex_init(&adev->enforce_isolation_mutex);
+	for (i = 0; i < MAX_XCP; ++i) {
+		adev->isolation[i].spearhead = dma_fence_get_stub();
+		amdgpu_sync_create(&adev->isolation[i].active);
+		amdgpu_sync_create(&adev->isolation[i].prev);
+	}
 	mutex_init(&adev->gfx.kfd_sch_mutex);
+	mutex_init(&adev->gfx.workload_profile_mutex);
+	mutex_init(&adev->vcn.workload_profile_mutex);
 
 	amdgpu_device_init_apu_flags(adev);
 
@@ -4399,10 +4419,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	/* Get rid of things like offb */
-	r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
-	if (r)
-		return r;
+	/*
+	 * No need to remove conflicting FBs for non-display class devices.
+	 * This prevents the sysfb from being freed accidentally.
+	 */
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+		/* Get rid of things like offb */
+		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+		if (r)
+			return r;
+	}
 
 	/* Enable TMZ based on IP_VERSION */
 	amdgpu_gmc_tmz_set(adev);
@@ -4613,7 +4640,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	} else
 		adev->ucode_sysfs_en = true;
 
-	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+	r = amdgpu_device_attr_sysfs_init(adev);
 	if (r)
 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
 
@@ -4750,7 +4777,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 		amdgpu_pm_sysfs_fini(adev);
 	if (adev->ucode_sysfs_en)
 		amdgpu_ucode_sysfs_fini(adev);
-	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+	amdgpu_device_attr_sysfs_fini(adev);
 	amdgpu_fru_sysfs_fini(adev);
 
 	amdgpu_reg_state_sysfs_fini(adev);
@@ -4777,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
-	int idx;
+	int i, idx;
 	bool px;
 
 	amdgpu_device_ip_fini(adev);
@@ -4785,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
 	adev->accel_working = false;
 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+	for (i = 0; i < MAX_XCP; ++i) {
+		dma_fence_put(adev->isolation[i].spearhead);
+		amdgpu_sync_free(&adev->isolation[i].active);
+		amdgpu_sync_free(&adev->isolation[i].prev);
+	}
 
 	amdgpu_reset_fini(adev);
 
@@ -4800,6 +4832,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	kfree(adev->fru_info);
 	adev->fru_info = NULL;
 
+	kfree(adev->xcp_mgr);
+	adev->xcp_mgr = NULL;
+
 	px = amdgpu_device_supports_px(adev_to_drm(adev));
 
 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
@@ -5331,6 +5366,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
 		amdgpu_ras_resume(adev);
 
@@ -6903,22 +6939,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 {
 	struct dma_fence *old = NULL;
 
+	dma_fence_get(gang);
 	do {
 		dma_fence_put(old);
 		old = amdgpu_device_get_gang(adev);
 		if (old == gang)
 			break;
 
-		if (!dma_fence_is_signaled(old))
+		if (!dma_fence_is_signaled(old)) {
+			dma_fence_put(gang);
 			return old;
+		}
 
 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
 			 old, gang) != old);
 
+	/*
+	 * Drop it once for the exchanged reference in adev and once for the
+	 * thread local reference acquired in amdgpu_device_get_gang().
+	 */
+	dma_fence_put(old);
 	dma_fence_put(old);
 	return NULL;
 }
 
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						  struct amdgpu_ring *ring,
+						  struct amdgpu_job *job)
+{
+	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+	struct drm_sched_fence *f = job->base.s_fence;
+	struct dma_fence *dep;
+	void *owner;
+	int r;
+
+	/*
+	 * For now enforce isolation only for the GFX block since we only need
+	 * the cleaner shader on those rings.
+	 */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return NULL;
+
+	/*
+	 * All submissions where enforce isolation is false are handled as if
+	 * they come from a single client. Use ~0l as the owner to distinguish
+	 * it from kernel submissions where the owner is NULL.
+	 */
+	owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+	mutex_lock(&adev->enforce_isolation_mutex);
+
+	/*
+	 * The "spearhead" submission is the first one which changes the
+	 * ownership to its client. We always need to wait for it to be
+	 * pushed to the HW before proceeding with anything.
+	 */
+	if (&f->scheduled != isolation->spearhead &&
+	    !dma_fence_is_signaled(isolation->spearhead)) {
+		dep = isolation->spearhead;
+		goto out_grab_ref;
+	}
+
+	if (isolation->owner != owner) {
+
+		/*
+		 * Wait for any gang to be assembled before switching to a
+		 * different owner or otherwise we could deadlock the
+		 * submissions.
+		 */
+		if (!job->gang_submit) {
+			dep = amdgpu_device_get_gang(adev);
+			if (!dma_fence_is_signaled(dep))
+				goto out_return_dep;
+			dma_fence_put(dep);
+		}
+
+		dma_fence_put(isolation->spearhead);
+		isolation->spearhead = dma_fence_get(&f->scheduled);
+		amdgpu_sync_move(&isolation->active, &isolation->prev);
+		trace_amdgpu_isolation(isolation->owner, owner);
+		isolation->owner = owner;
+	}
+
+	/*
+	 * Specifying the ring here helps to pipeline submissions even when
+	 * isolation is enabled. If that is not desired for testing, NULL can
+	 * be used instead of the ring to enforce a CPU round trip while
+	 * switching between clients.
+	 */
+	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+	if (r)
+		DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+	dma_fence_get(dep);
+out_return_dep:
+	mutex_unlock(&adev->enforce_isolation_mutex);
+	return dep;
+}
+
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
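The spearhead bookkeeping above is dense, so here is a hedged, self-contained sketch of the same idea outside the driver (all types and names below are illustrative stand-ins, not amdgpu code; locking and the gang-submit special case are omitted):

#include <stdbool.h>
#include <stddef.h>

struct fence { bool signaled; };

struct isolation_state {
	void *owner;             /* client currently owning the block */
	struct fence *spearhead; /* first submission of that owner */
};

/* What must a new job wait on before it may reach the HW? */
static struct fence *pick_dependency(struct isolation_state *iso,
				     void *owner, struct fence *job,
				     struct fence *prev_owner_work)
{
	/* everyone waits until the ownership switch reached the HW */
	if (iso->spearhead != job && !iso->spearhead->signaled)
		return iso->spearhead;

	if (iso->owner != owner) {
		/* this job becomes the switch point ("spearhead") */
		iso->spearhead = job;
		iso->owner = owner;
		return prev_owner_work; /* drain the previous client first */
	}

	return NULL; /* same owner, nothing extra to wait for */
}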
dev_err(adev->dev, "failed to read ip discovery binary from file\n"); @@ -1290,6 +1317,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint16_t die_offset; uint16_t ip_offset; uint16_t num_dies; + uint32_t wafl_ver; uint16_t num_ips; uint16_t hw_id; uint8_t inst; @@ -1303,6 +1331,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return r; } + wafl_ver = 0; adev->gfx.xcc_mask = 0; adev->sdma.sdma_mask = 0; adev->vcn.inst_mask = 0; @@ -1403,6 +1432,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->gfx.xcc_mask |= (1U << ip->instance_number); + if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID) + wafl_ver = IP_VERSION_FULL(ip->major, ip->minor, + ip->revision, 0, 0); + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -1468,6 +1501,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } } + if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0]) + adev->ip_versions[XGMI_HWIP][0] = wafl_ver; + return 0; } @@ -2509,6 +2545,38 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) { int r; + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_RAVEN: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: + /* this is not fatal. We have a fallback below + * if the new firmwares are not present. some of + * this will be overridden below to keep things + * consistent with the current behavior. + */ + r = amdgpu_discovery_reg_base_init(adev); + if (!r) { + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } + break; + default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) + return -EINVAL; + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + break; + } + switch (adev->asic_type) { case CHIP_VEGA10: vega10_reg_base_init(adev); @@ -2673,14 +2741,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); break; } @@ -2772,10 +2832,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; } - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); - /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 653f2bc775306088aedd99b888fc07ccaa6ebd8e..23cfce5aa1fcea0f0bfaa5f1faf2f41f2c3e7a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -139,6 +139,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), + AMDGPU_DEBUG_SMU_POOL = BIT(7), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -176,6 +177,7 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; +int amdgpu_modeset = -1; /* Specifies the 
@@ -1037,6 +1039,13 @@
 module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
 
 module_param(enforce_isolation, bool, 0444);
 MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
 
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
 /**
  * DOC: seamless (int)
  * Seamless boot will keep the image on the screen during the boot process.
@@ -1053,6 +1062,11 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
  *   limits the VRAM size reported to ROCm applications to the visible
  *   size, usually 256MB.
  * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
  */
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
 module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
@@ -2230,6 +2244,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
 		pr_info("debug: ring reset disabled\n");
 		adev->debug_disable_gpu_ring_reset = true;
 	}
+	if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+		pr_info("debug: use vram for smu pool\n");
+		adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+	}
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2257,6 +2275,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	int ret, retry = 0, i;
 	bool supports_atomic = false;
 
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+		if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+			return -EINVAL;
+	}
+
 	/* skip devices which are owned by radeon */
 	for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
 		if (amdgpu_unsupported_pciidlist[i] == pdev->device)
@@ -2990,9 +3014,6 @@ static int __init amdgpu_init(void)
 {
 	int r;
 
-	if (drm_firmware_drivers_only())
-		return -EINVAL;
-
 	r = amdgpu_sync_init();
 	if (r)
 		goto error_sync;
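As a side note, the probe-time gating added above reduces to a single predicate; a hedged sketch follows (hypothetical helper, not driver code; 0x0300 and 0x0380 are the values PCI_CLASS_DISPLAY_VGA and PCI_CLASS_DISPLAY_OTHER expand to):

#include <stdbool.h>

/* May this PCI function bind when "nomodeset" is on the command line? */
static bool amdgpu_probe_allowed(unsigned int pci_class,
				 bool firmware_drivers_only,
				 int modeset_param)
{
	bool display = (pci_class >> 8) == 0x0300 ||  /* VGA */
		       (pci_class >> 8) == 0x0380;    /* other display */

	/* non-display (compute-only) functions always bind; display
	 * functions need amdgpu.modeset=1 to override nomodeset */
	return !display || !firmware_drivers_only || modeset_param != -1;
}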
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 984e6ff6e4632a02ca01a616171efdcd93c1b7d1..72af5e5a894a29dcaa368be52ada491438725b84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1665,15 +1665,8 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
 	}
 
 	mutex_lock(&adev->enforce_isolation_mutex);
-	for (i = 0; i < num_partitions; i++) {
-		if (adev->enforce_isolation[i] && !partition_values[i])
-			/* Going from enabled to disabled */
-			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
-		else if (!adev->enforce_isolation[i] && partition_values[i])
-			/* Going from disabled to enabled */
-			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+	for (i = 0; i < num_partitions; i++)
 		adev->enforce_isolation[i] = partition_values[i];
-	}
 	mutex_unlock(&adev->enforce_isolation_mutex);
 
 	amdgpu_mes_update_enforce_isolation(adev);
@@ -2160,11 +2153,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
 		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
 	if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
-		r = amdgpu_dpm_switch_power_profile(adev, profile, false);
-		if (r)
-			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
-				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
-				 "fullscreen 3D" : "compute");
+		mutex_lock(&adev->gfx.workload_profile_mutex);
+		if (adev->gfx.workload_profile_active) {
+			r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+			if (r)
+				dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+					 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
+					 "fullscreen 3D" : "compute");
+			adev->gfx.workload_profile_active = false;
+		}
+		mutex_unlock(&adev->gfx.workload_profile_mutex);
 	} else {
 		schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
 	}
@@ -2183,13 +2181,25 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
 
 	atomic_inc(&adev->gfx.total_submission_cnt);
 
-	if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	/* We can safely return early here because we've cancelled the
+	 * delayed work so there is no one else to set it to false
+	 * and we don't care if someone else sets it to true.
+	 */
+	if (adev->gfx.workload_profile_active)
+		return;
+
+	mutex_lock(&adev->gfx.workload_profile_mutex);
+	if (!adev->gfx.workload_profile_active) {
 		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
 		if (r)
 			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
 				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
 				 "fullscreen 3D" : "compute");
+		adev->gfx.workload_profile_active = true;
 	}
+	mutex_unlock(&adev->gfx.workload_profile_mutex);
 }
 
 void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 4b1675f79caa5a4bb8824bb7dcad0e7f02840f8b..87e86218876679636d057746ebd5b5f899352a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -482,6 +482,8 @@ struct amdgpu_gfx {
 
 	atomic_t			total_submission_cnt;
 	struct delayed_work		idle_work;
+	bool				workload_profile_active;
+	struct mutex			workload_profile_mutex;
 };
 
 struct amdgpu_gfx_ras_reg_entry {
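The begin_use/idle-work handshake above is a double-checked flag: the unlocked early return is only safe because the delayed work was cancelled first, so nothing can flip the flag back to false concurrently. A condensed sketch of the pattern (hypothetical names, pthreads standing in for struct mutex):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t profile_lock = PTHREAD_MUTEX_INITIALIZER;
static bool profile_active;

static void profile_get(void (*enable)(void))
{
	/* idle work already cancelled: 'true' can only be set by us */
	if (profile_active)
		return;

	pthread_mutex_lock(&profile_lock);
	if (!profile_active) {	/* re-check under the lock */
		enable();
		profile_active = true;
	}
	pthread_mutex_unlock(&profile_lock);
}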
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 4eefa17fa39bdcedb09146003753dc276f04616e..464625282872aadeff85ea3b3ce1e1e6c9424a2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -573,6 +573,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
 	unsigned i;
 	unsigned vmhub, inv_eng;
+	struct amdgpu_ring *shared_ring;
 
 	/* init the vm inv eng for all vmhubs */
 	for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
@@ -595,6 +596,10 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 		    ring == &adev->cper.ring_buf)
 			continue;
 
+		/* Skip if the ring is a shared ring */
+		if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
+			continue;
+
 		inv_eng = ffs(vm_inv_engs[vmhub]);
 		if (!inv_eng) {
 			dev_err(adev->dev, "no VM inv eng for ring %s\n",
@@ -607,6 +612,21 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 
 		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 			 ring->name, ring->vm_inv_eng, ring->vm_hub);
+
+		/* SDMA has a special packet which allows it to use the same
+		 * invalidation engine for all the rings in one instance.
+		 * Therefore, we do not allocate a separate VM invalidation engine
+		 * for SDMA page rings. Instead, they share the VM invalidation
+		 * engine with the SDMA gfx ring. This change ensures efficient
+		 * resource management and avoids the issue of insufficient VM
+		 * invalidation engines.
+		 */
+		shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
+		if (shared_ring) {
+			shared_ring->vm_inv_eng = ring->vm_inv_eng;
+			dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
+				 ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
+			continue;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 8e712a11aba5d23459aba35ff8d58b9022efc447..4c4e087230ac51450ecdc3083328094be2bde9e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -209,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
 		return 0;
 	}
 
-	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
+	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT);
 	if (!fences)
 		return -ENOMEM;
 
@@ -287,46 +287,34 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	    (*id)->flushed_updates < updates ||
 	    !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
-	     !dma_fence_is_signaled((*id)->last_flush))) {
+	     !dma_fence_is_signaled((*id)->last_flush)))
+		needs_flush = true;
+
+	if ((*id)->owner != vm->immediate.fence_context ||
+	    (!adev->vm_manager.concurrent_flush && needs_flush)) {
 		struct dma_fence *tmp;
 
-		/* Wait for the gang to be assembled before using a
-		 * reserved VMID or otherwise the gang could deadlock.
+		/* Don't use per engine and per process VMID at the
+		 * same time
 		 */
-		tmp = amdgpu_device_get_gang(adev);
-		if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
+		if (adev->vm_manager.concurrent_flush)
+			ring = NULL;
+
+		/* to prevent one context starved by another context */
+		(*id)->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+		if (tmp) {
 			*id = NULL;
-			*fence = tmp;
+			*fence = dma_fence_get(tmp);
 			return 0;
 		}
-		dma_fence_put(tmp);
-
-		/* Make sure the id is owned by the gang before proceeding */
-		if (!job->gang_submit ||
-		    (*id)->owner != vm->immediate.fence_context) {
-
-			/* Don't use per engine and per process VMID at the
-			 * same time
-			 */
-			if (adev->vm_manager.concurrent_flush)
-				ring = NULL;
-
-			/* to prevent one context starved by another context */
-			(*id)->pd_gpu_addr = 0;
-			tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
-			if (tmp) {
-				*id = NULL;
-				*fence = dma_fence_get(tmp);
-				return 0;
-			}
-		}
-		needs_flush = true;
 	}
 
 	/* Good we can use this VMID. Remember this submission as
 	 * user of the VMID.
 	 */
-	r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
+	r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
+			      GFP_NOWAIT);
 	if (r)
 		return r;
 
@@ -385,7 +373,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 	 * user of the VMID.
 	 */
 	r = amdgpu_sync_fence(&(*id)->active,
-			      &job->base.s_fence->finished);
+			      &job->base.s_fence->finished,
+			      GFP_NOWAIT);
 	if (r)
 		return r;
@@ -422,7 +411,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r || !idle)
 		goto error;
 
-	if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) {
+	if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
 		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
 		if (r || !id)
 			goto error;
@@ -437,7 +426,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 		/* Remember this submission as user of the VMID */
 		r = amdgpu_sync_fence(&id->active,
-				      &job->base.s_fence->finished);
+				      &job->base.s_fence->finished,
+				      GFP_NOWAIT);
 		if (r)
 			goto error;
 
@@ -474,19 +464,14 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 /*
  * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
- * @adev: amdgpu_device pointer
  * @vm: the VM to check
  * @vmhub: the VMHUB which will be used
  *
  * Returns: True if the VM will use a reserved VMID.
  */
-bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm, unsigned int vmhub)
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
 {
-	return vm->reserved_vmid[vmhub] ||
-		(adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ?
-					 vm->root.bo->xcp_id : 0] &&
-		 AMDGPU_IS_GFXHUB(vmhub));
+	return vm->reserved_vmid[vmhub];
 }
 
 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 4012fb2dd08a59960ebea95cafdbfa503de170c3..240fa675126029a0050a44cd4d5065eefcf70bef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -78,8 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
-bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm, unsigned int vmhub);
+bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
 
 int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
 			       unsigned vmhub);
 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 935df2cdcc16c873aaab6a5fba23ee321c11d45f..acb21fc8b3ce5d8c745e3e3384bd9ec24b91be3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -361,17 +361,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	struct dma_fence *fence = NULL;
+	struct dma_fence *fence;
 	int r;
 
 	r = drm_sched_entity_error(s_entity);
 	if (r)
 		goto error;
 
-	if (job->gang_submit)
+	if (job->gang_submit) {
 		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+		if (fence)
+			return fence;
+	}
+
+	fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+	if (fence)
+		return fence;
 
-	if (!fence && job->vm && !job->vmid) {
+	if (job->vm && !job->vmid) {
 		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
 		if (r) {
 			dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
@@ -384,9 +391,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
 		 */
 		if (!fence)
 			job->vm = NULL;
+		return fence;
 	}
 
-	return fence;
+	return NULL;
 
 error:
 	dma_fence_set_error(&job->base.s_fence->finished, r);
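Note how amdgpu_job_prepare_job() above now hands its dependencies to the scheduler one at a time — the gang-switch fence, then the isolation fence, then the VMID grab — and returns NULL once nothing is left. A sketch of that calling contract (hypothetical wait helper, not scheduler code):

struct dma_fence;

static void drain_dependencies(struct dma_fence *(*prepare)(void),
			       void (*wait_on)(struct dma_fence *))
{
	struct dma_fence *dep;

	/* the scheduler waits on each returned fence, then asks again */
	while ((dep = prepare()) != NULL)
		wait_on(dep);
}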
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 709c11cbeabd885549078871ba4b132cba8d6288..85f774063f9b1078af1c51532e7d944cdcde6174 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -145,9 +145,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	adev->mes.vmid_mask_gfxhub = 0xffffff00;
 
 	for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
-		/* use only 1st MEC pipes */
-		if (i >= adev->gfx.mec.num_pipe_per_mec)
-			continue;
+		if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec))
+			break;
 		adev->mes.compute_hqd_mask[i] = 0xc;
 	}
 
@@ -155,14 +154,9 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 		adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
 
 	for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
-		if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
-		    IP_VERSION(6, 0, 0))
-			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
-		/* zero sdma_hqd_mask for non-existent engine */
-		else if (adev->sdma.num_instances == 1)
-			adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
-		else
-			adev->mes.sdma_hqd_mask[i] = 0xfc;
+		if (i >= adev->sdma.num_instances)
+			break;
+		adev->mes.sdma_hqd_mask[i] = 0xfc;
 	}
 
 	for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
@@ -1336,14 +1330,14 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
 		DRM_ERROR("failed to do vm_bo_update on meta data\n");
 		goto error_del_bo_va;
 	}
-	amdgpu_sync_fence(&sync, bo_va->last_pt_update);
+	amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL);
 
 	r = amdgpu_vm_update_pdes(adev, vm, false);
 	if (r) {
 		DRM_ERROR("failed to update pdes on meta data\n");
 		goto error_del_bo_va;
 	}
-	amdgpu_sync_fence(&sync, vm->last_update);
+	amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL);
 
 	amdgpu_sync_wait(&sync, false);
 	drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 0f2eb69ad715bfb8ad2a4ea7a172146981332b7d..d54bb13772622d3d200ed48dc1e23630062e168b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -153,6 +153,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
 		adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
 		ret = psp_init_cap_microcode(psp, ucode_prefix);
 		break;
+	case IP_VERSION(13, 0, 12):
+		ret = psp_init_ta_microcode(psp, ucode_prefix);
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1861,6 +1864,7 @@ int psp_ras_initialize(struct psp_context *psp)
 	if (adev->gmc.gmc_funcs->query_mem_partition_mode)
 		ras_cmd->ras_in_message.init_flags.nps_mode =
 			adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+	ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask;
 
 	ret = psp_ta_load(psp, &psp->ras_context.context);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index cfec29835634c10db736a6b83c9e460c5267a2e9..bed2603ae4c4386cfa645daee820fb317022c141 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -3473,6 +3473,13 @@ int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev)
 				adev, control->bad_channel_bitmap);
 			con->update_channel_flag = false;
 		}
+
+		/* The format action is only applied to new ASICs */
+		if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 &&
+		    control->tbl_hdr.version < RAS_TABLE_VER_V3)
+			if (!amdgpu_ras_eeprom_reset_table(control))
+				if (amdgpu_ras_save_bad_pages(adev, NULL))
+					dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n");
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 3597ecd9baca34ceacc1120397d672c8c214a0a4..0ea7cfaf3587dfd771a285dfa27bee86c25756c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
 	case IP_VERSION(13, 0, 10):
 		return true;
 	case IP_VERSION(13, 0, 6):
+	case IP_VERSION(13, 0, 12):
 	case IP_VERSION(13, 0, 14):
 		return (adev->gmc.is_app_apu) ? false : true;
 	default:
@@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 		return true;
 	case IP_VERSION(13, 0, 6):
 	case IP_VERSION(13, 0, 10):
+	case IP_VERSION(13, 0, 12):
 	case IP_VERSION(13, 0, 14):
 		control->i2c_address = EEPROM_I2C_MADDR_4;
 		return true;
@@ -413,9 +415,11 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control
 
 	switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
 	case IP_VERSION(8, 10, 0):
-	case IP_VERSION(12, 0, 0):
 		hdr->version = RAS_TABLE_VER_V2_1;
 		return;
+	case IP_VERSION(12, 0, 0):
+		hdr->version = RAS_TABLE_VER_V3;
+		return;
 	default:
 		hdr->version = RAS_TABLE_VER_V1;
 		return;
@@ -443,7 +447,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 	hdr->header = RAS_TABLE_HDR_VAL;
 	amdgpu_ras_set_eeprom_table_version(control);
 
-	if (hdr->version == RAS_TABLE_VER_V2_1) {
+	if (hdr->version >= RAS_TABLE_VER_V2_1) {
 		hdr->first_rec_offset = RAS_RECORD_START_V2_1;
 		hdr->tbl_size = RAS_TABLE_HEADER_SIZE +
 				RAS_TABLE_V2_1_INFO_SIZE;
@@ -461,7 +465,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
 	}
 
 	csum = __calc_hdr_byte_sum(control);
-	if (hdr->version == RAS_TABLE_VER_V2_1)
+	if (hdr->version >= RAS_TABLE_VER_V2_1)
 		csum += __calc_ras_info_byte_sum(control);
 	csum = -csum;
 	hdr->checksum = csum;
@@ -757,7 +761,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
 			"Saved bad pages %d reaches threshold value %d\n",
 			control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
 		control->tbl_hdr.header = RAS_TABLE_HDR_BAD;
-		if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) {
+		if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) {
 			control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD;
 			control->tbl_rai.health_percent = 0;
 		}
@@ -770,7 +774,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
 			amdgpu_dpm_send_rma_reason(adev);
 	}
 
-	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+	if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
 		control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE +
 					    RAS_TABLE_V2_1_INFO_SIZE +
 					    control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -810,7 +814,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
 	 * now calculate gpu health percent
 	 */
 	if (amdgpu_bad_page_threshold != 0 &&
-	    control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
+	    control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 &&
 	    control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
 		control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
 						    control->ras_num_bad_pages) * 100) /
@@ -823,7 +827,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
 		csum += *pp;
 
 	csum += __calc_hdr_byte_sum(control);
-	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+	if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
 		csum += __calc_ras_info_byte_sum(control);
 	/* avoid sign extension when assigning to "checksum" */
 	csum = -csum;
@@ -1040,7 +1044,7 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co
 	/* get available eeprom table version first before eeprom table init */
 	amdgpu_ras_set_eeprom_table_version(control);
 
-	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+	if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
 		return RAS_MAX_RECORD_COUNT_V2_1;
 	else
 		return RAS_MAX_RECORD_COUNT;
@@ -1285,7 +1289,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control
 	int buf_size, res;
 	u8  csum, *buf, *pp;
 
-	if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1)
+	if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1)
 		buf_size = RAS_TABLE_HEADER_SIZE +
 			   RAS_TABLE_V2_1_INFO_SIZE +
 			   control->ras_num_recs * RAS_TABLE_RECORD_SIZE;
@@ -1388,7 +1392,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
 
 	__decode_table_header_from_buf(hdr, buf);
 
-	if (hdr->version == RAS_TABLE_VER_V2_1) {
+	if (hdr->version >= RAS_TABLE_VER_V2_1) {
 		control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr);
 		control->ras_record_offset = RAS_RECORD_START_V2_1;
 		control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1;
@@ -1428,7 +1432,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
 		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
 				 control->ras_num_bad_pages);
 
-		if (hdr->version == RAS_TABLE_VER_V2_1) {
+		if (hdr->version >= RAS_TABLE_VER_V2_1) {
 			res = __read_table_ras_info(control);
 			if (res)
 				return res;
@@ -1448,7 +1452,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
 				  ras->bad_page_cnt_threshold);
 	} else if (hdr->header == RAS_TABLE_HDR_BAD &&
 		   amdgpu_bad_page_threshold != 0) {
-		if (hdr->version == RAS_TABLE_VER_V2_1) {
+		if (hdr->version >= RAS_TABLE_VER_V2_1) {
 			res = __read_table_ras_info(control);
 			if (res)
 				return res;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
index 13f7eda9a6960517e3e3a383ddd443829c66b628..ec6d7ea37ad071d102e25277e2562f059b808f8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
@@ -28,6 +28,7 @@
 
 #define RAS_TABLE_VER_V1           0x00010000
 #define RAS_TABLE_VER_V2_1         0x00021000
+#define RAS_TABLE_VER_V3           0x00030000
 
 struct amdgpu_device;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index b4fd1e17205e92ce4d9fcb8f71bde306a9ca1adf..bb2b663852237ce97ed23e132660f79b53c4457c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -37,7 +37,7 @@ struct amdgpu_job;
 struct amdgpu_vm;
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS		133
+#define AMDGPU_MAX_RINGS		149
 #define AMDGPU_MAX_HWIP_RINGS		64
 #define AMDGPU_MAX_GFX_RINGS		2
 #define AMDGPU_MAX_SW_GFX_RINGS         2
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 3a4cef896018574318cfd3fecc4406a375694ebd..529c9696c2f32a4057b1d689d0e3b8567ec2b0dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -504,6 +504,39 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
 	}
 }
 
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	if (adev->sdma.has_page_queue &&
+	    (ring->me < adev->sdma.num_instances) &&
+	    (ring == &adev->sdma.instance[ring->me].ring))
+		return &adev->sdma.instance[ring->me].page;
+	else
+		return NULL;
+}
+/**
+ * amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
+ * @adev: Pointer to the AMDGPU device structure
+ * @ring: Pointer to the ring structure to check
+ *
+ * This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
+ * It returns true if the ring is such an SDMA ring, false otherwise.
+ */
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	int i = ring->me;
+
+	if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
+		return false;
+
+	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
+		return (ring == &adev->sdma.instance[i].page);
+	else
+		return false;
+}
+
 /**
  * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
  * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
@@ -532,7 +565,6 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
  * amdgpu_sdma_reset_engine - Reset a specific SDMA engine
  * @adev: Pointer to the AMDGPU device
  * @instance_id: ID of the SDMA engine instance to reset
- * @suspend_user_queues: check if suspend user queue.
  *
  * This function performs the following steps:
  * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state.
@@ -541,22 +573,16 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
  *
  * Returns: 0 on success, or a negative error code on failure.
  */
-int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues)
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
 {
 	struct sdma_on_reset_funcs *funcs;
 	int ret = 0;
-	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];;
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
 	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
 	struct amdgpu_ring *page_ring = &sdma_instance->page;
 	bool gfx_sched_stopped = false, page_sched_stopped = false;
 
-	/* Suspend KFD if suspend_user_queues is true.
-	 * prevent the destruction of in-flight healthy user queue packets and
-	 * avoid race conditions between KFD and KGD during the reset process.
-	 */
-	if (suspend_user_queues)
-		amdgpu_amdkfd_suspend(adev, false);
-
+	mutex_lock(&sdma_instance->engine_reset_mutex);
 	/* Stop the scheduler's work queue for the GFX and page rings if they are running.
	 * This ensures that no new tasks are submitted to the queues while
	 * the reset is in progress.
@@ -609,7 +635,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, b
	 * if they were stopped by this function. This allows new tasks
	 * to be submitted to the queues after the reset is complete.
	 */
-	if (ret) {
+	if (!ret) {
 		if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
 			drm_sched_wqueue_start(&gfx_ring->sched);
 		}
 		if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) {
 			drm_sched_wqueue_start(&page_ring->sched);
 		}
 	}
-
-	if (suspend_user_queues)
-		amdgpu_amdkfd_resume(adev, false);
+	mutex_unlock(&sdma_instance->engine_reset_mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 9651693200655095651e926fce0ea602629d29a9..47d56fd0589fc1c4677bf2578bf90cba09cd33a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -64,6 +64,11 @@ struct amdgpu_sdma_instance {
 	struct amdgpu_bo	*sdma_fw_obj;
 	uint64_t		sdma_fw_gpu_addr;
 	uint32_t		*sdma_fw_ptr;
+	struct mutex		engine_reset_mutex;
+	/* track guilty state of GFX and PAGE queues */
+	bool			gfx_guilty;
+	bool			page_guilty;
+
 };
 
 enum amdgpu_sdma_ras_memory_id {
@@ -126,9 +131,6 @@ struct amdgpu_sdma {
 	uint32_t		*ip_dump;
 	uint32_t		supported_reset;
 	struct list_head	reset_callback_list;
-	/* track guilty state of GFX and PAGE queues */
-	bool			gfx_guilty;
-	bool			page_guilty;
 };
 
 /*
@@ -169,7 +171,7 @@ struct amdgpu_buffer_funcs {
 };
 
 void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
-int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
+int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
 
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
@@ -194,4 +196,7 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
 void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
 int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
 void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
+bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
+						struct amdgpu_ring *ring);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c586ab4c911bfeb0681cba5ef23aaea5cc8d46cf..5576ed0b508f056ad03357f4a09265f8b57aaf8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
 	struct amdgpu_sync_entry *e;
 
 	hash_for_each_possible(sync->fences, e, node, f->context) {
-		if (unlikely(e->fence->context != f->context))
-			continue;
+		if (dma_fence_is_signaled(e->fence)) {
+			dma_fence_put(e->fence);
+			e->fence = dma_fence_get(f);
+			return true;
+		}
 
-		amdgpu_sync_keep_later(&e->fence, f);
-		return true;
+		if (likely(e->fence->context == f->context)) {
+			amdgpu_sync_keep_later(&e->fence, f);
+			return true;
+		}
 	}
 	return false;
 }
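The gfp_t parameter introduced in the next hunk lets each caller pick allocation semantics: ordinary submission paths keep GFP_KERNEL, while callers on the scheduler/fence path (VMID grab, isolation tracking) pass GFP_NOWAIT so the allocation cannot recurse into memory reclaim and stall on the very fences being tracked. Representative call sites, condensed from the hunks in this patch:

	/* CS and BO-mapping paths: sleeping allocation is fine */
	r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);

	/* VMID grab / isolation paths: must not enter reclaim */
	r = amdgpu_sync_fence(&id->active, &job->base.s_fence->finished,
			      GFP_NOWAIT);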
@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
  *
  * @sync: sync object to add fence to
  * @f: fence to sync to
+ * @flags: memory allocation flags to use when allocating sync entry
  *
  * Add the fence to the sync object.
  */
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      gfp_t flags)
 {
 	struct amdgpu_sync_entry *e;
 
@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
 	if (amdgpu_sync_add_later(sync, f))
 		return 0;
 
-	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+	e = kmem_cache_alloc(amdgpu_sync_slab, flags);
 	if (!e)
 		return -ENOMEM;
 
@@ -249,7 +256,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		struct dma_fence *tmp = dma_fence_chain_contained(f);
 
 		if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
-			r = amdgpu_sync_fence(sync, f);
+			r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
 			dma_fence_put(f);
 			if (r)
 				return r;
@@ -281,7 +288,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
 		if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
 			continue;
 
-		r = amdgpu_sync_fence(sync, f);
+		r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
 		if (r)
 			break;
 	}
@@ -388,7 +395,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 	hash_for_each_safe(source->fences, i, tmp, e, node) {
 		f = e->fence;
 		if (!dma_fence_is_signaled(f)) {
-			r = amdgpu_sync_fence(clone, f);
+			r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
 			if (r)
 				return r;
 		} else {
@@ -399,6 +406,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 	return 0;
 }
 
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+	unsigned int i;
+
+	amdgpu_sync_free(dst);
+
+	for (i = 0; i < HASH_SIZE(src->fences); ++i)
+		hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
 /**
  * amdgpu_sync_push_to_job - push fences into job
  * @sync: sync object to get the fences from
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index e3272dce798d79843e02a98b1c4100a6fda5074e..51eb4382c91ebd216b3be4ae80d8923ff9c5c8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -47,7 +47,8 @@ struct amdgpu_sync {
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
-int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
+int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
+		      gfp_t flags);
 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
 		     void *owner);
@@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 					 struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
 int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
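A representative use of the new amdgpu_sync_move() is the owner switch in amdgpu_device_enforce_isolation() above: the fences that were "active" for the old owner become the "prev" barrier the next owner has to drain, and the active set restarts with the new owner's first submission. Condensed from the hunks above:

	/* the old owner's in-flight work becomes the barrier ... */
	amdgpu_sync_move(&isolation->active, &isolation->prev);

	/* ... which the next job waits on, while joining the new active set */
	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);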
DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed, TP_ARGS(pasid) ); +TRACE_EVENT(amdgpu_isolation, + TP_PROTO(void *prev, void *next), + TP_ARGS(prev, next), + TP_STRUCT__entry( + __field(void *, prev) + __field(void *, next) + ), + + TP_fast_assign( + __entry->prev = prev; + __entry->next = next; + ), + TP_printk("prev=%p, next=%p", + __entry->prev, + __entry->next) +); + +TRACE_EVENT(amdgpu_cleaner_shader, + TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence), + TP_ARGS(ring, fence), + TP_STRUCT__entry( + __string(ring, ring->name) + __field(u64, seqno) + ), + + TP_fast_assign( + __assign_str(ring); + __entry->seqno = fence->seqno; + ), + TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno) +); + TRACE_EVENT(amdgpu_bo_list_set, TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), TP_ARGS(list, bo), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 8d8b39e6d197a1159ea7923f74eba5f01d061dae..1991dd3d1056a16885ab4ed4b95066759d9fd790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -438,10 +438,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } @@ -455,13 +460,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&vcn_inst->total_submission_cnt); - if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) { + cancel_delayed_work_sync(&vcn_inst->idle_work); + + /* We can safely return early here because we've cancelled the + * delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true.
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; + + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); + true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); +pg_lock: mutex_lock(&vcn_inst->vcn_pg_lock); vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 26c9c2d90f4554d51b2060782dcf8b644d5201cc..cdcdae7f71ce979e90dfe1a421bce3acb04b29ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -358,6 +358,9 @@ struct amdgpu_vcn { bool per_inst_fw; unsigned fw_version; + + bool workload_profile_active; + struct mutex workload_profile_mutex; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 1c0fd95c3820471888231addf80eb2c29c1e9094..ce52b4d75e9439abc1a8fd309d91111ce398eb5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -754,6 +754,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; @@ -761,8 +762,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool gds_switch_needed = ring->funcs->emit_gds_switch && job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; - struct dma_fence *fence = NULL; + bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; + struct dma_fence *fence = NULL; unsigned int patch; int r; @@ -785,8 +787,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && job->base.s_fence && + &job->base.s_fence->scheduled == isolation->spearhead; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && - !(job->enforce_isolation && !job->vmid)) + !cleaner_shader_needed) return 0; amdgpu_ring_ib_begin(ring); @@ -797,9 +803,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) + if (cleaner_shader_needed) ring->funcs->emit_cleaner_shader(ring); if (vm_flush_needed) { @@ -821,7 +825,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, job->oa_size); } - if (vm_flush_needed || pasid_mapping_needed) { + if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; @@ -843,6 +847,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, id->pasid_mapping = dma_fence_get(fence); mutex_unlock(&id_mgr->lock); } + + /* + * Make sure that all other submissions wait for the cleaner shader to + * finish before we push them to the HW. 
+ */ + if (cleaner_shader_needed) { + trace_amdgpu_cleaner_shader(ring, fence); + mutex_lock(&adev->enforce_isolation_mutex); + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(fence); + mutex_unlock(&adev->enforce_isolation_mutex); + } dma_fence_put(fence); amdgpu_ring_patch_cond_exec(ring, patch); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index b9f6d89dafb2ec929814092cfb34d1a98b2db733..d8772cd6db6334f2c973153308ec3b6d84530a25 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1626,6 +1626,20 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 26 && + adev->mes.fw_version[0] >= 114) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index da364e04f09c9ecf78e22e6ed8a1892f928cfc25..dceb5ad38862c35708cc1d51e311daa5dbba9379 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -2637,7 +2637,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -2692,12 +2691,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v12_0_cp_gfx_start(adev); - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -3037,10 +3030,6 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) if (r) goto done; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } done: return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index efe45e4edfd70b0c9725b05af9947b7a8ec45bbb..736398b0d16d9200861774d4c25c2444bd492fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -867,9 +867,8 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: - bank->aca_err_type = ACA_ERROR_TYPE_UE; - ret = aca_error_cache_log_bank_error(handle, &info, - ACA_ERROR_TYPE_UE, 1ULL); + bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index fd34dc13808110cf010d1fbfe29eff81d08742ec..dc94d58d33a6bfd333f2edc7d13874fecf5ea013 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -31,6 +31,7 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" #include "amdgpu_reset.h" +#include "gc/gc_9_0_sh_mask.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" 
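The spearhead bookkeeping above is the producer side of the isolation scheme: amdgpu_vm_flush() publishes the cleaner-shader fence, and later submissions are expected to wait on it before reaching the hardware. A minimal consumer-side sketch follows; the helper name is hypothetical, and only the isolation fields and amdgpu_sync_fence()'s new gfp parameter come from this series.

/* Hypothetical consumer-side sketch, not part of the patch. */
static int example_sync_to_spearhead(struct amdgpu_device *adev,
				     struct amdgpu_ring *ring,
				     struct amdgpu_sync *sync)
{
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	int r = 0;

	mutex_lock(&adev->enforce_isolation_mutex);
	/* GFP_NOWAIT assumed: a GFP_KERNEL allocation here could recurse
	 * into reclaim, which may itself wait on the fences being handled.
	 */
	if (isolation->spearhead && !dma_fence_is_signaled(isolation->spearhead))
		r = amdgpu_sync_fence(sync, isolation->spearhead, GFP_NOWAIT);
	mutex_unlock(&adev->enforce_isolation_mutex);

	return r;
}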
@@ -672,12 +673,11 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) * @adev: amdgpu_device pointer * @i: instance to resume * @restore: whether to restore the cached wptr on restart - * @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error) * * Set up the gfx DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -714,7 +714,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b /* For the guilty queue, set RPTR to the current wptr to skip bad commands; * if it is not a guilty queue, restore the cached rptr and continue execution. */ - if (guilty) + if (adev->sdma.instance[i].gfx_guilty) rwptr = ring->wptr; else rwptr = ring->cached_rptr; @@ -779,12 +779,11 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b * @adev: amdgpu_device pointer * @i: instance to resume * @restore: whether to restore the cached wptr on restart - * @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error) * * Set up the page DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty) +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -803,7 +802,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, /* For the guilty queue, set RPTR to the current wptr to skip bad commands; * if it is not a guilty queue, restore the cached rptr and continue execution. */ - if (guilty) + if (adev->sdma.instance[i].page_guilty) rwptr = ring->wptr; else rwptr = ring->cached_rptr; @@ -989,9 +988,9 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty); + sdma_v4_4_2_gfx_resume(adev, i, restore); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_resume(adev, i, restore, adev->sdma.page_guilty); + sdma_v4_4_2_page_resume(adev, i, restore); /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); @@ -1292,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) seq, 0xffffffff, 4); } - -/** - * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA +/* + * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value + * @vmid: The VMID to invalidate + * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) * - * @ring: amdgpu_ring pointer - * @vmid: vmid number to use - * @pd_addr: address + * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID + * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and + * L2 PDEs) are invalidated.
+ */ +static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA + * @ring: The SDMA ring + * @vmid: The VMID to flush + * @pd_addr: The page directory address * - * Update the page table base and flush the VM TLB - * using sDMA. + * This function emits the necessary register writes and waits to perform a VM flush for the + * specified VMID. It updates the PTB address registers and issues a VM invalidation request + * using the specified VM invalidation engine. */ static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_device *adev = ring->adev; + uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); + unsigned int eng = ring->vm_inv_eng; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + /* + * Construct and emit the VM invalidation packet + */ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); } static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1445,6 +1494,11 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); + /* Initialize guilty flags for GFX and PAGE queues */ + adev->sdma.instance[i].gfx_guilty = false; + adev->sdma.instance[i].page_guilty = false; + ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1506,9 +1560,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) return r; - /* Initialize guilty flags for GFX and PAGE queues */ - adev->sdma.gfx_guilty = false; - adev->sdma.page_guilty = false; return r; } @@ -1666,7 +1717,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; u32 id = GET_INST(SDMA0, ring->me); - return amdgpu_sdma_reset_engine(adev, id, true); + int r; + + if (!(adev->sdma.supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + amdgpu_amdkfd_suspend(adev, false); + r = amdgpu_sdma_reset_engine(adev, id); + amdgpu_amdkfd_resume(adev, false); + + return r; } static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) @@ -1679,9 +1739,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_ return -EINVAL; /* Check if this queue is the guilty one */ - adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, false); + adev->sdma.instance[instance_id].gfx_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, false); if (adev->sdma.has_page_queue) - adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, true); + adev->sdma.instance[instance_id].page_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, true); /* Cache the rptr before reset, after the reset, * all of the registers will be reset to 0 @@ -2115,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2148,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -2347,6 +2407,9 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev) */ static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) { + /* per queue reset not supported for SRIOV */ + if (amdgpu_sriov_vf(adev)) + return; /* * the user queue relies on MEC fw and pmfw when the sdma queue do reset. 
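The new "4 + 2 * 3" term in the emit_frame_size arithmetic above is stated without a breakdown; the accounting below is one consistent reading of it, illustrative rather than taken from the patch itself.

/*
 * Probable dword budget for the reworked sdma_v4_4_2_ring_emit_vm_flush():
 *   2 * 3 - two amdgpu_ring_emit_wreg() calls (an SDMA SRBM_WRITE packet
 *           is 3 dwords each) updating the ctx0 PTB address lo/hi registers
 *   4     - the SDMA_PKT_VM_INVALIDATION packet: HEADER, INVALIDATEREQ,
 *           ADDRESSRANGELO and ADDRESSRANGEHI words
 * This replaces the old SOC15_FLUSH_GPU_TLB_NUM_WREG/_REG_WAIT estimate,
 * since the flush no longer goes through amdgpu_gmc_emit_flush_gpu_tlb().
 */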
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 64891f0993666e62b26aa95f5412ffc217ba45c6..a3b5fda224328bfbf28999a19c0322bff9e4d860 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -151,6 +151,7 @@ struct ta_ras_init_flags { uint16_t xcc_mask; uint8_t channel_dis_num; uint8_t nps_mode; + uint32_t active_umc_mask; }; struct ta_ras_mca_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ff03436698a4f13fec9b74e4830188fd097cae8d..d716510b8dd686c5930dd3d2e704d7f363f6626f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -147,10 +147,15 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work) if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); } @@ -164,13 +169,26 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) atomic_inc(&adev->vcn.inst[0].total_submission_cnt); - if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) { + cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); + + /* We can safely return early here because we've cancelled the + * delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true.
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; + + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); +pg_lock: mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h index 8de4ccce5e38c9a594ccf572dec52f4082b03d2d..3ca8a417c6d828cff60089488ed41993c8313f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -64,6 +64,9 @@ #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 +/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ +#define SDMA_OP_VM_INVALIDATE 8 +#define SDMA_SUBOP_VM_INVALIDATE 4 /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 @@ -3331,5 +3334,72 @@ #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) +/* +** Definitions for SDMA_PKT_VM_INVALIDATION packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 +#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) + +/*define for xcc0_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) + +/*define for xcc1_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) + +/*define for mmhub_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 +#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) + +/*define for INVALIDATEREQ word*/ +/*define 
for invalidatereq field*/ +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) + +/*define for ADDRESSRANGELO word*/ +/*define for addressrangelo field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) + +/*define for ADDRESSRANGEHI word*/ +/*define for invalidateack field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) + +/*define for addressrangehi field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) + +/*define for reserved field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) #endif /* __SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2ed003d3ff0e01b08b0c4e58772bd233dbe30716..c610e172a2b83725d9742fb9649a084853bb8d4b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2310,7 +2310,7 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm) continue; /* Reset engine and check. 
*/ - if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || + if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) || dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || !set_sdma_queue_as_reset(dqm, doorbell_off)) { r = -ENOTRECOVERABLE; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 3f574d82b5fca02487384c176f9b08de3b25a10e..271c567242ab29082075ffa853d4630e7cbe2ea3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm, !pm->pmf->config_dequeue_wait_counts_size) return 0; + if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))) + return 0; + size = pm->pmf->config_dequeue_wait_counts_size; mutex_lock(&pm->lock); @@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm, retval = pm->pmf->config_dequeue_wait_counts(pm, buffer, cmd, value); - if (!retval) + if (!retval) { retval = kq_submit_packet(pm->priv_queue); - else + + /* If default value is modified, cache that in dqm->wait_times */ + if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) + update_dqm_wait_times(pm->dqm); + } else { kq_rollback_packet(pm->priv_queue); + } } - - /* If default value is modified, cache that value in dqm->wait_times */ - if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT) - update_dqm_wait_times(pm->dqm); - out: mutex_unlock(&pm->lock); return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index d440df602393403524a0dafc54dfda992b9e4115..2893fd5e5d0038805da7f6a625124501369ae7bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -310,6 +310,13 @@ static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manage reg_data); } +/* pm_config_dequeue_wait_counts_v9: Builds WRITE_DATA packet with + * register/value for configuring dequeue wait counts + * + * @return: negative value on failure; 0 on success, with the + * packet filled into the buffer + * + **/ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, uint32_t *buffer, enum kfd_config_dequeue_wait_counts_cmd cmd, @@ -321,24 +328,25 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm, switch (cmd) { case KFD_DEQUEUE_WAIT_INIT: { - uint32_t sch_wave = 0, que_sleep = 0; - /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. + uint32_t sch_wave = 0, que_sleep = 1; + + /* For all gfx9 ASICs >= gfx941, + * Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. * On a 1GHz machine this is roughly 1 microsecond, which is * about how long it takes to load data out of memory during * queue connect * QUE_SLEEP: Wait Count for Dequeue Retry.
+ * + * Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && - KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { - que_sleep = 1; - - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) - sch_wave = 1; - } else { - return 0; - } + if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) || + KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)) + return -EPERM; + + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) + sch_wave = 1; + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, ®_offset, ®_data); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 27e7356eed6f5526e876fb11e4ca5eada0681ccf..e477d7509646aa593ecf68bc2e10db2659eebbc2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -2006,10 +2006,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0)) - dev->node_props.capability |= - HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED; - if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0)) dev->node_props.capability |= HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 4f1cf8afe25f15651b28c741b240b9fde06f6f7b..bae83a129b5f2741c87fc0b0259069f759edd81d 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1752,7 +1752,7 @@ static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_ } if (quirk_entries.support_edp0_on_dp1) { init_data->flags.support_edp0_on_dp1 = true; - drm_info(dev, "aux_hpd_discon_quirk attached\n"); + drm_info(dev, "support_edp0_on_dp1 attached\n"); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 19a15acd15095771ac0ad45a480621fdaa793ad0..e4d22f74f9869166b61af7f57af8400608425774 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -130,7 +130,7 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -202,15 +202,19 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - /* No need to apply the w/a if we haven't taken over from bios yet */ - if (clk_mgr_base->clks.dispclk_khz) - dcn315_disable_otg_wa(clk_mgr_base, context, true); + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; 
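/*
 * Worked example of the clamp applied just below (values are
 * illustrative, not from the patch): with dc->debug.min_disp_clk_khz
 * set to 100000 and new_clocks->dispclk_khz = 50000,
 * requested_dispclk_khz is raised to 100000 before
 * dcn315_smu_set_dispclk() is called. A request of 0 only gets this
 * far when safe_to_lower is set and display_count is 0.
 */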
+ dcn315_disable_otg_wa(clk_mgr_base, context, true); + + /* Clamp the requested clock to PMFW based on their limit. */ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn315_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); - if (clk_mgr_base->clks.dispclk_khz) - dcn315_disable_otg_wa(clk_mgr_base, context, false); + dcn315_disable_otg_wa(clk_mgr_base, context, false); update_dispclk = true; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 4b19d9cf27cee250894539628b35eb21b348454b..49efea0c8fcffa09b698e1bf28589e6c8de677c8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -140,7 +140,7 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; struct dc *dc = clk_mgr_base->ctx->dc; - int display_count; + int display_count = 0; bool update_dppclk = false; bool update_dispclk = false; bool dpp_clock_lowered = false; @@ -209,11 +209,18 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base, update_dppclk = true; } - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz) && + (new_clocks->dispclk_khz > 0 || (safe_to_lower && display_count == 0))) { + int requested_dispclk_khz = new_clocks->dispclk_khz; + dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true); + /* Clamp the requested clock to PMFW based on their limit. 
*/ + if (dc->debug.min_disp_clk_khz > 0 && requested_dispclk_khz < dc->debug.min_disp_clk_khz) + requested_dispclk_khz = dc->debug.min_disp_clk_khz; + + dcn316_smu_set_dispclk(clk_mgr, requested_dispclk_khz); clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false); update_dispclk = true; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index af722519a1fa5309fec6491cae46eca9626cb7a1..142de8938d7c3d7f99a46dcf9f43917432a55ad3 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -204,6 +204,8 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * struct link_encoder *new_pipe_link_enc = new_pipe->link_res.dio_link_enc; struct link_encoder *pipe_link_enc = pipe->link_res.dio_link_enc; bool stream_changed_otg_dig_on = false; + bool has_active_hpo = false; + if (pipe->top_pipe || pipe->prev_odm_pipe) continue; @@ -225,20 +227,15 @@ static void dcn35_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state * new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled && new_pipe->stream_res.stream_enc->funcs->is_fifo_enabled(new_pipe->stream_res.stream_enc); - bool has_active_hpo = false; - if (old_pipe->stream && new_pipe->stream && old_pipe->stream == new_pipe->stream) { has_active_hpo = dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(old_pipe) && dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(new_pipe); - } - - - if (!has_active_hpo && !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe) && - (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || - !pipe_link_enc) && !stream_changed_otg_dig_on)) { - + } + if (!has_active_hpo && !stream_changed_otg_dig_on && pipe->stream && + (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) || !pipe_link_enc) && + !dccg->ctx->dc->link_srv->dp_is_128b_132b_signal(pipe)) { /* This w/a should not trigger when we have a dig active */ if (disable) { if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c index 261c3bc4d46e16307bab471802d9b70306fd4dbe..71e15da4bb69af110d9f90c70457cb875f0ced7a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c @@ -370,15 +370,10 @@ bool dc_link_should_enable_fec(const struct dc_link *link) return link->dc->link_srv->dp_should_enable_fec(link); } -int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( +void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( struct dc_link *link, int peak_bw) { - return link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw); -} - -void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result) -{ - link->dc->link_srv->dpia_handle_bw_alloc_response(link, bw, result); + link->dc->link_srv->dpia_handle_usb4_bandwidth_allocation_for_link(link, peak_bw); } bool dc_link_check_link_loss_status( diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 67e1bb6fa3351de555a88ab08a07ef3f313fd3e6..7c2ee052692604c3235af4abd721f5bbe55e6683 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ 
b/drivers/gpu/drm/amd/display/dc/dc.h @@ -53,7 +53,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.324" +#define DC_VER "3.2.325" /** * MAX_SURFACES - representative of the upper bound of surfaces that can be piped to a single CRTC @@ -2353,19 +2353,6 @@ unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); */ void dc_link_set_usb4_req_bw_req(struct dc_link *link, int req_bw); -/* - * Handle function for when the status of the Request above is complete. - * We will find out the result of allocating on CM and update structs. - * - * @link: pointer to the dc_link struct instance - * @bw: Allocated or Estimated BW depending on the result - * @result: Response type - * - * return: none - */ -void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, - uint8_t bw, uint8_t result); - /* * Handle the USB4 BW Allocation related functionality here: * Plug => Try to allocate max bw from timing parameters supported by the sink @@ -2374,9 +2361,8 @@ void dc_link_handle_usb4_bw_alloc_response(struct dc_link *link, * @link: pointer to the dc_link struct instance * @peak_bw: Peak bw used by the link/sink * - * return: allocated bw else return 0 */ -int dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( +void dc_link_dp_dpia_handle_usb4_bandwidth_allocation_for_link( struct dc_link *link, int peak_bw); /* diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index ca6da53f45adb88ef7affdaee23523b0219352a4..614e03bfd598460fb78c138028befecb5cd54b74 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -70,28 +70,20 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } } -bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv) +void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) { - struct dmub_srv *dmub; - struct dc_context *dc_ctx; + struct dmub_srv *dmub = dc_dmub_srv->dmub; + struct dc_context *dc_ctx = dc_dmub_srv->ctx; enum dmub_status status; - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; - - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - do { - status = dmub_srv_wait_for_pending(dmub, 100000); + status = dmub_srv_wait_for_idle(dmub, 100000); } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); if (status != DMUB_STATUS_OK) { DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); } - - return status == DMUB_STATUS_OK; } void dc_dmub_srv_clear_inbox0_ack(struct dc_dmub_srv *dc_dmub_srv) @@ -134,49 +126,7 @@ void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dc_dmub_srv, } } -static bool dc_dmub_srv_reg_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, - unsigned int count, - union dmub_rb_cmd *cmd_list) -{ - struct dc_context *dc_ctx; - struct dmub_srv *dmub; - enum dmub_status status = DMUB_STATUS_OK; - int i; - - if (!dc_dmub_srv || !dc_dmub_srv->dmub) - return false; - - dc_ctx = dc_dmub_srv->ctx; - dmub = dc_dmub_srv->dmub; - - for (i = 0 ; i < count; i++) { - /* confirm no messages pending */ - do { - status = dmub_srv_wait_for_idle(dmub, 100000); - } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); - - /* queue command */ - if (status == DMUB_STATUS_OK) - status = dmub_srv_reg_cmd_execute(dmub, &cmd_list[i]); - - /* check for errors */ - if (status != DMUB_STATUS_OK) { - break; - } - } - - if (status != DMUB_STATUS_OK) { - if (status != 
DMUB_STATUS_POWER_STATE_D3) { - DC_ERROR("Error starting DMUB execution: status=%d\n", status); - dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); - } - return false; - } - - return true; -} - -static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, +bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list) { @@ -193,16 +143,11 @@ static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_sr for (i = 0 ; i < count; i++) { // Queue command - if (!cmd_list[i].cmd_common.header.multi_cmd_pending || - dmub_rb_num_free(&dmub->inbox1.rb) >= count - i) { - status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); - } else { - status = DMUB_STATUS_QUEUE_FULL; - } + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); if (status == DMUB_STATUS_QUEUE_FULL) { /* Execute and wait for queue to become empty again. */ - status = dmub_srv_fb_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); if (status == DMUB_STATUS_POWER_STATE_D3) return false; @@ -211,7 +156,7 @@ static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_sr } while (dc_dmub_srv->ctx->dc->debug.disable_timeout && status != DMUB_STATUS_OK); /* Requeue the command. */ - status = dmub_srv_fb_cmd_queue(dmub, &cmd_list[i]); + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); } if (status != DMUB_STATUS_OK) { @@ -223,7 +168,7 @@ static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_sr } } - status = dmub_srv_fb_cmd_execute(dmub); + status = dmub_srv_cmd_execute(dmub); if (status != DMUB_STATUS_OK) { if (status != DMUB_STATUS_POWER_STATE_D3) { DC_ERROR("Error starting DMUB execution: status=%d\n", status); @@ -235,23 +180,6 @@ static bool dc_dmub_srv_fb_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_sr return true; } -bool dc_dmub_srv_cmd_list_queue_execute(struct dc_dmub_srv *dc_dmub_srv, - unsigned int count, - union dmub_rb_cmd *cmd_list) -{ - bool res = false; - - if (dc_dmub_srv && dc_dmub_srv->dmub) { - if (dc_dmub_srv->dmub->inbox_type == DMUB_CMD_INTERFACE_REG) { - res = dc_dmub_srv_reg_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); - } else { - res = dc_dmub_srv_fb_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list); - } - } - - return res; -} - bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, enum dm_dmub_wait_type wait_type, union dmub_rb_cmd *cmd_list) @@ -274,8 +202,7 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); if (!dmub->debug.timeout_info.timeout_occured) { dmub->debug.timeout_info.timeout_occured = true; - if (cmd_list) - dmub->debug.timeout_info.timeout_cmd = *cmd_list; + dmub->debug.timeout_info.timeout_cmd = *cmd_list; dmub->debug.timeout_info.timestamp = dm_get_timestamp(dc_dmub_srv->ctx); } dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); @@ -283,9 +210,8 @@ bool dc_dmub_srv_wait_for_idle(struct dc_dmub_srv *dc_dmub_srv, } // Copy data back from ring buffer into command - if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY && cmd_list) { - dmub_srv_cmd_get_response(dc_dmub_srv->dmub, cmd_list); - } + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); } return true; @@ -298,10 +224,74 @@ bool dc_dmub_srv_cmd_run(struct dc_dmub_srv *dc_dmub_srv, union dmub_rb_cmd *cmd bool dc_dmub_srv_cmd_run_list(struct dc_dmub_srv *dc_dmub_srv, unsigned int count, union dmub_rb_cmd *cmd_list, enum dm_dmub_wait_type wait_type) { - if 
(!dc_dmub_srv_cmd_list_queue_execute(dc_dmub_srv, count, cmd_list)) + struct dc_context *dc_ctx; + struct dmub_srv *dmub; + enum dmub_status status; + int i; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return false; + + dc_ctx = dc_dmub_srv->ctx; + dmub = dc_dmub_srv->dmub; + + for (i = 0 ; i < count; i++) { + // Queue command + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + + if (status == DMUB_STATUS_QUEUE_FULL) { + /* Execute and wait for queue to become empty again. */ + status = dmub_srv_cmd_execute(dmub); + if (status == DMUB_STATUS_POWER_STATE_D3) + return false; + + status = dmub_srv_wait_for_idle(dmub, 100000); + if (status != DMUB_STATUS_OK) + return false; + + /* Requeue the command. */ + status = dmub_srv_cmd_queue(dmub, &cmd_list[i]); + } + + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error queueing DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } + return false; + } + } + + status = dmub_srv_cmd_execute(dmub); + if (status != DMUB_STATUS_OK) { + if (status != DMUB_STATUS_POWER_STATE_D3) { + DC_ERROR("Error starting DMUB execution: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + } return false; + } + + // Wait for DMUB to process command + if (wait_type != DM_DMUB_WAIT_TYPE_NO_WAIT) { + if (dc_dmub_srv->ctx->dc->debug.disable_timeout) { + do { + status = dmub_srv_wait_for_idle(dmub, 100000); + } while (status != DMUB_STATUS_OK); + } else + status = dmub_srv_wait_for_idle(dmub, 100000); - return dc_dmub_srv_wait_for_idle(dc_dmub_srv, wait_type, cmd_list); + if (status != DMUB_STATUS_OK) { + DC_LOG_DEBUG("No reply for DMUB command: status=%d\n", status); + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); + return false; + } + + // Copy data back from ring buffer into command + if (wait_type == DM_DMUB_WAIT_TYPE_WAIT_WITH_REPLY) + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd_list); + } + + return true; } bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv) @@ -1253,7 +1243,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dc_dmub_srv_wait_for_idle(dc->ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); memset(&new_signals, 0, sizeof(new_signals)); @@ -1410,7 +1400,7 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); - dmub_srv_sync_inboxes(dc->ctx->dmub_srv->dmub); + dmub_srv_sync_inbox1(dc->ctx->dmub_srv->dmub); } } @@ -1664,8 +1654,7 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - global_cmd->header.payload_bytes = - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + global_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); if (enable) { /* send global configuration parameters */ @@ -1684,13 +1673,11 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* configure command header */ stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_base_cmd->header.payload_bytes = - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct 
dmub_cmd_header); stream_base_cmd->header.multi_cmd_pending = 1; stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_sub_state_cmd->header.payload_bytes = - sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); stream_sub_state_cmd->header.multi_cmd_pending = 1; /* copy stream static base state */ memcpy(&stream_base_cmd->config, @@ -1736,8 +1723,7 @@ void dc_dmub_srv_fams2_drr_update(struct dc *dc, cmd.fams2_drr_update.dmub_optc_state_req.v_total_mid_frame_num = vtotal_mid_frame_num; cmd.fams2_drr_update.dmub_optc_state_req.program_manual_trigger = program_manual_trigger; - cmd.fams2_drr_update.header.payload_bytes = - sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); + cmd.fams2_drr_update.header.payload_bytes = sizeof(cmd.fams2_drr_update) - sizeof(cmd.fams2_drr_update.header); dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); } @@ -1773,8 +1759,7 @@ void dc_dmub_srv_fams2_passthrough_flip( /* build command header */ cmds[num_cmds].fams2_flip.header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; cmds[num_cmds].fams2_flip.header.sub_type = DMUB_CMD__FAMS2_FLIP; - cmds[num_cmds].fams2_flip.header.payload_bytes = - sizeof(struct dmub_rb_cmd_fams2_flip) - sizeof(struct dmub_cmd_header); + cmds[num_cmds].fams2_flip.header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2_flip); /* for chaining multiple commands, all but last command should set to 1 */ cmds[num_cmds].fams2_flip.header.multi_cmd_pending = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index ada5c2fb2db300b13de4c2f4e3de61ceac490deb..a636f4c3f01db09a29b535f55d00c3fc5048e12c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -58,7 +58,7 @@ struct dc_dmub_srv { bool needs_idle_wake; }; -bool dc_dmub_srv_wait_for_pending(struct dc_dmub_srv *dc_dmub_srv); +void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv); bool dc_dmub_srv_optimized_init_done(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 72b87b78da0e7712124d9ba3b1223447036e5e0d..8f077e15b4f0610cc8a59c116674cb86d1529d15 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -682,7 +682,7 @@ void reg_sequence_wait_done(const struct dc_context *ctx) if (offload && ctx->dc->debug.dmub_offload_enabled && !ctx->dc->debug.dmcub_emulation) { - dc_dmub_srv_wait_for_idle(ctx->dmub_srv, DM_DMUB_WAIT_TYPE_WAIT, NULL); + dc_dmub_srv_wait_idle(ctx->dmub_srv); } } diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index acd3b373a18e73b0ea5bfbc071ba13acb38c1e33..83ffaae9f4396c0425434e3d51009543460a16e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1224,7 +1224,6 @@ struct dc_dpia_bw_alloc { int bw_granularity; // BW Granularity int dp_overhead; // DP overhead in dp tunneling bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3: DP-Tx & Dpia & CM - bool response_ready; // Response ready from the CM side uint8_t nrd_max_lane_count; // Non-reduced max lane count uint8_t nrd_max_link_rate; // Non-reduced max link rate }; diff --git 
a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c index a641ae04450c4ac4c4a292995f7fc0b3e4017d11..0d7e7f3b81a1f82fe0e279cc0bb4df43c983f960 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm_lcd.c @@ -240,8 +240,7 @@ bool dmub_abm_save_restore( cmd.abm_save_restore.abm_init_config_data.version = DMUB_CMD_ABM_CONTROL_VERSION_1; cmd.abm_save_restore.abm_init_config_data.panel_mask = panel_mask; - cmd.abm_save_restore.header.payload_bytes = - sizeof(struct dmub_rb_cmd_abm_save_restore) - sizeof(struct dmub_cmd_header); + cmd.abm_save_restore.header.payload_bytes = sizeof(struct dmub_rb_cmd_abm_save_restore); dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index a262598aa6766aca5379d59b698b23ea80dc076b..d37ecfdde4f1bc06d833b8be09853420dee1181b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -73,5 +73,16 @@ bool should_use_dmub_lock(struct dc_link *link) if (link->replay_settings.replay_feature_enabled) return true; + /* only use HW lock for PSR1 on single eDP */ + if (link->psr_settings.psr_version == DC_PSR_VERSION_1) { + struct dc_link *edp_links[MAX_NUM_EDP]; + int edp_num; + + dc_get_edp_links(link->dc, edp_links, &edp_num); + + if (edp_num == 1) + return true; + } + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index fcd3d86ad5173a0e6c7663c7624857eee9526525..c31e4f26a305b0d5ba1002c38a6eb4e7877fa7fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -280,9 +280,7 @@ static void dmub_replay_set_power_opt_and_coasting_vtotal(struct dmub_replay *dm memset(&cmd, 0, sizeof(cmd)); pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_POWER_OPT_AND_COASTING_VTOTAL; - pCmd->header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal) - - sizeof(struct dmub_cmd_header); + pCmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_set_power_opt_and_coasting_vtotal); pCmd->replay_set_power_opt_data.power_opt = power_opt; pCmd->replay_set_power_opt_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); @@ -321,8 +319,7 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_timing_sync.header.sub_type = DMUB_CMD__REPLAY_SET_TIMING_SYNC_SUPPORTED; cmd.replay_set_timing_sync.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_timing_sync) - - sizeof(struct dmub_cmd_header); + sizeof(struct dmub_rb_cmd_replay_set_timing_sync); //Cmd Body cmd.replay_set_timing_sync.replay_set_timing_sync_data.panel_inst = cmd_element->sync_data.panel_inst; @@ -334,8 +331,7 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_frameupdate_timer.header.sub_type = DMUB_CMD__REPLAY_SET_RESIDENCY_FRAMEUPDATE_TIMER; cmd.replay_set_frameupdate_timer.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer) - - sizeof(struct dmub_cmd_header); + sizeof(struct dmub_rb_cmd_replay_set_frameupdate_timer); //Cmd Body cmd.replay_set_frameupdate_timer.data.panel_inst = cmd_element->panel_inst; @@ -349,8 +345,7 @@ static void dmub_replay_send_cmd(struct 
dmub_replay *dmub, cmd.replay_set_pseudo_vtotal.header.sub_type = DMUB_CMD__REPLAY_SET_PSEUDO_VTOTAL; cmd.replay_set_pseudo_vtotal.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal) - - sizeof(struct dmub_cmd_header); + sizeof(struct dmub_rb_cmd_replay_set_pseudo_vtotal); //Cmd Body cmd.replay_set_pseudo_vtotal.data.panel_inst = cmd_element->pseudo_vtotal_data.panel_inst; @@ -362,8 +357,7 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_disabled_adaptive_sync_sdp.header.sub_type = DMUB_CMD__REPLAY_DISABLED_ADAPTIVE_SYNC_SDP; cmd.replay_disabled_adaptive_sync_sdp.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp) - - sizeof(struct dmub_cmd_header); + sizeof(struct dmub_rb_cmd_replay_disabled_adaptive_sync_sdp); //Cmd Body cmd.replay_disabled_adaptive_sync_sdp.data.panel_inst = cmd_element->disabled_adaptive_sync_sdp_data.panel_inst; @@ -375,8 +369,7 @@ static void dmub_replay_send_cmd(struct dmub_replay *dmub, cmd.replay_set_general_cmd.header.sub_type = DMUB_CMD__REPLAY_SET_GENERAL_CMD; cmd.replay_set_general_cmd.header.payload_bytes = - sizeof(struct dmub_rb_cmd_replay_set_general_cmd) - - sizeof(struct dmub_cmd_header); + sizeof(struct dmub_rb_cmd_replay_set_general_cmd); //Cmd Body cmd.replay_set_general_cmd.data.panel_inst = cmd_element->set_general_cmd_data.panel_inst; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c index e8efffcc69a162f6d8548ee3fe0e251d2822bdfb..92f0a099d089ac1c0b82ee8427cebb03871f1182 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c @@ -159,7 +159,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = { .dppclk_mhz = 1200.0, .phyclk_mhz = 810.0, .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, + .dscclk_mhz = 400.0, .dtbclk_mhz = 600.0, }, }, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 84a2de9a76d4c33202a6d46092f79be112a75f2e..7ae9c0ba0c9e6aa22848591f209a39c147257ee3 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -32,6 +32,7 @@ #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define TB_BORROWED_MAX 400 +#define DML_MAX_VSTARTUP_START 1023 // --------------------------- // Declaration Begins @@ -6210,6 +6211,7 @@ static dml_uint_t CalculateMaxVStartup( dml_print("DML::%s: vblank_avail = %u\n", __func__, vblank_avail); dml_print("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines); #endif + max_vstartup_lines = (dml_uint_t) dml_min(max_vstartup_lines, DML_MAX_VSTARTUP_START); return max_vstartup_lines; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 2061d43b92e1b99467cd68700a4f1280f5c778e3..70c39df62533e55a67f4a1f5f231bc16938d7ffa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -590,11 +590,11 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; - p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; p->out_states->state_array[i].dtbclk_mhz = 
max_dtbclk_mhz; /* Dependent states. */ + p->out_states->state_array[i].dscclk_mhz = p->in_states->state_array[i].dscclk_mhz; p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz; diff --git a/drivers/gpu/drm/amd/display/dc/inc/link.h b/drivers/gpu/drm/amd/display/dc/inc/link.h index fd1f9d3db039bbac89579dc01b85a97d289b853b..2948a696ee12088dbc045c1e3e533e46b89dad47 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link.h @@ -218,10 +218,8 @@ struct link_service { /*************************** DP DPIA/PHY ******************************/ - int (*dpia_handle_usb4_bandwidth_allocation_for_link)( + void (*dpia_handle_usb4_bandwidth_allocation_for_link)( struct dc_link *link, int peak_bw); - void (*dpia_handle_bw_alloc_response)( - struct dc_link *link, uint8_t bw, uint8_t result); void (*dp_set_drive_settings)( struct dc_link *link, const struct link_resource *link_res, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 321fd1785370197cd2d3941e7818176611723baf..268626e73c543d442f6194a8062c44905944ba80 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -2291,22 +2291,7 @@ static bool allocate_usb4_bandwidth_for_stream(struct dc_stream_state *stream, i link->dpia_bw_alloc_config.dp_overhead = link_dp_dpia_get_dp_overhead_in_dp_tunneling(link); req_bw += link->dpia_bw_alloc_config.dp_overhead; - if (link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, req_bw)) { - if (req_bw <= link->dpia_bw_alloc_config.allocated_bw) { - DC_LOG_DEBUG("%s, Success in allocate bw for link(%d), allocated_bw(%d), dp_overhead(%d)\n", - __func__, link->link_index, link->dpia_bw_alloc_config.allocated_bw, - link->dpia_bw_alloc_config.dp_overhead); - } else { - // Cannot get the required bandwidth. 
- DC_LOG_ERROR("%s, Failed to allocate bw for link(%d), allocated_bw(%d), dp_overhead(%d)\n", - __func__, link->link_index, link->dpia_bw_alloc_config.allocated_bw, - link->dpia_bw_alloc_config.dp_overhead); - return false; - } - } else { - DC_LOG_DEBUG("%s, usb4 request bw timeout\n", __func__); - return false; - } + link_dp_dpia_allocate_usb4_bandwidth_for_stream(link, req_bw); if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { int i = 0; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index a7877d57a00fa13ce8acddfcac9a687d132e5f4c..f6b6b19e748149dd46c8267298dbf253c4cc6ec0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -175,7 +175,6 @@ static void construct_link_service_dp_phy_or_dpia(struct link_service *link_srv) { link_srv->dpia_handle_usb4_bandwidth_allocation_for_link = dpia_handle_usb4_bandwidth_allocation_for_link; - link_srv->dpia_handle_bw_alloc_response = dpia_handle_bw_alloc_response; link_srv->dp_set_drive_settings = dp_set_drive_settings; link_srv->dpcd_write_rx_power_ctrl = dpcd_write_rx_power_ctrl; } diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c index c149210096ac723026a06380df6cf5849224c156..0d123e647652ea41121524d46ed63c9f9707b871 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia.c @@ -92,7 +92,6 @@ bool dpia_query_hpd_status(struct dc_link *link) /* prepare QUERY_HPD command */ cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE; - cmd.query_hpd.header.payload_bytes = sizeof(cmd.query_hpd.data); cmd.query_hpd.data.instance = link->link_id.enum_id - ENUM_ID_1; cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c index a5541b8fc95bdc355fdc2ca42a7380934c36995d..a254ead2f7e80addeb75b8d8a0e257326f997e8d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.c @@ -24,7 +24,7 @@ * */ /*********************************************************************/ -// USB4 DPIA BANDWIDTH ALLOCATION LOGIC +// USB4 DPIA BANDWIDTH ALLOCATION LOGIC /*********************************************************************/ #include "link_dp_dpia_bw.h" #include "link_dpcd.h" @@ -36,7 +36,7 @@ #define Kbps_TO_Gbps (1000 * 1000) // ------------------------------------------------------------------ -// PRIVATE FUNCTIONS +// PRIVATE FUNCTIONS // ------------------------------------------------------------------ /* * Always Check the following: @@ -44,11 +44,11 @@ * - Is HPD HIGH? * - Is BW Allocation Support Mode enabled on DP-Tx? 
*/ -static bool get_bw_alloc_proceed_flag(struct dc_link *tmp) +static bool link_dp_is_bw_alloc_available(struct dc_link *link) { - return (tmp && DISPLAY_ENDPOINT_USB4_DPIA == tmp->ep_type - && tmp->hpd_status - && tmp->dpia_bw_alloc_config.bw_alloc_enabled); + return (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA + && link->hpd_status + && link->dpia_bw_alloc_config.bw_alloc_enabled); } static void reset_bw_alloc_struct(struct dc_link *link) @@ -60,7 +60,6 @@ static void reset_bw_alloc_struct(struct dc_link *link) link->dpia_bw_alloc_config.estimated_bw = 0; link->dpia_bw_alloc_config.bw_granularity = 0; link->dpia_bw_alloc_config.dp_overhead = 0; - link->dpia_bw_alloc_config.response_ready = false; link->dpia_bw_alloc_config.nrd_max_lane_count = 0; link->dpia_bw_alloc_config.nrd_max_link_rate = 0; for (int i = 0; i < MAX_SINKS_PER_LINK; i++) @@ -243,20 +242,20 @@ static int get_host_router_total_dp_tunnel_bw(const struct dc *dc, uint8_t hr_in static void dpia_bw_alloc_unplug(struct dc_link *link) { if (link) { - DC_LOG_DEBUG("%s: resetting bw alloc config for link(%d)\n", + DC_LOG_DEBUG("%s: resetting BW alloc config for link(%d)\n", __func__, link->link_index); reset_bw_alloc_struct(link); } } -static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) +static void link_dpia_send_bw_alloc_request(struct dc_link *link, int req_bw) { uint8_t requested_bw; uint32_t temp; /* Error check whether request bw greater than allocated */ if (req_bw > link->dpia_bw_alloc_config.estimated_bw) { - DC_LOG_ERROR("%s: Request bw greater than estimated bw for link(%d)\n", + DC_LOG_ERROR("%s: Request BW greater than estimated BW for link(%d)\n", __func__, link->link_index); req_bw = link->dpia_bw_alloc_config.estimated_bw; } @@ -271,32 +270,17 @@ static void set_usb4_req_bw_req(struct dc_link *link, int req_bw) /* Error check whether requested and allocated are equal */ req_bw = requested_bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); if (req_bw && (req_bw == link->dpia_bw_alloc_config.allocated_bw)) { - DC_LOG_ERROR("%s: Request bw equals to allocated bw for link(%d)\n", + DC_LOG_ERROR("%s: Request BW equals allocated BW for link(%d)\n", __func__, link->link_index); } - link->dpia_bw_alloc_config.response_ready = false; // Reset flag - core_link_write_dpcd( - link, - REQUESTED_BW, + core_link_write_dpcd(link, REQUESTED_BW, &requested_bw, sizeof(uint8_t)); } -/* - * Return the response_ready flag from dc_link struct - * - * @link: pointer to the dc_link struct instance - * - * return: response_ready flag from dc_link struct - */ -static bool get_cm_response_ready_flag(struct dc_link *link) -{ - return link->dpia_bw_alloc_config.response_ready; -} - // ------------------------------------------------------------------ -// PUBLIC FUNCTIONS +// PUBLIC FUNCTIONS // ------------------------------------------------------------------ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link) { @@ -370,9 +354,15 @@ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status) DC_LOG_DEBUG("%s: BW Allocation request succeeded on link(%d)", __func__, link->link_index); } else if (status & DP_TUNNELING_BW_REQUEST_FAILED) { + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + DC_LOG_DEBUG("%s: BW Allocation request failed on link(%d) allocated/estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); + + link_dpia_send_bw_alloc_request(link, link->dpia_bw_alloc_config.estimated_bw); } else if (status &
DP_TUNNELING_ESTIMATED_BW_CHANGED) { + link->dpia_bw_alloc_config.estimated_bw = get_estimated_bw(link); + DC_LOG_DEBUG("%s: Estimated BW changed on link(%d) new estimated BW=%d", __func__, link->link_index, link->dpia_bw_alloc_config.estimated_bw); } @@ -382,141 +372,36 @@ void link_dp_dpia_handle_bw_alloc_status(struct dc_link *link, uint8_t status) &status, sizeof(status)); } -void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result) +/* + * Handle the DP Bandwidth allocation for DPIA + * + */ +void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw) { - int bw_needed = 0; - int estimated = 0; - - if (!get_bw_alloc_proceed_flag((link))) - return; - - switch (result) { - - case DPIA_BW_REQ_FAILED: - - /* - * Ideally, we shouldn't run into this case as we always validate available - * bandwidth and request within that limit - */ - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - - DC_LOG_ERROR("%s: BW REQ FAILURE for DP-TX Request for link(%d)\n", - __func__, link->link_index); - DC_LOG_ERROR("%s: current estimated_bw(%d), new estimated_bw(%d)\n", - __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); - - /* Update the new Estimated BW value updated by CM */ - link->dpia_bw_alloc_config.estimated_bw = estimated; - - /* Allocate the previously requested bandwidth */ - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.estimated_bw); - - /* - * If FAIL then it is either: - * 1. Due to DP-Tx trying to allocate more than available i.e. it failed locally - * => get estimated and allocate that - * 2. Due to the fact that DP-Tx tried to allocated ESTIMATED BW and failed then - * CM will have to update 0xE0023 with new ESTIMATED BW value. - */ - break; - - case DPIA_BW_REQ_SUCCESS: - - bw_needed = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - - DC_LOG_DEBUG("%s: BW REQ SUCCESS for DP-TX Request for link(%d)\n", - __func__, link->link_index); - DC_LOG_DEBUG("%s: current allocated_bw(%d), new allocated_bw(%d)\n", - __func__, link->dpia_bw_alloc_config.allocated_bw, bw_needed); - - link->dpia_bw_alloc_config.allocated_bw = bw_needed; + if (link && link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA && link->dpia_bw_alloc_config.bw_alloc_enabled) { + //1. Hot Plug + if (link->hpd_status && peak_bw > 0) { + // If DP over USB4 then we need to check BW allocation + link->dpia_bw_alloc_config.link_max_bw = peak_bw; - link->dpia_bw_alloc_config.response_ready = true; - break; - - case DPIA_EST_BW_CHANGED: - - estimated = bw * (Kbps_TO_Gbps / link->dpia_bw_alloc_config.bw_granularity); - - DC_LOG_DEBUG("%s: ESTIMATED BW CHANGED for link(%d)\n", - __func__, link->link_index); - DC_LOG_DEBUG("%s: current estimated_bw(%d), new estimated_bw(%d)\n", - __func__, link->dpia_bw_alloc_config.estimated_bw, estimated); - - link->dpia_bw_alloc_config.estimated_bw = estimated; - break; - - case DPIA_BW_ALLOC_CAPS_CHANGED: - - DC_LOG_ERROR("%s: BW ALLOC CAPABILITY CHANGED to Disabled for link(%d)\n", - __func__, link->link_index); - link->dpia_bw_alloc_config.bw_alloc_enabled = false; - break; + link_dpia_send_bw_alloc_request(link, peak_bw); + } + //2. Cold Unplug + else if (!link->hpd_status) + dpia_bw_alloc_unplug(link); } } -int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw) -{ - int ret = 0; - uint8_t timeout = 10; - - if (!(link && DISPLAY_ENDPOINT_USB4_DPIA == link->ep_type - && link->dpia_bw_alloc_config.bw_alloc_enabled)) - goto out; - - //1. 
Hot Plug - if (link->hpd_status && peak_bw > 0) { - - // If DP over USB4 then we need to check BW allocation - link->dpia_bw_alloc_config.link_max_bw = peak_bw; - set_usb4_req_bw_req(link, link->dpia_bw_alloc_config.link_max_bw); - do { - if (timeout > 0) - timeout--; - else - break; - msleep(10); - } while (!get_cm_response_ready_flag(link)); - - if (!timeout) - ret = 0;// ERROR TIMEOUT waiting for response for allocating bw - else if (link->dpia_bw_alloc_config.allocated_bw > 0) - ret = link->dpia_bw_alloc_config.allocated_bw; - } - //2. Cold Unplug - else if (!link->hpd_status) - dpia_bw_alloc_unplug(link); - -out: - return ret; -} -bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) +void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw) { - bool ret = false; - uint8_t timeout = 10; - DC_LOG_DEBUG("%s: ENTER: link(%d), hpd_status(%d), current allocated_bw(%d), req_bw(%d)\n", __func__, link->link_index, link->hpd_status, link->dpia_bw_alloc_config.allocated_bw, req_bw); - if (!get_bw_alloc_proceed_flag(link)) - goto out; - - set_usb4_req_bw_req(link, req_bw); - do { - if (timeout > 0) - timeout--; - else - break; - msleep(10); - } while (!get_cm_response_ready_flag(link)); - - if (timeout) - ret = true; - -out: - DC_LOG_DEBUG("%s: EXIT: timeout(%d), ret(%d)\n", __func__, timeout, ret); - return ret; + if (link_dp_is_bw_alloc_available(link)) + link_dpia_send_bw_alloc_request(link, req_bw); + else + DC_LOG_DEBUG("%s: Not able to send the BW Allocation request", __func__); } bool dpia_validate_usb4_bw(struct dc_link **link, int *bw_needed_per_dpia, const unsigned int num_dpias) @@ -567,7 +452,7 @@ int link_dp_dpia_get_dp_overhead_in_dp_tunneling(struct dc_link *link) { int dp_overhead = 0, link_mst_overhead = 0; - if (!get_bw_alloc_proceed_flag((link))) + if (!link_dp_is_bw_alloc_available(link)) return dp_overhead; /* if its mst link, add MTPH overhead */ diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h index 1b240a2f6ce0dbaa2c2319c6216e95e9ddf365b6..6df9b946b00feca538384601121a01c27b75bd1c 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_dpia_bw.h @@ -59,9 +59,8 @@ bool link_dp_dpia_set_dptx_usb4_bw_alloc_support(struct dc_link *link); * @link: pointer to the dc_link struct instance * @req_bw: Bw requested by the stream * - * return: true if allocated successfully */ -bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); +void link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int req_bw); /* * Handle the USB4 BW Allocation related functionality here: @@ -71,21 +70,8 @@ bool link_dp_dpia_allocate_usb4_bandwidth_for_stream(struct dc_link *link, int r * @link: pointer to the dc_link struct instance * @peak_bw: Peak bw used by the link/sink * - * return: allocated bw else return 0 */ -int dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw); - -/* - * Handle function for when the status of the Request above is complete. - * We will find out the result of allocating on CM and update structs. 
- * - * @link: pointer to the dc_link struct instance - * @bw: Allocated or Estimated BW depending on the result - * @result: Response type - * - * return: none - */ -void dpia_handle_bw_alloc_response(struct dc_link *link, uint8_t bw, uint8_t result); +void dpia_handle_usb4_bandwidth_allocation_for_link(struct dc_link *link, int peak_bw); /* * Handle the validation of total BW here and confirm that the bw used by each diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index 203e3a440845ab6218a3b57658641362c2304a8c..4e0efff92dcafd01591753d0b29a64c0807b5819 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -51,8 +51,8 @@ * for the cache windows. * * The call to dmub_srv_hw_init() programs the DMCUB registers to prepare - * for command submission. Commands can be queued via dmub_srv_fb_cmd_queue() - * and executed via dmub_srv_fb_cmd_execute(). + * for command submission. Commands can be queued via dmub_srv_cmd_queue() + * and executed via dmub_srv_cmd_execute(). * * If the queue is full the dmub_srv_wait_for_idle() call can be used to * wait until the queue has been cleared. @@ -170,13 +170,6 @@ enum dmub_srv_power_state_type { DMUB_POWER_STATE_D3 = 8 }; -/* enum dmub_inbox_cmd_interface type - defines default interface for host->dmub commands */ -enum dmub_inbox_cmd_interface_type { - DMUB_CMD_INTERFACE_DEFAULT = 0, - DMUB_CMD_INTERFACE_FB = 1, - DMUB_CMD_INTERFACE_REG = 2, -}; - /** * struct dmub_region - dmub hw memory region * @base: base address for region, must be 256 byte aligned @@ -356,21 +349,6 @@ struct dmub_diagnostic_data { uint8_t is_cw6_enabled : 1; }; -struct dmub_srv_inbox { - /* generic status */ - uint64_t num_submitted; - uint64_t num_reported; - union { - /* frame buffer mailbox status */ - struct dmub_rb rb; - /* register mailbox status */ - struct { - bool is_pending; - bool is_multi_pending; - }; - }; -}; - /** * struct dmub_srv_base_funcs - Driver specific base callbacks */ @@ -484,21 +462,18 @@ struct dmub_srv_hw_funcs { void (*init_reg_offsets)(struct dmub_srv *dmub, struct dc_context *ctx); void (*subvp_save_surf_addr)(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); - void (*send_reg_inbox0_cmd_msg)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); uint32_t (*read_reg_inbox0_rsp_int_status)(struct dmub_srv *dmub); void (*read_reg_inbox0_cmd_rsp)(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void (*write_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); - void (*clear_reg_inbox0_rsp_int_ack)(struct dmub_srv *dmub); - void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); - uint32_t (*read_reg_outbox0_rdy_int_status)(struct dmub_srv *dmub); void (*write_reg_outbox0_rdy_int_ack)(struct dmub_srv *dmub); void (*read_reg_outbox0_msg)(struct dmub_srv *dmub, uint32_t *msg); void (*write_reg_outbox0_rsp)(struct dmub_srv *dmub, uint32_t *rsp); uint32_t (*read_reg_outbox0_rsp_int_status)(struct dmub_srv *dmub); + void (*enable_reg_inbox0_rsp_int)(struct dmub_srv *dmub, bool enable); void (*enable_reg_outbox0_rdy_int)(struct dmub_srv *dmub, bool enable); }; @@ -518,7 +493,6 @@ struct dmub_srv_create_params { enum dmub_asic asic; uint32_t fw_version; bool is_virtual; - enum dmub_inbox_cmd_interface_type inbox_type; }; /** @@ -547,9 +521,8 @@ struct dmub_srv { const struct dmub_srv_dcn401_regs *regs_dcn401; struct dmub_srv_base_funcs funcs; struct dmub_srv_hw_funcs hw_funcs; - struct dmub_srv_inbox inbox1; + struct 
dmub_rb inbox1_rb; uint32_t inbox1_last_wptr; - struct dmub_srv_inbox reg_inbox0; /** * outbox1_rb is accessed without locks (dal & dc) * and to be used only in dmub_srv_stat_get_notification() @@ -569,7 +542,6 @@ struct dmub_srv { struct dmub_fw_meta_info meta_info; struct dmub_feature_caps feature_caps; struct dmub_visual_confirm_color visual_confirm_color; - enum dmub_inbox_cmd_interface_type inbox_type; enum dmub_srv_power_state_type power_state; struct dmub_diagnostic_data debug; @@ -727,7 +699,19 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); /** - * dmub_srv_fb_cmd_queue() - queues a command to the DMUB + * dmub_srv_sync_inbox1() - sync sw state with hw state + * @dmub: the dmub service + * + * Sync sw state with hw state when resuming from S0i3 + * + * Return: + * DMUB_STATUS_OK - success + * DMUB_STATUS_INVALID - unspecified error + */ +enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub); + +/** + * dmub_srv_cmd_queue() - queues a command to the DMUB * @dmub: the dmub service * @cmd: the command to queue * @@ -739,11 +723,11 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); * DMUB_STATUS_QUEUE_FULL - no remaining room in queue * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd); /** - * dmub_srv_fb_cmd_execute() - Executes a queued sequence to the dmub + * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub * @dmub: the dmub service * * Begins execution of queued commands on the dmub. * @@ -752,7 +736,7 @@ enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, * DMUB_STATUS_OK - success * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub); +enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub); /** * dmub_srv_wait_for_hw_pwr_up() - Waits for firmware hardware power up is completed @@ -810,23 +794,6 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, enum dmub_status dmub_srv_wait_for_phy_init(struct dmub_srv *dmub, uint32_t timeout_us); -/** - * dmub_srv_wait_for_pending() - Re-entrant wait for messages currently pending - * @dmub: the dmub service - * @timeout_us: the maximum number of microseconds to wait - * - * Waits until the commands queued prior to this call are complete. - * If interfaces remain busy due to additional work being submitted - * concurrently, this function will not continue to wait.
- * * Return: * DMUB_STATUS_OK - success * DMUB_STATUS_TIMEOUT - wait for buffer to flush timed out * DMUB_STATUS_INVALID - unspecified error */ -enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, - uint32_t timeout_us); - /** * dmub_srv_wait_for_idle() - Waits for the DMUB to be idle * @dmub: the dmub service @@ -925,6 +892,9 @@ enum dmub_status dmub_srv_get_fw_boot_status(struct dmub_srv *dmub, enum dmub_status dmub_srv_get_fw_boot_option(struct dmub_srv *dmub, union dmub_fw_boot_options *option); +enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd); + enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, bool skip); @@ -989,51 +959,35 @@ enum dmub_status dmub_srv_clear_inbox0_ack(struct dmub_srv *dmub); void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_address *addr, uint8_t subvp_index); /** - * dmub_srv_set_power_state() - Track DC power state in dmub_srv + * dmub_srv_send_reg_inbox0_cmd() - send a dmub command and wait for the command + * to be processed by DMUB. * @dmub: The dmub service - * @power_state: DC power state setting - * - * Store DC power state in dmub_srv. If dmub_srv is in D3, then don't send messages to DMUB + * @cmd: The dmub command being sent. If with_reply is true, the function will + * update cmd with replied data. + * @with_reply: true if DMUB reply needs to be copied back to cmd. false if the + * cmd doesn't need a reply. + * @timeout_us: timeout in microseconds. * * Return: - * void + * DMUB_STATUS_OK - success + * DMUB_STATUS_TIMEOUT - DMUB fails to process the command within the timeout + * interval. */ -void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); +enum dmub_status dmub_srv_send_reg_inbox0_cmd( + struct dmub_srv *dmub, + union dmub_rb_cmd *cmd, + bool with_reply, uint32_t timeout_us); /** - * dmub_srv_reg_cmd_execute() - Executes provided command to the dmub - * @dmub: the dmub service - * @cmd: the command packet to be executed - * - * Executes a single command for the dmub. - * - * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error - */ -enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); - - -/** - * dmub_srv_cmd_get_response() - Copies return data for command into buffer - * @dmub: the dmub service - * @cmd_rsp: response buffer - * - * Copies return data for command into buffer - */ -void dmub_srv_cmd_get_response(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd_rsp); - -/** - * dmub_srv_sync_inboxes() - Sync inbox state - * @dmub: the dmub service + * dmub_srv_set_power_state() - Track DC power state in dmub_srv + * @dmub: The dmub service + * @power_state: DC power state setting * - * Sync inbox state + * Store DC power state in dmub_srv.
If dmub_srv is in D3, then don't send messages to DMUB * * Return: - * DMUB_STATUS_OK - success - * DMUB_STATUS_INVALID - unspecified error + * void */ -enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub); +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state); #endif /* _DMUB_SRV_H_ */ diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c index 731ca9b6a6cfc74f147052fdc92793f8f8aae759..e67f7c4784eb79dce4747d9ba2385f6259736abc 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.c @@ -517,69 +517,28 @@ void dmub_dcn401_send_reg_inbox0_cmd_msg(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) { uint32_t *dwords = (uint32_t *)cmd; - int32_t payload_size_bytes = cmd->cmd_common.header.payload_bytes; - uint32_t msg_index; - static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - - /* read remaining data based on payload size */ - for (msg_index = 0; msg_index < 15; msg_index++) { - if (payload_size_bytes <= msg_index * 4) { - break; - } - switch (msg_index) { - case 0: - REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[msg_index + 1]); - break; - case 1: - REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[msg_index + 1]); - break; - case 2: - REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[msg_index + 1]); - break; - case 3: - REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[msg_index + 1]); - break; - case 4: - REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[msg_index + 1]); - break; - case 5: - REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[msg_index + 1]); - break; - case 6: - REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[msg_index + 1]); - break; - case 7: - REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[msg_index + 1]); - break; - case 8: - REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[msg_index + 1]); - break; - case 9: - REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[msg_index + 1]); - break; - case 10: - REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[msg_index + 1]); - break; - case 11: - REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[msg_index + 1]); - break; - case 12: - REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[msg_index + 1]); - break; - case 13: - REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[msg_index + 1]); - break; - case 14: - REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[msg_index + 1]); - break; - } - } + static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); + REG_WRITE(DMCUB_REG_INBOX0_MSG0, dwords[0]); + REG_WRITE(DMCUB_REG_INBOX0_MSG1, dwords[1]); + REG_WRITE(DMCUB_REG_INBOX0_MSG2, dwords[2]); + REG_WRITE(DMCUB_REG_INBOX0_MSG3, dwords[3]); + REG_WRITE(DMCUB_REG_INBOX0_MSG4, dwords[4]); + REG_WRITE(DMCUB_REG_INBOX0_MSG5, dwords[5]); + REG_WRITE(DMCUB_REG_INBOX0_MSG6, dwords[6]); + REG_WRITE(DMCUB_REG_INBOX0_MSG7, dwords[7]); + REG_WRITE(DMCUB_REG_INBOX0_MSG8, dwords[8]); + REG_WRITE(DMCUB_REG_INBOX0_MSG9, dwords[9]); + REG_WRITE(DMCUB_REG_INBOX0_MSG10, dwords[10]); + REG_WRITE(DMCUB_REG_INBOX0_MSG11, dwords[11]); + REG_WRITE(DMCUB_REG_INBOX0_MSG12, dwords[12]); + REG_WRITE(DMCUB_REG_INBOX0_MSG13, dwords[13]); + REG_WRITE(DMCUB_REG_INBOX0_MSG14, dwords[14]); /* writing to INBOX RDY register will trigger DMUB REG INBOX0 RDY * interrupt. 
*/ - REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[0]); + REG_WRITE(DMCUB_REG_INBOX0_RDY, dwords[15]); } uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub) @@ -597,39 +556,30 @@ void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, static_assert(sizeof(*cmd) == 64, "DMUB command size mismatch"); - dwords[0] = REG_READ(DMCUB_REG_INBOX0_RSP); - dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG0); - dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG1); - dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG2); - dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG3); - dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG4); - dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG5); - dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG6); - dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG7); - dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG8); - dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG9); - dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG10); - dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG11); - dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG12); - dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG13); - dwords[15] = REG_READ(DMCUB_REG_INBOX0_MSG14); + dwords[0] = REG_READ(DMCUB_REG_INBOX0_MSG0); + dwords[1] = REG_READ(DMCUB_REG_INBOX0_MSG1); + dwords[2] = REG_READ(DMCUB_REG_INBOX0_MSG2); + dwords[3] = REG_READ(DMCUB_REG_INBOX0_MSG3); + dwords[4] = REG_READ(DMCUB_REG_INBOX0_MSG4); + dwords[5] = REG_READ(DMCUB_REG_INBOX0_MSG5); + dwords[6] = REG_READ(DMCUB_REG_INBOX0_MSG6); + dwords[7] = REG_READ(DMCUB_REG_INBOX0_MSG7); + dwords[8] = REG_READ(DMCUB_REG_INBOX0_MSG8); + dwords[9] = REG_READ(DMCUB_REG_INBOX0_MSG9); + dwords[10] = REG_READ(DMCUB_REG_INBOX0_MSG10); + dwords[11] = REG_READ(DMCUB_REG_INBOX0_MSG11); + dwords[12] = REG_READ(DMCUB_REG_INBOX0_MSG12); + dwords[13] = REG_READ(DMCUB_REG_INBOX0_MSG13); + dwords[14] = REG_READ(DMCUB_REG_INBOX0_MSG14); + dwords[15] = REG_READ(DMCUB_REG_INBOX0_RSP); } void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 1); -} - -void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub) -{ REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_ACK, 0); } -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) -{ - REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); -} - void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_ACK, 1); @@ -654,6 +604,11 @@ uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub) return status; } +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable) +{ + REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_INBOX0_RSP_INT_EN, enable ? 1:0); +} + void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable) { REG_UPDATE(HOST_INTERRUPT_CSR, HOST_REG_OUTBOX0_RDY_INT_EN, enable ? 
1:0); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h index 88c3a44d67d9a0b62b4e7c5276827ff87fc979c5..c35be52676f6f280eb09739487341208f0a5c14e 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_dcn401.h @@ -277,13 +277,11 @@ uint32_t dmub_dcn401_read_reg_inbox0_rsp_int_status(struct dmub_srv *dmub); void dmub_dcn401_read_reg_inbox0_cmd_rsp(struct dmub_srv *dmub, union dmub_rb_cmd *cmd); void dmub_dcn401_write_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); -void dmub_dcn401_clear_reg_inbox0_rsp_int_ack(struct dmub_srv *dmub); -void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); - void dmub_dcn401_write_reg_outbox0_rdy_int_ack(struct dmub_srv *dmub); void dmub_dcn401_read_reg_outbox0_msg(struct dmub_srv *dmub, uint32_t *msg); void dmub_dcn401_write_reg_outbox0_rsp(struct dmub_srv *dmub, uint32_t *msg); uint32_t dmub_dcn401_read_reg_outbox0_rsp_int_status(struct dmub_srv *dmub); +void dmub_dcn401_enable_reg_inbox0_rsp_int(struct dmub_srv *dmub, bool enable); void dmub_dcn401_enable_reg_outbox0_rdy_int(struct dmub_srv *dmub, bool enable); uint32_t dmub_dcn401_read_reg_outbox0_rdy_int_status(struct dmub_srv *dmub); diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 713576a1f6fa162234689077482aaacecb03e3a8..ae8133816b43c95b1f5843645a6bd8fd5d417644 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -157,9 +157,6 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) { struct dmub_srv_hw_funcs *funcs = &dmub->hw_funcs; - /* default to specifying now inbox type */ - enum dmub_inbox_cmd_interface_type default_inbox_type = DMUB_CMD_INTERFACE_DEFAULT; - switch (asic) { case DMUB_ASIC_DCN20: case DMUB_ASIC_DCN21: @@ -398,15 +395,10 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) funcs->get_current_time = dmub_dcn401_get_current_time; funcs->get_diagnostic_data = dmub_dcn401_get_diagnostic_data; - funcs->send_reg_inbox0_cmd_msg = dmub_dcn401_send_reg_inbox0_cmd_msg; funcs->read_reg_inbox0_rsp_int_status = dmub_dcn401_read_reg_inbox0_rsp_int_status; funcs->read_reg_inbox0_cmd_rsp = dmub_dcn401_read_reg_inbox0_cmd_rsp; funcs->write_reg_inbox0_rsp_int_ack = dmub_dcn401_write_reg_inbox0_rsp_int_ack; - funcs->clear_reg_inbox0_rsp_int_ack = dmub_dcn401_clear_reg_inbox0_rsp_int_ack; - funcs->enable_reg_inbox0_rsp_int = dmub_dcn401_enable_reg_inbox0_rsp_int; - default_inbox_type = DMUB_CMD_INTERFACE_FB; // still default to FB for now - funcs->write_reg_outbox0_rdy_int_ack = dmub_dcn401_write_reg_outbox0_rdy_int_ack; funcs->read_reg_outbox0_msg = dmub_dcn401_read_reg_outbox0_msg; funcs->write_reg_outbox0_rsp = dmub_dcn401_write_reg_outbox0_rsp; @@ -419,20 +411,6 @@ static bool dmub_srv_hw_setup(struct dmub_srv *dmub, enum dmub_asic asic) return false; } - /* set default inbox type if not overriden */ - if (dmub->inbox_type == DMUB_CMD_INTERFACE_DEFAULT) { - if (default_inbox_type != DMUB_CMD_INTERFACE_DEFAULT) { - /* use default inbox type as specified by DCN rev */ - dmub->inbox_type = default_inbox_type; - } else if (funcs->send_reg_inbox0_cmd_msg) { - /* prefer reg as default inbox type if present */ - dmub->inbox_type = DMUB_CMD_INTERFACE_REG; - } else { - /* use fb as fallback */ - dmub->inbox_type = DMUB_CMD_INTERFACE_FB; - } - } - return true; } @@ -448,7 +426,6 @@ enum 
dmub_status dmub_srv_create(struct dmub_srv *dmub, dmub->asic = params->asic; dmub->fw_version = params->fw_version; dmub->is_virtual = params->is_virtual; - dmub->inbox_type = params->inbox_type; /* Setup asic dependent hardware funcs. */ if (!dmub_srv_hw_setup(dmub, params->asic)) { @@ -718,7 +695,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, inbox1.base = cw4.region.base; inbox1.top = cw4.region.base + DMUB_RB_SIZE; outbox1.base = inbox1.top; - outbox1.top = inbox1.top + DMUB_RB_SIZE; + outbox1.top = cw4.region.top; cw5.offset.quad_part = tracebuff_fb->gpu_addr; cw5.region.base = DMUB_CW5_BASE; @@ -731,7 +708,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, cw6.region.base = DMUB_CW6_BASE; cw6.region.top = cw6.region.base + fw_state_fb->size; - dmub->fw_state = fw_state_fb->cpu_addr; + dmub->fw_state = (void *)((uintptr_t)(fw_state_fb->cpu_addr) + DMUB_DEBUG_FW_STATE_OFFSET); region6.offset.quad_part = shared_state_fb->gpu_addr; region6.region.base = DMUB_CW6_BASE; @@ -760,7 +737,7 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, rb_params.ctx = dmub; rb_params.base_address = mail_fb->cpu_addr; rb_params.capacity = DMUB_RB_SIZE; - dmub_rb_init(&dmub->inbox1.rb, &rb_params); + dmub_rb_init(&dmub->inbox1_rb, &rb_params); // Initialize outbox1 ring buffer rb_params.ctx = dmub; @@ -791,6 +768,27 @@ enum dmub_status dmub_srv_hw_init(struct dmub_srv *dmub, return DMUB_STATUS_OK; } +enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) +{ + if (!dmub->sw_init) + return DMUB_STATUS_INVALID; + + if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { + uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); + uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + + if (rptr > dmub->inbox1_rb.capacity || wptr > dmub->inbox1_rb.capacity) { + return DMUB_STATUS_HW_FAILURE; + } else { + dmub->inbox1_rb.rptr = rptr; + dmub->inbox1_rb.wrpt = wptr; + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; + } + } + + return DMUB_STATUS_OK; +} + enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) { if (!dmub->sw_init) @@ -801,13 +799,8 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) /* mailboxes have been reset in hw, so reset the sw state as well */ dmub->inbox1_last_wptr = 0; - dmub->inbox1.rb.wrpt = 0; - dmub->inbox1.rb.rptr = 0; - dmub->inbox1.num_reported = 0; - dmub->inbox1.num_submitted = 0; - dmub->reg_inbox0.num_reported = 0; - dmub->reg_inbox0.num_submitted = 0; - dmub->reg_inbox0.is_pending = 0; + dmub->inbox1_rb.wrpt = 0; + dmub->inbox1_rb.rptr = 0; dmub->outbox0_rb.wrpt = 0; dmub->outbox0_rb.rptr = 0; dmub->outbox1_rb.wrpt = 0; @@ -818,7 +811,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, +enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) @@ -827,20 +820,18 @@ enum dmub_status dmub_srv_fb_cmd_queue(struct dmub_srv *dmub, if (dmub->power_state != DMUB_POWER_STATE_D0) return DMUB_STATUS_POWER_STATE_D3; - if (dmub->inbox1.rb.rptr > dmub->inbox1.rb.capacity || - dmub->inbox1.rb.wrpt > dmub->inbox1.rb.capacity) { + if (dmub->inbox1_rb.rptr > dmub->inbox1_rb.capacity || + dmub->inbox1_rb.wrpt > dmub->inbox1_rb.capacity) { return DMUB_STATUS_HW_FAILURE; } - if (dmub_rb_push_front(&dmub->inbox1.rb, cmd)) { - dmub->inbox1.num_submitted++; + if (dmub_rb_push_front(&dmub->inbox1_rb, cmd)) return DMUB_STATUS_OK; - } return DMUB_STATUS_QUEUE_FULL; } -enum dmub_status 
dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) +enum dmub_status dmub_srv_cmd_execute(struct dmub_srv *dmub) { struct dmub_rb flush_rb; @@ -855,13 +846,13 @@ enum dmub_status dmub_srv_fb_cmd_execute(struct dmub_srv *dmub) * been flushed to framebuffer memory. Otherwise DMCUB might * read back stale, fully invalid or partially invalid data. */ - flush_rb = dmub->inbox1.rb; + flush_rb = dmub->inbox1_rb; flush_rb.rptr = dmub->inbox1_last_wptr; dmub_rb_flush_pending(&flush_rb); - dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1.rb.wrpt); + dmub->hw_funcs.set_inbox1_wptr(dmub, dmub->inbox1_rb.wrpt); - dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; + dmub->inbox1_last_wptr = dmub->inbox1_rb.wrpt; return DMUB_STATUS_OK; } @@ -919,97 +910,26 @@ enum dmub_status dmub_srv_wait_for_auto_load(struct dmub_srv *dmub, return DMUB_STATUS_TIMEOUT; } -static void dmub_srv_update_reg_inbox0_status(struct dmub_srv *dmub) -{ - if (dmub->reg_inbox0.is_pending) { - dmub->reg_inbox0.is_pending = dmub->hw_funcs.read_reg_inbox0_rsp_int_status && - !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - - if (!dmub->reg_inbox0.is_pending) { - /* ack the rsp interrupt */ - if (dmub->hw_funcs.write_reg_inbox0_rsp_int_ack) - dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); - - /* only update the reported count if commands aren't being batched */ - if (!dmub->reg_inbox0.is_pending && !dmub->reg_inbox0.is_multi_pending) { - dmub->reg_inbox0.num_reported = dmub->reg_inbox0.num_submitted; - } - } - } -} - -enum dmub_status dmub_srv_wait_for_pending(struct dmub_srv *dmub, - uint32_t timeout_us) -{ - uint32_t i; - const uint32_t polling_interval_us = 1; - struct dmub_srv_inbox scratch_reg_inbox0 = dmub->reg_inbox0; - struct dmub_srv_inbox scratch_inbox1 = dmub->inbox1; - const volatile struct dmub_srv_inbox *reg_inbox0 = &dmub->reg_inbox0; - const volatile struct dmub_srv_inbox *inbox1 = &dmub->inbox1; - - if (!dmub->hw_init || - !dmub->hw_funcs.get_inbox1_wptr) - return DMUB_STATUS_INVALID; - - /* take a snapshot of the required mailbox state */ - scratch_inbox1.rb.wrpt = dmub->hw_funcs.get_inbox1_wptr(dmub); - - for (i = 0; i <= timeout_us; i += polling_interval_us) { - scratch_inbox1.rb.rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - - scratch_reg_inbox0.is_pending = scratch_reg_inbox0.is_pending && - dmub->hw_funcs.read_reg_inbox0_rsp_int_status && - !dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); - - if (scratch_inbox1.rb.rptr > dmub->inbox1.rb.capacity) - return DMUB_STATUS_HW_FAILURE; - - /* check current HW state first, but use command submission vs reported as a fallback */ - if ((dmub_rb_empty(&scratch_inbox1.rb) || - inbox1->num_reported >= scratch_inbox1.num_submitted) && - (!scratch_reg_inbox0.is_pending || - reg_inbox0->num_reported >= scratch_reg_inbox0.num_submitted)) - return DMUB_STATUS_OK; - - udelay(polling_interval_us); - } - - return DMUB_STATUS_TIMEOUT; -} - enum dmub_status dmub_srv_wait_for_idle(struct dmub_srv *dmub, uint32_t timeout_us) { uint32_t i, rptr; - const uint32_t polling_interval_us = 1; if (!dmub->hw_init) return DMUB_STATUS_INVALID; - for (i = 0; i < timeout_us; i += polling_interval_us) { - /* update inbox1 state */ + for (i = 0; i <= timeout_us; ++i) { rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - if (rptr > dmub->inbox1.rb.capacity) + if (rptr > dmub->inbox1_rb.capacity) return DMUB_STATUS_HW_FAILURE; - if (dmub->inbox1.rb.rptr > rptr) { - /* rb wrapped */ - dmub->inbox1.num_reported += (rptr + dmub->inbox1.rb.capacity - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; - } else { - 
dmub->inbox1.num_reported += (rptr - dmub->inbox1.rb.rptr) / DMUB_RB_CMD_SIZE; - } - dmub->inbox1.rb.rptr = rptr; - - /* update reg_inbox0 */ - dmub_srv_update_reg_inbox0_status(dmub); + dmub->inbox1_rb.rptr = rptr; - /* check for idle */ - if (dmub_rb_empty(&dmub->inbox1.rb) && !dmub->reg_inbox0.is_pending) + if (dmub_rb_empty(&dmub->inbox1_rb)) return DMUB_STATUS_OK; - udelay(polling_interval_us); + udelay(1); } return DMUB_STATUS_TIMEOUT; @@ -1120,6 +1040,35 @@ enum dmub_status dmub_srv_set_skip_panel_power_sequence(struct dmub_srv *dmub, return DMUB_STATUS_OK; } +enum dmub_status dmub_srv_cmd_with_reply_data(struct dmub_srv *dmub, + union dmub_rb_cmd *cmd) +{ + enum dmub_status status = DMUB_STATUS_OK; + + // Queue command + status = dmub_srv_cmd_queue(dmub, cmd); + + if (status != DMUB_STATUS_OK) + return status; + + // Execute command + status = dmub_srv_cmd_execute(dmub); + + if (status != DMUB_STATUS_OK) + return status; + + // Wait for DMUB to process command + status = dmub_srv_wait_for_idle(dmub, 100000); + + if (status != DMUB_STATUS_OK) + return status; + + // Copy data back from ring buffer into command + dmub_rb_get_return_data(&dmub->inbox1_rb, cmd); + + return status; +} + static inline bool dmub_rb_out_trace_buffer_front(struct dmub_rb *rb, void *entry) { @@ -1211,105 +1160,47 @@ void dmub_srv_subvp_save_surf_addr(struct dmub_srv *dmub, const struct dc_plane_ } } -void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) -{ - if (!dmub || !dmub->hw_init) - return; - - dmub->power_state = dmub_srv_power_state; -} -enum dmub_status dmub_srv_reg_cmd_execute(struct dmub_srv *dmub, union dmub_rb_cmd *cmd) +enum dmub_status dmub_srv_send_reg_inbox0_cmd( + struct dmub_srv *dmub, + union dmub_rb_cmd *cmd, + bool with_reply, uint32_t timeout_us) { - uint32_t num_pending = 0; - - if (!dmub->hw_init) - return DMUB_STATUS_INVALID; - - if (dmub->power_state != DMUB_POWER_STATE_D0) - return DMUB_STATUS_POWER_STATE_D3; - - if (!dmub->hw_funcs.send_reg_inbox0_cmd_msg || - !dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack) - return DMUB_STATUS_INVALID; - - if (dmub->reg_inbox0.num_submitted >= dmub->reg_inbox0.num_reported) - num_pending = dmub->reg_inbox0.num_submitted - dmub->reg_inbox0.num_reported; - else - /* num_submitted wrapped */ - num_pending = DMUB_REG_INBOX0_RB_MAX_ENTRY - - (dmub->reg_inbox0.num_reported - dmub->reg_inbox0.num_submitted); - - if (num_pending >= DMUB_REG_INBOX0_RB_MAX_ENTRY) - return DMUB_STATUS_QUEUE_FULL; + uint32_t rsp_ready = 0; + uint32_t i; - /* clear last rsp ack and send message */ - dmub->hw_funcs.clear_reg_inbox0_rsp_int_ack(dmub); dmub->hw_funcs.send_reg_inbox0_cmd_msg(dmub, cmd); - dmub->reg_inbox0.num_submitted++; - dmub->reg_inbox0.is_pending = true; - dmub->reg_inbox0.is_multi_pending = cmd->cmd_common.header.multi_cmd_pending; - - return DMUB_STATUS_OK; -} - -void dmub_srv_cmd_get_response(struct dmub_srv *dmub, - union dmub_rb_cmd *cmd_rsp) -{ - if (dmub) { - if (dmub->inbox_type == DMUB_CMD_INTERFACE_REG && - dmub->hw_funcs.read_reg_inbox0_cmd_rsp) { - dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd_rsp); - } else { - dmub_rb_get_return_data(&dmub->inbox1.rb, cmd_rsp); - } + for (i = 0; i < timeout_us; i++) { + rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + if (rsp_ready) + break; + udelay(1); } -} - -static enum dmub_status dmub_srv_sync_reg_inbox0(struct dmub_srv *dmub) -{ - if (!dmub || !dmub->sw_init) - return DMUB_STATUS_INVALID; - - dmub->reg_inbox0.is_pending = 0; - 
dmub->reg_inbox0.is_multi_pending = 0; - - return DMUB_STATUS_OK; -} + if (rsp_ready == 0) + return DMUB_STATUS_TIMEOUT; -static enum dmub_status dmub_srv_sync_inbox1(struct dmub_srv *dmub) -{ - if (!dmub->sw_init) - return DMUB_STATUS_INVALID; + if (with_reply) + dmub->hw_funcs.read_reg_inbox0_cmd_rsp(dmub, cmd); - if (dmub->hw_funcs.get_inbox1_rptr && dmub->hw_funcs.get_inbox1_wptr) { - uint32_t rptr = dmub->hw_funcs.get_inbox1_rptr(dmub); - uint32_t wptr = dmub->hw_funcs.get_inbox1_wptr(dmub); + dmub->hw_funcs.write_reg_inbox0_rsp_int_ack(dmub); - if (rptr > dmub->inbox1.rb.capacity || wptr > dmub->inbox1.rb.capacity) { - return DMUB_STATUS_HW_FAILURE; - } else { - dmub->inbox1.rb.rptr = rptr; - dmub->inbox1.rb.wrpt = wptr; - dmub->inbox1_last_wptr = dmub->inbox1.rb.wrpt; - } + /* wait for rsp int status to be cleared to initial state before exit */ + for (; i <= timeout_us; i++) { + rsp_ready = dmub->hw_funcs.read_reg_inbox0_rsp_int_status(dmub); + if (rsp_ready == 0) + break; + udelay(1); } + ASSERT(rsp_ready == 0); return DMUB_STATUS_OK; } -enum dmub_status dmub_srv_sync_inboxes(struct dmub_srv *dmub) +void dmub_srv_set_power_state(struct dmub_srv *dmub, enum dmub_srv_power_state_type dmub_srv_power_state) { - enum dmub_status status; - - status = dmub_srv_sync_reg_inbox0(dmub); - if (status != DMUB_STATUS_OK) - return status; - - status = dmub_srv_sync_inbox1(dmub); - if (status != DMUB_STATUS_OK) - return status; + if (!dmub || !dmub->hw_init) + return; - return DMUB_STATUS_OK; + dmub->power_state = dmub_srv_power_state; } diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 485b713cfad0ac5526f0efb965a0729fec8c8bed..4c95b885d1d004c8e58a58f045b6764116b495d4 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -358,6 +358,18 @@ enum DC_DEBUG_MASK { * @DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE: If set, disable support for custom brightness curves */ DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE = 0x40000, + + /** + * @DC_HDCP_LC_FORCE_FW_ENABLE: If set, use HDCP Locality Check FW + * path regardless of reported HW capabilities. + */ + DC_HDCP_LC_FORCE_FW_ENABLE = 0x80000, + + /** + * @DC_HDCP_LC_ENABLE_SW_FALLBACK: If set, upon HDCP Locality Check FW + * path failure, retry using legacy SW path. + */ + DC_HDCP_LC_ENABLE_SW_FALLBACK = 0x100000, }; enum amd_dpm_forced_level; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 9fb26b5c8ae7aa1ada38c04a8edb2a82bdff33b0..f93d287dbf1376bf1257ea57bcdf7a25d4f26b29 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -295,7 +295,8 @@ enum ip_power_state { }; /* Used to mask smu debug modes */ -#define SMU_DEBUG_HALT_ON_ERROR 0x1 +#define SMU_DEBUG_HALT_ON_ERROR BIT(0) +#define SMU_DEBUG_POOL_USE_VRAM BIT(1) #define MAX_SMU_I2C_BUSES 2 diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index d1aa6075c4f411b648366f1687f4126ca56c0185..033c3229b555f04e94bc1a1e4d2e170c8b88256b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1027,7 +1027,10 @@ static int smu_alloc_memory_pool(struct smu_context *smu) memory_pool->size = pool_size; memory_pool->align = PAGE_SIZE; - memory_pool->domain = AMDGPU_GEM_DOMAIN_GTT; + memory_pool->domain = + (adev->pm.smu_debug_mask & SMU_DEBUG_POOL_USE_VRAM) ?
+ AMDGPU_GEM_DOMAIN_VRAM : + AMDGPU_GEM_DOMAIN_GTT; switch (pool_size) { case SMU_MEMORY_POOL_SIZE_256_MB: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index 51078accc0f8fdd294d3dfc8d79ad0dfa3f38c3f..238bd71baa6ddf8210051cba831013dccc6bc10f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -478,8 +478,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table) } } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(metrics->XgmiWidth); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(metrics->XgmiBitrate); + gpu_metrics->xgmi_link_width = metrics->XgmiWidth; + gpu_metrics->xgmi_link_speed = metrics->XgmiBitrate; gpu_metrics->firmware_timestamp = metrics->Timestamp; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 1e1d8989c77aa370c0acda9238f8359c6a2118db..682646068000614d8d92c821cda363e26be5eb37 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -231,7 +231,11 @@ static const struct cmn2asic_mapping smu_v13_0_6_feature_mask_map[SMU_FEATURE_CO SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF), SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL), SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN), - SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE), + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE), + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN), + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK), + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK), + SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK), }; #define TABLE_PMSTATUSLOG 0 @@ -2682,8 +2686,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table } } - gpu_metrics->xgmi_link_width = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWidth, version)); - gpu_metrics->xgmi_link_speed = SMUQ10_ROUND(GET_METRIC_FIELD(XgmiBitrate, version)); + gpu_metrics->xgmi_link_width = GET_METRIC_FIELD(XgmiWidth, version); + gpu_metrics->xgmi_link_speed = GET_METRIC_FIELD(XgmiBitrate, version); gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 21d1b34ada05354f2454abb2655dd168bf5d12cc..f7cfe1f35caeab52d1d9f17ee29e8f322ba18572 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -1203,16 +1203,9 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, PP_OD_FEATURE_GFXCLK_BIT)) break; - PPTable_t *pptable = smu->smu_table.driver_pptable; - const OverDriveLimits_t * const overdrive_upperlimits = - &pptable->SkuTable.OverDriveLimitsBasicMax; - const OverDriveLimits_t * const overdrive_lowerlimits = - &pptable->SkuTable.OverDriveLimitsBasicMin; - size += sysfs_emit_at(buf, size, "OD_SCLK_OFFSET:\n"); - size += sysfs_emit_at(buf, size, "0: %dMhz\n1: %uMhz\n", - overdrive_lowerlimits->GfxclkFoffset, - overdrive_upperlimits->GfxclkFoffset); + size += sysfs_emit_at(buf, size, "%dMhz\n", + od_table->OverDriveTable.GfxclkFoffset); break; case SMU_OD_MCLK: @@ -1346,13 +1339,9 @@ static int smu_v14_0_2_print_clk_levels(struct smu_context *smu, size += 
sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE"); if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_GFXCLK_BIT)) { - smu_v14_0_2_get_od_setting_limits(smu, - PP_OD_FEATURE_GFXCLK_FMIN, - &min_value, - NULL); smu_v14_0_2_get_od_setting_limits(smu, PP_OD_FEATURE_GFXCLK_FMAX, - NULL, + &min_value, &max_value); size += sysfs_emit_at(buf, size, "SCLK_OFFSET: %7dMhz %10uMhz\n", min_value, max_value); @@ -2460,36 +2449,24 @@ static int smu_v14_0_2_od_edit_dpm_table(struct smu_context *smu, return -ENOTSUPP; } - for (i = 0; i < size; i += 2) { - if (i + 2 > size) { - dev_info(adev->dev, "invalid number of input parameters %d\n", size); - return -EINVAL; - } - - switch (input[i]) { - case 1: - smu_v14_0_2_get_od_setting_limits(smu, - PP_OD_FEATURE_GFXCLK_FMAX, - &minimum, - &maximum); - if (input[i + 1] < minimum || - input[i + 1] > maximum) { - dev_info(adev->dev, "GfxclkFmax (%ld) must be within [%u, %u]!\n", - input[i + 1], minimum, maximum); - return -EINVAL; - } - - od_table->OverDriveTable.GfxclkFoffset = input[i + 1]; - od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; - break; + if (size != 1) { + dev_info(adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } - default: - dev_info(adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); - dev_info(adev->dev, "Supported indices: [0:min,1:max]\n"); - return -EINVAL; - } + smu_v14_0_2_get_od_setting_limits(smu, + PP_OD_FEATURE_GFXCLK_FMAX, + &minimum, + &maximum); + if (input[0] < minimum || + input[0] > maximum) { + dev_info(adev->dev, "GfxclkFoffset must be within [%d, %u]!\n", + minimum, maximum); + return -EINVAL; } + od_table->OverDriveTable.GfxclkFoffset = input[0]; + od_table->OverDriveTable.FeatureCtrlMask |= 1U << PP_OD_FEATURE_GFXCLK_BIT; break; case PP_OD_EDIT_MCLK_VDDC_TABLE: diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 058a1c8451b2d946b4a6a6ce7790d7f84815c843..ded5747a58d138a1d61ea4ae08bb691e95a39c0c 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -961,7 +961,7 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev, unsigned optimal_score = ~0; /* loop through vco from low to high */ - vco_min = max(max(vco_min, vclk), dclk); + vco_min = max3(vco_min, vclk, dclk); for (vco_freq = vco_min; vco_freq <= vco_max; vco_freq += 100) { uint64_t fb_div = (uint64_t)vco_freq * fb_factor;
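
For reference, the dmub_srv_cmd_with_reply_data() helper restored in this series performs the full inbox1 round trip in one call: queue the command, ring the inbox1 write pointer, wait for idle, then copy the return payload out of the ring buffer. A minimal caller-side sketch follows; it is illustrative only and not part of the patch. It reuses the QUERY_HPD command fields visible earlier in this diff, while the reply field (data.result) and the pairing of that command with dmub_srv_cmd_with_reply_data() are assumptions made for the example.

/* Illustrative sketch, not part of the patch: issue a command through the
 * restored inbox1 path and read back its reply. Assumes dmub points at a
 * dmub_srv instance that has completed dmub_srv_hw_init().
 */
static bool example_query_dpia_hpd(struct dmub_srv *dmub, uint8_t instance)
{
	union dmub_rb_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.query_hpd.header.type = DMUB_CMD__QUERY_HPD_STATE;
	cmd.query_hpd.data.instance = instance;
	cmd.query_hpd.data.ch_type = AUX_CHANNEL_DPIA;

	/* Queue + execute + wait for idle + dmub_rb_get_return_data() */
	if (dmub_srv_cmd_with_reply_data(dmub, &cmd) != DMUB_STATUS_OK)
		return false;

	return cmd.query_hpd.data.result; /* assumed reply field */
}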