Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (31)
Showing 388 additions and 140 deletions
......@@ -1194,9 +1194,15 @@ struct amdgpu_device {
bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;
bool enforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and compute processes */
/* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
bool enforce_isolation[MAX_XCP];
struct amdgpu_isolation {
void *owner;
struct dma_fence *spearhead;
struct amdgpu_sync active;
struct amdgpu_sync prev;
} isolation[MAX_XCP];
struct amdgpu_init_level *init_lvl;
......@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
struct dma_fence *gang);
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_job *job);
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
......
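Editorial note on the new per-partition state above: the field roles are not documented in the header itself, so the annotated sketch below is an interpretation based on how amdgpu_device_enforce_isolation() (further down in this diff) uses them; it is not part of the patch.

struct amdgpu_isolation {
	void *owner;                  /* client currently owning the block; NULL for kernel work, ~0l for non-isolated submissions */
	struct dma_fence *spearhead;  /* scheduled fence of the first job that switched ownership to this client */
	struct amdgpu_sync active;    /* finished fences of the current owner's jobs */
	struct amdgpu_sync prev;      /* fences of the previous owner that must signal before switching again */
} isolation[MAX_XCP];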
......@@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
{
struct aca_bank_node *node;
struct aca_bank *bank;
int r;
if (!adev->cper.enabled)
return;
......@@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
/* UEs must be encoded into separate CPER entries */
if (type == ACA_SMU_TYPE_UE) {
struct aca_banks de_banks;
aca_banks_init(&de_banks);
list_for_each_entry(node, &banks->list, node) {
bank = &node->bank;
if (amdgpu_cper_generate_ue_record(adev, bank))
dev_warn(adev->dev, "fail to generate ue cper records\n");
if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
r = aca_banks_add_bank(&de_banks, bank);
if (r)
dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
} else {
if (amdgpu_cper_generate_ue_record(adev, bank))
dev_warn(adev->dev, "fail to generate ue cper records\n");
}
}
if (!list_empty(&de_banks.list)) {
if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
dev_warn(adev->dev, "fail to generate de cper records\n");
}
aca_banks_release(&de_banks);
} else {
/*
* SMU_TYPE_CE banks are combined into 1 CPER entries,
......@@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
if (ret)
return ret;
/* DEs may be contained in CEs or UEs */
if (type != ACA_ERROR_TYPE_DEFERRED)
aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
return aca_log_aca_error(handle, type, err_data);
}
......
......@@ -76,11 +76,17 @@ struct ras_query_context;
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \
ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_CE)
#define ACA_BANK_ERR_IS_DEFFERED(bank) \
(ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_CE)
#define ACA_BANK_ERR_UE_DE_DECODE(bank) \
(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
ACA_ERROR_TYPE_UE)
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
......
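For illustration only (not from the patch), a minimal sketch of how the split decode macros are meant to be used when classifying a bank; the bank pointer and the UE/CE test are placeholders:

enum aca_error_type type;

if (bank_is_uncorrectable)                       /* however the caller decides UE vs CE */
	type = ACA_BANK_ERR_UE_DE_DECODE(bank);  /* UE, unless poison/deferred -> ACA_ERROR_TYPE_DEFERRED */
else
	type = ACA_BANK_ERR_CE_DE_DECODE(bank);  /* CE, unless poison/deferred -> ACA_ERROR_TYPE_DEFERRED */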
......@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
if (ret)
return ret;
return amdgpu_sync_fence(sync, vm->last_update);
return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
}
static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
......@@ -1249,7 +1249,7 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
(void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
(void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
(void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
return 0;
}
......@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
}
static int map_bo_to_gpuvm(struct kgd_mem *mem,
......@@ -2913,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
}
dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
DMA_RESV_USAGE_KERNEL, fence) {
ret = amdgpu_sync_fence(&sync_obj, fence);
ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
goto validate_map_fail;
......
......@@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
r = amdgpu_sync_fence(&p->sync, fence);
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
......@@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
return r;
}
r = amdgpu_sync_fence(&p->sync, fence);
r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
dma_fence_put(fence);
return r;
}
......@@ -1111,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
struct drm_gpu_scheduler *sched = entity->rq->sched;
struct amdgpu_ring *ring = to_amdgpu_ring(sched);
if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
return -EINVAL;
}
}
......@@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;
......@@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;
}
......@@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
GFP_KERNEL);
if (r)
return r;
}
......@@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
r = amdgpu_sync_fence(&p->sync, vm->last_update);
r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
if (r)
return r;
......@@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
continue;
}
r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
GFP_KERNEL);
dma_fence_put(fence);
if (r)
return r;
......
......@@ -4294,6 +4294,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->gfx.reset_sem_mutex);
/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
mutex_init(&adev->enforce_isolation_mutex);
for (i = 0; i < MAX_XCP; ++i) {
adev->isolation[i].spearhead = dma_fence_get_stub();
amdgpu_sync_create(&adev->isolation[i].active);
amdgpu_sync_create(&adev->isolation[i].prev);
}
mutex_init(&adev->gfx.kfd_sch_mutex);
mutex_init(&adev->gfx.workload_profile_mutex);
mutex_init(&adev->vcn.workload_profile_mutex);
......@@ -4799,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
void amdgpu_device_fini_sw(struct amdgpu_device *adev)
{
int idx;
int i, idx;
bool px;
amdgpu_device_ip_fini(adev);
......@@ -4807,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
adev->accel_working = false;
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
for (i = 0; i < MAX_XCP; ++i) {
dma_fence_put(adev->isolation[i].spearhead);
amdgpu_sync_free(&adev->isolation[i].active);
amdgpu_sync_free(&adev->isolation[i].prev);
}
amdgpu_reset_fini(adev);
......@@ -5356,6 +5366,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
......@@ -6952,6 +6963,93 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
return NULL;
}
/**
* amdgpu_device_enforce_isolation - enforce HW isolation
* @adev: the amdgpu device pointer
* @ring: the HW ring the job is supposed to run on
* @job: the job which is about to be pushed to the HW ring
*
* Makes sure that only one client at a time can use the GFX block.
* Returns: The dependency to wait on before the job can be pushed to the HW.
* The function is called multiple times until NULL is returned.
*/
struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
struct drm_sched_fence *f = job->base.s_fence;
struct dma_fence *dep;
void *owner;
int r;
/*
* For now enforce isolation only for the GFX block since we only need
* the cleaner shader on those rings.
*/
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
return NULL;
/*
* All submissions where enforce isolation is false are handled as if
* they come from a single client. Use ~0l as the owner to distinguish it
* from kernel submissions where the owner is NULL.
*/
owner = job->enforce_isolation ? f->owner : (void *)~0l;
mutex_lock(&adev->enforce_isolation_mutex);
/*
* The "spearhead" submission is the first one which changes the
* ownership to its client. We always need to wait for it to be
* pushed to the HW before proceeding with anything.
*/
if (&f->scheduled != isolation->spearhead &&
!dma_fence_is_signaled(isolation->spearhead)) {
dep = isolation->spearhead;
goto out_grab_ref;
}
if (isolation->owner != owner) {
/*
* Wait for any gang to be assembled before switching to a
* different owner or otherwise we could deadlock the
* submissions.
*/
if (!job->gang_submit) {
dep = amdgpu_device_get_gang(adev);
if (!dma_fence_is_signaled(dep))
goto out_return_dep;
dma_fence_put(dep);
}
dma_fence_put(isolation->spearhead);
isolation->spearhead = dma_fence_get(&f->scheduled);
amdgpu_sync_move(&isolation->active, &isolation->prev);
trace_amdgpu_isolation(isolation->owner, owner);
isolation->owner = owner;
}
/*
* Specifying the ring here helps to pipeline submissions even when
* isolation is enabled. If that is not desired for testing, NULL can be
* used instead of the ring to enforce a CPU round trip while switching
* between clients.
*/
dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
if (r)
DRM_WARN("OOM tracking isolation\n");
out_grab_ref:
dma_fence_get(dep);
out_return_dep:
mutex_unlock(&adev->enforce_isolation_mutex);
return dep;
}
bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
......
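A usage sketch (not additional patch content) of the contract described in the kernel-doc above: the scheduler's prepare_job hook returns one dependency per call and is invoked again once that fence signals, so a caller consumes amdgpu_device_enforce_isolation() roughly like this:

struct dma_fence *dep;

dep = amdgpu_device_enforce_isolation(adev, ring, job);
if (dep)
	return dep;   /* wait for the spearhead / previous owner, then the hook runs again */

/* NULL: this client now owns the block, continue (e.g. grab a VMID) */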
......@@ -113,8 +113,13 @@
#include "amdgpu_isp.h"
#endif
#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
#define mmIP_DISCOVERY_VERSION 0x16A00
#define mmRCC_CONFIG_MEMSIZE 0xde3
......@@ -297,21 +302,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
return ret;
}
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
uint8_t *binary,
const char *fw_name)
{
const struct firmware *fw;
const char *fw_name;
int r;
switch (amdgpu_discovery) {
case 2:
fw_name = FIRMWARE_IP_DISCOVERY;
break;
default:
dev_warn(adev->dev, "amdgpu_discovery is not set properly\n");
return -EINVAL;
}
r = request_firmware(&fw, fw_name, adev->dev);
if (r) {
dev_err(adev->dev, "can't load firmware \"%s\"\n",
......@@ -404,10 +401,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
return 0;
}
static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
{
if (amdgpu_discovery == 2)
return "amdgpu/ip_discovery.bin";
switch (adev->asic_type) {
case CHIP_VEGA10:
return "amdgpu/vega10_ip_discovery.bin";
case CHIP_VEGA12:
return "amdgpu/vega12_ip_discovery.bin";
case CHIP_RAVEN:
if (adev->apu_flags & AMD_APU_IS_RAVEN2)
return "amdgpu/raven2_ip_discovery.bin";
else if (adev->apu_flags & AMD_APU_IS_PICASSO)
return "amdgpu/picasso_ip_discovery.bin";
else
return "amdgpu/raven_ip_discovery.bin";
case CHIP_VEGA20:
return "amdgpu/vega20_ip_discovery.bin";
case CHIP_ARCTURUS:
return "amdgpu/arcturus_ip_discovery.bin";
case CHIP_ALDEBARAN:
return "amdgpu/aldebaran_ip_discovery.bin";
default:
return NULL;
}
}
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
const char *fw_name;
uint16_t offset;
uint16_t size;
uint16_t checksum;
......@@ -419,9 +445,10 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
return -ENOMEM;
/* Read from file if it is the preferred option */
if (amdgpu_discovery == 2) {
fw_name = amdgpu_discovery_get_fw_name(adev);
if (fw_name != NULL) {
dev_info(adev->dev, "use ip discovery information from file");
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
if (r) {
dev_err(adev->dev, "failed to read ip discovery binary from file\n");
......@@ -2518,6 +2545,38 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
{
int r;
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
case CHIP_ALDEBARAN:
/* This is not fatal. We have a fallback below
* if the new firmware files are not present. Some of
* this will be overridden below to keep things
* consistent with the current behavior.
*/
r = amdgpu_discovery_reg_base_init(adev);
if (!r) {
amdgpu_discovery_harvest_ip(adev);
amdgpu_discovery_get_gfx_info(adev);
amdgpu_discovery_get_mall_info(adev);
amdgpu_discovery_get_vcn_info(adev);
}
break;
default:
r = amdgpu_discovery_reg_base_init(adev);
if (r)
return -EINVAL;
amdgpu_discovery_harvest_ip(adev);
amdgpu_discovery_get_gfx_info(adev);
amdgpu_discovery_get_mall_info(adev);
amdgpu_discovery_get_vcn_info(adev);
break;
}
switch (adev->asic_type) {
case CHIP_VEGA10:
vega10_reg_base_init(adev);
......@@ -2682,14 +2741,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
break;
default:
r = amdgpu_discovery_reg_base_init(adev);
if (r)
return -EINVAL;
amdgpu_discovery_harvest_ip(adev);
amdgpu_discovery_get_gfx_info(adev);
amdgpu_discovery_get_mall_info(adev);
amdgpu_discovery_get_vcn_info(adev);
break;
}
......
......@@ -1665,15 +1665,8 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
}
mutex_lock(&adev->enforce_isolation_mutex);
for (i = 0; i < num_partitions; i++) {
if (adev->enforce_isolation[i] && !partition_values[i])
/* Going from enabled to disabled */
amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
else if (!adev->enforce_isolation[i] && partition_values[i])
/* Going from disabled to enabled */
amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
for (i = 0; i < num_partitions; i++)
adev->enforce_isolation[i] = partition_values[i];
}
mutex_unlock(&adev->enforce_isolation_mutex);
amdgpu_mes_update_enforce_isolation(adev);
......
......@@ -57,8 +57,8 @@ enum amdgpu_gfx_pipe_priority {
#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
/* 10 millisecond timeout */
#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(10)
/* 1 second timeout */
#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000)
enum amdgpu_gfx_partition {
AMDGPU_SPX_PARTITION_MODE = 0,
......
......@@ -573,6 +573,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0};
unsigned i;
unsigned vmhub, inv_eng;
struct amdgpu_ring *shared_ring;
/* init the vm inv eng for all vmhubs */
for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
......@@ -595,6 +596,10 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
ring == &adev->cper.ring_buf)
continue;
/* Skip if the ring is a shared ring */
if (amdgpu_sdma_is_shared_inv_eng(adev, ring))
continue;
inv_eng = ffs(vm_inv_engs[vmhub]);
if (!inv_eng) {
dev_err(adev->dev, "no VM inv eng for ring %s\n",
......@@ -607,6 +612,21 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
ring->name, ring->vm_inv_eng, ring->vm_hub);
/* SDMA has a special packet which allows it to use the same
* invalidation engine for all the rings in one instance.
* Therefore, we do not allocate a separate VM invalidation engine
* for SDMA page rings. Instead, they share the VM invalidation
* engine with the SDMA gfx ring. This change ensures efficient
* resource management and avoids the issue of insufficient VM
* invalidation engines.
*/
shared_ring = amdgpu_sdma_get_shared_ring(adev, ring);
if (shared_ring) {
shared_ring->vm_inv_eng = ring->vm_inv_eng;
dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n",
ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub);
continue;
}
}
return 0;
......
......@@ -209,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring,
return 0;
}
fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT);
if (!fences)
return -ENOMEM;
......@@ -287,46 +287,34 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
(*id)->flushed_updates < updates ||
!(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
!dma_fence_is_signaled((*id)->last_flush))) {
!dma_fence_is_signaled((*id)->last_flush)))
needs_flush = true;
if ((*id)->owner != vm->immediate.fence_context ||
(!adev->vm_manager.concurrent_flush && needs_flush)) {
struct dma_fence *tmp;
/* Wait for the gang to be assembled before using a
* reserved VMID or otherwise the gang could deadlock.
/* Don't use per engine and per process VMID at the
* same time
*/
tmp = amdgpu_device_get_gang(adev);
if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
if (adev->vm_manager.concurrent_flush)
ring = NULL;
/* to prevent one context starved by another context */
(*id)->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
*fence = tmp;
*fence = dma_fence_get(tmp);
return 0;
}
dma_fence_put(tmp);
/* Make sure the id is owned by the gang before proceeding */
if (!job->gang_submit ||
(*id)->owner != vm->immediate.fence_context) {
/* Don't use per engine and per process VMID at the
* same time
*/
if (adev->vm_manager.concurrent_flush)
ring = NULL;
/* to prevent one context starved by another context */
(*id)->pd_gpu_addr = 0;
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
*fence = dma_fence_get(tmp);
return 0;
}
}
needs_flush = true;
}
/* Good we can use this VMID. Remember this submission as
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished,
GFP_NOWAIT);
if (r)
return r;
......@@ -385,7 +373,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
* user of the VMID.
*/
r = amdgpu_sync_fence(&(*id)->active,
&job->base.s_fence->finished);
&job->base.s_fence->finished,
GFP_NOWAIT);
if (r)
return r;
......@@ -422,7 +411,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r || !idle)
goto error;
if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) {
if (amdgpu_vmid_uses_reserved(vm, vmhub)) {
r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
if (r || !id)
goto error;
......@@ -437,7 +426,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/* Remember this submission as user of the VMID */
r = amdgpu_sync_fence(&id->active,
&job->base.s_fence->finished);
&job->base.s_fence->finished,
GFP_NOWAIT);
if (r)
goto error;
......@@ -474,19 +464,14 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
/*
* amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID
* @adev: amdgpu_device pointer
* @vm: the VM to check
* @vmhub: the VMHUB which will be used
*
* Returns: True if the VM will use a reserved VMID.
*/
bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm, unsigned int vmhub)
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub)
{
return vm->reserved_vmid[vmhub] ||
(adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ?
vm->root.bo->xcp_id : 0] &&
AMDGPU_IS_GFXHUB(vmhub));
return vm->reserved_vmid[vmhub];
}
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
......
......@@ -78,8 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev,
struct amdgpu_vm *vm, unsigned int vmhub);
bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub);
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
unsigned vmhub);
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
......
......@@ -361,17 +361,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct dma_fence *fence = NULL;
struct dma_fence *fence;
int r;
r = drm_sched_entity_error(s_entity);
if (r)
goto error;
if (job->gang_submit)
if (job->gang_submit) {
fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
if (fence)
return fence;
}
fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
if (fence)
return fence;
if (!fence && job->vm && !job->vmid) {
if (job->vm && !job->vmid) {
r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
if (r) {
dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
......@@ -384,9 +391,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
*/
if (!fence)
job->vm = NULL;
return fence;
}
return fence;
return NULL;
error:
dma_fence_set_error(&job->base.s_fence->finished, r);
......
......@@ -145,9 +145,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.vmid_mask_gfxhub = 0xffffff00;
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
/* use only 1st MEC pipes */
if (i >= adev->gfx.mec.num_pipe_per_mec)
continue;
if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec))
break;
adev->mes.compute_hqd_mask[i] = 0xc;
}
......@@ -155,14 +154,9 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe;
for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) {
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) <
IP_VERSION(6, 0, 0))
adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc;
/* zero sdma_hqd_mask for non-existent engine */
else if (adev->sdma.num_instances == 1)
adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc;
else
adev->mes.sdma_hqd_mask[i] = 0xfc;
if (i >= adev->sdma.num_instances)
break;
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) {
......
......@@ -153,6 +153,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA;
ret = psp_init_cap_microcode(psp, ucode_prefix);
break;
case IP_VERSION(13, 0, 12):
ret = psp_init_ta_microcode(psp, ucode_prefix);
break;
default:
return -EINVAL;
}
......
......@@ -37,7 +37,7 @@ struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
#define AMDGPU_MAX_RINGS 133
#define AMDGPU_MAX_RINGS 149
#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
......
......@@ -504,6 +504,39 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
}
}
struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
if (adev->sdma.has_page_queue &&
(ring->me < adev->sdma.num_instances) &&
(ring == &adev->sdma.instance[ring->me].ring))
return &adev->sdma.instance[ring->me].page;
else
return NULL;
}
/**
* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
* @adev: Pointer to the AMDGPU device structure
* @ring: Pointer to the ring structure to check
*
* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
* It returns true if the ring is such an SDMA ring, false otherwise.
*/
bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
int i = ring->me;
if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
return false;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
return (ring == &adev->sdma.instance[i].page);
else
return false;
}
/**
* amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
* @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
......@@ -532,7 +565,6 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
* amdgpu_sdma_reset_engine - Reset a specific SDMA engine
* @adev: Pointer to the AMDGPU device
* @instance_id: ID of the SDMA engine instance to reset
* @suspend_user_queues: check if suspend user queue.
*
* This function performs the following steps:
* 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state.
......@@ -541,22 +573,16 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
*
* Returns: 0 on success, or a negative error code on failure.
*/
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues)
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
{
struct sdma_on_reset_funcs *funcs;
int ret = 0;
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];;
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
struct amdgpu_ring *page_ring = &sdma_instance->page;
bool gfx_sched_stopped = false, page_sched_stopped = false;
/* Suspend KFD if suspend_user_queues is true.
* prevent the destruction of in-flight healthy user queue packets and
* avoid race conditions between KFD and KGD during the reset process.
*/
if (suspend_user_queues)
amdgpu_amdkfd_suspend(adev, false);
mutex_lock(&sdma_instance->engine_reset_mutex);
/* Stop the scheduler's work queue for the GFX and page rings if they are running.
* This ensures that no new tasks are submitted to the queues while
* the reset is in progress.
......@@ -617,9 +643,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, b
drm_sched_wqueue_start(&page_ring->sched);
}
}
if (suspend_user_queues)
amdgpu_amdkfd_resume(adev, false);
mutex_unlock(&sdma_instance->engine_reset_mutex);
return ret;
}
......@@ -64,6 +64,11 @@ struct amdgpu_sdma_instance {
struct amdgpu_bo *sdma_fw_obj;
uint64_t sdma_fw_gpu_addr;
uint32_t *sdma_fw_ptr;
struct mutex engine_reset_mutex;
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
};
enum amdgpu_sdma_ras_memory_id {
......@@ -126,9 +131,6 @@ struct amdgpu_sdma {
uint32_t *ip_dump;
uint32_t supported_reset;
struct list_head reset_callback_list;
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
};
/*
......@@ -169,7 +171,7 @@ struct amdgpu_buffer_funcs {
};
void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
......@@ -194,4 +196,7 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
#endif
......@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
if (unlikely(e->fence->context != f->context))
continue;
if (dma_fence_is_signaled(e->fence)) {
dma_fence_put(e->fence);
e->fence = dma_fence_get(f);
return true;
}
amdgpu_sync_keep_later(&e->fence, f);
return true;
if (likely(e->fence->context == f->context)) {
amdgpu_sync_keep_later(&e->fence, f);
return true;
}
}
return false;
}
......@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
* @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
gfp_t flags)
{
struct amdgpu_sync_entry *e;
......@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
......@@ -249,7 +256,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_fence *tmp = dma_fence_chain_contained(f);
if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
r = amdgpu_sync_fence(sync, f);
r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
dma_fence_put(f);
if (r)
return r;
......@@ -281,7 +288,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
continue;
r = amdgpu_sync_fence(sync, f);
r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
if (r)
break;
}
......@@ -388,7 +395,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
r = amdgpu_sync_fence(clone, f);
r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
......@@ -399,6 +406,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
return 0;
}
/**
* amdgpu_sync_move - move all fences from src to dst
*
* @src: source of the fences, empty after function
* @dst: destination for the fences
*
* Moves all fences from source to destination. All fences in destination are
* freed and source is empty after the function call.
*/
void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
{
unsigned int i;
amdgpu_sync_free(dst);
for (i = 0; i < HASH_SIZE(src->fences); ++i)
hlist_move_list(&src->fences[i], &dst->fences[i]);
}
/**
* amdgpu_sync_push_to_job - push fences into job
* @sync: sync object to get the fences from
......
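A minimal, self-contained usage sketch for the new helper (not part of the patch; "fence" stands for any dma_fence the caller holds):

struct amdgpu_sync a, b;

amdgpu_sync_create(&a);
amdgpu_sync_create(&b);

amdgpu_sync_fence(&a, fence, GFP_KERNEL);  /* track a fence in "a" */
amdgpu_sync_move(&a, &b);                  /* "b" is emptied first, then takes over the fence; "a" ends up empty */

amdgpu_sync_free(&a);
amdgpu_sync_free(&b);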
......@@ -47,7 +47,8 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
......@@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
......
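One editorial note on the new gfp_t parameter threaded through these hunks: regular submission paths pass GFP_KERNEL, while the VMID-grab and isolation-tracking paths pass GFP_NOWAIT, presumably because they run under locks in the fence-signalling path and must not block on reclaim. A minimal sketch of both styles of call site, assuming "sync" is an initialized amdgpu_sync and "fence" a dma_fence:

int r;

/* normal CS / KFD path: a blocking allocation is fine */
r = amdgpu_sync_fence(&sync, fence, GFP_KERNEL);

/* VMID grab / isolation tracking: never sleep for memory */
r = amdgpu_sync_fence(&sync, fence, GFP_NOWAIT);
if (r)
	DRM_WARN("failed to track fence (%d)\n", r);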