diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a9a41f4e7485e2cccc4f182384f7deaeb476efa..6d83ccfa42eeb0b00ca18635a8c1225de88a06e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1194,9 +1194,15 @@ struct amdgpu_device {
 	bool				debug_exp_resets;
 	bool				debug_disable_gpu_ring_reset;
 
-	bool				enforce_isolation[MAX_XCP];
-	/* Added this mutex for cleaner shader isolation between GFX and compute processes */
+	/* Protection for the following isolation structure */
 	struct mutex			enforce_isolation_mutex;
+	bool				enforce_isolation[MAX_XCP];
+	struct amdgpu_isolation {
+		void			*owner;
+		struct dma_fence	*spearhead;
+		struct amdgpu_sync	active;
+		struct amdgpu_sync	prev;
+	} isolation[MAX_XCP];
 
 	struct amdgpu_init_level *init_lvl;
 
@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						   struct amdgpu_ring *ring,
+						   struct amdgpu_job *job);
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4ed5e26a13d52d3af62f0ae1147850a7aa446e36..662c83d72509ea223f5bbe0e34e6d2a1fefd3db5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4294,6 +4294,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->gfx.reset_sem_mutex);
 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
 	mutex_init(&adev->enforce_isolation_mutex);
+	for (i = 0; i < MAX_XCP; ++i) {
+		adev->isolation[i].spearhead = dma_fence_get_stub();
+		amdgpu_sync_create(&adev->isolation[i].active);
+		amdgpu_sync_create(&adev->isolation[i].prev);
+	}
 	mutex_init(&adev->gfx.kfd_sch_mutex);
 	mutex_init(&adev->gfx.workload_profile_mutex);
 	mutex_init(&adev->vcn.workload_profile_mutex);
@@ -4799,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
-	int idx;
+	int i, idx;
 	bool px;
 
 	amdgpu_device_ip_fini(adev);
@@ -4807,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
 	adev->accel_working = false;
 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+	for (i = 0; i < MAX_XCP; ++i) {
+		dma_fence_put(adev->isolation[i].spearhead);
+		amdgpu_sync_free(&adev->isolation[i].active);
+		amdgpu_sync_free(&adev->isolation[i].prev);
+	}
 
 	amdgpu_reset_fini(adev);
 
@@ -6953,6 +6963,92 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 	return NULL;
 }
 
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						   struct amdgpu_ring *ring,
+						   struct amdgpu_job *job)
+{
+	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+	struct drm_sched_fence *f = job->base.s_fence;
+	struct dma_fence *dep;
+	void *owner;
+	int r;
+
+	/*
+	 * For now enforce isolation only for the GFX block since we only need
+	 * the cleaner shader on those rings.
+	 */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return NULL;
+
+	/*
+	 * All submissions where enforce isolation is false are handled as if
+	 * they come from a single client. Use ~0l as the owner to distinguish
+	 * it from kernel submissions where the owner is NULL.
+	 */
+	owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+	mutex_lock(&adev->enforce_isolation_mutex);
+
+	/*
+	 * The "spearhead" submission is the first one which changes the
+	 * ownership to its client. We always need to wait for it to be
+	 * pushed to the HW before proceeding with anything.
+	 */
+	if (&f->scheduled != isolation->spearhead &&
+	    !dma_fence_is_signaled(isolation->spearhead)) {
+		dep = isolation->spearhead;
+		goto out_grab_ref;
+	}
+
+	if (isolation->owner != owner) {
+
+		/*
+		 * Wait for any gang to be assembled before switching to a
+		 * different owner or otherwise we could deadlock the
+		 * submissions.
+		 */
+		if (!job->gang_submit) {
+			dep = amdgpu_device_get_gang(adev);
+			if (!dma_fence_is_signaled(dep))
+				goto out_return_dep;
+			dma_fence_put(dep);
+		}
+
+		dma_fence_put(isolation->spearhead);
+		isolation->spearhead = dma_fence_get(&f->scheduled);
+		amdgpu_sync_move(&isolation->active, &isolation->prev);
+		isolation->owner = owner;
+	}
+
+	/*
+	 * Specifying the ring here helps to pipeline submissions even when
+	 * isolation is enabled. If that is not desired for testing, NULL can
+	 * be used instead of the ring to enforce a CPU round trip while
+	 * switching between clients.
+	 */
+	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+	if (r)
+		DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+	dma_fence_get(dep);
+out_return_dep:
+	mutex_unlock(&adev->enforce_isolation_mutex);
+	return dep;
+}
+
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 56d27cea052ed2a7b77a2fc1f494317762df5cf9..92ab821afc06ae561929cfed74c72ad0ac8d87b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -287,40 +287,27 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	    (*id)->flushed_updates < updates ||
 	    !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
-	     !dma_fence_is_signaled((*id)->last_flush))) {
+	     !dma_fence_is_signaled((*id)->last_flush)))
+		needs_flush = true;
+
+	if ((*id)->owner != vm->immediate.fence_context ||
+	    (!adev->vm_manager.concurrent_flush && needs_flush)) {
 		struct dma_fence *tmp;
 
-		/* Wait for the gang to be assembled before using a
-		 * reserved VMID or otherwise the gang could deadlock.
+		/* Don't use per engine and per process VMID at the
+		 * same time
 		 */
-		tmp = amdgpu_device_get_gang(adev);
-		if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) {
+		if (adev->vm_manager.concurrent_flush)
+			ring = NULL;
+
+		/* to prevent one context starved by another context */
+		(*id)->pd_gpu_addr = 0;
+		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+		if (tmp) {
 			*id = NULL;
-			*fence = tmp;
+			*fence = dma_fence_get(tmp);
 			return 0;
 		}
-		dma_fence_put(tmp);
-
-		/* Make sure the id is owned by the gang before proceeding */
-		if (!job->gang_submit ||
-		    (*id)->owner != vm->immediate.fence_context) {
-
-			/* Don't use per engine and per process VMID at the
-			 * same time
-			 */
-			if (adev->vm_manager.concurrent_flush)
-				ring = NULL;
-
-			/* to prevent one context starved by another context */
-			(*id)->pd_gpu_addr = 0;
-			tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
-			if (tmp) {
-				*id = NULL;
-				*fence = dma_fence_get(tmp);
-				return 0;
-			}
-		}
-		needs_flush = true;
 	}
 
 	/* Good we can use this VMID. Remember this submission as
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 935df2cdcc16c873aaab6a5fba23ee321c11d45f..acb21fc8b3ce5d8c745e3e3384bd9ec24b91be3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -361,17 +361,24 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	struct dma_fence *fence = NULL;
+	struct dma_fence *fence;
 	int r;
 
 	r = drm_sched_entity_error(s_entity);
 	if (r)
 		goto error;
 
-	if (job->gang_submit)
+	if (job->gang_submit) {
 		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
+		if (fence)
+			return fence;
+	}
+
+	fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
+	if (fence)
+		return fence;
 
-	if (!fence && job->vm && !job->vmid) {
+	if (job->vm && !job->vmid) {
 		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
 		if (r) {
 			dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
@@ -384,9 +391,10 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
 		 */
 		if (!fence)
 			job->vm = NULL;
+		return fence;
 	}
 
-	return fence;
+	return NULL;
 
 error:
 	dma_fence_set_error(&job->base.s_fence->finished, r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bfe12164d27da7546844afa8bdb30671c57becab..c5f9db6b32a496b2cb1b949a429b20240dfde9a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -405,6 +405,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 	return 0;
 }
 
+/**
+ * amdgpu_sync_move - move all fences from src to dst
+ *
+ * @src: source of the fences, empty after function
+ * @dst: destination for the fences
+ *
+ * Moves all fences from source to destination. All fences in destination are
+ * freed and source is empty after the function call.
+ */
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
+{
+	unsigned int i;
+
+	amdgpu_sync_free(dst);
+
+	for (i = 0; i < HASH_SIZE(src->fences); ++i)
+		hlist_move_list(&src->fences[i], &dst->fences[i]);
+}
+
 /**
  * amdgpu_sync_push_to_job - push fences into job
  * @sync: sync object to get the fences from
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 1504f5e7fc46598f7595250c0eefb38a6d5177cd..51eb4382c91ebd216b3be4ae80d8923ff9c5c8db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -57,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 					 struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
 int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
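
Note (not part of the patch): the owner/spearhead hand-over implemented by amdgpu_device_enforce_isolation() can be hard to follow from the diff alone, so below is a small, self-contained userspace model of the same state machine. Every name in it (fence_t, sync_set, sync_move, sync_peek, enforce_isolation) is an invented stand-in for illustration only; locking, fence reference counting, the signaled stub fence used at init time and the gang check are deliberately left out.

#include <stdbool.h>
#include <stdio.h>

typedef struct { int seq; bool signaled; } fence_t;

#define MAX_FENCES 16

struct sync_set {
	fence_t *fences[MAX_FENCES];
	unsigned int count;
};

struct isolation_state {
	const void *owner;	/* client currently owning the engine */
	fence_t *spearhead;	/* first submission of the current owner */
	struct sync_set active;	/* submissions of the current owner */
	struct sync_set prev;	/* submissions of the previous owner */
};

/* Model of amdgpu_sync_move(): drop dst and hand all fences from src over. */
static void sync_move(struct sync_set *src, struct sync_set *dst)
{
	*dst = *src;
	src->count = 0;
}

/* Return the first fence in the set that has not signaled yet, or NULL. */
static fence_t *sync_peek(struct sync_set *set)
{
	for (unsigned int i = 0; i < set->count; ++i)
		if (!set->fences[i]->signaled)
			return set->fences[i];
	return NULL;
}

/*
 * Simplified counterpart of amdgpu_device_enforce_isolation(): return the
 * dependency submission @f of client @owner must wait for, or NULL once the
 * submission may run.
 */
static fence_t *enforce_isolation(struct isolation_state *iso,
				  const void *owner, fence_t *f)
{
	fence_t *dep;

	/* Wait until the current spearhead has reached the HW. */
	if (iso->spearhead != f && iso->spearhead && !iso->spearhead->signaled)
		return iso->spearhead;

	if (iso->owner != owner) {
		/* The first submission of a new client becomes the spearhead. */
		iso->spearhead = f;
		sync_move(&iso->active, &iso->prev);
		iso->owner = owner;
	}

	/* The new owner still depends on the previous owner's work. */
	dep = sync_peek(&iso->prev);
	if (iso->active.count < MAX_FENCES)
		iso->active.fences[iso->active.count++] = f;
	return dep;
}

int main(void)
{
	struct isolation_state iso = { 0 };
	fence_t a = { 1, false }, b = { 2, false };
	const char *client_a = "A", *client_b = "B";
	fence_t *dep;

	dep = enforce_isolation(&iso, client_a, &a);
	printf("A waits on %d\n", dep ? dep->seq : 0);	/* 0: runs right away */

	dep = enforce_isolation(&iso, client_b, &b);
	printf("B waits on %d\n", dep ? dep->seq : 0);	/* 1: A is still the spearhead */

	a.signaled = true;	/* A's work completed */
	dep = enforce_isolation(&iso, client_b, &b);
	printf("B waits on %d\n", dep ? dep->seq : 0);	/* 0: ownership switched to B */
	return 0;
}

In the driver itself this state lives per XCP in adev->isolation[] under enforce_isolation_mutex; the model only mirrors the branching so the order of returned dependencies is visible.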