diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index cdf46bd0d8d5bc4f4e4dd5a9892a99e23ac7b8f5..6f01c6145a87f5bd929ee9bf8d5d86c22dd7d273 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2041,19 +2041,26 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { - pr_debug("%s: Failed to get user pages: %d\n", - __func__, ret); + pr_debug("Failed %d to get user pages\n", ret); + + /* Return -EFAULT bad address error as success. It will + * fail later with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with stalled + * user mode queues. + * + * Return other error -EBUSY or -ENOMEM to retry restore + */ + if (ret != -EFAULT) + return ret; + } else { - /* Return error -EBUSY or -ENOMEM, retry restore */ - return ret; + /* + * FIXME: Cannot ignore the return code, must hold + * notifier_lock + */ + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); } - /* - * FIXME: Cannot ignore the return code, must hold - * notifier_lock - */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - /* Mark the BO as valid unless it was invalidated * again concurrently. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d784f8d3a834727327f0deb6d89a1bd5ef9833cf..ae6694f2c73d498678d4ad0c31c5da21bb8fed31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -693,6 +693,9 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, ttm->num_pages, >t->range, readonly, false, NULL); + if (r) + pr_debug("failed %d to get user pages 0x%llx\n", r, start); + out_putmm: mmput(mm);