Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Showing 401 additions and 113 deletions
......@@ -37,7 +37,7 @@ struct amdgpu_job;
struct amdgpu_vm;
/* max number of rings */
#define AMDGPU_MAX_RINGS 133
#define AMDGPU_MAX_RINGS 149
#define AMDGPU_MAX_HWIP_RINGS 64
#define AMDGPU_MAX_GFX_RINGS 2
#define AMDGPU_MAX_SW_GFX_RINGS 2
......
......@@ -504,6 +504,39 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev)
}
}
struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
if (adev->sdma.has_page_queue &&
(ring->me < adev->sdma.num_instances) &&
(ring == &adev->sdma.instance[ring->me].ring))
return &adev->sdma.instance[ring->me].page;
else
return NULL;
}
/**
* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine
* @adev: Pointer to the AMDGPU device structure
* @ring: Pointer to the ring structure to check
*
* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine.
* It returns true if the ring is such an SDMA ring, false otherwise.
*/
bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
int i = ring->me;
if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances)
return false;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))
return (ring == &adev->sdma.instance[i].page);
else
return false;
}
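For orientation, a hedged sketch of how these two helpers pair up: amdgpu_sdma_get_shared_ring() maps a gfx ring to its paired page ring, and amdgpu_sdma_is_shared_inv_eng() flags the page ring that should piggyback on its gfx ring's VM invalidation engine on GC 9.4.3/9.4.4/9.5.0. The caller below is purely illustrative; the actual assignment site is not part of the hunks shown here.
/* Illustrative only: a ring flagged as sharing does not get its own
 * VM invalidation engine and simply reuses the one assigned to the
 * gfx ring of the same SDMA instance.
 */
static void example_assign_inv_eng(struct amdgpu_device *adev,
				   struct amdgpu_ring *ring)
{
	if (amdgpu_sdma_is_shared_inv_eng(adev, ring)) {
		ring->vm_inv_eng =
			adev->sdma.instance[ring->me].ring.vm_inv_eng;
		return;
	}

	/* otherwise a dedicated engine is allocated as before (not shown) */
}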
/**
* amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
* @funcs: Pointer to the callback structure containing pre_reset and post_reset functions
......@@ -532,7 +565,6 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
* amdgpu_sdma_reset_engine - Reset a specific SDMA engine
* @adev: Pointer to the AMDGPU device
* @instance_id: ID of the SDMA engine instance to reset
* @suspend_user_queues: check if suspend user queue.
*
* This function performs the following steps:
* 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state.
......@@ -541,22 +573,16 @@ void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct
*
* Returns: 0 on success, or a negative error code on failure.
*/
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues)
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id)
{
struct sdma_on_reset_funcs *funcs;
int ret = 0;
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];;
struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
struct amdgpu_ring *page_ring = &sdma_instance->page;
bool gfx_sched_stopped = false, page_sched_stopped = false;
/* Suspend KFD if suspend_user_queues is true.
* prevent the destruction of in-flight healthy user queue packets and
* avoid race conditions between KFD and KGD during the reset process.
*/
if (suspend_user_queues)
amdgpu_amdkfd_suspend(adev, false);
mutex_lock(&sdma_instance->engine_reset_mutex);
/* Stop the scheduler's work queue for the GFX and page rings if they are running.
* This ensures that no new tasks are submitted to the queues while
* the reset is in progress.
......@@ -609,7 +635,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, b
* if they were stopped by this function. This allows new tasks
* to be submitted to the queues after the reset is complete.
*/
if (ret) {
if (!ret) {
if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) {
drm_sched_wqueue_start(&gfx_ring->sched);
}
......@@ -617,9 +643,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, b
drm_sched_wqueue_start(&page_ring->sched);
}
}
if (suspend_user_queues)
amdgpu_amdkfd_resume(adev, false);
mutex_unlock(&sdma_instance->engine_reset_mutex);
return ret;
}
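With the suspend_user_queues parameter gone, the KFD suspend/resume bracket moves out to the IP-specific caller (see sdma_v4_4_2_reset_queue further down in this diff). A hedged restatement of that caller-side contract:
/* Sketch of the new calling convention: the IP code quiesces KFD
 * around the generic engine reset, while amdgpu_sdma_reset_engine()
 * serializes on the per-instance engine_reset_mutex and stops/starts
 * the ring schedulers internally.
 */
static int example_reset_one_sdma(struct amdgpu_device *adev, u32 instance_id)
{
	int r;

	amdgpu_amdkfd_suspend(adev, false);
	r = amdgpu_sdma_reset_engine(adev, instance_id);
	amdgpu_amdkfd_resume(adev, false);

	return r;
}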
......@@ -64,6 +64,11 @@ struct amdgpu_sdma_instance {
struct amdgpu_bo *sdma_fw_obj;
uint64_t sdma_fw_gpu_addr;
uint32_t *sdma_fw_ptr;
struct mutex engine_reset_mutex;
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
};
enum amdgpu_sdma_ras_memory_id {
......@@ -126,9 +131,6 @@ struct amdgpu_sdma {
uint32_t *ip_dump;
uint32_t supported_reset;
struct list_head reset_callback_list;
/* track guilty state of GFX and PAGE queues */
bool gfx_guilty;
bool page_guilty;
};
/*
......@@ -169,7 +171,7 @@ struct amdgpu_buffer_funcs {
};
void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, bool suspend_user_queues);
int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id);
#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t))
#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
......@@ -194,4 +196,7 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev);
int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev);
void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev);
bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring);
struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
#endif
......@@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
struct amdgpu_sync_entry *e;
hash_for_each_possible(sync->fences, e, node, f->context) {
if (unlikely(e->fence->context != f->context))
continue;
if (dma_fence_is_signaled(e->fence)) {
dma_fence_put(e->fence);
e->fence = dma_fence_get(f);
return true;
}
amdgpu_sync_keep_later(&e->fence, f);
return true;
if (likely(e->fence->context == f->context)) {
amdgpu_sync_keep_later(&e->fence, f);
return true;
}
}
return false;
}
......@@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
*
* @sync: sync object to add fence to
* @f: fence to sync to
* @flags: memory allocation flags to use when allocating sync entry
*
* Add the fence to the sync object.
*/
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
gfp_t flags)
{
struct amdgpu_sync_entry *e;
......@@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
if (amdgpu_sync_add_later(sync, f))
return 0;
e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
e = kmem_cache_alloc(amdgpu_sync_slab, flags);
if (!e)
return -ENOMEM;
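A hedged usage note on the new gfp_t parameter: all call sites updated in this diff pass GFP_KERNEL, but the parameter exists so a caller in a context that must not sleep can pick a non-blocking allocation mode instead. The helper below is an assumption-only illustration, not a call site from this series.
/* Illustrative: add a dependency from a context that cannot sleep.
 * GFP_NOWAIT makes the sync-entry allocation fail fast instead of
 * blocking; the caller is expected to handle -ENOMEM.
 */
static int example_sync_fence_atomic(struct amdgpu_sync *sync,
				     struct dma_fence *fence)
{
	return amdgpu_sync_fence(sync, fence, GFP_NOWAIT);
}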
......@@ -249,7 +256,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_fence *tmp = dma_fence_chain_contained(f);
if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
r = amdgpu_sync_fence(sync, f);
r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
dma_fence_put(f);
if (r)
return r;
......@@ -281,7 +288,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
continue;
r = amdgpu_sync_fence(sync, f);
r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
if (r)
break;
}
......@@ -388,7 +395,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
hash_for_each_safe(source->fences, i, tmp, e, node) {
f = e->fence;
if (!dma_fence_is_signaled(f)) {
r = amdgpu_sync_fence(clone, f);
r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
if (r)
return r;
} else {
......@@ -399,6 +406,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
return 0;
}
/**
* amdgpu_sync_move - move all fences from src to dst
*
* @src: source of the fences, empty after function
* @dst: destination for the fences
*
* Moves all fences from source to destination. All fences in destination are
* freed and source is empty after the function call.
*/
void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
{
unsigned int i;
amdgpu_sync_free(dst);
for (i = 0; i < HASH_SIZE(src->fences); ++i)
hlist_move_list(&src->fences[i], &dst->fences[i]);
}
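A minimal, hedged usage sketch of amdgpu_sync_move(): dependencies are gathered into a temporary sync object and then handed over wholesale; the destination's previous fences are freed by the move and the temporary object is left empty. The surrounding function name is an assumption.
/* Illustrative: collect reservation-object fences into a scratch sync
 * object, then move them into the long-lived one in a single step.
 */
static int example_collect_deps(struct amdgpu_device *adev,
				struct dma_resv *resv,
				struct amdgpu_sync *dst)
{
	struct amdgpu_sync tmp;
	int r;

	amdgpu_sync_create(&tmp);
	r = amdgpu_sync_resv(adev, &tmp, resv, AMDGPU_SYNC_ALWAYS, NULL);
	if (r) {
		amdgpu_sync_free(&tmp);
		return r;
	}

	amdgpu_sync_move(&tmp, dst);
	return 0;
}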
/**
* amdgpu_sync_push_to_job - push fences into job
* @sync: sync object to get the fences from
......
......@@ -47,7 +47,8 @@ struct amdgpu_sync {
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
gfp_t flags);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
......@@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst);
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
......
......@@ -457,6 +457,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
TP_ARGS(pasid)
);
TRACE_EVENT(amdgpu_isolation,
TP_PROTO(void *prev, void *next),
TP_ARGS(prev, next),
TP_STRUCT__entry(
__field(void *, prev)
__field(void *, next)
),
TP_fast_assign(
__entry->prev = prev;
__entry->next = next;
),
TP_printk("prev=%p, next=%p",
__entry->prev,
__entry->next)
);
TRACE_EVENT(amdgpu_cleaner_shader,
TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence),
TP_ARGS(ring, fence),
TP_STRUCT__entry(
__string(ring, ring->name)
__field(u64, seqno)
),
TP_fast_assign(
__assign_str(ring);
__entry->seqno = fence->seqno;
),
TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno)
);
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
......
......@@ -438,10 +438,15 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) {
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -455,13 +460,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&vcn_inst->total_submission_cnt);
if (!cancel_delayed_work_sync(&vcn_inst->idle_work)) {
cancel_delayed_work_sync(&vcn_inst->idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&vcn_inst->vcn_pg_lock);
vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE);
......
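The begin_use and idle paths above follow a double-checked pattern around workload_profile_active: the unlocked read is only a fast path, and the flag is re-tested under workload_profile_mutex before the power profile is actually toggled, so the enable and disable sides cannot race. A stripped-down restatement of the enable side (error handling elided, grounded in the hunk above):
/* Pattern sketch only; the real code is in amdgpu_vcn_ring_begin_use(). */
static void example_video_profile_get(struct amdgpu_device *adev)
{
	/* fast path: someone already switched the profile on */
	if (adev->vcn.workload_profile_active)
		return;

	mutex_lock(&adev->vcn.workload_profile_mutex);
	/* re-check under the lock before switching */
	if (!adev->vcn.workload_profile_active) {
		amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
						true);
		adev->vcn.workload_profile_active = true;
	}
	mutex_unlock(&adev->vcn.workload_profile_mutex);
}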
......@@ -358,6 +358,9 @@ struct amdgpu_vcn {
bool per_inst_fw;
unsigned fw_version;
bool workload_profile_active;
struct mutex workload_profile_mutex;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
......
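A hedged reminder of the counterpart these new fields need: the mutex must be initialized (and the flag cleared) before first use, presumably somewhere in the VCN sw/early init path; the exact location is not part of the hunks shown here.
/* Illustrative only: expected one-time initialization of the new state. */
mutex_init(&adev->vcn.workload_profile_mutex);
adev->vcn.workload_profile_active = false;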
......@@ -754,6 +754,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool need_pipe_sync)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
unsigned vmhub = ring->vm_hub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
......@@ -761,8 +762,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool gds_switch_needed = ring->funcs->emit_gds_switch &&
job->gds_switch_needed;
bool vm_flush_needed = job->vm_needs_flush;
struct dma_fence *fence = NULL;
bool cleaner_shader_needed = false;
bool pasid_mapping_needed = false;
struct dma_fence *fence = NULL;
unsigned int patch;
int r;
......@@ -785,8 +787,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
cleaner_shader_needed = adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader && job->base.s_fence &&
&job->base.s_fence->scheduled == isolation->spearhead;
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync &&
!(job->enforce_isolation && !job->vmid))
!cleaner_shader_needed)
return 0;
amdgpu_ring_ib_begin(ring);
......@@ -797,9 +803,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
if (adev->gfx.enable_cleaner_shader &&
ring->funcs->emit_cleaner_shader &&
job->enforce_isolation)
if (cleaner_shader_needed)
ring->funcs->emit_cleaner_shader(ring);
if (vm_flush_needed) {
......@@ -821,7 +825,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
job->oa_size);
}
if (vm_flush_needed || pasid_mapping_needed) {
if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) {
r = amdgpu_fence_emit(ring, &fence, NULL, 0);
if (r)
return r;
......@@ -843,6 +847,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
id->pasid_mapping = dma_fence_get(fence);
mutex_unlock(&id_mgr->lock);
}
/*
* Make sure that all other submissions wait for the cleaner shader to
* finish before we push them to the HW.
*/
if (cleaner_shader_needed) {
trace_amdgpu_cleaner_shader(ring, fence);
mutex_lock(&adev->enforce_isolation_mutex);
dma_fence_put(isolation->spearhead);
isolation->spearhead = dma_fence_get(fence);
mutex_unlock(&adev->enforce_isolation_mutex);
}
dma_fence_put(fence);
amdgpu_ring_patch_cond_exec(ring, patch);
......
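For orientation, a hedged sketch of the consumer side of the spearhead fence: submissions that did not run the cleaner shader are expected to take the current spearhead as a dependency so they cannot overtake it on the hardware. The dependency wiring is not part of the hunks shown here, so the function below is an assumption-only illustration; only the field names and the lock come from this diff.
/* Illustrative only: make a job wait for the current isolation
 * spearhead before it is pushed to the HW.
 */
static int example_sync_to_spearhead(struct amdgpu_device *adev,
				     struct amdgpu_ring *ring,
				     struct amdgpu_job *job,
				     struct amdgpu_sync *sync)
{
	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
	int r = 0;

	mutex_lock(&adev->enforce_isolation_mutex);
	if (isolation->spearhead &&
	    &job->base.s_fence->scheduled != isolation->spearhead)
		r = amdgpu_sync_fence(sync, isolation->spearhead, GFP_KERNEL);
	mutex_unlock(&adev->enforce_isolation_mutex);

	return r;
}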
......@@ -1626,6 +1626,20 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
}
}
break;
case IP_VERSION(11, 5, 0):
case IP_VERSION(11, 5, 1):
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
if (adev->gfx.mec_fw_version >= 26 &&
adev->mes.fw_version[0] >= 114) {
adev->gfx.enable_cleaner_shader = true;
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
if (r) {
adev->gfx.enable_cleaner_shader = false;
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
}
}
break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
......
......@@ -2637,7 +2637,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
u32 tmp;
u32 rb_bufsz;
u64 rb_addr, rptr_addr, wptr_gpu_addr;
u32 i;
/* Set the write pointer delay */
WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0);
......@@ -2692,12 +2691,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev)
/* start the ring */
gfx_v12_0_cp_gfx_start(adev);
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
ring->sched.ready = true;
}
return 0;
}
......@@ -3037,10 +3030,6 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
if (r)
goto done;
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
ring->sched.ready = true;
}
done:
return r;
}
......
......@@ -867,9 +867,8 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
switch (type) {
case ACA_SMU_TYPE_UE:
bank->aca_err_type = ACA_ERROR_TYPE_UE;
ret = aca_error_cache_log_bank_error(handle, &info,
ACA_ERROR_TYPE_UE, 1ULL);
bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank);
ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL);
break;
case ACA_SMU_TYPE_CE:
bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
......
......@@ -31,6 +31,7 @@
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"
......@@ -672,12 +673,11 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
* @adev: amdgpu_device pointer
* @i: instance to resume
* @restore: used to restore wptr when restarting
* @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error)
*
* Set up the gfx DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty)
static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
......@@ -714,7 +714,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b
/* For the guilty queue, set RPTR to the current wptr to skip bad commands.
* If it is not a guilty queue, restore cached_rptr and continue execution.
*/
if (guilty)
if (adev->sdma.instance[i].gfx_guilty)
rwptr = ring->wptr;
else
rwptr = ring->cached_rptr;
......@@ -779,12 +779,11 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, b
* @adev: amdgpu_device pointer
* @i: instance to resume
* @restore: boolean indicating whether a restore is needed
* @guilty: boolean indicating whether this queue is the guilty one (caused the timeout/error)
*
* Set up the page DMA ring buffers and enable them.
* Returns 0 for success, error for failure.
*/
static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore, bool guilty)
static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore)
{
struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
u32 rb_cntl, ib_cntl, wptr_poll_cntl;
......@@ -803,7 +802,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i,
/* For the guilty queue, set RPTR to the current wptr to skip bad commands.
* If it is not a guilty queue, restore cached_rptr and continue execution.
*/
if (guilty)
if (adev->sdma.instance[i].page_guilty)
rwptr = ring->wptr;
else
rwptr = ring->cached_rptr;
......@@ -989,9 +988,9 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
uint32_t temp;
WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
sdma_v4_4_2_gfx_resume(adev, i, restore, adev->sdma.gfx_guilty);
sdma_v4_4_2_gfx_resume(adev, i, restore);
if (adev->sdma.has_page_queue)
sdma_v4_4_2_page_resume(adev, i, restore, adev->sdma.page_guilty);
sdma_v4_4_2_page_resume(adev, i, restore);
/* set utc l1 enable flag always to 1 */
temp = RREG32_SDMA(i, regSDMA_CNTL);
......@@ -1292,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
seq, 0xffffffff, 4);
}
/**
* sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
/*
* sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value
* @vmid: The VMID to invalidate
* @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight)
*
* @ring: amdgpu_ring pointer
* @vmid: vmid number to use
* @pd_addr: address
* This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID
* and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and
* L2 PDEs) are invalidated.
*/
static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
/*
* sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA
* @ring: The SDMA ring
* @vmid: The VMID to flush
* @pd_addr: The page directory address
*
* Update the page table base and flush the VM TLB
* using sDMA.
* This function emits the necessary register writes and waits to perform a VM flush for the
* specified VMID. It updates the PTB address registers and issues a VM invalidation request
* using the specified VM invalidation engine.
*/
static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
unsigned int vmid, uint64_t pd_addr)
{
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
struct amdgpu_device *adev = ring->adev;
uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0);
unsigned int eng = ring->vm_inv_eng;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub];
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
(hub->ctx_addr_distance * vmid),
lower_32_bits(pd_addr));
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
(hub->ctx_addr_distance * vmid),
upper_32_bits(pd_addr));
/*
* Construct and emit the VM invalidation packet
*/
amdgpu_ring_write(ring,
SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |
SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |
SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng));
amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid)));
}
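A hedged reading of what one VM flush now puts on the ring, which is where the '4 + 2 * 3' ring-size term further down comes from (assuming each emitted register write expands to 3 dwords on this engine):
/*
 * Per flush (illustrative accounting, not authoritative):
 *   SRBM write, CTXn_PTB_ADDR_LO32   - 3 dwords
 *   SRBM write, CTXn_PTB_ADDR_HI32   - 3 dwords
 *   SDMA_PKT_VM_INVALIDATION         - 4 dwords:
 *     dw0: OP | SUB_OP | XCC0/XCC1_ENG_ID | MMHUB_ENG_ID
 *     dw1: INVALIDATEREQ from sdma_v4_4_2_get_invalidate_req()
 *     dw2: ADDRESSRANGELO (0, i.e. full range)
 *     dw3: INVALIDATEACK bit for the VMID (ADDRESSRANGEHI word)
 */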
static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
......@@ -1445,6 +1494,11 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
}
for (i = 0; i < adev->sdma.num_instances; i++) {
mutex_init(&adev->sdma.instance[i].engine_reset_mutex);
/* Initialize guilty flags for GFX and PAGE queues */
adev->sdma.instance[i].gfx_guilty = false;
adev->sdma.instance[i].page_guilty = false;
ring = &adev->sdma.instance[i].ring;
ring->ring_obj = NULL;
ring->use_doorbell = true;
......@@ -1506,9 +1560,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
r = amdgpu_sdma_sysfs_reset_mask_init(adev);
if (r)
return r;
/* Initialize guilty flags for GFX and PAGE queues */
adev->sdma.gfx_guilty = false;
adev->sdma.page_guilty = false;
return r;
}
......@@ -1666,7 +1717,16 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid)
{
struct amdgpu_device *adev = ring->adev;
u32 id = GET_INST(SDMA0, ring->me);
return amdgpu_sdma_reset_engine(adev, id, true);
int r;
if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE))
return -EOPNOTSUPP;
amdgpu_amdkfd_suspend(adev, false);
r = amdgpu_sdma_reset_engine(adev, id);
amdgpu_amdkfd_resume(adev, false);
return r;
}
static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
......@@ -1679,9 +1739,11 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_
return -EINVAL;
/* Check if this queue is the guilty one */
adev->sdma.gfx_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
adev->sdma.instance[instance_id].gfx_guilty =
sdma_v4_4_2_is_queue_selected(adev, instance_id, false);
if (adev->sdma.has_page_queue)
adev->sdma.page_guilty = sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
adev->sdma.instance[instance_id].page_guilty =
sdma_v4_4_2_is_queue_selected(adev, instance_id, true);
/* Cache the rptr before reset, after the reset,
* all of the registers will be reset to 0
......@@ -2115,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
3 + /* hdp invalidate */
6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
/* sdma_v4_4_2_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
4 + 2 * 3 +
10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
.emit_ib = sdma_v4_4_2_ring_emit_ib,
......@@ -2148,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
3 + /* hdp invalidate */
6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
/* sdma_v4_4_2_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
4 + 2 * 3 +
10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
.emit_ib = sdma_v4_4_2_ring_emit_ib,
......@@ -2347,6 +2407,9 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
*/
static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev)
{
/* per queue reset not supported for SRIOV */
if (amdgpu_sriov_vf(adev))
return;
/*
* the user queue relies on MEC fw and pmfw when the sdma queue do reset.
......
......@@ -151,6 +151,7 @@ struct ta_ras_init_flags {
uint16_t xcc_mask;
uint8_t channel_dis_num;
uint8_t nps_mode;
uint32_t active_umc_mask;
};
struct ta_ras_mca_addr {
......
......@@ -147,10 +147,15 @@ static void vcn_v2_5_idle_work_handler(struct work_struct *work)
if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) {
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_GATE);
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
mutex_lock(&adev->vcn.workload_profile_mutex);
if (adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
false);
if (r)
dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
adev->vcn.workload_profile_active = false;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
} else {
schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT);
}
......@@ -164,13 +169,26 @@ static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring)
atomic_inc(&adev->vcn.inst[0].total_submission_cnt);
if (!cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work)) {
cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work);
/* We can safely return early here because we've cancelled the
* delayed work so there is no one else to set it to false
* and we don't care if someone else sets it to true.
*/
if (adev->vcn.workload_profile_active)
goto pg_lock;
mutex_lock(&adev->vcn.workload_profile_mutex);
if (!adev->vcn.workload_profile_active) {
r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
true);
if (r)
dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
adev->vcn.workload_profile_active = true;
}
mutex_unlock(&adev->vcn.workload_profile_mutex);
pg_lock:
mutex_lock(&adev->vcn.inst[0].vcn_pg_lock);
amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
AMD_PG_STATE_UNGATE);
......
......@@ -64,6 +64,9 @@
#define HEADER_BARRIER 5
#define SDMA_OP_AQL_COPY 0
#define SDMA_OP_AQL_BARRIER_OR 0
/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */
#define SDMA_OP_VM_INVALIDATE 8
#define SDMA_SUBOP_VM_INVALIDATE 4
/*define for op field*/
#define SDMA_PKT_HEADER_op_offset 0
......@@ -3331,5 +3334,72 @@
#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0
#define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift)
/*
** Definitions for SDMA_PKT_VM_INVALIDATION packet
*/
/*define for HEADER word*/
/*define for op field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF
#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift)
/*define for sub_op field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF
#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8
#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift)
/*define for xcc0_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16
#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift)
/*define for xcc1_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21
#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift)
/*define for mmhub_eng_id field*/
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26
#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift)
/*define for INVALIDATEREQ word*/
/*define for invalidatereq field*/
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0
#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift)
/*define for ADDRESSRANGELO word*/
/*define for addressrangelo field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift)
/*define for ADDRESSRANGEHI word*/
/*define for invalidateack field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift)
/*define for addressrangehi field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift)
/*define for reserved field*/
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23
#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift)
#endif /* __SDMA_PKT_OPEN_H_ */
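A worked, hedged example of the header dword these macros produce for the packet emitted by sdma_v4_4_2_ring_emit_vm_flush(); the engine id 4 is an arbitrary example value.
/* Expands to 0x13FF0408 for eng = 4 (illustrative value). */
u32 hdr = SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |        /* 0x00000008 */
	  SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | /* 0x00000400 */
	  SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) |                /* 0x001F0000 */
	  SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) |                /* 0x03E00000 */
	  SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(4);                   /* 0x10000000 */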
......@@ -2310,7 +2310,7 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
continue;
/* Reset engine and check. */
if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) ||
if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) ||
dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
!set_sdma_queue_as_reset(dqm, doorbell_off)) {
r = -ENOTRECOVERABLE;
......
......@@ -418,6 +418,10 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
!pm->pmf->config_dequeue_wait_counts_size)
return 0;
if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
return 0;
size = pm->pmf->config_dequeue_wait_counts_size;
mutex_lock(&pm->lock);
......@@ -436,16 +440,16 @@ int pm_config_dequeue_wait_counts(struct packet_manager *pm,
retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
cmd, value);
if (!retval)
if (!retval) {
retval = kq_submit_packet(pm->priv_queue);
else
/* If default value is modified, cache that in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
} else {
kq_rollback_packet(pm->priv_queue);
}
}
/* If default value is modified, cache that value in dqm->wait_times */
if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
update_dqm_wait_times(pm->dqm);
out:
mutex_unlock(&pm->lock);
return retval;
......
......@@ -310,6 +310,13 @@ static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manage
reg_data);
}
/* pm_config_dequeue_wait_counts_v9: Builds a WRITE_DATA packet with the
* register/value pair for configuring dequeue wait counts
*
* @return: negative value on failure, 0 on success; on success the buffer
* is filled with the packet
*/
static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
uint32_t *buffer,
enum kfd_config_dequeue_wait_counts_cmd cmd,
......@@ -321,24 +328,25 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
switch (cmd) {
case KFD_DEQUEUE_WAIT_INIT: {
uint32_t sch_wave = 0, que_sleep = 0;
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
uint32_t sch_wave = 0, que_sleep = 1;
/* For all gfx9 ASICs > gfx941,
* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
* On a 1GHz machine this is roughly 1 microsecond, which is
* about how long it takes to load data out of memory during
* queue connect
* QUE_SLEEP: Wait Count for Dequeue Retry.
*
* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU
*/
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
que_sleep = 1;
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
} else {
return 0;
}
if (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0))
return -EPERM;
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
sch_wave = 1;
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
&reg_offset, &reg_data);
......
......@@ -2006,10 +2006,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(11, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(12, 0, 0))
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED;
......