
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (153)
Showing 393 additions and 129 deletions
@@ -1194,9 +1194,15 @@ struct amdgpu_device {
 	bool				debug_exp_resets;
 	bool				debug_disable_gpu_ring_reset;
-	bool				enforce_isolation[MAX_XCP];
-	/* Added this mutex for cleaner shader isolation between GFX and compute processes */
+	/* Protection for the following isolation structure */
 	struct mutex			enforce_isolation_mutex;
+	bool				enforce_isolation[MAX_XCP];
+	struct amdgpu_isolation {
+		void			*owner;
+		struct dma_fence	*spearhead;
+		struct amdgpu_sync	active;
+		struct amdgpu_sync	prev;
+	} isolation[MAX_XCP];
 
 	struct amdgpu_init_level *init_lvl;
@@ -1482,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
 struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						   struct amdgpu_ring *ring,
+						   struct amdgpu_job *job);
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
 ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring);
 ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset);
......
@@ -391,6 +391,7 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
 {
 	struct aca_bank_node *node;
 	struct aca_bank *bank;
+	int r;
 
 	if (!adev->cper.enabled)
 		return;
@@ -402,11 +403,27 @@ static void aca_banks_generate_cper(struct amdgpu_device *adev,
 	/* UEs must be encoded into separate CPER entries */
 	if (type == ACA_SMU_TYPE_UE) {
+		struct aca_banks de_banks;
+
+		aca_banks_init(&de_banks);
 		list_for_each_entry(node, &banks->list, node) {
 			bank = &node->bank;
-			if (amdgpu_cper_generate_ue_record(adev, bank))
-				dev_warn(adev->dev, "fail to generate ue cper records\n");
+			if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
+				r = aca_banks_add_bank(&de_banks, bank);
+				if (r)
+					dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r);
+			} else {
+				if (amdgpu_cper_generate_ue_record(adev, bank))
+					dev_warn(adev->dev, "fail to generate ue cper records\n");
+			}
 		}
+
+		if (!list_empty(&de_banks.list)) {
+			if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks))
+				dev_warn(adev->dev, "fail to generate de cper records\n");
+		}
+
+		aca_banks_release(&de_banks);
 	} else {
 		/*
 		 * SMU_TYPE_CE banks are combined into 1 CPER entries,
@@ -541,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h
 	if (ret)
 		return ret;
 
+	/* DEs may contain in CEs or UEs */
+	if (type != ACA_ERROR_TYPE_DEFERRED)
+		aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data);
+
 	return aca_log_aca_error(handle, type, err_data);
 }
......
@@ -76,11 +76,17 @@ struct ras_query_context;
 #define mmSMNAID_XCD1_MCA_SMU	0x38430400	/* SMN AID XCD1 */
 #define mmSMNXCD_XCD0_MCA_SMU	0x40430400	/* SMN XCD XCD0 */
 
-#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
-	((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
-	  ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \
-	 ACA_ERROR_TYPE_DEFERRED : \
-	 ACA_ERROR_TYPE_CE)
+#define ACA_BANK_ERR_IS_DEFFERED(bank) \
+	(ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+	 ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS]))
+
+#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
+	(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
+	 ACA_ERROR_TYPE_CE)
+
+#define ACA_BANK_ERR_UE_DE_DECODE(bank) \
+	(ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \
+	 ACA_ERROR_TYPE_UE)
 
 enum aca_reg_idx {
 	ACA_REG_IDX_CTL = 0,
......
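For context, the reworked decode helpers above classify an ACA bank purely from the poison/deferred bits of its STATUS register. A minimal, hedged sketch of how the macro pair could be used to pick the logged error type; the classify_bank() helper and its from_ue_list flag are hypothetical illustrations, not part of the patch, and assume the struct aca_bank and enum aca_error_type definitions from amdgpu_aca.h:

/* Hypothetical helper: poisoned/deferred banks are reported as DEFERRED,
 * everything else keeps its CE or UE classification.
 */
static enum aca_error_type classify_bank(struct aca_bank *bank, bool from_ue_list)
{
	return from_ue_list ? ACA_BANK_ERR_UE_DE_DECODE(bank) :
			      ACA_BANK_ERR_CE_DE_DECODE(bank);
}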
@@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 	.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
 	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
 	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
 	.hqd_reset = kgd_gfx_v9_hqd_reset,
......
@@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
 	.set_address_watch = kgd_gfx_v9_set_address_watch,
 	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
 	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
......
@@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings =
			kgd_gfx_v9_program_trap_handler_settings,
-	.build_grace_period_packet_info =
-			kgd_gfx_v9_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info =
+			kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
 	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
 	.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
 	.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
......
@@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
 	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
 }
 
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
 						uint32_t wait_times,
-						uint32_t grace_period,
+						uint32_t sch_wave,
+						uint32_t que_sleep,
 						uint32_t *reg_offset,
 						uint32_t *reg_data)
 {
 	*reg_data = wait_times;
 
-	/*
-	 * The CP cannont handle a 0 grace period input and will result in
-	 * an infinite grace period being set so set to 1 to prevent this.
-	 */
-	if (grace_period == 0)
-		grace_period = 1;
-
-	*reg_data = REG_SET_FIELD(*reg_data,
-				  CP_IQ_WAIT_TIME2,
-				  SCH_WAVE,
-				  grace_period);
+	if (sch_wave)
+		*reg_data = REG_SET_FIELD(*reg_data,
+					  CP_IQ_WAIT_TIME2,
+					  SCH_WAVE,
+					  sch_wave);
+	if (que_sleep)
+		*reg_data = REG_SET_FIELD(*reg_data,
+					  CP_IQ_WAIT_TIME2,
+					  QUE_SLEEP,
+					  que_sleep);
 
 	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
@@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
 	.set_address_watch = kgd_gfx_v10_set_address_watch,
 	.clear_address_watch = kgd_gfx_v10_clear_address_watch,
 	.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
-	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
 	.program_trap_handler_settings = program_trap_handler_settings,
 	.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
 	.hqd_reset = kgd_gfx_v10_hqd_reset,
......
@@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
 void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
 				   uint32_t *wait_times,
 				   uint32_t inst);
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
 						uint32_t wait_times,
-						uint32_t grace_period,
+						uint32_t sch_wave,
+						uint32_t que_sleep,
 						uint32_t *reg_offset,
 						uint32_t *reg_data);
 uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
......
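The renamed helpers above no longer take a single grace_period value; callers now pass the SCH_WAVE and QUE_SLEEP counts separately, and a zero leaves the corresponding CP_IQ_WAIT_TIME2 field at its current value. A hedged sketch of the calling sequence, assuming an amdgpu_device pointer `adev` in scope; the literal field values and the inst argument are made up for illustration:

uint32_t wait_times, reg_offset, reg_data;

/* Read the current CP_IQ_WAIT_TIME2 value... */
kgd_gfx_v10_get_iq_wait_times(adev, &wait_times, 0 /* inst */);

/* ...then override only the requested fields before emitting the
 * register write packet.
 */
kgd_gfx_v10_build_dequeue_wait_counts_packet_info(adev, wait_times,
						  1 /* sch_wave */,
						  0 /* que_sleep: keep as-is */,
						  &reg_offset, &reg_data);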
@@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
 	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
 	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
 	.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
-	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
 	.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
 	.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
 	.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
......
@@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
 			adev->gfx.cu_info.max_waves_per_simd;
 }
 
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
 					       uint32_t wait_times,
-					       uint32_t grace_period,
+					       uint32_t sch_wave,
+					       uint32_t que_sleep,
 					       uint32_t *reg_offset,
 					       uint32_t *reg_data)
 {
 	*reg_data = wait_times;
 
-	/*
-	 * The CP cannot handle a 0 grace period input and will result in
-	 * an infinite grace period being set so set to 1 to prevent this.
-	 */
-	if (grace_period == 0)
-		grace_period = 1;
-
-	*reg_data = REG_SET_FIELD(*reg_data,
-				  CP_IQ_WAIT_TIME2,
-				  SCH_WAVE,
-				  grace_period);
+	if (sch_wave)
+		*reg_data = REG_SET_FIELD(*reg_data,
+					  CP_IQ_WAIT_TIME2,
+					  SCH_WAVE,
+					  sch_wave);
+	if (que_sleep)
+		*reg_data = REG_SET_FIELD(*reg_data,
+					  CP_IQ_WAIT_TIME2,
+					  QUE_SLEEP,
+					  que_sleep);
 
 	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
@@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
 	.set_address_watch = kgd_gfx_v9_set_address_watch,
 	.clear_address_watch = kgd_gfx_v9_clear_address_watch,
 	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
+	.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 	.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
......
@@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
 void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
 				  uint32_t *wait_times,
 				  uint32_t inst);
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
 					       uint32_t wait_times,
-					       uint32_t grace_period,
+					       uint32_t sch_wave,
+					       uint32_t que_sleep,
 					       uint32_t *reg_offset,
 					       uint32_t *reg_data);
 uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
......
@@ -491,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 	if (ret)
 		return ret;
 
-	return amdgpu_sync_fence(sync, vm->last_update);
+	return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL);
 }
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
@@ -1249,7 +1249,7 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
 	(void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
 
-	(void)amdgpu_sync_fence(sync, bo_va->last_pt_update);
+	(void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
 
 	return 0;
 }
@@ -1273,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 		return ret;
 	}
 
-	return amdgpu_sync_fence(sync, bo_va->last_pt_update);
+	return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);
 }
 
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
@@ -2913,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
 		}
 		dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
 					DMA_RESV_USAGE_KERNEL, fence) {
-			ret = amdgpu_sync_fence(&sync_obj, fence);
+			ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL);
 			if (ret) {
 				pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
 				goto validate_map_fail;
......
@@ -455,10 +455,10 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
 	return umin(rec_len, chunk);
 }
 
-void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
-			    void *src, int count)
+void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
 {
 	u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask;
+	int rec_cnt_dw = count >> 2;
 	u32 chunk, ent_sz;
 	u8 *s = (u8 *)src;
 
@@ -485,6 +485,9 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
 		s += chunk;
 	}
 
+	if (ring->count_dw < rec_cnt_dw)
+		ring->count_dw = 0;
+
 	/* the buffer is overflow, adjust rptr */
 	if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
 	    ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
@@ -501,12 +504,10 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring,
 			pos = rptr;
 		} while (!amdgpu_cper_is_hdr(ring, rptr));
 	}
-	mutex_unlock(&ring->adev->cper.ring_lock);
 
-	if (ring->count_dw >= (count >> 2))
-		ring->count_dw -= (count >> 2);
-	else
-		ring->count_dw = 0;
+	if (ring->count_dw >= rec_cnt_dw)
+		ring->count_dw -= rec_cnt_dw;
+	mutex_unlock(&ring->adev->cper.ring_lock);
 }
 
 static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
......
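The amdgpu_cper_ring_write() change above computes the record size in dwords once and keeps the free-space accounting under cper.ring_lock. A small sketch of the net effect on the counter, assuming the same bytes-to-dwords conversion; the helper below is illustrative only and is not driver code:

/* Illustrative only: net effect of the bookkeeping for one CPER record of
 * `count` bytes. The free-dword counter never goes negative and is otherwise
 * reduced by the record size.
 */
static u32 cper_count_dw_after_write(u32 count_dw, int count)
{
	int rec_cnt_dw = count >> 2;	/* bytes -> dwords */

	return (count_dw < (u32)rec_cnt_dw) ? 0 : count_dw - rec_cnt_dw;
}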
@@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
 			dma_fence_put(old);
 		}
 
-		r = amdgpu_sync_fence(&p->sync, fence);
+		r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
 		dma_fence_put(fence);
 		if (r)
 			return r;
@@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
-	r = amdgpu_sync_fence(&p->sync, fence);
+	r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL);
 	dma_fence_put(fence);
 	return r;
 }
@@ -1111,7 +1111,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 			struct drm_gpu_scheduler *sched = entity->rq->sched;
 			struct amdgpu_ring *ring = to_amdgpu_ring(sched);
 
-			if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub))
+			if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub))
 				return -EINVAL;
 		}
 	}
@@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
+	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update,
+			      GFP_KERNEL);
 	if (r)
 		return r;
 
@@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+				      GFP_KERNEL);
 		if (r)
 			return r;
 	}
@@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update,
+				      GFP_KERNEL);
 		if (r)
 			return r;
 	}
@@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(&p->sync, vm->last_update);
+	r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL);
 	if (r)
 		return r;
 
@@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 			continue;
 		}
 
-		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence,
+				      GFP_KERNEL);
 		dma_fence_put(fence);
 		if (r)
 			return r;
......
@@ -1990,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val)
 	uint32_t max_freq, min_freq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)data;
 
-	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
+	if (amdgpu_sriov_multi_vf_mode(adev))
 		return -EINVAL;
 
 	ret = pm_runtime_get_sync(adev_to_drm(adev)->dev);
......
@@ -227,6 +227,24 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
 static DEVICE_ATTR(pcie_replay_count, 0444,
 		amdgpu_device_get_pcie_replay_count, NULL);
 
+static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
+{
+	int ret = 0;
+
+	if (!amdgpu_sriov_vf(adev))
+		ret = sysfs_create_file(&adev->dev->kobj,
+					&dev_attr_pcie_replay_count.attr);
+
+	return ret;
+}
+
+static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		sysfs_remove_file(&adev->dev->kobj,
+				  &dev_attr_pcie_replay_count.attr);
+}
+
 static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
 					  struct bin_attribute *attr, char *buf,
 					  loff_t ppos, size_t count)
@@ -2757,6 +2775,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	if (!total)
 		return -ENODEV;
 
+	if (adev->gmc.xgmi.supported)
+		amdgpu_xgmi_early_init(adev);
+
 	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 	if (ip_block->status.valid != false)
 		amdgpu_amdkfd_device_probe(adev);
@@ -4169,11 +4190,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
 }
 #endif
 
-static const struct attribute *amdgpu_dev_attributes[] = {
-	&dev_attr_pcie_replay_count.attr,
-	NULL
-};
-
 static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 {
 	if (amdgpu_mcbp == 1)
@@ -4278,7 +4294,14 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->gfx.reset_sem_mutex);
 	/* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
 	mutex_init(&adev->enforce_isolation_mutex);
+	for (i = 0; i < MAX_XCP; ++i) {
+		adev->isolation[i].spearhead = dma_fence_get_stub();
+		amdgpu_sync_create(&adev->isolation[i].active);
+		amdgpu_sync_create(&adev->isolation[i].prev);
+	}
 	mutex_init(&adev->gfx.kfd_sch_mutex);
+	mutex_init(&adev->gfx.workload_profile_mutex);
+	mutex_init(&adev->vcn.workload_profile_mutex);
 
 	amdgpu_device_init_apu_flags(adev);
@@ -4396,10 +4419,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	/* Get rid of things like offb */
-	r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
-	if (r)
-		return r;
+	/*
+	 * No need to remove conflicting FBs for non-display class devices.
+	 * This prevents the sysfb from being freed accidently.
+	 */
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+		/* Get rid of things like offb */
+		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name);
+		if (r)
+			return r;
+	}
 
 	/* Enable TMZ based on IP_VERSION */
 	amdgpu_gmc_tmz_set(adev);
@@ -4610,7 +4640,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	} else
 		adev->ucode_sysfs_en = true;
 
-	r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
+	r = amdgpu_device_attr_sysfs_init(adev);
 	if (r)
 		dev_err(adev->dev, "Could not create amdgpu device attr\n");
@@ -4747,7 +4777,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 	amdgpu_pm_sysfs_fini(adev);
 	if (adev->ucode_sysfs_en)
 		amdgpu_ucode_sysfs_fini(adev);
-	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
+	amdgpu_device_attr_sysfs_fini(adev);
 	amdgpu_fru_sysfs_fini(adev);
 
 	amdgpu_reg_state_sysfs_fini(adev);
@@ -4774,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
 
 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 {
-	int idx;
+	int i, idx;
 	bool px;
 
 	amdgpu_device_ip_fini(adev);
@@ -4782,6 +4812,11 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
 	adev->accel_working = false;
 	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
+	for (i = 0; i < MAX_XCP; ++i) {
+		dma_fence_put(adev->isolation[i].spearhead);
+		amdgpu_sync_free(&adev->isolation[i].active);
+		amdgpu_sync_free(&adev->isolation[i].prev);
+	}
 
 	amdgpu_reset_fini(adev);
@@ -4797,6 +4832,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	kfree(adev->fru_info);
 	adev->fru_info = NULL;
 
+	kfree(adev->xcp_mgr);
+	adev->xcp_mgr = NULL;
+
 	px = amdgpu_device_supports_px(adev_to_drm(adev));
 
 	if (px || (!dev_is_removable(&adev->pdev->dev) &&
@@ -5328,6 +5366,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
+	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
 	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
 		amdgpu_ras_resume(adev);
@@ -6900,22 +6939,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 {
 	struct dma_fence *old = NULL;
 
+	dma_fence_get(gang);
 	do {
 		dma_fence_put(old);
 		old = amdgpu_device_get_gang(adev);
 		if (old == gang)
 			break;
 
-		if (!dma_fence_is_signaled(old))
+		if (!dma_fence_is_signaled(old)) {
+			dma_fence_put(gang);
 			return old;
+		}
 
 	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
 			 old, gang) != old);
 
+	/*
+	 * Drop it once for the exchanged reference in adev and once for the
+	 * thread local reference acquired in amdgpu_device_get_gang().
+	 */
+	dma_fence_put(old);
 	dma_fence_put(old);
 	return NULL;
 }
 
+/**
+ * amdgpu_device_enforce_isolation - enforce HW isolation
+ * @adev: the amdgpu device pointer
+ * @ring: the HW ring the job is supposed to run on
+ * @job: the job which is about to be pushed to the HW ring
+ *
+ * Makes sure that only one client at a time can use the GFX block.
+ * Returns: The dependency to wait on before the job can be pushed to the HW.
+ * The function is called multiple times until NULL is returned.
+ */
+struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev,
+						  struct amdgpu_ring *ring,
+						  struct amdgpu_job *job)
+{
+	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
+	struct drm_sched_fence *f = job->base.s_fence;
+	struct dma_fence *dep;
+	void *owner;
+	int r;
+
+	/*
+	 * For now enforce isolation only for the GFX block since we only need
+	 * the cleaner shader on those rings.
+	 */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
+	    ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return NULL;
+
+	/*
+	 * All submissions where enforce isolation is false are handled as if
+	 * they come from a single client. Use ~0l as the owner to distinct it
+	 * from kernel submissions where the owner is NULL.
+	 */
+	owner = job->enforce_isolation ? f->owner : (void *)~0l;
+
+	mutex_lock(&adev->enforce_isolation_mutex);
+
+	/*
+	 * The "spearhead" submission is the first one which changes the
+	 * ownership to its client. We always need to wait for it to be
+	 * pushed to the HW before proceeding with anything.
+	 */
+	if (&f->scheduled != isolation->spearhead &&
+	    !dma_fence_is_signaled(isolation->spearhead)) {
+		dep = isolation->spearhead;
+		goto out_grab_ref;
+	}
+
+	if (isolation->owner != owner) {
+		/*
+		 * Wait for any gang to be assembled before switching to a
+		 * different owner or otherwise we could deadlock the
+		 * submissions.
+		 */
+		if (!job->gang_submit) {
+			dep = amdgpu_device_get_gang(adev);
+			if (!dma_fence_is_signaled(dep))
+				goto out_return_dep;
+			dma_fence_put(dep);
+		}
+
+		dma_fence_put(isolation->spearhead);
+		isolation->spearhead = dma_fence_get(&f->scheduled);
+		amdgpu_sync_move(&isolation->active, &isolation->prev);
+		trace_amdgpu_isolation(isolation->owner, owner);
+		isolation->owner = owner;
+	}
+
+	/*
+	 * Specifying the ring here helps to pipeline submissions even when
+	 * isolation is enabled. If that is not desired for testing NULL can be
+	 * used instead of the ring to enforce a CPU round trip while switching
+	 * between clients.
+	 */
+	dep = amdgpu_sync_peek_fence(&isolation->prev, ring);
+	r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT);
+	if (r)
+		DRM_WARN("OOM tracking isolation\n");
+
+out_grab_ref:
+	dma_fence_get(dep);
+out_return_dep:
+	mutex_unlock(&adev->enforce_isolation_mutex);
+	return dep;
+}
+
 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
......
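As the kernel-doc above states, amdgpu_device_enforce_isolation() is meant to be called repeatedly until it returns NULL, with each returned fence treated as a dependency of the job. A hedged sketch of such a caller loop, assuming adev, ring and job in scope; the blocking wait below is illustrative only, the real scheduler integration feeds each fence back as a job dependency instead of waiting synchronously:

struct dma_fence *dep;

/* Keep asking for the next isolation dependency until none is left.
 * Each returned fence carries a reference that the caller must drop.
 */
while ((dep = amdgpu_device_enforce_isolation(adev, ring, job))) {
	dma_fence_wait(dep, false);
	dma_fence_put(dep);
}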
@@ -113,8 +113,13 @@
 #include "amdgpu_isp.h"
 #endif
 
-#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin"
-MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY);
+MODULE_FIRMWARE("amdgpu/ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin");
+MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin");
 
 #define mmIP_DISCOVERY_VERSION 0x16A00
 #define mmRCC_CONFIG_MEMSIZE 0xde3
@@ -297,21 +302,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
 	return ret;
 }
 
-static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary)
+static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev,
+						  uint8_t *binary,
+						  const char *fw_name)
 {
 	const struct firmware *fw;
-	const char *fw_name;
 	int r;
 
-	switch (amdgpu_discovery) {
-	case 2:
-		fw_name = FIRMWARE_IP_DISCOVERY;
-		break;
-	default:
-		dev_warn(adev->dev, "amdgpu_discovery is not set properly\n");
-		return -EINVAL;
-	}
-
 	r = request_firmware(&fw, fw_name, adev->dev);
 	if (r) {
 		dev_err(adev->dev, "can't load firmware \"%s\"\n",
@@ -404,10 +401,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
 	return 0;
 }
 
+static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev)
+{
+	if (amdgpu_discovery == 2)
+		return "amdgpu/ip_discovery.bin";
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+		return "amdgpu/vega10_ip_discovery.bin";
+	case CHIP_VEGA12:
+		return "amdgpu/vega12_ip_discovery.bin";
+	case CHIP_RAVEN:
+		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
+			return "amdgpu/raven2_ip_discovery.bin";
+		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
+			return "amdgpu/picasso_ip_discovery.bin";
+		else
+			return "amdgpu/raven_ip_discovery.bin";
+	case CHIP_VEGA20:
+		return "amdgpu/vega20_ip_discovery.bin";
+	case CHIP_ARCTURUS:
+		return "amdgpu/arcturus_ip_discovery.bin";
+	case CHIP_ALDEBARAN:
+		return "amdgpu/aldebaran_ip_discovery.bin";
+	default:
+		return NULL;
+	}
+}
+
 static int amdgpu_discovery_init(struct amdgpu_device *adev)
 {
 	struct table_info *info;
 	struct binary_header *bhdr;
+	const char *fw_name;
 	uint16_t offset;
 	uint16_t size;
 	uint16_t checksum;
@@ -419,9 +445,10 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
 		return -ENOMEM;
 
 	/* Read from file if it is the preferred option */
-	if (amdgpu_discovery == 2) {
+	fw_name = amdgpu_discovery_get_fw_name(adev);
+	if (fw_name != NULL) {
 		dev_info(adev->dev, "use ip discovery information from file");
-		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
+		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name);
 
 		if (r) {
 			dev_err(adev->dev, "failed to read ip discovery binary from file\n");
@@ -1290,6 +1317,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 	uint16_t die_offset;
 	uint16_t ip_offset;
 	uint16_t num_dies;
+	uint32_t wafl_ver;
 	uint16_t num_ips;
 	uint16_t hw_id;
 	uint8_t inst;
@@ -1303,6 +1331,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 		return r;
 	}
 
+	wafl_ver = 0;
 	adev->gfx.xcc_mask = 0;
 	adev->sdma.sdma_mask = 0;
 	adev->vcn.inst_mask = 0;
@@ -1403,6 +1432,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 				adev->gfx.xcc_mask |=
 					(1U << ip->instance_number);
 
+			if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID)
+				wafl_ver = IP_VERSION_FULL(ip->major, ip->minor,
+							   ip->revision, 0, 0);
+
 			for (k = 0; k < num_base_address; k++) {
 				/*
 				 * convert the endianness of base addresses in place,
@@ -1468,6 +1501,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
 		}
 	}
 
+	if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0])
+		adev->ip_versions[XGMI_HWIP][0] = wafl_ver;
+
 	return 0;
 }
@@ -2509,6 +2545,38 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 {
 	int r;
 
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+	case CHIP_VEGA12:
+	case CHIP_RAVEN:
+	case CHIP_VEGA20:
+	case CHIP_ARCTURUS:
+	case CHIP_ALDEBARAN:
+		/* this is not fatal. We have a fallback below
+		 * if the new firmwares are not present. some of
+		 * this will be overridden below to keep things
+		 * consistent with the current behavior.
+		 */
+		r = amdgpu_discovery_reg_base_init(adev);
+		if (!r) {
+			amdgpu_discovery_harvest_ip(adev);
+			amdgpu_discovery_get_gfx_info(adev);
+			amdgpu_discovery_get_mall_info(adev);
+			amdgpu_discovery_get_vcn_info(adev);
+		}
+		break;
+	default:
+		r = amdgpu_discovery_reg_base_init(adev);
+		if (r)
+			return -EINVAL;
+
+		amdgpu_discovery_harvest_ip(adev);
+		amdgpu_discovery_get_gfx_info(adev);
+		amdgpu_discovery_get_mall_info(adev);
+		amdgpu_discovery_get_vcn_info(adev);
+		break;
+	}
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		vega10_reg_base_init(adev);
@@ -2673,14 +2741,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0);
 		break;
 	default:
-		r = amdgpu_discovery_reg_base_init(adev);
-		if (r)
-			return -EINVAL;
-
-		amdgpu_discovery_harvest_ip(adev);
-		amdgpu_discovery_get_gfx_info(adev);
-		amdgpu_discovery_get_mall_info(adev);
-		amdgpu_discovery_get_vcn_info(adev);
 		break;
 	}
@@ -2772,13 +2832,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		break;
 	}
 
-	if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0))
-		adev->gmc.xgmi.supported = true;
-
-	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
-	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
-		adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0);
-
 	/* set NBIO version */
 	switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) {
 	case IP_VERSION(6, 1, 0):
......
@@ -122,9 +122,10 @@
  * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement)
  * - 3.61.0 - Contains fix for RV/PCO compute queues
  * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT
+ * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	62
+#define KMS_DRIVER_MINOR	63
 #define KMS_DRIVER_PATCHLEVEL	0
 
 /*
@@ -138,6 +139,7 @@ enum AMDGPU_DEBUG_MASK {
 	AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
 	AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5),
 	AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
+	AMDGPU_DEBUG_SMU_POOL = BIT(7),
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -175,6 +177,7 @@ uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu;
 char *amdgpu_virtual_display;
 bool enforce_isolation;
+int amdgpu_modeset = -1;
 
 /* Specifies the default granularity for SVM, used in buffer
  * migration and restoration of backing memory when handling
@@ -1036,6 +1039,13 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
 module_param(enforce_isolation, bool, 0444);
 MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
 
+/**
+ * DOC: modeset (int)
+ * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto).
+ */
+MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)");
+module_param_named(modeset, amdgpu_modeset, int, 0444);
+
 /**
  * DOC: seamless (int)
  * Seamless boot will keep the image on the screen during the boot process.
@@ -1052,6 +1062,11 @@ module_param_named(seamless, amdgpu_seamless, int, 0444);
  *   limits the VRAM size reported to ROCm applications to the visible
  *   size, usually 256MB.
  * - 0x4: Disable GPU soft recovery, always do a full reset
+ * - 0x8: Use VRAM for firmware loading
+ * - 0x10: Enable ACA based RAS logging
+ * - 0x20: Enable experimental resets
+ * - 0x40: Disable ring resets
+ * - 0x80: Use VRAM for SMU pool
  */
 MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
 module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444);
@@ -2229,6 +2244,10 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
 		pr_info("debug: ring reset disabled\n");
 		adev->debug_disable_gpu_ring_reset = true;
 	}
+	if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) {
+		pr_info("debug: use vram for smu pool\n");
+		adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM;
+	}
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
@@ -2256,6 +2275,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	int ret, retry = 0, i;
 	bool supports_atomic = false;
 
+	if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA ||
+	    (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) {
+		if (drm_firmware_drivers_only() && amdgpu_modeset == -1)
+			return -EINVAL;
+	}
+
 	/* skip devices which are owned by radeon */
 	for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) {
 		if (amdgpu_unsupported_pciidlist[i] == pdev->device)
@@ -2989,9 +3014,6 @@ static int __init amdgpu_init(void)
 {
 	int r;
 
-	if (drm_firmware_drivers_only())
-		return -EINVAL;
-
 	r = amdgpu_sync_init();
 	if (r)
 		goto error_sync;
......
@@ -1665,15 +1665,8 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
 	}
 
 	mutex_lock(&adev->enforce_isolation_mutex);
-	for (i = 0; i < num_partitions; i++) {
-		if (adev->enforce_isolation[i] && !partition_values[i])
-			/* Going from enabled to disabled */
-			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
-		else if (!adev->enforce_isolation[i] && partition_values[i])
-			/* Going from disabled to enabled */
-			amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+	for (i = 0; i < num_partitions; i++)
 		adev->enforce_isolation[i] = partition_values[i];
-	}
 	mutex_unlock(&adev->enforce_isolation_mutex);
 
 	amdgpu_mes_update_enforce_isolation(adev);
@@ -2002,8 +1995,8 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work)
 		if (adev->kfd.init_complete) {
 			WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]);
 			WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]);
-		amdgpu_amdkfd_start_sched(adev, idx);
-		adev->gfx.kfd_sch_inactive[idx] = false;
+			amdgpu_amdkfd_start_sched(adev, idx);
+			adev->gfx.kfd_sch_inactive[idx] = false;
 		}
 	}
 	mutex_unlock(&adev->enforce_isolation_mutex);
@@ -2160,11 +2153,16 @@ void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work)
 	for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i)
 		fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]);
 	if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) {
-		r = amdgpu_dpm_switch_power_profile(adev, profile, false);
-		if (r)
-			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
-				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
-				 "fullscreen 3D" : "compute");
+		mutex_lock(&adev->gfx.workload_profile_mutex);
+		if (adev->gfx.workload_profile_active) {
+			r = amdgpu_dpm_switch_power_profile(adev, profile, false);
+			if (r)
+				dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
+					 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
					 "fullscreen 3D" : "compute");
+			adev->gfx.workload_profile_active = false;
+		}
+		mutex_unlock(&adev->gfx.workload_profile_mutex);
 	} else {
 		schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT);
 	}
@@ -2183,13 +2181,25 @@ void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring)
 	atomic_inc(&adev->gfx.total_submission_cnt);
 
-	if (!cancel_delayed_work_sync(&adev->gfx.idle_work)) {
+	cancel_delayed_work_sync(&adev->gfx.idle_work);
+
+	/* We can safely return early here because we've cancelled the
+	 * the delayed work so there is no one else to set it to false
+	 * and we don't care if someone else sets it to true.
+	 */
+	if (adev->gfx.workload_profile_active)
+		return;
+
+	mutex_lock(&adev->gfx.workload_profile_mutex);
+	if (!adev->gfx.workload_profile_active) {
 		r = amdgpu_dpm_switch_power_profile(adev, profile, true);
 		if (r)
 			dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r,
 				 profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ?
 				 "fullscreen 3D" : "compute");
+		adev->gfx.workload_profile_active = true;
 	}
+	mutex_unlock(&adev->gfx.workload_profile_mutex);
 }
 
 void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring)
......
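With the workload_profile_mutex and workload_profile_active flag above, the profile switch is idempotent: the begin path enables it at most once and the idle worker disables it only while it is still marked active. A brief, purely illustrative sketch of how a submission path brackets ring use with these helpers, assuming an amdgpu_ring pointer `ring` in scope:

/* Keep the power profile active while work is pending on the ring;
 * the idle worker drops it again some time after end_use.
 */
amdgpu_gfx_profile_ring_begin_use(ring);
/* ... emit and submit the job on `ring` ... */
amdgpu_gfx_profile_ring_end_use(ring);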
@@ -482,6 +482,8 @@ struct amdgpu_gfx {
 	atomic_t			total_submission_cnt;
 	struct delayed_work		idle_work;
+	bool				workload_profile_active;
+	struct mutex			workload_profile_mutex;
 };
 
 struct amdgpu_gfx_ras_reg_entry {
......