Commit ca1130de authored by Dave Airlie

Merge tag 'drm-amdkfd-next-2015-01-21' of git://people.freedesktop.org/~gabbayo/linux into drm-next

- Infrastructure work in amdkfd to prepare for VI support. This work mainly
  includes splitting modules into shared and ASIC-specific parts, adding
  new properties that are relevant for VI, and making sure that shared code
  is reused.

- Improve the mechanism for submitting packets to the HIQ (the kernel queue that
  amdkfd uses to issue commands to the GPU). The driver used to verify that each
  CS was read by the GPU, but this proved to be both unnecessary and erroneous,
  so the verification was removed. A sketch of the resulting submission flow
  follows this list.

- Moved initialization of compute VMIDs into radeon driver

- Various minor fixes
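
As a minimal sketch of the resulting submission flow (the wrapper function
here is hypothetical; acquire_packet_buffer(), submit_packet() and
nop_packet come from the kernel queue code in the diff below), submission
now ends at the doorbell write, with no wait for the CP to consume the
packet:

	/* Hypothetical caller: push five NOP dwords into a kernel queue. */
	static int example_submit_nops(struct kernel_queue *kq)
	{
		unsigned int *buffer;
		int i, retval;

		/* Reserve 5 dwords in the ring; advances the pending wptr. */
		retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
		if (retval != 0)
			return retval;

		for (i = 0; i < 5; i++)
			buffer[i] = kq->nop_packet;

		/* Publish the wptr and ring the doorbell; no sync_with_hw(). */
		kq->ops.submit_packet(kq);

		return 0;
	}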

* tag 'drm-amdkfd-next-2015-01-21' of git://people.freedesktop.org/~gabbayo/linux: (22 commits)
  drm/amdkfd: Fix description of sched_policy module parameter
  drm/amdkfd: Remove sync_with_hw() from amdkfd
  drm/amdkfd: Remove unused function busy_wait()
  drm/amdkfd: Replace cpu_relax() with schedule() in DQM
  drm/amdkfd: Fix for-loop when allocating HQD (non-HWS)
  drm/amdkfd: Add initial VI support for KQ
  drm/amdkfd: Encapsulate KQ functions in ops structure
  drm/amdkfd: Add initial VI support for DQM
  drm/amdkfd: Encapsulate DQM functions in ops structure
  drm/amdkfd: Don't BUG on freeing GART sub-allocation
  drm/amdkfd: Fix logic of destroy_queue_nocpsch()
  MAINTAINERS: Update amdkfd files
  drm/amdkfd: Change MQD manager to be H/W specific
  drm/amdkfd: Add asic property to kfd_device_info
  drm/amdkfd: Make KFD_MQD_TYPE enum types H/W agnostic
  drm/amdkfd: Add new VI-specific queue properties
  drm/radeon: Use new cik_structs.h file
  drm/amdkfd: Don't include header files from radeon
  drm/amd: Put cik structures in a common place
  drm/radeon: Don't use relative paths in #include
  ...
parents fc839753 cb2ac441
@@ -624,6 +624,8 @@ L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~gabbayo/linux.git
S: Supported
F: drivers/gpu/drm/amd/amdkfd/
F: drivers/gpu/drm/amd/include/cik_structs.h
F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h
F: drivers/gpu/drm/radeon/radeon_kfd.c
F: drivers/gpu/drm/radeon/radeon_kfd.h
F: include/uapi/linux/kfd_ioctl.h
@@ -7,8 +7,11 @@ ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
-		kfd_kernel_queue.o kfd_packet_manager.o \
+		kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+		kfd_kernel_queue.o kfd_kernel_queue_cik.o \
+		kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
+		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
kfd_interrupt.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
@@ -168,6 +168,8 @@
#define IB_ATC_EN (1U << 23)
#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20)
#define AQL_ENABLE 1
#define CP_HQD_DEQUEUE_REQUEST 0xC974
#define DEQUEUE_REQUEST_DRAIN 1
#define DEQUEUE_REQUEST_RESET 2
@@ -188,6 +190,17 @@
#define MQD_VMID_MASK (0xf << 0)
#define MQD_CONTROL_PRIV_STATE_EN (1U << 8)
#define SDMA_RB_VMID(x) ((x) << 24)
#define SDMA_RB_ENABLE (1 << 0)
#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
#define SDMA_OFFSET(x) ((x) << 0)
#define SDMA_DB_ENABLE (1 << 28)
#define SDMA_ATC (1 << 0)
#define SDMA_VA_PTR32 (1 << 4)
#define SDMA_VA_SHARED_BASE(x) ((x) << 8)
#define GRBM_GFX_INDEX 0x30800
#define INSTANCE_INDEX(x) ((x) << 0)
#define SH_INDEX(x) ((x) << 8)
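For orientation, the SDMA ring-buffer fields above compose as follows; the
values are hypothetical and only illustrate the encoding (ring size and
writeback timer are log2 values, per the comments):

	/* Illustrative only: a control word for a 2^12-entry ring with
	 * rptr writeback every 2^6 cycles, owned by VMID 8.
	 */
	uint32_t rb_cntl = SDMA_RB_ENABLE |
			   SDMA_RB_SIZE(12) |
			   SDMA_RPTR_WRITEBACK_ENABLE |
			   SDMA_RPTR_WRITEBACK_TIMER(6) |
			   SDMA_RB_VMID(8);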
@@ -145,6 +145,8 @@ static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
static int set_queue_properties_from_user(struct queue_properties *q_properties,
struct kfd_ioctl_create_queue_args *args)
{
void *tmp;
if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
@@ -182,6 +184,20 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
return -EFAULT;
}
tmp = (void *)(uintptr_t)args->eop_buffer_address;
if (tmp != NULL &&
!access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) {
pr_debug("kfd: can't access eop buffer");
return -EFAULT;
}
tmp = (void *)(uintptr_t)args->ctx_save_restore_address;
if (tmp != NULL &&
!access_ok(VERIFY_WRITE, tmp, sizeof(uint32_t))) {
pr_debug("kfd: can't access ctx save restore buffer");
return -EFAULT;
}
q_properties->is_interop = false;
q_properties->queue_percent = args->queue_percentage;
q_properties->priority = args->queue_priority;
@@ -189,6 +205,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
q_properties->queue_size = args->ring_size;
q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
q_properties->eop_ring_buffer_address = args->eop_buffer_address;
q_properties->eop_ring_buffer_size = args->eop_buffer_size;
q_properties->ctx_save_restore_area_address =
args->ctx_save_restore_address;
q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
@@ -220,6 +241,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Format (%d)\n", q_properties->format);
pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
pr_debug("Queue CTX save arex (0x%llX)\n",
q_properties->ctx_save_restore_area_address);
return 0;
}
@@ -244,9 +270,12 @@ static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
if (err)
return err;
pr_debug("kfd: looking for gpu id 0x%x\n", args.gpu_id);
dev = kfd_device_by_id(args.gpu_id);
-	if (dev == NULL)
+	if (dev == NULL) {
+		pr_debug("kfd: gpu id 0x%x was not found\n", args.gpu_id);
		return -EINVAL;
+	}
mutex_lock(&p->mutex);
@@ -410,7 +439,7 @@ static long kfd_ioctl_set_memory_policy(struct file *filep,
(args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
? cache_policy_coherent : cache_policy_noncoherent;
-	if (!dev->dqm->set_cache_memory_policy(dev->dqm,
+	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
&pdd->qpd,
default_policy,
alternate_policy,
@@ -31,6 +31,14 @@
#define MQD_SIZE_ALIGNED 768
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
static const struct kfd_device_info carrizo_device_info = {
.asic_family = CHIP_CARRIZO,
.max_pasid_bits = 16,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.num_of_watch_points = 4,
@@ -65,7 +73,7 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x1318, &kaveri_device_info }, /* Kaveri */
{ 0x131B, &kaveri_device_info }, /* Kaveri */
{ 0x131C, &kaveri_device_info }, /* Kaveri */
{ 0x131D, &kaveri_device_info }, /* Kaveri */
{ 0x131D, &kaveri_device_info } /* Kaveri */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -245,7 +253,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto device_queue_manager_error;
}
-	if (kfd->dqm->start(kfd->dqm) != 0) {
+	if (kfd->dqm->ops.start(kfd->dqm) != 0) {
dev_err(kfd_device,
"Error starting queuen manager for device (%x:%x)\n",
kfd->pdev->vendor, kfd->pdev->device);
@@ -299,7 +307,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
BUG_ON(kfd == NULL);
if (kfd->init_complete) {
-		kfd->dqm->stop(kfd->dqm);
+		kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
amd_iommu_free_device(kfd->pdev);
}
@@ -320,7 +328,7 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
return -ENXIO;
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
-		kfd->dqm->start(kfd->dqm);
+		kfd->dqm->ops.start(kfd->dqm);
}
return 0;
@@ -503,7 +511,10 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
unsigned int bit;
BUG_ON(!kfd);
-	BUG_ON(!mem_obj);
+	/* Act like kfree when trying to free a NULL object */
+	if (!mem_obj)
+		return 0;
pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
mem_obj, mem_obj->range_start, mem_obj->range_end);
@@ -46,7 +46,7 @@ struct device_process_node {
};
/**
- * struct device_queue_manager
+ * struct device_queue_manager_ops
*
* @create_queue: Queue creation routine.
*
@@ -81,15 +81,9 @@ struct device_process_node {
* @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
* memory apertures.
*
- * This struct is a base class for the kfd queues scheduler in the
- * device level. The device base class should expose the basic operations
- * for queue creation and queue destruction. This base class hides the
- * scheduling mode of the driver and the specific implementation of the
- * concrete device. This class is the only class in the queues scheduler
- * that configures the H/W.
*/
-struct device_queue_manager {
+struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
struct qcm_process_device *qpd,
@@ -124,7 +118,23 @@ struct device_queue_manager {
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
};
/**
* struct device_queue_manager
*
* This struct is a base class for the kfd queues scheduler in the
* device level. The device base class should expose the basic operations
* for queue creation and queue destruction. This base class hides the
* scheduling mode of the driver and the specific implementation of the
* concrete device. This class is the only class in the queues scheduler
* that configures the H/W.
*
*/
struct device_queue_manager {
struct device_queue_manager_ops ops;
struct device_queue_manager_ops ops_asic_specific;
struct mqd_manager *mqds[KFD_MQD_TYPE_MAX];
struct packet_manager packets;
@@ -146,6 +156,14 @@ struct device_queue_manager {
bool active_runlist;
};
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd);
inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd);
int init_pipelines(struct device_queue_manager *dqm,
unsigned int pipes_num, unsigned int first_pipe);
inline unsigned int get_pipes_num(struct device_queue_manager *dqm);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
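The per-ASIC initializers declared above are intended to run once at DQM
creation, filling ops_asic_specific for the detected chip. A minimal sketch
of that selection (abridged; the DQM creation path itself is not part of
this diff), mirroring the kernel-queue switch further down:

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;
	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}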
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
#include "cik_regs.h"
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int register_process_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_cik(struct device_queue_manager *dqm);
void device_queue_manager_init_cik(struct device_queue_manager_ops *ops)
{
ops->set_cache_memory_policy = set_cache_memory_policy_cik;
ops->register_process = register_process_cik;
ops->initialize = initialize_cpsch_cik;
}
static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
/* In 64-bit mode, we can only control the top 3 bits of the LDS,
* scratch and GPUVM apertures.
* The hardware fills in the remaining 59 bits according to the
* following pattern:
* LDS: X0000000'00000000 - X0000001'00000000 (4GB)
* Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
* GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
*
* (where X/Y is the configurable nybble with the low-bit 0)
*
* LDS and scratch will have the same top nybble programmed in the
* top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
* GPUVM can have a different top nybble programmed in the
* top 3 bits of SH_MEM_BASES.SHARED_BASE.
* We don't bother to support different top nybbles
* for LDS/Scratch and GPUVM.
*/
BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
top_address_nybble == 0);
return PRIVATE_BASE(top_address_nybble << 12) |
SHARED_BASE(top_address_nybble << 12);
}
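A worked example of this encoding (illustrative value only): for
top_address_nybble = 0x2 (low bit clear, as the BUG_ON requires), the
apertures and the returned register value come out as:

	/* LDS:     0x2000000000000000 - 0x2000000100000000 (4GB)
	 * Scratch: 0x2000000100000000 - 0x2000000200000000 (4GB)
	 * GPUVM:   0x2001000000000000 - 0x2002000000000000 (1TB)
	 */
	uint32_t bases = PRIVATE_BASE(0x2 << 12) | SHARED_BASE(0x2 << 12);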
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
uint32_t default_mtype;
uint32_t ape1_mtype;
default_mtype = (default_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
MTYPE_CACHED;
ape1_mtype = (alternate_policy == cache_policy_coherent) ?
MTYPE_NONCACHED :
MTYPE_CACHED;
qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
| DEFAULT_MTYPE(default_mtype)
| APE1_MTYPE(ape1_mtype);
return true;
}
static int register_process_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
struct kfd_process_device *pdd;
unsigned int temp;
BUG_ON(!dqm || !qpd);
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
qpd->sh_mem_config =
ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
DEFAULT_MTYPE(MTYPE_NONCACHED) |
APE1_MTYPE(MTYPE_NONCACHED);
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
if (qpd->pqm->process->is_32bit_user_mode) {
temp = get_sh_mem_bases_32(pdd);
qpd->sh_mem_bases = SHARED_BASE(temp);
qpd->sh_mem_config |= PTR32;
} else {
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
}
pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
return 0;
}
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
return init_pipelines(dqm, get_pipes_num(dqm), 0);
}
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "kfd_device_queue_manager.h"
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size);
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int initialize_cpsch_vi(struct device_queue_manager *dqm);
void device_queue_manager_init_vi(struct device_queue_manager_ops *ops)
{
pr_warn("amdkfd: VI DQM is not currently supported\n");
ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi;
ops->initialize = initialize_cpsch_vi;
}
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
enum cache_policy alternate_policy,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
return false;
}
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return -1;
}
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
{
return 0;
}
@@ -56,8 +56,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
switch (type) {
case KFD_QUEUE_TYPE_DIQ:
case KFD_QUEUE_TYPE_HIQ:
-	kq->mqd = dev->dqm->get_mqd_manager(dev->dqm,
-					KFD_MQD_TYPE_CIK_HIQ);
+	kq->mqd = dev->dqm->ops.get_mqd_manager(dev->dqm,
+					KFD_MQD_TYPE_HIQ);
break;
default:
BUG();
@@ -73,13 +73,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
goto err_get_kernel_doorbell;
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0)
goto err_pq_allocate_vidmem;
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
if (retval == false)
goto err_eop_allocate_vidmem;
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
&kq->rptr_mem);
@@ -111,6 +114,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.queue_address = kq->pq_gpu_addr;
prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr;
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
prop.eop_ring_buffer_address = kq->eop_gpu_addr;
prop.eop_ring_buffer_size = PAGE_SIZE;
if (init_queue(&kq->queue, prop) != 0)
goto err_init_queue;
@@ -156,6 +161,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
err_wptr_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->rptr_mem);
err_rptr_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->eop_mem);
err_eop_allocate_vidmem:
kfd_gtt_sa_free(dev, kq->pq);
err_pq_allocate_vidmem:
pr_err("kfd: error init pq\n");
@@ -182,6 +189,7 @@ static void uninitialize(struct kernel_queue *kq)
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
kq->ops_asic_specific.uninitialize(kq);
kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
@@ -254,28 +262,6 @@ static void submit_packet(struct kernel_queue *kq)
kq->pending_wptr);
}
-static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms)
-{
-	unsigned long org_timeout_ms;
-	BUG_ON(!kq);
-	org_timeout_ms = timeout_ms;
-	timeout_ms += jiffies * 1000 / HZ;
-	while (*kq->wptr_kernel != *kq->rptr_kernel) {
-		if (time_after(jiffies * 1000 / HZ, timeout_ms)) {
-			pr_err("kfd: kernel_queue %s timeout expired %lu\n",
-				__func__, org_timeout_ms);
-			pr_err("kfd: wptr: %d rptr: %d\n",
-				*kq->wptr_kernel, *kq->rptr_kernel);
-			return -ETIME;
-		}
-		schedule();
-	}
-	return 0;
-}
static void rollback_packet(struct kernel_queue *kq)
{
BUG_ON(!kq);
@@ -293,14 +279,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
-	kq->initialize = initialize;
-	kq->uninitialize = uninitialize;
-	kq->acquire_packet_buffer = acquire_packet_buffer;
-	kq->submit_packet = submit_packet;
-	kq->sync_with_hw = sync_with_hw;
-	kq->rollback_packet = rollback_packet;
+	kq->ops.initialize = initialize;
+	kq->ops.uninitialize = uninitialize;
+	kq->ops.acquire_packet_buffer = acquire_packet_buffer;
+	kq->ops.submit_packet = submit_packet;
+	kq->ops.rollback_packet = rollback_packet;
	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		kernel_queue_init_vi(&kq->ops_asic_specific);
		break;
	case CHIP_KAVERI:
		kernel_queue_init_cik(&kq->ops_asic_specific);
		break;
	}
-	if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
+	if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
pr_err("kfd: failed to init kernel queue\n");
kfree(kq);
return NULL;
@@ -312,7 +304,7 @@ void kernel_queue_uninit(struct kernel_queue *kq)
{
BUG_ON(!kq);
-	kq->uninitialize(kq);
+	kq->ops.uninitialize(kq);
kfree(kq);
}
@@ -324,19 +316,18 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
BUG_ON(!dev);
pr_debug("kfd: starting kernel queue test\n");
pr_err("kfd: starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
BUG_ON(!kq);
-	retval = kq->acquire_packet_buffer(kq, 5, &buffer);
+	retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
BUG_ON(retval != 0);
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
-	kq->submit_packet(kq);
-	kq->sync_with_hw(kq, 1000);
+	kq->ops.submit_packet(kq);
-	pr_debug("kfd: ending kernel queue test\n");
+	pr_err("kfd: ending kernel queue test\n");
}
@@ -28,8 +28,31 @@
#include <linux/types.h>
#include "kfd_priv.h"
-struct kernel_queue {
-	/* interface */
/**
* struct kernel_queue_ops
*
* @initialize: Initialize a kernel queue, including allocations of GART memory
* needed for the queue.
*
* @uninitialize: Uninitialize a kernel queue and free all its memory usages.
*
* @acquire_packet_buffer: Returns a pointer to the location in the kernel
* queue ring buffer where the calling function can write its packet. It is
* guaranteed that there is enough space for that packet. It also updates the
* pending write pointer to that location so subsequent calls to
* acquire_packet_buffer will get a correct write pointer.
*
* @submit_packet: Update the write pointer and doorbell of a kernel queue.
*
* @sync_with_hw: Wait until the write pointer and the read pointer of a kernel
* queue are equal, which means the CP has read all the submitted packets.
*
* @rollback_packet: This routine is called if we failed to build an acquired
* packet for some reason. It just overwrites the pending wptr with the current
* one
*
*/
struct kernel_queue_ops {
bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
void (*uninitialize)(struct kernel_queue *kq);
@@ -38,9 +61,12 @@ struct kernel_queue {
unsigned int **buffer_ptr);
void (*submit_packet)(struct kernel_queue *kq);
-	int (*sync_with_hw)(struct kernel_queue *kq,
-			unsigned long timeout_ms);
void (*rollback_packet)(struct kernel_queue *kq);
};
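The @rollback_packet contract above implies this pattern on the submission
path (a sketch only; build_packet() is a hypothetical helper, while the ops
calls come from the struct above):

	retval = kq->ops.acquire_packet_buffer(kq, packet_size, &buffer);
	if (retval != 0)
		return retval;
	if (build_packet(buffer) != 0) {
		/* Failed mid-build: discard the reserved ring space. */
		kq->ops.rollback_packet(kq);
		return -EINVAL;
	}
	kq->ops.submit_packet(kq);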
struct kernel_queue {
struct kernel_queue_ops ops;
struct kernel_queue_ops ops_asic_specific;
/* data */
struct kfd_dev *dev;
@@ -58,6 +84,9 @@ struct kernel_queue {
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
uint32_t *pq_kernel_addr;
struct kfd_mem_obj *eop_mem;
uint64_t eop_gpu_addr;
uint32_t *eop_kernel_addr;
struct kfd_mem_obj *fence_mem_obj;
uint64_t fence_gpu_addr;
@@ -66,4 +95,7 @@ struct kernel_queue {
struct list_head list;
};
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),