diff --git a/drivers/Kconfig b/drivers/Kconfig index 7bdad836fc6207727300e79c2d6f7db485baf80a..257af611384d849fde7bca73e1ae91dec5072ec8 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -245,4 +245,6 @@ source "drivers/cdx/Kconfig" source "drivers/dpll/Kconfig" +source "drivers/rknpu/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 3bf5cab4b45191e4ff9bb657a186e958b306e348..29feb5cd62d88fce265e3609a5648577e586e18d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -193,6 +193,7 @@ obj-$(CONFIG_COUNTER) += counter/ obj-$(CONFIG_MOST) += most/ obj-$(CONFIG_PECI) += peci/ obj-$(CONFIG_HTE) += hte/ +obj-$(CONFIG_ROCKCHIP_RKNPU) += rknpu/ obj-$(CONFIG_DRM_ACCEL) += accel/ obj-$(CONFIG_CDX_BUS) += cdx/ obj-$(CONFIG_DPLL) += dpll/ diff --git a/drivers/rknpu/Kconfig b/drivers/rknpu/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..c3343eece9c6db2adcaefa1f45ffdb960b530ed7 --- /dev/null +++ b/drivers/rknpu/Kconfig @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0 +menu "RKNPU" + depends on ARCH_ROCKCHIP + +config ROCKCHIP_RKNPU + tristate "Rockchip RKNPU support" + depends on DRM || DMABUF_HEAPS_ROCKCHIP_CMA_HEAP + help + Driver for the neural processing unit (NPU) found on Rockchip SoCs. + +if ROCKCHIP_RKNPU + +config ROCKCHIP_RKNPU_DEBUG_FS + bool "RKNPU debugfs" + depends on DEBUG_FS + default y + help + Enable debugfs to debug RKNPU usage. + +config ROCKCHIP_RKNPU_PROC_FS + bool "RKNPU procfs" + depends on PROC_FS + help + Enable procfs to debug RKNPU usage. + +config ROCKCHIP_RKNPU_FENCE + bool "RKNPU fence" + depends on SYNC_FILE + help + Enable fence support for RKNPU. + +config ROCKCHIP_RKNPU_SRAM + bool "RKNPU SRAM" + depends on NO_GKI + help + Enable RKNPU SRAM support. + +choice + prompt "RKNPU memory manager" + default ROCKCHIP_RKNPU_DRM_GEM + help + Select the RKNPU memory manager. + +config ROCKCHIP_RKNPU_DRM_GEM + bool "RKNPU DRM GEM" + depends on DRM + help + Manage RKNPU memory through DRM GEM objects. + +config ROCKCHIP_RKNPU_DMA_HEAP + bool "RKNPU DMA heap" + depends on DMABUF_HEAPS_ROCKCHIP_CMA_HEAP + help + Manage RKNPU memory through the Rockchip CMA DMA heap. + +endchoice + +endif + +endmenu diff --git a/drivers/rknpu/Makefile b/drivers/rknpu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..41dacc93157ceec1d16f90b6a7cc971c207f563b --- /dev/null +++ b/drivers/rknpu/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ROCKCHIP_RKNPU) += rknpu.o + +ccflags-y += -I$(srctree)/$(src)/include +ccflags-y += -Werror + +rknpu-y += rknpu_drv.o +rknpu-y += rknpu_reset.o +rknpu-y += rknpu_job.o +rknpu-y += rknpu_debugger.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_SRAM) += rknpu_mm.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_FENCE) += rknpu_fence.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_DRM_GEM) += rknpu_gem.o +rknpu-$(CONFIG_ROCKCHIP_RKNPU_DMA_HEAP) += rknpu_mem.o diff --git a/drivers/rknpu/include/rknpu_debugger.h b/drivers/rknpu/include/rknpu_debugger.h new file mode 100644 index 0000000000000000000000000000000000000000..3f4420d443e1e0c4646b48d6285be3a621fc4e31 --- /dev/null +++ b/drivers/rknpu/include/rknpu_debugger.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_DEBUGGER_H_ +#define __LINUX_RKNPU_DEBUGGER_H_ + +#include <linux/seq_file.h> + +/* + * struct rknpu_debugger - rknpu debugger information + * + * This structure represents a debugger to be created by the rknpu driver + * or core.
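It holds, for each of debugfs and procfs, the root directory, the list of entries created in it, and a mutex protecting that list.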
+ */ +struct rknpu_debugger { +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + /* Directory of debugfs file */ + struct dentry *debugfs_dir; + struct list_head debugfs_entry_list; + struct mutex debugfs_lock; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + /* Directory of procfs file */ + struct proc_dir_entry *procfs_dir; + struct list_head procfs_entry_list; + struct mutex procfs_lock; +#endif +}; + +/* + * struct rknpu_debugger_list - debugfs/procfs info list entry + * + * This structure represents a debugfs/procfs file to be created by the npu + * driver or core. + */ +struct rknpu_debugger_list { + /* File name */ + const char *name; + /* + * Show callback. &seq_file->private will be set to the &struct + * rknpu_debugger_node corresponding to the instance of this info + * on a given &struct rknpu_debugger. + */ + int (*show)(struct seq_file *seq, void *data); + /* + * Write callback. &seq_file->private will be set to the &struct + * rknpu_debugger_node corresponding to the instance of this info + * on a given &struct rknpu_debugger. + */ + ssize_t (*write)(struct file *file, const char __user *ubuf, size_t len, + loff_t *offp); + /* Procfs/Debugfs private data. */ + void *data; +}; + +/* + * struct rknpu_debugger_node - Nodes for debugfs/procfs + * + * This structure represents each instance of procfs/debugfs created from the + * template. + */ +struct rknpu_debugger_node { + struct rknpu_debugger *debugger; + + /* template for this node. */ + const struct rknpu_debugger_list *info_ent; + + /* Each Procfs/Debugfs file. */ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + struct dentry *dent; +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + struct proc_dir_entry *pent; +#endif + + struct list_head list; +}; + +struct rknpu_device; + +int rknpu_debugger_init(struct rknpu_device *rknpu_dev); +int rknpu_debugger_remove(struct rknpu_device *rknpu_dev); + +#endif /* __LINUX_RKNPU_DEBUGGER_H_ */ diff --git a/drivers/rknpu/include/rknpu_drv.h b/drivers/rknpu/include/rknpu_drv.h new file mode 100644 index 0000000000000000000000000000000000000000..816dd16de219a63c8af65f53b67ab172b2db3e40 --- /dev/null +++ b/drivers/rknpu/include/rknpu_drv.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_DRV_H_ +#define __LINUX_RKNPU_DRV_H_ + +#include <linux/completion.h> +#include <linux/device.h> +#include <linux/kref.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/regulator/consumer.h> +#include <linux/version.h> +#include <linux/hrtimer.h> +#include <linux/miscdevice.h> + +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +#include <soc/rockchip/rockchip_opp_select.h> +#endif +#endif + +#include "rknpu_job.h" +#include "rknpu_fence.h" +#include "rknpu_debugger.h" +#include "rknpu_mm.h" + +#define DRIVER_NAME "rknpu" +#define DRIVER_DESC "RKNPU driver" +#define DRIVER_DATE "20221110" +#define DRIVER_MAJOR 0 +#define DRIVER_MINOR 8 +#define DRIVER_PATCHLEVEL 3 + +#define LOG_TAG "RKNPU" + +/* sample interval: 1000ms */ +#define RKNPU_LOAD_INTERVAL 1000000000 + +#define LOG_INFO(fmt, args...) pr_info(LOG_TAG ": " fmt, ##args) +#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE +#define LOG_WARN(fmt, args...) pr_warn(LOG_TAG ": " fmt, ##args) +#else +#define LOG_WARN(fmt, args...) pr_warning(LOG_TAG ": " fmt, ##args) +#endif +#define LOG_DEBUG(fmt, args...) pr_devel(LOG_TAG ": " fmt, ##args) +#define LOG_ERROR(fmt, args...)
pr_err(LOG_TAG ": " fmt, ##args) + +#define LOG_DEV_INFO(dev, fmt, args...) dev_info(dev, LOG_TAG ": " fmt, ##args) +#define LOG_DEV_WARN(dev, fmt, args...) dev_warn(dev, LOG_TAG ": " fmt, ##args) +#define LOG_DEV_DEBUG(dev, fmt, args...) dev_dbg(dev, LOG_TAG ": " fmt, ##args) +#define LOG_DEV_ERROR(dev, fmt, args...) dev_err(dev, LOG_TAG ": " fmt, ##args) + +struct rknpu_reset_data { + const char *srst_a_name; + const char *srst_h_name; +}; + +struct rknpu_config { + __u32 bw_priority_addr; + __u32 bw_priority_length; + __u64 dma_mask; + __u32 pc_data_amount_scale; + __u32 pc_task_number_bits; + __u32 pc_task_number_mask; + __u32 pc_task_status_offset; + __u32 bw_enable; + const struct rknpu_irqs_data *irqs; + const struct rknpu_reset_data *resets; + int num_irqs; + int num_resets; +}; + +struct rknpu_timer { + __u32 busy_time; + __u32 busy_time_record; +}; + +struct rknpu_subcore_data { + struct list_head todo_list; + wait_queue_head_t job_done_wq; + struct rknpu_job *job; + int64_t task_num; + struct rknpu_timer timer; +}; + +/** + * RKNPU device + * + * @base: IO mapped base address for device + * @dev: Device instance + * @drm_dev: DRM device instance + */ +struct rknpu_device { + void __iomem *base[RKNPU_MAX_CORES]; + struct device *dev; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + struct drm_device *drm_dev; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + struct miscdevice miscdev; + struct rk_dma_heap *heap; +#endif + atomic_t sequence; + spinlock_t lock; + spinlock_t irq_lock; + struct mutex power_lock; + struct mutex reset_lock; + struct rknpu_subcore_data subcore_datas[RKNPU_MAX_CORES]; + const struct rknpu_config *config; + void __iomem *bw_priority_base; + struct rknpu_fence_context *fence_ctx; + bool iommu_en; + struct reset_control *srst_a[RKNPU_MAX_CORES]; + struct reset_control *srst_h[RKNPU_MAX_CORES]; + struct clk_bulk_data *clks; + int num_clks; + struct regulator *vdd; + struct regulator *mem; + struct thermal_cooling_device *devfreq_cooling; + struct devfreq *devfreq; + unsigned long ondemand_freq; +#ifndef FPGA_PLATFORM +#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE + struct rockchip_opp_info opp_info; +#endif +#endif + unsigned long current_freq; + unsigned long current_volt; + int bypass_irq_handler; + int bypass_soft_reset; + bool soft_reseting; + struct device *genpd_dev_npu0; + struct device *genpd_dev_npu1; + struct device *genpd_dev_npu2; + bool multiple_domains; + atomic_t power_refcount; + atomic_t cmdline_power_refcount; + struct delayed_work power_off_work; + struct workqueue_struct *power_off_wq; + struct rknpu_debugger debugger; + struct hrtimer timer; + ktime_t kt; + phys_addr_t sram_start; + phys_addr_t sram_end; + uint32_t sram_size; + void __iomem *sram_base_io; + struct rknpu_mm *sram_mm; + unsigned long power_put_delay; +}; + +int rknpu_power_get(struct rknpu_device *rknpu_dev); +int rknpu_power_put(struct rknpu_device *rknpu_dev); + +#endif /* __LINUX_RKNPU_DRV_H_ */ diff --git a/drivers/rknpu/include/rknpu_fence.h b/drivers/rknpu/include/rknpu_fence.h new file mode 100644 index 0000000000000000000000000000000000000000..164f6de4116b33c19ad6a7ab772d39b9c786f981 --- /dev/null +++ b/drivers/rknpu/include/rknpu_fence.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_FENCE_H_ +#define __LINUX_RKNPU_FENCE_H_ + +#include "rknpu_job.h" + +struct rknpu_fence_context { + unsigned int context; + unsigned int seqno; + 
spinlock_t spinlock; +}; + +int rknpu_fence_context_alloc(struct rknpu_device *rknpu_dev); + +int rknpu_fence_alloc(struct rknpu_job *job); + +int rknpu_fence_get_fd(struct rknpu_job *job); + +#endif /* __LINUX_RKNPU_FENCE_H_ */ diff --git a/drivers/rknpu/include/rknpu_gem.h b/drivers/rknpu/include/rknpu_gem.h new file mode 100644 index 0000000000000000000000000000000000000000..954586607b16562090e9fe6fb3a0f6d530a247e1 --- /dev/null +++ b/drivers/rknpu/include/rknpu_gem.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_GEM_H +#define __LINUX_RKNPU_GEM_H + +#include <linux/mm_types.h> +#include <linux/version.h> + +#include <drm/drm_device.h> +#include <drm/drm_vma_manager.h> +#include <drm/drm_gem.h> +#include <drm/drm_mode.h> + +#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE +#include <drm/drm_mem_util.h> +#endif + +#include "rknpu_mm.h" + +#define to_rknpu_obj(x) container_of(x, struct rknpu_gem_object, base) + +/* + * rknpu drm buffer structure. + * + * @base: a gem object. + * - a new handle to this gem object would be created + * by drm_gem_handle_create(). + * @flags: memory type and cache attributes of the allocated buffer. + * @size: size requested from user, in bytes; it is page-aligned + * internally. + * @cookie: cookie returned by dma_alloc_attrs. + * @kv_addr: kernel virtual address of the allocated memory region. + * @dma_addr: bus address (accessed by DMA) of the allocated memory region. + * - this is a physical address without an IOMMU and a + * device address with an IOMMU. + * @pages: Array of backing pages. + * @sgt: Imported sg_table. + * + * P.S. this object is handed to userspace as kms_bo.handle, so the + * user can access the buffer through kms_bo.handle. + */ +struct rknpu_gem_object { + struct drm_gem_object base; + unsigned int flags; + unsigned long size; + unsigned long sram_size; + struct rknpu_mm_obj *sram_obj; + dma_addr_t iova_start; + unsigned long iova_size; + void *cookie; + void __iomem *kv_addr; + dma_addr_t dma_addr; + unsigned long dma_attrs; + unsigned long num_pages; + struct page **pages; + struct sg_table *sgt; + struct drm_mm_node mm_node; +}; + +/* create a new buffer with gem object */ +struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *dev, + unsigned int flags, + unsigned long size, + unsigned long sram_size); + +/* destroy a buffer with gem object */ +void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj); + +/* request gem object creation and buffer allocation as the size */ +int rknpu_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/* get fake-offset of gem object that can be used with mmap. */ +int rknpu_gem_map_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +int rknpu_gem_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/* + * take a reference on an rknpu drm object; + * the gem object reference count is increased. + */ +static inline void rknpu_gem_object_get(struct drm_gem_object *obj) +{ +#if KERNEL_VERSION(4, 13, 0) < LINUX_VERSION_CODE + drm_gem_object_get(obj); +#else + drm_gem_object_reference(obj); +#endif +} + +/* + * put an rknpu drm object acquired from rknpu_gem_object_find() or rknpu_gem_object_get(); + * the gem object reference count is decreased.
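Depending on the kernel version this wraps drm_gem_object_put(), drm_gem_object_put_unlocked() or drm_gem_object_unreference_unlocked().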
+ */ +static inline void rknpu_gem_object_put(struct drm_gem_object *obj) +{ +#if KERNEL_VERSION(5, 9, 0) <= LINUX_VERSION_CODE + drm_gem_object_put(obj); +#elif KERNEL_VERSION(4, 13, 0) < LINUX_VERSION_CODE + drm_gem_object_put_unlocked(obj); +#else + drm_gem_object_unreference_unlocked(obj); +#endif +} + +/* + * look up an rknpu drm object from a gem handle; this function can also be + * used by other drivers such as 2D/3D acceleration drivers. + * Note that the reference taken by drm_gem_object_lookup() is dropped again + * before returning, so the caller must ensure the handle keeps the object + * alive while the pointer is in use. + */ +static inline struct rknpu_gem_object * +rknpu_gem_object_find(struct drm_file *filp, unsigned int handle) +{ + struct drm_gem_object *obj; + + obj = drm_gem_object_lookup(filp, handle); + if (!obj) { + // DRM_ERROR("failed to lookup gem object.\n"); + return NULL; + } + + rknpu_gem_object_put(obj); + + return to_rknpu_obj(obj); +} + +/* get buffer information about the memory region allocated by gem. */ +int rknpu_gem_get_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/* free gem object. */ +void rknpu_gem_free_object(struct drm_gem_object *obj); + +/* create memory region for drm framebuffer. */ +int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *dev, + struct drm_mode_create_dumb *args); + +#if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE +/* map memory region for drm framebuffer to user space. */ +int rknpu_gem_dumb_map_offset(struct drm_file *file_priv, + struct drm_device *dev, uint32_t handle, + uint64_t *offset); +#endif + +/* page fault handler: maps the faulting (virtual) address to physical memory. */ +#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE +vm_fault_t rknpu_gem_fault(struct vm_fault *vmf); +#elif KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE +int rknpu_gem_fault(struct vm_fault *vmf); +#else +int rknpu_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +#endif + +/* set vm_flags; the vm attributes can be adjusted here.
*/ +int rknpu_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +/* low-level interface prime helpers */ +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE +struct drm_gem_object *rknpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +#endif +struct sg_table *rknpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +rknpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); +void *rknpu_gem_prime_vmap(struct drm_gem_object *obj); +void rknpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int rknpu_gem_prime_mmap(struct drm_gem_object *obj, + struct vm_area_struct *vma); + +int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +static inline void *rknpu_gem_alloc_page(size_t nr_pages) +{ +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE + return kvmalloc_array(nr_pages, sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); +#else + return drm_calloc_large(nr_pages, sizeof(struct page *)); +#endif +} + +static inline void rknpu_gem_free_page(void *pages) +{ +#if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE + kvfree(pages); +#else + drm_free_large(pages); +#endif +} + +#endif diff --git a/drivers/rknpu/include/rknpu_ioctl.h b/drivers/rknpu/include/rknpu_ioctl.h new file mode 100644 index 0000000000000000000000000000000000000000..fc7225fb7b473d0db491dc4517f76bbc68601497 --- /dev/null +++ b/drivers/rknpu/include/rknpu_ioctl.h @@ -0,0 +1,323 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_IOCTL_H +#define __LINUX_RKNPU_IOCTL_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#if !defined(__KERNEL__) +#define __user +#endif + +#ifndef __packed +#define __packed __attribute__((packed)) +#endif + +#define RKNPU_OFFSET_VERSION 0x0 +#define RKNPU_OFFSET_VERSION_NUM 0x4 +#define RKNPU_OFFSET_PC_OP_EN 0x8 +#define RKNPU_OFFSET_PC_DATA_ADDR 0x10 +#define RKNPU_OFFSET_PC_DATA_AMOUNT 0x14 +#define RKNPU_OFFSET_PC_TASK_CONTROL 0x30 +#define RKNPU_OFFSET_PC_DMA_BASE_ADDR 0x34 + +#define RKNPU_OFFSET_INT_MASK 0x20 +#define RKNPU_OFFSET_INT_CLEAR 0x24 +#define RKNPU_OFFSET_INT_STATUS 0x28 +#define RKNPU_OFFSET_INT_RAW_STATUS 0x2c + +#define RKNPU_OFFSET_CLR_ALL_RW_AMOUNT 0x8010 +#define RKNPU_OFFSET_DT_WR_AMOUNT 0x8034 +#define RKNPU_OFFSET_DT_RD_AMOUNT 0x8038 +#define RKNPU_OFFSET_WT_RD_AMOUNT 0x803c + +#define RKNPU_OFFSET_ENABLE_MASK 0xf008 + +#define RKNPU_INT_CLEAR 0x1ffff + +#define RKNPU_PC_DATA_EXTRA_AMOUNT 4 + +#define RKNPU_STR_HELPER_(x) #x +#define RKNPU_STR_HELPER(x) RKNPU_STR_HELPER_(x) + +#define RKNPU_GET_DRV_VERSION_STRING(MAJOR, MINOR, PATCHLEVEL) \ + RKNPU_STR_HELPER(MAJOR) \ + "." RKNPU_STR_HELPER(MINOR) "." RKNPU_STR_HELPER(PATCHLEVEL) +#define RKNPU_GET_DRV_VERSION_CODE(MAJOR, MINOR, PATCHLEVEL) \ + ((MAJOR) * 10000 + (MINOR) * 100 + (PATCHLEVEL)) +#define RKNPU_GET_DRV_VERSION_MAJOR(CODE) ((CODE) / 10000) +#define RKNPU_GET_DRV_VERSION_MINOR(CODE) (((CODE) % 10000) / 100) +#define RKNPU_GET_DRV_VERSION_PATCHLEVEL(CODE) ((CODE) % 100) + +/* memory type definitions. */ +enum e_rknpu_mem_type { + /* physically contiguous memory, used as default. */ + RKNPU_MEM_CONTIGUOUS = 0 << 0, + /* physically non-contiguous memory. */ + RKNPU_MEM_NON_CONTIGUOUS = 1 << 0, + /* non-cacheable mapping, used as default. */ + RKNPU_MEM_NON_CACHEABLE = 0 << 1, + /* cacheable mapping. */ + RKNPU_MEM_CACHEABLE = 1 << 1, + /* write-combine mapping.
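Writes may be buffered and combined before reaching memory; reads remain uncached.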
*/ + RKNPU_MEM_WRITE_COMBINE = 1 << 2, + /* dma attr kernel mapping */ + RKNPU_MEM_KERNEL_MAPPING = 1 << 3, + /* iommu mapping */ + RKNPU_MEM_IOMMU = 1 << 4, + /* zero-initialized mapping */ + RKNPU_MEM_ZEROING = 1 << 5, + /* allocate secure buffer */ + RKNPU_MEM_SECURE = 1 << 6, + /* allocate from non-dma32 zone */ + RKNPU_MEM_NON_DMA32 = 1 << 7, + /* request SRAM */ + RKNPU_MEM_TRY_ALLOC_SRAM = 1 << 8, + RKNPU_MEM_MASK = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_CACHEABLE | + RKNPU_MEM_WRITE_COMBINE | RKNPU_MEM_KERNEL_MAPPING | + RKNPU_MEM_IOMMU | RKNPU_MEM_ZEROING | + RKNPU_MEM_SECURE | RKNPU_MEM_NON_DMA32 | + RKNPU_MEM_TRY_ALLOC_SRAM +}; + +/* sync mode definitions. */ +enum e_rknpu_mem_sync_mode { + RKNPU_MEM_SYNC_TO_DEVICE = 1 << 0, + RKNPU_MEM_SYNC_FROM_DEVICE = 1 << 1, + RKNPU_MEM_SYNC_MASK = + RKNPU_MEM_SYNC_TO_DEVICE | RKNPU_MEM_SYNC_FROM_DEVICE +}; + +/* job mode definitions. */ +enum e_rknpu_job_mode { + RKNPU_JOB_SLAVE = 0 << 0, + RKNPU_JOB_PC = 1 << 0, + RKNPU_JOB_BLOCK = 0 << 1, + RKNPU_JOB_NONBLOCK = 1 << 1, + RKNPU_JOB_PINGPONG = 1 << 2, + RKNPU_JOB_FENCE_IN = 1 << 3, + RKNPU_JOB_FENCE_OUT = 1 << 4, + RKNPU_JOB_MASK = RKNPU_JOB_PC | RKNPU_JOB_NONBLOCK | + RKNPU_JOB_PINGPONG | RKNPU_JOB_FENCE_IN | + RKNPU_JOB_FENCE_OUT +}; + +/* action definitions */ +enum e_rknpu_action { + RKNPU_GET_HW_VERSION = 0, + RKNPU_GET_DRV_VERSION = 1, + RKNPU_GET_FREQ = 2, + RKNPU_SET_FREQ = 3, + RKNPU_GET_VOLT = 4, + RKNPU_SET_VOLT = 5, + RKNPU_ACT_RESET = 6, + RKNPU_GET_BW_PRIORITY = 7, + RKNPU_SET_BW_PRIORITY = 8, + RKNPU_GET_BW_EXPECT = 9, + RKNPU_SET_BW_EXPECT = 10, + RKNPU_GET_BW_TW = 11, + RKNPU_SET_BW_TW = 12, + RKNPU_ACT_CLR_TOTAL_RW_AMOUNT = 13, + RKNPU_GET_DT_WR_AMOUNT = 14, + RKNPU_GET_DT_RD_AMOUNT = 15, + RKNPU_GET_WT_RD_AMOUNT = 16, + RKNPU_GET_TOTAL_RW_AMOUNT = 17, + RKNPU_GET_IOMMU_EN = 18, + RKNPU_SET_PROC_NICE = 19, + RKNPU_POWER_ON = 20, + RKNPU_POWER_OFF = 21, + RKNPU_GET_TOTAL_SRAM_SIZE = 22, + RKNPU_GET_FREE_SRAM_SIZE = 23, +}; + +/** + * User-desired buffer creation information structure. + * + * @handle: The handle of the created GEM object. + * @flags: user request for setting memory type or cache attributes. + * @size: user-desired memory allocation size. + * - this size value would be page-aligned internally. + * @obj_addr: address of RKNPU memory object. + * @dma_addr: DMA address as seen by the RKNPU. + * @sram_size: user-desired sram memory allocation size. + * - this size value would be page-aligned internally. + */ +struct rknpu_mem_create { + __u32 handle; + __u32 flags; + __u64 size; + __u64 obj_addr; + __u64 dma_addr; + __u64 sram_size; +}; + +/** + * A structure for getting a fake-offset that can be used with mmap. + * + * @handle: handle of gem object. + * @reserved: just padding to be 64-bit aligned. + * @offset: a fake-offset of gem object. + */ +struct rknpu_mem_map { + __u32 handle; + __u32 reserved; + __u64 offset; +}; + +/** + * For destroying a DMA buffer + * + * @handle: handle of the buffer. + * @reserved: reserved for padding. + * @obj_addr: address of the rknpu_mem_object. + */ +struct rknpu_mem_destroy { + __u32 handle; + __u32 reserved; + __u64 obj_addr; +}; + +/** + * For synchronizing a DMA buffer + * + * @flags: user request for setting memory type or cache attributes. + * @reserved: reserved for padding. + * @obj_addr: address of RKNPU memory object. + * @offset: offset in bytes from start address of buffer. + * @size: size of memory region.
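Together with @offset it selects the byte range of the buffer to synchronize.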
+ * + */ +struct rknpu_mem_sync { + __u32 flags; + __u32 reserved; + __u64 obj_addr; + __u64 offset; + __u64 size; +}; + +/** + * struct rknpu_task structure for task information + * + * @flags: flags for task + * @op_idx: operator index + * @enable_mask: enable mask + * @int_mask: interrupt mask + * @int_clear: interrupt clear + * @int_status: interrupt status + * @regcfg_amount: register config number + * @regcfg_offset: offset for register config + * @regcmd_addr: address for register command + * + */ +struct rknpu_task { + __u32 flags; + __u32 op_idx; + __u32 enable_mask; + __u32 int_mask; + __u32 int_clear; + __u32 int_status; + __u32 regcfg_amount; + __u32 regcfg_offset; + __u64 regcmd_addr; +} __packed; + +/** + * struct rknpu_subcore_task structure for subcore task index + * + * @task_start: task start index + * @task_number: task number + * + */ +struct rknpu_subcore_task { + __u32 task_start; + __u32 task_number; +}; + +/** + * struct rknpu_submit structure for job submit + * + * @flags: flags for job submit + * @timeout: submit timeout + * @task_start: task start index + * @task_number: task number + * @task_counter: task counter + * @priority: submit priority + * @task_obj_addr: address of task object + * @regcfg_obj_addr: address of register config object + * @task_base_addr: task base address + * @user_data: (optional) user data + * @core_mask: core mask of rknpu + * @fence_fd: dma fence fd + * @subcore_task: subcore task + * + */ +struct rknpu_submit { + __u32 flags; + __u32 timeout; + __u32 task_start; + __u32 task_number; + __u32 task_counter; + __s32 priority; + __u64 task_obj_addr; + __u64 regcfg_obj_addr; + __u64 task_base_addr; + __u64 user_data; + __u32 core_mask; + __s32 fence_fd; + struct rknpu_subcore_task subcore_task[5]; +}; + +/** + * struct rknpu_action structure for an action (GET, SET or ACT) + * + * @flags: flags for action + * @value: GET or SET value + * + */ +struct rknpu_action { + __u32 flags; + __u32 value; +}; + +#define RKNPU_ACTION 0x00 +#define RKNPU_SUBMIT 0x01 +#define RKNPU_MEM_CREATE 0x02 +#define RKNPU_MEM_MAP 0x03 +#define RKNPU_MEM_DESTROY 0x04 +#define RKNPU_MEM_SYNC 0x05 + +#define RKNPU_IOC_MAGIC 'r' +#define RKNPU_IOW(nr, type) _IOW(RKNPU_IOC_MAGIC, nr, type) +#define RKNPU_IOR(nr, type) _IOR(RKNPU_IOC_MAGIC, nr, type) +#define RKNPU_IOWR(nr, type) _IOWR(RKNPU_IOC_MAGIC, nr, type) + +#include <drm/drm.h> + +#define DRM_IOCTL_RKNPU_ACTION \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_ACTION, struct rknpu_action) +#define DRM_IOCTL_RKNPU_SUBMIT \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_SUBMIT, struct rknpu_submit) +#define DRM_IOCTL_RKNPU_MEM_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_CREATE, struct rknpu_mem_create) +#define DRM_IOCTL_RKNPU_MEM_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_MAP, struct rknpu_mem_map) +#define DRM_IOCTL_RKNPU_MEM_DESTROY \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) +#define DRM_IOCTL_RKNPU_MEM_SYNC \ + DRM_IOWR(DRM_COMMAND_BASE + RKNPU_MEM_SYNC, struct rknpu_mem_sync) + +#define IOCTL_RKNPU_ACTION RKNPU_IOWR(RKNPU_ACTION, struct rknpu_action) +#define IOCTL_RKNPU_SUBMIT RKNPU_IOWR(RKNPU_SUBMIT, struct rknpu_submit) +#define IOCTL_RKNPU_MEM_CREATE \ + RKNPU_IOWR(RKNPU_MEM_CREATE, struct rknpu_mem_create) +#define IOCTL_RKNPU_MEM_MAP RKNPU_IOWR(RKNPU_MEM_MAP, struct rknpu_mem_map) +#define IOCTL_RKNPU_MEM_DESTROY \ + RKNPU_IOWR(RKNPU_MEM_DESTROY, struct rknpu_mem_destroy) +#define IOCTL_RKNPU_MEM_SYNC RKNPU_IOWR(RKNPU_MEM_SYNC, struct rknpu_mem_sync) + +#endif diff --git
a/drivers/rknpu/include/rknpu_job.h b/drivers/rknpu/include/rknpu_job.h new file mode 100644 index 0000000000000000000000000000000000000000..6ef52d439277e1f8c5797a71172ccb5aeeb02ae8 --- /dev/null +++ b/drivers/rknpu/include/rknpu_job.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_JOB_H_ +#define __LINUX_RKNPU_JOB_H_ + +#include <linux/spinlock.h> +#include <linux/dma-fence.h> +#include <linux/irq.h> + +#include <drm/drm_device.h> + +#include "rknpu_ioctl.h" + +#define RKNPU_MAX_CORES 3 + +#define RKNPU_JOB_DONE (1 << 0) +#define RKNPU_JOB_ASYNC (1 << 1) +#define RKNPU_JOB_DETACHED (1 << 2) + +#define RKNPU_CORE_AUTO_MASK 0x00 +#define RKNPU_CORE0_MASK 0x01 +#define RKNPU_CORE1_MASK 0x02 +#define RKNPU_CORE2_MASK 0x04 + +struct rknpu_job { + struct rknpu_device *rknpu_dev; + struct list_head head[RKNPU_MAX_CORES]; + struct work_struct cleanup_work; + bool in_queue[RKNPU_MAX_CORES]; + bool irq_entry[RKNPU_MAX_CORES]; + unsigned int flags; + int ret; + struct rknpu_submit *args; + bool args_owner; + struct rknpu_task *first_task; + struct rknpu_task *last_task; + uint32_t int_mask[RKNPU_MAX_CORES]; + uint32_t int_status[RKNPU_MAX_CORES]; + struct dma_fence *fence; + ktime_t timestamp; + uint32_t use_core_num; + uint32_t run_count; + uint32_t interrupt_count; + ktime_t hw_recoder_time; +}; + +irqreturn_t rknpu_core0_irq_handler(int irq, void *data); +irqreturn_t rknpu_core1_irq_handler(int irq, void *data); +irqreturn_t rknpu_core2_irq_handler(int irq, void *data); + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +int rknpu_submit_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +#endif + +int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version); + +int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority, + uint32_t *expect, uint32_t *tw); + +int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority, + uint32_t expect, uint32_t tw); + +int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev); + +int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, + uint32_t *dt_rd, uint32_t *wd_rd); + +int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount); + +#endif /* __LINUX_RKNPU_JOB_H_ */ diff --git a/drivers/rknpu/include/rknpu_mem.h b/drivers/rknpu/include/rknpu_mem.h new file mode 100644 index 0000000000000000000000000000000000000000..925535c85f067973828dd065aa394ede2d454934 --- /dev/null +++ b/drivers/rknpu/include/rknpu_mem.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_MEM_H +#define __LINUX_RKNPU_MEM_H + +#include <linux/mm_types.h> +#include <linux/version.h> + +/* + * rknpu DMA buffer structure. + * + * @flags: memory type and cache attributes of the allocated buffer. + * @size: size requested from user, in bytes; it is page-aligned + * internally. + * @kv_addr: kernel virtual address of the allocated memory region. + * @dma_addr: bus address (accessed by DMA) of the allocated memory region. + * - this is a physical address without an IOMMU and a + * device address with an IOMMU. + * @pages: Array of backing pages. + * @sgt: Imported sg_table.
+ * @dmabuf: dma-buf associated with this memory object. + * @owner: set when this memory was allocated by the driver itself. + */ +struct rknpu_mem_object { + unsigned long flags; + unsigned long size; + void __iomem *kv_addr; + dma_addr_t dma_addr; + struct page **pages; + struct sg_table *sgt; + struct dma_buf *dmabuf; + unsigned int owner; +}; + +int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); +int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data); + +#endif diff --git a/drivers/rknpu/include/rknpu_mm.h b/drivers/rknpu/include/rknpu_mm.h new file mode 100644 index 0000000000000000000000000000000000000000..84a8c393f1be326ce643766f8afa4494bd159efc --- /dev/null +++ b/drivers/rknpu/include/rknpu_mm.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_MM_H +#define __LINUX_RKNPU_MM_H + +#include <linux/mutex.h> +#include <linux/seq_file.h> +#include <linux/iommu.h> +#include <linux/iova.h> + +#include "rknpu_drv.h" + +struct rknpu_mm { + void *bitmap; + struct mutex lock; + unsigned int chunk_size; + unsigned int total_chunks; + unsigned int free_chunks; +}; + +struct rknpu_mm_obj { + uint32_t range_start; + uint32_t range_end; +}; + +int rknpu_mm_create(unsigned int mem_size, unsigned int chunk_size, + struct rknpu_mm **mm); + +void rknpu_mm_destroy(struct rknpu_mm *mm); + +int rknpu_mm_alloc(struct rknpu_mm *mm, unsigned int size, + struct rknpu_mm_obj **mm_obj); + +int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj); + +int rknpu_mm_dump(struct seq_file *m, void *data); + +enum iommu_dma_cookie_type { + IOMMU_DMA_IOVA_COOKIE, + IOMMU_DMA_MSI_COOKIE, +}; + +struct rknpu_iommu_dma_cookie { + enum iommu_dma_cookie_type type; + + /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ + struct iova_domain iovad; +}; + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev); + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size); + +#endif diff --git a/drivers/rknpu/include/rknpu_reset.h b/drivers/rknpu/include/rknpu_reset.h new file mode 100644 index 0000000000000000000000000000000000000000..b80e29b321b0ef857328bf83827b5c00475ef847 --- /dev/null +++ b/drivers/rknpu/include/rknpu_reset.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#ifndef __LINUX_RKNPU_RESET_H +#define __LINUX_RKNPU_RESET_H + +#include <linux/reset.h> + +#include "rknpu_drv.h" + +int rknpu_reset_get(struct rknpu_device *rknpu_dev); + +int rknpu_soft_reset(struct rknpu_device *rknpu_dev); + +#endif diff --git a/drivers/rknpu/rknpu_debugger.c b/drivers/rknpu/rknpu_debugger.c new file mode 100644 index 0000000000000000000000000000000000000000..0e4b5239bbf5fead554b9ac576f3d2db47f74fca --- /dev/null +++ b/drivers/rknpu/rknpu_debugger.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/syscalls.h> +#include <linux/debugfs.h> +#include <linux/proc_fs.h> +#include <linux/devfreq.h> +#include <linux/clk.h> +#include <asm/div64.h> + +#ifndef FPGA_PLATFORM +#ifdef CONFIG_PM_DEVFREQ
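+/* governor.h provides update_devfreq() for the freq node below */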
+#include <../drivers/devfreq/governor.h> +#endif +#endif + +#include "rknpu_drv.h" +#include "rknpu_mm.h" +#include "rknpu_reset.h" +#include "rknpu_debugger.h" + +#define RKNPU_DEBUGGER_ROOT_NAME "rknpu" + +#if defined(CONFIG_ROCKCHIP_RKNPU_DEBUG_FS) || \ + defined(CONFIG_ROCKCHIP_RKNPU_PROC_FS) +static int rknpu_version_show(struct seq_file *m, void *data) +{ + seq_printf(m, "%s: v%d.%d.%d\n", DRIVER_DESC, DRIVER_MAJOR, + DRIVER_MINOR, DRIVER_PATCHLEVEL); + + return 0; +} + +static int rknpu_load_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + struct rknpu_subcore_data *subcore_data = NULL; + unsigned long flags; + int i; + int load; + uint64_t busy_time_total, div_value; + + seq_puts(m, "NPU load: "); + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + + if (rknpu_dev->config->num_irqs > 1) + seq_printf(m, " Core%d: ", i); + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + + busy_time_total = subcore_data->timer.busy_time_record; + + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + div_value = (RKNPU_LOAD_INTERVAL / 100000); + do_div(busy_time_total, div_value); + load = busy_time_total; + + if (rknpu_dev->config->num_irqs > 1) + seq_printf(m, "%2.d%%,", load); + else + seq_printf(m, "%2.d%%", load); + } + seq_puts(m, "\n"); + + return 0; +} + +static int rknpu_power_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + + if (atomic_read(&rknpu_dev->power_refcount) > 0) + seq_puts(m, "on\n"); + else + seq_puts(m, "off\n"); + + return 0; +} + +static ssize_t rknpu_power_set(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + char buf[8]; + + if (len > sizeof(buf) - 1) + return -EINVAL; + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len - 1] = '\0'; + + if (strcmp(buf, "on") == 0) { + atomic_inc(&rknpu_dev->cmdline_power_refcount); + rknpu_power_get(rknpu_dev); + LOG_INFO("rknpu power is on!"); + } else if (strcmp(buf, "off") == 0) { + if (atomic_read(&rknpu_dev->power_refcount) > 0 && + atomic_dec_if_positive( + &rknpu_dev->cmdline_power_refcount) >= 0) { + atomic_sub( + atomic_read(&rknpu_dev->cmdline_power_refcount), + &rknpu_dev->power_refcount); + atomic_set(&rknpu_dev->cmdline_power_refcount, 0); + rknpu_power_put(rknpu_dev); + } + if (atomic_read(&rknpu_dev->power_refcount) <= 0) + LOG_INFO("rknpu power is off!"); + } else { + LOG_ERROR("invalid parameter for rknpu power node!"); + } + + return len; +} + +static int rknpu_power_put_delay_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + + seq_printf(m, "%lu\n", rknpu_dev->power_put_delay); + + return 0; +} + +static ssize_t rknpu_power_put_delay_set(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *priv =
file->private_data; + struct rknpu_debugger_node *node = priv->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + char buf[16]; + unsigned long power_put_delay = 0; + int ret = 0; + + if (len > sizeof(buf) - 1) + return -EINVAL; + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len - 1] = '\0'; + + ret = kstrtoul(buf, 10, &power_put_delay); + if (ret) { + LOG_ERROR("failed to parse power put delay string: %s\n", buf); + return -EFAULT; + } + + rknpu_dev->power_put_delay = power_put_delay; + + LOG_INFO("set rknpu power put delay time %lums\n", + rknpu_dev->power_put_delay); + + return len; +} + +static int rknpu_freq_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + unsigned long current_freq = 0; + + rknpu_power_get(rknpu_dev); + + current_freq = clk_get_rate(rknpu_dev->clks[0].clk); + + rknpu_power_put(rknpu_dev); + + seq_printf(m, "%lu\n", current_freq); + + return 0; +} + +#ifdef CONFIG_PM_DEVFREQ +static ssize_t rknpu_freq_set(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + unsigned long current_freq = 0; + char buf[16]; + unsigned long freq = 0; + int ret = 0; + + if (len > sizeof(buf) - 1) + return -EINVAL; + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len - 1] = '\0'; + + ret = kstrtoul(buf, 10, &freq); + if (ret) { + LOG_ERROR("failed to parse freq string: %s\n", buf); + return -EFAULT; + } + + if (!rknpu_dev->devfreq) + return -EFAULT; + + rknpu_power_get(rknpu_dev); + + current_freq = clk_get_rate(rknpu_dev->clks[0].clk); + if (freq != current_freq) { + rknpu_dev->ondemand_freq = freq; + mutex_lock(&rknpu_dev->devfreq->lock); + update_devfreq(rknpu_dev->devfreq); + mutex_unlock(&rknpu_dev->devfreq->lock); + } + + rknpu_power_put(rknpu_dev); + + return len; +} +#else +static ssize_t rknpu_freq_set(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + return -EFAULT; +} +#endif + +static int rknpu_volt_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + unsigned long current_volt = 0; + + current_volt = regulator_get_voltage(rknpu_dev->vdd); + + seq_printf(m, "%lu\n", current_volt); + + return 0; +} + +static int rknpu_reset_show(struct seq_file *m, void *data) +{ + struct rknpu_debugger_node *node = m->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, debugger); + + if (!rknpu_dev->bypass_soft_reset) + seq_puts(m, "on\n"); + else + seq_puts(m, "off\n"); + + return 0; +} + +static ssize_t rknpu_reset_set(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + struct rknpu_debugger *debugger = node->debugger; + struct rknpu_device *rknpu_dev = + container_of(debugger, struct rknpu_device, 
debugger); + char buf[8]; + + if (len > sizeof(buf) - 1) + return -EINVAL; + if (copy_from_user(buf, ubuf, len)) + return -EFAULT; + buf[len - 1] = '\0'; + + if (strcmp(buf, "1") == 0 && + atomic_read(&rknpu_dev->power_refcount) > 0) + rknpu_soft_reset(rknpu_dev); + else if (strcmp(buf, "on") == 0) + rknpu_dev->bypass_soft_reset = 0; + else if (strcmp(buf, "off") == 0) + rknpu_dev->bypass_soft_reset = 1; + + return len; +} + +static struct rknpu_debugger_list rknpu_debugger_root_list[] = { + { "version", rknpu_version_show, NULL, NULL }, + { "load", rknpu_load_show, NULL, NULL }, + { "power", rknpu_power_show, rknpu_power_set, NULL }, + { "freq", rknpu_freq_show, rknpu_freq_set, NULL }, + { "volt", rknpu_volt_show, NULL, NULL }, + { "delayms", rknpu_power_put_delay_show, rknpu_power_put_delay_set, + NULL }, + { "reset", rknpu_reset_show, rknpu_reset_set, NULL }, +#ifdef CONFIG_ROCKCHIP_RKNPU_SRAM + { "mm", rknpu_mm_dump, NULL, NULL }, +#endif +}; + +static ssize_t rknpu_debugger_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + struct seq_file *priv = file->private_data; + struct rknpu_debugger_node *node = priv->private; + + if (node->info_ent->write) + return node->info_ent->write(file, ubuf, len, offp); + else + return len; +} + +static int rknpu_debugfs_open(struct inode *inode, struct file *file) +{ + struct rknpu_debugger_node *node = inode->i_private; + + return single_open(file, node->info_ent->show, node); +} + +static const struct file_operations rknpu_debugfs_fops = { + .owner = THIS_MODULE, + .open = rknpu_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = rknpu_debugger_write, +}; +#endif /* #if defined(CONFIG_ROCKCHIP_RKNPU_DEBUG_FS) || defined(CONFIG_ROCKCHIP_RKNPU_PROC_FS) */ + +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS +static int rknpu_debugfs_remove_files(struct rknpu_debugger *debugger) +{ + struct rknpu_debugger_node *pos, *q; + struct list_head *entry_list; + + mutex_lock(&debugger->debugfs_lock); + + /* Delete debugfs entry list */ + entry_list = &debugger->debugfs_entry_list; + list_for_each_entry_safe(pos, q, entry_list, list) { + if (pos->dent == NULL) + continue; + list_del(&pos->list); + kfree(pos); + pos = NULL; + } + + /* Delete all debugfs node in this directory */ + debugfs_remove_recursive(debugger->debugfs_dir); + debugger->debugfs_dir = NULL; + + mutex_unlock(&debugger->debugfs_lock); + + return 0; +} + +static int rknpu_debugfs_create_files(const struct rknpu_debugger_list *files, + int count, struct dentry *root, + struct rknpu_debugger *debugger) +{ + int i; + struct dentry *ent; + struct rknpu_debugger_node *tmp; + + for (i = 0; i < count; i++) { + tmp = kmalloc(sizeof(struct rknpu_debugger_node), GFP_KERNEL); + if (tmp == NULL) { + LOG_ERROR( + "Cannot alloc node path /sys/kernel/debug/%pd/%s\n", + root, files[i].name); + goto MALLOC_FAIL; + } + + tmp->info_ent = &files[i]; + tmp->debugger = debugger; + + ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO, + root, tmp, &rknpu_debugfs_fops); + if (!ent) { + LOG_ERROR("Cannot create /sys/kernel/debug/%pd/%s\n", + root, files[i].name); + goto CREATE_FAIL; + } + + tmp->dent = ent; + + mutex_lock(&debugger->debugfs_lock); + list_add_tail(&tmp->list, &debugger->debugfs_entry_list); + mutex_unlock(&debugger->debugfs_lock); + } + + return 0; + +CREATE_FAIL: + kfree(tmp); +MALLOC_FAIL: + rknpu_debugfs_remove_files(debugger); + + return -1; +} + +static int rknpu_debugfs_remove(struct rknpu_debugger *debugger) +{ + 
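/* removes both the entry list and the debugfs directory */ +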
rknpu_debugfs_remove_files(debugger); + + return 0; +} + +static int rknpu_debugfs_init(struct rknpu_debugger *debugger) +{ + int ret; + + debugger->debugfs_dir = + debugfs_create_dir(RKNPU_DEBUGGER_ROOT_NAME, NULL); + if (IS_ERR_OR_NULL(debugger->debugfs_dir)) { + LOG_ERROR("failed on mkdir /sys/kernel/debug/%s\n", + RKNPU_DEBUGGER_ROOT_NAME); + debugger->debugfs_dir = NULL; + return -EIO; + } + + ret = rknpu_debugfs_create_files(rknpu_debugger_root_list, + ARRAY_SIZE(rknpu_debugger_root_list), + debugger->debugfs_dir, debugger); + if (ret) { + LOG_ERROR( + "Could not install rknpu_debugger_root_list debugfs\n"); + goto CREATE_FAIL; + } + + return 0; + +CREATE_FAIL: + rknpu_debugfs_remove(debugger); + + return ret; +} +#endif /* #ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS */ + +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS +static int rknpu_procfs_open(struct inode *inode, struct file *file) +{ + struct rknpu_debugger_node *node = pde_data(inode); + + return single_open(file, node->info_ent->show, node); +} + +static const struct proc_ops rknpu_procfs_fops = { + .proc_open = rknpu_procfs_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = rknpu_debugger_write, +}; + +static int rknpu_procfs_remove_files(struct rknpu_debugger *debugger) +{ + struct rknpu_debugger_node *pos, *q; + struct list_head *entry_list; + + mutex_lock(&debugger->procfs_lock); + + /* Delete procfs entry list */ + entry_list = &debugger->procfs_entry_list; + list_for_each_entry_safe(pos, q, entry_list, list) { + if (pos->pent == NULL) + continue; + list_del(&pos->list); + kfree(pos); + pos = NULL; + } + + /* Delete all procfs node in this directory */ + proc_remove(debugger->procfs_dir); + debugger->procfs_dir = NULL; + + mutex_unlock(&debugger->procfs_lock); + + return 0; +} + +static int rknpu_procfs_create_files(const struct rknpu_debugger_list *files, + int count, struct proc_dir_entry *root, + struct rknpu_debugger *debugger) +{ + int i; + struct proc_dir_entry *ent; + struct rknpu_debugger_node *tmp; + + for (i = 0; i < count; i++) { + tmp = kmalloc(sizeof(struct rknpu_debugger_node), GFP_KERNEL); + if (tmp == NULL) { + LOG_ERROR("Cannot alloc node path for /proc/%s/%s\n", + RKNPU_DEBUGGER_ROOT_NAME, files[i].name); + goto MALLOC_FAIL; + } + + tmp->info_ent = &files[i]; + tmp->debugger = debugger; + + ent = proc_create_data(files[i].name, S_IFREG | S_IRUGO, root, + &rknpu_procfs_fops, tmp); + if (!ent) { + LOG_ERROR("Cannot create /proc/%s/%s\n", + RKNPU_DEBUGGER_ROOT_NAME, files[i].name); + goto CREATE_FAIL; + } + + tmp->pent = ent; + + mutex_lock(&debugger->procfs_lock); + list_add_tail(&tmp->list, &debugger->procfs_entry_list); + mutex_unlock(&debugger->procfs_lock); + } + + return 0; + +CREATE_FAIL: + kfree(tmp); +MALLOC_FAIL: + rknpu_procfs_remove_files(debugger); + return -1; +} + +static int rknpu_procfs_remove(struct rknpu_debugger *debugger) +{ + rknpu_procfs_remove_files(debugger); + + return 0; +} + +static int rknpu_procfs_init(struct rknpu_debugger *debugger) +{ + int ret; + + debugger->procfs_dir = proc_mkdir(RKNPU_DEBUGGER_ROOT_NAME, NULL); + if (IS_ERR_OR_NULL(debugger->procfs_dir)) { + pr_err("failed on mkdir /proc/%s\n", RKNPU_DEBUGGER_ROOT_NAME); + debugger->procfs_dir = NULL; + return -EIO; + } + + ret = rknpu_procfs_create_files(rknpu_debugger_root_list, + ARRAY_SIZE(rknpu_debugger_root_list), + debugger->procfs_dir, debugger); + if (ret) { + pr_err("Could not install rknpu_debugger_root_list procfs\n"); + goto CREATE_FAIL; + } + + return 0; + 
+CREATE_FAIL: + rknpu_procfs_remove(debugger); + + return ret; +} +#endif /* #ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS */ + +int rknpu_debugger_init(struct rknpu_device *rknpu_dev) +{ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + mutex_init(&rknpu_dev->debugger.debugfs_lock); + INIT_LIST_HEAD(&rknpu_dev->debugger.debugfs_entry_list); + rknpu_debugfs_init(&rknpu_dev->debugger); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + mutex_init(&rknpu_dev->debugger.procfs_lock); + INIT_LIST_HEAD(&rknpu_dev->debugger.procfs_entry_list); + rknpu_procfs_init(&rknpu_dev->debugger); +#endif + return 0; +} + +int rknpu_debugger_remove(struct rknpu_device *rknpu_dev) +{ +#ifdef CONFIG_ROCKCHIP_RKNPU_DEBUG_FS + rknpu_debugfs_remove(&rknpu_dev->debugger); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_PROC_FS + rknpu_procfs_remove(&rknpu_dev->debugger); +#endif + return 0; +} diff --git a/drivers/rknpu/rknpu_drv.c b/drivers/rknpu/rknpu_drv.c new file mode 100644 index 0000000000000000000000000000000000000000..7690c5a5804892aa3a918bab75dc9ea19a22e479 --- /dev/null +++ b/drivers/rknpu/rknpu_drv.c @@ -0,0 +1,1483 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/of_reserved_mem.h> +#include <linux/platform_device.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/time.h> +#include <linux/uaccess.h> +#include <linux/ktime.h> +#include <linux/delay.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/pm_domain.h> +#include <linux/pm_runtime.h> +#include <linux/devfreq_cooling.h> +#include <linux/regmap.h> +#include <linux/of_address.h> + +#ifndef FPGA_PLATFORM +#include <soc/rockchip/rockchip_opp_select.h> +#ifdef CONFIG_PM_DEVFREQ +#include <../drivers/devfreq/governor.h> +#endif +#endif + +#include "rknpu_ioctl.h" +#include "rknpu_reset.h" +#include "rknpu_fence.h" +#include "rknpu_drv.h" +#include "rknpu_gem.h" + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +#include <drm/drm_device.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_file.h> +#include <drm/drm_drv.h> +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +#include <linux/rk-dma-heap.h> +#include "rknpu_mem.h" +#endif + +#define POWER_DOWN_FREQ 200000000 +#define NPU_MMU_DISABLED_POLL_PERIOD_US 1000 +#define NPU_MMU_DISABLED_POLL_TIMEOUT_US 20000 + +static int bypass_irq_handler; +module_param(bypass_irq_handler, int, 0644); +MODULE_PARM_DESC(bypass_irq_handler, + "bypass RKNPU irq handler if set to 1, disabled by default"); + +static int bypass_soft_reset; +module_param(bypass_soft_reset, int, 0644); +MODULE_PARM_DESC(bypass_soft_reset, + "bypass RKNPU soft reset if set to 1, disabled by default"); + +struct rknpu_irqs_data { + const char *name; + irqreturn_t (*irq_hdl)(int irq, void *ctx); +}; + +static const struct rknpu_irqs_data rknpu_irqs[] = { + { "npu_irq", rknpu_core0_irq_handler } +}; + +static const struct rknpu_irqs_data rk3588_npu_irqs[] = { + { "npu0_irq", rknpu_core0_irq_handler }, + { "npu1_irq", rknpu_core1_irq_handler }, + { "npu2_irq", rknpu_core2_irq_handler } +}; +
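+/* single-interrupt parts such as rv1106 reuse the core0 handler */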
+static const struct rknpu_irqs_data rv110x_npu_irqs[] = { + { "npu_irq", rknpu_core0_irq_handler } +}; + +static const struct rknpu_reset_data rknpu_resets[] = { + { "srst_a", "srst_h" } +}; + +static const struct rknpu_reset_data rk3588_npu_resets[] = { + { "srst_a0", "srst_h0" }, + { "srst_a1", "srst_h1" }, + { "srst_a2", "srst_h2" } +}; + +static const struct rknpu_reset_data rv110x_npu_resets[] = { + { "srst_a", "srst_h" } +}; + +static const struct rknpu_config rk356x_rknpu_config = { + .bw_priority_addr = 0xfe180008, + .bw_priority_length = 0x10, + .dma_mask = DMA_BIT_MASK(32), + .pc_data_amount_scale = 1, + .pc_task_number_bits = 12, + .pc_task_number_mask = 0xfff, + .pc_task_status_offset = 0x3c, + .bw_enable = 1, + .irqs = rknpu_irqs, + .resets = rknpu_resets, + .num_irqs = ARRAY_SIZE(rknpu_irqs), + .num_resets = ARRAY_SIZE(rknpu_resets) +}; + +static const struct rknpu_config rk3588_rknpu_config = { + .bw_priority_addr = 0x0, + .bw_priority_length = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .pc_data_amount_scale = 2, + .pc_task_number_bits = 12, + .pc_task_number_mask = 0xfff, + .pc_task_status_offset = 0x3c, + .bw_enable = 0, + .irqs = rk3588_npu_irqs, + .resets = rk3588_npu_resets, + .num_irqs = ARRAY_SIZE(rk3588_npu_irqs), + .num_resets = ARRAY_SIZE(rk3588_npu_resets) +}; + +static const struct rknpu_config rv1106_rknpu_config = { + .bw_priority_addr = 0x0, + .bw_priority_length = 0x0, + .dma_mask = DMA_BIT_MASK(32), + .pc_data_amount_scale = 2, + .pc_task_number_bits = 16, + .pc_task_number_mask = 0xffff, + .pc_task_status_offset = 0x3c, + .bw_enable = 1, + .irqs = rv110x_npu_irqs, + .resets = rv110x_npu_resets, + .num_irqs = ARRAY_SIZE(rv110x_npu_irqs), + .num_resets = ARRAY_SIZE(rv110x_npu_resets) +}; + +static const struct rknpu_config rk3562_rknpu_config = { + .bw_priority_addr = 0x0, + .bw_priority_length = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .pc_data_amount_scale = 2, + .pc_task_number_bits = 16, + .pc_task_number_mask = 0xffff, + .pc_task_status_offset = 0x48, + .bw_enable = 1, + .irqs = rknpu_irqs, + .resets = rknpu_resets, + .num_irqs = ARRAY_SIZE(rknpu_irqs), + .num_resets = ARRAY_SIZE(rknpu_resets) +}; + +/* driver probe and init */ +static const struct of_device_id rknpu_of_match[] = { + { + .compatible = "rockchip,rknpu", + .data = &rk356x_rknpu_config, + }, + { + .compatible = "rockchip,rk3568-rknpu", + .data = &rk356x_rknpu_config, + }, + { + .compatible = "rockchip,rk3588-rknpu", + .data = &rk3588_rknpu_config, + }, + { + .compatible = "rockchip,rv1106-rknpu", + .data = &rv1106_rknpu_config, + }, + { + .compatible = "rockchip,rk3562-rknpu", + .data = &rk3562_rknpu_config, + }, + {}, +}; + +static int rknpu_get_drv_version(uint32_t *version) +{ + *version = RKNPU_GET_DRV_VERSION_CODE(DRIVER_MAJOR, DRIVER_MINOR, + DRIVER_PATCHLEVEL); + return 0; +} + +static int rknpu_power_on(struct rknpu_device *rknpu_dev); +static int rknpu_power_off(struct rknpu_device *rknpu_dev); + +static void rknpu_power_off_delay_work(struct work_struct *power_off_work) +{ + struct rknpu_device *rknpu_dev = + container_of(to_delayed_work(power_off_work), + struct rknpu_device, power_off_work); + mutex_lock(&rknpu_dev->power_lock); + if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) + rknpu_power_off(rknpu_dev); + mutex_unlock(&rknpu_dev->power_lock); +} + +int rknpu_power_get(struct rknpu_device *rknpu_dev) +{ + int ret = 0; + + cancel_delayed_work(&rknpu_dev->power_off_work); + mutex_lock(&rknpu_dev->power_lock); + if (atomic_inc_return(&rknpu_dev->power_refcount) == 1) + 
ret = rknpu_power_on(rknpu_dev); + mutex_unlock(&rknpu_dev->power_lock); + + return ret; +} + +int rknpu_power_put(struct rknpu_device *rknpu_dev) +{ + int ret = 0; + + mutex_lock(&rknpu_dev->power_lock); + if (atomic_dec_if_positive(&rknpu_dev->power_refcount) == 0) + ret = rknpu_power_off(rknpu_dev); + mutex_unlock(&rknpu_dev->power_lock); + + return ret; +} + +static int rknpu_power_put_delay(struct rknpu_device *rknpu_dev) +{ + mutex_lock(&rknpu_dev->power_lock); + if (atomic_read(&rknpu_dev->power_refcount) == 1) + queue_delayed_work( + rknpu_dev->power_off_wq, &rknpu_dev->power_off_work, + msecs_to_jiffies(rknpu_dev->power_put_delay)); + else + atomic_dec_if_positive(&rknpu_dev->power_refcount); + mutex_unlock(&rknpu_dev->power_lock); + return 0; +} + +static int rknpu_action(struct rknpu_device *rknpu_dev, + struct rknpu_action *args) +{ + int ret = -EINVAL; + + switch (args->flags) { + case RKNPU_GET_HW_VERSION: + ret = rknpu_get_hw_version(rknpu_dev, &args->value); + break; + case RKNPU_GET_DRV_VERSION: + ret = rknpu_get_drv_version(&args->value); + break; + case RKNPU_GET_FREQ: +#ifndef FPGA_PLATFORM + args->value = clk_get_rate(rknpu_dev->clks[0].clk); +#endif + ret = 0; + break; + case RKNPU_SET_FREQ: + break; + case RKNPU_GET_VOLT: +#ifndef FPGA_PLATFORM + args->value = regulator_get_voltage(rknpu_dev->vdd); +#endif + ret = 0; + break; + case RKNPU_SET_VOLT: + break; + case RKNPU_ACT_RESET: + ret = rknpu_soft_reset(rknpu_dev); + break; + case RKNPU_GET_BW_PRIORITY: + ret = rknpu_get_bw_priority(rknpu_dev, &args->value, NULL, + NULL); + break; + case RKNPU_SET_BW_PRIORITY: + ret = rknpu_set_bw_priority(rknpu_dev, args->value, 0, 0); + break; + case RKNPU_GET_BW_EXPECT: + ret = rknpu_get_bw_priority(rknpu_dev, NULL, &args->value, + NULL); + break; + case RKNPU_SET_BW_EXPECT: + ret = rknpu_set_bw_priority(rknpu_dev, 0, args->value, 0); + break; + case RKNPU_GET_BW_TW: + ret = rknpu_get_bw_priority(rknpu_dev, NULL, NULL, + &args->value); + break; + case RKNPU_SET_BW_TW: + ret = rknpu_set_bw_priority(rknpu_dev, 0, 0, args->value); + break; + case RKNPU_ACT_CLR_TOTAL_RW_AMOUNT: + ret = rknpu_clear_rw_amount(rknpu_dev); + break; + case RKNPU_GET_DT_WR_AMOUNT: + ret = rknpu_get_rw_amount(rknpu_dev, &args->value, NULL, NULL); + break; + case RKNPU_GET_DT_RD_AMOUNT: + ret = rknpu_get_rw_amount(rknpu_dev, NULL, &args->value, NULL); + break; + case RKNPU_GET_WT_RD_AMOUNT: + ret = rknpu_get_rw_amount(rknpu_dev, NULL, NULL, &args->value); + break; + case RKNPU_GET_TOTAL_RW_AMOUNT: + ret = rknpu_get_total_rw_amount(rknpu_dev, &args->value); + break; + case RKNPU_GET_IOMMU_EN: + args->value = rknpu_dev->iommu_en; + ret = 0; + break; + case RKNPU_SET_PROC_NICE: + set_user_nice(current, *(int32_t *)&args->value); + ret = 0; + break; + case RKNPU_GET_TOTAL_SRAM_SIZE: + if (rknpu_dev->sram_mm) + args->value = rknpu_dev->sram_mm->total_chunks * + rknpu_dev->sram_mm->chunk_size; + else + args->value = 0; + ret = 0; + break; + case RKNPU_GET_FREE_SRAM_SIZE: + if (rknpu_dev->sram_mm) + args->value = rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + else + args->value = 0; + ret = 0; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP +static int rknpu_open(struct inode *inode, struct file *file) +{ + return nonseekable_open(inode, file); +} + +static int rknpu_release(struct inode *inode, struct file *file) +{ + return 0; +} + +static int rknpu_action_ioctl(struct rknpu_device *rknpu_dev, + unsigned long data) +{ + struct 
rknpu_action args; + int ret = -EINVAL; + + if (unlikely(copy_from_user(&args, (struct rknpu_action *)data, + sizeof(struct rknpu_action)))) { + LOG_ERROR("%s: copy_from_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + ret = rknpu_action(rknpu_dev, &args); + + if (unlikely(copy_to_user((struct rknpu_action *)data, &args, + sizeof(struct rknpu_action)))) { + LOG_ERROR("%s: copy_to_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + return ret; +} + +static long rknpu_ioctl(struct file *file, uint32_t cmd, unsigned long arg) +{ + long ret = -EINVAL; + struct rknpu_device *rknpu_dev = + container_of(file->private_data, struct rknpu_device, miscdev); + + rknpu_power_get(rknpu_dev); + + switch (cmd) { + case IOCTL_RKNPU_ACTION: + ret = rknpu_action_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_SUBMIT: + ret = rknpu_submit_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_MEM_CREATE: + ret = rknpu_mem_create_ioctl(rknpu_dev, arg); + break; + case RKNPU_MEM_MAP: + break; + case IOCTL_RKNPU_MEM_DESTROY: + ret = rknpu_mem_destroy_ioctl(rknpu_dev, arg); + break; + case IOCTL_RKNPU_MEM_SYNC: + ret = rknpu_mem_sync_ioctl(rknpu_dev, arg); + break; + default: + break; + } + + rknpu_power_put_delay(rknpu_dev); + + return ret; +} +const struct file_operations rknpu_fops = { + .owner = THIS_MODULE, + .open = rknpu_open, + .release = rknpu_release, + .unlocked_ioctl = rknpu_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = rknpu_ioctl, +#endif +}; +#endif + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +static const struct vm_operations_struct rknpu_gem_vm_ops = { + .fault = rknpu_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static int rknpu_action_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); + + return rknpu_action(rknpu_dev, (struct rknpu_action *)data); +} + +#define RKNPU_IOCTL(func) \ + static int __##func(struct drm_device *dev, void *data, \ + struct drm_file *file_priv) \ + { \ + struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev); \ + int ret = -EINVAL; \ + rknpu_power_get(rknpu_dev); \ + ret = func(dev, data, file_priv); \ + rknpu_power_put_delay(rknpu_dev); \ + return ret; \ + } + +RKNPU_IOCTL(rknpu_action_ioctl); +RKNPU_IOCTL(rknpu_submit_ioctl); +RKNPU_IOCTL(rknpu_gem_create_ioctl); +RKNPU_IOCTL(rknpu_gem_map_ioctl); +RKNPU_IOCTL(rknpu_gem_destroy_ioctl); +RKNPU_IOCTL(rknpu_gem_sync_ioctl); + +static const struct drm_ioctl_desc rknpu_ioctls[] = { + DRM_IOCTL_DEF_DRV(RKNPU_ACTION, __rknpu_action_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_SUBMIT, __rknpu_submit_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_CREATE, __rknpu_gem_create_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_MAP, __rknpu_gem_map_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_DESTROY, __rknpu_gem_destroy_ioctl, + DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RKNPU_MEM_SYNC, __rknpu_gem_sync_ioctl, + DRM_RENDER_ALLOW), +}; + +static const struct file_operations rknpu_drm_driver_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .mmap = rknpu_gem_mmap, + .poll = drm_poll, + .read = drm_read, + .unlocked_ioctl = drm_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .release = drm_release, + .llseek = noop_llseek, +}; + +static struct drm_driver rknpu_drm_driver = { +// #if KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE +// .driver_features = DRIVER_GEM | DRIVER_RENDER, +// #else + // .driver_features = DRIVER_GEM | 
DRIVER_PRIME | DRIVER_RENDER, + .driver_features = DRIVER_GEM | DRIVER_RENDER, +// #endif + // .gem_free_object_unlocked = rknpu_gem_free_object, + // .gem_vm_ops = &rknpu_gem_vm_ops, + // .dumb_create = rknpu_gem_dumb_create, +// #if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE +// .dumb_map_offset = rknpu_gem_dumb_map_offset, +// #else + .dumb_map_offset = drm_gem_dumb_map_offset, +// #endif + // .dumb_destroy = drm_gem_dumb_destroy, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + // .gem_prime_export = drm_gem_prime_export, +// #if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE +// .gem_prime_import = rknpu_gem_prime_import, +// #else + .gem_prime_import = drm_gem_prime_import, +// #endif + // .gem_prime_get_sg_table = rknpu_gem_prime_get_sg_table, + .gem_prime_import_sg_table = rknpu_gem_prime_import_sg_table, + // .gem_prime_vmap = rknpu_gem_prime_vmap, + // .gem_prime_vunmap = rknpu_gem_prime_vunmap, + .gem_prime_mmap = rknpu_gem_prime_mmap, + .ioctls = rknpu_ioctls, + .num_ioctls = ARRAY_SIZE(rknpu_ioctls), + .fops = &rknpu_drm_driver_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +#endif + +static enum hrtimer_restart hrtimer_handler(struct hrtimer *timer) +{ + struct rknpu_device *rknpu_dev = + container_of(timer, struct rknpu_device, timer); + struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *job = NULL; + ktime_t now = ktime_get(); + unsigned long flags; + int i; + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + subcore_data = &rknpu_dev->subcore_datas[i]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + + job = subcore_data->job; + if (job) { + subcore_data->timer.busy_time += + ktime_us_delta(now, job->hw_recoder_time); + job->hw_recoder_time = ktime_get(); + } + + subcore_data->timer.busy_time_record = + subcore_data->timer.busy_time; + subcore_data->timer.busy_time = 0; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } + + hrtimer_forward_now(timer, rknpu_dev->kt); + return HRTIMER_RESTART; +} + +static void rknpu_init_timer(struct rknpu_device *rknpu_dev) +{ + rknpu_dev->kt = ktime_set(0, RKNPU_LOAD_INTERVAL); + hrtimer_init(&rknpu_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rknpu_dev->timer.function = hrtimer_handler; + hrtimer_start(&rknpu_dev->timer, rknpu_dev->kt, HRTIMER_MODE_REL); +} + +static void rknpu_cancel_timer(struct rknpu_device *rknpu_dev) +{ + hrtimer_cancel(&rknpu_dev->timer); +} + +static bool rknpu_is_iommu_enable(struct device *dev) +{ + struct device_node *iommu = NULL; + + iommu = of_parse_phandle(dev->of_node, "iommus", 0); + if (!iommu) { + LOG_DEV_INFO( + dev, + "rknpu iommu device-tree entry not found!, using non-iommu mode\n"); + return false; + } + + if (!of_device_is_available(iommu)) { + LOG_DEV_INFO(dev, + "rknpu iommu is disabled, using non-iommu mode\n"); + of_node_put(iommu); + return false; + } + of_node_put(iommu); + + LOG_DEV_INFO(dev, "rknpu iommu is enabled, using iommu mode\n"); + + return true; +} + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM +static int rknpu_drm_probe(struct rknpu_device *rknpu_dev) +{ + struct device *dev = rknpu_dev->dev; + struct drm_device *drm_dev = NULL; + int ret = -EINVAL; + + drm_dev = drm_dev_alloc(&rknpu_drm_driver, dev); + if (IS_ERR(drm_dev)) + return PTR_ERR(drm_dev); + + /* register the DRM device */ + ret = drm_dev_register(drm_dev, 0); + if (ret < 0) + goto err_free_drm; + + 
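+	/*
+	 * Illustrative only, not part of this patch: once registration
+	 * succeeds, user space reaches the NPU through a DRM render node.
+	 * A minimal hypothetical caller, assuming the uapi header exports
+	 * struct rknpu_action and DRM_IOCTL_RKNPU_ACTION:
+	 *
+	 *   int fd = open("/dev/dri/renderD128", O_RDWR);
+	 *   struct rknpu_action act = { .flags = RKNPU_GET_DRV_VERSION };
+	 *   ioctl(fd, DRM_IOCTL_RKNPU_ACTION, &act);
+	 *   // act.value now holds the packed driver version
+	 */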
+	drm_dev->dev_private = rknpu_dev;
+	rknpu_dev->drm_dev = drm_dev;
+
+	return 0;
+
+err_free_drm:
+#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE
+	drm_dev_put(drm_dev);
+#else
+	drm_dev_unref(drm_dev);
+#endif
+
+	return ret;
+}
+
+static void rknpu_drm_remove(struct rknpu_device *rknpu_dev)
+{
+	struct drm_device *drm_dev = rknpu_dev->drm_dev;
+
+	drm_dev_unregister(drm_dev);
+
+#if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE
+	drm_dev_put(drm_dev);
+#else
+	drm_dev_unref(drm_dev);
+#endif
+}
+#endif
+
+static int rknpu_power_on(struct rknpu_device *rknpu_dev)
+{
+	struct device *dev = rknpu_dev->dev;
+	int ret = -EINVAL;
+
+#ifndef FPGA_PLATFORM
+	if (rknpu_dev->vdd) {
+		ret = regulator_enable(rknpu_dev->vdd);
+		if (ret) {
+			LOG_DEV_ERROR(
+				dev,
+				"failed to enable vdd reg for rknpu, ret: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (rknpu_dev->mem) {
+		ret = regulator_enable(rknpu_dev->mem);
+		if (ret) {
+			LOG_DEV_ERROR(
+				dev,
+				"failed to enable mem reg for rknpu, ret: %d\n",
+				ret);
+			return ret;
+		}
+	}
+#endif
+
+	ret = clk_bulk_prepare_enable(rknpu_dev->num_clks, rknpu_dev->clks);
+	if (ret) {
+		LOG_DEV_ERROR(dev, "failed to enable clk for rknpu, ret: %d\n",
+			      ret);
+		return ret;
+	}
+
+	if (rknpu_dev->multiple_domains) {
+		if (rknpu_dev->genpd_dev_npu0) {
+			ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu0);
+			if (ret < 0) {
+				LOG_DEV_ERROR(
+					dev,
+					"failed to get pm runtime for npu0, ret: %d\n",
+					ret);
+				goto out;
+			}
+		}
+		if (rknpu_dev->genpd_dev_npu1) {
+			ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu1);
+			if (ret < 0) {
+				LOG_DEV_ERROR(
+					dev,
+					"failed to get pm runtime for npu1, ret: %d\n",
+					ret);
+				goto out;
+			}
+		}
+		if (rknpu_dev->genpd_dev_npu2) {
+			ret = pm_runtime_get_sync(rknpu_dev->genpd_dev_npu2);
+			if (ret < 0) {
+				LOG_DEV_ERROR(
+					dev,
+					"failed to get pm runtime for npu2, ret: %d\n",
+					ret);
+				goto out;
+			}
+		}
+	}
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		LOG_DEV_ERROR(dev,
+			      "failed to get pm runtime for rknpu, ret: %d\n",
+			      ret);
+		goto out;
+	}
+	/*
+	 * pm_runtime_get_sync() returns 1 if the device was already active;
+	 * normalize to 0 so callers can treat any non-zero value as failure.
+	 */
+	ret = 0;
+
+out:
+	return ret;
+}
+
+static int rknpu_power_off(struct rknpu_device *rknpu_dev)
+{
+	struct device *dev = rknpu_dev->dev;
+
+	pm_runtime_put_sync(dev);
+
+	if (rknpu_dev->multiple_domains) {
+		if (rknpu_dev->genpd_dev_npu2)
+			pm_runtime_put_sync(rknpu_dev->genpd_dev_npu2);
+		if (rknpu_dev->genpd_dev_npu1)
+			pm_runtime_put_sync(rknpu_dev->genpd_dev_npu1);
+		if (rknpu_dev->genpd_dev_npu0)
+			pm_runtime_put_sync(rknpu_dev->genpd_dev_npu0);
+	}
+
+	clk_bulk_disable_unprepare(rknpu_dev->num_clks, rknpu_dev->clks);
+
+#ifndef FPGA_PLATFORM
+	if (rknpu_dev->vdd)
+		regulator_disable(rknpu_dev->vdd);
+
+	if (rknpu_dev->mem)
+		regulator_disable(rknpu_dev->mem);
+#endif
+
+	return 0;
+}
+
+static int npu_devfreq_target(struct device *dev, unsigned long *target_freq,
+			      u32 flags)
+{
+	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp = NULL;
+	unsigned long freq = *target_freq;
+	unsigned long old_freq = rknpu_dev->current_freq;
+	unsigned long volt, old_volt = rknpu_dev->current_volt;
+	int ret = -EINVAL;
+
+	opp = devfreq_recommended_opp(dev, &freq, flags);
+	if (IS_ERR(opp)) {
+		LOG_DEV_ERROR(dev, "failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+	volt = dev_pm_opp_get_voltage(opp);
+	/* drop the reference taken by devfreq_recommended_opp() */
+	dev_pm_opp_put(opp);
+
+	/*
+	 * If the frequency is unchanged, only the voltage may need updating;
+	 * otherwise sequence the voltage around the clock change below.
+	 */
+	if (old_freq == freq) {
+		*target_freq = freq;
+		if (!rknpu_dev->vdd || old_volt == volt)
+			return 0;
+		ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX);
+		if (ret) {
+			LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt);
+			return ret;
+		}
+		rknpu_dev->current_volt = volt;
+		return 0;
+	}
+
+	if (rknpu_dev->vdd && old_volt != volt && old_freq < freq) {
+		ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX);
+		if (ret) {
+			LOG_DEV_ERROR(dev, "failed to increase volt %lu\n",
+				      volt);
+			return ret;
+		}
+	}
+	LOG_DEV_DEBUG(dev, "%luHz %luuV -> %luHz %luuV\n", old_freq, old_volt,
+		      freq, volt);
+	ret = clk_set_rate(rknpu_dev->clks[0].clk, freq);
+	if (ret) {
+		LOG_DEV_ERROR(dev, "failed to set clock %lu\n", freq);
+		return ret;
+	}
+	*target_freq = freq;
+	rknpu_dev->current_freq = freq;
+
+	if (rknpu_dev->devfreq)
+		rknpu_dev->devfreq->last_status.current_frequency = freq;
+
+	if (rknpu_dev->vdd && old_volt != volt && old_freq > freq) {
+		ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX);
+		if (ret) {
+			LOG_DEV_ERROR(dev, "failed to decrease volt %lu\n",
+				      volt);
+			return ret;
+		}
+	}
+	rknpu_dev->current_volt = volt;
+
+	LOG_DEV_INFO(dev, "set rknpu freq: %lu, volt: %lu\n",
+		     rknpu_dev->current_freq, rknpu_dev->current_volt);
+
+	return ret;
+}
+
+static int npu_devfreq_get_dev_status(struct device *dev,
+				      struct devfreq_dev_status *stat)
+{
+	return 0;
+}
+
+static int npu_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev);
+
+	*freq = rknpu_dev->current_freq;
+
+	return 0;
+}
+
+static struct devfreq_dev_profile npu_devfreq_profile = {
+	.polling_ms = 50,
+	.target = npu_devfreq_target,
+	.get_dev_status = npu_devfreq_get_dev_status,
+	.get_cur_freq = npu_devfreq_get_cur_freq,
+};
+
+#ifdef CONFIG_PM_DEVFREQ
+static int devfreq_rknpu_ondemand_func(struct devfreq *df, unsigned long *freq)
+{
+	struct rknpu_device *rknpu_dev = df->data;
+
+	if (rknpu_dev)
+		*freq = rknpu_dev->ondemand_freq;
+	else
+		*freq = df->previous_freq;
+
+	return 0;
+}
+
+static int devfreq_rknpu_ondemand_handler(struct devfreq *devfreq,
+					  unsigned int event, void *data)
+{
+	return 0;
+}
+
+static struct devfreq_governor devfreq_rknpu_ondemand = {
+	.name = "rknpu_ondemand",
+	.get_target_freq = devfreq_rknpu_ondemand_func,
+	.event_handler = devfreq_rknpu_ondemand_handler,
+};
+#endif
+
+static int npu_devfreq_adjust_current_freq_volt(struct device *dev,
+						struct rknpu_device *rknpu_dev)
+{
+	unsigned long volt, old_freq, freq;
+	struct dev_pm_opp *opp = NULL;
+	int ret = -EINVAL;
+
+	old_freq = clk_get_rate(rknpu_dev->clks[0].clk);
+	freq = old_freq;
+
+	opp = devfreq_recommended_opp(dev, &freq, 0);
+	if (IS_ERR(opp)) {
+		LOG_DEV_ERROR(dev, "failed to get opp (%ld)\n", PTR_ERR(opp));
+		return PTR_ERR(opp);
+	}
+	volt = dev_pm_opp_get_voltage(opp);
+	dev_pm_opp_put(opp);
+
+	if (freq >= old_freq && rknpu_dev->vdd) {
+		ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX);
+		if (ret) {
+			LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt);
+			return ret;
+		}
+	}
+	LOG_DEV_DEBUG(dev, "adjust current freq=%luHz, volt=%luuV\n", freq,
+		      volt);
+	ret = clk_set_rate(rknpu_dev->clks[0].clk, freq);
+	if (ret) {
+		LOG_DEV_ERROR(dev, "failed to set clock %lu\n", freq);
+		return ret;
+	}
+	if (freq < old_freq && rknpu_dev->vdd) {
+		ret = regulator_set_voltage(rknpu_dev->vdd, volt, INT_MAX);
+		if (ret) {
+			LOG_DEV_ERROR(dev, "failed to set volt %lu\n", volt);
+			return ret;
+		}
+	}
+	rknpu_dev->current_freq = freq;
+	rknpu_dev->current_volt = volt;
+
+	return 0;
+}
+
+static int rknpu_devfreq_init(struct rknpu_device *rknpu_dev)
+{
+	struct device *dev = rknpu_dev->dev;
+	struct devfreq_dev_profile *dp = &npu_devfreq_profile;
+	int ret = -EINVAL;
+
+	ret = rockchip_init_opp_table(dev, NULL, "npu_leakage", "rknpu");
+	if (ret) {
+		LOG_DEV_ERROR(dev, "failed to init_opp_table\n");
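+		/*
+		 * A failure here usually means the NPU node has no usable
+		 * OPP table. Hypothetical device-tree sketch (node names
+		 * and values are illustrative, not mandated by this driver):
+		 *
+		 *   npu_opp_table: opp-table {
+		 *       compatible = "operating-points-v2";
+		 *       opp-600000000 {
+		 *           opp-hz = /bits/ 64 <600000000>;
+		 *           opp-microvolt = <825000>;
+		 *       };
+		 *   };
+		 */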
return ret; + } + + ret = npu_devfreq_adjust_current_freq_volt(dev, rknpu_dev); + if (ret) { + LOG_DEV_ERROR(dev, "failed to adjust current freq volt\n"); + goto err_remove_table; + } + dp->initial_freq = rknpu_dev->current_freq; + +#ifdef CONFIG_PM_DEVFREQ + ret = devfreq_add_governor(&devfreq_rknpu_ondemand); + if (ret) { + LOG_DEV_ERROR(dev, "failed to add rknpu_ondemand governor\n"); + goto err_remove_table; + } +#endif + + rknpu_dev->devfreq = devm_devfreq_add_device(dev, dp, "rknpu_ondemand", + (void *)rknpu_dev); + if (IS_ERR(rknpu_dev->devfreq)) { + LOG_DEV_ERROR(dev, "failed to add devfreq\n"); + ret = PTR_ERR(rknpu_dev->devfreq); + goto err_remove_governor; + } + devm_devfreq_register_opp_notifier(dev, rknpu_dev->devfreq); + + rknpu_dev->devfreq->last_status.current_frequency = dp->initial_freq; + rknpu_dev->devfreq->last_status.total_time = 1; + rknpu_dev->devfreq->last_status.busy_time = 1; + + rknpu_dev->current_freq = clk_get_rate(rknpu_dev->clks[0].clk); + rknpu_dev->current_volt = regulator_get_voltage(rknpu_dev->vdd); + + if (IS_ERR_OR_NULL(rknpu_dev->devfreq_cooling)) + LOG_DEV_ERROR(dev, "failed to register cooling device\n"); + + return 0; + +err_remove_governor: +#ifdef CONFIG_PM_DEVFREQ + devfreq_remove_governor(&devfreq_rknpu_ondemand); +#endif +err_remove_table: + dev_pm_opp_of_remove_table(dev); + + rknpu_dev->devfreq = NULL; + + return ret; +} + +static int rknpu_devfreq_remove(struct rknpu_device *rknpu_dev) +{ + if (rknpu_dev->devfreq) { + devfreq_unregister_opp_notifier(rknpu_dev->dev, + rknpu_dev->devfreq); + dev_pm_opp_of_remove_table(rknpu_dev->dev); +#ifdef CONFIG_PM_DEVFREQ + devfreq_remove_governor(&devfreq_rknpu_ondemand); +#endif + } + + return 0; +} + +static int rknpu_register_irq(struct platform_device *pdev, + struct rknpu_device *rknpu_dev) +{ + const struct rknpu_config *config = rknpu_dev->config; + struct device *dev = &pdev->dev; + struct resource *res; + int i, ret, irq; + + res = platform_get_resource_byname(pdev, IORESOURCE_IRQ, + config->irqs[0].name); + if (res) { + /* there are irq names in dts */ + for (i = 0; i < config->num_irqs; i++) { + irq = platform_get_irq_byname(pdev, + config->irqs[i].name); + if (irq < 0) { + LOG_DEV_ERROR(dev, "no npu %s in dts\n", + config->irqs[i].name); + return irq; + } + + ret = devm_request_irq(dev, irq, + config->irqs[i].irq_hdl, + IRQF_SHARED, dev_name(dev), + rknpu_dev); + if (ret < 0) { + LOG_DEV_ERROR(dev, "request %s failed: %d\n", + config->irqs[i].name, ret); + return ret; + } + } + } else { + /* no irq names in dts */ + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + LOG_DEV_ERROR(dev, "no npu irq in dts\n"); + return irq; + } + + ret = devm_request_irq(dev, irq, rknpu_core0_irq_handler, + IRQF_SHARED, dev_name(dev), rknpu_dev); + if (ret < 0) { + LOG_DEV_ERROR(dev, "request irq failed: %d\n", ret); + return ret; + } + } + + return 0; +} + +static int rknpu_find_sram_resource(struct rknpu_device *rknpu_dev) +{ + struct device *dev = rknpu_dev->dev; + struct device_node *sram_node = NULL; + struct resource sram_res; + uint32_t sram_size = 0; + int ret = -EINVAL; + + /* get sram device node */ + sram_node = of_parse_phandle(dev->of_node, "rockchip,sram", 0); + rknpu_dev->sram_size = 0; + if (!sram_node) + return -EINVAL; + + /* get sram start and size */ + ret = of_address_to_resource(sram_node, 0, &sram_res); + of_node_put(sram_node); + if (ret) + return ret; + + /* check sram start and size is PAGE_SIZE align */ + rknpu_dev->sram_start = round_up(sram_res.start, PAGE_SIZE); + 
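+	/*
+	 * Hypothetical device-tree sketch for the "rockchip,sram" phandle
+	 * parsed above (labels and addresses are illustrative):
+	 *
+	 *   rknpu: npu@fde40000 {
+	 *       ...
+	 *       rockchip,sram = <&rknpu_sram>;
+	 *   };
+	 *   rknpu_sram: sram-section@ff001000 {
+	 *       reg = <0x0 0xff001000 0x0 0xef000>;
+	 *   };
+	 */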
+	rknpu_dev->sram_end = round_down(
+		sram_res.start + resource_size(&sram_res), PAGE_SIZE);
+	if (rknpu_dev->sram_end <= rknpu_dev->sram_start) {
+		LOG_DEV_WARN(
+			dev,
+			"invalid sram resource, sram start %pa, sram end %pa\n",
+			&rknpu_dev->sram_start, &rknpu_dev->sram_end);
+		return -EINVAL;
+	}
+
+	sram_size = rknpu_dev->sram_end - rknpu_dev->sram_start;
+
+	rknpu_dev->sram_base_io =
+		devm_ioremap(dev, rknpu_dev->sram_start, sram_size);
+	if (!rknpu_dev->sram_base_io) {
+		/* devm_ioremap() returns NULL on failure, not an ERR_PTR */
+		LOG_DEV_ERROR(dev, "failed to remap sram base io!\n");
+		return -ENOMEM;
+	}
+
+	rknpu_dev->sram_size = sram_size;
+
+	LOG_DEV_INFO(dev, "sram region: [%pa, %pa), sram size: %#x\n",
+		     &rknpu_dev->sram_start, &rknpu_dev->sram_end,
+		     rknpu_dev->sram_size);
+
+	return 0;
+}
+
+static int rknpu_probe(struct platform_device *pdev)
+{
+	struct resource *res = NULL;
+	struct rknpu_device *rknpu_dev = NULL;
+	struct device *dev = &pdev->dev;
+	struct device *virt_dev = NULL;
+	const struct of_device_id *match = NULL;
+	const struct rknpu_config *config = NULL;
+	int ret = -EINVAL, i = 0;
+
+	if (!pdev->dev.of_node) {
+		LOG_DEV_ERROR(dev, "rknpu device-tree data is missing!\n");
+		return -ENODEV;
+	}
+
+	match = of_match_device(rknpu_of_match, dev);
+	if (!match) {
+		LOG_DEV_ERROR(dev, "rknpu device-tree entry is missing!\n");
+		return -ENODEV;
+	}
+
+	rknpu_dev = devm_kzalloc(dev, sizeof(*rknpu_dev), GFP_KERNEL);
+	if (!rknpu_dev) {
+		LOG_DEV_ERROR(dev, "failed to allocate rknpu device!\n");
+		return -ENOMEM;
+	}
+
+	config = of_device_get_match_data(dev);
+	if (!config)
+		return -EINVAL;
+
+	rknpu_dev->config = config;
+	rknpu_dev->dev = dev;
+
+	rknpu_dev->iommu_en = rknpu_is_iommu_enable(dev);
+	if (!rknpu_dev->iommu_en) {
+		/* Initialize reserved memory resources */
+		ret = of_reserved_mem_device_init(dev);
+		if (!ret) {
+			LOG_DEV_INFO(
+				dev,
+				"initialize reserved memory for rknpu device!\n");
+		}
+	}
+
+	rknpu_dev->bypass_irq_handler = bypass_irq_handler;
+	rknpu_dev->bypass_soft_reset = bypass_soft_reset;
+
+	rknpu_reset_get(rknpu_dev);
+
+	rknpu_dev->num_clks = devm_clk_bulk_get_all(dev, &rknpu_dev->clks);
+	if (rknpu_dev->num_clks < 1) {
+		LOG_DEV_ERROR(dev, "failed to get clk source for rknpu\n");
+#ifndef FPGA_PLATFORM
+		return -ENODEV;
+#endif
+	}
+
+#ifndef FPGA_PLATFORM
+// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
+//	if (strstr(__clk_get_name(rknpu_dev->clks[0].clk), "scmi"))
+//		rknpu_dev->opp_info.scmi_clk = rknpu_dev->clks[0].clk;
+// #endif
+
+	rknpu_dev->vdd = devm_regulator_get_optional(dev, "rknpu");
+	if (IS_ERR(rknpu_dev->vdd)) {
+		if (PTR_ERR(rknpu_dev->vdd) != -ENODEV) {
+			ret = PTR_ERR(rknpu_dev->vdd);
+			LOG_DEV_ERROR(
+				dev,
+				"failed to get vdd regulator for rknpu: %d\n",
+				ret);
+			return ret;
+		}
+		rknpu_dev->vdd = NULL;
+	}
+
+	rknpu_dev->mem = devm_regulator_get_optional(dev, "mem");
+	if (IS_ERR(rknpu_dev->mem)) {
+		if (PTR_ERR(rknpu_dev->mem) != -ENODEV) {
+			ret = PTR_ERR(rknpu_dev->mem);
+			LOG_DEV_ERROR(
+				dev,
+				"failed to get mem regulator for rknpu: %d\n",
+				ret);
+			return ret;
+		}
+		rknpu_dev->mem = NULL;
+	}
+#endif
+
+	spin_lock_init(&rknpu_dev->lock);
+	spin_lock_init(&rknpu_dev->irq_lock);
+	mutex_init(&rknpu_dev->power_lock);
+	mutex_init(&rknpu_dev->reset_lock);
+	for (i = 0; i < config->num_irqs; i++) {
+		INIT_LIST_HEAD(&rknpu_dev->subcore_datas[i].todo_list);
+		init_waitqueue_head(&rknpu_dev->subcore_datas[i].job_done_wq);
+		rknpu_dev->subcore_datas[i].task_num = 0;
+		res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		if (!res) {
+			LOG_DEV_ERROR(
+				dev,
+				"failed to get memory resource for rknpu\n");
+			return -ENXIO;
+		}
+
+		rknpu_dev->base[i] = devm_ioremap_resource(dev, res);
+		if (PTR_ERR(rknpu_dev->base[i]) == -EBUSY) {
+			rknpu_dev->base[i] = devm_ioremap(dev, res->start,
+							  resource_size(res));
+		}
+
+		if (IS_ERR(rknpu_dev->base[i])) {
+			LOG_DEV_ERROR(dev,
+				      "failed to remap register for rknpu\n");
+			return PTR_ERR(rknpu_dev->base[i]);
+		}
+	}
+
+	if (config->bw_priority_length > 0) {
+		rknpu_dev->bw_priority_base =
+			devm_ioremap(dev, config->bw_priority_addr,
+				     config->bw_priority_length);
+		if (!rknpu_dev->bw_priority_base) {
+			/* devm_ioremap() returns NULL on failure */
+			LOG_DEV_ERROR(
+				rknpu_dev->dev,
+				"failed to remap bw priority register for rknpu\n");
+		}
+	}
+
+	if (!rknpu_dev->bypass_irq_handler) {
+		ret = rknpu_register_irq(pdev, rknpu_dev);
+		if (ret)
+			return ret;
+	} else {
+		LOG_DEV_WARN(dev, "bypass irq handler!\n");
+	}
+
+#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
+	ret = rknpu_drm_probe(rknpu_dev);
+	if (ret) {
+		LOG_DEV_ERROR(dev, "failed to probe device for rknpu\n");
+		return ret;
+	}
+#endif
+#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
+	rknpu_dev->miscdev.minor = MISC_DYNAMIC_MINOR;
+	rknpu_dev->miscdev.name = "rknpu";
+	rknpu_dev->miscdev.fops = &rknpu_fops;
+
+	ret = misc_register(&rknpu_dev->miscdev);
+	if (ret) {
+		LOG_DEV_ERROR(dev, "cannot register miscdev (%d)\n", ret);
+		return ret;
+	}
+
+	rknpu_dev->heap = rk_dma_heap_find("rk-dma-heap-cma");
+	if (!rknpu_dev->heap) {
+		LOG_DEV_ERROR(dev, "failed to find cma heap\n");
+		/* unwind the miscdev registration above */
+		ret = -ENOMEM;
+		goto err_remove_drv;
+	}
+	rk_dma_heap_set_dev(dev);
+	LOG_DEV_INFO(dev, "Initialized %s: v%d.%d.%d for %s\n", DRIVER_DESC,
+		     DRIVER_MAJOR, DRIVER_MINOR, DRIVER_PATCHLEVEL,
+		     DRIVER_DATE);
+#endif
+
+#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
+	ret = rknpu_fence_context_alloc(rknpu_dev);
+	if (ret) {
+		LOG_DEV_ERROR(dev,
+			      "failed to allocate fence context for rknpu\n");
+		goto err_remove_drv;
+	}
+#endif
+
+	platform_set_drvdata(pdev, rknpu_dev);
+
+	pm_runtime_enable(dev);
+
+	if (of_count_phandle_with_args(dev->of_node, "power-domains",
+				       "#power-domain-cells") > 1) {
+		virt_dev = dev_pm_domain_attach_by_name(dev, "npu0");
+		if (!IS_ERR(virt_dev))
+			rknpu_dev->genpd_dev_npu0 = virt_dev;
+		virt_dev = dev_pm_domain_attach_by_name(dev, "npu1");
+		if (!IS_ERR(virt_dev))
+			rknpu_dev->genpd_dev_npu1 = virt_dev;
+		virt_dev = dev_pm_domain_attach_by_name(dev, "npu2");
+		if (!IS_ERR(virt_dev))
+			rknpu_dev->genpd_dev_npu2 = virt_dev;
+		rknpu_dev->multiple_domains = true;
+	}
+
+	ret = rknpu_power_on(rknpu_dev);
+	if (ret)
+		goto err_remove_drv;
+
+#ifndef FPGA_PLATFORM
+	rknpu_devfreq_init(rknpu_dev);
+#endif
+
+	// set default power put delay to 3s
+	rknpu_dev->power_put_delay = 3000;
+	rknpu_dev->power_off_wq =
+		create_freezable_workqueue("rknpu_power_off_wq");
+	if (!rknpu_dev->power_off_wq) {
+		LOG_DEV_ERROR(dev,
+			      "rknpu couldn't create power_off workqueue\n");
+		ret = -ENOMEM;
+		goto err_devfreq_remove;
+	}
+	INIT_DEFERRABLE_WORK(&rknpu_dev->power_off_work,
+			     rknpu_power_off_delay_work);
+
+	if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->iommu_en) {
+		if (!rknpu_find_sram_resource(rknpu_dev)) {
+			ret = rknpu_mm_create(rknpu_dev->sram_size, PAGE_SIZE,
+					      &rknpu_dev->sram_mm);
+			if (ret != 0)
+				goto err_remove_wq;
+		} else {
+			LOG_DEV_WARN(dev, "could not find sram resource!\n");
+		}
+	}
+
+	rknpu_power_off(rknpu_dev);
+	atomic_set(&rknpu_dev->power_refcount, 0);
+	atomic_set(&rknpu_dev->cmdline_power_refcount, 0);
+
+	rknpu_debugger_init(rknpu_dev);
+	rknpu_init_timer(rknpu_dev);
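+	/*
+	 * The hrtimer started above samples per-core busy time once per
+	 * RKNPU_LOAD_INTERVAL (1 s). Illustrative load computation from
+	 * the recorded value, e.g. in a debugfs "load" reader:
+	 *
+	 *   busy_us = subcore_data->timer.busy_time_record;
+	 *   load_percent = busy_us * 100 / (RKNPU_LOAD_INTERVAL / 1000);
+	 *
+	 * (busy_time is accumulated in microseconds, the interval in ns.)
+	 */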
+ return 0; + +err_remove_wq: + destroy_workqueue(rknpu_dev->power_off_wq); + +err_devfreq_remove: +#ifndef FPGA_PLATFORM + rknpu_devfreq_remove(rknpu_dev); +#endif + +err_remove_drv: +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + rknpu_drm_remove(rknpu_dev); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + misc_deregister(&(rknpu_dev->miscdev)); +#endif + + return ret; +} + +static int rknpu_remove(struct platform_device *pdev) +{ + struct rknpu_device *rknpu_dev = platform_get_drvdata(pdev); + int i = 0; + + cancel_delayed_work_sync(&rknpu_dev->power_off_work); + destroy_workqueue(rknpu_dev->power_off_wq); + + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_dev->sram_mm) + rknpu_mm_destroy(rknpu_dev->sram_mm); + + rknpu_debugger_remove(rknpu_dev); + rknpu_cancel_timer(rknpu_dev); + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + WARN_ON(rknpu_dev->subcore_datas[i].job); + WARN_ON(!list_empty(&rknpu_dev->subcore_datas[i].todo_list)); + } + +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + rknpu_drm_remove(rknpu_dev); +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + misc_deregister(&(rknpu_dev->miscdev)); +#endif + +#ifndef FPGA_PLATFORM + rknpu_devfreq_remove(rknpu_dev); +#endif + + mutex_lock(&rknpu_dev->power_lock); + if (atomic_read(&rknpu_dev->power_refcount) > 0) + rknpu_power_off(rknpu_dev); + mutex_unlock(&rknpu_dev->power_lock); + + if (rknpu_dev->multiple_domains) { + if (rknpu_dev->genpd_dev_npu0) + dev_pm_domain_detach(rknpu_dev->genpd_dev_npu0, true); + if (rknpu_dev->genpd_dev_npu1) + dev_pm_domain_detach(rknpu_dev->genpd_dev_npu1, true); + if (rknpu_dev->genpd_dev_npu2) + dev_pm_domain_detach(rknpu_dev->genpd_dev_npu2, true); + } + + pm_runtime_disable(&pdev->dev); + + return 0; +} + +#ifndef FPGA_PLATFORM +// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +// static int rknpu_runtime_suspend(struct device *dev) +// { +// struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); +// struct rockchip_opp_info *opp_info = &rknpu_dev->opp_info; + +// if (opp_info->scmi_clk) { +// if (clk_set_rate(opp_info->scmi_clk, POWER_DOWN_FREQ)) +// LOG_DEV_ERROR(dev, "failed to restore clk rate\n"); +// } +// opp_info->current_rm = UINT_MAX; + +// return 0; +// } + +// static int rknpu_runtime_resume(struct device *dev) +// { +// struct rknpu_device *rknpu_dev = dev_get_drvdata(dev); +// struct rockchip_opp_info *opp_info = &rknpu_dev->opp_info; +// int ret = 0; + +// if (!rknpu_dev->current_freq || !rknpu_dev->current_volt) +// return 0; + +// ret = clk_bulk_prepare_enable(opp_info->num_clks, opp_info->clks); +// if (ret) { +// LOG_DEV_ERROR(dev, "failed to enable opp clks\n"); +// return ret; +// } + +// if (opp_info->data && opp_info->data->set_read_margin) +// opp_info->data->set_read_margin(dev, opp_info, +// opp_info->target_rm); +// if (opp_info->scmi_clk) { +// if (clk_set_rate(opp_info->scmi_clk, rknpu_dev->current_freq)) +// LOG_DEV_ERROR(dev, "failed to set power down rate\n"); +// } + +// clk_bulk_disable_unprepare(opp_info->num_clks, opp_info->clks); + +// return ret; +// } + +// static const struct dev_pm_ops rknpu_pm_ops = { +// SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, +// pm_runtime_force_resume) +// SET_RUNTIME_PM_OPS(rknpu_runtime_suspend, rknpu_runtime_resume, +// NULL) +// }; +// #endif +#endif + +static struct platform_driver rknpu_driver = { + .probe = rknpu_probe, + .remove = rknpu_remove, + .driver = { + .owner = THIS_MODULE, + .name = "RKNPU", +#ifndef FPGA_PLATFORM +// #if KERNEL_VERSION(5, 5, 0) < LINUX_VERSION_CODE +// .pm = &rknpu_pm_ops, +// #endif 
+#endif
+		.of_match_table = of_match_ptr(rknpu_of_match),
+	},
+};
+
+static int rknpu_init(void)
+{
+	return platform_driver_register(&rknpu_driver);
+}
+
+static void rknpu_exit(void)
+{
+	platform_driver_unregister(&rknpu_driver);
+}
+
+late_initcall(rknpu_init);
+module_exit(rknpu_exit);
+
+MODULE_DESCRIPTION("rknpu driver");
+MODULE_AUTHOR("Felix Zeng <felix.zeng@rock-chips.com>");
+MODULE_ALIAS("rockchip-rknpu");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(RKNPU_GET_DRV_VERSION_STRING(DRIVER_MAJOR, DRIVER_MINOR,
+					    DRIVER_PATCHLEVEL));
+MODULE_IMPORT_NS(DMA_BUF);
diff --git a/drivers/rknpu/rknpu_fence.c b/drivers/rknpu/rknpu_fence.c
new file mode 100644
index 0000000000000000000000000000000000000000..dc22ea1c4e120abab4293bf02ca2a152a92e0bc8
--- /dev/null
+++ b/drivers/rknpu/rknpu_fence.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Rockchip Electronics Co.Ltd
+ * Author: Felix Zeng <felix.zeng@rock-chips.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/dma-fence.h>
+#include <linux/sync_file.h>
+
+#include "rknpu_drv.h"
+#include "rknpu_job.h"
+
+#include "rknpu_fence.h"
+
+static const char *rknpu_fence_get_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const struct dma_fence_ops rknpu_fence_ops = {
+	.get_driver_name = rknpu_fence_get_name,
+	.get_timeline_name = rknpu_fence_get_name,
+};
+
+int rknpu_fence_context_alloc(struct rknpu_device *rknpu_dev)
+{
+	struct rknpu_fence_context *fence_ctx = NULL;
+
+	fence_ctx =
+		devm_kzalloc(rknpu_dev->dev, sizeof(*fence_ctx), GFP_KERNEL);
+	if (!fence_ctx)
+		return -ENOMEM;
+
+	fence_ctx->context = dma_fence_context_alloc(1);
+	spin_lock_init(&fence_ctx->spinlock);
+
+	rknpu_dev->fence_ctx = fence_ctx;
+
+	return 0;
+}
+
+int rknpu_fence_alloc(struct rknpu_job *job)
+{
+	struct rknpu_fence_context *fence_ctx = job->rknpu_dev->fence_ctx;
+	struct dma_fence *fence = NULL;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+
+	dma_fence_init(fence, &rknpu_fence_ops, &fence_ctx->spinlock,
+		       fence_ctx->context, ++fence_ctx->seqno);
+
+	job->fence = fence;
+
+	return 0;
+}
+
+int rknpu_fence_get_fd(struct rknpu_job *job)
+{
+	struct sync_file *sync_file = NULL;
+	int fence_fd = -1;
+
+	if (!job->fence)
+		return -EINVAL;
+
+	fence_fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fence_fd < 0)
+		return fence_fd;
+
+	sync_file = sync_file_create(job->fence);
+	if (!sync_file) {
+		/* don't leak the reserved fd on failure */
+		put_unused_fd(fence_fd);
+		return -ENOMEM;
+	}
+
+	fd_install(fence_fd, sync_file->file);
+
+	return fence_fd;
+}
diff --git a/drivers/rknpu/rknpu_gem.c b/drivers/rknpu/rknpu_gem.c
new file mode 100644
index 0000000000000000000000000000000000000000..e0b151ec43c78c8cd2eeacdf62b9e22fbb9e09bc
--- /dev/null
+++ b/drivers/rknpu/rknpu_gem.c
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Rockchip Electronics Co.Ltd
+ * Author: Felix Zeng <felix.zeng@rock-chips.com>
+ */
+
+#include <drm/drm_device.h>
+#include <drm/drm_vma_manager.h>
+#include <drm/drm_prime.h>
+#include <drm/drm_file.h>
+#include <drm/drm_drv.h>
+
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/iommu.h>
+#include <linux/dma-iommu.h>
+#include <linux/pfn_t.h>
+#include <linux/version.h>
+#include <asm/cacheflush.h>
+
+// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
+// #include <linux/dma-map-ops.h>
+// #endif
+
+#include "rknpu_drv.h"
+#include "rknpu_ioctl.h"
+#include "rknpu_gem.h"
+
+#define RKNPU_GEM_ALLOC_FROM_PAGES 1
+
+#if RKNPU_GEM_ALLOC_FROM_PAGES
+static int
rknpu_gem_get_pages(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct scatterlist *s = NULL; + dma_addr_t dma_addr = 0; + dma_addr_t phys = 0; + int ret = -EINVAL, i = 0; + + rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); + if (IS_ERR(rknpu_obj->pages)) { + ret = PTR_ERR(rknpu_obj->pages); + LOG_ERROR("failed to get pages: %d\n", ret); + return ret; + } + + rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT; + +// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +// rknpu_obj->sgt = drm_prime_pages_to_sg(drm, rknpu_obj->pages, +// rknpu_obj->num_pages); +// #else + // rknpu_obj->sgt = + // drm_prime_pages_to_sg(rknpu_obj->pages, rknpu_obj->num_pages); + rknpu_obj->sgt = + drm_prime_pages_to_sg(drm, rknpu_obj->pages, rknpu_obj->num_pages); +// #endif + if (IS_ERR(rknpu_obj->sgt)) { + ret = PTR_ERR(rknpu_obj->sgt); + LOG_ERROR("failed to allocate sgt: %d\n", ret); + goto put_pages; + } + + ret = dma_map_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL); + if (ret == 0) { + ret = -EFAULT; + LOG_DEV_ERROR(drm->dev, "%s: dma map %zu fail\n", __func__, + rknpu_obj->size); + goto free_sgt; + } + + if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) { + rknpu_obj->cookie = vmap(rknpu_obj->pages, rknpu_obj->num_pages, + VM_MAP, PAGE_KERNEL); + if (!rknpu_obj->cookie) { + ret = -ENOMEM; + LOG_ERROR("failed to vmap: %d\n", ret); + goto unmap_sg; + } + rknpu_obj->kv_addr = rknpu_obj->cookie; + } + + dma_addr = sg_dma_address(rknpu_obj->sgt->sgl); + rknpu_obj->dma_addr = dma_addr; + + for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { + dma_addr += s->length; + phys = sg_phys(s); + LOG_DEBUG( + "gem pages alloc sgt[%d], dma_address: %pad, length: %#x, phys: %pad, virt: %p\n", + i, &dma_addr, s->length, &phys, sg_virt(s)); + } + + return 0; + +unmap_sg: + dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, rknpu_obj->sgt->nents, + DMA_BIDIRECTIONAL); + +free_sgt: + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + +put_pages: + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false); + + return ret; +} + +static void rknpu_gem_put_pages(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + + if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) { + vunmap(rknpu_obj->kv_addr); + rknpu_obj->kv_addr = NULL; + } + + if (rknpu_obj->sgt != NULL) { + dma_unmap_sg(drm->dev, rknpu_obj->sgt->sgl, + rknpu_obj->sgt->nents, DMA_BIDIRECTIONAL); + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + } + + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, true); +} +#endif + +static int rknpu_gem_alloc_buf(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct rknpu_device *rknpu_dev = drm->dev_private; + unsigned int nr_pages = 0; + struct sg_table *sgt = NULL; + struct scatterlist *s = NULL; + gfp_t gfp_mask = GFP_KERNEL; + int ret = -EINVAL, i = 0; + + if (rknpu_obj->dma_addr) { + LOG_DEBUG("buffer already allocated.\n"); + return 0; + } + + rknpu_obj->dma_attrs = 0; + + /* + * if RKNPU_MEM_CONTIGUOUS, fully physically contiguous memory + * region will be allocated else physically contiguous + * as possible. 
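+	 * For example (illustrative): RKNPU_MEM_CONTIGUOUS with
+	 * RKNPU_MEM_KERNEL_MAPPING cleared resolves to
+	 * DMA_ATTR_FORCE_CONTIGUOUS | DMA_ATTR_NO_KERNEL_MAPPING below,
+	 * while RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE drops
+	 * the force-contiguous attribute and adds DMA_ATTR_WRITE_COMBINE.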
+ */ + if (!(rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS)) + rknpu_obj->dma_attrs |= DMA_ATTR_FORCE_CONTIGUOUS; + + // cacheable mapping or writecombine mapping + if (rknpu_obj->flags & RKNPU_MEM_CACHEABLE) { +#ifdef DMA_ATTR_NON_CONSISTENT + rknpu_obj->dma_attrs |= DMA_ATTR_NON_CONSISTENT; +#endif +#ifdef DMA_ATTR_SYS_CACHE_ONLY + rknpu_obj->dma_attrs |= DMA_ATTR_SYS_CACHE_ONLY; +#endif + } else if (rknpu_obj->flags & RKNPU_MEM_WRITE_COMBINE) { + rknpu_obj->dma_attrs |= DMA_ATTR_WRITE_COMBINE; + } + + if (!(rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING)) + rknpu_obj->dma_attrs |= DMA_ATTR_NO_KERNEL_MAPPING; + +#ifdef DMA_ATTR_SKIP_ZEROING + if (!(rknpu_obj->flags & RKNPU_MEM_ZEROING)) + rknpu_obj->dma_attrs |= DMA_ATTR_SKIP_ZEROING; +#endif + +#if RKNPU_GEM_ALLOC_FROM_PAGES + if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && + rknpu_dev->iommu_en) { + return rknpu_gem_get_pages(rknpu_obj); + } +#endif + + if (rknpu_obj->flags & RKNPU_MEM_ZEROING) + gfp_mask |= __GFP_ZERO; + + if (!(rknpu_obj->flags & RKNPU_MEM_NON_DMA32)) { + gfp_mask &= ~__GFP_HIGHMEM; + gfp_mask |= __GFP_DMA32; + } + + nr_pages = rknpu_obj->size >> PAGE_SHIFT; + + rknpu_obj->pages = rknpu_gem_alloc_page(nr_pages); + if (!rknpu_obj->pages) { + LOG_ERROR("failed to allocate pages.\n"); + return -ENOMEM; + } + + rknpu_obj->cookie = + dma_alloc_attrs(drm->dev, rknpu_obj->size, &rknpu_obj->dma_addr, + gfp_mask, rknpu_obj->dma_attrs); + if (!rknpu_obj->cookie) { + /* + * when RKNPU_MEM_CONTIGUOUS and IOMMU is available + * try to fallback to allocate non-contiguous buffer + */ + if (!(rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && + rknpu_dev->iommu_en) { + LOG_DEV_WARN( + drm->dev, + "try to fallback to allocate non-contiguous %lu buffer.\n", + rknpu_obj->size); + rknpu_obj->dma_attrs &= ~DMA_ATTR_FORCE_CONTIGUOUS; + rknpu_obj->flags |= RKNPU_MEM_NON_CONTIGUOUS; + rknpu_obj->cookie = + dma_alloc_attrs(drm->dev, rknpu_obj->size, + &rknpu_obj->dma_addr, gfp_mask, + rknpu_obj->dma_attrs); + if (!rknpu_obj->cookie) { + LOG_DEV_ERROR( + drm->dev, + "failed to allocate non-contiguous %lu buffer.\n", + rknpu_obj->size); + goto err_free; + } + } else { + LOG_DEV_ERROR(drm->dev, + "failed to allocate %lu buffer.\n", + rknpu_obj->size); + goto err_free; + } + } + + if (rknpu_obj->flags & RKNPU_MEM_KERNEL_MAPPING) + rknpu_obj->kv_addr = rknpu_obj->cookie; + + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + ret = -ENOMEM; + goto err_free_dma; + } + + ret = dma_get_sgtable_attrs(drm->dev, sgt, rknpu_obj->cookie, + rknpu_obj->dma_addr, rknpu_obj->size, + rknpu_obj->dma_attrs); + if (ret < 0) { + LOG_DEV_ERROR(drm->dev, "failed to get sgtable.\n"); + goto err_free_sgt; + } + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + sg_dma_address(s) = sg_phys(s); + LOG_DEBUG("dma alloc sgt[%d], phys_address: %pad, length: %u\n", + i, &s->dma_address, s->length); + } + + if (drm_prime_sg_to_page_array(sgt, rknpu_obj->pages, + nr_pages)) { + LOG_DEV_ERROR(drm->dev, "invalid sgtable.\n"); + ret = -EINVAL; + goto err_free_sg_table; + } + + rknpu_obj->sgt = sgt; + + return ret; + +err_free_sg_table: + sg_free_table(sgt); +err_free_sgt: + kfree(sgt); +err_free_dma: + dma_free_attrs(drm->dev, rknpu_obj->size, rknpu_obj->cookie, + rknpu_obj->dma_addr, rknpu_obj->dma_attrs); +err_free: + rknpu_gem_free_page(rknpu_obj->pages); + + return ret; +} + +static void rknpu_gem_free_buf(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; +#if RKNPU_GEM_ALLOC_FROM_PAGES + struct rknpu_device *rknpu_dev = drm->dev_private; 
+#endif + + if (!rknpu_obj->dma_addr) { + LOG_DEBUG("dma handle is invalid.\n"); + return; + } + +#if RKNPU_GEM_ALLOC_FROM_PAGES + if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && + rknpu_dev->iommu_en) { + rknpu_gem_put_pages(rknpu_obj); + return; + } +#endif + + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + + dma_free_attrs(drm->dev, rknpu_obj->size, rknpu_obj->cookie, + rknpu_obj->dma_addr, rknpu_obj->dma_attrs); + + rknpu_gem_free_page(rknpu_obj->pages); + + rknpu_obj->dma_addr = 0; +} + +static int rknpu_gem_handle_create(struct drm_gem_object *obj, + struct drm_file *file_priv, + unsigned int *handle) +{ + int ret = -EINVAL; + /* + * allocate a id of idr table where the obj is registered + * and handle has the id what user can see. + */ + ret = drm_gem_handle_create(file_priv, obj, handle); + if (ret) + return ret; + + LOG_DEBUG("gem handle: %#x\n", *handle); + + /* drop reference from allocate - handle holds it now. */ + rknpu_gem_object_put(obj); + + return 0; +} + +static int rknpu_gem_handle_destroy(struct drm_file *file_priv, + unsigned int handle) +{ + return drm_gem_handle_delete(file_priv, handle); +} + +static struct rknpu_gem_object *rknpu_gem_init(struct drm_device *drm, + unsigned long size) +{ + struct rknpu_gem_object *rknpu_obj = NULL; + struct drm_gem_object *obj = NULL; + gfp_t gfp_mask; + int ret = -EINVAL; + + rknpu_obj = kzalloc(sizeof(*rknpu_obj), GFP_KERNEL); + if (!rknpu_obj) + return ERR_PTR(-ENOMEM); + + obj = &rknpu_obj->base; + + ret = drm_gem_object_init(drm, obj, size); + if (ret < 0) { + LOG_DEV_ERROR(drm->dev, "failed to initialize gem object\n"); + kfree(rknpu_obj); + return ERR_PTR(ret); + } + + rknpu_obj->size = rknpu_obj->base.size; + + gfp_mask = mapping_gfp_mask(obj->filp->f_mapping); + + if (rknpu_obj->flags & RKNPU_MEM_ZEROING) + gfp_mask |= __GFP_ZERO; + + if (!(rknpu_obj->flags & RKNPU_MEM_NON_DMA32)) { + gfp_mask &= ~__GFP_HIGHMEM; + gfp_mask |= __GFP_DMA32; + } + + mapping_set_gfp_mask(obj->filp->f_mapping, gfp_mask); + + return rknpu_obj; +} + +static void rknpu_gem_release(struct rknpu_gem_object *rknpu_obj) +{ + /* release file pointer to gem object. 
*/ + drm_gem_object_release(&rknpu_obj->base); + kfree(rknpu_obj); +} + +static int rknpu_gem_alloc_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct rknpu_device *rknpu_dev = drm->dev_private; + struct iommu_domain *domain = NULL; + struct rknpu_iommu_dma_cookie *cookie = NULL; + struct iova_domain *iovad = NULL; + struct scatterlist *s = NULL; + unsigned long length = 0; + unsigned long size = 0; + unsigned long offset = 0; + int i = 0; + int ret = -EINVAL; + + /* iova map to sram */ + domain = iommu_get_domain_for_dev(rknpu_dev->dev); + if (!domain) { + LOG_ERROR("failed to get iommu domain!"); + return -EINVAL; + } + + cookie = domain->iova_cookie; + iovad = &cookie->iovad; + rknpu_obj->iova_size = + iova_align(iovad, rknpu_obj->sram_size + rknpu_obj->size); + rknpu_obj->iova_start = rknpu_iommu_dma_alloc_iova( + domain, rknpu_obj->iova_size, dma_get_mask(drm->dev), drm->dev); + if (!rknpu_obj->iova_start) { + LOG_ERROR("iommu_dma_alloc_iova failed\n"); + return -ENOMEM; + } + + LOG_INFO("allocate iova start: %pad, size: %lu\n", + &rknpu_obj->iova_start, rknpu_obj->iova_size); + + /* + * Overview SRAM + DDR map to IOVA + * -------- + * sram_size: rknpu_obj->sram_size + * - allocate from SRAM, this size value has been page-aligned + * size: rknpu_obj->size + * - allocate from DDR pages, this size value has been page-aligned + * iova_size: rknpu_obj->iova_size + * - from iova_align(sram_size + size) + * - it may be larger than the (sram_size + size), and the larger part is not mapped + * -------- + * + * |<- sram_size ->| |<- - - - size - - - ->| + * +---------------+ +----------------------+ + * | SRAM | | DDR | + * +---------------+ +----------------------+ + * | | + * | V | V | + * +---------------------------------------+ + * | IOVA range | + * +---------------------------------------+ + * |<- - - - - - - iova_size - - - - - - ->| + * + */ + offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + ret = iommu_map(domain, rknpu_obj->iova_start, + rknpu_dev->sram_start + offset, rknpu_obj->sram_size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + LOG_ERROR("sram iommu_map error: %d\n", ret); + goto free_iova; + } + + rknpu_obj->dma_addr = rknpu_obj->iova_start; + + if (rknpu_obj->size == 0) { + LOG_INFO("allocate sram size: %lu\n", rknpu_obj->sram_size); + return 0; + } + + rknpu_obj->pages = drm_gem_get_pages(&rknpu_obj->base); + if (IS_ERR(rknpu_obj->pages)) { + ret = PTR_ERR(rknpu_obj->pages); + LOG_ERROR("failed to get pages: %d\n", ret); + goto sram_unmap; + } + + rknpu_obj->num_pages = rknpu_obj->size >> PAGE_SHIFT; + +// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +// rknpu_obj->sgt = drm_prime_pages_to_sg(drm, rknpu_obj->pages, +// rknpu_obj->num_pages); +// #else + rknpu_obj->sgt = + drm_prime_pages_to_sg(drm, rknpu_obj->pages, rknpu_obj->num_pages); +// #endif + if (IS_ERR(rknpu_obj->sgt)) { + ret = PTR_ERR(rknpu_obj->sgt); + LOG_ERROR("failed to allocate sgt: %d\n", ret); + goto put_pages; + } + + length = rknpu_obj->size; + offset = rknpu_obj->iova_start + rknpu_obj->sram_size; + + for_each_sg(rknpu_obj->sgt->sgl, s, rknpu_obj->sgt->nents, i) { + size = (length < s->length) ? 
length : s->length; + + ret = iommu_map(domain, offset, sg_phys(s), size, + IOMMU_READ | IOMMU_WRITE); + if (ret) { + LOG_ERROR("ddr iommu_map error: %d\n", ret); + goto sgl_unmap; + } + + length -= size; + offset += size; + + if (length == 0) + break; + } + + LOG_INFO("allocate size: %lu with sram size: %lu\n", rknpu_obj->size, + rknpu_obj->sram_size); + + return 0; + +sgl_unmap: + iommu_unmap(domain, rknpu_obj->iova_start + rknpu_obj->sram_size, + rknpu_obj->size - length); + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + +put_pages: + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, false, false); + +sram_unmap: + iommu_unmap(domain, rknpu_obj->iova_start, rknpu_obj->sram_size); + +free_iova: + rknpu_iommu_dma_free_iova(domain->iova_cookie, rknpu_obj->iova_start, + rknpu_obj->iova_size); + + return ret; +} + +static void rknpu_gem_free_buf_with_sram(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_device *drm = rknpu_obj->base.dev; + struct rknpu_device *rknpu_dev = drm->dev_private; + struct iommu_domain *domain = NULL; + + domain = iommu_get_domain_for_dev(rknpu_dev->dev); + if (domain) { + iommu_unmap(domain, rknpu_obj->iova_start, + rknpu_obj->sram_size); + if (rknpu_obj->size > 0) + iommu_unmap(domain, + rknpu_obj->iova_start + + rknpu_obj->sram_size, + rknpu_obj->size); + rknpu_iommu_dma_free_iova(domain->iova_cookie, + rknpu_obj->iova_start, + rknpu_obj->iova_size); + } + + if (rknpu_obj->pages) + drm_gem_put_pages(&rknpu_obj->base, rknpu_obj->pages, true, + true); + + if (rknpu_obj->sgt != NULL) { + sg_free_table(rknpu_obj->sgt); + kfree(rknpu_obj->sgt); + } +} + +struct rknpu_gem_object *rknpu_gem_object_create(struct drm_device *drm, + unsigned int flags, + unsigned long size, + unsigned long sram_size) +{ + struct rknpu_device *rknpu_dev = drm->dev_private; + struct rknpu_gem_object *rknpu_obj = NULL; + size_t remain_ddr_size = 0; + int ret = -EINVAL; + + if (!size) { + LOG_DEV_ERROR(drm->dev, "invalid buffer size: %lu\n", size); + return ERR_PTR(-EINVAL); + } + + remain_ddr_size = round_up(size, PAGE_SIZE); + + if (!rknpu_dev->iommu_en && (flags & RKNPU_MEM_NON_CONTIGUOUS)) { + /* + * when no IOMMU is available, all allocated buffers are + * contiguous anyway, so drop RKNPU_MEM_NON_CONTIGUOUS flag + */ + flags &= ~RKNPU_MEM_NON_CONTIGUOUS; + LOG_WARN( + "non-contiguous allocation is not supported without IOMMU, falling back to contiguous buffer\n"); + } + + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + (flags & RKNPU_MEM_TRY_ALLOC_SRAM) && rknpu_dev->sram_size > 0) { + size_t sram_free_size = 0; + size_t real_sram_size = 0; + + if (sram_size != 0) + sram_size = round_up(sram_size, PAGE_SIZE); + + rknpu_obj = rknpu_gem_init(drm, remain_ddr_size); + if (IS_ERR(rknpu_obj)) + return rknpu_obj; + + /* set memory type and cache attribute from user side. 
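+		 * Illustrative sizing of the SRAM slice chosen below:
+		 * real_sram_size = min(remain_ddr_size, sram_size hint if
+		 * non-zero, sram_free_size); e.g. a 1 MiB request against
+		 * 512 KiB of free SRAM selects a 512 KiB SRAM slice.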
*/ + rknpu_obj->flags = flags; + + sram_free_size = rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + if (sram_free_size > 0) { + real_sram_size = remain_ddr_size; + if (sram_size != 0 && remain_ddr_size > sram_size) + real_sram_size = sram_size; + if (real_sram_size > sram_free_size) + real_sram_size = sram_free_size; + ret = rknpu_mm_alloc(rknpu_dev->sram_mm, real_sram_size, + &rknpu_obj->sram_obj); + if (ret != 0) { + sram_free_size = + rknpu_dev->sram_mm->free_chunks * + rknpu_dev->sram_mm->chunk_size; + LOG_WARN( + "mm allocate %zu failed, ret: %d, free size: %zu\n", + real_sram_size, ret, sram_free_size); + real_sram_size = 0; + } + } + + if (real_sram_size > 0) { + rknpu_obj->sram_size = real_sram_size; + + ret = rknpu_gem_alloc_buf_with_sram(rknpu_obj); + if (ret < 0) + goto mm_free; + remain_ddr_size = 0; + } + } + + if (remain_ddr_size > 0) { + rknpu_obj = rknpu_gem_init(drm, remain_ddr_size); + if (IS_ERR(rknpu_obj)) + return rknpu_obj; + + /* set memory type and cache attribute from user side. */ + rknpu_obj->flags = flags; + + ret = rknpu_gem_alloc_buf(rknpu_obj); + if (ret < 0) + goto gem_release; + } + + if (rknpu_obj) + LOG_DEBUG( + "created dma addr: %pad, cookie: %p, ddr size: %lu, sram size: %lu, attrs: %#lx, flags: %#x\n", + &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, + rknpu_obj->sram_size, rknpu_obj->dma_attrs, rknpu_obj->flags); + + return rknpu_obj; + +mm_free: + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + rknpu_obj->sram_obj != NULL) + rknpu_mm_free(rknpu_dev->sram_mm, rknpu_obj->sram_obj); + +gem_release: + rknpu_gem_release(rknpu_obj); + + return ERR_PTR(ret); +} + +void rknpu_gem_object_destroy(struct rknpu_gem_object *rknpu_obj) +{ + struct drm_gem_object *obj = &rknpu_obj->base; + + LOG_DEBUG( + "destroy dma addr: %pad, cookie: %p, size: %lu, attrs: %#lx, flags: %#x, handle count: %d\n", + &rknpu_obj->dma_addr, rknpu_obj->cookie, rknpu_obj->size, + rknpu_obj->dma_attrs, rknpu_obj->flags, obj->handle_count); + + /* + * do not release memory region from exporter. + * + * the region will be released by exporter + * once dmabuf's refcount becomes 0. 
+ */ + if (obj->import_attach) { + drm_prime_gem_destroy(obj, rknpu_obj->sgt); + rknpu_gem_free_page(rknpu_obj->pages); + } else { + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && + rknpu_obj->sram_size > 0) { + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + + if (rknpu_obj->sram_obj != NULL) + rknpu_mm_free(rknpu_dev->sram_mm, + rknpu_obj->sram_obj); + rknpu_gem_free_buf_with_sram(rknpu_obj); + } else { + rknpu_gem_free_buf(rknpu_obj); + } + } + + rknpu_gem_release(rknpu_obj); +} + +int rknpu_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_mem_create *args = data; + struct rknpu_gem_object *rknpu_obj = NULL; + int ret = -EINVAL; + + rknpu_obj = rknpu_gem_object_find(file_priv, args->handle); + if (!rknpu_obj) { + rknpu_obj = rknpu_gem_object_create( + dev, args->flags, args->size, args->sram_size); + if (IS_ERR(rknpu_obj)) + return PTR_ERR(rknpu_obj); + + ret = rknpu_gem_handle_create(&rknpu_obj->base, file_priv, + &args->handle); + if (ret) { + rknpu_gem_object_destroy(rknpu_obj); + return ret; + } + } + + // rknpu_gem_object_get(&rknpu_obj->base); + + args->size = rknpu_obj->size; + args->sram_size = rknpu_obj->sram_size; + args->obj_addr = (__u64)(uintptr_t)rknpu_obj; + args->dma_addr = rknpu_obj->dma_addr; + + return 0; +} + +int rknpu_gem_map_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_mem_map *args = data; + +// #if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE +// return rknpu_gem_dumb_map_offset(file_priv, dev, args->handle, +// &args->offset); +// #else + return drm_gem_dumb_map_offset(file_priv, dev, args->handle, + &args->offset); +// #endif +} + +int rknpu_gem_destroy_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_gem_object *rknpu_obj = NULL; + struct rknpu_mem_destroy *args = data; + + rknpu_obj = rknpu_gem_object_find(file_priv, args->handle); + if (!rknpu_obj) + return -EINVAL; + + // rknpu_gem_object_put(&rknpu_obj->base); + + return rknpu_gem_handle_destroy(file_priv, args->handle); +} + +#if RKNPU_GEM_ALLOC_FROM_PAGES +/* + * __vm_map_pages - maps range of kernel pages into user vma + * @vma: user vma to map to + * @pages: pointer to array of source kernel pages + * @num: number of pages in page array + * @offset: user's requested vm_pgoff + * + * This allows drivers to map range of kernel pages into a user vma. + * + * Return: 0 on success and error code otherwise. 
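+ * Example (sketch): for a 16-page object and a vma spanning 8 pages
+ * with a requested offset of 4 pages, this maps pages[4..11]:
+ *
+ *   err = __vm_map_pages(vma, pages, 16, 4);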
+ */ +static int __vm_map_pages(struct vm_area_struct *vma, struct page **pages, + unsigned long num, unsigned long offset) +{ + unsigned long count = vma_pages(vma); + unsigned long uaddr = vma->vm_start; + int ret = -EINVAL, i = 0; + + /* Fail if the user requested offset is beyond the end of the object */ + if (offset >= num) + return -ENXIO; + + /* Fail if the user requested size exceeds available object size */ + if (count > num - offset) + return -ENXIO; + + for (i = 0; i < count; i++) { + ret = vm_insert_page(vma, uaddr, pages[offset + i]); + if (ret < 0) + return ret; + uaddr += PAGE_SIZE; + } + + return 0; +} + +static int rknpu_gem_mmap_pages(struct rknpu_gem_object *rknpu_obj, + struct vm_area_struct *vma) +{ + struct drm_device *drm = rknpu_obj->base.dev; + int ret = -EINVAL; + + vma->vm_flags |= VM_MIXEDMAP; + + ret = __vm_map_pages(vma, rknpu_obj->pages, rknpu_obj->num_pages, + vma->vm_pgoff); + if (ret < 0) + LOG_DEV_ERROR(drm->dev, "failed to map pages into vma: %d\n", + ret); + + return ret; +} +#endif + +static int rknpu_gem_mmap_buffer(struct rknpu_gem_object *rknpu_obj, + struct vm_area_struct *vma) +{ + struct drm_device *drm = rknpu_obj->base.dev; +#if RKNPU_GEM_ALLOC_FROM_PAGES + struct rknpu_device *rknpu_dev = drm->dev_private; +#endif + unsigned long vm_size = 0; + int ret = -EINVAL; + + /* + * clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. + */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_pgoff = 0; + + vm_size = vma->vm_end - vma->vm_start; + + /* check if user-requested size is valid. */ + if (vm_size > rknpu_obj->size) + return -EINVAL; + + if (rknpu_obj->sram_size > 0) { + unsigned long offset = 0; + unsigned long num_pages = 0; + int i = 0; + + vma->vm_flags |= VM_MIXEDMAP; + + offset = rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + vma->vm_pgoff = __phys_to_pfn(rknpu_dev->sram_start + offset); + + ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + rknpu_obj->sram_size, vma->vm_page_prot); + if (ret) + return -EAGAIN; + + if (rknpu_obj->size == 0) + return 0; + + offset = rknpu_obj->sram_size; + + num_pages = (vm_size - rknpu_obj->sram_size) / PAGE_SIZE; + for (i = 0; i < num_pages; ++i) { + ret = vm_insert_page(vma, vma->vm_start + offset, + rknpu_obj->pages[i]); + if (ret < 0) + return ret; + offset += PAGE_SIZE; + } + + return 0; + } + +#if RKNPU_GEM_ALLOC_FROM_PAGES + if ((rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS) && + rknpu_dev->iommu_en) { + return rknpu_gem_mmap_pages(rknpu_obj, vma); + } +#endif + + ret = dma_mmap_attrs(drm->dev, vma, rknpu_obj->cookie, + rknpu_obj->dma_addr, rknpu_obj->size, + rknpu_obj->dma_attrs); + if (ret < 0) { + LOG_DEV_ERROR(drm->dev, "failed to mmap, ret: %d\n", ret); + return ret; + } + + return 0; +} + +void rknpu_gem_free_object(struct drm_gem_object *obj) +{ + rknpu_gem_object_destroy(to_rknpu_obj(obj)); +} + +int rknpu_gem_dumb_create(struct drm_file *file_priv, struct drm_device *drm, + struct drm_mode_create_dumb *args) +{ + struct rknpu_device *rknpu_dev = drm->dev_private; + struct rknpu_gem_object *rknpu_obj = NULL; + unsigned int flags = 0; + int ret = -EINVAL; + + /* + * allocate memory to be used for framebuffer. + * - this callback would be called by user application + * with DRM_IOCTL_MODE_CREATE_DUMB command. 
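+	 * Worked example: a 1920x1080, 32 bpp dumb buffer yields
+	 * pitch = 1920 * ((32 + 7) / 8) = 7680 bytes and
+	 * size = 7680 * 1080 = 8294400 bytes.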
+ */ + args->pitch = args->width * ((args->bpp + 7) / 8); + args->size = args->pitch * args->height; + + if (rknpu_dev->iommu_en) + flags = RKNPU_MEM_NON_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE; + else + flags = RKNPU_MEM_CONTIGUOUS | RKNPU_MEM_WRITE_COMBINE; + + rknpu_obj = rknpu_gem_object_create(drm, flags, args->size, 0); + if (IS_ERR(rknpu_obj)) { + LOG_DEV_ERROR(drm->dev, "gem object allocate failed.\n"); + return PTR_ERR(rknpu_obj); + } + + ret = rknpu_gem_handle_create(&rknpu_obj->base, file_priv, + &args->handle); + if (ret) { + rknpu_gem_object_destroy(rknpu_obj); + return ret; + } + + return 0; +} + +// #if KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE +// int rknpu_gem_dumb_map_offset(struct drm_file *file_priv, +// struct drm_device *drm, uint32_t handle, +// uint64_t *offset) +// { +// struct rknpu_gem_object *rknpu_obj = NULL; +// struct drm_gem_object *obj = NULL; +// int ret = -EINVAL; + +// rknpu_obj = rknpu_gem_object_find(file_priv, handle); +// if (!rknpu_obj) +// return 0; + +// /* Don't allow imported objects to be mapped */ +// obj = &rknpu_obj->base; +// if (obj->import_attach) +// return -EINVAL; + +// ret = drm_gem_create_mmap_offset(obj); +// if (ret) +// return ret; + +// *offset = drm_vma_node_offset_addr(&obj->vma_node); + +// return 0; +// } +// #endif + +// #if KERNEL_VERSION(4, 15, 0) <= LINUX_VERSION_CODE +// vm_fault_t rknpu_gem_fault(struct vm_fault *vmf) +// { +// struct vm_area_struct *vma = vmf->vma; +// struct drm_gem_object *obj = vma->vm_private_data; +// struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); +// struct drm_device *drm = rknpu_obj->base.dev; +// unsigned long pfn = 0; +// pgoff_t page_offset = 0; + +// page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + +// if (page_offset >= (rknpu_obj->size >> PAGE_SHIFT)) { +// LOG_DEV_ERROR(drm->dev, "invalid page offset\n"); +// return VM_FAULT_SIGBUS; +// } + +// pfn = page_to_pfn(rknpu_obj->pages[page_offset]); +// return vmf_insert_mixed(vma, vmf->address, +// __pfn_to_pfn_t(pfn, PFN_DEV)); +// } +// #elif KERNEL_VERSION(4, 14, 0) <= LINUX_VERSION_CODE +// int rknpu_gem_fault(struct vm_fault *vmf) +// { +// struct vm_area_struct *vma = vmf->vma; +// struct drm_gem_object *obj = vma->vm_private_data; +// struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); +// struct drm_device *drm = rknpu_obj->base.dev; +// unsigned long pfn = 0; +// pgoff_t page_offset = 0; +// int ret = -EINVAL; + +// page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + +// if (page_offset >= (rknpu_obj->size >> PAGE_SHIFT)) { +// LOG_DEV_ERROR(drm->dev, "invalid page offset\n"); +// ret = -EINVAL; +// goto out; +// } + +// pfn = page_to_pfn(rknpu_obj->pages[page_offset]); +// ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); + +// out: +// switch (ret) { +// case 0: +// case -ERESTARTSYS: +// case -EINTR: +// return VM_FAULT_NOPAGE; +// case -ENOMEM: +// return VM_FAULT_OOM; +// default: +// return VM_FAULT_SIGBUS; +// } +// } +// #else +int rknpu_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct drm_gem_object *obj = vma->vm_private_data; + struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); + struct drm_device *drm = rknpu_obj->base.dev; + unsigned long pfn = 0; + pgoff_t page_offset = 0; + int ret = -EINVAL; + + page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> + PAGE_SHIFT; + + if (page_offset >= (rknpu_obj->size >> PAGE_SHIFT)) { + LOG_DEV_ERROR(drm->dev, "invalid page offset\n"); + ret = -EINVAL; + goto out; + } + + pfn = 
page_to_pfn(rknpu_obj->pages[page_offset]); + ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, + __pfn_to_pfn_t(pfn, PFN_DEV)); + +out: + switch (ret) { + case 0: + case -ERESTARTSYS: + case -EINTR: + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + default: + return VM_FAULT_SIGBUS; + } +} +// #endif + +static int rknpu_gem_mmap_obj(struct drm_gem_object *obj, + struct vm_area_struct *vma) +{ + struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); + int ret = -EINVAL; + + LOG_DEBUG("flags: %#x\n", rknpu_obj->flags); + + /* non-cacheable as default. */ + if (rknpu_obj->flags & RKNPU_MEM_CACHEABLE) { + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + } else if (rknpu_obj->flags & RKNPU_MEM_WRITE_COMBINE) { + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + } else { + vma->vm_page_prot = + pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + } + + ret = rknpu_gem_mmap_buffer(rknpu_obj, vma); + if (ret) + goto err_close_vm; + + return 0; + +err_close_vm: + drm_gem_vm_close(vma); + + return ret; +} + +int rknpu_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct drm_gem_object *obj = NULL; + int ret = -EINVAL; + + /* set vm_area_struct. */ + ret = drm_gem_mmap(filp, vma); + if (ret < 0) { + LOG_ERROR("failed to mmap, ret: %d\n", ret); + return ret; + } + + obj = vma->vm_private_data; + + if (obj->import_attach) + return dma_buf_mmap(obj->dma_buf, vma, 0); + + return rknpu_gem_mmap_obj(obj, vma); +} + +/* low-level interface prime helpers */ +// #if KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE +// struct drm_gem_object *rknpu_gem_prime_import(struct drm_device *dev, +// struct dma_buf *dma_buf) +// { +// return drm_gem_prime_import_dev(dev, dma_buf, dev->dev); +// } +// #endif + +struct sg_table *rknpu_gem_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); + int npages = 0; + + npages = rknpu_obj->size >> PAGE_SHIFT; + +// #if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE +// return drm_prime_pages_to_sg(obj->dev, rknpu_obj->pages, npages); +// #else + return drm_prime_pages_to_sg(obj->dev, rknpu_obj->pages, npages); +// #endif +} + +struct drm_gem_object * +rknpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct rknpu_gem_object *rknpu_obj = NULL; + int npages = 0; + int ret = -EINVAL; + + rknpu_obj = rknpu_gem_init(dev, attach->dmabuf->size); + if (IS_ERR(rknpu_obj)) { + ret = PTR_ERR(rknpu_obj); + return ERR_PTR(ret); + } + + rknpu_obj->dma_addr = sg_dma_address(sgt->sgl); + + npages = rknpu_obj->size >> PAGE_SHIFT; + rknpu_obj->pages = rknpu_gem_alloc_page(npages); + if (!rknpu_obj->pages) { + ret = -ENOMEM; + goto err; + } + + ret = drm_prime_sg_to_page_array(sgt, rknpu_obj->pages, npages); + if (ret < 0) + goto err_free_large; + + rknpu_obj->sgt = sgt; + + if (sgt->nents == 1) { + /* always physically continuous memory if sgt->nents is 1. */ + rknpu_obj->flags |= RKNPU_MEM_CONTIGUOUS; + } else { + /* + * this case could be CONTIG or NONCONTIG type but for now + * sets NONCONTIG. + * TODO. we have to find a way that exporter can notify + * the type of its own buffer to importer. 
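+		 * Note: sgt->nents counts DMA-mapped segments here, so a
+		 * single entry strictly only guarantees a contiguous DMA
+		 * range.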
+ */ + rknpu_obj->flags |= RKNPU_MEM_NON_CONTIGUOUS; + } + + return &rknpu_obj->base; + +err_free_large: + rknpu_gem_free_page(rknpu_obj->pages); +err: + rknpu_gem_release(rknpu_obj); + return ERR_PTR(ret); +} + +void *rknpu_gem_prime_vmap(struct drm_gem_object *obj) +{ + struct rknpu_gem_object *rknpu_obj = to_rknpu_obj(obj); + + if (!rknpu_obj->pages) + return NULL; + + return vmap(rknpu_obj->pages, rknpu_obj->num_pages, VM_MAP, + PAGE_KERNEL); +} + +void rknpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) +{ + vunmap(vaddr); +} + +int rknpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret = -EINVAL; + + ret = drm_gem_mmap_obj(obj, obj->size, vma); + if (ret < 0) + return ret; + + return rknpu_gem_mmap_obj(obj, vma); +} + +int rknpu_gem_sync_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct rknpu_gem_object *rknpu_obj = NULL; + struct rknpu_mem_sync *args = data; + struct scatterlist *sg; + unsigned long length, offset = 0; + unsigned long sg_left, size = 0; + unsigned long len = 0; + int i; + + rknpu_obj = (struct rknpu_gem_object *)(uintptr_t)args->obj_addr; + if (!rknpu_obj) + return -EINVAL; + + if (!(rknpu_obj->flags & RKNPU_MEM_CACHEABLE)) + return -EINVAL; + + if (!(rknpu_obj->flags & RKNPU_MEM_NON_CONTIGUOUS)) { + if (args->flags & RKNPU_MEM_SYNC_TO_DEVICE) { + dma_sync_single_range_for_device( + dev->dev, rknpu_obj->dma_addr, args->offset, + args->size, DMA_TO_DEVICE); + } + if (args->flags & RKNPU_MEM_SYNC_FROM_DEVICE) { + dma_sync_single_range_for_cpu(dev->dev, + rknpu_obj->dma_addr, + args->offset, args->size, + DMA_FROM_DEVICE); + } + } else { + length = args->size; + offset = args->offset; + + if (IS_ENABLED(CONFIG_ROCKCHIP_RKNPU_SRAM) && rknpu_obj->sram_size > 0) { + struct drm_gem_object *obj = &rknpu_obj->base; + struct rknpu_device *rknpu_dev = obj->dev->dev_private; + unsigned long sram_offset = + rknpu_obj->sram_obj->range_start * + rknpu_dev->sram_mm->chunk_size; + if ((offset + length) <= rknpu_obj->sram_size) { + __dma_map_area(rknpu_dev->sram_base_io + + offset + sram_offset, + length, DMA_TO_DEVICE); + __dma_unmap_area(rknpu_dev->sram_base_io + + offset + sram_offset, + length, DMA_FROM_DEVICE); + length = 0; + offset = 0; + } else if (offset >= rknpu_obj->sram_size) { + offset -= rknpu_obj->sram_size; + } else { + unsigned long sram_length = + rknpu_obj->sram_size - offset; + __dma_map_area(rknpu_dev->sram_base_io + + offset + sram_offset, + sram_length, DMA_TO_DEVICE); + __dma_unmap_area(rknpu_dev->sram_base_io + + offset + sram_offset, + sram_length, DMA_FROM_DEVICE); + length -= sram_length; + offset = 0; + } + } + + for_each_sg(rknpu_obj->sgt->sgl, sg, rknpu_obj->sgt->nents, + i) { + if (length == 0) + break; + + len += sg->length; + if (len <= offset) + continue; + + sg_left = len - offset; + size = (length < sg_left) ? 
length : sg_left; + + if (args->flags & RKNPU_MEM_SYNC_TO_DEVICE) { + dma_sync_sg_for_device(dev->dev, sg, 1, + DMA_TO_DEVICE); + } + + if (args->flags & RKNPU_MEM_SYNC_FROM_DEVICE) { + dma_sync_sg_for_cpu(dev->dev, sg, 1, + DMA_FROM_DEVICE); + } + + offset += size; + length -= size; + } + } + + return 0; +} diff --git a/drivers/rknpu/rknpu_job.c b/drivers/rknpu/rknpu_job.c new file mode 100644 index 0000000000000000000000000000000000000000..dbfffdfff7a5fa4d80f2198543eda539dab789f3 --- /dev/null +++ b/drivers/rknpu/rknpu_job.c @@ -0,0 +1,910 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/sync_file.h> +#include <linux/io.h> + +#include "rknpu_ioctl.h" +#include "rknpu_drv.h" +#include "rknpu_reset.h" +#include "rknpu_gem.h" +#include "rknpu_fence.h" +#include "rknpu_job.h" +#include "rknpu_mem.h" + +#define _REG_READ(base, offset) readl(base + (offset)) +#define _REG_WRITE(base, value, offset) writel(value, base + (offset)) + +#define REG_READ(offset) _REG_READ(rknpu_core_base, offset) +#define REG_WRITE(value, offset) _REG_WRITE(rknpu_core_base, value, offset) + +static int rknpu_core_index(int core_mask) +{ + int index = 0; + + if (core_mask & RKNPU_CORE0_MASK) + index = 0; + else if (core_mask & RKNPU_CORE1_MASK) + index = 1; + else if (core_mask & RKNPU_CORE2_MASK) + index = 2; + + return index; +} + +static int rknpu_core_mask(int core_index) +{ + int core_mask = RKNPU_CORE_AUTO_MASK; + + switch (core_index) { + case 0: + core_mask = RKNPU_CORE0_MASK; + break; + case 1: + core_mask = RKNPU_CORE1_MASK; + break; + case 2: + core_mask = RKNPU_CORE2_MASK; + break; + default: + break; + } + + return core_mask; +} + +static int rknn_get_task_number(struct rknpu_job *job, int core_index) +{ + int task_num = job->args->task_number; + + if (job->use_core_num == 2) + task_num = job->args->subcore_task[core_index].task_number; + else if (job->use_core_num == 3) + task_num = job->args->subcore_task[core_index + 2].task_number; + + return task_num; +} + +static void rknpu_job_free(struct rknpu_job *job) +{ +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + struct rknpu_gem_object *task_obj = NULL; + + task_obj = + (struct rknpu_gem_object *)(uintptr_t)job->args->task_obj_addr; + if (task_obj) + rknpu_gem_object_put(&task_obj->base); +#endif + + if (job->fence) + dma_fence_put(job->fence); + + if (job->args_owner) + kfree(job->args); + + kfree(job); +} + +static int rknpu_job_cleanup(struct rknpu_job *job) +{ + rknpu_job_free(job); + + return 0; +} + +static void rknpu_job_cleanup_work(struct work_struct *work) +{ + struct rknpu_job *job = + container_of(work, struct rknpu_job, cleanup_work); + + rknpu_job_cleanup(job); +} + +static inline struct rknpu_job *rknpu_job_alloc(struct rknpu_device *rknpu_dev, + struct rknpu_submit *args) +{ + struct rknpu_job *job = NULL; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + struct rknpu_gem_object *task_obj = NULL; +#endif + if (rknpu_dev->config->num_irqs == 1) + args->core_mask = RKNPU_CORE0_MASK; + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return NULL; + + job->timestamp = ktime_get(); + job->rknpu_dev = rknpu_dev; + job->use_core_num = (args->core_mask & RKNPU_CORE0_MASK) + + ((args->core_mask & RKNPU_CORE1_MASK) >> 1) + + ((args->core_mask & RKNPU_CORE2_MASK) >> 2); + job->run_count = job->use_core_num; + job->interrupt_count = job->use_core_num; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + 
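+	/*
+	 * Hold a reference on the task object for the lifetime of the job;
+	 * rknpu_job_free() drops it again.
+	 */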
task_obj = (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; + if (task_obj) + rknpu_gem_object_get(&task_obj->base); +#endif + + if (!(args->flags & RKNPU_JOB_NONBLOCK)) { + job->args = args; + job->args_owner = false; + return job; + } + + job->args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!job->args) { + kfree(job); + return NULL; + } + *job->args = *args; + job->args_owner = true; + + INIT_WORK(&job->cleanup_work, rknpu_job_cleanup_work); + + return job; +} + +static inline int rknpu_job_wait(struct rknpu_job *job) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_submit *args = job->args; + struct rknpu_task *last_task = NULL; + struct rknpu_subcore_data *subcore_data = NULL; + void __iomem *rknpu_core_base = NULL; + int core_index = rknpu_core_index(job->args->core_mask); + unsigned long flags; + int wait_count = 0; + int ret = -EINVAL; + + subcore_data = &rknpu_dev->subcore_datas[core_index]; + + do { + ret = wait_event_interruptible_timeout( + subcore_data->job_done_wq, + job->flags & RKNPU_JOB_DONE || rknpu_dev->soft_reseting, + msecs_to_jiffies(args->timeout)); + if (++wait_count >= 3) + break; + } while (ret == 0 && job->in_queue[core_index]); + + if (job->in_queue[core_index]) { + spin_lock_irqsave(&rknpu_dev->lock, flags); + list_del_init(&job->head[core_index]); + subcore_data->task_num -= rknn_get_task_number(job, core_index); + job->in_queue[core_index] = false; + spin_unlock_irqrestore(&rknpu_dev->lock, flags); + return ret < 0 ? ret : -EINVAL; + } + + last_task = job->last_task; + if (!last_task) + return ret < 0 ? ret : -EINVAL; + + last_task->int_status = job->int_status[core_index]; + + if (ret <= 0) { + args->task_counter = 0; + rknpu_core_base = rknpu_dev->base[core_index]; + if (args->flags & RKNPU_JOB_PC) { + uint32_t task_status = REG_READ( + rknpu_dev->config->pc_task_status_offset); + args->task_counter = + (task_status & + rknpu_dev->config->pc_task_number_mask); + } + + LOG_ERROR( + "failed to wait job, task counter: %d, flags: %#x, ret = %d, elapsed time: %lldus\n", + args->task_counter, args->flags, ret, + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + + return ret < 0 ? ret : -ETIMEDOUT; + } + + if (!(job->flags & RKNPU_JOB_DONE)) + return -EINVAL; + + args->task_counter = args->task_number; + + return 0; +} + +static inline int rknpu_job_commit_pc(struct rknpu_job *job, int core_index) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_submit *args = job->args; +#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM + struct rknpu_gem_object *task_obj = + (struct rknpu_gem_object *)(uintptr_t)args->task_obj_addr; +#endif +#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP + struct rknpu_mem_object *task_obj = + (struct rknpu_mem_object *)(uintptr_t)args->task_obj_addr; +#endif + struct rknpu_task *task_base = NULL; + struct rknpu_task *first_task = NULL; + struct rknpu_task *last_task = NULL; + void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + int task_start = args->task_start; + int task_end = args->task_start + args->task_number - 1; + int task_number = args->task_number; + int task_pp_en = args->flags & RKNPU_JOB_PINGPONG ? 
1 : 0; + int pc_data_amount_scale = rknpu_dev->config->pc_data_amount_scale; + int pc_task_number_bits = rknpu_dev->config->pc_task_number_bits; + int i = 0; + + if (!task_obj) + return -EINVAL; + + if (rknpu_dev->config->num_irqs > 1) { + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (i == core_index) { + REG_WRITE((0xe + 0x10000000 * i), 0x1004); + REG_WRITE((0xe + 0x10000000 * i), 0x3004); + } + } + + if (job->use_core_num == 1) { + task_start = args->subcore_task[core_index].task_start; + task_end = args->subcore_task[core_index].task_start + + args->subcore_task[core_index].task_number - + 1; + task_number = + args->subcore_task[core_index].task_number; + } else if (job->use_core_num == 2) { + task_start = args->subcore_task[core_index].task_start; + task_end = args->subcore_task[core_index].task_start + + args->subcore_task[core_index].task_number - + 1; + task_number = + args->subcore_task[core_index].task_number; + } else if (job->use_core_num == 3) { + task_start = + args->subcore_task[core_index + 2].task_start; + task_end = + args->subcore_task[core_index + 2].task_start + + args->subcore_task[core_index + 2].task_number - + 1; + task_number = + args->subcore_task[core_index + 2].task_number; + } + } + + task_base = task_obj->kv_addr; + + first_task = &task_base[task_start]; + last_task = &task_base[task_end]; + + REG_WRITE(first_task->regcmd_addr, RKNPU_OFFSET_PC_DATA_ADDR); + + REG_WRITE((first_task->regcfg_amount + RKNPU_PC_DATA_EXTRA_AMOUNT + + pc_data_amount_scale - 1) / + pc_data_amount_scale - + 1, + RKNPU_OFFSET_PC_DATA_AMOUNT); + + REG_WRITE(last_task->int_mask, RKNPU_OFFSET_INT_MASK); + + REG_WRITE(first_task->int_mask, RKNPU_OFFSET_INT_CLEAR); + + REG_WRITE(((0x6 | task_pp_en) << pc_task_number_bits) | task_number, + RKNPU_OFFSET_PC_TASK_CONTROL); + + REG_WRITE(args->task_base_addr, RKNPU_OFFSET_PC_DMA_BASE_ADDR); + + job->first_task = first_task; + job->last_task = last_task; + job->int_mask[core_index] = last_task->int_mask; + + REG_WRITE(0x1, RKNPU_OFFSET_PC_OP_EN); + REG_WRITE(0x0, RKNPU_OFFSET_PC_OP_EN); + + return 0; +} + +static int rknpu_job_commit(struct rknpu_job *job, int core_index) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_submit *args = job->args; + void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + + // switch to slave mode + REG_WRITE(0x1, RKNPU_OFFSET_PC_DATA_ADDR); + + if (!(args->flags & RKNPU_JOB_PC)) + return -EINVAL; + + return rknpu_job_commit_pc(job, core_index); +} + +static void rknpu_job_next(struct rknpu_device *rknpu_dev, int core_index) +{ + struct rknpu_job *job = NULL; + struct rknpu_subcore_data *subcore_data = NULL; + unsigned long flags; + + if (rknpu_dev->soft_reseting) + return; + + subcore_data = &rknpu_dev->subcore_datas[core_index]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + + if (subcore_data->job || list_empty(&subcore_data->todo_list)) { + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + return; + } + + job = list_first_entry(&subcore_data->todo_list, struct rknpu_job, + head[core_index]); + + list_del_init(&job->head[core_index]); + job->in_queue[core_index] = false; + subcore_data->job = job; + job->run_count--; + job->hw_recoder_time = ktime_get(); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + if (job->run_count == 0) { + if (job->args->core_mask & RKNPU_CORE0_MASK) + job->ret = rknpu_job_commit(job, 0); + if (job->args->core_mask & RKNPU_CORE1_MASK) + job->ret = rknpu_job_commit(job, 1); + if (job->args->core_mask & RKNPU_CORE2_MASK) + 
job->ret = rknpu_job_commit(job, 2); + } +} + +static void rknpu_job_done(struct rknpu_job *job, int ret, int core_index) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_subcore_data *subcore_data = NULL; + unsigned long flags; + ktime_t now = ktime_get(); + + subcore_data = &rknpu_dev->subcore_datas[core_index]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + subcore_data->job = NULL; + subcore_data->task_num -= rknn_get_task_number(job, core_index); + job->interrupt_count--; + subcore_data->timer.busy_time += + ktime_us_delta(now, job->hw_recoder_time); + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + if (job->interrupt_count == 0) { + int use_core_num = job->use_core_num; + + job->flags |= RKNPU_JOB_DONE; + job->ret = ret; + + if (job->fence) + dma_fence_signal(job->fence); + + if (job->flags & RKNPU_JOB_ASYNC) + schedule_work(&job->cleanup_work); + + if (use_core_num > 1) + wake_up(&(&rknpu_dev->subcore_datas[0])->job_done_wq); + else + wake_up(&subcore_data->job_done_wq); + } + + rknpu_job_next(rknpu_dev, core_index); +} + +static void rknpu_job_schedule(struct rknpu_job *job) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_subcore_data *subcore_data = NULL; + int i = 0, core_index = 0; + unsigned long flags; + int task_num_list[3] = { 0, 1, 2 }; + int tmp = 0; + + if ((job->args->core_mask & 0x07) == RKNPU_CORE_AUTO_MASK) { + if (rknpu_dev->subcore_datas[0].task_num > + rknpu_dev->subcore_datas[1].task_num) { + tmp = task_num_list[1]; + task_num_list[1] = task_num_list[0]; + task_num_list[0] = tmp; + } + if (rknpu_dev->subcore_datas[task_num_list[0]].task_num > + rknpu_dev->subcore_datas[2].task_num) { + tmp = task_num_list[2]; + task_num_list[2] = task_num_list[1]; + task_num_list[1] = task_num_list[0]; + task_num_list[0] = tmp; + } else if (rknpu_dev->subcore_datas[task_num_list[1]].task_num > + rknpu_dev->subcore_datas[2].task_num) { + tmp = task_num_list[2]; + task_num_list[2] = task_num_list[1]; + task_num_list[1] = tmp; + } + if (!rknpu_dev->subcore_datas[task_num_list[0]].job) + core_index = task_num_list[0]; + else if (!rknpu_dev->subcore_datas[task_num_list[1]].job) + core_index = task_num_list[1]; + else if (!rknpu_dev->subcore_datas[task_num_list[2]].job) + core_index = task_num_list[2]; + else + core_index = task_num_list[0]; + + job->args->core_mask = rknpu_core_mask(core_index); + job->use_core_num = 1; + job->interrupt_count = 1; + job->run_count = 1; + } + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (job->args->core_mask & rknpu_core_mask(i)) { + subcore_data = &rknpu_dev->subcore_datas[i]; + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + list_add_tail(&job->head[i], &subcore_data->todo_list); + subcore_data->task_num += rknn_get_task_number(job, i); + job->in_queue[i] = true; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } + } + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (job->args->core_mask & rknpu_core_mask(i)) + rknpu_job_next(rknpu_dev, i); + } +} + +static void rknpu_job_abort(struct rknpu_job *job) +{ + struct rknpu_device *rknpu_dev = job->rknpu_dev; + struct rknpu_subcore_data *subcore_data = NULL; + int core_index = rknpu_core_index(job->args->core_mask); + void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + unsigned long flags; + int i = 0; + + msleep(100); + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (job->args->core_mask & rknpu_core_mask(i)) { + subcore_data = &rknpu_dev->subcore_datas[i]; + 
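+			/*
+			 * Detach the job from this subcore only if its
+			 * interrupt has not fired yet; otherwise the IRQ
+			 * path has already accounted for it.
+			 */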
spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + if (job == subcore_data->job && !job->irq_entry[i]) { + subcore_data->job = NULL; + subcore_data->task_num -= + rknn_get_task_number(job, i); + } + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + } + } + + if (job->ret == -ETIMEDOUT) { + LOG_ERROR( + "job timeout, flags: %#x, irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x, elapsed time: %lldus\n", + job->flags, REG_READ(RKNPU_OFFSET_INT_STATUS), + REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), + job->int_mask[core_index], + (REG_READ(rknpu_dev->config->pc_task_status_offset) & + rknpu_dev->config->pc_task_number_mask), + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + rknpu_soft_reset(rknpu_dev); + } else { + LOG_ERROR( + "job abort, flags: %#x, ret: %d, elapsed time: %lldus\n", + job->flags, job->ret, + ktime_to_us(ktime_sub(ktime_get(), job->timestamp))); + } + + rknpu_job_cleanup(job); +} + +static inline uint32_t rknpu_fuzz_status(uint32_t status) +{ + uint32_t fuzz_status = 0; + + if ((status & 0x3) != 0) + fuzz_status |= 0x3; + + if ((status & 0xc) != 0) + fuzz_status |= 0xc; + + if ((status & 0x30) != 0) + fuzz_status |= 0x30; + + if ((status & 0xc0) != 0) + fuzz_status |= 0xc0; + + if ((status & 0x300) != 0) + fuzz_status |= 0x300; + + if ((status & 0xc00) != 0) + fuzz_status |= 0xc00; + + return fuzz_status; +} + +static inline irqreturn_t rknpu_irq_handler(int irq, void *data, int core_index) +{ + struct rknpu_device *rknpu_dev = data; + void __iomem *rknpu_core_base = rknpu_dev->base[core_index]; + struct rknpu_subcore_data *subcore_data = NULL; + struct rknpu_job *job = NULL; + uint32_t status = 0; + unsigned long flags; + + subcore_data = &rknpu_dev->subcore_datas[core_index]; + + spin_lock_irqsave(&rknpu_dev->irq_lock, flags); + job = subcore_data->job; + if (!job) { + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); + rknpu_job_next(rknpu_dev, core_index); + return IRQ_HANDLED; + } + job->irq_entry[core_index] = true; + spin_unlock_irqrestore(&rknpu_dev->irq_lock, flags); + + status = REG_READ(RKNPU_OFFSET_INT_STATUS); + + job->int_status[core_index] = status; + + if (rknpu_fuzz_status(status) != job->int_mask[core_index]) { + LOG_ERROR( + "invalid irq status: %#x, raw status: %#x, require mask: %#x, task counter: %#x\n", + status, REG_READ(RKNPU_OFFSET_INT_RAW_STATUS), + job->int_mask[core_index], + (REG_READ(rknpu_dev->config->pc_task_status_offset) & + rknpu_dev->config->pc_task_number_mask)); + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); + return IRQ_HANDLED; + } + + REG_WRITE(RKNPU_INT_CLEAR, RKNPU_OFFSET_INT_CLEAR); + + rknpu_job_done(job, 0, core_index); + + return IRQ_HANDLED; +} + +irqreturn_t rknpu_core0_irq_handler(int irq, void *data) +{ + return rknpu_irq_handler(irq, data, 0); +} + +irqreturn_t rknpu_core1_irq_handler(int irq, void *data) +{ + return rknpu_irq_handler(irq, data, 1); +} + +irqreturn_t rknpu_core2_irq_handler(int irq, void *data) +{ + return rknpu_irq_handler(irq, data, 2); +} + +static void rknpu_job_timeout_clean(struct rknpu_device *rknpu_dev, + int core_mask) +{ + struct rknpu_job *job = NULL; + unsigned long flags; + ktime_t now = ktime_get(); + struct rknpu_subcore_data *subcore_data = NULL; + int i = 0; + + for (i = 0; i < rknpu_dev->config->num_irqs; i++) { + if (core_mask & rknpu_core_mask(i)) { + subcore_data = &rknpu_dev->subcore_datas[i]; + job = subcore_data->job; + if (job && + ktime_to_ms(ktime_sub(now, job->timestamp)) >= + 
job->args->timeout) {
+				rknpu_soft_reset(rknpu_dev);
+
+				spin_lock_irqsave(&rknpu_dev->irq_lock, flags);
+				subcore_data->job = NULL;
+				spin_unlock_irqrestore(&rknpu_dev->irq_lock,
+						       flags);
+
+				do {
+					schedule_work(&job->cleanup_work);
+
+					spin_lock_irqsave(&rknpu_dev->irq_lock,
+							  flags);
+
+					if (!list_empty(
+						    &subcore_data->todo_list)) {
+						job = list_first_entry(
+							&subcore_data->todo_list,
+							struct rknpu_job,
+							head[i]);
+						list_del_init(&job->head[i]);
+						job->in_queue[i] = false;
+					} else {
+						job = NULL;
+					}
+
+					spin_unlock_irqrestore(
+						&rknpu_dev->irq_lock, flags);
+				} while (job);
+			}
+		}
+	}
+}
+
+static int rknpu_submit(struct rknpu_device *rknpu_dev,
+			struct rknpu_submit *args)
+{
+	struct rknpu_job *job = NULL;
+	int ret = -EINVAL;
+
+	if (args->task_number == 0) {
+		LOG_ERROR("invalid rknpu task number!\n");
+		return -EINVAL;
+	}
+
+	job = rknpu_job_alloc(rknpu_dev, args);
+	if (!job) {
+		LOG_ERROR("failed to allocate rknpu job!\n");
+		return -ENOMEM;
+	}
+
+	if (args->flags & RKNPU_JOB_FENCE_IN) {
+#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
+		struct dma_fence *in_fence;
+
+		in_fence = sync_file_get_fence(args->fence_fd);
+
+		if (!in_fence) {
+			LOG_ERROR("invalid fence in fd, fd: %d\n",
+				  args->fence_fd);
+			rknpu_job_free(job);
+			return -EINVAL;
+		}
+		args->fence_fd = -1;
+
+		/*
+		 * Wait if the fence is from a foreign context, or if the fence
+		 * array contains any fence from a foreign context.
+		 */
+		ret = 0;
+		if (!dma_fence_match_context(in_fence,
+					     rknpu_dev->fence_ctx->context))
+			ret = dma_fence_wait_timeout(in_fence, true,
+						     args->timeout);
+		dma_fence_put(in_fence);
+		if (ret < 0) {
+			if (ret != -ERESTARTSYS)
+				LOG_ERROR("Error (%d) waiting for fence!\n",
+					  ret);
+
+			rknpu_job_free(job);
+			return ret;
+		}
+#else
+		LOG_ERROR(
+			"failed to use rknpu fence, please enable rknpu fence config!\n");
+		rknpu_job_free(job);
+		return -EINVAL;
+#endif
+	}
+
+	if (args->flags & RKNPU_JOB_FENCE_OUT) {
+#ifdef CONFIG_ROCKCHIP_RKNPU_FENCE
+		ret = rknpu_fence_alloc(job);
+		if (ret) {
+			rknpu_job_free(job);
+			return ret;
+		}
+		job->args->fence_fd = rknpu_fence_get_fd(job);
+		args->fence_fd = job->args->fence_fd;
+#else
+		LOG_ERROR(
+			"failed to use rknpu fence, please enable rknpu fence config!\n");
+		rknpu_job_free(job);
+		return -EINVAL;
+#endif
+	}
+
+	if (args->flags & RKNPU_JOB_NONBLOCK) {
+		job->flags |= RKNPU_JOB_ASYNC;
+		rknpu_job_timeout_clean(rknpu_dev, job->args->core_mask);
+		rknpu_job_schedule(job);
+		ret = job->ret;
+		if (ret) {
+			rknpu_job_abort(job);
+			return ret;
+		}
+	} else {
+		rknpu_job_schedule(job);
+		if (args->flags & RKNPU_JOB_PC)
+			job->ret = rknpu_job_wait(job);
+
+		args->task_counter = job->args->task_counter;
+		ret = job->ret;
+		if (!ret)
+			rknpu_job_cleanup(job);
+		else
+			rknpu_job_abort(job);
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_ROCKCHIP_RKNPU_DRM_GEM
+int rknpu_submit_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
+{
+	struct rknpu_device *rknpu_dev = dev_get_drvdata(dev->dev);
+	struct rknpu_submit *args = data;
+
+	return rknpu_submit(rknpu_dev, args);
+}
+#endif
+
+#ifdef CONFIG_ROCKCHIP_RKNPU_DMA_HEAP
+int rknpu_submit_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
+{
+	struct rknpu_submit args;
+	int ret = -EINVAL;
+
+	if (unlikely(copy_from_user(&args, (struct rknpu_submit *)data,
+				    sizeof(struct rknpu_submit)))) {
+		LOG_ERROR("%s: copy_from_user failed\n", __func__);
+		ret = -EFAULT;
+		return ret;
+	}
+
+	ret = rknpu_submit(rknpu_dev, &args);
+
+	if (unlikely(copy_to_user((struct rknpu_submit *)data, &args,
+				  sizeof(struct rknpu_submit)))) {
+		LOG_ERROR("%s: 
copy_to_user failed\n", __func__); + ret = -EFAULT; + return ret; + } + + return ret; +} +#endif + +int rknpu_get_hw_version(struct rknpu_device *rknpu_dev, uint32_t *version) +{ + void __iomem *rknpu_core_base = rknpu_dev->base[0]; + + if (version == NULL) + return -EINVAL; + + *version = REG_READ(RKNPU_OFFSET_VERSION) + + (REG_READ(RKNPU_OFFSET_VERSION_NUM) & 0xffff); + + return 0; +} + +int rknpu_get_bw_priority(struct rknpu_device *rknpu_dev, uint32_t *priority, + uint32_t *expect, uint32_t *tw) +{ + void __iomem *base = rknpu_dev->bw_priority_base; + + if (!rknpu_dev->config->bw_enable) { + LOG_WARN("Get bw_priority is not supported on this device!\n"); + return 0; + } + + if (!base) + return -EINVAL; + + spin_lock(&rknpu_dev->lock); + + if (priority != NULL) + *priority = _REG_READ(base, 0x0); + + if (expect != NULL) + *expect = _REG_READ(base, 0x8); + + if (tw != NULL) + *tw = _REG_READ(base, 0xc); + + spin_unlock(&rknpu_dev->lock); + + return 0; +} + +int rknpu_set_bw_priority(struct rknpu_device *rknpu_dev, uint32_t priority, + uint32_t expect, uint32_t tw) +{ + void __iomem *base = rknpu_dev->bw_priority_base; + + if (!rknpu_dev->config->bw_enable) { + LOG_WARN("Set bw_priority is not supported on this device!\n"); + return 0; + } + + if (!base) + return -EINVAL; + + spin_lock(&rknpu_dev->lock); + + if (priority != 0) + _REG_WRITE(base, priority, 0x0); + + if (expect != 0) + _REG_WRITE(base, expect, 0x8); + + if (tw != 0) + _REG_WRITE(base, tw, 0xc); + + spin_unlock(&rknpu_dev->lock); + + return 0; +} + +int rknpu_clear_rw_amount(struct rknpu_device *rknpu_dev) +{ + void __iomem *rknpu_core_base = rknpu_dev->base[0]; + + if (!rknpu_dev->config->bw_enable) { + LOG_WARN("Clear rw_amount is not supported on this device!\n"); + return 0; + } + + spin_lock(&rknpu_dev->lock); + + REG_WRITE(0x80000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); + REG_WRITE(0x00000101, RKNPU_OFFSET_CLR_ALL_RW_AMOUNT); + + spin_unlock(&rknpu_dev->lock); + + return 0; +} + +int rknpu_get_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *dt_wr, + uint32_t *dt_rd, uint32_t *wd_rd) +{ + void __iomem *rknpu_core_base = rknpu_dev->base[0]; + int amount_scale = rknpu_dev->config->pc_data_amount_scale; + + if (!rknpu_dev->config->bw_enable) { + LOG_WARN("Get rw_amount is not supported on this device!\n"); + return 0; + } + + spin_lock(&rknpu_dev->lock); + + if (dt_wr != NULL) + *dt_wr = REG_READ(RKNPU_OFFSET_DT_WR_AMOUNT) * amount_scale; + + if (dt_rd != NULL) + *dt_rd = REG_READ(RKNPU_OFFSET_DT_RD_AMOUNT) * amount_scale; + + if (wd_rd != NULL) + *wd_rd = REG_READ(RKNPU_OFFSET_WT_RD_AMOUNT) * amount_scale; + + spin_unlock(&rknpu_dev->lock); + + return 0; +} + +int rknpu_get_total_rw_amount(struct rknpu_device *rknpu_dev, uint32_t *amount) +{ + uint32_t dt_wr = 0; + uint32_t dt_rd = 0; + uint32_t wd_rd = 0; + int ret = -EINVAL; + + if (!rknpu_dev->config->bw_enable) { + LOG_WARN( + "Get total_rw_amount is not supported on this device!\n"); + return 0; + } + + ret = rknpu_get_rw_amount(rknpu_dev, &dt_wr, &dt_rd, &wd_rd); + + if (amount != NULL) + *amount = dt_wr + dt_rd + wd_rd; + + return ret; +} diff --git a/drivers/rknpu/rknpu_mem.c b/drivers/rknpu/rknpu_mem.c new file mode 100644 index 0000000000000000000000000000000000000000..5535598f2acabe9ca1ff811c3f0b90955296ecbc --- /dev/null +++ b/drivers/rknpu/rknpu_mem.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#include <linux/version.h> +#include 
<linux/rk-dma-heap.h>
+
+#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
+#include <linux/dma-map-ops.h>
+#endif
+
+#include "rknpu_drv.h"
+#include "rknpu_ioctl.h"
+#include "rknpu_mem.h"
+
+int rknpu_mem_create_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
+{
+	struct rknpu_mem_create args;
+	int ret = -EINVAL;
+	struct dma_buf_attachment *attachment;
+	struct sg_table *table;
+	struct scatterlist *sgl;
+	dma_addr_t phys;
+	struct dma_buf *dmabuf;
+	struct page **pages;
+	struct page *page;
+	struct rknpu_mem_object *rknpu_obj = NULL;
+	int i, fd;
+	unsigned int length, page_count;
+
+	if (unlikely(copy_from_user(&args, (struct rknpu_mem_create *)data,
+				    sizeof(struct rknpu_mem_create)))) {
+		LOG_ERROR("%s: copy_from_user failed\n", __func__);
+		ret = -EFAULT;
+		return ret;
+	}
+
+	if (args.flags & RKNPU_MEM_NON_CONTIGUOUS) {
+		LOG_ERROR("%s: non-contiguous (iommu) memory is not supported yet!\n",
+			  __func__);
+		ret = -EINVAL;
+		return ret;
+	}
+
+	rknpu_obj = kzalloc(sizeof(*rknpu_obj), GFP_KERNEL);
+	if (!rknpu_obj)
+		return -ENOMEM;
+
+	if (args.handle > 0) {
+		fd = args.handle;
+
+		dmabuf = dma_buf_get(fd);
+		if (IS_ERR(dmabuf)) {
+			ret = PTR_ERR(dmabuf);
+			goto err_free_obj;
+		}
+
+		rknpu_obj->dmabuf = dmabuf;
+		rknpu_obj->owner = 0;
+	} else {
+		/* allocate a new buffer from the rk dma-heap */
+		dmabuf = rk_dma_heap_buffer_alloc(rknpu_dev->heap, args.size,
+						  O_CLOEXEC | O_RDWR, 0x0,
+						  dev_name(rknpu_dev->dev));
+		if (IS_ERR(dmabuf)) {
+			ret = PTR_ERR(dmabuf);
+			goto err_free_obj;
+		}
+
+		rknpu_obj->dmabuf = dmabuf;
+		rknpu_obj->owner = 1;
+
+		fd = dma_buf_fd(dmabuf, O_CLOEXEC | O_RDWR);
+		if (fd < 0) {
+			ret = -EFAULT;
+			goto err_free_dma_buf;
+		}
+	}
+
+	attachment = dma_buf_attach(dmabuf, rknpu_dev->dev);
+	if (IS_ERR(attachment)) {
+		ret = PTR_ERR(attachment);
+		goto err_free_dma_buf;
+	}
+
+	table = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+	if (IS_ERR(table)) {
+		dma_buf_detach(dmabuf, attachment);
+		ret = PTR_ERR(table);
+		goto err_free_dma_buf;
+	}
+
+	for_each_sgtable_sg(table, sgl, i) {
+		phys = sg_dma_address(sgl);
+		page = sg_page(sgl);
+		length = sg_dma_len(sgl);
+		LOG_DEBUG("%s, %d, phys: %pad, length: %u\n", __func__,
+			  __LINE__, &phys, length);
+	}
+
+	/*
+	 * The rk CMA heap yields a single contiguous entry, so phys, page
+	 * and length of the last (only) entry describe the whole buffer.
+	 */
+	page_count = length >> PAGE_SHIFT;
+	pages = kmalloc_array(page_count, sizeof(*pages), GFP_KERNEL);
+	if (!pages) {
+		ret = -ENOMEM;
+		goto err_detach_dma_buf;
+	}
+
+	for (i = 0; i < page_count; i++)
+		pages[i] = &page[i];
+
+	rknpu_obj->kv_addr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
+	if (!rknpu_obj->kv_addr) {
+		ret = -ENOMEM;
+		goto err_free_pages;
+	}
+
+	rknpu_obj->size = PAGE_ALIGN(args.size);
+	rknpu_obj->dma_addr = phys;
+	rknpu_obj->sgt = table;
+
+	args.size = rknpu_obj->size;
+	args.obj_addr = (__u64)(uintptr_t)rknpu_obj;
+	args.dma_addr = rknpu_obj->dma_addr;
+	args.handle = fd;
+
+	LOG_DEBUG(
+		"args.handle: %d, args.size: %lld, rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n",
+		args.handle, args.size, (__u64)(uintptr_t)rknpu_obj,
+		(__u64)rknpu_obj->dma_addr);
+
+	if (unlikely(copy_to_user((struct rknpu_mem_create *)data, &args,
+				  sizeof(struct rknpu_mem_create)))) {
+		LOG_ERROR("%s: copy_to_user failed\n", __func__);
+		ret = -EFAULT;
+		goto err_unmap_kv_addr;
+	}
+
+	kfree(pages);
+	dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL);
+	dma_buf_detach(dmabuf, attachment);
+
+	return 0;
+
+err_unmap_kv_addr:
+	vunmap(rknpu_obj->kv_addr);
+	rknpu_obj->kv_addr = NULL;
+
+err_free_pages:
+	kfree(pages);
+
+err_detach_dma_buf:
+	dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL);
+	dma_buf_detach(dmabuf, attachment);
+
+err_free_dma_buf:
+	if (rknpu_obj->owner)
+		rk_dma_heap_buffer_free(dmabuf);
+	else
+		dma_buf_put(dmabuf);
+
+err_free_obj:
+	kfree(rknpu_obj);
+
+	return ret;
+}
+
+int rknpu_mem_destroy_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
+{
+	struct rknpu_mem_object *rknpu_obj = NULL;
+	struct rknpu_mem_destroy args;
+	struct dma_buf *dmabuf;
+	int ret = -EFAULT;
+
+	if (unlikely(copy_from_user(&args, (struct rknpu_mem_destroy *)data,
+				    sizeof(struct rknpu_mem_destroy)))) {
+		LOG_ERROR("%s: copy_from_user failed\n", __func__);
+		ret = -EFAULT;
+		return ret;
+	}
+
+	rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr;
+	dmabuf = rknpu_obj->dmabuf;
+	LOG_DEBUG(
+		"free args.handle: %d, rknpu_obj: %#llx, rknpu_obj->dma_addr: %#llx\n",
+		args.handle, (__u64)(uintptr_t)rknpu_obj,
+		(__u64)rknpu_obj->dma_addr);
+
+	vunmap(rknpu_obj->kv_addr);
+	rknpu_obj->kv_addr = NULL;
+
+	if (!rknpu_obj->owner)
+		dma_buf_put(dmabuf);
+
+	kfree(rknpu_obj);
+
+	return 0;
+}
+
+int rknpu_mem_sync_ioctl(struct rknpu_device *rknpu_dev, unsigned long data)
+{
+	struct rknpu_mem_object *rknpu_obj = NULL;
+	struct rknpu_mem_sync args;
+	struct dma_buf *dmabuf;
+	int ret = -EFAULT;
+
+	if (unlikely(copy_from_user(&args, (struct rknpu_mem_sync *)data,
+				    sizeof(struct rknpu_mem_sync)))) {
+		LOG_ERROR("%s: copy_from_user failed\n", __func__);
+		ret = -EFAULT;
+		return ret;
+	}
+
+	rknpu_obj = (struct rknpu_mem_object *)(uintptr_t)args.obj_addr;
+	dmabuf = rknpu_obj->dmabuf;
+
+	if (args.flags & RKNPU_MEM_SYNC_TO_DEVICE) {
+		dmabuf->ops->end_cpu_access_partial(dmabuf, DMA_TO_DEVICE,
+						    args.offset, args.size);
+	}
+	if (args.flags & RKNPU_MEM_SYNC_FROM_DEVICE) {
+		dmabuf->ops->begin_cpu_access_partial(dmabuf, DMA_FROM_DEVICE,
+						      args.offset, args.size);
+	}
+
+	return 0;
+} diff --git a/drivers/rknpu/rknpu_mm.c b/drivers/rknpu/rknpu_mm.c new file mode 100644 index 0000000000000000000000000000000000000000..9a13c3e256a4226f809f2465818c215611043e8d --- /dev/null +++ b/drivers/rknpu/rknpu_mm.c @@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Rockchip Electronics Co.Ltd
+ * Author: Felix Zeng <felix.zeng@rock-chips.com>
+ */
+
+#include "rknpu_debugger.h"
+#include "rknpu_mm.h"
+
+int rknpu_mm_create(unsigned int mem_size, unsigned int chunk_size,
+		    struct rknpu_mm **mm)
+{
+	unsigned int num_of_longs;
+	int ret = -EINVAL;
+
+	if (WARN_ON(mem_size < chunk_size))
+		return -EINVAL;
+	if (WARN_ON(mem_size == 0))
+		return -EINVAL;
+	if (WARN_ON(chunk_size == 0))
+		return -EINVAL;
+
+	*mm = kzalloc(sizeof(struct rknpu_mm), GFP_KERNEL);
+	if (!(*mm))
+		return -ENOMEM;
+
+	(*mm)->chunk_size = chunk_size;
+	(*mm)->total_chunks = mem_size / chunk_size;
+	(*mm)->free_chunks = (*mm)->total_chunks;
+
+	num_of_longs =
+		((*mm)->total_chunks + BITS_PER_LONG - 1) / BITS_PER_LONG;
+
+	(*mm)->bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
+	if (!(*mm)->bitmap) {
+		ret = -ENOMEM;
+		goto free_mm;
+	}
+
+	mutex_init(&(*mm)->lock);
+
+	LOG_DEBUG("total_chunks: %d, bitmap: %p\n", (*mm)->total_chunks,
+		  (*mm)->bitmap);
+
+	return 0;
+
+free_mm:
+	kfree(*mm);
+	*mm = NULL;
+	return ret;
+}
+
+void rknpu_mm_destroy(struct rknpu_mm *mm)
+{
+	if (mm != NULL) {
+		mutex_destroy(&mm->lock);
+		kfree(mm->bitmap);
+		kfree(mm);
+	}
+}
+
+int rknpu_mm_alloc(struct rknpu_mm *mm, unsigned int size,
+		   struct rknpu_mm_obj **mm_obj)
+{
+	unsigned int found, start_search, cur_size;
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (size > mm->total_chunks * mm->chunk_size)
+		return -ENOMEM;
+
+	*mm_obj = kzalloc(sizeof(struct rknpu_mm_obj), GFP_KERNEL);
+	if (!(*mm_obj))
+		return -ENOMEM;
+
+	start_search = 0;
+
+	mutex_lock(&mm->lock);
+
+mm_restart_search:
+	/* Find the first chunk that is free */
+	found = find_next_zero_bit(mm->bitmap, mm->total_chunks, start_search);
+
+	/* If there wasn't any free chunk, bail out */
+	if (found == mm->total_chunks)
+		goto mm_no_free_chunk;
+
+	/* Update fields of mm_obj */
+	(*mm_obj)->range_start = found;
+	(*mm_obj)->range_end = found;
+
+	/* If we need only one chunk, mark it as allocated and get out */
+	if (size <= mm->chunk_size) {
+		set_bit(found, mm->bitmap);
+		goto mm_out;
+	}
+
+	/* Otherwise, try to see if we have enough contiguous chunks */
+	cur_size = size - mm->chunk_size;
+	do {
+		(*mm_obj)->range_end = find_next_zero_bit(
+			mm->bitmap, mm->total_chunks, ++found);
+		/*
+		 * If the next free chunk is not contiguous, then we need to
+		 * restart our search from the last free chunk we found (which
+		 * wasn't contiguous to the previous ones).
+		 */
+		if ((*mm_obj)->range_end != found) {
+			start_search = found;
+			goto mm_restart_search;
+		}
+
+		/*
+		 * If we reached the end of the buffer, bail out with an error
+		 */
+		if (found == mm->total_chunks)
+			goto mm_no_free_chunk;
+
+		/* Check if we don't need another chunk */
+		if (cur_size <= mm->chunk_size)
+			cur_size = 0;
+		else
+			cur_size -= mm->chunk_size;
+
+	} while (cur_size > 0);
+
+	/* Mark the chunks as allocated */
+	for (found = (*mm_obj)->range_start; found <= (*mm_obj)->range_end;
+	     found++)
+		set_bit(found, mm->bitmap);
+
+mm_out:
+	mm->free_chunks -= ((*mm_obj)->range_end - (*mm_obj)->range_start + 1);
+	mutex_unlock(&mm->lock);
+
+	LOG_DEBUG("mm allocate, mm_obj: %p, range_start: %d, range_end: %d\n",
+		  *mm_obj, (*mm_obj)->range_start, (*mm_obj)->range_end);
+
+	return 0;
+
+mm_no_free_chunk:
+	mutex_unlock(&mm->lock);
+	kfree(*mm_obj);
+
+	return -ENOMEM;
+}
+
+int rknpu_mm_free(struct rknpu_mm *mm, struct rknpu_mm_obj *mm_obj)
+{
+	unsigned int bit;
+
+	/* Act like kfree when trying to free a NULL object */
+	if (!mm_obj)
+		return 0;
+
+	LOG_DEBUG("mm free, mem_obj: %p, range_start: %d, range_end: %d\n",
+		  mm_obj, mm_obj->range_start, mm_obj->range_end);
+
+	mutex_lock(&mm->lock);
+
+	/* Mark the chunks as free */
+	for (bit = mm_obj->range_start; bit <= mm_obj->range_end; bit++)
+		clear_bit(bit, mm->bitmap);
+
+	mm->free_chunks += (mm_obj->range_end - mm_obj->range_start + 1);
+
+	mutex_unlock(&mm->lock);
+
+	kfree(mm_obj);
+
+	return 0;
+}
+
+int rknpu_mm_dump(struct seq_file *m, void *data)
+{
+	struct rknpu_debugger_node *node = m->private;
+	struct rknpu_debugger *debugger = node->debugger;
+	struct rknpu_device *rknpu_dev =
+		container_of(debugger, struct rknpu_device, debugger);
+	struct rknpu_mm *mm = NULL;
+	int cur = 0, rbot = 0, rtop = 0;
+	size_t ret = 0;
+	char buf[64];
+	size_t size = sizeof(buf);
+	int seg_chunks = 32, seg_id = 0;
+	int free_size = 0;
+	int i = 0;
+
+	mm = rknpu_dev->sram_mm;
+	if (mm == NULL)
+		return 0;
+
+	seq_printf(m, "SRAM bitmap: \"*\" - used, \".\" - free (1bit = %dKB)\n",
+		   mm->chunk_size / 1024);
+
+	rbot = cur = find_first_bit(mm->bitmap, mm->total_chunks);
+	for (i = 0; i < cur; ++i) {
+		ret += scnprintf(buf + ret, size - ret, ".");
+		if (ret >= seg_chunks) {
+			seq_printf(m, "[%03d] [%s]\n", seg_id++, buf);
+			ret = 0;
+		}
+	}
+	while (cur < mm->total_chunks) {
+		rtop = cur;
+		cur = find_next_bit(mm->bitmap, mm->total_chunks, cur + 1);
+		if (cur < mm->total_chunks && cur <= rtop + 1)
+			continue;
+
+		for (i = 
rbot; i <= rtop; ++i) { + ret += scnprintf(buf + ret, size - ret, "*"); + if (ret >= seg_chunks) { + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + ret = 0; + } + } + + for (i = rtop + 1; i < cur; ++i) { + ret += scnprintf(buf + ret, size - ret, "."); + if (ret >= seg_chunks) { + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + ret = 0; + } + } + + rbot = cur; + } + + if (ret > 0) + seq_printf(m, "[%03d] [%s]\n", seg_id++, buf); + + free_size = mm->free_chunks * mm->chunk_size; + seq_printf(m, "SRAM total size: %d, used: %d, free: %d\n", + rknpu_dev->sram_size, rknpu_dev->sram_size - free_size, + free_size); + + return 0; +} + +dma_addr_t rknpu_iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, + u64 dma_limit, struct device *dev) +{ + struct rknpu_iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len, iova = 0; +#if (KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE) + dma_addr_t limit; +#endif + + shift = iova_shift(iovad); + iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); + +#if (KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE) + dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit); +#else + if (dev->bus_dma_mask) + dma_limit &= dev->bus_dma_mask; +#endif + + if (domain->geometry.force_aperture) + dma_limit = + min_t(u64, dma_limit, domain->geometry.aperture_end); + +#if (KERNEL_VERSION(5, 4, 0) <= LINUX_VERSION_CODE) + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +#else + limit = min_t(dma_addr_t, dma_limit >> shift, iovad->end_pfn); + + iova = alloc_iova_fast(iovad, iova_len, limit, true); +#endif + + return (dma_addr_t)iova << shift; +} + +void rknpu_iommu_dma_free_iova(struct rknpu_iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size) +{ + struct iova_domain *iovad = &cookie->iovad; + + free_iova_fast(iovad, iova_pfn(iovad, iova), size >> iova_shift(iovad)); +} diff --git a/drivers/rknpu/rknpu_reset.c b/drivers/rknpu/rknpu_reset.c new file mode 100644 index 0000000000000000000000000000000000000000..91c9b75d68e77eea1745cd4327a4d27048b427a8 --- /dev/null +++ b/drivers/rknpu/rknpu_reset.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Rockchip Electronics Co.Ltd + * Author: Felix Zeng <felix.zeng@rock-chips.com> + */ + +#include <linux/delay.h> +#include <linux/iommu.h> + +#include "rknpu_reset.h" + +#ifndef FPGA_PLATFORM +static inline struct reset_control *rknpu_reset_control_get(struct device *dev, + const char *name) +{ + struct reset_control *rst = NULL; + + rst = devm_reset_control_get(dev, name); + if (IS_ERR(rst)) + LOG_DEV_ERROR(dev, + "failed to get rknpu reset control: %s, %ld\n", + name, PTR_ERR(rst)); + + return rst; +} +#endif + +int rknpu_reset_get(struct rknpu_device *rknpu_dev) +{ +#ifndef FPGA_PLATFORM + struct reset_control *srst_a = NULL; + struct reset_control *srst_h = NULL; + int i = 0; + + for (i = 0; i < rknpu_dev->config->num_resets; i++) { + srst_a = rknpu_reset_control_get( + rknpu_dev->dev, + rknpu_dev->config->resets[i].srst_a_name); + if (IS_ERR(srst_a)) + return PTR_ERR(srst_a); + + rknpu_dev->srst_a[i] = srst_a; + + srst_h = rknpu_reset_control_get( + 
rknpu_dev->dev, + rknpu_dev->config->resets[i].srst_h_name); + if (IS_ERR(srst_h)) + return PTR_ERR(srst_h); + + rknpu_dev->srst_h[i] = srst_h; + } +#endif + + return 0; +} + +#ifndef FPGA_PLATFORM +static int rknpu_reset_assert(struct reset_control *rst) +{ + int ret = -EINVAL; + + if (!rst) + return -EINVAL; + + ret = reset_control_assert(rst); + if (ret < 0) { + LOG_ERROR("failed to assert rknpu reset: %d\n", ret); + return ret; + } + + return 0; +} + +static int rknpu_reset_deassert(struct reset_control *rst) +{ + int ret = -EINVAL; + + if (!rst) + return -EINVAL; + + ret = reset_control_deassert(rst); + if (ret < 0) { + LOG_ERROR("failed to deassert rknpu reset: %d\n", ret); + return ret; + } + + return 0; +} +#endif + +int rknpu_soft_reset(struct rknpu_device *rknpu_dev) +{ +#ifndef FPGA_PLATFORM + struct iommu_domain *domain = NULL; + struct rknpu_subcore_data *subcore_data = NULL; + int ret = -EINVAL, i = 0; + + if (rknpu_dev->bypass_soft_reset) { + LOG_WARN("bypass soft reset\n"); + return 0; + } + + if (!mutex_trylock(&rknpu_dev->reset_lock)) + return 0; + + rknpu_dev->soft_reseting = true; + + msleep(100); + + for (i = 0; i < rknpu_dev->config->num_irqs; ++i) { + subcore_data = &rknpu_dev->subcore_datas[i]; + wake_up(&subcore_data->job_done_wq); + } + + LOG_INFO("soft reset\n"); + + for (i = 0; i < rknpu_dev->config->num_resets; i++) { + ret = rknpu_reset_assert(rknpu_dev->srst_a[i]); + ret |= rknpu_reset_assert(rknpu_dev->srst_h[i]); + + udelay(10); + + ret |= rknpu_reset_deassert(rknpu_dev->srst_a[i]); + ret |= rknpu_reset_deassert(rknpu_dev->srst_h[i]); + } + + if (ret) { + LOG_DEV_ERROR(rknpu_dev->dev, + "failed to soft reset for rknpu: %d\n", ret); + mutex_unlock(&rknpu_dev->reset_lock); + return ret; + } + + if (rknpu_dev->iommu_en) + domain = iommu_get_domain_for_dev(rknpu_dev->dev); + + if (domain) { + iommu_detach_device(domain, rknpu_dev->dev); + iommu_attach_device(domain, rknpu_dev->dev); + } + + rknpu_dev->soft_reseting = false; + + mutex_unlock(&rknpu_dev->reset_lock); +#endif + + return 0; +}
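+
+/*
+ * Usage sketch (illustrative only, not called from this file): the timeout
+ * path in rknpu_job_abort() is the expected caller of rknpu_soft_reset(),
+ * roughly:
+ *
+ *	if (job->ret == -ETIMEDOUT)
+ *		rknpu_soft_reset(job->rknpu_dev);
+ *
+ * The reset serializes against itself via reset_lock, wakes every job_done
+ * waitqueue, pulses the core/AHB resets, and re-attaches the IOMMU domain
+ * so stale translations are dropped.
+ */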