diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8f3bfed137aadbb5bba08bb0fa04edb385c4ac9c..00a9b17abd980e9e50c1285ace3976a03c8f8004 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -511,6 +511,8 @@ source "drivers/gpu/drm/sprd/Kconfig"
 
 source "drivers/gpu/drm/imagination/Kconfig"
 
+source "drivers/gpu/drm/tyr/Kconfig"
+
 config DRM_HYPERV
 	tristate "DRM Support for Hyper-V synthetic video device"
 	depends on DRM && PCI && MMU && HYPERV
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 7e52712fcff498327c5551d952c44fa6e51cf174..dccbbe80a2614ae7fe9e89110bd533a4a5ea3d31 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -229,3 +229,4 @@ obj-y += solomon/
 obj-$(CONFIG_DRM_SPRD) += sprd/
 obj-$(CONFIG_DRM_LOONGSON) += loongson/
 obj-$(CONFIG_DRM_POWERVR) += imagination/
+obj-$(CONFIG_DRM_TYR) += tyr/
diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig
new file mode 100644
index 0000000000000000000000000000000000000000..c6b1826a3f1cd3b07eb71142023208a997edb180
--- /dev/null
+++ b/drivers/gpu/drm/tyr/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+config TYR_DRM_GEM_SHMEM_HELPER
+	bool
+	select DRM_GEM_SHMEM_HELPER
+
+config TYR_DRM_GPUVM
+	bool
+	select DRM_GPUVM
+
+config DRM_TYR
+	tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)"
+	depends on DRM=y
+	depends on RUST
+	depends on RUST_FW_LOADER_ABSTRACTIONS
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
+	depends on MMU
+	select TYR_DRM_GPUVM
+	select TYR_DRM_GEM_SHMEM_HELPER
+	select IOMMU_IO_PGTABLE_LPAE
+	depends on IOMMU_SUPPORT
+	help
+	  Rust DRM driver for ARM Mali CSF-based GPUs.
+
+	  This driver is for Mali (or Immortalis) Valhall Gxxx GPUs.
+
+	  Note that the Mali-G68 and Mali-G78, while Valhall architecture, will
+	  be supported with the panfrost driver as they are not CSF GPUs.
+
+	  If M is selected, the module will be called tyr.
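Editor's note: the help text above says the module is called tyr when built as a module, and the per-driver Makefile that follows builds the whole crate into that single tyr.o object. The crate root that ties CONFIG_DRM_TYR to the platform driver is not part of this excerpt; purely as a hedged illustration (assuming the kernel's module_platform_driver! helper; the metadata values are made up, not taken from the patch), registration would look roughly like:

    // Illustrative sketch only -- not part of this patch; the real crate
    // root lives elsewhere in the series.
    kernel::module_platform_driver! {
        type: TyrDriver,
        name: "tyr",
        description: "ARM Mali CSF-based GPU driver",
        license: "Dual MIT/GPL",
    }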
diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ba545f65f2c0823b9a4a5a54e39b867e4f9bf812 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 0000000000000000000000000000000000000000..7f72f4f30e6cbeb74341a79b8dac4ead354b2364 --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits::bit_u32; +use kernel::c_str; +use kernel::clk::Clk; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::drv; +use kernel::drm::ioctl; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator::Regulator; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::fw; +use crate::fw::irq::JobIrqHandler; +use crate::fw::Firmware; +use crate::gpu; +use crate::gpu::irq::GpuIrqHandler; +use crate::gpu::wait::PowerOnWait; +use crate::gpu::GpuInfo; +use crate::mmu; +use crate::mmu::irq::MmuIrqHandler; +use crate::mmu::Mmu; +use crate::regs::*; + +use core::pin::Pin; + +/// Convienence type alias for the DRM device type for this driver +pub(crate) type TyrDevice = drm::device::Device<TyrDriver>; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef<TyrDevice>, + + #[pin] + gpu_irq: IrqRegistration<GpuIrqHandler>, + + #[pin] + mmu_irq: IrqRegistration<MmuIrqHandler>, + + #[pin] + job_irq: IrqRegistration<JobIrqHandler>, +} + +impl TyrDriver { + // We have switched to polling for now until SpinLockIrq advances a bit. + fn wait_for_mcu_to_boot(&self) -> Result { + pr_info!("Polling on JOB_INT_GLOBAL_IF\n"); + let iomem = &self.device.data().iomem; + + let op = || JOB_INT_RAWSTAT.read(iomem); + let cond = |raw_stat: &u32| -> bool { *raw_stat & JOB_INT_GLOBAL_IF != 0 }; + + io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(0), + Some(time::Delta::from_millis(100)), + )?; + + JOB_INT_CLEAR.write(iomem, JOB_INT_GLOBAL_IF)?; + pr_info!("MCU booted\n"); + Ok(()) + } + + // We have switched to polling for now until SpinLockIrq advances a bit. 
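Editor's note: wait_for_mcu_to_boot() above relies on the op/cond shape of read_poll_timeout(): op samples JOB_INT_RAWSTAT, cond tests the JOB_INT_GLOBAL_IF bit, and the helper keeps re-evaluating until the condition holds or the 100 ms budget runs out. For readers unfamiliar with the idiom, a minimal standalone sketch of the same pattern (hypothetical poll_until helper, plain std Rust, no kernel APIs):

    use std::time::{Duration, Instant};

    /// Re-run `op` until `cond` accepts its result or `timeout` expires,
    /// mirroring the op/cond/timeout shape of read_poll_timeout().
    fn poll_until<T, E>(
        mut op: impl FnMut() -> Result<T, E>,
        cond: impl Fn(&T) -> bool,
        timeout: Duration,
    ) -> Result<T, Option<E>> {
        let deadline = Instant::now() + timeout;
        loop {
            let val = op().map_err(Some)?;
            if cond(&val) {
                return Ok(val);
            }
            if Instant::now() >= deadline {
                return Err(None); // like -ETIMEDOUT
            }
        }
    }

    fn main() {
        // A fake "register" that reports the interesting bit after a few reads.
        let mut reads = 0;
        let op = move || -> Result<u32, ()> {
            reads += 1;
            Ok(if reads > 3 { 1 } else { 0 })
        };
        assert!(poll_until(op, |raw| *raw & 1 != 0, Duration::from_millis(100)).is_ok());
    }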
+ fn wait_for_poweron(&self, poweron_wait: Arc<PowerOnWait>) -> Result { + pr_info!("Polling on RESET_COMPLETED\n"); + let iomem = &self.device.data().iomem; + + let op = || GPU_INT_RAWSTAT.read(iomem); + let cond = |raw_stat: &u32| -> bool { *raw_stat == 0x700 }; + + io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(0), + Some(time::Delta::from_millis(100)), + )?; + + GPU_INT_CLEAR.write(iomem, 0x700)?; + pr_info!("GPU has powered on\n"); + Ok(()) + } + + // fn wait_for_mcu_to_boot(&self) -> Result { + // let data = self.device.data(); + // let op = || Ok(*data.fw.booted.lock()); + // let cond = |booted: &bool| *booted; + + // data.fw + // .req_wait + // .wait_interruptible_timeout(1000, op, &cond, true) + // } + + // fn wait_for_poweron(&self, poweron_wait: Arc<PowerOnWait>) -> Result { + // let mut powered_on = poweron_wait.powered_on.lock(); + + // while !*powered_on { + // match poweron_wait + // .wait + // .wait_interruptible_timeout(&mut powered_on, msecs_to_jiffies(100)) + // { + // CondVarTimeoutResult::Timeout => return Err(ETIMEDOUT), + // _ => {} + // } + // } + + // Ok(()) + // } +} + +#[pin_data] +pub(crate) struct TyrData { + pub(crate) pdev: platform::Device, + + #[pin] + clks: Mutex<Clocks>, + + #[pin] + regulators: Mutex<Regulators>, + + // Some inforation on the GPU. This is mainly queried by userspace (mesa). + pub(crate) gpu_info: GpuInfo, + + /// The firmware running on the MCU. + #[pin] + pub(crate) fw: Firmware, + + /// True if the CPU/GPU are memory coherent. + pub(crate) coherent: bool, + + /// MMU management. + pub(crate) mmu: Pin<KBox<Mutex<Mmu>>>, + + /// The MMIO region. + pub(crate) iomem: Arc<Devres<IoMem>>, +} + +unsafe impl Send for TyrData {} +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(iomem: &Devres<IoMem<0>>) -> Result<()> { + let irq_enable_cmd = 1 | bit_u32(8); + GPU_CMD.write(iomem, irq_enable_cmd)?; + + let op = || GPU_INT_RAWSTAT.read(iomem); + let cond = |raw_stat: &u32| -> bool { (*raw_stat >> 8) & 1 == 1 }; + let res = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_micros(20000)), + ); + + if let Err(e) = res { + pr_err!("GPU reset failed with errno {}\n", e.to_errno()); + pr_err!("GPU_INT_RAWSTAT is {}\n", GPU_INT_RAWSTAT.read(iomem)?); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <TyrDriver as platform::Driver>::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe(pdev: &mut platform::Device, _info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>> { + dev_dbg!(pdev.as_ref(), "Probed Tyr\n"); + + let core_clk = Clk::new(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = Clk::new(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = Clk::new(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::get(pdev.as_ref(), c_str!("sram"))?; + + mali_regulator.enable()?; + sram_regulator.enable()?; + + let resource = pdev.resource(0).ok_or(EINVAL)?; + + let iomem = Arc::new(pdev.ioremap_resource(resource)?, GFP_KERNEL)?; + + // Disable all interrupts for now. 
+ JOB_INT_MASK.write(&iomem, 0)?; + MMU_INT_MASK.write(&iomem, 0)?; + GPU_INT_MASK.write(&iomem, 0)?; + + issue_soft_reset(&iomem)?; + gpu::l2_power_on(&iomem)?; + + let gpu_info = GpuInfo::new(&iomem)?; + gpu_info.log(pdev); + + pdev.as_ref().dma_set_max_seg_size(u32::MAX); + pdev.as_ref() + .dma_set_mask_and_coherent(u64::from(gpu_info.pa_bits()))?; + + let platform = pdev.clone(); + let tdev: ARef<TyrDevice> = drm::device::Device::new_no_data(pdev.as_ref())?; + + let mmu = KBox::pin_init(new_mutex!(Mmu::new()?), GFP_KERNEL)?; + let fw = Firmware::init(&tdev, pdev.clone(), &gpu_info, mmu.as_ref(), iomem.clone())?; + + // Ideally we'd find a way around this useless clone too... + let t = tdev.clone(); + let p = platform.clone(); + let i = iomem.clone(); + let data = Arc::pin_init( + try_pin_init!(TyrData { + pdev: p.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + fw <- fw, + coherent: false, // TODO. The GPU is not IO coherent on rk3588, which is what I am testing on. + mmu, + iomem: i.clone(), + }), + GFP_KERNEL, + )?; + + // We must find a way around this. It's being discussed on Zulip already. + // + // Note that this is a problem, because if we fail at probe, then the + // drop code expects the data to be set, which leads to a crash. + unsafe { tdev.clone().init_data(data) }; + drm::drv::Registration::new_foreign_owned(tdev.clone(), 0)?; + + let poweron_wait = PowerOnWait::new()?; + let pow = poweron_wait.clone(); + let i = iomem.clone(); + let driver = KBox::pin_init( + try_pin_init!(TyrDriver { + device: t.clone(), + gpu_irq <- gpu::irq::gpu_irq_init(t.clone(), platform.clone(), i.clone(), pow)?, + mmu_irq <- mmu::irq::mmu_irq_init(t.clone(), platform.clone(), i.clone())?, + job_irq <- fw::irq::job_irq_init(t.clone(), platform.clone(), i.clone())?, + }), + GFP_KERNEL, + )?; + + driver.wait_for_poweron(poweron_wait.clone())?; + + pr_info!("Booting the MCU"); + + MCU_CONTROL.write(&iomem, MCU_CONTROL_AUTO)?; + + driver.wait_for_mcu_to_boot()?; + let data = tdev.data(); + let gpu_info = &data.gpu_info; + let core_clk = &data.clks.lock().core; + + data.fw.enable(iomem.clone(), gpu_info, core_clk)?; + dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) { + // XXX: we will not have the `data` field here if we failed the + // initialization, i.e.: if probe failed. + // + // We need to figure out with the community how to properly split the + // creation of a DRM device from the place where the data is set and + // from the place where it is registered to overcome this. + // + // The current solution, i.e.: `new_from_closure` is just a hack, and it + // shows its shortcomings here, for example. + // + // dev_dbg!(self.device.data().pdev.as_ref(), "Removed Tyr.\n"); + } +} + +const INFO: drm::drv::DriverInfo = drm::drv::DriverInfo { + major: 0, + minor: 0, + patchlevel: 0, + name: c_str!("tyr"), + desc: c_str!("ARM Mali CSF-based GPU driver"), + date: c_str!("20252501"), +}; + +#[vtable] +impl drm::drv::Driver for TyrDriver { + type Data = Arc<TyrData>; + type File = File; + type Object = crate::gem::Object; + + const INFO: drm::drv::DriverInfo = INFO; + const FEATURES: u32 = drv::FEAT_GEM | drv::FEAT_GEM_GPUVA; + + kernel::declare_drm_ioctls! 
{ + (TYR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + (TYR_VM_CREATE, drm_panthor_vm_create, ioctl::RENDER_ALLOW, File::vm_create), + (TYR_VM_DESTROY, drm_panthor_vm_destroy, ioctl::RENDER_ALLOW, File::vm_destroy), + (TYR_VM_BIND, drm_panthor_vm_bind, ioctl::RENDER_ALLOW, File::vm_bind), + (TYR_VM_GET_STATE, drm_panthor_vm_get_state, ioctl::RENDER_ALLOW, File::vm_get_state), + (TYR_BO_CREATE, drm_panthor_bo_create, ioctl::RENDER_ALLOW, File::bo_create), + (TYR_BO_MMAP_OFFSET, drm_panthor_bo_mmap_offset, ioctl::RENDER_ALLOW, File::bo_mmap_offset), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: Clk, + coregroup: Clk, +} + +#[pin_data] +struct Regulators { + mali: Regulator, + sram: Regulator, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 0000000000000000000000000000000000000000..34a1a317ebcb60e06b43b1d29facb476faf4b09a --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::alloc::flags::*; +use kernel::drm; +use kernel::drm::device::Device as DrmDevice; +use kernel::drm::gem::BaseObject; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::transmute::FromBytes; +use kernel::uaccess::UserSlice; +use kernel::uapi; + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use crate::gem; +use crate::mmu::vm; +use crate::mmu::vm::pool::Pool; +use crate::mmu::vm::VmLayout; +use crate::mmu::vm::VmUserSize; + +pub(crate) struct File { + /// A pool storing our VMs for this particular context. + vm_pool: Pool, +} + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File<File>; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(dev: &DrmDevice<Self::Driver>) -> Result<Pin<KBox<Self>>> { + dev_dbg!(dev.as_ref(), "drm::device::Device::open\n"); + + let file = Self { + vm_pool: Pool::create(), + }; + + Ok(KBox::new(file, GFP_KERNEL)?.into()) + } +} + +impl File { + pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result<u32> { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.data().gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = + UserSlice::new(devquery.pointer as usize, devquery.size as usize).writer(); + + writer.write(&tdev.data().gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } + + pub(crate) fn vm_create( + tdev: &TyrDevice, + vmcreate: &mut uapi::drm_panthor_vm_create, + file: &DrmFile, + ) -> Result<u32> { + let id = file.inner().vm_pool().create_vm( + tdev, + VmLayout::from_user_sz(tdev, VmUserSize::Custom(vmcreate.user_va_range)), + )?; + + vmcreate.id = id as u32; + Ok(0) + } + + pub(crate) fn vm_destroy( + tdev: &TyrDevice, + vmdestroy: &mut uapi::drm_panthor_vm_destroy, + file: &DrmFile, + ) -> Result<u32> { + let iomem = tdev.data().iomem.clone(); + + file.inner() + .vm_pool() + .destroy_vm(vmdestroy.id as usize, iomem)?; + + Ok(0) + } + + pub(crate) fn vm_bind( + tdev: &TyrDevice, + vmbind: &mut uapi::drm_panthor_vm_bind, + file: &DrmFile, + ) -> Result<u32> { + if vmbind.flags & uapi::drm_panthor_vm_bind_flags_DRM_PANTHOR_VM_BIND_ASYNC != 0 { + dev_info!(tdev.as_ref(), "We do not support async VM_BIND yet"); + 
return Err(ENOTSUPP); + } + + if vmbind.ops.stride as usize != core::mem::size_of::<uapi::drm_panthor_vm_bind_op>() { + dev_info!( + tdev.as_ref(), + "We cannot graciously handle stride mismatches yet" + ); + return Err(ENOTSUPP); + } + + let stride = vmbind.ops.stride as usize; + let count = vmbind.ops.count as usize; + + let mut reader = UserSlice::new(vmbind.ops.array as usize, stride).reader(); + let iomem = tdev.data().iomem.clone(); + + for i in 0..count { + let res = { + let op: VmBindOp = reader.read()?; + let mask = uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; + + match op.0.flags as i32 & mask { + uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MAP => { + let bo = gem::lookup_handle(file, op.0.bo_handle)?; + let range = op.0.va..op.0.va + op.0.size; + + let vm = file + .inner() + .vm_pool() + .get_vm(vmbind.vm_id as usize) + .ok_or(EINVAL)?; + + vm.lock().bind_gem( + iomem.clone(), + &bo.gem, + op.0.bo_offset, + range, + vm::map_flags::Flags::try_from(op.0.flags & 0b111)?, + )?; + } + + uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => { + if op.0.bo_handle != 0 || op.0.bo_offset != 0 { + return Err(EINVAL); + } + + let range = op.0.va..op.0.va + op.0.size; + + let vm = file + .inner() + .vm_pool() + .get_vm(vmbind.vm_id as usize) + .ok_or(EINVAL)?; + + vm.lock().unmap_range(iomem.clone(), range)?; + } + _ => return Err(ENOTSUPP), + } + + Ok(0) + }; + + if let Err(e) = res { + vmbind.ops.count = i as u32; + return Err(e); + } + } + + Ok(0) + } + + pub(crate) fn vm_get_state( + _tdev: &TyrDevice, + _vmgetstate: &mut uapi::drm_panthor_vm_get_state, + _file: &DrmFile, + ) -> Result<u32> { + Err(ENOTSUPP) + } + + pub(crate) fn bo_create( + tdev: &TyrDevice, + bocreate: &mut uapi::drm_panthor_bo_create, + file: &DrmFile, + ) -> Result<u32> { + if bocreate.flags & !uapi::drm_panthor_bo_flags_DRM_PANTHOR_BO_NO_MMAP != 0 { + dev_err!( + tdev.as_ref(), + "bo_create: invalid flags {}\n", + bocreate.flags + ); + + return Err(EINVAL); + } + + let bo = gem::new_object(tdev, bocreate.size as usize, bocreate.flags)?; + + let handle = bo.gem.create_handle(file)?; + bocreate.handle = handle; + bocreate.size = bo.gem.size() as u64; + + Ok(0) + } + + pub(crate) fn bo_mmap_offset( + _tdev: &TyrDevice, + bommap: &mut uapi::drm_panthor_bo_mmap_offset, + file: &DrmFile, + ) -> Result<u32> { + let bo = gem::lookup_handle(file, bommap.handle)?; + + bommap.offset = bo.gem.create_mmap_offset()?; + + Ok(0) + } + + fn vm_pool(self: Pin<&Self>) -> Pin<&Pool> { + // SAFETY: Field projection, we never move out of this field. + unsafe { self.map_unchecked(|f| &f.vm_pool) } + } +} + +#[repr(transparent)] +struct VmBindOp(uapi::drm_panthor_vm_bind_op); + +// XXX: we cannot implement this trait for the uapi type directly, hence the +// wrapper. +// SAFETY: this struct is safe to be transmuted from a byte slice. +unsafe impl FromBytes for VmBindOp {} diff --git a/drivers/gpu/drm/tyr/flags.rs b/drivers/gpu/drm/tyr/flags.rs new file mode 100644 index 0000000000000000000000000000000000000000..387b3f439404392e744e9149b5fc49c678f0c83a --- /dev/null +++ b/drivers/gpu/drm/tyr/flags.rs @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! A general flags type adapted from the WIP work from Felipe Xavier. +//! +//! This will be replaced by his patch once it's ready. + +#[macro_export] +/// Creates a new flags type. +macro_rules! 
impl_flags { + ($flags:ident, $flag:ident, $ty:ty) => { + #[allow(missing_docs)] + #[repr(transparent)] + #[derive(Copy, Clone, Default, Debug, PartialEq, Eq)] + pub struct $flags($ty); + + #[allow(missing_docs)] + #[derive(Copy, Clone, Debug, PartialEq, Eq)] + pub struct $flag($ty); + + impl From<$flag> for $flags { + #[inline] + fn from(value: $flag) -> Self { + Self(value.0) + } + } + + impl From<$flags> for $ty { + #[inline] + fn from(value: $flags) -> Self { + value.0 + } + } + + impl core::ops::BitOr for $flags { + type Output = Self; + + #[inline] + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } + } + + impl core::ops::BitOrAssign for $flags { + #[inline] + fn bitor_assign(&mut self, rhs: Self) { + *self = *self | rhs; + } + } + + impl core::ops::BitAnd for $flags { + type Output = Self; + + #[inline] + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } + } + + impl core::ops::BitAndAssign for $flags { + #[inline] + fn bitand_assign(&mut self, rhs: Self) { + *self = *self & rhs; + } + } + + impl core::ops::BitOr<$flag> for $flags { + type Output = Self; + + #[inline] + fn bitor(self, rhs: $flag) -> Self::Output { + self | Self::from(rhs) + } + } + + impl core::ops::BitOrAssign<$flag> for $flags { + #[inline] + fn bitor_assign(&mut self, rhs: $flag) { + *self = *self | rhs; + } + } + + impl core::ops::BitAnd<$flag> for $flags { + type Output = Self; + + #[inline] + fn bitand(self, rhs: $flag) -> Self::Output { + self & Self::from(rhs) + } + } + + impl core::ops::BitAndAssign<$flag> for $flags { + #[inline] + fn bitand_assign(&mut self, rhs: $flag) { + *self = *self & rhs; + } + } + + impl core::ops::BitXor for $flags { + type Output = Self; + + #[inline] + fn bitxor(self, rhs: Self) -> Self::Output { + Self(self.0 ^ rhs.0) + } + } + + impl core::ops::BitXorAssign for $flags { + #[inline] + fn bitxor_assign(&mut self, rhs: Self) { + *self = *self ^ rhs; + } + } + + impl core::ops::Neg for $flags { + type Output = Self; + + #[inline] + fn neg(self) -> Self::Output { + Self(!self.0) + } + } + + impl $flags { + /// Returns an empty instance of <type> where no flags are set. + #[inline] + pub const fn empty() -> Self { + Self(0) + } + + /// Checks if a specific flag is set. + #[inline] + pub fn contains(self, flag: $flag) -> bool { + (self.0 & flag.0) == flag.0 + } + } + }; +} diff --git a/drivers/gpu/drm/tyr/fw.rs b/drivers/gpu/drm/tyr/fw.rs new file mode 100644 index 0000000000000000000000000000000000000000..d9f986a17584c530ec2f39b9e53c60e795b33bc2 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw.rs @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use global::GlobalInterface; +use kernel::bindings::SZ_1G; +use kernel::clk::Clk; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::new_mutex; +use kernel::new_spinlock_irq; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::sync::SpinLockIrq; +use parse::Section; +use wait::ReqWait; + +use crate::driver::TyrDevice; +use crate::gpu::GpuInfo; +use crate::mmu::vm::Vm; +use crate::mmu::vm::VmLayout; +use crate::mmu::Mmu; + +const CSF_MCU_SHARED_REGION_START: u32 = 0x04000000; +const CSF_MCU_SHARED_REGION_SIZE: u32 = 0x04000000; + +mod global; +pub(crate) mod irq; +mod parse; +pub(crate) mod wait; + +#[repr(transparent)] +#[derive(Debug, Clone, Copy)] +/// An offset into the shared section that is known to be valid. +/// +/// This can be obtained via a call to [`Firmware::kmap_offset(mcu_va)`]. 
+/// +/// # Invariants +/// +/// `self.0` is a valid offset into the shared section. This means that it can +/// safely be dereferenced by the CPU. +struct CheckedKmapOffset(usize); + +impl CheckedKmapOffset { + fn as_mut_ptr(&self, shared_section: &mut Section) -> Result<*mut core::ffi::c_void> { + let vmap = shared_section.mem.vmap()?; + let vmap = vmap.as_mut_ptr(); + + // SAFETY: safe by the type invariant. + let offset = unsafe { vmap.add(self.0) }; + + Ok(offset) + } + + fn read<T>(&self, shared_section: &mut Section) -> Result<T> { + let ptr = self.as_mut_ptr(shared_section)?; + + // SAFETY: we know that this pointer is aligned and valid for reads for + // at least size_of::<Self>() bytes. + Ok(unsafe { core::ptr::read_volatile(ptr as *const T) }) + } + + fn write<T>(&self, shared_section: &mut Section, value: T) -> Result { + let ptr = self.as_mut_ptr(shared_section)?; + + // SAFETY: we know that this pointer is aligned and valid for writes for + // at least size_of::<Self>() bytes. + unsafe { + core::ptr::write_volatile(ptr as *mut T, value); + } + + Ok(()) + } +} + +#[repr(transparent)] +/// An offset into the shared section that is known to point to the request field. +/// +/// It is more convenient to use this type than reading or writing the memory +/// areas directly if all you want is to change the request field. +struct ReqKmapOffset(CheckedKmapOffset); + +impl ReqKmapOffset { + /// Toggle acknowledge bits to send an event to the FW + /// + /// The Host -> FW event/message passing was designed to be lockless, with each side of + /// the channel having its writeable section. Events are signaled as a difference between + /// the host and FW side in the req/ack registers (when a bit differs, there's an event + /// pending, when they are the same, nothing needs attention). + /// + /// This helper allows one to update the req register based on the current value of the + /// ack register managed by the FW. Toggling a specific bit will flag an event. In order + /// for events to be re-evaluated, the interface doorbell needs to be rung. + fn toggle_reqs( + &self, + shared_section: &mut Section, + ack: CheckedKmapOffset, + reqs: u32, + ) -> Result { + let cur_req_val = self.0.read::<u32>(shared_section)?; + + let ack_val = ack.read::<u32>(shared_section)?; + let new_val = ((ack_val ^ reqs) & reqs) | (cur_req_val & !reqs); + + self.0.write::<u32>(shared_section, new_val) + } + + /// Update bits to reflect a configuration change + /// + /// Some configuration get passed through req registers that are also used to + /// send events to the FW. + fn update_reqs(&self, shared_section: &mut Section, val: u32, reqs: u32) -> Result { + let cur_req_val = self.0.read::<u32>(shared_section)?; + let new_val = (cur_req_val & !reqs) | (val & reqs); + + self.0.write::<u32>(shared_section, new_val) + } + + /// Returns whether any requests are pending. + /// + /// Requests are pending when the value of the given bit in the req differs + /// from the one in ack. + fn pending_reqs( + &self, + shared_section: &mut Section, + ack: CheckedKmapOffset, + reqs: u32, + ) -> Result<bool> { + let cur_req_val = self.0.read::<u32>(shared_section)? & reqs; + let cur_ack_val = ack.read::<u32>(shared_section)? & reqs; + + Ok((cur_req_val ^ cur_ack_val) != 0) + } +} + +/// Our interface to the MCU. +#[pin_data] +pub(crate) struct Firmware { + #[pin] + /// The sections read from the firmware binary. These sections are loaded + /// into GPU memory via BOs. 
+ sections: Mutex<KVec<Section>>, + + /// The global FW interface. + global_iface: Arc<GlobalInterface>, + + /// The VM where we load the firmware into. This VM is always bound to AS0. + vm: Arc<Mutex<Vm>>, + + #[pin] + /// A condvar representing a wait on a MCU request. + /// + /// We notify all waiters on every interrupt. + pub(crate) req_wait: Arc<ReqWait>, + + #[pin] + /// Whether the MCU has booted. + pub(crate) booted: SpinLockIrq<bool>, +} + +impl Firmware { + pub(crate) fn init( + tdev: &TyrDevice, + pdev: platform::Device, + gpu_info: &GpuInfo, + mmu: Pin<&Mutex<Mmu>>, + iomem: Arc<Devres<IoMem>>, + ) -> Result<impl PinInit<Self>> { + let vm = { + let auto_kernel_va = CSF_MCU_SHARED_REGION_START as u64 + ..CSF_MCU_SHARED_REGION_START as u64 + CSF_MCU_SHARED_REGION_SIZE as u64; + + let mut mmu = mmu.lock(); + + // Create the FW VM. This will be used to communicate between the CPU + // and the MCU. + let vm = mmu.create_vm( + tdev, + pdev.clone(), + gpu_info, + true, + VmLayout { + user: 0..0, + kernel: 0..4 * SZ_1G as u64, + }, + auto_kernel_va, + )?; + + mmu.bind_vm(vm.clone(), gpu_info, &iomem)?; + + vm + }; + + let (sections, shared_section) = + Self::read_sections(tdev, iomem.clone(), gpu_info, vm.clone())?; + + let req_wait = ReqWait::new()?; + let global_iface = GlobalInterface::new(shared_section, iomem.clone(), req_wait.clone())?; + + Ok(pin_init!(Self { + sections <- new_mutex!(sections), + global_iface, + vm, + req_wait, + booted <- new_spinlock_irq!(false), + })) + } + + /// Enables the FW interfaces. + pub(crate) fn enable( + &self, + iomem: Arc<Devres<IoMem>>, + gpu_info: &GpuInfo, + core_clk: &Clk, + ) -> Result { + self.global_iface.enable(iomem, gpu_info, core_clk)?; + self.global_iface.ping_once()?; + // self.global_iface.arm_watchdog()?; + // TODO: enable other interfaces in the future. + Ok(()) + } + + // Wait for acks from the MCU. + // + // `req_ptr` and `ack_ptr` are pointers to memory regions shared with the + // MCU. + // + // Plain references are not used because the underlying shared memory can + // be mutated at any time, violating Rust assumptions about its contents. + // + // # Safety + // + // - Callers must ensure that `req_ptr` and `ack_ptr` are valid pointers. + // unsafe fn wait_acks( + // req_ptr: *const u32, + // ack_ptr: *mut u32, + // wait: &CondVar, + // req_mask: u32, + // timeout_ms: u32, + // ) -> Result<u32> { + // // SAFETY: safe as per the safety requirements. + // let req = unsafe { *req_ptr } & req_mask; + // let acked = req_mask; + + // // SAFETY: safe as per the safety requirements. + // let op = || unsafe { *ack_ptr }; + + // let cond = |acked: &u32| *acked & req_mask == req; + + // let poll_res = io::poll::read_poll_timeout( + // op, + // cond, + // time::Delta::from_millis(100), + // Some(time::Delta::from_millis(200)), + // )?; + + // if let Err(ETIMEDOUT) = poll_res { + // wait.wait_interruptible_timeout(guard, jiffies) + // } else { + // poll_res + // } + // } +} diff --git a/drivers/gpu/drm/tyr/fw/global.rs b/drivers/gpu/drm/tyr/fw/global.rs new file mode 100644 index 0000000000000000000000000000000000000000..a7d619ad45576ace1d93cf2d9d9a467e4d0794b7 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw/global.rs @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Code to control the global interface of the CSF firmware. 
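Editor's note: the global interface below drives the firmware through the req/ack protocol implemented by ReqKmapOffset::toggle_reqs() and pending_reqs() earlier in fw.rs. The bit arithmetic is easier to follow with concrete values; this standalone sketch (plain Rust, no kernel types) reproduces the same formulas:

    /// Same arithmetic as ReqKmapOffset::toggle_reqs(): flip the selected bits
    /// of `req` so that they differ from `ack`, leaving all other bits untouched.
    fn toggle_reqs(req: u32, ack: u32, mask: u32) -> u32 {
        ((ack ^ mask) & mask) | (req & !mask)
    }

    /// Same check as pending_reqs(): a request is outstanding while req and
    /// ack disagree on any bit in `mask`.
    fn pending(req: u32, ack: u32, mask: u32) -> bool {
        ((req & mask) ^ (ack & mask)) != 0
    }

    fn main() {
        const GLB_PING: u32 = 1 << 8;

        // Idle: host and firmware agree, nothing is pending.
        let (mut req, ack) = (0, 0);
        assert!(!pending(req, ack, GLB_PING));

        // Host raises a ping request; req now differs from ack.
        req = toggle_reqs(req, ack, GLB_PING);
        assert!(pending(req, ack, GLB_PING));

        // Firmware acks by mirroring the bit back; the channel is idle again.
        let ack = req & GLB_PING;
        assert!(!pending(req, ack, GLB_PING));
    }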
+ +use kernel::bits::genmask_u32; +use kernel::clk::Clk; +use kernel::devres::Devres; +use kernel::impl_has_work; +use kernel::interrupt::interrupt_disable; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::new_spinlock_irq; +use kernel::new_work; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::SpinLockIrq; +use kernel::time; +use kernel::time::Delta; +use kernel::workqueue; +use kernel::workqueue::Work; +use kernel::workqueue::WorkItem; + +use crate::fw::ReqKmapOffset; +use crate::gpu::GpuInfo; +use crate::regs::Doorbell; +use crate::regs::CSF_GLB_DOORBELL_ID; + +use super::{CheckedKmapOffset, ReqWait, Section}; + +#[allow(dead_code)] +mod constants { + use kernel::bits::{bit_u32, genmask_u32}; + + pub(super) const GLB_TIMER_SOURCE_GPU_COUNTER: u32 = bit_u32(31); + pub(super) const PROGRESS_TIMEOUT_CYCLES: u32 = 5 * 500 * 1024 * 1024; + pub(super) const PROGRESS_TIMEOUT_SCALE_SHIFT: u32 = 10; + pub(super) const IDLE_HYSTERESIS_US: u32 = 800; + pub(super) const PWROFF_HYSTERESIS_US: u32 = 10000; + pub(super) const GLB_HALT: u32 = bit_u32(0); + pub(super) const GLB_CFG_PROGRESS_TIMER: u32 = bit_u32(1); + pub(super) const GLB_CFG_ALLOC_EN: u32 = bit_u32(2); + pub(super) const GLB_CFG_POWEROFF_TIMER: u32 = bit_u32(3); + pub(super) const GLB_PROTM_ENTER: u32 = bit_u32(4); + pub(super) const GLB_PERFCNT_EN: u32 = bit_u32(5); + pub(super) const GLB_PERFCNT_SAMPLE: u32 = bit_u32(6); + pub(super) const GLB_COUNTER_EN: u32 = bit_u32(7); + pub(super) const GLB_PING: u32 = bit_u32(8); + pub(super) const GLB_FWCFG_UPDATE: u32 = bit_u32(9); + pub(super) const GLB_IDLE_EN: u32 = bit_u32(10); + pub(super) const GLB_SLEEP: u32 = bit_u32(12); + pub(super) const GLB_INACTIVE_COMPUTE: u32 = bit_u32(20); + pub(super) const GLB_INACTIVE_FRAGMENT: u32 = bit_u32(21); + pub(super) const GLB_INACTIVE_TILER: u32 = bit_u32(22); + pub(super) const GLB_PROTM_EXIT: u32 = bit_u32(23); + pub(super) const GLB_PERFCNT_THRESHOLD: u32 = bit_u32(24); + pub(super) const GLB_PERFCNT_OVERFLOW: u32 = bit_u32(25); + pub(super) const GLB_IDLE: u32 = bit_u32(26); + pub(super) const GLB_DBG_CSF: u32 = bit_u32(30); + pub(super) const GLB_DBG_HOST: u32 = bit_u32(31); + pub(super) const GLB_REQ_MASK: u32 = genmask_u32(10, 0); + pub(super) const GLB_EVT_MASK: u32 = genmask_u32(26, 20); + + pub(super) const PING_INTERVAL_MS: i64 = 12000; +} + +use constants::*; + +fn glb_timer_val(val: u32) -> u32 { + val & genmask_u32(30, 0) +} + +#[repr(transparent)] +/// A value that is valid to pass for timeout fields in the global interface. +struct TimeoutCycles(u32); + +impl TimeoutCycles { + fn from_micro(core_clk: &Clk, timeout_us: u32) -> Result<Self> { + let timer_rate = core_clk.rate() as u64; + + if timer_rate == 0 { + return Err(EINVAL); + } + + let mut mod_cycles = (u64::from(timeout_us) * timer_rate).div_ceil(1000000 << 10); + + if mod_cycles > glb_timer_val(u32::MAX).into() { + pr_err!("Invalid timeout computed\n"); + mod_cycles = glb_timer_val(u32::MAX).into(); + } + + let mod_cycles = u32::try_from(mod_cycles)?; + Ok(Self( + glb_timer_val(mod_cycles) | GLB_TIMER_SOURCE_GPU_COUNTER, + )) + } +} + +impl From<TimeoutCycles> for u32 { + fn from(value: TimeoutCycles) -> Self { + value.0 + } +} + +/// The global control interface. 
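Editor's note: TimeoutCycles::from_micro() above turns a microsecond budget into firmware timer ticks: the product timeout_us * core_clk_rate is divided by 1_000_000 << 10 (one tick per 1024 GPU cycles), rounded up, clamped to the 31-bit timer field, and tagged with GLB_TIMER_SOURCE_GPU_COUNTER. A standalone sketch with worked numbers, assuming a 500 MHz core clock (a made-up figure for the example):

    const GLB_TIMER_SOURCE_GPU_COUNTER: u32 = 1 << 31;

    /// Mask down to the 31-bit timer value field, like glb_timer_val().
    fn glb_timer_val(val: u32) -> u32 {
        val & 0x7fff_ffff
    }

    /// Same computation as TimeoutCycles::from_micro(), minus the Clk plumbing.
    fn timeout_cycles(timer_rate_hz: u64, timeout_us: u32) -> u32 {
        let mut cycles = (u64::from(timeout_us) * timer_rate_hz).div_ceil(1_000_000 << 10);
        if cycles > u64::from(glb_timer_val(u32::MAX)) {
            cycles = u64::from(glb_timer_val(u32::MAX));
        }
        glb_timer_val(cycles as u32) | GLB_TIMER_SOURCE_GPU_COUNTER
    }

    fn main() {
        // 10 ms power-off hysteresis at a 500 MHz core clock:
        // ceil(10_000 us * 500_000_000 Hz / (1_000_000 * 1024)) = 4883 ticks.
        let v = timeout_cycles(500_000_000, 10_000);
        assert_eq!(v & 0x7fff_ffff, 4883);
        assert_ne!(v & GLB_TIMER_SOURCE_GPU_COUNTER, 0);
    }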
+#[repr(C)] +pub(crate) struct Control { + pub(crate) version: u32, + pub(crate) features: u32, + pub(crate) input_va: u32, + pub(crate) output_va: u32, + pub(crate) group_num: u32, + pub(crate) group_stride: u32, + pub(crate) perfcnt_size: u32, + pub(crate) instr_features: u32, +} + +impl Control { + /// CSF major version. + pub(crate) fn version_major(&self) -> u32 { + self.version >> 24 + } + + /// CSF minor version. + pub(crate) fn version_minor(&self) -> u32 { + (self.version >> 16) & 0xff + } + + /// CSF patch version. + pub(crate) fn version_patch(&self) -> u32 { + self.version & 0xffff + } +} + +#[repr(C)] +#[derive(Debug)] +/// The input area for the global interface +pub(crate) struct Input { + pub(crate) req: u32, + pub(crate) ack_irq_mask: u32, + pub(crate) doorbell_req: u32, + pub(crate) reserved1: u32, + pub(crate) progress_timer: u32, + pub(crate) poweroff_timer: u32, + pub(crate) core_en_mask: u32, + pub(crate) reserved2: u32, + pub(crate) perfcnt_as: u32, + pub(crate) perfcnt_base: u64, + pub(crate) perfcnt_extratct: u32, + pub(crate) reserved3: [u32; 3], + pub(crate) percnt_config: u32, + pub(crate) percnt_csg_select: u32, + pub(crate) perfcnt_fw_enable: u32, + pub(crate) perfcnt_csg_enable: u32, + pub(crate) perfcnt_csf_enable: u32, + pub(crate) perfcnt_shader_enable: u32, + pub(crate) perfcnt_tiler_enable: u32, + pub(crate) perfcnt_mmu_l2_enable: u32, + pub(crate) reserved4: [u32; 8], + pub(crate) idle_timer: u32, +} + +#[repr(C)] +#[derive(Debug)] +/// The output area for the global interface +pub(crate) struct Output { + pub(crate) ack: u32, + pub(crate) reserved1: u32, + pub(crate) doorbell_ack: u32, + pub(crate) reserved2: u32, + pub(crate) halt_status: u32, + pub(crate) perfcnt_status: u32, + pub(crate) perfcnt_insert: u32, +} + +macro_rules! impl_shared_section_rw { + ($type:ty) => { + impl $type { + /// Reads the control interface from the given pointer. + /// + /// Note that the area pointed to by `ptr` is shared with the MCU, so we + /// cannot simply parse it or cast it to &Self. + /// + /// Merely taking a reference to it would be UB, as the MCU can change the + /// underlying memory at any time, as it is a core running its own code. + pub(super) fn read( + shared_section: &mut Section, + offset: CheckedKmapOffset, + ) -> Result<Self> { + let ptr = offset.as_mut_ptr(shared_section)?; + // SAFETY: we know that this pointer is aligned and valid for reads for + // at least size_of::<Self>() bytes. + Ok(unsafe { core::ptr::read_volatile(ptr as *mut Self) }) + } + + /// Writes the control interface to the given pointer. + /// + /// Note that the area pointed to by `ptr` is shared with the MCU, so we + /// cannot simply parse it or cast it to &mut Self. + /// + /// Merely taking a reference to it would be UB, as the MCU can change the + /// underlying memory at any time, as it is a core running its own code. + pub(super) fn write( + self, + shared_section: &mut Section, + offset: CheckedKmapOffset, + ) -> Result<()> { + let ptr = offset.as_mut_ptr(shared_section)?; + // SAFETY: we know that this pointer is aligned and valid for writes for + // at least size_of::<Self>() bytes. 
+ unsafe { + core::ptr::write_volatile(ptr as *mut Self, self); + } + + Ok(()) + } + } + }; +} + +impl_shared_section_rw!(Control); +impl_shared_section_rw!(Input); +impl_shared_section_rw!(Output); + +enum GlobalInterfaceState { + Disabled, + Enabled { + control_offset: CheckedKmapOffset, + input_offset: CheckedKmapOffset, + output_offset: CheckedKmapOffset, + }, +} + +#[pin_data] +/// The global interface. +pub(super) struct GlobalInterface { + #[pin] + state: SpinLockIrq<GlobalInterfaceState>, + + #[pin] + ping_work: Work<Self>, + + iomem: Arc<Devres<IoMem>>, + + #[pin] + shared_section: SpinLockIrq<Section>, + + req_wait: Arc<ReqWait>, +} + +impl GlobalInterface { + pub(super) fn new( + shared_section: Section, + iomem: Arc<Devres<IoMem>>, + req_wait: Arc<ReqWait>, + ) -> Result<Arc<Self>> { + let init = pin_init!(Self { + state <- new_spinlock_irq!(GlobalInterfaceState::Disabled), + ping_work <- new_work!("TyrFwPingWork"), + iomem, + shared_section <- new_spinlock_irq!(shared_section), + req_wait, + }); + + Arc::pin_init(init, GFP_KERNEL) + } + + pub(super) fn enable( + self: &Arc<Self>, + iomem: Arc<Devres<IoMem>>, + gpu_info: &GpuInfo, + core_clk: &Clk, + ) -> Result { + // This takes a mutex internally in clk_prepare(). + let poweroff_timer = TimeoutCycles::from_micro(core_clk, PWROFF_HYSTERESIS_US)?.into(); + + let interrupt_disable = interrupt_disable(); + let mut shared_section = self.shared_section.lock_with(&interrupt_disable); + + let control_offset = CheckedKmapOffset(0); + + let op = || Control::read(&mut shared_section, control_offset); + let cond = |control: &Control| -> bool { control.version != 0 }; + let _ = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(0), + Some(time::Delta::from_millis(200)), + ); + + let control = Control::read(&mut shared_section, control_offset)?; + if control.version == 0 { + pr_err!("MCU firmware version is 0. Firmware may have failed to boot\n"); + return Err(EINVAL); + } + + let input_offset = super::Firmware::kmap_offset(&shared_section, control.input_va.into())?; + + let output_offset = + super::Firmware::kmap_offset(&shared_section, control.output_va.into())?; + + pr_info!( + "CSF FW using interface v.{}.{}.{}, Features {} Instrumentation features {}\n", + control.version_major(), + control.version_minor(), + control.version_patch(), + control.features, + control.instr_features + ); + + let mut input = Input::read(&mut shared_section, input_offset)?; + + // Enable all shader cores. + input.core_en_mask = gpu_info.shader_present as u32; + + // Setup timers. + input.poweroff_timer = poweroff_timer; + input.progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; + input.idle_timer = IDLE_HYSTERESIS_US; + + // Enable the interrupts we care about. + input.ack_irq_mask = GLB_CFG_ALLOC_EN + | GLB_PING + | GLB_CFG_POWEROFF_TIMER + | GLB_CFG_POWEROFF_TIMER + | GLB_IDLE_EN + | GLB_IDLE; + + input.write(&mut shared_section, input_offset)?; + + // Req is the first field of the input area. + let req = ReqKmapOffset(input_offset); + req.update_reqs(&mut shared_section, GLB_IDLE_EN, GLB_IDLE_EN)?; + + // Ack is the first field of the output area. + let ack_offset = output_offset; + let reqs = GLB_CFG_ALLOC_EN | GLB_CFG_POWEROFF_TIMER | GLB_CFG_PROGRESS_TIMER; + req.toggle_reqs(&mut shared_section, ack_offset, reqs)?; + + // Make lockdep happy, and also make sure that this lock is not held + // when the interrupt fires, which can be immediately after the doorbell + // is rung. 
+ drop(shared_section); + Doorbell::new(CSF_GLB_DOORBELL_ID).write(&iomem, 1)?; + + let mut state = self.state.lock(); + + *state = GlobalInterfaceState::Enabled { + control_offset, + input_offset, + output_offset, + }; + + Ok(()) + } + + pub(crate) fn arm_watchdog(self: &Arc<Self>) -> Result { + workqueue::system_unbound() + .enqueue(self.clone()) + .map_err(|_| EINVAL) + } + + pub(crate) fn ping_once(&self) -> Result { + // Have this here until we support delayed works. Instead of a + // heartbeat, we get a one-shot ping, but that is OK for now. + kernel::time::delay::fsleep(Delta::from_millis(100)); + + // let interrupt_disable = interrupt_disable(); + // let mut state = this.state.lock_with(&interrupt_disable); + + // Unfortunately, we cannot have both interrupts disabled and CondVar at + // the same time for now. + // + // TODO: I don't think we access GlobalInterfaceState from IRQ context, + // so we can possibly switch to a regular SpinLock or even a Mutex. + let state = self.state.lock(); + if let &GlobalInterfaceState::Enabled { + input_offset, + output_offset, + .. + } = &*state + { + pr_info!("Pinging the CSF global interface\n"); + + // Req is the first field of the input area. + let req = ReqKmapOffset(input_offset); + + // Ack is the first field of the output area. + let ack = output_offset; + + req.toggle_reqs(&mut self.shared_section.lock(), ack, GLB_PING)?; + Doorbell::new(CSF_GLB_DOORBELL_ID).write(&self.iomem, 1)?; + + if !req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING)? { + pr_info!("CSF has responded to a ping request\n"); + } else { + let op = || req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING); + let cond = |pending_reqs: &bool| !*pending_reqs; + + io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(0), + Some(time::Delta::from_millis(100)), + )?; + + if !req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING)? { + pr_info!("CSF has responded to a ping request\n"); + } else { + pr_err!("CSF has not responded to a ping request\n"); + return Err(ETIMEDOUT); + } + } + } + Ok(()) + } +} + +impl_has_work! { + impl HasWork<Self> for GlobalInterface { + self.ping_work + } +} + +impl WorkItem for GlobalInterface { + type Pointer = Arc<Self>; + + fn run(this: Self::Pointer) { + /* TODO: we need support for delayed_work */ + } +} diff --git a/drivers/gpu/drm/tyr/fw/irq.rs b/drivers/gpu/drm/tyr/fw/irq.rs new file mode 100644 index 0000000000000000000000000000000000000000..be05c226a6a025d337086f0414394910385ce9cf --- /dev/null +++ b/drivers/gpu/drm/tyr/fw/irq.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! The IRQ handling for the Job IRQs. +//! +//! The Job IRQ controls our interactions with the MCU. 
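Editor's note: the handler below only acknowledges JOB_INT_RAWSTAT, records that the MCU has booted, and wakes everyone blocked on req_wait; the actual completion check happens at the waiter. Once the temporary polling is replaced by interrupt-driven waits, a waiter would look roughly like the commented-out variant in driver.rs. A hedged sketch only, reusing the ReqWait::wait_interruptible_timeout() signature from fw/wait.rs:

    // Sketch only: mirrors the commented-out code in driver.rs and assumes the
    // interrupt path (JOB_INT_MASK unmasked) is in place.
    fn wait_for_mcu_to_boot(tdev: &TyrDevice) -> Result {
        let data = tdev.data();

        // Busy-wait briefly first, then sleep on the condvar that the Job IRQ
        // handler notifies, until the handler has seen JOB_INT_GLOBAL_IF.
        data.fw.req_wait.wait_interruptible_timeout(
            1000,
            || Ok(*data.fw.booted.lock()),
            |booted| *booted,
            true,
        )
    }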
+ +use kernel::c_str; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::irq; +use kernel::irq::request::Handler as IrqHandler; +use kernel::irq::request::IrqReturn; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::regs; + +pub(crate) struct JobIrqHandler { + tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, +} + +impl IrqHandler for JobIrqHandler { + fn handle_irq(&self) -> IrqReturn { + let rawstat = regs::JOB_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + dev_info!(self.tdev.as_ref(), "Acknoledging job IRQ\n"); + + let _ = regs::JOB_INT_CLEAR.write(&self.iomem, rawstat); + + let data = self.tdev.data(); + let mut booted = data.fw.booted.lock(); + + if !*booted && rawstat & regs::JOB_INT_GLOBAL_IF != 0 { + *booted = true; + + dev_info!(self.tdev.as_ref(), "GPU is ready to accept requests\n"); + } + + // Notify everyone waiting on a response from CSF. + data.fw.req_wait.notify_all(); + IrqReturn::Handled + } +} + +pub(crate) fn job_irq_init( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + iomem: Arc<Devres<IoMem>>, +) -> Result<impl PinInit<IrqRegistration<JobIrqHandler>, Error>> { + let job_irq = pdev.irq_by_name(c_str!("job"))?; + + let irq_handler = JobIrqHandler { + tdev, + iomem: iomem.clone(), + }; + + // Lets disable IRQs in favor of explicit polling for now due to issues with + // SpinLockIrq and CondVar. + // + // JOB_INT_MASK.write(&iomem, u32::MAX)?; + regs::JOB_INT_MASK.write(&iomem, 0)?; + + Ok(IrqRegistration::register( + job_irq, + irq::request::flags::SHARED, + c_str!("tyr-job"), + irq_handler, + )) +} diff --git a/drivers/gpu/drm/tyr/fw/parse.rs b/drivers/gpu/drm/tyr/fw/parse.rs new file mode 100644 index 0000000000000000000000000000000000000000..9da3241b01d46cf549bb60f6c3025d85296f3bf3 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw/parse.rs @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Code to parse the firmware binary. 
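Editor's note: BinaryHeader::new() below walks a 20-byte little-endian header (magic, minor, major, 16-bit padding, version hash, 32-bit padding, total size) through the cursor. As a standalone illustration of that layout, here is the same header decoded with plain slice reads and a made-up payload:

    const FW_BINARY_MAGIC: u32 = 0xc3f1_3a6e;

    /// The 20-byte header at the start of mali_csffw.bin, as BinaryHeader::new()
    /// reads it: all fields are little-endian.
    struct Header {
        magic: u32,
        minor: u8,
        major: u8,
        version_hash: u32,
        size: u32,
    }

    fn parse_header(data: &[u8]) -> Option<Header> {
        let magic = u32::from_le_bytes(data.get(0..4)?.try_into().ok()?);
        if magic != FW_BINARY_MAGIC {
            return None;
        }
        let minor = *data.get(4)?;
        let major = *data.get(5)?;
        // Bytes 6..8 and 12..16 are padding and must be zero.
        let version_hash = u32::from_le_bytes(data.get(8..12)?.try_into().ok()?);
        let size = u32::from_le_bytes(data.get(16..20)?.try_into().ok()?);
        Some(Header { magic, minor, major, version_hash, size })
    }

    fn main() {
        // Hypothetical header: magic, version 0.5, hash 0xdeadbeef, 64 KiB image.
        let mut bytes = [0u8; 20];
        bytes[0..4].copy_from_slice(&FW_BINARY_MAGIC.to_le_bytes());
        bytes[4] = 5; // minor
        bytes[5] = 0; // major
        bytes[8..12].copy_from_slice(&0xdead_beefu32.to_le_bytes());
        bytes[16..20].copy_from_slice(&(64 * 1024u32).to_le_bytes());

        let hdr = parse_header(&bytes).expect("valid header");
        assert_eq!(hdr.magic, FW_BINARY_MAGIC);
        assert_eq!((hdr.major, hdr.minor), (0, 5));
        assert_eq!(hdr.size, 64 * 1024);
    }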
+ +use core::ops::Range; + +use cursor::Cursor; +use kernel::alloc::KVec; +use kernel::bits::bit_u32; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::fmt; +use kernel::io::mem::IoMem; +use kernel::prelude::*; +use kernel::str::CString; +use kernel::sync::Arc; +use kernel::sync::Mutex; + +use crate::driver::TyrDevice; +use crate::fw::CheckedKmapOffset; +use crate::fw::Firmware; +use crate::fw::CSF_MCU_SHARED_REGION_START; +use crate::gem; +use crate::gem::KernelVaPlacement; +use crate::gpu::GpuId; +use crate::gpu::GpuInfo; +use crate::mmu::vm; +use crate::mmu::vm::Vm; + +mod cursor; + +const FW_BINARY_MAGIC: u32 = 0xc3f13a6e; +const FW_BINARY_MAJOR_MAX: u8 = 0; + +mod flags { + use kernel::bits::bit_u32; + use kernel::bits::genmask_u32; + use kernel::prelude::*; + + use crate::impl_flags; + + impl_flags!(Flags, Flag, u32); + + const CACHE_MODE_MASK: Flags = Flags(genmask_u32(4, 3)); + + impl Flags { + pub(crate) fn cache_mode(&self) -> Flags { + *self & CACHE_MODE_MASK + } + } + + impl TryFrom<u32> for Flags { + type Error = Error; + + fn try_from(value: u32) -> Result<Self, Self::Error> { + if value & valid_flags().0 != value { + Err(EINVAL) + } else { + Ok(Self(value)) + } + } + } + + pub(crate) fn valid_flags() -> Flags { + Flags::from(READ) + | Flags::from(WRITE) + | Flags::from(EXEC) + | CACHE_MODE_MASK + | Flags::from(PROT) + | Flags::from(SHARED) + | Flags::from(ZERO) + } + + pub(crate) const READ: Flag = Flag(bit_u32(0)); + pub(crate) const WRITE: Flag = Flag(bit_u32(1)); + pub(crate) const EXEC: Flag = Flag(bit_u32(2)); + pub(crate) const CACHE_MODE_NONE: Flag = Flag(0 << 3); + pub(crate) const CACHE_MODE_CACHED: Flag = Flag(1 << 3); + pub(crate) const CACHE_MODE_UNCACHED_COHERENT: Flag = Flag(2 << 3); + pub(crate) const CACHE_MODE_CACHED_COHERENT: Flag = Flag(3 << 3); + pub(crate) const PROT: Flag = Flag(bit_u32(5)); + pub(crate) const SHARED: Flag = Flag(bit_u32(30)); + pub(crate) const ZERO: Flag = Flag(bit_u32(31)); +} + +struct BuildInfoHeader(Range<u32>); + +/// A parsed section of the firmware binary. +pub(super) struct Section { + /// Flags for this section. + flags: flags::Flags, + + /// The name of the section in the binary, if any. + name: Option<CString>, + + /// The raw parsed data for reset purposes. + data: KVec<u8>, + + /// The BO that this section was loaded into. + pub(super) mem: gem::ObjectRef, + + /// The VA range for this section. + /// + /// The MCU expects the firmware to be loaded at a specific addresses. + va: Range<u32>, + + /// The flags used to map this section. + vm_map_flags: vm::map_flags::Flags, +} + +/// The firmware header. +struct BinaryHeader { + /// Magic value to check binary validity. + magic: u32, + + /// Minor FW version. + minor: u8, + + /// Major FW version. + major: u8, + + /// Padding. Must be set to zero. + _padding1: u16, + + /// FW Version hash + version_hash: u32, + + /// Padding. Must be set to zero. 
+ _padding2: u32, + + /// FW binary size + size: u32, +} + +impl BinaryHeader { + fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> { + let magic = cursor.read_u32(tdev)?; + if magic != FW_BINARY_MAGIC { + dev_err!(tdev.as_ref(), "Invalid firmware magic"); + return Err(EINVAL); + } + + let minor = cursor.read_u8(tdev)?; + let major = cursor.read_u8(tdev)?; + let padding1 = cursor.read_u16(tdev)?; + let version_hash = cursor.read_u32(tdev)?; + let padding2 = cursor.read_u32(tdev)?; + let size = cursor.read_u32(tdev)?; + + if padding1 != 0 || padding2 != 0 { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: header padding is not zero" + ); + return Err(EINVAL); + } + + Ok(Self { + magic, + minor, + major, + _padding1: padding1, + version_hash, + _padding2: padding2, + size, + }) + } +} + +#[derive(Clone, Copy, Debug)] +enum BinaryEntryType { + /// Host <-> FW interface. + Iface = 0, + /// FW config. + Config = 1, + /// Unit tests. + FutfTest = 2, + /// Trace buffer interface. + TraceBuffer = 3, + /// Timeline metadata interface, + TimelineMetadata = 4, + /// Metadata about how the FW binary was built + BuildInfoMetadata = 6, +} + +impl TryFrom<u8> for BinaryEntryType { + type Error = Error; + + fn try_from(value: u8) -> Result<Self, Self::Error> { + match value { + 0 => Ok(BinaryEntryType::Iface), + 1 => Ok(BinaryEntryType::Config), + 2 => Ok(BinaryEntryType::FutfTest), + 3 => Ok(BinaryEntryType::TraceBuffer), + 4 => Ok(BinaryEntryType::TimelineMetadata), + 6 => Ok(BinaryEntryType::BuildInfoMetadata), + _ => Err(EINVAL), + } + } +} + +#[derive(Debug)] +struct BinarySectionEntryHeader { + /// Section flags + flags: flags::Flags, + /// MCU virtual range to map this binary section to. + va: Range<u32>, + /// References the data in the FW binary. + data: Range<u32>, +} + +impl BinarySectionEntryHeader { + fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> { + let flags = cursor.read_u32(tdev)?; + let flags = flags::Flags::try_from(flags)?; + + let va_start = cursor.read_u32(tdev)?; + let va_end = cursor.read_u32(tdev)?; + + let va = va_start..va_end; + + if va.is_empty() { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: empty VA range at pos {}\n", + cursor.pos(), + ); + return Err(EINVAL); + } + + let data_start = cursor.read_u32(tdev)?; + let data_end = cursor.read_u32(tdev)?; + let data = data_start..data_end; + + Ok(Self { flags, va, data }) + } +} + +struct BinaryEntryHeader(u32); + +impl BinaryEntryHeader { + /// The entry type. + fn entry_ty(&self) -> Result<BinaryEntryType> { + let v = (self.0 & 0xff) as u8; + BinaryEntryType::try_from(v) + } + + /// Whether this entry is optional. + fn optional(&self) -> bool { + self.0 & bit_u32(31) != 0 + } + + /// The size of the entry. + fn size(&self) -> u32 { + self.0 >> 8 & 0xff + } +} + +struct BinaryEntrySection { + hdr: BinaryEntryHeader, + inner: Option<Section>, +} + +impl Firmware { + /// Parses the firmware sections from the binary. 
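Editor's note: each entry in the binary starts with a 32-bit header word; BinaryEntryHeader above unpacks it with entry_ty() (bits 0-7), size() (bits 8-15, in bytes, including the 4-byte header itself) and optional() (bit 31), and read_sections()/read_entry() below rely on those to skip entries they do not understand. A small standalone decode of the same word (decode_entry_header is a hypothetical name used only for this sketch):

    /// Decode the per-entry header word the same way BinaryEntryHeader does.
    fn decode_entry_header(word: u32) -> (u8, u32, bool) {
        let ty = (word & 0xff) as u8;        // entry_ty()
        let size = (word >> 8) & 0xff;       // size(), in bytes incl. header
        let optional = word & (1 << 31) != 0; // optional()
        (ty, size, optional)
    }

    fn main() {
        // Hypothetical interface entry: type 0 (Iface), 44 bytes, not optional.
        let word = 44u32 << 8;
        assert_eq!(decode_entry_header(word), (0, 44, false));

        // Same entry flagged as optional.
        assert_eq!(decode_entry_header(word | 1 << 31), (0, 44, true));
    }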
+ pub(super) fn read_sections( + tdev: &TyrDevice, + iomem: Arc<Devres<IoMem>>, + gpu_info: &GpuInfo, + vm: Arc<Mutex<Vm>>, + ) -> Result<(KVec<Section>, Section)> { + let gpu_id = GpuId::from(gpu_info.gpu_id); + + let fw_path = CString::try_from_fmt(fmt!( + "arm/mali/arch{}.{}/mali_csffw.bin", + gpu_id.arch_major, + gpu_id.arch_minor + ))?; + + let fw = kernel::firmware::Firmware::request(&fw_path, tdev.as_ref())?; + + let mut cursor = Cursor::new(fw.data()); + + dev_err!( + tdev.as_ref(), + "Requested {} bytes of firmware successfully\n", + fw.data().len() + ); + let fw_bin_hdr = match BinaryHeader::new(tdev, &mut cursor) { + Ok(fw_bin_hdr) => fw_bin_hdr, + Err(e) => { + dev_err!(tdev.as_ref(), "Invalid firmware file: {}", e.to_errno()); + return Err(e); + } + }; + + if fw_bin_hdr.magic != FW_BINARY_MAGIC { + dev_err!(tdev.as_ref(), "Invalid firmware magic"); + return Err(EINVAL); + } + + if fw_bin_hdr.major > FW_BINARY_MAJOR_MAX { + dev_err!( + tdev.as_ref(), + "Unsupported firmware binary version: {}.{}", + fw_bin_hdr.major, + fw_bin_hdr.minor + ); + return Err(EINVAL); + } + + if fw_bin_hdr.size > cursor.len() as u32 { + dev_err!(tdev.as_ref(), "Firmware image is truncated"); + return Err(EINVAL); + } + + let mut sections = Vec::new(); + let mut shared_section = None; + + while (cursor.pos() as u32) < fw_bin_hdr.size { + match Self::read_entry(&mut cursor, tdev, iomem.clone(), &fw, vm.clone())? { + section => { + cursor.advance((section.hdr.size() - 4) as usize)?; + + match section.inner { + Some(section) => { + // TODO: refactor this. + if section.flags.contains(flags::SHARED) { + shared_section = Some(section); + } else { + sections.push(section, GFP_KERNEL)? + } + } + None => continue, + } + } + } + } + + let shared_section = shared_section.ok_or_else(|| { + dev_err!(tdev.as_ref(), "No shared section found in firmware"); + EINVAL + })?; + + Ok((sections, shared_section)) + } + + fn read_entry( + cursor: &mut Cursor<'_>, + tdev: &TyrDevice, + iomem: Arc<Devres<IoMem>>, + fw: &kernel::firmware::Firmware, + vm: Arc<Mutex<Vm>>, + ) -> Result<BinaryEntrySection> { + let section = BinaryEntrySection { + hdr: BinaryEntryHeader(cursor.read_u32(tdev)?), + inner: None, + }; + + let section_size = section.hdr.size() as usize - core::mem::size_of::<BinaryEntryHeader>(); + + let entry_ty = match section.hdr.entry_ty() { + Ok(entry_ty) => entry_ty, + Err(e) => { + if section.hdr.optional() { + dev_info!( + tdev.as_ref(), + "Skipping unknown optional firmware entry type: {}", + e.to_errno() + ); + return Ok(section); + } else { + dev_err!( + tdev.as_ref(), + "Invalid firmware entry type: {}", + e.to_errno() + ); + return Err(EINVAL); + } + } + }; + + if cursor.pos() % core::mem::size_of::<u32>() != 0 { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: entry not aligned to 4 bytes at pos {}\n", + cursor.pos() + ); + return Err(EINVAL); + } + + let mut entry_cursor = cursor.view(cursor.pos()..cursor.pos() + section_size)?; + + match entry_ty { + BinaryEntryType::Iface => Ok(BinaryEntrySection { + hdr: section.hdr, + inner: Self::read_section(tdev, iomem, &mut entry_cursor, fw, vm.clone())?, + }), + + BinaryEntryType::BuildInfoMetadata => { + // TODO: Read build metadata + Ok(section) + } + + BinaryEntryType::Config + | BinaryEntryType::FutfTest + | BinaryEntryType::TraceBuffer + | BinaryEntryType::TimelineMetadata => Ok(section), + + _ => { + if !section.hdr.optional() { + dev_info!( + tdev.as_ref(), + "Unsupported non-optional entry type: {}", + entry_ty as u32 + ); + + Err(EINVAL) + } else { 
+ dev_info!( + tdev.as_ref(), + "Skipping unsupported firmware entry type: {}", + entry_ty as u32 + ); + + Ok(section) + } + } + } + } + + fn read_section( + tdev: &TyrDevice, + iomem: Arc<Devres<IoMem>>, + cursor: &mut Cursor<'_>, + fw: &kernel::firmware::Firmware, + vm: Arc<Mutex<Vm>>, + ) -> Result<Option<Section>> { + let hdr = BinarySectionEntryHeader::new(tdev, cursor)?; + + if hdr.flags.contains(flags::PROT) { + dev_warn!( + tdev.as_ref(), + "Firmware protected mode entry not supported, ignoring" + ); + return Ok(None); + } + + if hdr.va.start == CSF_MCU_SHARED_REGION_START && !hdr.flags.contains(flags::SHARED) { + dev_err!( + tdev.as_ref(), + "Interface at 0x{:x} must be shared", + CSF_MCU_SHARED_REGION_START + ); + return Err(EINVAL); + } + + let name_len = cursor.len() - cursor.pos(); + let name_bytes = cursor.read(tdev, name_len)?; + + let mut name = KVec::with_capacity(name_bytes.len() + 1, GFP_KERNEL)?; + name.extend_from_slice(name_bytes, GFP_KERNEL)?; + name.push(0, GFP_KERNEL)?; + + let name = CStr::from_bytes_with_nul(&name) + .ok() + .and_then(|name| CString::try_from(name).ok()); + + let fw = fw.data(); + let section_start = hdr.data.start as usize; + let section_end = hdr.data.end as usize; + + let mut data = KVec::new(); + data.extend_from_slice(&fw[section_start..section_end], GFP_KERNEL)?; + + let bo_len = (hdr.va.end - hdr.va.start) as usize; + + let cache_mode = hdr.flags.cache_mode(); + + let mut vm_map_flags = vm::map_flags::Flags::empty(); + + if !hdr.flags.contains(flags::WRITE) { + vm_map_flags |= vm::map_flags::READONLY; + } + if !hdr.flags.contains(flags::EXEC) { + vm_map_flags |= vm::map_flags::NOEXEC; + } + if cache_mode != flags::CACHE_MODE_CACHED.into() { + vm_map_flags |= vm::map_flags::UNCACHED; + } + + let mut mem = gem::new_kernel_object( + tdev, + iomem, + bo_len, + vm, + KernelVaPlacement::At(hdr.va.start as u64..hdr.va.end as u64), + vm_map_flags, + )?; + + let vmap = mem.vmap()?; + let vmap = vmap.as_mut_slice(); + + vmap[0..data.len()].copy_from_slice(&data); + + if hdr.flags.contains(flags::ZERO) { + vmap[data.len()..].fill(0); + } + + dev_info!( + tdev.as_ref(), + "Copied firmware data to BO {:p} of size {} with flags {}\n", + &mem.gem, + bo_len, + vm_map_flags + ); + + Ok(Some(Section { + flags: hdr.flags, + name, + data, + mem, + va: hdr.va, + vm_map_flags, + })) + } + + fn read_build_info(cursor: &mut Cursor<'_>, tdev: &TyrDevice) -> Result<()> { + let meta_start = cursor.read_u32(tdev)? as usize; + let meta_end = cursor.read_u32(tdev)? as usize; + + let expected_hdr = b"git_sha: "; + let hdr = cursor.read(tdev, expected_hdr.len())?; + + if hdr != expected_hdr { + dev_warn!(tdev.as_ref(), "Firmware's git sha is missing\n"); + return Ok(()); + } + + let sz = meta_end - meta_start - expected_hdr.len(); + let sha = cursor.read(tdev, sz)?; + if sha[sha.len()] != 0 { + dev_warn!(tdev.as_ref(), "Firmware's git sha is not NULL terminated\n"); + return Ok(()); // Don't treat as fatal + } + + let sha = CStr::from_bytes_with_nul(sha).unwrap_or(c_str!("")); + dev_info!( + tdev.as_ref(), + "Firmware git sha: {}\n", + sha.to_str().unwrap() + ); + + Ok(()) + } + + /// Computes the offset into the shared section for a given VA in the shared + /// area. + /// + /// The result is an offset that can be safely dereferenced by the CPU. 
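Editor's note: kmap_offset() below is the only way to construct a CheckedKmapOffset, so its range check is what establishes the type's invariant. The same check, sketched standalone with the driver's shared-region constants and the simplifying assumption that the shared section spans the whole 64 MiB CSF_MCU_SHARED_REGION window:

    const CSF_MCU_SHARED_REGION_START: u64 = 0x0400_0000;
    const CSF_MCU_SHARED_REGION_SIZE: u64 = 0x0400_0000;

    /// Same range check as Firmware::kmap_offset(): translate an MCU VA inside
    /// the shared section into a CPU-side offset, or reject it.
    fn kmap_offset(mcu_va: u64) -> Option<usize> {
        let start = CSF_MCU_SHARED_REGION_START;
        let end = start + CSF_MCU_SHARED_REGION_SIZE;
        if mcu_va < start || mcu_va >= end {
            None
        } else {
            Some((mcu_va - start) as usize)
        }
    }

    fn main() {
        // An interface advertised at MCU VA 0x0400_1000 maps to byte offset
        // 0x1000 within the shared section's kernel vmap.
        assert_eq!(kmap_offset(0x0400_1000), Some(0x1000));
        // Anything outside the 64 MiB shared window is rejected.
        assert_eq!(kmap_offset(0x0800_0000), None);
    }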
+ pub(super) fn kmap_offset(shared_section: &Section, mcu_va: u64) -> Result<CheckedKmapOffset> { + let shared_mem_start = u64::from(shared_section.va.start); + let shared_mem_end = u64::from(shared_section.va.end); + + if mcu_va < shared_mem_start || mcu_va >= shared_mem_end { + Err(EINVAL) + } else { + let offset = (mcu_va - shared_mem_start) as usize; + Ok(CheckedKmapOffset(offset)) + } + } +} diff --git a/drivers/gpu/drm/tyr/fw/parse/cursor.rs b/drivers/gpu/drm/tyr/fw/parse/cursor.rs new file mode 100644 index 0000000000000000000000000000000000000000..dcb94f81cfd329f64c223aef9aad64445e24d725 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw/parse/cursor.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! A bare-bones std::io::Cursor<[u8]> clone to keep track of the current +//! position in the firmware binary. + +use core::ops::Range; + +use kernel::prelude::*; + +use crate::driver::TyrDevice; + +pub(crate) struct Cursor<'a> { + data: &'a [u8], + pos: usize, +} + +impl<'a> Cursor<'a> { + pub(crate) fn new(data: &'a [u8]) -> Self { + Self { data, pos: 0 } + } + + pub(super) fn len(&self) -> usize { + self.data.len() + } + + pub(super) fn pos(&self) -> usize { + self.pos + } + + pub(super) fn advance(&mut self, nbytes: usize) -> Result { + if self.pos + nbytes > self.data.len() { + return Err(EINVAL); + } + + self.pos += nbytes; + Ok(()) + } + + /// Returns a view into the cursor's data. + /// + /// This spawns a new cursor, leaving the current cursor unchanged. + pub(super) fn view(&self, range: Range<usize>) -> Result<Cursor<'_>> { + if range.start < self.pos || range.end > self.data.len() { + pr_err!( + "Invalid cursor range {:?} for data of length {}", + range, + self.data.len() + ); + + Err(EINVAL) + } else { + Ok(Self { + data: &self.data[range], + pos: 0, + }) + } + } + + pub(super) fn read(&mut self, tdev: &TyrDevice, nbytes: usize) -> Result<&[u8]> { + let start = self.pos; + let end = start + nbytes; + + if end > self.data.len() { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: read of size {} at position {} is out of bounds", + nbytes, + start, + ); + return Err(EINVAL); + } + + self.pos += nbytes; + Ok(&self.data[start..end]) + } + + pub(super) fn read_u8(&mut self, tdev: &TyrDevice) -> Result<u8> { + let bytes = self.read(tdev, 1)?; + Ok(bytes[0]) + } + + pub(super) fn read_u16(&mut self, tdev: &TyrDevice) -> Result<u16> { + let bytes = self.read(tdev, 2)?; + Ok(u16::from_le_bytes(bytes.try_into().unwrap())) + } + + pub(super) fn read_u32(&mut self, tdev: &TyrDevice) -> Result<u32> { + let bytes = self.read(tdev, 4)?; + Ok(u32::from_le_bytes(bytes.try_into().unwrap())) + } +} diff --git a/drivers/gpu/drm/tyr/fw/wait.rs b/drivers/gpu/drm/tyr/fw/wait.rs new file mode 100644 index 0000000000000000000000000000000000000000..2b424f6115213ef734eb08d2ea12ec29ed49e205 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw/wait.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Code to wait on CSF responses. + +use kernel::io; +use kernel::new_condvar; +use kernel::new_spinlock_irq; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::CondVar; +use kernel::sync::SpinLockIrq; +use kernel::time; +use kernel::time::msecs_to_jiffies; + +#[pin_data] +/// Represents a wait on a request made to CSF. +pub(crate) struct ReqWait { + #[pin] + /// The actual wait/signal mechanism. + cond: CondVar, + + #[pin] + /// Serializes the access to the lock. + /// + /// This deviates a bit from the general Rust pattern of having the lock + /// wrap the data. 
That is because the "data" is actually a shared section + /// of GPU memory whose layout has to match the firmware's expectations. + lock: SpinLockIrq<()>, +} + +impl ReqWait { + /// A convenience function to initialize the `ReqWait` struct. + /// + /// There is only one instance of this struct in the entire driver. + /// + /// Code that needs to wait on a given CSF request take a reference to + /// `ReqWait`. A `notify_all()` is called every time the job IRQ is + /// triggered. + pub(crate) fn new() -> Result<Arc<Self>> { + Arc::pin_init( + pin_init!(Self { + cond <- new_condvar!(), + lock <- new_spinlock_irq!(()), + }), + GFP_KERNEL, + ) + } + + /// Waits for the completion of a previously submitted CSF request. + /// + /// It's usually useful to busy-wait for a short period of time before going + /// to sleep, as some requests can be answered extremely quickly. + pub(crate) fn wait_interruptible_timeout<Op, Cond, T>( + &self, + timeout_ms: u32, + mut op: Op, + mut cond: Cond, + busy_wait_first: bool, + ) -> Result + where + Op: FnMut() -> Result<T>, + Cond: FnMut(&T) -> bool, + { + if busy_wait_first { + let res = io::poll::read_poll_timeout( + &mut op, + &mut cond, + time::Delta::from_millis(0), + Some(time::Delta::from_micros(10)), + ); + + if res.is_ok() { + return Ok(()); + } + } + + let mut guard = self.lock.lock(); + let mut remaining_time = msecs_to_jiffies(timeout_ms); + + loop { + match self + .cond + .wait_interruptible_timeout(&mut guard, remaining_time) + { + kernel::sync::CondVarTimeoutResult::Woken { jiffies } => { + let op = op()?; + if cond(&op) { + return Ok(()); + } else { + remaining_time -= jiffies + } + } + kernel::sync::CondVarTimeoutResult::Timeout => return Err(ETIMEDOUT), + kernel::sync::CondVarTimeoutResult::Signal { .. } => return Err(ERESTARTSYS), + } + } + } + + pub(crate) fn notify_one(&self) { + let _guard = self.lock.lock(); + self.cond.notify_one(); + } + + pub(crate) fn notify_all(&self) { + let _guard = self.lock.lock(); + self.cond.notify_all(); + } +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 0000000000000000000000000000000000000000..5a14a62a7a349fed1fd40f24cbad1b8cc4f92a32 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use core::ops::Range; + +use kernel::devres::Devres; +use kernel::drm::gem::shmem; +use kernel::drm::gem::BaseObject; +use kernel::drm::gem::{self}; +use kernel::drm::mm; +use kernel::io::mem::IoMem; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::Mutex; + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use crate::file::DrmFile; +use crate::mmu::vm; +use crate::mmu::vm::Vm; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct DriverObject { + /// Whether this is a kernel or user BO. + ty: ObjectType, + + /// The flags received at BO creation time. + flags: u32, +} + +enum ObjectType { + Kernel { + // Kernel objects have their VA managed by the MM allocator. This node + // represents the allocation. + node: mm::Node<(), ()>, + }, + + User, +} + +/// Type alias for the GEM object tyoe for this driver. +pub(crate) type Object = gem::shmem::Object<DriverObject>; + +/// Type alias for the SGTable type for this driver. 
+pub(crate) type SGTable = shmem::SGTable<DriverObject>; + +impl gem::BaseDriverObject<Object> for DriverObject { + fn new(dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { + dev_dbg!(dev.as_ref(), "DriverObject::new\n"); + DriverObject { + ty: ObjectType::User, + flags: 0, + } + } +} + +impl gem::shmem::DriverObject for DriverObject { + type Driver = TyrDriver; +} + +/// A shared reference to a GEM object for this driver. +pub(crate) struct ObjectRef { + /// The underlying GEM object reference + pub(crate) gem: gem::ObjectRef<shmem::Object<DriverObject>>, + + /// The kernel-side VMap of this object, if any. + vmap: Option<shmem::VMap<DriverObject>>, +} + +impl ObjectRef { + /// Create a new wrapper for a raw GEM object reference. + pub(crate) fn new(gem: gem::ObjectRef<shmem::Object<DriverObject>>) -> ObjectRef { + ObjectRef { gem, vmap: None } + } + + /// Return the `VMap` for this object, creating it if necessary. + pub(crate) fn vmap(&mut self) -> Result<&mut shmem::VMap<DriverObject>> { + if self.vmap.is_none() { + self.vmap = Some(self.gem.vmap()?); + } + Ok(self.vmap.as_mut().unwrap()) + } + + /// Returns the size of an object in bytes + pub(crate) fn size(&self) -> usize { + self.gem.size() + } +} + +/// Create a new DRM GEM object. +pub(crate) fn new_object(dev: &TyrDevice, size: usize, flags: u32) -> Result<ObjectRef> { + let aligned_size = size.next_multiple_of(1 << 12); + + if size == 0 || size > aligned_size { + return Err(EINVAL); + } + + let mut gem = Object::new(dev, aligned_size)?; + gem.set_wc(true); + gem.flags = flags; + + Ok(ObjectRef::new(gem.into_ref())) +} + +/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it. +pub(crate) fn lookup_handle(file: &DrmFile, handle: u32) -> Result<ObjectRef> { + Ok(ObjectRef::new(shmem::Object::lookup_handle(file, handle)?)) +} + +/// Create a new kernel-owned GEM object. +pub(crate) fn new_kernel_object( + tdev: &TyrDevice, + iomem: Arc<Devres<IoMem>>, + size: usize, + vm: Arc<Mutex<Vm>>, + va: KernelVaPlacement, + flags: vm::map_flags::Flags, +) -> Result<ObjectRef> { + let aligned_size = size.next_multiple_of(1 << 12); + + let mut gem: gem::UniqueObjectRef<shmem::Object<DriverObject>> = + shmem::Object::<DriverObject>::new(tdev, aligned_size)?; + gem.set_wc(true); + + let node = vm.lock().alloc_kernel_range(va)?; + + let range = node.start()..node.start() + node.size(); + gem.ty = ObjectType::Kernel { node }; + + let gem = ObjectRef::new(gem.into_ref()); + vm.lock().bind_gem(iomem, &gem.gem, 0, range, flags)?; + + Ok(gem) +} + +/// Creates a dummy GEM object to serve as the root of a GPUVM. +pub(crate) fn new_dummy_object(tdev: &TyrDevice) -> Result<ObjectRef> { + let mut gem = Object::new(tdev, 4096)?; + gem.set_wc(true); + + Ok(ObjectRef::new(gem.into_ref())) +} + +/// Controls the VA range assigned to a kernel-owned GEM object. +pub(crate) enum KernelVaPlacement { + /// Automatically place this object in a free spot in the kernel VA range. + Auto, + /// Place this object at a given address. 
+ At(Range<u64>), +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 0000000000000000000000000000000000000000..c94b87e07bf8e75f25eee3bca6cb2fff075b65bb --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::regs::*; +use kernel::bits; +use kernel::bits::genmask_u32; +use kernel::devres::Devres; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::platform; +use kernel::prelude::*; +use kernel::time; +use kernel::transmute::AsBytes; + +pub(crate) mod irq; +pub(crate) mod wait; + +#[repr(C)] +// This can be queried by userspace to get information about the GPU. +pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) csf_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) core_features: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) shader_present: u64, + pub(crate) tiler_present: u64, + pub(crate) l2_present: u64, +} + +impl GpuInfo { + pub(crate) fn new(iomem: &Devres<IoMem>) -> Result<Self> { + let gpu_id = GPU_ID.read(iomem)?; + let csf_id = GPU_CSF_ID.read(iomem)?; + let gpu_rev = GPU_REVID.read(iomem)?; + let core_features = GPU_CORE_FEATURES.read(iomem)?; + let l2_features = GPU_L2_FEATURES.read(iomem)?; + let tiler_features = GPU_TILER_FEATURES.read(iomem)?; + let mem_features = GPU_MEM_FEATURES.read(iomem)?; + let mmu_features = GPU_MMU_FEATURES.read(iomem)?; + let thread_features = GPU_THREAD_FEATURES.read(iomem)?; + let max_threads = GPU_THREAD_MAX_THREADS.read(iomem)?; + let thread_max_workgroup_size = GPU_THREAD_MAX_WORKGROUP_SIZE.read(iomem)?; + let thread_max_barrier_size = GPU_THREAD_MAX_BARRIER_SIZE.read(iomem)?; + let coherency_features = GPU_COHERENCY_FEATURES.read(iomem)?; + + let texture_features = GPU_TEXTURE_FEATURES0.read(iomem)?; + + let as_present = GPU_AS_PRESENT.read(iomem)?; + + let shader_present = GPU_SHADER_PRESENT_LO.read(iomem)? as u64; + let shader_present = shader_present | (GPU_SHADER_PRESENT_HI.read(iomem)? as u64) << 32; + + let tiler_present = GPU_TILER_PRESENT_LO.read(iomem)? as u64; + let tiler_present = tiler_present | (GPU_TILER_PRESENT_HI.read(iomem)? as u64) << 32; + + let l2_present = GPU_L2_PRESENT_LO.read(iomem)? as u64; + let l2_present = l2_present | (GPU_L2_PRESENT_HI.read(iomem)? 
as u64) << 32; + + Ok(Self { + gpu_id, + csf_id, + gpu_rev, + core_features, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + texture_features: [texture_features, 0, 0, 0], + as_present, + shader_present, + tiler_present, + l2_present, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & bits::genmask_u32(7, 0) + } + + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & bits::genmask_u32(7, 0) + } +} + +// SAFETY: +// +// This type is the same type exposed by Panthor's uAPI. As it's declared as +// #repr(C), we can be sure that the layout is the same. Therefore, it is safe +// to expose this to userspace. +unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +#[allow(dead_code)] +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From<u32> for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(31, 28)) >> 28, + arch_minor: (value & genmask_u32(27, 24)) >> 24, + arch_rev: (value & genmask_u32(23, 20)) >> 20, + prod_major: (value & genmask_u32(19, 16)) >> 16, + ver_major: (value & genmask_u32(15, 12)) >> 12, + ver_minor: (value & genmask_u32(11, 4)) >> 4, + ver_status: value & genmask_u32(3, 0), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(iomem: &Devres<IoMem>) -> Result<()> { + let op = || L2_PWRTRANS_LO.read(iomem); + + let cond = |pwr_trans: &u32| *pwr_trans == 0; + + let _ = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_millis(200)), + )?; + + L2_PWRON_LO.write(iomem, 1)?; + + let op = || L2_READY_LO.read(iomem); + let cond = |l2_ready: &u32| *l2_ready == 1; + + let _ = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_millis(200)), + )?; + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/gpu/irq.rs b/drivers/gpu/drm/tyr/gpu/irq.rs new file mode 100644 index 0000000000000000000000000000000000000000..b26338a7889ee5631074248c3647dbea4a01b772 --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu/irq.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! GPU IRQ handler. 
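+//!
+//! As a summary of the code below (descriptive, not normative): interrupts are
+//! currently masked at init time (`GPU_INT_MASK` is written with 0) and the
+//! driver polls instead, so this handler mostly acknowledges `GPU_INT_RAWSTAT`
+//! and wakes the power-on waiter when the reset/power-changed bits are reported.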
+ +use kernel::bits::bit_u32; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::irq; +use kernel::irq::request::Handler as IrqHandler; +use kernel::irq::request::IrqReturn; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::gpu::wait::PowerOnWait; +use crate::regs; + +const RESET_COMPLETED: u32 = bit_u32(8); +const POWER_CHANGED_SINGLE: u32 = bit_u32(9); +const POWER_CHANGED_ALL: u32 = bit_u32(10); + +pub(crate) struct GpuIrqHandler { + _tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, + poweron_wait: Arc<PowerOnWait>, +} + +impl IrqHandler for GpuIrqHandler { + fn handle_irq(&self) -> IrqReturn { + let int_stat = regs::GPU_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + pr_info!("Acknowledging GPU_INT_RAWSTAT: {:#x}\n", int_stat); + + let _ = regs::GPU_INT_CLEAR.write(&self.iomem, int_stat); + + if int_stat == RESET_COMPLETED | POWER_CHANGED_SINGLE | POWER_CHANGED_ALL { + *self.poweron_wait.powered_on.lock() = true; + self.poweron_wait.wait.notify_one(); + } + + IrqReturn::Handled + } +} + +pub(crate) fn gpu_irq_init( + tdev: ARef<TyrDevice>, + pdevice: platform::Device, + iomem: Arc<Devres<IoMem>>, + poweron_wait: Arc<PowerOnWait>, +) -> Result<impl PinInit<IrqRegistration<GpuIrqHandler>, Error>> { + let gpu_irq = pdevice.irq_by_name(c_str!("gpu"))?; + let irq_handler = GpuIrqHandler { + _tdev: tdev, + iomem: iomem.clone(), + poweron_wait, + }; + + // Lets disable IRQs in favor of explicit polling for now due to issues with + // SpinLockIrq and CondVar. + // + // GPU_INT_MASK.write(&iomem, core::u32::MAX)?; + regs::GPU_INT_MASK.write(&iomem, 0)?; + + Ok(IrqRegistration::register( + gpu_irq, + irq::request::flags::SHARED, + c_str!("tyr-gpu"), + irq_handler, + )) +} diff --git a/drivers/gpu/drm/tyr/gpu/wait.rs b/drivers/gpu/drm/tyr/gpu/wait.rs new file mode 100644 index 0000000000000000000000000000000000000000..ce29bf161686ffc9bc02dc7b8524fc7ca2343c4d --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu/wait.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! A wait on GPU events. Currently only used for power on. 
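+//!
+//! Rough usage sketch (illustrative only; the driver currently polls in
+//! driver.rs rather than sleeping here):
+//!
+//!     let wait = PowerOnWait::new()?;
+//!     // IRQ side: *wait.powered_on.lock() = true; wait.wait.notify_one();
+//!     // Waiter side:
+//!     let mut powered_on = wait.powered_on.lock();
+//!     while !*powered_on {
+//!         wait.wait.wait(&mut powered_on);
+//!     }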
+ +use kernel::new_condvar; +use kernel::new_spinlock; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::CondVar; +use kernel::sync::SpinLock; + +#[pin_data] +pub(crate) struct PowerOnWait { + #[pin] + pub(crate) wait: CondVar, + #[pin] + pub(crate) powered_on: SpinLock<bool>, +} + +impl PowerOnWait { + pub(crate) fn new() -> Result<Arc<Self>> { + Arc::pin_init( + pin_init!(PowerOnWait { + wait <- new_condvar!(), + powered_on <- new_spinlock!(false), + }), + GFP_KERNEL, + ) + } +} diff --git a/drivers/gpu/drm/tyr/mmu.rs b/drivers/gpu/drm/tyr/mmu.rs new file mode 100644 index 0000000000000000000000000000000000000000..fc3e5cae431a6c5a3b10786896f3198c84b5f8c7 --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu.rs @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use core::ops::Range; + +use as_lock::AsLockToken; +use faults::decode_faults; +use kernel::devres::Devres; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::io_pgtable; +use kernel::new_mutex; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time::Delta; +use kernel::types::ForeignOwnable; +use vm::Vm; +use vm::VmLayout; + +use crate::driver::TyrDevice; +use crate::gpu::GpuInfo; +use crate::regs::*; + +mod as_lock; +mod faults; +pub(crate) mod irq; +pub(crate) mod vm; + +pub(crate) struct Mmu { + /// List containing all VMs. + vms: KVec<Arc<Mutex<Vm>>>, + /// Tracks which of the 32 AS slots are free. + free_slots: usize, + // slot_allocator: Arc<Mutex<SlotAllocator>>, +} + +impl Mmu { + pub(crate) fn new() -> Result<Self> { + Ok(Self { + vms: KVec::new(), + // slot_allocator: Arc::pin_init( + // new_mutex!(SlotAllocator { + // free_mask: u32::MAX, + // }), + // GFP_KERNEL, + // )?, + free_slots: usize::MAX, + }) + } + + pub(crate) fn create_vm( + &mut self, + tdev: &TyrDevice, + pdev: platform::Device, + gpu_info: &GpuInfo, + for_mcu: bool, + layout: VmLayout, + auto_kernel_va: Range<u64>, + /* coherent: bool, */ + ) -> Result<Arc<Mutex<Vm>>> { + let vm = Vm::create(tdev, pdev, for_mcu, gpu_info, layout, auto_kernel_va)?; + + let vm = Arc::pin_init(new_mutex!(vm), GFP_KERNEL)?; + self.vms.push(vm.clone(), GFP_KERNEL)?; + Ok(vm) + } + + fn flush_range(iomem: &Devres<IoMem>, as_nr: usize, range: Range<u64>) -> Result { + Self::do_as_command(iomem, as_nr, AS_COMMAND_FLUSH_PT, range) + } + + fn allocate_as(&mut self) -> Result<usize> { + let slot = self.free_slots.trailing_zeros(); + if slot == 32 { + return Err(EBUSY); + } + + self.free_slots |= 1 << slot; + Ok(slot as usize) + } + + fn wait_ready(iomem: &Devres<IoMem>, as_nr: usize) -> Result { + let op = || as_status(as_nr)?.read(iomem); + let cond = |status: &u32| -> bool { *status & AS_STATUS_ACTIVE == 0 }; + let _ = io::poll::read_poll_timeout( + op, + cond, + Delta::from_millis(0), + Some(Delta::from_micros(10000)), + )?; + + Ok(()) + } + + /// TODO: The code to manage AS slots is still TODO. 
+ fn free_as(&mut self, as_nr: usize) { + self.free_slots &= !(1 << as_nr); + } + + fn do_as_command( + iomem: &Devres<IoMem>, + as_nr: usize, + command: u32, + region: Range<u64>, + ) -> Result { + if command == AS_COMMAND_UNLOCK { + as_command(as_nr)?.write(iomem, command)?; + } else { + let _lock = AsLockToken::lock_region(iomem, as_nr, region)?; + Self::wait_ready(iomem, as_nr)?; + as_command(as_nr)?.write(iomem, command)?; + Self::wait_ready(iomem, as_nr)?; + } + + Ok(()) + } + + pub(crate) fn bind_vm( + &mut self, + vm: Arc<Mutex<Vm>>, + gpu_info: &GpuInfo, + iomem: &Devres<IoMem>, + ) -> Result { + let mut vm = vm.lock(); + let va_bits = gpu_info.va_bits(); + + let transtab = vm.gpuvm.exec_lock(None)?.page_table.cfg().ttbr; + let transcfg = AS_TRANSCFG_PTW_MEMATTR_WB + | AS_TRANSCFG_PTW_RA + | AS_TRANSCFG_ADRMODE_AARCH64_4K + | as_transcfg_ina_bits((55 - va_bits).into()); + + let memattr = vm.memattr; + let as_nr = if vm.for_mcu { 0 } else { self.allocate_as()? }; + + Self::enable_as(iomem, as_nr as usize, transtab, transcfg.into(), memattr)?; + + vm.address_space = Some(as_nr as usize); + Ok(()) + } + + fn enable_as( + iomem: &Devres<IoMem>, + as_nr: usize, + transtab: u64, + transcfg: u64, + memattr: u64, + ) -> Result { + let active = as_status(as_nr)?.read(iomem)? & AS_STATUS_ACTIVE != 0; + if active { + return Err(EBUSY); + } + + Self::do_as_command(iomem, as_nr, AS_COMMAND_FLUSH_MEM, 0..u64::MAX)?; + + let transtab_lo = (transtab & 0xffffffff) as u32; + let transtab_hi = (transtab >> 32) as u32; + + let transcfg_lo = (transcfg & 0xffffffff) as u32; + let transcfg_hi = (transcfg >> 32) as u32; + + let memattr_lo = (memattr & 0xffffffff) as u32; + let memattr_hi = (memattr >> 32) as u32; + + as_transtab_lo(as_nr)?.write(iomem, transtab_lo)?; + as_transtab_hi(as_nr)?.write(iomem, transtab_hi)?; + + as_transcfg_lo(as_nr)?.write(iomem, transcfg_lo)?; + as_transcfg_hi(as_nr)?.write(iomem, transcfg_hi)?; + + as_memattr_lo(as_nr)?.write(iomem, memattr_lo)?; + as_memattr_hi(as_nr)?.write(iomem, memattr_hi)?; + + as_command(as_nr)?.write(iomem, AS_COMMAND_UPDATE)?; + + let op = || as_status(as_nr)?.read(iomem); + let cond = |status: &u32| -> bool { *status & AS_STATUS_ACTIVE == 0 }; + let _ = io::poll::read_poll_timeout( + op, + cond, + Delta::from_millis(0), + Some(Delta::from_micros(200)), + )?; + + Ok(()) + } +} + +/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ +impl io_pgtable::FlushOps for Mmu { + type Data = (); + + fn tlb_flush_all(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {} + fn tlb_flush_walk( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _size: usize, + _granule: usize, + ) { + } + fn tlb_add_page( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _granule: usize, + ) { + } +} diff --git a/drivers/gpu/drm/tyr/mmu/as_lock.rs b/drivers/gpu/drm/tyr/mmu/as_lock.rs new file mode 100644 index 0000000000000000000000000000000000000000..dd3ed253743ec66bfc798504adeaeb1f166bd12c --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/as_lock.rs @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Address space locking. + +use core::ops::Range; + +use kernel::bits::genmask_u64; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::prelude::*; + +use crate::mmu::Mmu; +use crate::regs::*; + +/// A token type that represents a lock on a region of a given address space. 
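+///
+/// Dropping the token waits for the MMU to become ready, issues
+/// `AS_COMMAND_FLUSH_PT` for the address space and waits again, so the
+/// unlock/flush happens automatically at the end of the critical section.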
+pub(super) struct AsLockToken<'a> { + iomem: &'a Devres<IoMem>, + as_nr: usize, +} + +impl<'a> AsLockToken<'a> { + /// Lock a `region` of `as_nr`. + pub(super) fn lock_region( + iomem: &'a Devres<IoMem>, + as_nr: usize, + region: Range<u64>, + ) -> Result<Self> { + if region.end - region.start == 0 { + return Err(EINVAL); + } + + // The locked region is a naturally aligned power of 2 block encoded as + // log2 minus(1). + // + // Calculate the desired start/end and look for the highest bit which + // differs. The smallest naturally aligned block must include this bit + // change, the desired region starts with this bit (and subsequent bits) + // zeroed and ends with the bit (and subsequent bits) set to one. + let region_width = core::cmp::max( + (region.start ^ (region.end - 1)).leading_zeros() as u8, + 64 - AS_LOCK_REGION_MIN_SIZE.trailing_zeros() as u8, + ) - 1; + + // Mask off the low bits of region.start, which would be ignored by the + // hardware anyways. + let region_start = region.start & genmask_u64(63, region_width as u32); + + let region = (region_width as u64) | region_start; + + let region_lo = (region & 0xffffffff) as u32; + let region_hi = (region >> 32) as u32; + + // Lock the region that needs to be updated. + as_lockaddr_lo(as_nr)?.write(iomem, region_lo)?; + as_lockaddr_hi(as_nr)?.write(iomem, region_hi)?; + as_command(as_nr)?.write(iomem, AS_COMMAND_LOCK)?; + + Ok(Self { iomem, as_nr }) + } +} + +impl Drop for AsLockToken<'_> { + fn drop(&mut self) { + let as_cmd = as_command(self.as_nr); + match as_cmd { + Ok(as_cmd) => { + if let Err(err) = Mmu::wait_ready(self.iomem, self.as_nr) { + pr_err!("MMU is busy for AS{}: {:?}\n", self.as_nr, err); + return; + } + if let Err(err) = as_cmd.write(self.iomem, AS_COMMAND_FLUSH_PT) { + pr_err!( + "Failed to flush page tables for AS{}: {:?}\n", + self.as_nr, + err + ); + return; + } + if let Err(err) = Mmu::wait_ready(self.iomem, self.as_nr) { + pr_err!("MMU is busy for AS{}: {:?}\n", self.as_nr, err); + } + } + + Err(err) => { + pr_err!("Failed to unlock AS{}: {:?}\n", self.as_nr, err); + } + } + } +} diff --git a/drivers/gpu/drm/tyr/mmu/faults.rs b/drivers/gpu/drm/tyr/mmu/faults.rs new file mode 100644 index 0000000000000000000000000000000000000000..a9509648b33d519146d6c0d8e6fc07db7e899757 --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/faults.rs @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Fault reporting. 
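+//!
+//! For reference, `decode_faults()` below splits `AS_FAULTSTATUS` as follows:
+//! bits 7:0 hold the exception type (see `EXCEPTION_MAP`), bits 9:8 the access
+//! type, bit 10 distinguishes decoder from slave faults, and bits 31:16 carry
+//! the source id.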
+ +use crate::regs::*; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::prelude::*; +use kernel::str::CStr; + +pub(crate) const EXCEPTION_MAP: &[(u32, &CStr)] = &[ + (0x00, c_str!("OK")), + (0x04, c_str!("TERMINATED")), + (0x05, c_str!("KABOOM")), + (0x06, c_str!("EUREKA")), + (0x08, c_str!("ACTIVE")), + (0x0f, c_str!("CS_RES_TERM")), + (0x3f, c_str!("MAX_NON_FAULT")), + (0x40, c_str!("CS_CONFIG_FAULT")), + (0x41, c_str!("CS_UNRECOVERABLE")), + (0x44, c_str!("CS_ENDPOINT_FAULT")), + (0x48, c_str!("CS_BUS_FAULT")), + (0x49, c_str!("CS_INSTR_INVALID")), + (0x4a, c_str!("CS_CALL_STACK_OVERFLOW")), + (0x4b, c_str!("CS_INHERIT_FAULT")), + (0x50, c_str!("INSTR_INVALID_PC")), + (0x51, c_str!("INSTR_INVALID_ENC")), + (0x55, c_str!("INSTR_BARRIER_FAULT")), + (0x58, c_str!("DATA_INVALID_FAULT")), + (0x59, c_str!("TILE_RANGE_FAULT")), + (0x5a, c_str!("ADDR_RANGE_FAULT")), + (0x5b, c_str!("IMPRECISE_FAULT")), + (0x60, c_str!("OOM")), + (0x68, c_str!("CSF_FW_INTERNAL_ERROR")), + (0x69, c_str!("CSF_RES_EVICTION_TIMEOUT")), + (0x80, c_str!("GPU_BUS_FAULT")), + (0x88, c_str!("GPU_SHAREABILITY_FAULT")), + (0x89, c_str!("SYS_SHAREABILITY_FAULT")), + (0x8a, c_str!("GPU_CACHEABILITY_FAULT")), + (0xc0, c_str!("TRANSLATION_FAULT_0")), + (0xc1, c_str!("TRANSLATION_FAULT_1")), + (0xc2, c_str!("TRANSLATION_FAULT_2")), + (0xc3, c_str!("TRANSLATION_FAULT_3")), + (0xc4, c_str!("TRANSLATION_FAULT_4")), + (0xc8, c_str!("PERM_FAULT_0")), + (0xc9, c_str!("PERM_FAULT_1")), + (0xca, c_str!("PERM_FAULT_2")), + (0xcb, c_str!("PERM_FAULT_3")), + (0xd9, c_str!("ACCESS_FLAG_1")), + (0xda, c_str!("ACCESS_FLAG_2")), + (0xdb, c_str!("ACCESS_FLAG_3")), + (0xe0, c_str!("ADDR_SIZE_FAULT_IN")), + (0xe4, c_str!("ADDR_SIZE_FAULT_OUT0")), + (0xe5, c_str!("ADDR_SIZE_FAULT_OUT1")), + (0xe6, c_str!("ADDR_SIZE_FAULT_OUT2")), + (0xe7, c_str!("ADDR_SIZE_FAULT_OUT3")), + (0xe8, c_str!("MEM_ATTR_FAULT_0")), + (0xe9, c_str!("MEM_ATTR_FAULT_1")), + (0xea, c_str!("MEM_ATTR_FAULT_2")), + (0xeb, c_str!("MEM_ATTR_FAULT_3")), +]; + +pub(crate) fn get_exception_name(code: u32) -> &'static CStr { + for &(exception_code, name) in EXCEPTION_MAP { + if exception_code == code { + return name; + } + } + c_str!("UNKNOWN") +} + +pub(crate) fn access_type_name(fault_status: u32) -> &'static str { + match fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK { + AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC => "ATOMIC", + AS_FAULTSTATUS_ACCESS_TYPE_READ => "READ", + AS_FAULTSTATUS_ACCESS_TYPE_WRITE => "WRITE", + AS_FAULTSTATUS_ACCESS_TYPE_EX => "EXECUTE", + _ => "UNKNOWN", + } +} + +/// Decodes a MMU fault, printing a message to the kernel log. +pub(super) fn decode_faults(mut status: u32, iomem: &Devres<IoMem>) -> Result { + while status != 0 { + let as_index = (status | (status >> 16)).trailing_zeros(); + let mask = kernel::bits::bit_u32(as_index); + + let mut addr: u64; + + let fault_status: u32 = as_faultstatus(as_index as usize).unwrap().read(iomem)?; + addr = as_faultaddress_lo(as_index as usize).unwrap().read(iomem)? as u64; + addr |= (as_faultaddress_hi(as_index as usize).unwrap().read(iomem)? 
as u64) << 32; + + let exception_type: u32 = fault_status & 0xff; + let access_type: u32 = (fault_status >> 8) & 0x3; + let source_id: u32 = fault_status >> 16; + + pr_err!( + "Unhandled Page fault in AS{} at VA 0x{:016X}\n\ + raw fault status: 0x{:X}\n\ + decoded fault status: {}\n\ + exception type 0x{:X}: {}\n\ + access type 0x{:X}: {}\n\ + source id 0x{:X}\n", + as_index, + addr, + fault_status, + if fault_status & (1 << 10) != 0 { + "DECODER FAULT" + } else { + "SLAVE FAULT" + }, + exception_type, + get_exception_name(exception_type), + access_type, + access_type_name(fault_status), + source_id + ); + + // Update status to process the next fault + status &= !mask; + } + + Ok(()) +} diff --git a/drivers/gpu/drm/tyr/mmu/irq.rs b/drivers/gpu/drm/tyr/mmu/irq.rs new file mode 100644 index 0000000000000000000000000000000000000000..1e8362c47524c1fa6f74ef3a88e3799838ad18fb --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/irq.rs @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! MMU IRQ handler. +//! +//! The interrupts return, among many other things, information about faulting +//! addresses. + +use kernel::c_str; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::irq::request::IrqReturn; +use kernel::irq::Registration; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::mmu::decode_faults; +use crate::regs; + +pub(crate) struct MmuIrqHandler { + _tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, +} + +impl kernel::irq::Handler for MmuIrqHandler { + fn handle_irq(&self) -> IrqReturn { + let rawstat = regs::MMU_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + pr_info!("Acknowledging MMU_INT_RAWSTAT: {:#x}\n", rawstat); + let _ = regs::MMU_INT_CLEAR.write(&self.iomem, rawstat); + + let status = rawstat & kernel::bits::genmask_u32(15, 0); + + let _ = decode_faults(status, &self.iomem); + + IrqReturn::Handled + } +} + +pub(crate) fn mmu_irq_init( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + iomem: Arc<Devres<IoMem>>, +) -> Result<impl PinInit<Registration<MmuIrqHandler>, Error>> { + let mmu_irq = pdev.irq_by_name(c_str!("mmu"))?; + + let irq_handler = MmuIrqHandler { + _tdev: tdev, + iomem: iomem.clone(), + }; + + // Lets disable IRQs in favor of explicit polling for now due to issues with + // SpinLockIrq and CondVar. + // + // MMU_INT_MASK.write(&iomem, core::u32::MAX)?; + regs::MMU_INT_MASK.write(&iomem, 0)?; + + Ok(Registration::register( + mmu_irq, + kernel::irq::request::flags::SHARED, + c_str!("tyr-mmu"), + irq_handler, + )) +} diff --git a/drivers/gpu/drm/tyr/mmu/slot_allocator.rs b/drivers/gpu/drm/tyr/mmu/slot_allocator.rs new file mode 100644 index 0000000000000000000000000000000000000000..8ba5f8cd3d08a79d3fb96bf74899728f2a1f03bd --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/slot_allocator.rs @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! All VMs have to be placed on a physical slot to become active. This file +//! implements an allocator to track which slots are active, and later to evict +//! the least recently used one if needed. +//! +//! Implementing this allocator is a TODO. For now, we just return EBUSY when +//! all slots are taken, and slots are never freed once inactive. + +// /// Alocates HW AS slots, which represent a physical slot where a VM can be +// /// placed in. +// /// +// /// Panthor keeps a LRU list for the purposes of evicting VMs when a slot is +// /// requested but no one is free. 
We defer this to a future implementation. +// /// +// /// Note that this is still TODO: this type doesn't yet track any VMs. +// struct SlotAllocator { +// /// How many slots are free. +// free_mask: u32, +// } + +// impl SlotAllocator { +// fn alloc_slot(allocator: Arc<Mutex<Self>>, vm: &mut Vm) { +// let mut alloc = allocator.lock(); +// let slot = alloc.free_mask.trailing_zeros(); + +// if slot < 32 { +// alloc.free_mask |= 1 << slot; +// let slot_allocation = SlotAllocation { +// allocator: allocator.clone(), +// slot: slot as u8, +// }; +// vm.binding = Some(slot_allocation); +// } +// } + +// fn free_slot(vm: &mut Vm) { +// vm.binding = None; +// } +// } + +// /// Represents a slot allocation. +// /// +// /// This type returns the slot to the allocator once it is dropped. +// /// +// /// +// /// Note that this is still TODO: this type doesn't yet track any VMs. +// struct SlotAllocation { +// /// The allocator that allocated this slot. +// allocator: Arc<Mutex<SlotAllocator>>, +// /// The actual slot value. +// slot: u8, +// } + +// impl Drop for SlotAllocation { +// fn drop(&mut self) { +// self.allocator.lock().free_mask &= !(1 << self.slot); +// } +// } diff --git a/drivers/gpu/drm/tyr/mmu/vm.rs b/drivers/gpu/drm/tyr/mmu/vm.rs new file mode 100644 index 0000000000000000000000000000000000000000..a96880a4a7a18501a344d71c98f8b4203879b350 --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/vm.rs @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! VM management. +//! +//! VMs represent a given address space. It provides memory isolation and the +//! illusion of owning the entire VA range, much like CPU virtual memory. +//! +//! VMs can be placed into a hardware slots (i.e.: AS slots), which will make +//! them active. The number of AS's is limited, and one VM must evict an inactive +//! one if all slots are taken. In Panthor, this is implemented by keeping a LRU +//! list, but this is currently not implemented here. +//! +//! A VM is assigned an AS by means of a VM_BIND call if the request operation +//! is OP_MAP. +//! +//! If there is no unactive VM to evict, the call to VM_BIND should fail with +//! EBUSY, but note that slot management is somewhat WIP for now, as we have no +//! tests for that yet. +//! +//! AS0 is special, in the sense that it's the slot used by the firmware's VM. +//! No other VM can occupy AS0 at any time. + +use core::ops::Range; + +use gpuvm::LockedVm; +use gpuvm::StepContext; +use kernel::bindings::SZ_2M; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::drm::gem::shmem; +use kernel::drm::mm; +use kernel::io::mem::IoMem; +use kernel::io_pgtable::ARM64LPAES1; +use kernel::io_pgtable::{self}; +use kernel::platform; +use kernel::prelude::*; +use kernel::sizes::SZ_4K; +use kernel::sync::Arc; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::gem; +use crate::gem::DriverObject; +use crate::gem::KernelVaPlacement; +use crate::gpu::GpuInfo; +use crate::mmu::Mmu; +use crate::regs; + +mod gpuvm; +pub(crate) mod map_flags; +pub(crate) mod pool; + +// TODO: we need *all* of these in kernel::bindings. +const SZ_4G: u64 = 4 * kernel::bindings::SZ_1G as u64; + +pub(crate) struct Vm { + /// A dummy object to serve as GPUVM's root. We need ownership of this. + _dummy_obj: kernel::drm::gem::ObjectRef<shmem::Object<DriverObject>>, + + pub(super) gpuvm: ARef<kernel::drm::gpuvm::GpuVm<LockedVm>>, + + /// The AS to which this VM is bound, if any. 
+ pub(super) address_space: Option<usize>, + + // binding: Option<SlotAllocation>, + /// The memory attributes for this VM. + pub(super) memattr: u64, + + /// The layout describing how the VM is split between user and kernel space. + _layout: VmLayout, + + /// Whether this is the MCU VM. + pub(super) for_mcu: bool, + + /// The range to automatically allocate kernel VAs from, if requested. + auto_kernel_va: Range<u64>, + + /// Whether this VM was destroyed by userspace. + /// + /// Destroyed VMs are unmapped and cannot be the target of map operations + /// anymore. + pub(super) destroyed: bool, +} + +impl Vm { + pub(super) fn create( + tdev: &TyrDevice, + pdev: platform::Device, + for_mcu: bool, + gpu_info: &GpuInfo, + layout: VmLayout, + auto_kernel_va: Range<u64>, + ) -> Result<Self> { + // We should ideally not allocate memory for this, but there is no way + // to create dummy GPUVM GEM objects for now. + // + // This is being discussed on Zulip. For now we have to waste 4k on + // this. + let dummy_obj = gem::new_dummy_object(tdev)?; + + let va_bits = gpu_info.va_bits(); + let pa_bits = gpu_info.pa_bits(); + + pr_info!( + "Creating VM with VA bits: {}, PA bits: {}\n", + va_bits, + pa_bits + ); + + let full_va_range = 1u64 << va_bits; + + let va_range = if for_mcu { 0..SZ_4G } else { 0..full_va_range }; + + let kernel_mm = mm::Allocator::new( + layout.kernel.start, + layout.kernel.end - layout.kernel.start, + (), + )?; + + let page_table = ARM64LPAES1::new( + pdev.as_ref(), + io_pgtable::Config { + pgsize_bitmap: SZ_4K | SZ_2M as usize, + ias: va_bits as usize, + oas: pa_bits as usize, + coherent_walk: false, + quirks: 0, + }, + (), + )?; + + let memattr = mair_to_memattr(page_table.cfg().mair); + + Ok(Vm { + _dummy_obj: dummy_obj.gem.clone(), + gpuvm: kernel::drm::gpuvm::GpuVm::new( + c_str!("Tyr::GpuVm"), + tdev, + &*(dummy_obj.gem), + va_range.clone(), + 0..0, + LockedVm::new(page_table, kernel_mm), + )?, + // binding: None, + address_space: None, + memattr, + _layout: layout, + for_mcu, + auto_kernel_va, + destroyed: false, + }) + } + + /// Allocs a kernel range using the MM allocator. + /// + /// Kernel VAs are used for the FW, for synchronization objects, ring + /// buffers and other kernel-only data structures. + pub(crate) fn alloc_kernel_range(&mut self, va: KernelVaPlacement) -> Result<mm::Node<(), ()>> { + let mut inner = self.gpuvm.exec_lock(None)?; + match va { + KernelVaPlacement::Auto => inner.kernel_mm.insert_node_in_range( + (), + 4096, + 4096, + 0, + self.auto_kernel_va.start, + self.auto_kernel_va.end, + mm::InsertMode::Best, + ), + KernelVaPlacement::At(va) => { + inner + .kernel_mm + .reserve_node((), va.start, va.end - va.start, 0) + } + } + } + + /// Binds a GEM object to the VM, starting at `bo_offset`. + /// + /// `va_range` controls where in the VA space the BO will be mapped to. + pub(crate) fn bind_gem( + &mut self, + iomem: Arc<Devres<IoMem>>, + bo: &gem::Object, + bo_offset: u64, + va_range: Range<u64>, + vm_map_flags: map_flags::Flags, + ) -> Result { + // XXX: do not rearrange this or it will deadlock. + // + // Sadly, `inner` will lock the reservation for `bo`, and we need + // `inner` to produce `vm_bo`. + // + // In the natural drop order, the `ARef` for `vm_bo` will attempt to + // lock the reservation to decrement the refcount, but it's already + // locked by the call that produced `inner`. + // + // We can prove the above by just enabling lockdep. 
+ // + // This means that it's trivially easy to deadlock when obtain_bo() is + // called if the drop order is not inverted. A solution to this will + // probably be beyond the scope of this driver. This problem also + // apparently predates Rust4Linux, from what I could gather. + // + // Here we just move `vm_bo` into `ctx`, to make sure it gets dropped + // after `inner`, on top of it also being needed in the `step_map` + // callback. + // + // Note that sg_table() will also lock the reservation, so it too needs + // to come before `inner`. + let mut ctx = StepContext { + iomem, + vm_bo: None, + vm_map_flags: Some(vm_map_flags), + vm_as_nr: self.address_space, + preallocated_vas: StepContext::preallocate_vas()?, + }; + let sgt = bo.sg_table()?; + + let mut locked_vm = self.gpuvm.exec_lock(Some(bo))?; + + let vm_bo = locked_vm.obtain_bo()?; + + let mut vm_bo_guard = vm_bo.inner().sgt.lock(); + if vm_bo_guard.is_none() { + *vm_bo_guard = Some(sgt); + } + core::mem::drop(vm_bo_guard); + + ctx.vm_bo = Some(vm_bo); + locked_vm.sm_map( + &mut ctx, + va_range.start, + va_range.end - va_range.start, + bo_offset, + ) + } + + /// Unmap a given VA range. + pub(crate) fn unmap_range(&mut self, iomem: Arc<Devres<IoMem>>, range: Range<u64>) -> Result { + let mut locked_vm = self.gpuvm.exec_lock(None)?; + + let mut ctx = StepContext { + iomem, + vm_bo: None, + vm_map_flags: None, + vm_as_nr: None, + preallocated_vas: StepContext::preallocate_vas()?, + }; + + locked_vm.sm_unmap(&mut ctx, range.start, range.end - range.start) + } + + /// Flush L2 caches for the entirety of a VM's AS. + pub(crate) fn flush(&self, tdev: &TyrDevice) -> Result { + let data = tdev.data(); + let iomem = &data.iomem; + + let as_nr = self.address_space.ok_or(EINVAL)?; + let range = self.gpuvm.mm_start()..self.gpuvm.mm_range(); + Mmu::flush_range(iomem, as_nr, range) + } + + /// Unmap the whole VM. + pub(crate) fn unmap_all(&mut self, iomem: Arc<Devres<IoMem>>) -> Result { + let range = self.gpuvm.mm_start()..self.gpuvm.mm_range(); + + self.unmap_range(iomem, range)?; + self.address_space = None; + + Ok(()) + } +} + +/// 256M of every VM is reserved for kernel objects by default, i.e.: heap +/// chunks, heapcontext, ring buffers, kernel synchronization objects and etc. +/// +/// The user VA space always start at 0x0, and the kernel VA space is always +/// placed after the user VA range. +const MIN_KERNEL_VA_SIZE: u64 = 0x10000000; + +pub(crate) struct VmLayout { + /// Section reserved for user objects. + pub(crate) user: Range<u64>, + + /// Section reserved for kernel objects. + pub(crate) kernel: Range<u64>, +} + +impl VmLayout { + /// Automatically manages a layout given the a `VmSize` + pub(crate) fn from_user_sz(tdev: &TyrDevice, user_sz: VmUserSize) -> Self { + let va_bits = tdev.data().gpu_info.va_bits(); + let max_va_range = 1u64 << va_bits; + + let user; + let kernel; + + match user_sz { + VmUserSize::Auto | VmUserSize::Custom(0) => { + user = 0..max_va_range - MIN_KERNEL_VA_SIZE; + kernel = user.end..user.end + MIN_KERNEL_VA_SIZE; + } + VmUserSize::Custom(user_sz) => { + let user_sz = core::cmp::min(user_sz, max_va_range - MIN_KERNEL_VA_SIZE); + user = 0..user_sz; + kernel = user_sz..user_sz + MIN_KERNEL_VA_SIZE; + } + } + + Self { user, kernel } + } +} + +/// Controls the size of the user VA space. +pub(crate) enum VmUserSize { + /// Lets the kernel decide the user/kernel split. + Auto, + /// Sets the user VA space to a custom size. Things will crash if not enough + /// is left for kernel objects. 
+ Custom(u64), +} + +fn as_memattr_aarch64_inner_alloc_expl(inner: bool, outer: bool) -> u8 { + ((inner as u8) << 1) | (outer as u8) +} + +fn mair_to_memattr(mair: u64) -> u64 { + let mut memattr: u64 = 0; + + for i in 0..8 { + let in_attr = (mair >> (8 * i)) as u8; + let outer = in_attr >> 4; + let inner = in_attr & 0xf; + + // For caching to be enabled, inner and outer caching policy + // have to be both write-back, if one of them is write-through + // or non-cacheable, we just choose non-cacheable. Device + // memory is also translated to non-cacheable. + let out_attr = if (outer & 3 == 0) || (outer & 4 == 0) || (inner & 4 == 0) { + regs::AS_MEMATTR_AARCH64_INNER_OUTER_NC + | regs::AS_MEMATTR_AARCH64_SH_MIDGARD_INNER + | as_memattr_aarch64_inner_alloc_expl(false, false) as u32 + } else { + // Use SH_CPU_INNER mode so SH_IS, which is used when + // IOMMU_CACHE is set, actually maps to the standard + // definition of inner-shareable and not Mali's + // internal-shareable mode. + regs::AS_MEMATTR_AARCH64_INNER_OUTER_WB + | regs::AS_MEMATTR_AARCH64_SH_CPU_INNER + | as_memattr_aarch64_inner_alloc_expl(inner & 1 != 0, inner & 2 != 0) as u32 + }; + + memattr |= (out_attr as u64) << (8 * i); + } + + memattr +} diff --git a/drivers/gpu/drm/tyr/mmu/vm/gpuvm.rs b/drivers/gpu/drm/tyr/mmu/vm/gpuvm.rs new file mode 100644 index 0000000000000000000000000000000000000000..1f3ae4a1933d9a65ce213ab8427504ae294e8e1c --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/vm/gpuvm.rs @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! The GPUVM driver implementation. +//! +//! GPUVM will split a given sm_map/sm_unmap request into a series of map, unmap +//! and remap operations in order to manage the VA range. +//! +//! This file contains the driver-specific implementation, which includes the +//! map, unmap and remap driver callbacks. + +use core::ops::Range; + +use kernel::devres::Devres; +use kernel::drm::gpuvm::DriverGpuVa; +use kernel::drm::gpuvm::{self}; +use kernel::drm::mm; +use kernel::io::mem::IoMem; +use kernel::io_pgtable::IoPageTable; +use kernel::io_pgtable::ARM64LPAES1; +use kernel::new_mutex; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::types::ARef; + +use crate::driver; +use crate::gem; +use crate::mmu::vm; +use crate::mmu::Mmu; + +/// A convenience so that we do not have to spell this whole thing out every +/// time. +type PinnedVa = Pin<KBox<gpuvm::GpuVa<LockedVm>>>; + +/// A context that is passed throughout the map/unmap/remap steps. +pub(in crate::mmu) struct StepContext { + /// The Vm <=> BO connection, + pub(super) vm_bo: Option<ARef<gpuvm::GpuVmBo<LockedVm>>>, + + /// The used when mapping the VM that we are doing the steps on. + pub(super) vm_map_flags: Option<vm::map_flags::Flags>, + + /// The address space number for the VM we are executing the operations on. + pub(super) vm_as_nr: Option<usize>, + + /// We may need to access the MMIO region when performing the steps. + pub(super) iomem: Arc<Devres<IoMem>>, + + /// This handles the remap case. + /// + /// Partial unmap requests or map requests overlapping existing mappings + /// will trigger a remap call, which needs to register up to three VA + /// objects (one for the new mapping, and two for the previous and next + /// mappings). + pub(super) preallocated_vas: [Option<PinnedVa>; 3], +} + +impl StepContext { + /// Finds one of our pre-allocated VAs. + /// + /// It is a logic error to call this more than three times for a given + /// StepContext. 
+ fn preallocated_va(&mut self) -> Result<PinnedVa> { + self.preallocated_vas + .iter_mut() + .find_map(|f| f.take()) + .ok_or(EINVAL) + } + + pub(super) fn preallocate_vas() -> Result<[Option<PinnedVa>; 3]> { + Ok([ + Some(gpuvm::GpuVa::<LockedVm>::new(init::zeroed())?), + Some(gpuvm::GpuVa::<LockedVm>::new(init::zeroed())?), + Some(gpuvm::GpuVa::<LockedVm>::new(init::zeroed())?), + ]) + } +} + +pub(crate) struct GpuVa {/* TODO */} +unsafe impl init::Zeroable for GpuVa {} + +impl DriverGpuVa for GpuVa {} + +/// A state that can only be accessed when the GPUVM is locked. +pub(in crate::mmu) struct LockedVm { + /// The page table for this VM. + pub(in crate::mmu) page_table: ARM64LPAES1<Mmu>, + /// The allocator keeping track of what ranges are in use for the kernel VA + /// range. + pub(super) kernel_mm: mm::Allocator<(), ()>, +} + +impl LockedVm { + pub(super) fn new( + page_table: ARM64LPAES1<Mmu>, + kernel_mm: mm::Allocator<(), ()>, + ) -> impl Init<Self> { + init!(LockedVm { + page_table, + kernel_mm, + }) + } + + fn unmap_pages( + &mut self, + iomem: &Devres<IoMem>, + as_nr: Option<usize>, + iova: Range<u64>, + ) -> Result { + let mut total_unmapped = 0; + let size = iova.end - iova.start; + + while total_unmapped < size { + let pgsize = 4096; + let pgcount = (size - total_unmapped).div_ceil(pgsize); + + let unmapped_sz = + self.page_table + .unmap_pages(iova.start as usize, pgsize as usize, pgcount as usize); + + if unmapped_sz as u64 != pgsize * pgcount { + let range = iova.start..iova.start + total_unmapped + unmapped_sz as u64; + + pr_err!( + "AS ({:#?}): failed to unmap range {:#x} - {:#x}, unmapped only {:#x} bytes\n", + as_nr, + iova.start, + iova.start + size, + unmapped_sz, + ); + + if let Some(as_nr) = as_nr { + Mmu::flush_range(iomem, as_nr, range)?; + } + + return Err(EINVAL); + } + + pr_info!( + "AS ({:#?}): unmapped {} bytes, iova: {:#x}, pgsize: {}, pgcount: {}, len: {}\n", + as_nr, + unmapped_sz, + iova.start, + pgsize, + pgcount, + size + ); + + total_unmapped += unmapped_sz as u64; + } + + if let Some(as_nr) = as_nr { + Mmu::flush_range(iomem, as_nr, iova)?; + } + + Ok(()) + } +} + +impl gpuvm::DriverGpuVm for LockedVm { + type Driver = driver::TyrDriver; + type GpuVmBo = VmBo; + type StepContext = StepContext; + + type GpuVa = GpuVa; + + fn step_map( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + // This is the mapping algorithm from Asahi. 
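+        //
+        // In short: walk the BO's sg_table, skip `offset` bytes into it, map at
+        // most `left` bytes of each DMA segment into the page table at 4 KiB
+        // granularity while advancing `iova`, and finally link the preallocated
+        // GPU VA to the VM/BO pair with map_and_link_va().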
+ + let mut iova = op.addr(); + let mut left = op.range() as usize; + let mut offset = op.offset() as usize; + let gpuva = ctx.preallocated_va()?; + + let vm_bo = ctx.vm_bo.as_ref().ok_or(EINVAL)?; + let sgt = vm_bo.inner().sgt.lock(); + let prot = ctx.vm_map_flags.ok_or(EINVAL)?.to_prot(); + + pr_info!("mapping {} bytes, iova: {:#x}, prot {}\n", left, iova, prot); + + for range in sgt + .as_ref() + .expect("SGT should be set before step_map") + .iter() + { + let mut addr = range.dma_address(); + let mut len = range.dma_len(); + + if left == 0 { + break; + } + + if offset > 0 { + let skip = len.min(offset); + addr += skip; + len -= skip; + offset -= skip; + } + + if len == 0 { + continue; + } + + assert!(offset == 0); + + len = len.min(left); + + let pgsize = 4096; + let pgcount = len.div_ceil(pgsize); + + let _ = self.page_table.map_pages( + iova as usize, + addr, + pgsize as usize, + pgcount as usize, + prot, + )?; + + left -= len; + iova += len as u64; + } + + if op + .map_and_link_va( + self, + gpuva, + ctx.vm_bo.as_ref().expect("step_map with no BO"), + ) + .is_err() + { + pr_err!( + "map_and_link_va failed: {:#x} [{:#x}] -> {:#x}\n", + op.offset(), + op.range(), + op.addr() + ); + return Err(EINVAL); + } + Ok(()) + } + + fn step_unmap( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpUnMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + // This is always set by drm_gpuvm.c:op_unmap_cb(), not sure why it's an + // Option. + // + // XXX: discuss this with everybody else + let va = op.va().expect("This is always set by GPUVM"); + let iova = va.addr()..va.addr() + va.range(); + + self.unmap_pages(&ctx.iomem, ctx.vm_as_nr, iova)?; + + let _ = op.unmap_and_unlink_va().ok_or(EINVAL)?; + Ok(()) + } + + fn step_remap( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpReMap<Self>, + _vm_bo: &gpuvm::GpuVmBo<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + let prev_va = ctx.preallocated_va()?; + let next_va = ctx.preallocated_va()?; + let vm_bo = ctx.vm_bo.as_ref().ok_or(EINVAL)?; + + let va = op.unmap().va().ok_or(EINVAL)?; + let orig_addr = va.addr(); + let orig_range: u64 = va.range(); + + // Only unmap the hole between prev/next, if they exist + let unmap_start = if let Some(op) = op.prev_map() { + op.addr() + op.range() + } else { + orig_addr + }; + + let unmap_end = if let Some(op) = op.next_map() { + op.addr() + } else { + orig_addr + orig_range + }; + + let unmap_range = unmap_start..unmap_end; + + self.unmap_pages(&ctx.iomem, ctx.vm_as_nr, unmap_range)?; + let _ = op.unmap().unmap_and_unlink_va().ok_or(EINVAL)?; + + if let Some(prev_op) = op.prev_map() { + if prev_op.map_and_link_va(self, prev_va, vm_bo).is_err() { + pr_err!("step_remap: could not relink prev gpuva\n"); + return Err(EINVAL); + } + } + + if let Some(next_op) = op.next_map() { + if next_op.map_and_link_va(self, next_va, vm_bo).is_err() { + pr_err!("step_remap: could not relink next gpuva\n"); + return Err(EINVAL); + } + } + + Ok(()) + } +} + +/// Data associated with a VM <=> BO pairing +#[pin_data] +pub(in crate::mmu) struct VmBo { + #[pin] + pub(super) sgt: Mutex<Option<gem::SGTable>>, +} + +impl gpuvm::DriverGpuVmBo for VmBo { + fn new() -> impl PinInit<Self> { + pin_init!(VmBo { + sgt <- new_mutex!(None, "VmBinding"), + }) + } +} diff --git a/drivers/gpu/drm/tyr/mmu/vm/map_flags.rs b/drivers/gpu/drm/tyr/mmu/vm/map_flags.rs new file mode 100644 index 0000000000000000000000000000000000000000..88f2f1b9126ff10f41e3473f385c68ddc01aecf7 --- /dev/null +++ 
b/drivers/gpu/drm/tyr/mmu/vm/map_flags.rs @@ -0,0 +1,66 @@ +use kernel::bits::bit_u32; +use kernel::io_pgtable; +use kernel::prelude::*; + +use crate::impl_flags; + +impl_flags!(Flags, Flag, u32); + +impl Flags { + /// Convert the flags to `io_pgtable::prot`. + pub(super) fn to_prot(&self) -> u32 { + let mut prot = 0; + + if self.contains(READONLY) { + prot |= io_pgtable::prot::READ; + } else { + prot |= io_pgtable::prot::READ | io_pgtable::prot::WRITE; + } + + if self.contains(NOEXEC) { + prot |= io_pgtable::prot::NOEXEC; + } + + if !self.contains(UNCACHED) { + prot |= io_pgtable::prot::CACHE; + } + + prot + } +} + +pub(crate) const READONLY: Flag = Flag(bit_u32(1)); +pub(crate) const NOEXEC: Flag = Flag(bit_u32(2)); +pub(crate) const UNCACHED: Flag = Flag(bit_u32(3)); + +impl core::fmt::Display for Flags { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + if self.contains(READONLY) { + write!(f, "| READONLY")?; + } + if self.contains(NOEXEC) { + write!(f, " | NOEXEC")?; + } + + if self.contains(UNCACHED) { + write!(f, " | UNCACHED")?; + } + + Ok(()) + } +} + +impl TryFrom<u32> for Flags { + type Error = Error; + + fn try_from(value: u32) -> core::result::Result<Self, Self::Error> { + let valid = Flags::from(READONLY) | Flags::from(NOEXEC) | Flags::from(UNCACHED); + + if value & valid.0 != value { + pr_err!("Invalid VM map flags: {:#x}\n", value); + Err(EINVAL) + } else { + Ok(Self(value)) + } + } +} diff --git a/drivers/gpu/drm/tyr/mmu/vm/pool.rs new file mode 100644 index 0000000000000000000000000000000000000000..4d948badb0dfd30206d9252aab5c21c37d37c122 --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu/vm/pool.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! VMs created by userspace are placed in a pool so they can be found by other +//! VM ioctls like VM_BIND or VM_DESTROY. + +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::xarray; +use kernel::xarray::XArray; + +use crate::driver::TyrDevice; +use crate::mmu::vm::Vm; +use crate::mmu::vm::VmLayout; + +/// The pool for user VMs. +pub(crate) struct Pool { + xa: XArray<Arc<Mutex<Vm>>>, +} + +impl Pool { + pub(crate) fn create() -> Self { + Self { + xa: XArray::new(xarray::flags::ALLOC1), + } + } + + fn xa(self: Pin<&Self>) -> Pin<&XArray<Arc<Mutex<Vm>>>> { + // SAFETY: We're projecting this field and never move out of it. + unsafe { self.map_unchecked(|p| &p.xa) } + } + + pub(crate) fn create_vm(self: Pin<&Self>, tdev: &TyrDevice, layout: VmLayout) -> Result<usize> { + let data = tdev.data(); + let auto_kernel_va = layout.kernel.clone(); + + let vm = { + let mut mmu = data.mmu.lock(); + mmu.create_vm( + tdev, + data.pdev.clone(), + &data.gpu_info, + false, + layout, + auto_kernel_va, + )? + }; + + self.xa().alloc_limits(vm, 1, 32) + } + + pub(crate) fn get_vm(self: Pin<&Self>, index: usize) -> Option<Arc<Mutex<Vm>>> { + // Get a reference immediately so we can drop the XArray spinlock.
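+        //
+        // Arc::from(vm.borrow()) clones the Arc, so the returned VM remains
+        // valid even if the entry is later removed from the pool.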
+ let vm = self.xa().get(index)?; + Some(Arc::from(vm.borrow())) + } + + pub(crate) fn destroy_vm(self: Pin<&Self>, index: usize, iomem: Arc<Devres<IoMem>>) -> Result { + let vm = self.xa().remove(index).ok_or(EINVAL)?; + let mut vm = vm.lock(); + + vm.destroyed = true; + vm.unmap_all(iomem) + } +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 0000000000000000000000000000000000000000..f012db6156a16372e5bd69c5ae4bed0156caf3c1 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +#![allow(dead_code)] + +use kernel::bits::bit_u64; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::{bits::bit_u32, prelude::*}; + +/// Represents a register in the Register Set +pub(crate) struct Register<const OFFSET: usize>; + +impl<const OFFSET: usize> Register<OFFSET> { + #[inline] + pub(crate) fn read(&self, iomem: &Devres<IoMem>) -> Result<u32> { + (*iomem).try_access().ok_or(ENODEV)?.try_readl(OFFSET) + } + + #[inline] + pub(crate) fn write(&self, iomem: &Devres<IoMem>, value: u32) -> Result<()> { + (*iomem) + .try_access() + .ok_or(ENODEV)? + .try_writel(value, OFFSET) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_INT_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_INT_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_INT_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_INT_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_INT_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_INT_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_INT_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_INT_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_INT_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_INT_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_INT_MASK: Register<0x28> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; +pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) 
const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x204> = Register; +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; +pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_INT_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_INT_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_INT_MASK: Register<0x1008> = Register; + +pub(crate) const JOB_INT_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_INT_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_INT_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_INT_MASK: Register<0x2008> = Register; + +pub(crate) const AS_TRANSCFG_ADRMODE_UNMAPPED: u64 = bit_u64(0); +pub(crate) const AS_TRANSCFG_ADRMODE_IDENTITY: u64 = bit_u64(1); +pub(crate) const AS_TRANSCFG_ADRMODE_AARCH64_4K: u64 = bit_u64(2) | bit_u64(1); +pub(crate) const AS_TRANSCFG_ADRMODE_AARCH64_64K: u64 = bit_u64(3); +pub(crate) const fn as_transcfg_ina_bits(x: u64) -> u64 { + x << 6 +} +pub(crate) const fn as_transcfg_outa_bits(x: u64) -> u64 { + x << 14 +} +pub(crate) const AS_TRANSCFG_SL_CONCAT: u64 = bit_u64(22); +pub(crate) const AS_TRANSCFG_PTW_MEMATTR_NC: u64 = bit_u64(24); +pub(crate) const AS_TRANSCFG_PTW_MEMATTR_WB: u64 = bit_u64(25); +pub(crate) const AS_TRANSCFG_PTW_SH_NS: u64 = 0 << 28; +pub(crate) const AS_TRANSCFG_PTW_SH_OS: u64 = bit_u64(29); +pub(crate) const AS_TRANSCFG_PTW_SH_IS: u64 = bit_u64(29) | bit_u64(28); +pub(crate) const AS_TRANSCFG_PTW_RA: u64 = bit_u64(30); +pub(crate) const AS_TRANSCFG_DISABLE_HIER_AP: u64 = bit_u64(33); +pub(crate) const AS_TRANSCFG_DISABLE_AF_FAULT: u64 = bit_u64(34); +pub(crate) const AS_TRANSCFG_WXN: u64 = bit_u64(35); + +pub(crate) const MMU_BASE: usize = 0x2400; +pub(crate) const MMU_AS_SHIFT: usize = 6; + +const fn mmu_as(as_nr: usize) -> usize { + MMU_BASE + (as_nr << MMU_AS_SHIFT) +} + +pub(crate) struct AsRegister(usize); + +impl AsRegister { + fn new(as_nr: usize, offset: usize) -> Result<Self> { + if as_nr >= 32 { + Err(EINVAL) + } else { + Ok(AsRegister(mmu_as(as_nr) + offset)) + } + } + + #[inline] + pub(crate) fn read(&self, iomem: &Devres<IoMem>) -> Result<u32> { + (*iomem) + .try_access() + .ok_or(ENODEV)? + .try_readl(self.0 as usize) + } + + #[inline] + pub(crate) fn write(&self, iomem: &Devres<IoMem>, value: u32) -> Result<()> { + (*iomem) + .try_access() + .ok_or(ENODEV)? 
+ .try_writel(value, self.0 as usize) + } +} + +pub(crate) fn as_transtab_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x0) +} + +pub(crate) fn as_transtab_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x4) +} + +pub(crate) fn as_memattr_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x8) +} + +pub(crate) fn as_memattr_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0xc) +} + +pub(crate) fn as_lockaddr_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x10) +} + +pub(crate) fn as_lockaddr_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x14) +} + +pub(crate) fn as_command(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x18) +} + +pub(crate) fn as_faultstatus(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x1c) +} + +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_MASK: u32 = 0x3 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: u32 = 0x0 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_EX: u32 = 0x1 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_READ: u32 = 0x2 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_WRITE: u32 = 0x3 << 8; + +pub(crate) fn as_faultaddress_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x20) +} + +pub(crate) fn as_faultaddress_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x24) +} + +pub(crate) const AS_COMMAND_NOP: u32 = 0; +pub(crate) const AS_COMMAND_UPDATE: u32 = 1; +pub(crate) const AS_COMMAND_LOCK: u32 = 2; +pub(crate) const AS_COMMAND_UNLOCK: u32 = 3; +pub(crate) const AS_COMMAND_FLUSH_PT: u32 = 4; +pub(crate) const AS_COMMAND_FLUSH_MEM: u32 = 5; + +pub(crate) fn as_status(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x28) +} + +pub(crate) const AS_STATUS_ACTIVE: u32 = bit_u32(0); + +pub(crate) fn as_transcfg_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x30) +} +pub(crate) fn as_transcfg_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x34) +} + +pub(crate) const AS_LOCK_REGION_MIN_SIZE: u32 = bit_u32(15); + +pub(crate) const AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL: u32 = 2 << 2; + +pub(crate) fn as_memattr_aarch64_inner_alloc_expl(w: bool, r: bool) -> u32 { + (3 << 2) | ((w as u32) << 0) | ((r as u32) << 1) +} +pub(crate) const AS_MEMATTR_AARCH64_SH_MIDGARD_INNER: u32 = 0 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SH_CPU_INNER: u32 = 1 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH: u32 = 2 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SHARED: u32 = 0 << 6; +pub(crate) const AS_MEMATTR_AARCH64_INNER_OUTER_NC: u32 = 1 << 6; +pub(crate) const AS_MEMATTR_AARCH64_INNER_OUTER_WB: u32 = 2 << 6; +pub(crate) const AS_MEMATTR_AARCH64_FAULT: u32 = 3 << 6; + +pub(crate) struct Doorbell(usize); + +impl Doorbell { + pub(crate) fn new(doorbell_id: usize) -> Self { + Doorbell(0x80000 + (doorbell_id * 0x10000)) + } + + #[inline] + pub(crate) fn read(&self, iomem: &Devres<IoMem>) -> Result<u32> { + (*iomem) + .try_access() + .ok_or(ENODEV)? + .try_readl(self.0 as usize) + } + + #[inline] + pub(crate) fn write(&self, iomem: &Devres<IoMem>, value: u32) -> Result<()> { + (*iomem) + .try_access() + .ok_or(ENODEV)? 
+ .try_writel(value, self.0 as usize) + } +} + +pub(crate) const CSF_GLB_DOORBELL_ID: usize = 0; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 0000000000000000000000000000000000000000..55ba10a00d1b6090c4876d1d79b4370b779ab455 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Rust driver for ARM Mali CSF-based GPUs +//! +//! The skeleton is basically taken from Nova and from rust_platform_driver.rs. +//! +//! So far, this is just a very early-stage experiment, but it looks promising: +//! +//! - We use the same uAPI as Panthor, although this needs a bit of work, since +//! bindgen cannot translate #defines into Rust. +//! +//! - The DRM registration and a few IOCTLs are implemented. There is an igt +//! branch with tests. +//! +//! - Basic iomem and register set implementation, so it's possible to program +//! the device. +//! +//! - IRQ handling, so we can receive notifications from the device. +//! +//! - We can boot the firmware. +//! +//! - We can communicate with CSF using the global interface. We can submit +//! requests and the MCU will appropriately respond in the ack field. +//! +//! - There is GEM_CREATE and VM_BIND support. +//! - We can send a PING request to CSF, and it will acknowledge it +//! successfully. +//! +//! Notably missing (apart from literally everything else): +//! - Job submission logic through drm_scheduler and completion through dma_fences +//! - Devfreq, pm_idle, etc. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting ARM's tradition of +//! naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod flags; +mod fw; +mod gem; +mod gpu; +mod mmu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + author: "The Tyr driver authors", + description: "Rust driver for ARM Mali CSF-based GPUs", + license: "Dual MIT/GPL", +} diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 87c9cb555dd1d19ba7e042203d38de0e74f69e05..9bbe91c145c88e3f0e4ef368e87ef5a547833686 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -1010,6 +1010,17 @@ struct drm_panthor_tiler_heap_destroy { __u32 pad; }; +// XXX: this kludge is required for bindgen to pick up the symbol for Rust. +enum { + DRM_IOCTL_TYR_DEV_QUERY = DRM_IOCTL_PANTHOR_DEV_QUERY, + DRM_IOCTL_TYR_VM_CREATE = DRM_IOCTL_PANTHOR_VM_CREATE, + DRM_IOCTL_TYR_VM_DESTROY = DRM_IOCTL_PANTHOR_VM_DESTROY, + DRM_IOCTL_TYR_VM_BIND = DRM_IOCTL_PANTHOR_VM_BIND, + DRM_IOCTL_TYR_VM_GET_STATE = DRM_IOCTL_PANTHOR_VM_GET_STATE, + DRM_IOCTL_TYR_BO_CREATE = DRM_IOCTL_PANTHOR_BO_CREATE, + DRM_IOCTL_TYR_BO_MMAP_OFFSET = DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET, +}; + #if defined(__cplusplus) } #endif diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index e5ee2d6e1eb2f7c454b4ca1f1176c81697faae75..53de99d7160c8a68a1f671e104c3298e790614c4 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -52,6 +52,8 @@ #include <trace/events/rust_sample.h> #include <linux/xarray.h> + + /* `bindgen` gets confused at certain things. 
*/ const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE; diff --git a/rust/kernel/clk.rs b/rust/kernel/clk.rs index f4a8afc270088ed77aa662b7e68b80a0a6902a45..12f9f9aad76e9b854cdb3bb9f8a8afa3b6d6f204 100644 --- a/rust/kernel/clk.rs +++ b/rust/kernel/clk.rs @@ -51,6 +51,20 @@ impl Clk { // received from the C code. unsafe { bindings::clk_disable_unprepare(self.0) }; } + + /// Gets the clock rate. + pub fn rate(&self) -> usize { + // SAFETY: It is safe to call `clk_get_rate()` on a `struct clk *` + // pointer earlier received from the C code. + unsafe { bindings::clk_get_rate(self.0) } + } + + /// Sets the clock rate. + pub fn set_rate(&self, rate: usize) { + // SAFETY: It is safe to call `clk_set_rate()` on a `struct clk *` + // pointer earlier received from the C code. + unsafe { bindings::clk_set_rate(self.0, rate) }; + } } impl Drop for Clk { diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 22a4e8fe5ddbf202430e861d62eacd9c2a00aa0d..96a1157e74b6aa14e136e694b81d6ccb354cd15a 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -60,7 +60,7 @@ impl Device { } /// Obtain the raw `struct device *`. - pub(crate) fn as_raw(&self) -> *mut bindings::device { + pub fn as_raw(&self) -> *mut bindings::device { self.0.get() } diff --git a/rust/kernel/drm/gpuvm.rs b/rust/kernel/drm/gpuvm.rs index 1376fefe24ff0625cfb4c4e36b26a25a3c22f46e..5ce110a0db02b46798acc8bc718998f4405d47da 100644 --- a/rust/kernel/drm/gpuvm.rs +++ b/rust/kernel/drm/gpuvm.rs @@ -498,6 +498,18 @@ impl<T: DriverGpuVm> GpuVm<T> { // SAFETY: This is safe to call as long as the arguments are valid pointers. unsafe { bindings::drm_gpuvm_is_extobj(self.gpuvm() as *mut _, gem) } } + + /// The start of the VA space. + pub fn mm_start(&self) -> u64 { + // SAFETY: this was initialized in Self::new(). + unsafe { (*self.gpuvm()).mm_start } + } + + /// The length of the VA space. + pub fn mm_range(&self) -> u64 { + // SAFETY: this was initialized in Self::new(). + unsafe { (*self.gpuvm()).mm_range } + } } // SAFETY: DRM GpuVm objects are always reference counted and the get/put functions diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 680cbe8df2e1410895f6aa17688b424829b20a98..34e6c5b25c2103af897e9d428910e0117c3206c6 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -192,7 +192,8 @@ impl Device { Self(dev) } - fn as_raw(&self) -> *mut bindings::platform_device { + /// Obtain the raw `struct platform_device *`. + pub fn as_raw(&self) -> *mut bindings::platform_device { // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` // embedded in `struct platform_device`. unsafe { container_of!(self.0.as_raw(), bindings::platform_device, dev) }.cast_mut() diff --git a/rust/uapi/uapi_helper.h b/rust/uapi/uapi_helper.h index 1409441359f510236256bc17851f9aac65c45c4e..d4a239cf2a64fce964b28959ff807ee187b2610d 100644 --- a/rust/uapi/uapi_helper.h +++ b/rust/uapi/uapi_helper.h @@ -9,6 +9,7 @@ #include <uapi/asm-generic/ioctl.h> #include <uapi/drm/drm.h> #include <uapi/drm/nova_drm.h> +#include <uapi/drm/panthor_drm.h> #include <uapi/linux/mdio.h> #include <uapi/linux/mii.h> #include <uapi/linux/ethtool.h>
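
For reviewers who want to see how the AsRegister accessors in regs.rs are meant to compose, the sketch below shows one plausible way mmu.rs could program an address space with them. It is illustrative only and not part of the patch: the helper name, the LO/HI packing, and the final busy-wait are assumptions made for the example, and the regs.rs items are assumed to be in scope.

fn program_as_sketch(
    iomem: &Devres<IoMem>,
    as_nr: usize,
    transtab: u64,
    memattr: u64,
    transcfg: u64,
) -> Result {
    // Hypothetical example: 64-bit values are split across the LO/HI
    // register pairs of the given address space.
    as_transtab_lo(as_nr)?.write(iomem, transtab as u32)?;
    as_transtab_hi(as_nr)?.write(iomem, (transtab >> 32) as u32)?;
    as_memattr_lo(as_nr)?.write(iomem, memattr as u32)?;
    as_memattr_hi(as_nr)?.write(iomem, (memattr >> 32) as u32)?;
    as_transcfg_lo(as_nr)?.write(iomem, transcfg as u32)?;
    as_transcfg_hi(as_nr)?.write(iomem, (transcfg >> 32) as u32)?;

    // Latch the new configuration for this address space.
    as_command(as_nr)?.write(iomem, AS_COMMAND_UPDATE)?;

    // Wait for the address space to go idle; a real implementation would
    // bound this with read_poll_timeout() rather than spinning forever.
    while as_status(as_nr)?.read(iomem)? & AS_STATUS_ACTIVE != 0 {}

    Ok(())
}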