From 313505cb1d6916d6506e99ee76f61622fe141a70 Mon Sep 17 00:00:00 2001 From: Beata Michalska <beata.michalska@arm.com> Date: Mon, 10 Mar 2025 12:03:36 +0100 Subject: [PATCH] drm: panthor-rs: Add first phase for MMU related bits Signed-off-by: Beata Michalska <beata.michalska@arm.com> --- drivers/gpu/drm/panthor-rs/driver.rs | 30 +- drivers/gpu/drm/panthor-rs/mmu.rs | 1159 +++++++++++++++++++++++++- 2 files changed, 1181 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/panthor-rs/driver.rs b/drivers/gpu/drm/panthor-rs/driver.rs index af95812d7fbd3..9c88a0606f437 100644 --- a/drivers/gpu/drm/panthor-rs/driver.rs +++ b/drivers/gpu/drm/panthor-rs/driver.rs @@ -3,13 +3,23 @@ //! Top-level GPU driver implementation. use kernel::{ - bindings, c_str, drm, drm::drv, drm::ioctl, error::Result, error::to_result, of, platform, prelude::*, sync::Arc, + bindings, + c_str, + drm, drm::drv, drm::ioctl, + error::Result, error::to_result, + new_mutex, + of, + platform, + prelude::*, + sync::{Arc, Mutex}, }; use core::ffi; use kernel::macros::vtable; use kernel::types::{ ARef, ForeignOwnable}; +use crate::mmu::Mmu; + #[cfg(CONFIG_PM)] use kernel::runtime_dev_pm_ops; @@ -41,6 +51,9 @@ pub(crate) struct PanthorData { #[pin] pub(crate) ptdev: *mut bindings::panthor_device, pub(crate) pdev: platform::Device, + // @TODO: Temporarily wrapped in Mutex & Option to support lazy init + #[pin] + pub(crate) mmu: Mutex<Option<Arc<Mmu>>>, } //HACK @@ -178,11 +191,14 @@ impl platform::Driver for PanthorDriver { to_result(unsafe { panthor_init() })?; - let data = Arc::new( - PanthorData { + // This is needed due to moving the var to closure in try_pin_init + let pdev_c = pdev.clone(); + let data = Arc::pin_init( + try_pin_init!( PanthorData { ptdev: unsafe { bindings::panthor_device_alloc() }, - pdev: pdev.clone(), - }, + pdev: pdev_c, + mmu <- new_mutex!(None), + }), GFP_KERNEL, )?; @@ -194,6 +210,10 @@ impl platform::Driver for PanthorDriver { panthor_device_init(data.ptdev); } + // Init MMU + let mut mmu_locked = data.mmu.lock(); + mmu_locked.replace(Mmu::new(drm.clone())?); + drm::drv::Registration::new_foreign_owned(drm.clone(), 0)?; dev_info!(pdev.as_ref(), "Probed!\n"); diff --git a/drivers/gpu/drm/panthor-rs/mmu.rs b/drivers/gpu/drm/panthor-rs/mmu.rs index b47c26a91822d..27cc88e790b0d 100644 --- a/drivers/gpu/drm/panthor-rs/mmu.rs +++ b/drivers/gpu/drm/panthor-rs/mmu.rs @@ -1,11 +1,619 @@ // SPDX-License-Identifier: GPL-2.0-only OR MIT // -use core::marker::PhantomPinned; +use core:: { + arch::asm, + marker::PhantomPinned, + ops::Range, + sync::atomic::{AtomicU32, Ordering} +}; + use kernel:: { bindings, - prelude::* + c_str, + drm:: {gem::IntoGEMObject, gem::shmem::SGTable, gpuvm }, + error::to_result, + io_pgtable::{ARM64LPAES1, FlushOps, IoPageTable}, + list:: {List, ListArc, ListLinks}, + new_mutex, + prelude::*, + sizes::SZ_4K, + sync::{Arc, lock::{ Guard, mutex::MutexBackend}, Mutex, UniqueArc}, + types:: {ARef, ForeignOwnable, Opaque, ScopeGuard}, + uapi +}; + +use crate::common::cold_path; +use crate::driver:: { PanthorDevice, PanthorDevRef, PanthorDriver }; +use crate::regs::*; + +pub(crate) const SZ_2M: usize = bindings::SZ_2M as usize; +pub(crate) const SZ_1G: usize = bindings::SZ_1G as usize; + +// Really.... ? 
+use uapi::{ + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_READONLY, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MAP, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MASK, + drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY, }; -use crate::common::*; + +macro_rules! alias_op_map_flags{ + ($flag:ident) => { + const $flag: u32 = + kernel::macros::concat_idents!(drm_panthor_vm_bind_op_flags_, $flag) + as u32; + } +} + +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_MAP); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_MASK); +alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY); + +const PANTHOR_VM_MAP_FLAGS: u32 = ( + DRM_PANTHOR_VM_BIND_OP_MAP_READONLY + | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC + | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED +) as u32; + +const PANTHOR_VM_BIND_OP_MAP_FLAGS: u32 = ( + DRM_PANTHOR_VM_BIND_OP_MAP_READONLY + | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC + | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED + | DRM_PANTHOR_VM_BIND_OP_TYPE_MASK +) as u32; + +/// Handling drm_dev_[enter/exit] +/// Currently applied to functions returning Result +/// Requires specifying access to drm_device struct through capture +macro_rules! drm_dev_guard { + ( + $pub:vis fn $name:ident($($arg:ident: $arg_t:ty),*) -> Result $body:block + capture($capture_dev:tt) + ) => { + $pub fn $name($($arg: $arg_t),*) -> Result { + let mut cookie: i32 = 0; + let drm_dev = $capture_dev; + + if !unsafe { + bindings::drm_dev_enter(drm_dev, &mut cookie as _) + } { + return Err(EINVAL); + } + + let _guard = ScopeGuard::new(|| { + unsafe { bindings::drm_dev_exit(cookie) }; + }); + + + $body + } + } +} + +// Convenience macro to get a hold of Mmu instace, which is currently +// guarded by lock and wrapped in an option, to allow late initialization. +// This is a very tempotary solution until all parts are properly plugged in. +macro_rules! dev_get_mmu_locked { + ($dev: ident) => { + { + let dev_data = $dev.data(); + let mmu_locked = dev_data.mmu.lock(); + mmu_locked.as_ref().cloned().expect("Missing MMU context!") + } + } +} + +// Convenience IDs for handling List nodes (type-checking) +#[repr(u64)] +enum ListID { + VmaListID = 0x100, // or whatever ... + VmListID = 0x200, +} + +#[derive(Default)] +struct VmaCore {} + +#[pin_data] +pub(crate) struct Vma { + // node: Deffered release + #[pin] + node: ListLinks<{ListID::VmaListID as u64}>, + gpuva: Pin<KBox<gpuvm::GpuVa<VmInner>>>, +} + +impl Vma{ + fn prot(flags: u32) -> u32 { + let mut prot: u32 = 0; + + if flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC as u32 > 0 { + prot |= bindings::IOMMU_NOEXEC; + } + + if flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED as u32 == 0 { + prot |= bindings::IOMMU_CACHE; + } + + if flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY as u32 > 0 { + prot |= bindings::IOMMU_READ; + } else { + prot |= bindings::IOMMU_READ | bindings::IOMMU_WRITE; + } + + prot + } +} + +kernel::list::impl_has_list_links! { + impl HasListLinks<{ListID::VmaListID as u64}> for Vma { self.node } +} + +kernel::list::impl_list_arc_safe! 
{ + impl ListArcSafe<{ListID::VmaListID as u64}> for Vma { untracked; } +} + +kernel::list::impl_list_item! { + impl ListItem<{ListID::VmaListID as u64}> for Vma { using ListLinks; } +} + +impl gpuvm::DriverGpuVa for VmaCore {} + +#[derive(Default)] +#[pin_data] +struct MapCtx { + vm_bo: Option<ARef<gpuvm::GpuVmBo<VmInner>>>, + bo_offset: u64, + #[pin] + sgt: Option<SGTable<crate::gem::DriverObject>>, + new_vma: Option<KBox<Vma>> +} + +#[pin_data] +#[repr(C)] +struct As { + active_count: Opaque<bindings::refcount_t>, + id: i32, + unhandled_faults: bool, +} + +struct VmInner { + dev: PanthorDevRef, + core: Arc<Mutex<VmCore>>, +} + +#[pin_data] +pub(crate) struct VmOpCtx { + rsvd_page_tables: KMemSet, + preallocated_vmas: KVec<Pin<KBox<gpuvm::GpuVa<VmInner>>>>, + flags: u32, + va: Range<u64>, + // @TODO: TBD + returned_vmas: List<Vma, {ListID::VmaListID as u64}>, + #[pin] + map: Mutex<MapCtx>, +} + +impl VmOpCtx { + fn prealloc_vmas( + flags: u32 + ) -> Result<KVec<Pin<KBox<gpuvm::GpuVa<VmInner>>>>> { + let count = match flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK { + DRM_PANTHOR_VM_BIND_OP_TYPE_MAP => 3, + DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => 1, + _ => return Err(EINVAL), + }; + + let mut vmas = KVec::with_capacity(count as usize, GFP_KERNEL)?; + + for _ in 0..count { + vmas.push( + gpuvm::GpuVa::<VmInner>::new(init::default())?, + GFP_KERNEL + )?; + } + Ok(vmas) + } + + fn new( + vm: &Vm, + bo: &Option<crate::gem::ObjectRef>, + cache: &KMemCache, + offset: u64, + size: u64, + va: u64, + flags: u32 + ) -> Result<Pin<KBox<Self>>> { + + let mut vm_bo = None; + let mut sgt = None; + + let mut pin_guard = None; + + let (op_flags, pt_count) = + match flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK { + DRM_PANTHOR_VM_BIND_OP_TYPE_MAP => { + if (flags & !PANTHOR_VM_BIND_OP_MAP_FLAGS) != 0 { + return Err(EINVAL); + } + + match bo { + Some(bo) => { + // Make sure the VA and size are aligned and in-bounds. + if size > TryInto::<u64>::try_into(bo.size())? + || offset > TryInto::<u64>::try_into(bo.size())? - size { + return Err(EINVAL); + } + + let mut inner = vm.inner.exec_lock(Some(&bo.gem))?; + + // @TODO: + // If the BO has an exclusive VM attached, + // it can't be mapped to other VMs. + + if bo.gem.gem_obj().import_attach.is_null() + && inner.find_bo().is_none(){ + // Pre-reserve the BO pages, so the map + // operation doesn't have to allocate + unsafe { + to_result( + bindings::drm_gem_shmem_pin( + bo.gem.gem_obj() as *const _ as *mut _ + ) + )? + }; + pin_guard = Some(ScopeGuard::new(|| { + unsafe { + bindings::drm_gem_shmem_unpin( + bo.gem.gem_obj() as *const _ as *mut _ + ) + }; + })); + } + sgt = Some(bo.gem.sg_table()?); + + vm_bo = Some(inner.obtain_bo()?); + }, + None => return Err(EINVAL), + } + + // @TODO: + // If the BO has an exclusive VM attached, it can't be mapped to other VMs. + // if (bo->exclusive_vm_root_gem && + // bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) -> EINVAL + // if (!bo->base.base.import_attach) + // ret = drm_gem_shmem_pin(&bo->base); + + // L1, L2 and L3 page tables. + // We could optimize L3 allocation by iterating over + // the sgt and merging 2M contiguous blocks, but it's + // simpler to over-provision and return the pages if + // they're not used. 
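+                    // Illustrative example (numbers assumed, not taken from
+                    // the patch): mapping 4 MiB at va = 0 touches one
+                    // 512 GiB window, one 1 GiB window and two 2 MiB
+                    // windows, so four page tables are reserved up front
+                    // and any that end up unused go back to the cache.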
+ let count = + ( + align!(va + size, bit!(39, u64), u64) + - align_down!(va, bit!(39, u64), u64) + ) >> 39 + + ( + align!(va + size, bit!(30, u64), u64) + - align_down!(va, bit!(30, u64), u64) + ) >> 30 + + ( + align!(va + size, bit!(21, u64), u64) + - align_down!(va, bit!(21, u64), u64) + ) >> 21; + + (flags, count) + }, + + DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => { + let mut count = 0; + // Pre-allocate L3 page tables to account for + // the split-2M-block situation on unmap. + if va != align!(va, SZ_2M, u64) { + count += 1; + } + if va + size != align!(va + size, SZ_2M, u64) + && align!(va + size, SZ_2M, u64) != align!(va, SZ_2M, u64) { + count += 1; + } + (DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP, count) + }, + + DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY => { + (DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY, 0) + } + _ => return Err(EINVAL), + }; + + let ctx = KBox::pin_init(try_pin_init!(Self { + rsvd_page_tables : unsafe { + Pin::new_unchecked(cache).alloc_bulk(pt_count.try_into()?)? + }, + preallocated_vmas: Self::prealloc_vmas(flags)?, + flags: op_flags, + va: Range{start: va,end: va + size}, + returned_vmas: List::new(), + map <- new_mutex!(MapCtx { + vm_bo: vm_bo, + bo_offset: 0, + sgt: sgt, + new_vma : None, + }), + }), GFP_KERNEL)?; + + // @TODO: Add vm_bo to extobj list: drm_gpuvm_bo_extobj_add + + // All good so dismiss the guard + if let Some(pin_guard) = pin_guard { + pin_guard.dismiss(); + } + + Ok(ctx) + } +} + +impl VmInner { + fn map_pages( + &mut self, + iova: u64, + prot: u32, + sgt: &SGTable<crate::gem::DriverObject>, + offset: u64, + mut size: usize + ) -> Result<u64> { + + if size == 0 { + return Err(EINVAL); + }; + + //@TODO: This is a mess: different frameworks deal with different types + let mut offset: usize = offset as usize; + let mut iova: usize = iova as usize; + + let start_iova = iova; + + for range in sgt.iter() { + let mut paddr = range.dma_address(); + let mut len = range.dma_len(); + + if len <= offset { + offset -=len; + continue; + } + + paddr += offset; + len -= offset; + len = core::cmp::min(len, size); + size -= len; + + drm_dbg!(self.dev.as_raw(), "map: as={}, iova={}, paddr={}, len={}", + self.core.lock().r#as.id, iova, paddr, len); + + while len > 0 { + let (pgsize, pgcount) = get_pgsize((iova | paddr).try_into().unwrap(), len); + + let mapped = self.core.lock().pg_tlb.map_pages(iova, paddr, pgsize, pgcount, prot)?; + + if mapped != 0 { + iova += mapped; + paddr += mapped; + len -= mapped; + } else { + if drm_warn_on!(self.dev.as_raw(), mapped == 0) { + // If something failed, unmap what we've already mapped before + // returning. The unmap call is not supposed to fail. + drm_warn_on!(self.dev.as_raw(), + self.unmap_pages( + sgt, + start_iova.try_into().unwrap(), + (iova - start_iova).try_into().unwrap() + ).is_err() + ); + + return Err(ENOMEM); + } + } + } + + if size == 0{ + break; + } + offset = 0; + } + + // @TODO: Another mess ... + self.flush_range(start_iova.try_into()?, + (iova - start_iova).try_into()?)?; + Ok((iova - start_iova).try_into()?) 
+ } + + fn unmap_pages( + &mut self, + sgt: &SGTable<crate::gem::DriverObject>, + iova: u64, + size: u64 + ) -> Result<u64> { + + drm_dbg!(self.dev.as_raw(), "unmap: as={}, iova={}, len={}", + self.core.lock().r#as.id, iova, size); + + let mut offset: usize = 0; + let size: usize = size as usize; + + while offset < size { + let (pgsize, pgcount) = get_pgsize(iova + offset as u64, (size - offset) as usize); + let unmapped; + + unmapped = self.core.lock().pg_tlb.unmap_pages(iova as usize + offset, pgsize, pgcount); + + if drm_warn_on!(self.dev.as_raw(), unmapped != pgsize * pgcount) { + drm_err!(self.dev.as_raw(), "failed to unmap range {}-{} (requested range {} - {})\n", + iova as usize + offset + unmapped, + iova as usize + offset + pgsize * pgcount, + iova, + iova as usize + size); + self.flush_range(iova, (offset + unmapped).try_into()?)?; + return Err(EINVAL); + } + offset += unmapped; + + } + self.flush_range(iova, size as u64)?; + Ok(0) + } + + drm_dev_guard!{ + fn flush_range(self: &Self, iova: u64, size: u64) -> Result { + let dev = &self.dev; + let mmu_locked = dev_get_mmu_locked!(dev); + + mmu_locked.hw_do_operation( + &self.dev, + self.core.lock().r#as.id as u32, + iova, + size, + AS_COMMAND_FLUSH_PT.into()) + } + capture({self.dev.as_raw()}) + } +} + +impl gpuvm::DriverGpuVm for VmInner{ + type Driver = PanthorDriver; + type GpuVa = VmaCore; + type StepContext = VmOpCtx; + + fn step_map( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + // Get available vma + let vma = ctx.preallocated_vmas.swap_remove( + ctx.preallocated_vmas.len() - 1 + ); + + let mut map_locked = ctx.map.lock(); + self.map_pages( + op.addr(), + Vma::prot(ctx.flags & PANTHOR_VM_MAP_FLAGS), + map_locked.sgt.as_ref().expect("No SGTable available for setp_map"), + op.offset(), + op.range() as usize + )?; + + if op.map_and_link_va( + self, + vma, + map_locked.vm_bo.as_ref().expect("Missing GpuVmBo object for step_map") + ).is_err() { + return Err(EINVAL); + } + // @TODO: Make sure this is safe + map_locked.vm_bo = None; + Ok(()) + } + + fn step_unmap ( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpUnMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + + let map_locked = ctx.map.lock(); + let va = op.va().expect("No va specified for step_unmap"); + self.unmap_pages( + map_locked.sgt.as_ref().expect("No SGTable available for setp_unmap"), + va.addr(), + va.range() + )?; + + //Safety: the ownership is transfered here and the va is guaranteed to be valid + match op.unmap_and_unlink_va() { + Some(va) => { + let vma = UniqueArc::pin_init(pin_init!( + Vma { + node <- ListLinks::new(), + gpuva: va, + } + ), GFP_KERNEL)?; + ctx.returned_vmas.push_back(ListArc::from(vma)); + }, + None => {}, + } + Ok(()) + } + + fn step_remap( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpReMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + + let unmap_range = Range { + start : match op.prev_map() { + Some(prev_map) => prev_map.addr() + prev_map.range(), + None => op.unmap().va().expect("Missing va for unmap op").addr(), + }, + end : match op.next_map() { + Some(next_map) => next_map.addr(), + None => { + let va = op.unmap().va().expect("Missing va for unmap op"); + va.addr() + va.range() + } + } + }; + + let map_locked = ctx.map.lock(); + self.unmap_pages( + map_locked.sgt.as_ref().expect("No SGTable available for setp_remap"), + unmap_range.start, + unmap_range.end - unmap_range.start + )?; + + 
op.unmap().unmap_and_unlink_va(); + + let prev_va = op.prev_map().as_ref().map( + |v| ctx.preallocated_vmas.swap_remove(ctx.preallocated_vmas.len()-1) + ); + let next_va = op.next_map().as_ref().map( + |v| ctx.preallocated_vmas.swap_remove(ctx.preallocated_vmas.len()-1) + ); + + op.remap(prev_va, next_va); + + Ok(()) + } +} + +// Dummy TLB ops +// @TODO: Decide which type should actually implement those +impl FlushOps for VmInner { + type Data = (); + + fn tlb_flush_all( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_> + ) {} + + fn tlb_flush_walk( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _size: usize, + _granule: usize + ) {} + + fn tlb_add_page( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _granule: usize + ) {} +} /// Thin wrapper for kmem_cache pub(crate) struct KMemCache { @@ -110,3 +718,548 @@ impl Drop for KMemCache { } } +/// Common data extracted and shared between Vm and VmInner to avoid +/// cumbersome tricks to make one or the other access the data +/// It just overall simplifies things +struct VmCore { + r#as: As, + pg_tlb: ARM64LPAES1<VmInner>, +} + +#[pin_data] +pub(crate) struct Vm { + core: Arc<Mutex<VmCore>>, + inner: ARef<gpuvm::GpuVm<VmInner>>, + #[pin] + node: ListLinks<{ListID::VmListID as u64}>, + unusable: bool, + mcu_target: bool, + memattr: u64, +} + +impl Vm { + fn has_unhandled_faults(&self) -> bool { + self.core.lock().r#as.unhandled_faults + } + + fn is_unusable(&self) -> bool { + self.unusable + } + + drm_dev_guard! { + pub(crate) fn set_active(self: &mut Self, dev: &PanthorDevice) -> Result { + + let dev_data = dev.data(); + let mmu_locked = dev_get_mmu_locked!(dev); + //@TODO: FIXME - revisit the refcount and locking + let mut as_context: Guard<'_, AsContext, MutexBackend>; + let mut as_id; + + with_lock!({ + if unsafe { + bindings::refcount_inc_not_zero(vm_core.r#as.active_count.get()) + } { + return Err(EINVAL); + } + + as_context = mmu_locked.as_ctx.lock(); + + if unsafe { + bindings::refcount_inc_not_zero(vm_core.r#as.active_count.get()) + } { + return Err(EINVAL); + } + as_id = vm_core.r#as.id; + + } lock(vm_core <- self.core)); + + let alloc_mask = as_context.alloc_mask.load(Ordering::Relaxed); + + loop { + if as_id < 0 { + // Check for free slot + as_id = if self.mcu_target { + drm_warn_on!(dev.as_raw(), alloc_mask & bit!(0, u32) != 0); + 0 + } else { + (!alloc_mask | bit!(0, u32)) + .trailing_zeros() + .try_into()? + }; + + if bit!(as_id, u32) + & unsafe { (*dev_data.ptdev).gpu_info.as_present} + == 0 + { + as_id = as_context.get_free_slot(dev)? as i32; + } + + self.core.lock().r#as.id = as_id; + as_context.reserve_slot(self, as_id as u32); + + } else { + if as_context.faulty_mask + & Mmu::as_fault_mask(as_id as u32) + == 0 + { + break; + } + } + + let va_bits = unsafe { + Mmu::get_mmu_features_va_bits( + (*dev_data.ptdev).gpu_info.mmu_features + ) + }; + let transtab: u64 = unsafe { + (*self.core.lock().pg_tlb + .raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.ttbr + }; + + let mut transcfg = (AS_TRANSCFG_PTW_MEMATTR_WB + | AS_TRANSCFG_PTW_RA + | AS_TRANSCFG_ADRMODE_AARCH64_4K) as u64 + | as_transcfg_ina_bits((55 - va_bits).into()); + + if unsafe { (*dev_data.ptdev).coherent } { + transcfg |= AS_TRANSCFG_PTW_SH_OS as u64; + } + + /* If the VM is re-activated, we clear the fault. 
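+           Clearing the per-slot faulty bit and re-enabling the MMU
+           interrupts below makes the address-space slot usable again.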
*/ + self.core.lock().r#as.unhandled_faults = false; + + as_context.clear_fault(as_id as u32); + mmu_locked.enable_interrupts(dev, as_id as u32, !as_context.faulty_mask); + mmu_locked.enable_as(dev, as_id as u32, transtab, transcfg, self.memattr)?; + + break + } + + // Activate VM + unsafe { + bindings::refcount_set(self.core.lock().r#as.active_count.get(), 1); + as_context.lru_list.remove(self); + } + + Ok(()) + } capture( {dev.as_raw()} ) } + + /// This takes ownership of Arc<Self> + pub(crate) fn mark_idle(self: Arc<Self>, dev: &PanthorDevice) { + let anchor = self.core.clone(); + let vm_core = anchor.lock(); + + if !unsafe { + bindings::refcount_dec_and_test(vm_core.r#as.active_count.get()) + } { + return; + } + + let mmu_locked = dev_get_mmu_locked!(dev); + let mut as_context = mmu_locked.as_ctx.lock(); + + if !drm_warn_on!(dev.as_raw(), vm_core.r#as.id == -1) { + if let Some(unique_self) = self.into_unique_or_drop() { + as_context.lru_list.push_back( + ListArc::<Self,{ListID::VmListID as u64}>:: from(unique_self) + ); + } else { + drm_err!(dev.as_raw(), "Mismatched ref counting on VM object"); + } + } + + unsafe { + bindings::refcount_set(vm_core.r#as.active_count.get(), 0); + } + + } + + /// Mark VM as unused + fn release(&self, dev: &PanthorDevice) { + let mut vm_core = self.core.lock(); + + if drm_warn_on!(dev.as_raw(), vm_core.r#as.id < 0) { + return; + } + unsafe { bindings::refcount_set(vm_core.r#as.active_count.get(), 0) }; + vm_core.r#as.id = -1; + } +} + +kernel::list::impl_has_list_links! { + impl HasListLinks<{ListID::VmListID as u64}> for Vm { self.node } +} + +kernel::list::impl_list_arc_safe! { + impl ListArcSafe<{ListID::VmListID as u64}> for Vm { untracked; } +} + +kernel::list::impl_list_item! { + impl ListItem<{ListID::VmListID as u64}> for Vm { using ListLinks; } +} + +const MAX_AS_SLOTS: usize = 32; +/// AddressSpace Context +/// @alloc_mask: Bitmask encoding the allocated slots. +/// @faulty_mask: Bitmask encoding the faulty slots. +/// @slots: VMs currently bound to the AS slots. +struct AsContext { + alloc_mask: AtomicU32, + faulty_mask: u32, + slots: KVec<Arc<Mutex<VmCore>>>, + lru_list: List<Vm, {ListID::VmListID as u64}>, +} + +macro_rules! 
mmu_as { + ($as_nr:expr) => { + (MMU_BASE + (($as_nr) << MMU_AS_SHIFT)) as usize + } +} + +struct AsReg { + addr_lo: usize, + addr_hi: usize, +} + +impl AsReg { + fn new(addr: usize, offset_lo: usize, offset_hi: usize) -> Self { + Self{ addr_lo: addr + offset_lo, addr_hi: addr + offset_hi } + } + + fn write(&self, dev: &PanthorDevice, value: u64) -> Result { + // @TODO: Move to proper iomem + gpu_write(dev, self.addr_lo, (value & genmask!(31, 0, u64)) as u32); + gpu_write(dev, self.addr_hi, (value >> 32) as u32); + Ok(()) + } +} + +#[inline] +fn as_transtab_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result { + AsReg::new(mmu_as!(slot_id), 0x00, 0x04).write(dev, value) +} + +#[inline] +fn as_memattr_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result { + AsReg::new(mmu_as!(slot_id), 0x08, 0x0C).write(dev, value) +} + +#[inline] +fn as_transcfg_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result { + AsReg::new(mmu_as!(slot_id), 0x30, 0x34).write(dev, value) +} + +#[inline] +fn as_lockaddr_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result { + AsReg::new(mmu_as!(slot_id), 0x10, 0x14).write(dev, value) +} + +impl AsContext { + #[inline] + fn as_id_valid(id: i32) -> bool { + id >= 0 + } + + fn get_free_slot(&mut self, dev: &PanthorDevice) -> Result<u32> { + match self.lru_list.pop_front() { + Some(vm) => { + let vm_core = vm.core.lock(); + let slot_id: u32 = vm_core.r#as.id as u32; + + drm_warn_on!(dev.as_raw(), + unsafe { + bindings::refcount_read(vm_core.r#as.active_count.get()) + } != 0); + + vm.as_arc().release(dev); + self.release_slot(slot_id); + Ok(slot_id) + } + None => Err(ENOENT) + } + } + + #[inline] + fn reserve_slot(&mut self, vm: &Vm, slot_id: u32) { + self.alloc_mask.fetch_or(slot_id, Ordering::Relaxed); + self.slots[slot_id as usize] = vm.core.clone(); + + } + + #[inline] + fn release_slot(&mut self, slot_id: u32) { + self.alloc_mask.fetch_nand(slot_id, Ordering::Relaxed); + self.slots.swap_remove(slot_id as usize); + } + + #[inline] + fn clear_fault(&mut self, slot_id: u32) { + self.faulty_mask &= !Mmu::as_fault_mask(slot_id) + } +} + +#[pin_data] +pub(crate) struct Mmu { + pt_cache: Pin<KBox<KMemCache>>, + #[pin] + as_ctx: Mutex<AsContext> +} + +macro_rules! 
mmu_as_status { + ($as_nr:expr) => { + mmu_as!($as_nr) + 0x28 + } +} + +const AS_STATUS_ACTIVE: u32 = bit!(0, u32); + +// @TODO: Drop when moved to Rust iomem +fn gpu_write(dev: &PanthorDevice, reg: usize, cmd: u32) { + unsafe { bindings::writel(cmd, (*dev.data().ptdev).iomem.add(reg)) } +} + +impl Mmu { + pub(crate) fn new(dev: PanthorDevRef) -> Result<Arc<Self>> { + let mut va_bits = unsafe { + Self::get_mmu_features_va_bits((*dev.data().ptdev).gpu_info.mmu_features) + }; + + // @TODO: Move to rust once the gpuinfo is ready + if bindings::__BITS_PER_LONG < va_bits { + va_bits = unsafe { (*dev.data().ptdev).gpu_info.mmu_features }; + va_bits &= !genmask!(7, 0, u32); + va_bits |= bindings::__BITS_PER_LONG; + + unsafe { + (*dev.data().ptdev).gpu_info.mmu_features = va_bits; + } + } + + Arc::pin_init( + try_pin_init!( Self { + pt_cache: KBox::pin( + KMemCache::new( + c_str!("panthor-mmu-pt"), + SZ_4K as u32, + SZ_4K as u32, + 0 + )?, + GFP_KERNEL + )?, + as_ctx <- new_mutex!(AsContext{ + alloc_mask: AtomicU32::new(0), + faulty_mask: 0, + slots: KVec::<Arc<Mutex<VmCore>>>::with_capacity( + MAX_AS_SLOTS, GFP_KERNEL + )?, + lru_list: List::new() + }) + } + ), GFP_KERNEL ) + } + + #[inline] + fn get_mmu_features_va_bits(features: u32) -> u32 { + features & genmask!(7, 0, u32) + } + + #[inline] + fn get_mmu_features_pa_bits(features: u32) -> u32 { + (features >> 8 ) & genmask!(7, 0, u32) + } + + #[inline] + fn as_fault_mask(id: u32) -> u32 { + bit!(id, u32) + } + + #[inline] + fn access_type_name(fault_status: u32) -> &'static str { + match fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK { + AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC => "ATOMIC", + AS_FAULTSTATUS_ACCESS_TYPE_READ => "READ", + AS_FAULTSTATUS_ACCESS_TYPE_WRITE => "WRITE", + AS_FAULTSTATUS_ACCESS_TYPE_EX => "EXECUTE", + _ => "UNKNOWN" + } + } + + fn mair_to_memattr(mair: u64) -> u64 { + let mut memattr: u64 = 0; + + for i in 0..8 { + let in_attr: u8 = (mair >> (8 * 1)) as u8; + let out_attr: u8; + let (inner, outer) = (in_attr & 0xf, in_attr >> 4); + + // For caching to be enabled, inner and outer caching policy + // have to be both write-back, if one of them is write-through + // or non-cacheable, we just choose non-cacheable. Device + // memory is also translated to non-cacheable. + if (outer & 3) == 0 || (outer & 4) == 0 || (inner & 4) == 0 { + out_attr = ( + AS_MEMATTR_AARCH64_INNER_OUTER_NC + | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER + | as_memattr_aarch64_inner_alloc_expl!(false, false) + ).try_into().unwrap(); + } else { + // Use SH_CPU_INNER mode so SH_IS, which is used when + // IOMMU_CACHE is set, actually maps to the standard + // definition of inner-shareable and not Mali's + // internal-shareable mode. 
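+                // Illustrative example (value assumed): a MAIR attribute
+                // byte of 0xff (inner/outer write-back, read/write
+                // allocate) lands in this branch and yields a cacheable,
+                // CPU-inner-shareable MEMATTR entry with both allocation
+                // hints set.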
+ out_attr = ( + AS_MEMATTR_AARCH64_INNER_OUTER_WB + | AS_MEMATTR_AARCH64_SH_CPU_INNER + | as_memattr_aarch64_inner_alloc_expl!(inner & 1 != 0, + inner & 2 != 0) + ).try_into().unwrap(); + } + memattr |= (out_attr as u64) << (8 * i); + } + memattr + } + + fn wait_ready(dev: &PanthorDevice, nr: u32) -> Result { + // @TODO: This is nasty but it's either this or adding bindings + // for iopoll.h + // Mimicking readl_relaxed_poll_timeout_atomic + let timeout_us: u64 = 100_000; + let delay_us: u64 = 10; + let delay_ns: u64 = delay_us * bindings::NSEC_PER_USEC as u64; + let addr = unsafe { (*dev.data().ptdev).iomem.add(mmu_as_status!(nr) as usize) }; + let mut left_ns: i64 = (timeout_us * bindings::NSEC_PER_USEC as u64).try_into().unwrap(); + let mut val: u32; + + loop { + val = unsafe {bindings::readl_relaxed(addr) }; + if val & AS_STATUS_ACTIVE == 0 || left_ns < 0 { + break; + } + unsafe { bindings::__udelay(delay_us) }; + left_ns -= delay_ns as i64 - 1; + // @TODO: cpu_relax : Really !!!!!?????? Revisit + unsafe { + asm!( + "dmb ish", + "yield", + "dmb ish", + options(nostack, preserves_flags) + ); + } + } + + if val & AS_STATUS_ACTIVE != 0 { + // @TODO: + // panthor_device_schedule_reset(ptdev); + drm_err!((*dev.data().ptdev).base, "AS_ACTIVE bit stuck\n"); + return Err(Error::from_errno(-(bindings::ETIMEDOUT as i32))); + } + + Ok(()) + } + + fn write_cmd(dev: &PanthorDevice, slot_id: u32, cmd: u32) -> Result { + let status = Self::wait_ready(dev, slot_id); + if status.is_ok() { + gpu_write(dev, (MMU_BASE + (slot_id << MMU_AS_SHIFT) + 0x18) as usize, cmd); + } + status + } + + fn lock_region(dev: &PanthorDevice, slot_id: u32, mut region_start: u64, size: u64) { + if size != 0 { + let region_end = region_start + size; + // The locked region is a naturally aligned power of 2 block encoded as + // log2 minus(1). + // Calculate the desired start/end and look for the highest bit which + // differs. The smallest naturally aligned block must include this bit + // change, the desired region starts with this bit (and subsequent bits) + // zeroed and ends with the bit (and subsequent bits) set to one. 
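+            // Worked example (numbers assumed; AS_LOCK_REGION_MIN_SIZE is
+            // taken to be at most 32 KiB): locking 0x3000 bytes at
+            // region_start = 0x2000 gives start ^ (end - 1) =
+            // 0x2000 ^ 0x4fff = 0x6fff, whose highest set bit is bit 14,
+            // so the naturally aligned 32 KiB block at 0x0 is locked and
+            // encoded as region_width = 14 (log2(32 KiB) - 1).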
+            let region_width: u64 = (core::cmp::max(
+                u64::BITS - (region_start ^ (region_end - 1)).leading_zeros(),
+                AS_LOCK_REGION_MIN_SIZE.ilog2()
+            ) - 1).into();
+            // Mask off the low bits of region_start (which would be ignored by
+            // the hardware anyway)
+            region_start &= genmask!(63, region_width, u64);
+            let region = region_width | region_start;
+            // Lock the region that needs to be updated
+            let _ = as_lockaddr_write(dev, slot_id, region);
+            let _ = Self::write_cmd(dev, slot_id, AS_COMMAND_LOCK);
+        }
+    }
+
+    fn hw_do_operation(
+        &self,
+        dev: &PanthorDevice,
+        slot_id: u32,
+        iova: u64,
+        size: u64,
+        op: u32
+    ) -> Result {
+        // Hold the AS context lock for the duration of the HW operation.
+        let _as_ctx_locked = self.as_ctx.lock();
+
+        if AsContext::as_id_valid(slot_id as i32) {
+            // If the AS number is valid, we can be sure the device is up
+            // and running, so we don't need to explicitly power it up.
+            if op != AS_COMMAND_UNLOCK {
+                Self::lock_region(dev, slot_id, iova, size);
+            }
+            // Run the MMU operation
+            Self::write_cmd(dev, slot_id, op)?;
+            // Wait for the flush to complete
+            Self::wait_ready(dev, slot_id)
+        } else {
+            Err(EINVAL)
+        }
+    }
+
+    fn enable_interrupts(&self, dev: &PanthorDevice, slot_id: u32, mask: u32) {
+        gpu_write(dev, MMU_INT_CLEAR as usize, Self::as_fault_mask(slot_id));
+        gpu_write(dev, MMU_INT_MASK as usize, mask);
+    }
+
+    fn enable_as(&self, dev: &PanthorDevice, slot_id: u32, transtab: u64, transcfg: u64, memattr: u64) -> Result {
+        self.hw_do_operation(dev, slot_id, 0, !0, AS_COMMAND_FLUSH_MEM)?;
+
+        as_transtab_write(dev, slot_id, transtab)?;
+        as_memattr_write(dev, slot_id, memattr)?;
+        as_transcfg_write(dev, slot_id, transcfg)?;
+
+        Self::write_cmd(dev, slot_id, AS_COMMAND_UPDATE)
+    }
+
+    fn disable_as(&self, dev: &PanthorDevice, slot_id: u32) -> Result {
+        self.hw_do_operation(dev, slot_id, 0, !0, AS_COMMAND_FLUSH_MEM)?;
+
+        as_transtab_write(dev, slot_id, 0)?;
+        as_memattr_write(dev, slot_id, 0)?;
+        as_transcfg_write(dev, slot_id, 0)?;
+
+        Self::write_cmd(dev, slot_id, AS_COMMAND_UPDATE)
+    }
+}
+
+/// Pick the largest block size usable at `addr` and how many blocks of that
+/// size fit before the next boundary (or the end of `size`). The returned
+/// count is expressed in units of the returned page size.
+fn get_pgsize(addr: u64, size: usize) -> (usize, usize) {
+    // io-pgtable only operates on multiple pages within a single table
+    // entry, so we need to split at boundaries of the table size, i.e.
+    // the next block size up. The distance from address A to the next
+    // boundary of block size B is logically B - A % B, but in unsigned
+    // two's complement where B is a power of two we get the equivalence
+    // B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
+    let blk_offset: usize = (addr.wrapping_neg() % SZ_2M as u64) as usize;
+
+    if blk_offset != 0 || size < SZ_2M {
+        // 4K pages until the next 2M boundary (or until `size` runs out).
+        let span = [blk_offset, size].into_iter().filter(|&x| x != 0).min().unwrap_or(0);
+        return (SZ_4K, span / SZ_4K);
+    }
+
+    // 2M-aligned: map 2M blocks up to the next 1G boundary (or the end).
+    let blk_offset = match addr.wrapping_neg() % SZ_1G as u64 {
+        0 => SZ_1G,
+        off => off as usize,
+    };
+    (SZ_2M, core::cmp::min(blk_offset, size) / SZ_2M)
+}
+
-- 
GitLab
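For readers unfamiliar with the block-splitting rule used above, the following is a minimal, self-contained userspace sketch (not part of the patch) of how a map loop consumes the (page_size, page_count) pairs produced by a helper like get_pgsize(). The constant values, the main() harness and the println! output are assumptions for illustration only.

const SZ_4K: usize = 4 << 10; // assumed values for this standalone sketch
const SZ_2M: usize = 2 << 20;
const SZ_1G: usize = 1 << 30;

// Same splitting rule as the driver helper: 4 KiB pages until the next
// 2 MiB boundary, then 2 MiB blocks until the next 1 GiB boundary.
// Assumes page-aligned addr/size, as the driver does.
fn get_pgsize(addr: u64, size: usize) -> (usize, usize) {
    let blk_offset = (addr.wrapping_neg() % SZ_2M as u64) as usize;
    if blk_offset != 0 || size < SZ_2M {
        let span = if blk_offset != 0 { blk_offset.min(size) } else { size };
        return (SZ_4K, span / SZ_4K);
    }
    let blk_offset = match addr.wrapping_neg() % SZ_1G as u64 {
        0 => SZ_1G,
        off => off as usize,
    };
    (SZ_2M, blk_offset.min(size) / SZ_2M)
}

fn main() {
    // Map 6 MiB starting 4 KiB past a 2 MiB boundary: expect a 4 KiB run
    // up to the boundary, then two 2 MiB blocks, then a 4 KiB tail.
    let mut iova: u64 = 0x20_1000;
    let mut left: usize = 6 << 20;
    while left > 0 {
        let (pgsize, pgcount) = get_pgsize(iova, left);
        println!("map {} page(s) of {:#x} bytes at {:#x}", pgcount, pgsize, iova);
        iova += (pgsize * pgcount) as u64;
        left -= pgsize * pgcount;
    }
}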