From 313505cb1d6916d6506e99ee76f61622fe141a70 Mon Sep 17 00:00:00 2001
From: Beata Michalska <beata.michalska@arm.com>
Date: Mon, 10 Mar 2025 12:03:36 +0100
Subject: [PATCH] drm: panthor-rs: Add first phase for MMU related bits
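
Add the initial MMU plumbing for the Rust panthor driver: VM/VMA
bookkeeping built on top of drm_gpuvm, an address-space (AS) slot
allocator, io-pgtable (ARM64 LPAE stage-1) map/unmap/flush helpers and
the register programming needed to enable, disable and flush an AS.
The Mmu instance is created at probe time and, for now, kept in the
per-device data behind a Mutex<Option<Arc<Mmu>>> until the remaining
pieces are wired up.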

Signed-off-by: Beata Michalska <beata.michalska@arm.com>
---
 drivers/gpu/drm/panthor-rs/driver.rs |   30 +-
 drivers/gpu/drm/panthor-rs/mmu.rs    | 1159 +++++++++++++++++++++++++-
 2 files changed, 1181 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/panthor-rs/driver.rs b/drivers/gpu/drm/panthor-rs/driver.rs
index af95812d7fbd3..9c88a0606f437 100644
--- a/drivers/gpu/drm/panthor-rs/driver.rs
+++ b/drivers/gpu/drm/panthor-rs/driver.rs
@@ -3,13 +3,23 @@
 //! Top-level GPU driver implementation.
 
 use kernel::{
-    bindings, c_str, drm, drm::drv, drm::ioctl, error::Result, error::to_result, of, platform, prelude::*, sync::Arc,
+    bindings,
+    c_str,
+    drm, drm::drv, drm::ioctl,
+    error::Result, error::to_result,
+    new_mutex,
+    of,
+    platform,
+    prelude::*,
+    sync::{Arc, Mutex},
 };
 
 use core::ffi;
 use kernel::macros::vtable;
 use kernel::types::{ ARef, ForeignOwnable};
 
+use crate::mmu::Mmu;
+
 #[cfg(CONFIG_PM)]
 use kernel::runtime_dev_pm_ops;
 
@@ -41,6 +51,9 @@ pub(crate) struct PanthorData {
     #[pin]
     pub(crate) ptdev: *mut bindings::panthor_device,
     pub(crate) pdev: platform::Device,
+    // @TODO: Temporarily wrapped in Mutex & Option to support lazy init
+    #[pin]
+    pub(crate) mmu: Mutex<Option<Arc<Mmu>>>,
 }
 
 //HACK
@@ -178,11 +191,14 @@ impl platform::Driver for PanthorDriver {
 
         to_result(unsafe { panthor_init() })?;
 
-        let data = Arc::new(
-            PanthorData {
+        // Clone pdev up front: the original is moved into the try_pin_init!() initializer below
+        let pdev_c = pdev.clone();
+        let data = Arc::pin_init(
+            try_pin_init!( PanthorData {
                 ptdev: unsafe { bindings::panthor_device_alloc() },
-                pdev: pdev.clone(),
-            },
+                pdev: pdev_c,
+                mmu <- new_mutex!(None),
+            }),
             GFP_KERNEL,
         )?;
 
@@ -194,6 +210,10 @@ impl platform::Driver for PanthorDriver {
             panthor_device_init(data.ptdev);
         }
 
+        // Init MMU
+        let mut mmu_locked = data.mmu.lock();
+        mmu_locked.replace(Mmu::new(drm.clone())?);
+
         drm::drv::Registration::new_foreign_owned(drm.clone(), 0)?;
 
         dev_info!(pdev.as_ref(), "Probed!\n");
diff --git a/drivers/gpu/drm/panthor-rs/mmu.rs b/drivers/gpu/drm/panthor-rs/mmu.rs
index b47c26a91822d..27cc88e790b0d 100644
--- a/drivers/gpu/drm/panthor-rs/mmu.rs
+++ b/drivers/gpu/drm/panthor-rs/mmu.rs
@@ -1,11 +1,619 @@
  // SPDX-License-Identifier: GPL-2.0-only OR MIT
  //
-use core::marker::PhantomPinned;
+use core::{
+    arch::asm,
+    marker::PhantomPinned,
+    ops::Range,
+    sync::atomic::{AtomicU32, Ordering}
+};
+
 use kernel:: {
     bindings,
-    prelude::*
+    c_str,
+    drm::{gem::IntoGEMObject, gem::shmem::SGTable, gpuvm},
+    error::to_result,
+    io_pgtable::{ARM64LPAES1, FlushOps, IoPageTable},
+    list::{List, ListArc, ListLinks},
+    new_mutex,
+    prelude::*,
+    sizes::SZ_4K,
+    sync::{Arc, lock::{Guard, mutex::MutexBackend}, Mutex, UniqueArc},
+    types::{ARef, ForeignOwnable, Opaque, ScopeGuard},
+    uapi
+};
+
+use crate::common::cold_path;
+use crate::driver::{PanthorDevice, PanthorDevRef, PanthorDriver};
+use crate::regs::*;
+
+pub(crate) const SZ_2M: usize = bindings::SZ_2M as usize;
+pub(crate) const SZ_1G: usize = bindings::SZ_1G as usize;
+
+// Bindgen leaves the uapi enum variants with their fully mangled names;
+// import them here and alias them to shorter constants below.
+use uapi::{
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_READONLY,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MAP,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MASK,
+    drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY,
 };
-use crate::common::*;
+
+macro_rules! alias_op_map_flags {
+    ($flag:ident) => {
+        const $flag: u32 =
+            kernel::macros::concat_idents!(drm_panthor_vm_bind_op_flags_, $flag)
+            as u32;
+    }
+}
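+
+// For illustration, `alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC)`
+// expands (roughly) to:
+//
+//     const DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: u32 =
+//         drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC as u32;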
+
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_MAP);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_MASK);
+alias_op_map_flags!(DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY);
+
+const PANTHOR_VM_MAP_FLAGS: u32 = (
+        DRM_PANTHOR_VM_BIND_OP_MAP_READONLY
+      | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC
+      | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED
+) as u32;
+
+const PANTHOR_VM_BIND_OP_MAP_FLAGS: u32 = (
+        DRM_PANTHOR_VM_BIND_OP_MAP_READONLY
+      | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC
+      | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED
+      | DRM_PANTHOR_VM_BIND_OP_TYPE_MASK
+) as u32;
+
+/// Wraps a function body in drm_dev_enter()/drm_dev_exit().
+/// Currently limited to functions returning Result.
+/// The expression giving access to the drm_device pointer must be supplied
+/// via the trailing `capture(...)` argument.
+macro_rules! drm_dev_guard {
+    (
+        $pub:vis fn $name:ident($($arg:ident: $arg_t:ty),*) -> Result $body:block
+        capture($capture_dev:tt)
+    ) => {
+            $pub fn $name($($arg: $arg_t),*) -> Result {
+                let mut cookie: i32 = 0;
+                let drm_dev = $capture_dev;
+
+                if !unsafe {
+                    bindings::drm_dev_enter(drm_dev, &mut cookie as _)
+                } {
+                    return Err(EINVAL);
+                }
+
+                let _guard = ScopeGuard::new(|| {
+                    unsafe { bindings::drm_dev_exit(cookie) };
+                });
+
+                $body
+            }
+        }
+}
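+
+// A minimal usage sketch (mirroring `flush_range` below): the generated
+// function brackets its body with drm_dev_enter()/drm_dev_exit() and bails
+// out early if the device has been unplugged:
+//
+//     drm_dev_guard! {
+//         fn flush_range(self: &Self, iova: u64, size: u64) -> Result {
+//             /* runs with the drm_device reference held */
+//             Ok(())
+//         }
+//         capture({ self.dev.as_raw() })
+//     }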
+
+// Convenience macro to get hold of the Mmu instance, which is currently
+// guarded by a lock and wrapped in an Option to allow late initialization.
+// This is a temporary solution until all parts are properly plugged in.
+macro_rules! dev_get_mmu_locked {
+    ($dev: ident) => {
+        {
+            let dev_data = $dev.data();
+            let mmu_locked = dev_data.mmu.lock();
+            mmu_locked.as_ref().cloned().expect("Missing MMU context!")
+        }
+    }
+}
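+
+// Typical use (see `flush_range` and `set_active` below), assuming `dev`
+// gives access to the per-device data:
+//
+//     let mmu = dev_get_mmu_locked!(dev);
+//     mmu.hw_do_operation(dev, as_id, iova, size, op)?;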
+
+// Convenience IDs for handling List nodes (type-checking)
+#[repr(u64)]
+enum ListID {
+    VmaListID = 0x100, // or whatever ...
+    VmListID  = 0x200,
+}
+
+#[derive(Default)]
+struct VmaCore {}
+
+#[pin_data]
+pub(crate) struct Vma {
+    // node: list link, used for deferred release of the VMA
+    #[pin]
+    node: ListLinks<{ListID::VmaListID as u64}>,
+    gpuva: Pin<KBox<gpuvm::GpuVa<VmInner>>>,
+}
+
+impl Vma {
+    fn prot(flags: u32) -> u32 {
+        let mut prot: u32 = 0;
+
+        if flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC as u32 > 0 {
+            prot |= bindings::IOMMU_NOEXEC;
+        }
+
+        if flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED as u32 == 0 {
+            prot |= bindings::IOMMU_CACHE;
+        }
+
+        if flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY as u32 > 0 {
+            prot |= bindings::IOMMU_READ;
+        } else {
+            prot |= bindings::IOMMU_READ | bindings::IOMMU_WRITE;
+        }
+
+        prot
+    }
+}
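+
+// For example (illustrative only): `Vma::prot(0)` yields
+// IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, while
+// `Vma::prot(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)`
+// yields IOMMU_READ only.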
+
+kernel::list::impl_has_list_links! {
+    impl HasListLinks<{ListID::VmaListID as u64}> for Vma { self.node }
+}
+
+kernel::list::impl_list_arc_safe! {
+    impl ListArcSafe<{ListID::VmaListID as u64}> for Vma { untracked; }
+}
+
+kernel::list::impl_list_item! {
+    impl ListItem<{ListID::VmaListID as u64}> for Vma { using ListLinks; }
+}
+
+impl gpuvm::DriverGpuVa for VmaCore {}
+
+#[derive(Default)]
+#[pin_data]
+struct MapCtx {
+    vm_bo: Option<ARef<gpuvm::GpuVmBo<VmInner>>>,
+    bo_offset: u64,
+    #[pin]
+    sgt: Option<SGTable<crate::gem::DriverObject>>,
+    new_vma: Option<KBox<Vma>>
+}
+
+#[pin_data]
+#[repr(C)]
+struct As {
+    active_count: Opaque<bindings::refcount_t>,
+    id: i32,
+    unhandled_faults: bool,
+}
+
+struct VmInner {
+    dev: PanthorDevRef,
+    core: Arc<Mutex<VmCore>>,
+}
+
+#[pin_data]
+pub(crate) struct VmOpCtx {
+    rsvd_page_tables: KMemSet,
+    preallocated_vmas: KVec<Pin<KBox<gpuvm::GpuVa<VmInner>>>>,
+    flags: u32,
+    va: Range<u64>,
+    // @TODO: TBD
+    returned_vmas:  List<Vma, {ListID::VmaListID as u64}>,
+    #[pin]
+    map: Mutex<MapCtx>,
+}
+
+impl VmOpCtx {
+    fn prealloc_vmas(
+        flags: u32
+    ) -> Result<KVec<Pin<KBox<gpuvm::GpuVa<VmInner>>>>> {
+        let count = match flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK {
+            DRM_PANTHOR_VM_BIND_OP_TYPE_MAP   => 3,
+            DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => 1,
+            _ => return Err(EINVAL),
+        };
+
+        let mut vmas = KVec::with_capacity(count as usize, GFP_KERNEL)?;
+
+        for _ in 0..count {
+            vmas.push(
+                gpuvm::GpuVa::<VmInner>::new(init::default())?,
+                GFP_KERNEL
+            )?;
+        }
+        Ok(vmas)
+    }
+
+    fn new(
+        vm: &Vm,
+        bo: &Option<crate::gem::ObjectRef>,
+        cache: &KMemCache,
+        offset: u64,
+        size: u64,
+        va: u64,
+        flags: u32
+    ) -> Result<Pin<KBox<Self>>> {
+
+        let mut vm_bo = None;
+        let mut sgt = None;
+
+        let mut pin_guard = None;
+
+        let (op_flags, pt_count) =
+            match flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK {
+                DRM_PANTHOR_VM_BIND_OP_TYPE_MAP => {
+                    if (flags & !PANTHOR_VM_BIND_OP_MAP_FLAGS) != 0 {
+                        return Err(EINVAL);
+                    }
+
+                    match bo {
+                        Some(bo) => {
+                            // Make sure the VA and size are aligned and in-bounds.
+                            if size  > TryInto::<u64>::try_into(bo.size())?
+                            || offset > TryInto::<u64>::try_into(bo.size())? - size {
+                                return Err(EINVAL);
+                            }
+
+                            let mut inner = vm.inner.exec_lock(Some(&bo.gem))?;
+
+                            // @TODO:
+                            // If the BO has an exclusive VM attached,
+                            // it can't be mapped to other VMs.
+
+                            if bo.gem.gem_obj().import_attach.is_null()
+                                && inner.find_bo().is_none(){
+                                // Pre-reserve the BO pages, so the map
+                                // operation doesn't have to allocate
+                                unsafe {
+                                    to_result(
+                                        bindings::drm_gem_shmem_pin(
+                                            bo.gem.gem_obj() as *const _ as *mut _
+                                        )
+                                    )?
+                                };
+                                pin_guard = Some(ScopeGuard::new(|| {
+                                    unsafe {
+                                        bindings::drm_gem_shmem_unpin(
+                                            bo.gem.gem_obj() as *const _ as *mut _
+                                        )
+                                    };
+                                }));
+                            }
+                            sgt = Some(bo.gem.sg_table()?);
+
+                            vm_bo = Some(inner.obtain_bo()?);
+                        },
+                        None => return Err(EINVAL),
+                    }
+
+                    // @TODO:
+                    // If the BO has an exclusive VM attached, it can't be mapped to other VMs.
+                    // if (bo->exclusive_vm_root_gem &&
+                    // bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) -> EINVAL
+                    // if (!bo->base.base.import_attach)
+                    //  ret = drm_gem_shmem_pin(&bo->base);
+
+                    // L1, L2 and L3 page tables.
+                    //  We could optimize L3 allocation by iterating over
+                    //  the sgt and merging 2M contiguous blocks, but it's
+                    //  simpler to over-provision and return the pages if
+                    //  they're not used.
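+                    //
+                    //  Worked example (illustrative): mapping 4 MiB at
+                    //  VA 0x8000_0000 spans one 512 GiB (L1) range, one
+                    //  1 GiB (L2) range and two 2 MiB (L3) ranges, so
+                    //  four page tables get reserved.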
+                    // NOTE: in Rust `>>` binds more loosely than `+`, so each
+                    // shift needs its own set of parentheses.
+                    let count =
+                        ((
+                            align!(va + size, bit!(39, u64), u64)
+                            - align_down!(va, bit!(39, u64), u64)
+                        ) >> 39)
+                        + ((
+                            align!(va + size, bit!(30, u64), u64)
+                            - align_down!(va, bit!(30, u64), u64)
+                        ) >> 30)
+                        + ((
+                            align!(va + size, bit!(21, u64), u64)
+                            - align_down!(va, bit!(21, u64), u64)
+                        ) >> 21);
+
+                    (flags, count)
+                },
+
+                DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => {
+                    let mut count = 0;
+                    // Pre-allocate L3 page tables to account for
+                    // the split-2M-block situation on unmap.
+                    if va != align!(va, SZ_2M, u64) {
+                        count += 1;
+                    }
+                    if va + size != align!(va + size, SZ_2M, u64)
+                        && align!(va + size, SZ_2M, u64) != align!(va, SZ_2M, u64) {
+                            count += 1;
+                    }
+                    (DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP, count)
+                },
+
+                DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY => {
+                    (DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY, 0)
+                }
+                _ => return Err(EINVAL),
+            };
+
+        let ctx = KBox::pin_init(try_pin_init!(Self {
+            rsvd_page_tables: unsafe {
+                Pin::new_unchecked(cache).alloc_bulk(pt_count.try_into()?)?
+            },
+            preallocated_vmas: Self::prealloc_vmas(flags)?,
+            flags: op_flags,
+            va: Range { start: va, end: va + size },
+            returned_vmas: List::new(),
+            map <- new_mutex!(MapCtx {
+                vm_bo,
+                bo_offset: 0,
+                sgt,
+                new_vma: None,
+            }),
+        }), GFP_KERNEL)?;
+
+        // @TODO: Add vm_bo to extobj list: drm_gpuvm_bo_extobj_add
+
+        // All good so dismiss the guard
+        if let Some(pin_guard)  = pin_guard {
+            pin_guard.dismiss();
+        }
+
+        Ok(ctx)
+    }
+}
+
+impl VmInner {
+    fn map_pages(
+        &mut self,
+        iova: u64,
+        prot: u32,
+        sgt: &SGTable<crate::gem::DriverObject>,
+        offset: u64,
+        mut size: usize
+    ) -> Result<u64> {
+
+        if size == 0 {
+            return Err(EINVAL);
+        };
+
+        //@TODO: This is a mess: different frameworks deal with different types
+        let mut offset: usize = offset as usize;
+        let mut iova:   usize = iova as usize;
+
+        let start_iova = iova;
+
+        for range in sgt.iter() {
+            let mut paddr = range.dma_address();
+            let mut len = range.dma_len();
+
+            if len <= offset {
+                offset -= len;
+                continue;
+            }
+
+            paddr += offset;
+            len -= offset;
+            len = core::cmp::min(len, size);
+            size -= len;
+
+            drm_dbg!(self.dev.as_raw(), "map: as={}, iova={}, paddr={}, len={}",
+                     self.core.lock().r#as.id, iova, paddr, len);
+
+            while len > 0 {
+                let (pgsize, pgcount) = get_pgsize((iova | paddr).try_into().unwrap(), len);
+
+                let mapped = self.core.lock().pg_tlb.map_pages(iova, paddr, pgsize, pgcount, prot)?;
+
+                if mapped != 0 {
+                    iova += mapped;
+                    paddr += mapped;
+                    len -= mapped;
+                } else {
+                    if drm_warn_on!(self.dev.as_raw(), mapped == 0) {
+                        // If something failed, unmap what we've already mapped before
+                        // returning. The unmap call is not supposed to fail.
+                        drm_warn_on!(self.dev.as_raw(),
+                                     self.unmap_pages(
+                                         sgt,
+                                         start_iova.try_into().unwrap(),
+                                         (iova - start_iova).try_into().unwrap()
+                                         ).is_err()
+                                     );
+
+                        return Err(ENOMEM);
+                    }
+                }
+            }
+
+            if size == 0 {
+                break;
+            }
+            offset = 0;
+        }
+
+        // @TODO: Another mess ...
+        self.flush_range(start_iova.try_into()?,
+                         (iova - start_iova).try_into()?)?;
+        Ok((iova - start_iova).try_into()?)
+    }
+
+    fn unmap_pages(
+        &mut self,
+        sgt: &SGTable<crate::gem::DriverObject>,
+        iova: u64,
+        size: u64
+    ) -> Result<u64> {
+
+        drm_dbg!(self.dev.as_raw(), "unmap: as={}, iova={}, len={}",
+                 self.core.lock().r#as.id, iova, size);
+
+        let mut offset: usize = 0;
+        let size:       usize = size as usize;
+
+        while offset < size {
+            let (pgsize, pgcount) = get_pgsize(iova + offset as u64, (size - offset) as usize);
+            let unmapped =
+                self.core.lock().pg_tlb.unmap_pages(iova as usize + offset, pgsize, pgcount);
+
+            if drm_warn_on!(self.dev.as_raw(), unmapped != pgsize * pgcount) {
+                drm_err!(self.dev.as_raw(), "failed to unmap range {}-{} (requested range {} - {})\n",
+                        iova as usize + offset + unmapped,
+                        iova as usize + offset + pgsize * pgcount,
+                        iova,
+                        iova as usize + size);
+                self.flush_range(iova, (offset + unmapped).try_into()?)?;
+                return Err(EINVAL);
+            }
+            offset += unmapped;
+
+        }
+        self.flush_range(iova, size as u64)?;
+        Ok(0)
+    }
+
+    drm_dev_guard!{
+        fn flush_range(self: &Self, iova: u64, size: u64) -> Result {
+            let dev = &self.dev;
+            let mmu_locked = dev_get_mmu_locked!(dev);
+
+            mmu_locked.hw_do_operation(
+                 &self.dev,
+                 self.core.lock().r#as.id as u32,
+                 iova,
+                 size,
+                 AS_COMMAND_FLUSH_PT.into())
+        }
+        capture({self.dev.as_raw()})
+    }
+}
+
+impl gpuvm::DriverGpuVm for VmInner{
+    type Driver = PanthorDriver;
+    type GpuVa  = VmaCore;
+    type StepContext = VmOpCtx;
+
+    fn step_map(
+        self: &mut gpuvm::UpdatingGpuVm<'_, Self>,
+          op: &mut gpuvm::OpMap<Self>,
+         ctx: &mut Self::StepContext,
+    ) -> Result {
+        // Get available vma
+        let vma = ctx.preallocated_vmas.swap_remove(
+            ctx.preallocated_vmas.len() - 1
+        );
+
+        let mut map_locked = ctx.map.lock();
+        self.map_pages(
+            op.addr(),
+            Vma::prot(ctx.flags & PANTHOR_VM_MAP_FLAGS),
+            map_locked.sgt.as_ref().expect("No SGTable available for setp_map"),
+            op.offset(),
+            op.range() as usize
+        )?;
+
+        if op.map_and_link_va(
+            self,
+            vma,
+            map_locked.vm_bo.as_ref().expect("Missing GpuVmBo object for step_map")
+        ).is_err() {
+            return Err(EINVAL);
+        }
+        // @TODO: Make sure this is safe
+        map_locked.vm_bo = None;
+        Ok(())
+    }
+
+    fn step_unmap (
+        self: &mut gpuvm::UpdatingGpuVm<'_, Self>,
+          op: &mut gpuvm::OpUnMap<Self>,
+         ctx: &mut Self::StepContext,
+    ) -> Result {
+
+        let map_locked = ctx.map.lock();
+        let va = op.va().expect("No va specified for step_unmap");
+        self.unmap_pages(
+            map_locked.sgt.as_ref().expect("No SGTable available for setp_unmap"),
+            va.addr(),
+            va.range()
+        )?;
+
+        // Ownership of the va is transferred here and it is guaranteed to be valid.
+        match op.unmap_and_unlink_va() {
+            Some(va) => {
+                let vma = UniqueArc::pin_init(pin_init!(
+                    Vma {
+                        node <- ListLinks::new(),
+                        gpuva: va,
+                    }
+                ), GFP_KERNEL)?;
+                ctx.returned_vmas.push_back(ListArc::from(vma));
+            },
+            None => {},
+        }
+        Ok(())
+    }
+
+    fn step_remap(
+        self: &mut gpuvm::UpdatingGpuVm<'_, Self>,
+          op: &mut gpuvm::OpReMap<Self>,
+         ctx: &mut Self::StepContext,
+    ) -> Result {
+
+        let unmap_range = Range {
+            start : match op.prev_map() {
+                Some(prev_map) => prev_map.addr() + prev_map.range(),
+                None => op.unmap().va().expect("Missing va for unmap op").addr(),
+            },
+            end : match op.next_map() {
+                Some(next_map) => next_map.addr(),
+                None => {
+                    let va = op.unmap().va().expect("Missing va for unmap op");
+                    va.addr() + va.range()
+                }
+            }
+        };
+
+        let map_locked = ctx.map.lock();
+        self.unmap_pages(
+            map_locked.sgt.as_ref().expect("No SGTable available for setp_remap"),
+            unmap_range.start,
+            unmap_range.end - unmap_range.start
+        )?;
+
+        op.unmap().unmap_and_unlink_va();
+
+        let prev_va = op.prev_map().as_ref().map(
+            |_| ctx.preallocated_vmas.swap_remove(ctx.preallocated_vmas.len() - 1)
+        );
+        let next_va = op.next_map().as_ref().map(
+            |_| ctx.preallocated_vmas.swap_remove(ctx.preallocated_vmas.len() - 1)
+        );
+
+        op.remap(prev_va, next_va);
+
+        Ok(())
+    }
+}
+
+// Dummy TLB ops
+// @TODO: Decide which type should actually implement those
+impl FlushOps for VmInner {
+    type Data = ();
+
+    fn tlb_flush_all(
+        _data: <Self::Data as ForeignOwnable>::Borrowed<'_>
+    ) {}
+
+    fn tlb_flush_walk(
+        _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+        _iova: usize,
+        _size: usize,
+        _granule: usize
+    ) {}
+
+    fn tlb_add_page(
+        _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+        _iova: usize,
+        _granule: usize
+    ) {}
+}
 
 /// Thin wrapper for kmem_cache
 pub(crate) struct KMemCache {
@@ -110,3 +718,548 @@ impl Drop for KMemCache {
     }
 }
 
+/// Common data shared between Vm and VmInner, extracted into its own type so
+/// that either side can access it without cumbersome workarounds; this keeps
+/// the overall structure simpler.
+struct VmCore {
+    r#as: As,
+    pg_tlb: ARM64LPAES1<VmInner>,
+}
+
+#[pin_data]
+pub(crate) struct Vm {
+    core: Arc<Mutex<VmCore>>,
+    inner: ARef<gpuvm::GpuVm<VmInner>>,
+    #[pin]
+    node: ListLinks<{ListID::VmListID as u64}>,
+    unusable: bool,
+    mcu_target: bool,
+    memattr: u64,
+}
+
+impl Vm {
+    fn has_unhandled_faults(&self) -> bool {
+        self.core.lock().r#as.unhandled_faults
+    }
+
+    fn is_unusable(&self) -> bool {
+        self.unusable
+    }
+
+    drm_dev_guard! {
+    pub(crate) fn set_active(self: &mut Self, dev: &PanthorDevice) -> Result {
+
+        let dev_data = dev.data();
+        let mmu_locked = dev_get_mmu_locked!(dev);
+        //@TODO: FIXME - revisit the refcount and locking
+        let mut as_context: Guard<'_, AsContext, MutexBackend>;
+        let mut as_id;
+
+        with_lock!({
+            if unsafe {
+                bindings::refcount_inc_not_zero(vm_core.r#as.active_count.get())
+            } {
+                return Err(EINVAL);
+            }
+
+            as_context = mmu_locked.as_ctx.lock();
+
+            if unsafe {
+                bindings::refcount_inc_not_zero(vm_core.r#as.active_count.get())
+            } {
+                return Err(EINVAL);
+            }
+            as_id = vm_core.r#as.id;
+
+        } lock(vm_core <- self.core));
+
+        let alloc_mask = as_context.alloc_mask.load(Ordering::Relaxed);
+
+        loop {
+            if as_id < 0 {
+                // Check for free slot
+                as_id = if self.mcu_target {
+                    drm_warn_on!(dev.as_raw(), alloc_mask & bit!(0, u32) != 0);
+                    0
+                } else {
+                     (!(alloc_mask | bit!(0, u32)))
+                         .trailing_zeros()
+                         .try_into()?
+                };
+
+                if bit!(as_id, u32)
+                    & unsafe { (*dev_data.ptdev).gpu_info.as_present}
+                    == 0
+                {
+                    as_id = as_context.get_free_slot(dev)? as i32;
+                }
+
+                self.core.lock().r#as.id = as_id;
+                as_context.reserve_slot(self, as_id as u32);
+
+            } else {
+                if as_context.faulty_mask
+                    & Mmu::as_fault_mask(as_id as u32)
+                    == 0
+                {
+                    break;
+                }
+            }
+
+            let va_bits = unsafe {
+                Mmu::get_mmu_features_va_bits(
+                    (*dev_data.ptdev).gpu_info.mmu_features
+                )
+            };
+            let transtab: u64 = unsafe {
+                (*self.core.lock().pg_tlb
+                    .raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.ttbr
+            };
+
+            let mut transcfg = (AS_TRANSCFG_PTW_MEMATTR_WB
+                            | AS_TRANSCFG_PTW_RA
+                            | AS_TRANSCFG_ADRMODE_AARCH64_4K) as u64
+                            | as_transcfg_ina_bits((55 - va_bits).into());
+
+            if unsafe { (*dev_data.ptdev).coherent } {
+                transcfg |= AS_TRANSCFG_PTW_SH_OS as u64;
+            }
+
+            /* If the VM is re-activated, we clear the fault. */
+            self.core.lock().r#as.unhandled_faults = false;
+
+            as_context.clear_fault(as_id as u32);
+            mmu_locked.enable_interrupts(dev, as_id as u32, !as_context.faulty_mask);
+            mmu_locked.enable_as(dev, as_id as u32, transtab, transcfg, self.memattr)?;
+
+            break
+        }
+
+        // Activate VM
+        unsafe {
+             bindings::refcount_set(self.core.lock().r#as.active_count.get(), 1);
+             as_context.lru_list.remove(self);
+        }
+
+        Ok(())
+    } capture( {dev.as_raw()} ) }
+
+    /// This takes ownership of Arc<Self>
+    pub(crate) fn mark_idle(self: Arc<Self>, dev: &PanthorDevice) {
+        let anchor = self.core.clone();
+        let vm_core = anchor.lock();
+
+        if !unsafe {
+                bindings::refcount_dec_and_test(vm_core.r#as.active_count.get())
+        } {
+            return;
+        }
+
+        let mmu_locked = dev_get_mmu_locked!(dev);
+        let mut as_context = mmu_locked.as_ctx.lock();
+
+        if !drm_warn_on!(dev.as_raw(), vm_core.r#as.id == -1) {
+            if let Some(unique_self) =  self.into_unique_or_drop() {
+                as_context.lru_list.push_back(
+                    ListArc::<Self,{ListID::VmListID as u64}>:: from(unique_self)
+                );
+            } else {
+                drm_err!(dev.as_raw(), "Mismatched ref counting on VM object");
+            }
+        }
+
+        unsafe {
+            bindings::refcount_set(vm_core.r#as.active_count.get(), 0);
+        }
+
+    }
+
+    /// Mark VM as unused
+    fn release(&self, dev: &PanthorDevice) {
+        let mut vm_core = self.core.lock();
+
+        if drm_warn_on!(dev.as_raw(), vm_core.r#as.id < 0) {
+            return;
+        }
+        unsafe { bindings::refcount_set(vm_core.r#as.active_count.get(), 0) };
+        vm_core.r#as.id = -1;
+    }
+}
+
+kernel::list::impl_has_list_links! {
+    impl HasListLinks<{ListID::VmListID as u64}> for Vm { self.node }
+}
+
+kernel::list::impl_list_arc_safe! {
+    impl ListArcSafe<{ListID::VmListID as u64}> for Vm { untracked; }
+}
+
+kernel::list::impl_list_item! {
+    impl ListItem<{ListID::VmListID as u64}> for Vm { using ListLinks; }
+}
+
+const MAX_AS_SLOTS: usize = 32;
+/// AddressSpace Context
+/// @alloc_mask: Bitmask encoding the allocated slots.
+/// @faulty_mask: Bitmask encoding the faulty slots.
+/// @slots: VMs currently bound to the AS slots.
+struct AsContext {
+    alloc_mask: AtomicU32,
+    faulty_mask: u32,
+    slots:  KVec<Arc<Mutex<VmCore>>>,
+    lru_list: List<Vm, {ListID::VmListID as u64}>,
+}
+
+macro_rules! mmu_as {
+    ($as_nr:expr) => {
+        (MMU_BASE + (($as_nr) << MMU_AS_SHIFT)) as usize
+    }
+}
+
+struct AsReg {
+    addr_lo: usize,
+    addr_hi: usize,
+}
+
+impl AsReg {
+    fn new(addr: usize, offset_lo: usize, offset_hi: usize) -> Self {
+       Self{ addr_lo: addr + offset_lo, addr_hi: addr + offset_hi }
+    }
+
+    fn write(&self, dev: &PanthorDevice, value: u64) -> Result {
+        // @TODO: Move to proper iomem
+        gpu_write(dev, self.addr_lo, (value & genmask!(31, 0, u64)) as u32);
+        gpu_write(dev, self.addr_hi, (value >> 32) as u32);
+        Ok(())
+    }
+}
+
+#[inline]
+fn as_transtab_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result {
+    AsReg::new(mmu_as!(slot_id), 0x00, 0x04).write(dev, value)
+}
+
+#[inline]
+fn as_memattr_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result {
+    AsReg::new(mmu_as!(slot_id), 0x08, 0x0C).write(dev, value)
+}
+
+#[inline]
+fn as_transcfg_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result {
+    AsReg::new(mmu_as!(slot_id), 0x30, 0x34).write(dev, value)
+}
+
+#[inline]
+fn as_lockaddr_write(dev: &PanthorDevice, slot_id: u32, value: u64) -> Result {
+    AsReg::new(mmu_as!(slot_id), 0x10, 0x14).write(dev, value)
+}
+
+impl AsContext {
+    #[inline]
+    fn as_id_valid(id: i32) -> bool {
+        id >= 0
+    }
+
+    fn get_free_slot(&mut self, dev: &PanthorDevice) -> Result<u32> {
+        match self.lru_list.pop_front() {
+            Some(vm) => {
+                let vm_core = vm.core.lock();
+                let slot_id: u32 = vm_core.r#as.id as u32;
+
+                drm_warn_on!(dev.as_raw(),
+                        unsafe {
+                            bindings::refcount_read(vm_core.r#as.active_count.get())
+                        } != 0);
+
+                vm.as_arc().release(dev);
+                self.release_slot(slot_id);
+                Ok(slot_id)
+            }
+            None =>  Err(ENOENT)
+        }
+    }
+
+    #[inline]
+    fn reserve_slot(&mut self, vm: &Vm, slot_id: u32) {
+        self.alloc_mask.fetch_or(bit!(slot_id, u32), Ordering::Relaxed);
+        self.slots[slot_id as usize] = vm.core.clone();
+
+    }
+
+    #[inline]
+    fn release_slot(&mut self, slot_id: u32) {
+        self.alloc_mask.fetch_and(!bit!(slot_id, u32), Ordering::Relaxed);
+        self.slots.swap_remove(slot_id as usize);
+    }
+
+    #[inline]
+    fn clear_fault(&mut self, slot_id: u32) {
+        self.faulty_mask &= !Mmu::as_fault_mask(slot_id)
+    }
+}
+
+#[pin_data]
+pub(crate) struct Mmu {
+    pt_cache: Pin<KBox<KMemCache>>,
+    #[pin]
+    as_ctx: Mutex<AsContext>
+}
+
+macro_rules! mmu_as_status {
+    ($as_nr:expr) => {
+        mmu_as!($as_nr) + 0x28
+    }
+}
+
+const AS_STATUS_ACTIVE: u32 = bit!(0, u32);
+
+// @TODO: Drop when moved to Rust iomem
+fn gpu_write(dev: &PanthorDevice, reg: usize, cmd: u32) {
+    unsafe { bindings::writel(cmd, (*dev.data().ptdev).iomem.add(reg)) }
+}
+
+impl Mmu {
+    pub(crate) fn new(dev: PanthorDevRef) -> Result<Arc<Self>> {
+        let mut va_bits = unsafe {
+            Self::get_mmu_features_va_bits((*dev.data().ptdev).gpu_info.mmu_features)
+        };
+
+        // @TODO: Move to rust once the gpuinfo is ready
+        if bindings::__BITS_PER_LONG < va_bits {
+            va_bits = unsafe { (*dev.data().ptdev).gpu_info.mmu_features };
+            va_bits &= !genmask!(7, 0, u32);
+            va_bits |= bindings::__BITS_PER_LONG;
+
+            unsafe {
+                 (*dev.data().ptdev).gpu_info.mmu_features = va_bits;
+            }
+        }
+
+        Arc::pin_init(
+            try_pin_init!( Self {
+                    pt_cache: KBox::pin(
+                                  KMemCache::new(
+                                      c_str!("panthor-mmu-pt"),
+                                      SZ_4K as u32,
+                                      SZ_4K as u32,
+                                      0
+                                  )?,
+                                  GFP_KERNEL
+                                )?,
+                    as_ctx <- new_mutex!(AsContext{
+                        alloc_mask: AtomicU32::new(0),
+                        faulty_mask: 0,
+                        slots: KVec::<Arc<Mutex<VmCore>>>::with_capacity(
+                            MAX_AS_SLOTS, GFP_KERNEL
+                        )?,
+                        lru_list: List::new()
+                    })
+                }
+            ), GFP_KERNEL )
+    }
+
+    #[inline]
+    fn get_mmu_features_va_bits(features: u32) -> u32 {
+        features & genmask!(7, 0, u32)
+    }
+
+    #[inline]
+    fn get_mmu_features_pa_bits(features: u32) -> u32 {
+        (features >> 8 ) & genmask!(7, 0, u32)
+    }
+
+    #[inline]
+    fn as_fault_mask(id: u32) -> u32 {
+        bit!(id, u32)
+    }
+
+    #[inline]
+    fn access_type_name(fault_status: u32) -> &'static str {
+        match fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK {
+            AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC   => "ATOMIC",
+            AS_FAULTSTATUS_ACCESS_TYPE_READ     => "READ",
+            AS_FAULTSTATUS_ACCESS_TYPE_WRITE    => "WRITE",
+            AS_FAULTSTATUS_ACCESS_TYPE_EX       => "EXECUTE",
+            _                                   => "UNKNOWN"
+        }
+    }
+
+    fn mair_to_memattr(mair: u64) -> u64 {
+        let mut memattr: u64 = 0;
+
+        for i in 0..8 {
+            let in_attr: u8 = (mair >> (8 * i)) as u8;
+            let out_attr: u8;
+            let (inner, outer) = (in_attr & 0xf, in_attr >> 4);
+
+            // For caching to be enabled, inner and outer caching policy
+            // have to be both write-back, if one of them is write-through
+            // or non-cacheable, we just choose non-cacheable. Device
+            // memory is also translated to non-cacheable.
+            if (outer & 3) == 0 || (outer & 4) == 0 || (inner & 4) == 0 {
+                out_attr = (
+                        AS_MEMATTR_AARCH64_INNER_OUTER_NC
+                        | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER
+                        | as_memattr_aarch64_inner_alloc_expl!(false, false)
+                ).try_into().unwrap();
+            } else {
+                // Use SH_CPU_INNER mode so SH_IS, which is used when
+                // IOMMU_CACHE is set, actually maps to the standard
+                // definition of inner-shareable and not Mali's
+                // internal-shareable mode.
+                out_attr = (
+                        AS_MEMATTR_AARCH64_INNER_OUTER_WB
+                        | AS_MEMATTR_AARCH64_SH_CPU_INNER
+                        | as_memattr_aarch64_inner_alloc_expl!(inner & 1 != 0,
+                                                              inner & 2 != 0)
+                ).try_into().unwrap();
+            }
+            memattr |= (out_attr as u64) << (8 * i);
+        }
+        memattr
+    }
+
+    fn wait_ready(dev: &PanthorDevice, nr: u32) -> Result {
+        // @TODO: This is nasty but it's either this or adding bindings
+        // for iopoll.h
+        // Mimicking readl_relaxed_poll_timeout_atomic
+        let timeout_us: u64 = 100_000;
+        let delay_us: u64 = 10;
+        let delay_ns: u64 = delay_us * bindings::NSEC_PER_USEC as u64;
+        let addr = unsafe { (*dev.data().ptdev).iomem.add(mmu_as_status!(nr) as usize) };
+        let mut left_ns: i64 = (timeout_us * bindings::NSEC_PER_USEC as u64).try_into().unwrap();
+        let mut val: u32;
+
+        loop {
+            val = unsafe {bindings::readl_relaxed(addr) };
+            if val & AS_STATUS_ACTIVE == 0 || left_ns < 0 {
+                break;
+            }
+            unsafe { bindings::__udelay(delay_us) };
+            left_ns -= delay_ns as i64 - 1;
+            // @TODO: open-coded cpu_relax(); revisit once a proper binding exists
+            unsafe {
+                 asm!(
+                     "dmb ish",
+                     "yield",
+                     "dmb ish",
+                     options(nostack, preserves_flags)
+                );
+            }
+        }
+
+        if val & AS_STATUS_ACTIVE != 0 {
+            // @TODO:
+            // panthor_device_schedule_reset(ptdev);
+            drm_err!(dev.as_raw(), "AS_ACTIVE bit stuck\n");
+            return Err(Error::from_errno(-(bindings::ETIMEDOUT as i32)));
+        }
+
+        Ok(())
+    }
+
+    fn write_cmd(dev: &PanthorDevice, slot_id: u32, cmd: u32) -> Result {
+        let status = Self::wait_ready(dev, slot_id);
+        if status.is_ok() {
+            gpu_write(dev, (MMU_BASE + (slot_id << MMU_AS_SHIFT) + 0x18) as usize, cmd);
+        }
+        status
+    }
+
+    fn lock_region(dev: &PanthorDevice, slot_id: u32, mut region_start: u64, size: u64) {
+        if size != 0 {
+            let region_end = region_start + size;
+            // The locked region is a naturally aligned power of 2 block encoded as
+            // log2 minus(1).
+            // Calculate the desired start/end and look for the highest bit which
+            // differs. The smallest naturally aligned block must include this bit
+            // change, the desired region starts with this bit (and subsequent bits)
+            // zeroed and ends with the bit (and subsequent bits) set to one.
+            let region_width: u64 = (core::cmp::max(
+                     u64::BITS - (region_start ^ (region_end - 1)).leading_zeros(),
+                     AS_LOCK_REGION_MIN_SIZE.ilog2()
+            ) - 1).into();
+            // Mask off the low bits of region_start (which would be ignored by
+            // the hardware anyway)
+            region_start &= genmask!(63, region_width, u64);
+            let region = region_width | region_start;
+            // Lock the region that needs to be updated
+            let _ = as_lockaddr_write(dev, slot_id, region);
+            let _ = Self::write_cmd(dev, slot_id, AS_COMMAND_LOCK);
+        }
+    }
+
+    fn hw_do_operation(
+        self: &Self,
+        dev: &PanthorDevice,
+        slot_id:u32,
+        iova: u64,
+        size: u64,
+        op: u32
+    ) -> Result {
+        // Hold the AS context lock for the duration of the operation
+        let _as_ctx_locked = self.as_ctx.lock();
+
+        if AsContext::as_id_valid(slot_id as i32) {
+            // If the AS number is greater than zero, then we can be sure
+            // the device is up and running, so we don't need to explicitly
+            // power it up
+            if op != AS_COMMAND_UNLOCK {
+                Self::lock_region(dev, slot_id, iova, size);
+            }
+            // Run the MMU operation
+            Self::write_cmd(dev, slot_id, op)?;
+            // Wait for the flush to complete
+            Self::wait_ready(dev, slot_id)
+        } else {
+            Err(EINVAL)
+        }
+    }
+
+    fn enable_interrupts(&self, dev: &PanthorDevice, slot_id: u32, mask: u32 ) {
+        gpu_write(dev, MMU_INT_CLEAR as usize, Self::as_fault_mask(slot_id));
+        gpu_write(dev, MMU_INT_MASK as usize, mask);
+    }
+
+    fn enable_as(&self, dev: &PanthorDevice, slot_id: u32, transtab: u64, transcfg: u64, memattr: u64) -> Result {
+        self.hw_do_operation(dev, slot_id, 0, !0, AS_COMMAND_FLUSH_MEM)?;
+
+        as_transtab_write(dev, slot_id, transtab)?;
+        as_memattr_write (dev, slot_id, memattr)?;
+        as_transcfg_write(dev, slot_id, transcfg)?;
+
+        Self::write_cmd(dev, slot_id, AS_COMMAND_UPDATE)
+    }
+
+    fn disable_as(&self, dev: &PanthorDevice, slot_id: u32) -> Result {
+        self.hw_do_operation(dev, slot_id, 0, !0, AS_COMMAND_FLUSH_MEM)?;
+
+        as_transtab_write(dev, slot_id, 0)?;
+        as_memattr_write (dev, slot_id, 0)?;
+        as_transcfg_write(dev, slot_id, 0)?;
+
+        Self::write_cmd(dev, slot_id, AS_COMMAND_UPDATE)
+    }
+}
+
+fn get_pgsize(addr: u64, size: usize) -> (usize, usize) {
+    // io-pgtable only operates on multiple pages within a single table
+    // entry, so we need to split at boundaries of the table size, i.e.
+    // the next block size up. The distance from address A to the next
+    // boundary of block size B is logically B - A % B, but in unsigned
+    // two's complement where B is a power of two we get the equivalence
+    // B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
+    let mut blk_offset: usize = (addr.wrapping_neg() % SZ_2M as u64) as usize;
+    let count: usize;
+
+    if blk_offset != 0 || size < SZ_2M {
+        // min_not_zero(blk_offset, size), expressed as a 4K page count
+        count = [blk_offset, size].into_iter().filter(|&x| x != 0).min().unwrap_or(0) / SZ_4K;
+        return (SZ_4K, count);
+    }
+
+    blk_offset = (addr.wrapping_neg() % SZ_1G as u64) as usize;
+    count = [blk_offset, size].into_iter().filter(|&x| x != 0).min().unwrap_or(0) / SZ_2M;
+    (SZ_2M, count)
+}
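+
+// Illustration of the split-at-block-boundary behaviour above (values assume
+// the usual 4 KiB / 2 MiB / 1 GiB block sizes):
+//
+//     get_pgsize(0x20_1000, 4 << 20) == (SZ_4K, 0x1ff); // 511 pages up to the next 2 MiB boundary
+//     get_pgsize(0x40_0000, 2 << 20) == (SZ_2M, 1);     // then whole 2 MiB blocks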
+
-- 
GitLab