diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 8f3bfed137aadbb5bba08bb0fa04edb385c4ac9c..00a9b17abd980e9e50c1285ace3976a03c8f8004 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -511,6 +511,8 @@ source "drivers/gpu/drm/sprd/Kconfig" source "drivers/gpu/drm/imagination/Kconfig" +source "drivers/gpu/drm/tyr/Kconfig" + config DRM_HYPERV tristate "DRM Support for Hyper-V synthetic video device" depends on DRM && PCI && MMU && HYPERV diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 7e52712fcff498327c5551d952c44fa6e51cf174..dccbbe80a2614ae7fe9e89110bd533a4a5ea3d31 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -229,3 +229,4 @@ obj-y += solomon/ obj-$(CONFIG_DRM_SPRD) += sprd/ obj-$(CONFIG_DRM_LOONGSON) += loongson/ obj-$(CONFIG_DRM_POWERVR) += imagination/ +obj-$(CONFIG_DRM_TYR) += tyr/ diff --git a/drivers/gpu/drm/tyr/Kconfig b/drivers/gpu/drm/tyr/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..c6b1826a3f1cd3b07eb71142023208a997edb180 --- /dev/null +++ b/drivers/gpu/drm/tyr/Kconfig @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +config TYR_DRM_GEM_SHMEM_HELPER + bool + select DRM_GEM_SHMEM_HELPER + +config TYR_DRM_GPUVM + bool + select DRM_GPUVM + +config DRM_TYR + tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)" + depends on DRM=y + depends on RUST + depends on RUST_FW_LOADER_ABSTRACTIONS + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE + depends on MMU + select TYR_DRM_GPUVM + select TYR_DRM_GEM_SHMEM_HELPER + select IOMMU_IO_PGTABLE_LPAE + depends on IOMMU_SUPPORT + help + Rust DRM driver for ARM Mali CSF-based GPUs. + + This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. + + Note that the Mali-G68 and Mali-G78, while Valhall architecture, will + be supported with the panfrost driver as they are not CSF GPUs. + + if M is selected, the module will be called tyr. 
diff --git a/drivers/gpu/drm/tyr/Makefile b/drivers/gpu/drm/tyr/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ba545f65f2c0823b9a4a5a54e39b867e4f9bf812 --- /dev/null +++ b/drivers/gpu/drm/tyr/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 or MIT + +obj-$(CONFIG_DRM_TYR) += tyr.o diff --git a/drivers/gpu/drm/tyr/driver.rs b/drivers/gpu/drm/tyr/driver.rs new file mode 100644 index 0000000000000000000000000000000000000000..5bfd295929dd5476fd2050e16572ab93e454601d --- /dev/null +++ b/drivers/gpu/drm/tyr/driver.rs @@ -0,0 +1,244 @@ +use kernel::bindings; +use kernel::bits::bit_u32; +use kernel::c_str; +use kernel::clk::Clk; +use kernel::devres::Devres; +use kernel::drm; +use kernel::drm::drv; +use kernel::drm::ioctl; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::new_mutex; +use kernel::of; +use kernel::platform; +use kernel::prelude::*; +use kernel::regulator::Regulator; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time; +use kernel::types::ARef; + +use crate::file::File; +use crate::fw; +use crate::fw::Firmware; +use crate::fw::JobIrqHandler; +use crate::gpu; +use crate::gpu::GpuInfo; +use crate::gpu::GpuIrqHandler; +use crate::mmu; +use crate::mmu::Mmu; +use crate::mmu::MmuIrqHandler; +use crate::regs::*; + +use core::pin::Pin; + +/// Convienence type alias for the DRM device type for this driver +pub(crate) type TyrDevice = drm::device::Device<TyrDriver>; + +#[pin_data(PinnedDrop)] +pub(crate) struct TyrDriver { + device: ARef<TyrDevice>, +} + +#[pin_data] +pub(crate) struct TyrData { + pub(crate) pdev: platform::Device, + + #[pin] + clks: Mutex<Clocks>, + + #[pin] + regulators: Mutex<Regulators>, + + // Some inforation on the GPU. This is mainly queried by userspace (mesa). + pub(crate) gpu_info: GpuInfo, + + /// The firmware running on the MCU. + pub(crate) fw: Firmware, + + /// True if the CPU/GPU are memory coherent. 
+ pub(crate) coherent: bool, + + /// MMU management. + pub(crate) mmu: Pin<KBox<Mutex<Mmu>>>, + + #[pin] + gpu_irq: IrqRegistration<GpuIrqHandler>, + #[pin] + mmu_irq: IrqRegistration<MmuIrqHandler>, + #[pin] + job_irq: IrqRegistration<JobIrqHandler>, +} + +unsafe impl Send for TyrData {} +unsafe impl Sync for TyrData {} + +fn issue_soft_reset(iomem: &Devres<IoMem<0>>) -> Result<()> { + let irq_enable_cmd = 1 | bit_u32(8); + GPU_CMD.write(iomem, irq_enable_cmd)?; + + let op = || GPU_INT_RAWSTAT.read(iomem); + let cond = |raw_stat: &u32| -> bool { (*raw_stat >> 8) & 1 == 1 }; + let res = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_micros(20000)), + ); + + if let Err(e) = res { + pr_err!("GPU reset failed with errno {}\n", e.to_errno()); + pr_err!("GPU_INT_RAWSTAT is {}\n", GPU_INT_RAWSTAT.read(iomem)?); + } + + Ok(()) +} + +kernel::of_device_table!( + OF_TABLE, + MODULE_OF_TABLE, + <TyrDriver as platform::Driver>::IdInfo, + [ + (of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()), + (of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ()) + ] +); + +impl platform::Driver for TyrDriver { + type IdInfo = (); + const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE); + + fn probe(pdev: &mut platform::Device, _info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>> { + dev_dbg!(pdev.as_ref(), "Probed Tyr\n"); + + let core_clk = Clk::new(pdev.as_ref(), Some(c_str!("core")))?; + let stacks_clk = Clk::new(pdev.as_ref(), Some(c_str!("stacks")))?; + let coregroup_clk = Clk::new(pdev.as_ref(), Some(c_str!("coregroup")))?; + + core_clk.prepare_enable()?; + stacks_clk.prepare_enable()?; + coregroup_clk.prepare_enable()?; + + let mali_regulator = Regulator::get(pdev.as_ref(), c_str!("mali"))?; + let sram_regulator = Regulator::get(pdev.as_ref(), c_str!("sram"))?; + + mali_regulator.enable()?; + sram_regulator.enable()?; + + let resource = pdev.resource(0).ok_or(EINVAL)?; + + let iomem = 
Arc::new(pdev.ioremap_resource(resource)?, GFP_KERNEL)?; + + issue_soft_reset(&iomem)?; + gpu::l2_power_on(&iomem)?; + + let gpu_info = GpuInfo::new(&iomem)?; + gpu_info.log(&pdev); + + pdev.as_ref().dma_set_max_seg_size(u32::MAX); + pdev.as_ref() + .dma_set_mask_and_coherent(u64::from(gpu_info.pa_bits()))?; + + let platform = pdev.clone(); + let tdev: ARef<TyrDevice> = drm::device::Device::new_no_data(pdev.as_ref())?; + + let mmu = KBox::pin_init(new_mutex!(Mmu::new()?), GFP_KERNEL)?; + let fw = Firmware::init(tdev.clone(), pdev.clone(), &gpu_info, mmu.as_ref(), &iomem)?; + + // Ideally we'd find a way around this useless clone too... + let t = tdev.clone(); + let data = Arc::pin_init( + try_pin_init!(TyrData { + pdev: platform.clone(), + clks <- new_mutex!(Clocks { + core: core_clk, + stacks: stacks_clk, + coregroup: coregroup_clk, + }), + regulators <- new_mutex!(Regulators { + mali: mali_regulator, + sram: sram_regulator, + }), + gpu_info, + fw, + coherent: false, // TODO. The GPU is not IO coherent on rk3588, which is what I am testing on. + mmu, + gpu_irq <- gpu::gpu_irq_init(t.clone(), platform.clone(), iomem.clone() )?, + mmu_irq <- mmu::mmu_irq_init(t.clone(), platform.clone(), iomem.clone())?, + job_irq <- fw::job_irq_init(t.clone(), platform.clone(), iomem.clone())?, + }), + GFP_KERNEL, + )?; + + // We must find a way around this. It's being discussed on Zulip already. + // + // Note that this is a problem, because if we fail at probe, then the + // drop code expects the data to be set, which leads to a crash. 
+ unsafe { tdev.clone().init_data(data) }; + drm::drv::Registration::new_foreign_owned(tdev.clone(), 0)?; + + let driver = KBox::pin_init( + try_pin_init!(TyrDriver { + device: tdev.clone(), + }), + GFP_KERNEL, + )?; + + dev_err!(pdev.as_ref(), "Tyr initialized correctly.\n"); + Ok(driver) + } +} + +#[pinned_drop] +impl PinnedDrop for TyrDriver { + fn drop(self: Pin<&mut Self>) { + // XXX: we will not have the `data` field here if we failed the + // initialization, i.e.: if probe failed. + // + // We need to figure out with the community how to properly split the + // creation of a DRM device from the place where the data is set and + // from the place where it is registered to overcome this. + // + // The current solution, i.e.: `new_from_closure` is just a hack, and it + // shows its shortcomings here, for example. + // + // dev_dbg!(self.device.data().pdev.as_ref(), "Removed Tyr.\n"); + } +} + +const INFO: drm::drv::DriverInfo = drm::drv::DriverInfo { + major: 0, + minor: 0, + patchlevel: 0, + name: c_str!("tyr"), + desc: c_str!("ARM Mali CSF-based GPU driver"), + date: c_str!("20252501"), +}; + +#[vtable] +impl drm::drv::Driver for TyrDriver { + type Data = Arc<TyrData>; + type File = File; + type Object = crate::gem::Object; + + const INFO: drm::drv::DriverInfo = INFO; + const FEATURES: u32 = drv::FEAT_GEM | drv::FEAT_GEM_GPUVA; + + kernel::declare_drm_ioctls! 
{ + (TYR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query), + } +} + +#[pin_data] +struct Clocks { + core: Clk, + stacks: Clk, + coregroup: Clk, +} + +#[pin_data] +struct Regulators { + mali: Regulator, + sram: Regulator, +} diff --git a/drivers/gpu/drm/tyr/file.rs b/drivers/gpu/drm/tyr/file.rs new file mode 100644 index 0000000000000000000000000000000000000000..1fd5fa6f0f8fd2d4b36b3a7d34d7260cb578885c --- /dev/null +++ b/drivers/gpu/drm/tyr/file.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use crate::driver::{TyrDevice, TyrDriver}; + +use kernel::{ + alloc::flags::*, + bindings, + drm::{self, device::Device as DrmDevice}, + prelude::*, + uaccess::{UserSlice, UserSliceWriter}, + uapi, +}; + +pub(crate) struct File; + +/// Convenience type alias for our DRM `File` type +pub(crate) type DrmFile = drm::file::File<File>; + +impl drm::file::DriverFile for File { + type Driver = TyrDriver; + + fn open(dev: &DrmDevice<Self::Driver>) -> Result<Pin<KBox<Self>>> { + dev_dbg!(dev.as_ref(), "drm::device::Device::open\n"); + + Ok(KBox::new(Self, GFP_KERNEL)?.into()) + } +} + +impl File { + /// IOCTL: dev_query: Query device information. 
+ pub(crate) fn dev_query( + tdev: &TyrDevice, + devquery: &mut uapi::drm_panthor_dev_query, + _file: &DrmFile, + ) -> Result<u32> { + if devquery.pointer == 0 { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + devquery.size = core::mem::size_of_val(&tdev.data().gpu_info) as u32; + Ok(0) + } + _ => Err(EINVAL), + } + } else { + match devquery.type_ { + uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => { + let mut writer = + UserSlice::new(devquery.pointer as usize, devquery.size as usize).writer(); + + writer.write(&tdev.data().gpu_info)?; + + Ok(0) + } + _ => Err(EINVAL), + } + } + } +} diff --git a/drivers/gpu/drm/tyr/fw.rs b/drivers/gpu/drm/tyr/fw.rs new file mode 100644 index 0000000000000000000000000000000000000000..8927b0356fa2351ec15fdeb2f41cb860bc597061 --- /dev/null +++ b/drivers/gpu/drm/tyr/fw.rs @@ -0,0 +1,653 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use core::ops::Range; +use kernel::bits::bit_u32; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::firmware; +use kernel::io::mem::IoMem; +use kernel::irq; +use kernel::irq::request::Handler as IrqHandler; +use kernel::irq::request::IrqReturn; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::platform; +use kernel::prelude::*; +use kernel::str::CStr; +use kernel::str::CString; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::gem; +use crate::gpu::GpuId; +use crate::gpu::GpuInfo; +use crate::mmu; +use crate::mmu::Mmu; +use crate::mmu::Vm; +use crate::regs::JOB_INT_CLEAR; +use crate::regs::JOB_INT_GLOBAL_IF; +use crate::regs::JOB_INT_MASK; +use crate::regs::JOB_INT_RAWSTAT; + +const FW_BINARY_MAGIC: u32 = 0xc3f13a6e; +const FW_BINARY_MAJOR_MAX: u8 = 0; + +struct Cursor<'a> { + data: &'a [u8], + pos: usize, +} + +impl<'a> Cursor<'a> { + fn read(&mut self, tdev: &TyrDevice, nbytes: usize) -> Result<&[u8]> { + let start = self.pos; + 
let end = start + nbytes; + + if end > self.data.len() { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: read of size {} at position {} is out of bounds", + nbytes, + start, + ); + return Err(EINVAL); + } + + self.pos += nbytes; + Ok(&self.data[start..end]) + } + + fn read_u8(&mut self, tdev: &TyrDevice) -> Result<u8> { + let bytes = self.read(tdev, 1)?; + Ok(bytes[0]) + } + + fn read_u16(&mut self, tdev: &TyrDevice) -> Result<u16> { + let bytes = self.read(tdev, 2)?; + Ok(u16::from_le_bytes(bytes.try_into().unwrap())) + } + + fn read_u32(&mut self, tdev: &TyrDevice) -> Result<u32> { + let bytes = self.read(tdev, 4)?; + Ok(u32::from_le_bytes(bytes.try_into().unwrap())) + } +} + +struct BinaryEntryHeader(u32); + +impl BinaryEntryHeader { + fn entry_ty(&self) -> Result<BinaryEntryType, u8> { + let v = (self.0 & 0xff) as u8; + BinaryEntryType::try_from(v) + } + fn optional(&self) -> bool { + self.0 & bit_u32(31) != 0 + } + fn size(&self) -> u32 { + self.0 >> 8 & 0xff + } +} + +struct BinaryEntrySection { + hdr: BinaryEntryHeader, + inner: Option<Section>, +} + +const CSF_MCU_SHARED_REGION_START: u32 = 0x04000000; +const CSF_MCU_SHARED_REGION_SIZE: u32 = 0x04000000; + +pub(crate) struct Firmware { + sections: KVec<Section>, + vm: Arc<Mutex<Vm>>, +} + +impl Firmware { + pub(crate) fn init( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + gpu_info: &GpuInfo, + mmu: Pin<&Mutex<Mmu>>, + iomem: &Devres<IoMem>, + ) -> Result<Self> { + let sections = Self::read_sections(&tdev, gpu_info)?; + + let mut mmu = mmu.lock(); + + // Create the FW VM. This will be used to communicate between the CPU + // and the MCU. 
+ let vm = mmu.create_vm( + tdev.clone(), + pdev.clone(), + gpu_info, + true, + true, /* coherent: bool */ + )?; + + mmu.bind_vm(vm.clone(), 0, gpu_info, iomem)?; + + for section in §ions { + vm.lock().bind_object( + tdev.clone(), + §ion.mem.gem, + u64::from(section.va.start)..u64::from(section.va.end), + section.vm_map_flags, + )?; + + pr_info!( + "Bound firmware section to VA range {:#x}..{:#x}, with flags {} to BO {:p}\n", + section.va.start, + section.va.end, + section.vm_map_flags, + §ion.mem.gem + ); + } + + Ok(Self { sections, vm }) + } + + pub(crate) fn read_sections(tdev: &TyrDevice, gpu_info: &GpuInfo) -> Result<KVec<Section>> { + let gpu_id = GpuId::from(gpu_info.gpu_id); + + let fw_path = CString::try_from_fmt(fmt!( + "arm/mali/arch{}.{}/mali_csffw.bin", + gpu_id.arch_major, + gpu_id.arch_minor + ))?; + + let fw = firmware::Firmware::request(&fw_path, tdev.as_ref())?; + + let mut cursor = Cursor { + data: fw.data(), + pos: 0, + }; + + dev_err!( + tdev.as_ref(), + "Requested {} bytes of firmware successfully\n", + fw.data().len() + ); + let fw_bin_hdr = match BinaryHeader::new(tdev, &mut cursor) { + Ok(fw_bin_hdr) => fw_bin_hdr, + Err(e) => { + dev_err!(tdev.as_ref(), "Invalid firmware file: {}", e.to_errno()); + return Err(e); + } + }; + + if fw_bin_hdr.magic != FW_BINARY_MAGIC { + dev_err!(tdev.as_ref(), "Invalid firmware magic"); + return Err(EINVAL); + } + + if fw_bin_hdr.major > FW_BINARY_MAJOR_MAX { + dev_err!( + tdev.as_ref(), + "Unsupported firmware binary version: {}.{}", + fw_bin_hdr.major, + fw_bin_hdr.minor + ); + return Err(EINVAL); + } + + if fw_bin_hdr.size > cursor.data.len() as u32 { + dev_err!(tdev.as_ref(), "Firmware image is truncated"); + return Err(EINVAL); + } + + let mut sections = Vec::new(); + while (cursor.pos as u32) < fw_bin_hdr.size { + match Self::read_entry(&mut cursor, tdev, &fw)? 
{ + section => { + cursor.pos += (section.hdr.size() - 4) as usize; + match section.inner { + Some(section) => sections.push(section, GFP_KERNEL)?, + None => continue, + } + } + } + } + + Ok(sections) + } + + fn read_entry( + cursor: &mut Cursor<'_>, + tdev: &TyrDevice, + fw: &firmware::Firmware, + ) -> Result<BinaryEntrySection> { + let section = BinaryEntrySection { + hdr: BinaryEntryHeader(cursor.read_u32(tdev)?), + inner: None, + }; + + let section_size = section.hdr.size() as usize - core::mem::size_of::<BinaryEntryHeader>(); + + let entry_ty = match section.hdr.entry_ty() { + Ok(entry_ty) => entry_ty, + Err(e) => { + if section.hdr.optional() { + dev_info!( + tdev.as_ref(), + "Skipping unknown optional firmware entry type: {}", + e + ); + return Ok(section); + } else { + dev_err!(tdev.as_ref(), "Invalid firmware entry type: {}", e); + return Err(EINVAL); + } + } + }; + + if cursor.pos % core::mem::size_of::<u32>() != 0 { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: entry not aligned to 4 bytes at pos {}\n", + cursor.pos + ); + return Err(EINVAL); + } + + let mut entry_cursor = Cursor { + data: &cursor.data[cursor.pos..cursor.pos + section_size], + pos: 0, + }; + + match entry_ty { + BinaryEntryType::Iface => Ok(BinaryEntrySection { + hdr: section.hdr, + inner: Self::read_section(tdev, &mut entry_cursor, fw)?, + }), + + BinaryEntryType::BuildInfoMetadata => { + // TODO: Read build metadata + Ok(section) + } + + BinaryEntryType::Config + | BinaryEntryType::FutfTest + | BinaryEntryType::TraceBuffer + | BinaryEntryType::TimelineMetadata => Ok(section), + + _ => { + if !section.hdr.optional() { + dev_info!( + tdev.as_ref(), + "Unsupported non-optional entry type: {}", + entry_ty as u32 + ); + + Err(EINVAL) + } else { + dev_info!( + tdev.as_ref(), + "Skipping unsupported firmware entry type: {}", + entry_ty as u32 + ); + + Ok(section) + } + } + } + } + + fn read_section( + tdev: &TyrDevice, + cursor: &mut Cursor<'_>, + fw: &firmware::Firmware, + ) -> 
Result<Option<Section>> { + let hdr = BinarySectionEntryHeader::new(tdev, cursor)?; + + if !iface_flags::iface_supported_flags().contains(hdr.flags) { + dev_err!( + tdev.as_ref(), + "Firmware contains interface with unsupported flags (0x{:x})", + hdr.flags.0 + ); + return Err(EINVAL); + } + + if hdr.flags.contains(iface_flags::PROT) { + dev_warn!( + tdev.as_ref(), + "Firmware protected mode entry not supported, ignoring" + ); + return Ok(None); + } + + if hdr.va.start == CSF_MCU_SHARED_REGION_START && !hdr.flags.contains(iface_flags::SHARED) { + dev_err!( + tdev.as_ref(), + "Interface at 0x{:x} must be shared", + CSF_MCU_SHARED_REGION_START + ); + return Err(EINVAL); + } + + let name_len = cursor.data.len() - cursor.pos; + let name_bytes = cursor.read(tdev, name_len)?; + + let mut name = KVec::with_capacity(name_bytes.len() + 1, GFP_KERNEL)?; + name.extend_from_slice(name_bytes, GFP_KERNEL)?; + name.push(0, GFP_KERNEL)?; + + let name = CStr::from_bytes_with_nul(&name) + .ok() + .and_then(|name| CString::try_from(name).ok()); + + let fw = fw.data(); + let section_start = hdr.data.start as usize; + let section_end = hdr.data.end as usize; + + let mut data = KVec::new(); + data.extend_from_slice(&fw[section_start..section_end], GFP_KERNEL)?; + + let bo_len = (hdr.va.end - hdr.va.start) as usize; + let mut mem = gem::new_kernel_object(&tdev, bo_len)?; + + let vmap = mem.vmap()?; + if !hdr.flags.contains(iface_flags::SHARED) { + let vmap = vmap.as_mut_slice(); + + vmap[0..data.len()].copy_from_slice(&data); + + if hdr.flags.contains(iface_flags::ZERO) { + vmap[data.len()..].fill(0); + } + } + + let mut vm_map_flags = mmu::vm_map_flags::NONE; + if !hdr.flags.contains(iface_flags::WR) { + vm_map_flags |= mmu::vm_map_flags::READONLY; + } + if !hdr.flags.contains(iface_flags::EX) { + vm_map_flags |= mmu::vm_map_flags::NOEXEC; + } + if !hdr.flags.contains(iface_flags::CACHE_MODE_CACHED) { + vm_map_flags |= mmu::vm_map_flags::UNCACHED; + } + + dev_info!( + tdev.as_ref(), + 
"Copied firmware data to BO {:p} of size {} with flags {}\n", + &mem.gem, + bo_len, + vm_map_flags + ); + + Ok(Some(Section { + flags: hdr.flags, + name, + data, + mem, + va: hdr.va, + vm_map_flags, + })) + } + + fn read_build_info(cursor: &mut Cursor<'_>, tdev: &TyrDevice) -> Result<()> { + let meta_start = cursor.read_u32(tdev)? as usize; + let meta_end = cursor.read_u32(tdev)? as usize; + + let expected_hdr = b"git_sha: "; + let hdr = cursor.read(tdev, expected_hdr.len())?; + + if hdr != expected_hdr { + dev_warn!(tdev.as_ref(), "Firmware's git sha is missing\n"); + return Ok(()); + } + + let sz = meta_end - meta_start - expected_hdr.len(); + let sha = cursor.read(tdev, sz)?; + if sha[sha.len() - 1] != 0 { + dev_warn!(tdev.as_ref(), "Firmware's git sha is not NULL terminated\n"); + return Ok(()); // Don't treat as fatal + } + + let sha = CStr::from_bytes_with_nul(sha).unwrap_or(c_str!("")); + dev_info!( + tdev.as_ref(), + "Firmware git sha: {}\n", + sha.to_str().unwrap() + ); + + Ok(()) + } +} + +struct BinaryHeader { + /// Magic value to check binary validity. + magic: u32, + /// Minor FW version. + minor: u8, + /// Major FW version. + major: u8, + /// Padding. Must be set to zero. + _padding1: u16, + /// FW Version hash + version_hash: u32, + /// Padding. Must be set to zero. 
+ _padding2: u32, + /// FW binary size + size: u32, +} + +impl BinaryHeader { + fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> { + let magic = cursor.read_u32(tdev)?; + if magic != FW_BINARY_MAGIC { + dev_err!(tdev.as_ref(), "Invalid firmware magic"); + return Err(EINVAL); + } + + let minor = cursor.read_u8(tdev)?; + let major = cursor.read_u8(tdev)?; + let padding1 = cursor.read_u16(tdev)?; + let version_hash = cursor.read_u32(tdev)?; + let padding2 = cursor.read_u32(tdev)?; + let size = cursor.read_u32(tdev)?; + + if padding1 != 0 || padding2 != 0 { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: header padding is not zero" + ); + return Err(EINVAL); + } + + Ok(Self { + magic, + minor, + major, + _padding1: padding1, + version_hash, + _padding2: padding2, + size, + }) + } +} + +#[derive(Clone, Copy, Debug)] +enum BinaryEntryType { + /// Host <-> FW interface. + Iface = 0, + /// FW config. + Config = 1, + /// Unit tests. + FutfTest = 2, + /// Trace buffer interface. + TraceBuffer = 3, + /// Timeline metadata interface, + TimelineMetadata = 4, + /// Metadata about how the FW binary was built + BuildInfoMetadata = 6, +} + +impl BinaryEntryType {} + +impl TryFrom<u8> for BinaryEntryType { + type Error = u8; + + fn try_from(value: u8) -> Result<Self, Self::Error> { + match value { + 0 => Ok(BinaryEntryType::Iface), + 1 => Ok(BinaryEntryType::Config), + 2 => Ok(BinaryEntryType::FutfTest), + 3 => Ok(BinaryEntryType::TraceBuffer), + 4 => Ok(BinaryEntryType::TimelineMetadata), + 6 => Ok(BinaryEntryType::BuildInfoMetadata), + other => Err(other), + } + } +} + +#[derive(Debug)] +struct BinarySectionEntryHeader { + /// Section flags + flags: Flags, + /// MCU virtual range to map this binary section to. + va: Range<u32>, + /// References the data in the FW binary. 
+ data: Range<u32>, +} + +impl BinarySectionEntryHeader { + fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> { + let flags = cursor.read_u32(tdev)?; + let flags = Flags(flags); + + let va_start = cursor.read_u32(tdev)?; + let va_end = cursor.read_u32(tdev)?; + + let va = va_start..va_end; + + if va.len() == 0 { + dev_err!( + tdev.as_ref(), + "Invalid firmware file: empty VA range at pos {}\n", + cursor.pos + ); + return Err(EINVAL); + } + + let data_start = cursor.read_u32(tdev)?; + let data_end = cursor.read_u32(tdev)?; + let data = data_start..data_end; + + Ok(Self { flags, va, data }) + } +} + +struct BuildInfoHeader(Range<u32>); + +pub(crate) struct Section { + flags: Flags, + // TODO: We need to copy this into a BO so that the GPU (and the MCU) can + // access it. + name: Option<CString>, + data: KVec<u8>, + mem: gem::ObjectRef, + va: Range<u32>, + vm_map_flags: mmu::VmMapFlags, +} + +/// Firmware flags as read from the firmware binary. +/// +/// They can be combined with the operators `|`, `&`, and `!`. +/// +/// Values can be used from the [`flags`] module. +#[derive(Clone, Copy, PartialEq, Debug)] +pub(crate) struct Flags(u32); + +impl Flags { + /// Check whether `flags` is contained in `self`. 
+ pub(crate) fn contains(self, flags: Flags) -> bool { + (self & flags) == flags + } +} + +impl core::ops::BitOr for Flags { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +impl core::ops::BitAnd for Flags { + type Output = Self; + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } +} + +impl core::ops::Not for Flags { + type Output = Self; + fn not(self) -> Self::Output { + Self(!self.0) + } +} + +mod iface_flags { + use super::Flags; + use kernel::bits::bit_u32; + use kernel::bits::genmask_u32; + + pub(crate) fn iface_supported_flags() -> Flags { + RD | WR | EX | CACHE_MODE_MASK | PROT | SHARED | ZERO + } + + pub(crate) const RD: Flags = Flags(bit_u32(0)); + pub(crate) const WR: Flags = Flags(bit_u32(1)); + pub(crate) const EX: Flags = Flags(bit_u32(2)); + pub(crate) const CACHE_MODE_NONE: Flags = Flags(0 << 3); + pub(crate) const CACHE_MODE_CACHED: Flags = Flags(1 << 3); + pub(crate) const CACHE_MODE_UNCACHED_COHERENT: Flags = Flags(2 << 3); + pub(crate) const CACHE_MODE_CACHED_COHERENT: Flags = Flags(3 << 3); + pub(crate) const CACHE_MODE_MASK: Flags = Flags(genmask_u32(4, 3)); + pub(crate) const PROT: Flags = Flags(bit_u32(5)); + pub(crate) const SHARED: Flags = Flags(bit_u32(30)); + pub(crate) const ZERO: Flags = Flags(bit_u32(31)); +} + +pub(crate) struct JobIrqHandler { + tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, +} + +impl IrqHandler for JobIrqHandler { + fn handle_irq(&self) -> IrqReturn { + let rawstat = JOB_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + dev_info!(self.tdev.as_ref(), "Acknoledging job IRQ\n"); + + let _ = JOB_INT_CLEAR.write(&self.iomem, rawstat); + + if rawstat & JOB_INT_GLOBAL_IF != 0 { + dev_info!(self.tdev.as_ref(), "GPU is ready to accept requests\n"); + } + + IrqReturn::Handled + } +} + +pub(crate) fn job_irq_init( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + iomem: Arc<Devres<IoMem>>, +) -> Result<impl 
PinInit<IrqRegistration<JobIrqHandler>, Error>> { + let job_irq = pdev.irq_by_name(c_str!("job"))?; + + let irq_handler = JobIrqHandler { + tdev, + iomem: iomem.clone(), + }; + + JOB_INT_MASK.write(&iomem, u32::MAX)?; + + Ok(IrqRegistration::register( + job_irq, + irq::request::flags::SHARED, + c_str!("tyr-job"), + irq_handler, + )) +} diff --git a/drivers/gpu/drm/tyr/gem.rs b/drivers/gpu/drm/tyr/gem.rs new file mode 100644 index 0000000000000000000000000000000000000000..5ad86d395a152067ef71d0106d1d239e349edaf2 --- /dev/null +++ b/drivers/gpu/drm/tyr/gem.rs @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::drm::gem::shmem; +use kernel::drm::gem::BaseObject; +use kernel::drm::gem::{self}; +use kernel::drm::mm; +use kernel::prelude::*; + +use crate::driver::TyrDevice; +use crate::driver::TyrDriver; +use crate::file::DrmFile; + +/// GEM Object inner driver data +#[pin_data] +pub(crate) struct DriverObject {} + +/// Type alias for the GEM object tyoe for this driver. +pub(crate) type Object = gem::shmem::Object<DriverObject>; + +/// Type alias for the SGTable type for this driver. +pub(crate) type SGTable = shmem::SGTable<DriverObject>; + +impl gem::BaseDriverObject<Object> for DriverObject { + fn new(dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> { + dev_dbg!(dev.as_ref(), "DriverObject::new\n"); + DriverObject {} + } +} + +impl gem::shmem::DriverObject for DriverObject { + type Driver = TyrDriver; +} + +/// A shared reference to a GEM object for this driver. +pub(crate) struct ObjectRef { + /// The underlying GEM object reference + pub(crate) gem: gem::ObjectRef<shmem::Object<DriverObject>>, + /// The kernel-side VMap of this object, if needed + vmap: Option<shmem::VMap<DriverObject>>, + /// The VA node from the allocator, representing where we are bound, if at + /// all. + /// + /// Note: this is still TODO. Will be used to undo the mappings when the bo + /// is destroyed. 
+ node: Option<mm::Node<(), ()>>, +} + +impl ObjectRef { + /// Create a new wrapper for a raw GEM object reference. + pub(crate) fn new(gem: gem::ObjectRef<shmem::Object<DriverObject>>) -> ObjectRef { + ObjectRef { + gem, + vmap: None, + node: None, + } + } + + /// Return the `VMap` for this object, creating it if necessary. + pub(crate) fn vmap(&mut self) -> Result<&mut shmem::VMap<DriverObject>> { + if self.vmap.is_none() { + self.vmap = Some(self.gem.vmap()?); + } + Ok(self.vmap.as_mut().unwrap()) + } + + /// Returns the size of an object in bytes + pub(crate) fn size(&self) -> usize { + self.gem.size() + } +} + +/// Create a new DRM GEM object. +pub(crate) fn object_new(dev: &TyrDevice, size: usize) -> Result<ObjectRef> { + let aligned_size = size.next_multiple_of(1 << 12); + + if size == 0 || size > aligned_size { + return Err(EINVAL); + } + + let gem = Object::new(dev, aligned_size)?; + + Ok(ObjectRef::new(gem.into_ref())) +} + +/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it. +pub(crate) fn lookup_handle(file: &DrmFile, handle: u32) -> Result<ObjectRef> { + Ok(ObjectRef::new(shmem::Object::lookup_handle(file, handle)?)) +} + +/// Create a new kernel-owned GEM object. 
+pub(crate) fn new_kernel_object(tdev: &TyrDevice, size: usize) -> Result<ObjectRef> { + let aligned_size = size.next_multiple_of(1 << 12); + let gem = shmem::Object::<DriverObject>::new(tdev, aligned_size)?; + + Ok(ObjectRef::new(gem.into_ref())) +} diff --git a/drivers/gpu/drm/tyr/gpu.rs b/drivers/gpu/drm/tyr/gpu.rs new file mode 100644 index 0000000000000000000000000000000000000000..27aef2a06e4a811b3f5f7f718dde6243a7539f64 --- /dev/null +++ b/drivers/gpu/drm/tyr/gpu.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use kernel::bits; +use kernel::bits::genmask_u32; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::irq; +use kernel::irq::request::Handler as IrqHandler; +use kernel::irq::request::IrqReturn; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::platform; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::time; +use kernel::transmute::AsBytes; +use kernel::types::ARef; + +use crate::driver::TyrDevice; +use crate::regs::*; + +#[repr(C)] +// This can be queried by userspace to get information about the GPU. 
+pub(crate) struct GpuInfo { + pub(crate) gpu_id: u32, + pub(crate) csf_id: u32, + pub(crate) gpu_rev: u32, + pub(crate) core_features: u32, + pub(crate) l2_features: u32, + pub(crate) tiler_features: u32, + pub(crate) mem_features: u32, + pub(crate) mmu_features: u32, + pub(crate) thread_features: u32, + pub(crate) max_threads: u32, + pub(crate) thread_max_workgroup_size: u32, + pub(crate) thread_max_barrier_size: u32, + pub(crate) coherency_features: u32, + pub(crate) texture_features: [u32; 4], + pub(crate) as_present: u32, + pub(crate) shader_present: u64, + pub(crate) tiler_present: u64, + pub(crate) l2_present: u64, +} + +impl GpuInfo { + pub(crate) fn new(iomem: &Devres<IoMem>) -> Result<Self> { + let gpu_id = GPU_ID.read(iomem)?; + let csf_id = GPU_CSF_ID.read(iomem)?; + let gpu_rev = GPU_REVID.read(iomem)?; + let core_features = GPU_CORE_FEATURES.read(iomem)?; + let l2_features = GPU_L2_FEATURES.read(iomem)?; + let tiler_features = GPU_TILER_FEATURES.read(iomem)?; + let mem_features = GPU_MEM_FEATURES.read(iomem)?; + let mmu_features = GPU_MMU_FEATURES.read(iomem)?; + let thread_features = GPU_THREAD_FEATURES.read(iomem)?; + let max_threads = GPU_THREAD_MAX_THREADS.read(iomem)?; + let thread_max_workgroup_size = GPU_THREAD_MAX_WORKGROUP_SIZE.read(iomem)?; + let thread_max_barrier_size = GPU_THREAD_MAX_BARRIER_SIZE.read(iomem)?; + let coherency_features = GPU_COHERENCY_FEATURES.read(iomem)?; + + let texture_features = GPU_TEXTURE_FEATURES0.read(iomem)?; + + let as_present = GPU_AS_PRESENT.read(iomem)?; + + let shader_present = GPU_SHADER_PRESENT_LO.read(iomem)? as u64; + let shader_present = shader_present | (GPU_SHADER_PRESENT_HI.read(iomem)? as u64) << 32; + + let tiler_present = GPU_TILER_PRESENT_LO.read(iomem)? as u64; + let tiler_present = tiler_present | (GPU_TILER_PRESENT_HI.read(iomem)? as u64) << 32; + + let l2_present = GPU_L2_PRESENT_LO.read(iomem)? as u64; + let l2_present = l2_present | (GPU_L2_PRESENT_HI.read(iomem)? 
as u64) << 32; + + Ok(Self { + gpu_id, + csf_id, + gpu_rev, + core_features, + l2_features, + tiler_features, + mem_features, + mmu_features, + thread_features, + max_threads, + thread_max_workgroup_size, + thread_max_barrier_size, + coherency_features, + texture_features: [texture_features, 0, 0, 0], + as_present, + shader_present, + tiler_present, + l2_present, + }) + } + + pub(crate) fn log(&self, pdev: &platform::Device) { + let major = (self.gpu_id >> 16) & 0xff; + let minor = (self.gpu_id >> 8) & 0xff; + let status = self.gpu_id & 0xff; + + let model_name = if let Some(model) = GPU_MODELS + .iter() + .find(|&f| f.major == major && f.minor == minor) + { + model.name + } else { + "unknown" + }; + + dev_info!( + pdev.as_ref(), + "mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}", + model_name, + self.gpu_id >> 16, + major, + minor, + status + ); + + dev_info!( + pdev.as_ref(), + "Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}", + self.l2_features, + self.tiler_features, + self.mem_features, + self.mmu_features, + self.as_present + ); + + dev_info!( + pdev.as_ref(), + "shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}", + self.shader_present, + self.l2_present, + self.tiler_present + ); + } + + pub(crate) fn va_bits(&self) -> u32 { + self.mmu_features & bits::genmask_u32(7, 0) + } + + pub(crate) fn pa_bits(&self) -> u32 { + (self.mmu_features >> 8) & bits::genmask_u32(7, 0) + } +} + +// This type is the same type exposed by Panthor's uAPI. As it's declared as +// #repr(C), we can be sure that the layout is the same. Therefore, it is safe +// to expose this to userspace. 
+unsafe impl AsBytes for GpuInfo {} + +struct GpuModels { + name: &'static str, + major: u32, + minor: u32, +} + +const GPU_MODELS: [GpuModels; 1] = [GpuModels { + name: "g610", + major: 10, + minor: 7, +}]; + +pub(crate) struct GpuId { + pub(crate) arch_major: u32, + pub(crate) arch_minor: u32, + pub(crate) arch_rev: u32, + pub(crate) prod_major: u32, + pub(crate) ver_major: u32, + pub(crate) ver_minor: u32, + pub(crate) ver_status: u32, +} + +impl From<u32> for GpuId { + fn from(value: u32) -> Self { + GpuId { + arch_major: (value & genmask_u32(31, 28)) >> 28, + arch_minor: (value & genmask_u32(27, 24)) >> 24, + arch_rev: (value & genmask_u32(23, 20)) >> 20, + prod_major: (value & genmask_u32(19, 16)) >> 16, + ver_major: (value & genmask_u32(15, 12)) >> 12, + ver_minor: (value & genmask_u32(11, 4)) >> 4, + ver_status: value & genmask_u32(3, 0), + } + } +} + +/// Powers on the l2 block. +pub(crate) fn l2_power_on(iomem: &Devres<IoMem>) -> Result<()> { + let op = || L2_PWRTRANS_LO.read(&iomem); + + let cond = |pwr_trans: &u32| *pwr_trans == 0; + + let _ = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_millis(200)), + )?; + + L2_PWRON_LO.write(&iomem, 1)?; + + let op = || L2_READY_LO.read(&iomem); + let cond = |l2_ready: &u32| *l2_ready == 1; + + let _ = io::poll::read_poll_timeout( + op, + cond, + time::Delta::from_millis(100), + Some(time::Delta::from_millis(200)), + )?; + + Ok(()) +} + +pub(crate) struct GpuIrqHandler { + tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, +} + +impl IrqHandler for GpuIrqHandler { + fn handle_irq(&self) -> IrqReturn { + // let data = self.0.data(); + + let int_stat = GPU_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + pr_info!("Acknowledging GPU_INT_RAWSTAT: {:#x}\n", int_stat); + + let _ = GPU_INT_CLEAR.write(&self.iomem, int_stat); + + let _ = MCU_CONTROL.write(&self.iomem, MCU_CONTROL_AUTO); + + pr_info!("Enabled MCU control\n"); + + IrqReturn::Handled + } +} + 
+pub(crate) fn gpu_irq_init( + tdev: ARef<TyrDevice>, + pdevice: platform::Device, + iomem: Arc<Devres<IoMem>>, +) -> Result<impl PinInit<IrqRegistration<GpuIrqHandler>, Error>> { + let gpu_irq = pdevice.irq_by_name(c_str!("gpu"))?; + let irq_handler = GpuIrqHandler { + tdev, + iomem: iomem.clone(), + }; + + // Enable all interrupts. + let irq_enable_mask = core::u32::MAX; + GPU_INT_MASK.write(&iomem, irq_enable_mask)?; + + Ok(IrqRegistration::register( + gpu_irq, + irq::request::flags::SHARED, + c_str!("tyr-gpu"), + irq_handler, + )) +} diff --git a/drivers/gpu/drm/tyr/mmu.rs b/drivers/gpu/drm/tyr/mmu.rs new file mode 100644 index 0000000000000000000000000000000000000000..c62d4b2a863705c9c2f92f2d364b2ab2f67d7691 --- /dev/null +++ b/drivers/gpu/drm/tyr/mmu.rs @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +use core::ops::Range; + +use kernel::bindings::SZ_1G; +use kernel::bindings::SZ_2M; +use kernel::bindings::SZ_4K; +use kernel::bits::genmask_u64; +use kernel::c_str; +use kernel::devres::Devres; +use kernel::drm::gem::shmem; +use kernel::drm::gpuvm; +use kernel::drm::gpuvm::DriverGpuVa; +use kernel::drm::mm; +use kernel::io; +use kernel::io::mem::IoMem; +use kernel::io_pgtable; +use kernel::io_pgtable::IoPageTable; +use kernel::io_pgtable::ARM64LPAES1; +use kernel::irq; +use kernel::irq::request::Handler as IrqHandler; +use kernel::irq::request::IrqReturn; +use kernel::irq::request::Registration as IrqRegistration; +use kernel::new_mutex; +use kernel::platform; +use kernel::prelude::*; +use kernel::str::CStr; +use kernel::sync::Arc; +use kernel::sync::Mutex; +use kernel::time::Delta; +use kernel::types::ARef; +use kernel::types::ForeignOwnable; + +use crate::driver::TyrDevice; +use crate::driver::{self}; +use crate::gem::DriverObject; +use crate::gem::{self}; +use crate::gpu::GpuInfo; +use crate::regs::*; + +pub(crate) const EXCEPTION_MAP: &[(u32, &'static CStr)] = &[ + (0x00, c_str!("OK")), + (0x04, c_str!("TERMINATED")), + (0x05, 
c_str!("KABOOM")), + (0x06, c_str!("EUREKA")), + (0x08, c_str!("ACTIVE")), + (0x0f, c_str!("CS_RES_TERM")), + (0x3f, c_str!("MAX_NON_FAULT")), + (0x40, c_str!("CS_CONFIG_FAULT")), + (0x41, c_str!("CS_UNRECOVERABLE")), + (0x44, c_str!("CS_ENDPOINT_FAULT")), + (0x48, c_str!("CS_BUS_FAULT")), + (0x49, c_str!("CS_INSTR_INVALID")), + (0x4a, c_str!("CS_CALL_STACK_OVERFLOW")), + (0x4b, c_str!("CS_INHERIT_FAULT")), + (0x50, c_str!("INSTR_INVALID_PC")), + (0x51, c_str!("INSTR_INVALID_ENC")), + (0x55, c_str!("INSTR_BARRIER_FAULT")), + (0x58, c_str!("DATA_INVALID_FAULT")), + (0x59, c_str!("TILE_RANGE_FAULT")), + (0x5a, c_str!("ADDR_RANGE_FAULT")), + (0x5b, c_str!("IMPRECISE_FAULT")), + (0x60, c_str!("OOM")), + (0x68, c_str!("CSF_FW_INTERNAL_ERROR")), + (0x69, c_str!("CSF_RES_EVICTION_TIMEOUT")), + (0x80, c_str!("GPU_BUS_FAULT")), + (0x88, c_str!("GPU_SHAREABILITY_FAULT")), + (0x89, c_str!("SYS_SHAREABILITY_FAULT")), + (0x8a, c_str!("GPU_CACHEABILITY_FAULT")), + (0xc0, c_str!("TRANSLATION_FAULT_0")), + (0xc1, c_str!("TRANSLATION_FAULT_1")), + (0xc2, c_str!("TRANSLATION_FAULT_2")), + (0xc3, c_str!("TRANSLATION_FAULT_3")), + (0xc4, c_str!("TRANSLATION_FAULT_4")), + (0xc8, c_str!("PERM_FAULT_0")), + (0xc9, c_str!("PERM_FAULT_1")), + (0xca, c_str!("PERM_FAULT_2")), + (0xcb, c_str!("PERM_FAULT_3")), + (0xd9, c_str!("ACCESS_FLAG_1")), + (0xda, c_str!("ACCESS_FLAG_2")), + (0xdb, c_str!("ACCESS_FLAG_3")), + (0xe0, c_str!("ADDR_SIZE_FAULT_IN")), + (0xe4, c_str!("ADDR_SIZE_FAULT_OUT0")), + (0xe5, c_str!("ADDR_SIZE_FAULT_OUT1")), + (0xe6, c_str!("ADDR_SIZE_FAULT_OUT2")), + (0xe7, c_str!("ADDR_SIZE_FAULT_OUT3")), + (0xe8, c_str!("MEM_ATTR_FAULT_0")), + (0xe9, c_str!("MEM_ATTR_FAULT_1")), + (0xea, c_str!("MEM_ATTR_FAULT_2")), + (0xeb, c_str!("MEM_ATTR_FAULT_3")), +]; + +pub(crate) fn get_exception_name(code: u32) -> &'static CStr { + for &(exception_code, name) in EXCEPTION_MAP { + if exception_code == code { + return name; + } + } + c_str!("UNKNOWN") +} + +pub(crate) fn 
access_type_name(fault_status: u32) -> &'static str { + match fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK { + AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC => "ATOMIC", + AS_FAULTSTATUS_ACCESS_TYPE_READ => "READ", + AS_FAULTSTATUS_ACCESS_TYPE_WRITE => "WRITE", + AS_FAULTSTATUS_ACCESS_TYPE_EX => "EXECUTE", + _ => "UNKNOWN", + } +} + +const SZ_4G: u64 = 4 * SZ_1G as u64; + +fn flags_to_prot(flags: VmMapFlags) -> u32 { + let mut prot = 0; + + if flags.contains(vm_map_flags::READONLY) { + prot |= io_pgtable::prot::READ; + } else { + prot |= io_pgtable::prot::READ | io_pgtable::prot::WRITE; + } + + if flags.contains(vm_map_flags::NOEXEC) { + prot |= io_pgtable::prot::NOEXEC; + } + + if !flags.contains(vm_map_flags::UNCACHED) { + prot |= io_pgtable::prot::CACHE; + } + + prot +} + +fn as_memattr_aarch64_inner_alloc_expl(inner: bool, outer: bool) -> u8 { + ((inner as u8) << 1) | (outer as u8) +} + +fn mair_to_memattr(mair: u64) -> u64 { + let mut memattr: u64 = 0; + + for i in 0..8 { + let in_attr = (mair >> (8 * i)) as u8; + let outer = in_attr >> 4; + let inner = in_attr & 0xf; + let out_attr; + + // For caching to be enabled, inner and outer caching policy + // have to be both write-back, if one of them is write-through + // or non-cacheable, we just choose non-cacheable. Device + // memory is also translated to non-cacheable. + if (outer & 3 == 0) || (outer & 4 == 0) || (inner & 4 == 0) { + out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC + | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER + | as_memattr_aarch64_inner_alloc_expl(false, false) as u32; + } else { + // Use SH_CPU_INNER mode so SH_IS, which is used when + // IOMMU_CACHE is set, actually maps to the standard + // definition of inner-shareable and not Mali's + // internal-shareable mode. 
+ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB + | AS_MEMATTR_AARCH64_SH_CPU_INNER + | as_memattr_aarch64_inner_alloc_expl(inner & 1 != 0, inner & 2 != 0) as u32; + } + + memattr |= (out_attr as u64) << (8 * i); + } + + memattr +} + +/// Allocates HW AS slots, which represent a physical slot in which a VM can +/// be placed. +/// +/// Panthor keeps an LRU list for the purposes of evicting VMs when a slot is +/// requested but none is free. We defer this to a future implementation. +/// +/// Note that this is still TODO: this type doesn't yet track any VMs. +struct SlotAllocator { + /// Bitmask tracking which slots are free. + free_mask: u32, +} + +impl SlotAllocator { + fn alloc_slot(allocator: Arc<Mutex<Self>>, vm: &mut Vm) { + let mut alloc = allocator.lock(); + let slot = alloc.free_mask.trailing_zeros(); + + if slot < 32 { + alloc.free_mask |= 1 << slot; + let slot_allocation = SlotAllocation { + allocator: allocator.clone(), + slot: slot as u8, + }; + vm.binding = Some(slot_allocation); + } + } + + fn free_slot(vm: &mut Vm) { + vm.binding = None; + } +} + +/// Represents a slot allocation. +/// +/// This type returns the slot to the allocator once it is dropped. +/// +/// +/// Note that this is still TODO: this type doesn't yet track any VMs. +struct SlotAllocation { + /// The allocator that allocated this slot. + allocator: Arc<Mutex<SlotAllocator>>, + /// The actual slot value. + slot: u8, +} + +impl Drop for SlotAllocation { + fn drop(&mut self) { + self.allocator.lock().free_mask &= !(1 << self.slot); + } +} + +pub(crate) struct Mmu { + /// List containing all VMs. 
+ vms: KVec<Arc<Mutex<Vm>>>, + slot_allocator: Arc<Mutex<SlotAllocator>>, +} + +impl Mmu { + pub(crate) fn new() -> Result<Self> { + Ok(Self { + vms: KVec::new(), + slot_allocator: Arc::pin_init( + new_mutex!(SlotAllocator { + free_mask: u32::MAX, + }), + GFP_KERNEL, + )?, + }) + } + + pub(crate) fn create_vm( + &mut self, + tdev: ARef<TyrDevice>, + pdev: platform::Device, + gpu_info: &GpuInfo, + for_mcu: bool, + is_kernel: bool, + /* coherent: bool, */ + ) -> Result<Arc<Mutex<Vm>>> { + let vm = Vm::create( + tdev.clone(), + pdev, + for_mcu, + is_kernel, + gpu_info, + /* coherent */ + )?; + let vm = Arc::pin_init(new_mutex!(vm), GFP_KERNEL)?; + self.vms.push(vm.clone(), GFP_KERNEL)?; // push is fallible; propagate allocation failure + Ok(vm) + } + + pub(crate) fn bind_vm( + &self, + vm: Arc<Mutex<Vm>>, + as_nr: usize, + gpu_info: &GpuInfo, + iomem: &Devres<IoMem>, + ) -> Result { + let vm = vm.lock(); + let inner = vm.inner.exec_lock(None)?; + let va_bits = gpu_info.va_bits(); + + let transtab = inner.page_table.cfg().ttbr; + let transcfg = AS_TRANSCFG_PTW_MEMATTR_WB + | AS_TRANSCFG_PTW_RA + | AS_TRANSCFG_ADRMODE_AARCH64_4K + | as_transcfg_ina_bits((55 - va_bits).into()); + + let memattr = vm.memattr; + self.enable_as(iomem, as_nr, transtab, transcfg.into(), memattr) + } + + fn lock_region(&self, iomem: Devres<IoMem>, as_nr: usize, region: Range<u64>) -> Result { + if region.end - region.start == 0 { + return Err(EINVAL); + } + + // The locked region is a naturally aligned power of 2 block encoded as + // log2 minus(1). + // + // Calculate the desired start/end and look for the highest bit which + // differs. The smallest naturally aligned block must include this bit + // change, the desired region starts with this bit (and subsequent bits) + // zeroed and ends with the bit (and subsequent bits) set to one.
+ let region_width = core::cmp::max( + 64 - (region.start ^ (region.end - 1)).leading_zeros() as u8, // fls64 of the differing bits + 64 - (AS_LOCK_REGION_MIN_SIZE as u64).leading_zeros() as u8, // fls64 of the minimum lock size + ) - 1; + + // Mask off the low bits of region.start, which would be ignored by the + // hardware anyways. + let region_start = region.start & genmask_u64(63, region_width as u32); + + let region = (region_width as u64) | region_start; // encode width in the low bits of the masked start + + let region_lo = (region & 0xffffffff) as u32; + let region_hi = (region >> 32) as u32; + + // Lock the region that needs to be updated. + as_lockaddr_lo(as_nr)?.write(&iomem, region_lo)?; + as_lockaddr_hi(as_nr)?.write(&iomem, region_hi)?; + as_command(as_nr)?.write(&iomem, AS_COMMAND_LOCK)?; + + Ok(()) + } + + fn enable_as( + &self, + iomem: &Devres<IoMem>, + as_nr: usize, + transtab: u64, + transcfg: u64, + memattr: u64, + ) -> Result { + let active = as_status(as_nr)?.read(iomem)? & AS_STATUS_ACTIVE != 0; + if active { + return Err(EBUSY); + } + + // TODO: AS_COMMAND_FLUSH_MEM + let transtab_lo = (transtab & 0xffffffff) as u32; + let transtab_hi = (transtab >> 32) as u32; + + let transcfg_lo = (transcfg & 0xffffffff) as u32; + let transcfg_hi = (transcfg >> 32) as u32; + + let memattr_lo = (memattr & 0xffffffff) as u32; + let memattr_hi = (memattr >> 32) as u32; + + as_transtab_lo(as_nr)?.write(iomem, transtab_lo)?; + as_transtab_hi(as_nr)?.write(iomem, transtab_hi)?; + + as_transcfg_lo(as_nr)?.write(iomem, transcfg_lo)?; + as_transcfg_hi(as_nr)?.write(iomem, transcfg_hi)?; + + as_memattr_lo(as_nr)?.write(iomem, memattr_lo)?; + as_memattr_hi(as_nr)?.write(iomem, memattr_hi)?; + + as_command(as_nr)?.write(iomem, AS_COMMAND_UPDATE)?; + + let op = || as_status(as_nr)?.read(iomem); + let cond = |status: &u32| -> bool { *status & AS_STATUS_ACTIVE == 0 }; + let _ = io::poll::read_poll_timeout( + op, + cond, + Delta::from_millis(0), + Some(Delta::from_micros(200)), + )?; + + Ok(()) + } +} + +/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ +impl 
io_pgtable::FlushOps for Mmu { + type Data = (); + + fn tlb_flush_all(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {} + fn tlb_flush_walk( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _size: usize, + _granule: usize, + ) { + } + fn tlb_add_page( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _granule: usize, + ) { + } +} + +/// Data associated with a VM <=> BO pairing +#[pin_data] +pub(crate) struct VmBo { + #[pin] + sgt: Mutex<Option<gem::SGTable>>, +} + +impl gpuvm::DriverGpuVmBo for VmBo { + fn new() -> impl PinInit<Self> { + pin_init!(VmBo { + sgt <- new_mutex!(None, "VmBinding"), + }) + } +} + +pub(crate) struct StepContext { + // XXX: This Option type is here merely to get around some deadlock issues. + // See the comment in `Mmu::bind_object`. + // + // It can be safely unwrapped in the GPUVM callbacks. + vm_bo: Option<ARef<gpuvm::GpuVmBo<VmInner>>>, + vm_map_flags: VmMapFlags, + tdev: ARef<TyrDevice>, +} +pub(crate) struct GpuVa {/* TODO */} +unsafe impl init::Zeroable for GpuVa {} + +impl DriverGpuVa for GpuVa {} + +pub(crate) struct VmInner { + /// A handle to the device. + tdev: ARef<TyrDevice>, + /// The range of virtual addresses that this VM can use. + va_range: Range<u64>, + /// Whether this is a kernel BO. + is_kernel: bool, + /// The page table for this VM. + page_table: ARM64LPAES1<Mmu>, + /// The allocator keeping track of what ranges are in use. + mm: mm::Allocator<(), ()>, +} + +impl gpuvm::DriverGpuVm for VmInner { + type Driver = driver::TyrDriver; + type GpuVmBo = VmBo; + type StepContext = StepContext; + + type GpuVa = GpuVa; + + fn step_map( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + op: &mut gpuvm::OpMap<Self>, + ctx: &mut Self::StepContext, + ) -> Result { + // This is the mapping algorithm from Asahi. 
+ + let mut iova = op.addr(); + let mut left = op.range() as usize; + let mut offset = op.offset() as usize; + + let vm_bo = ctx.vm_bo.as_ref().ok_or(EINVAL)?; + let sgt = vm_bo.inner().sgt.lock(); + let prot = flags_to_prot(ctx.vm_map_flags); + + for range in sgt + .as_ref() + .expect("SGT should be set before step_map") + .iter() + { + let mut addr = range.dma_address(); + let mut len = range.dma_len(); + + if left == 0 { + break; + } + + if offset > 0 { + let skip = len.min(offset); + addr += skip; + len -= skip; + offset -= skip; + } + + if len == 0 { + continue; + } + + assert!(offset == 0); + + len = len.min(left); + + pr_info!("mapping paddr {:#x} (len: {})\n", addr, len); + + let pgsize = 4096; + let pgcount = len.div_ceil(pgsize); + + let mapped = self.page_table.map_pages( + iova as usize, + addr, + pgsize as usize, + pgcount as usize, + prot, + )?; + + pr_info!( + "mapped {} bytes, iova: {:#x}, paddr: {:#x}, pgsize: {}, pgcount: {}, len: {} prot {}\n", + mapped, + iova, + addr, + pgsize, + pgcount, + len, + prot + ); + + left -= len; + iova += len as u64; + } + + let gpuva = gpuvm::GpuVa::<VmInner>::new(init::zeroed())?; + if op + .map_and_link_va( + self, + gpuva, + ctx.vm_bo.as_ref().expect("step_map with no BO"), + ) + .is_err() + { + pr_err!( + "map_and_link_va failed: {:#x} [{:#x}] -> {:#x}\n", + op.offset(), + op.range(), + op.addr() + ); + return Err(EINVAL); + } + Ok(()) + } + + fn step_unmap( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + _op: &mut gpuvm::OpUnMap<Self>, + _ctx: &mut Self::StepContext, + ) -> Result { + todo!("We can't unmap VA ranges yet"); + } + + fn step_remap( + self: &mut gpuvm::UpdatingGpuVm<'_, Self>, + _op: &mut gpuvm::OpReMap<Self>, + _vm_bo: &gpuvm::GpuVmBo<Self>, + _ctx: &mut Self::StepContext, + ) -> Result { + todo!("We can't remap VA ranges yet"); + } +} + +pub(crate) struct Vm { + dummy_obj: kernel::drm::gem::ObjectRef<shmem::Object<DriverObject>>, + inner: ARef<gpuvm::GpuVm<VmInner>>, + binding: 
Option<SlotAllocation>, + /// The memory attributes for this VM. + memattr: u64, +} + +impl Vm { + fn create( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + for_mcu: bool, + is_kernel: bool, + gpu_info: &GpuInfo, + // coherent: bool, (forced to false for now) + // kernel_va_range: Range<u64>, + // auto_kernel_va_range: Range<u64>, + ) -> Result<Self> { + // We should ideally not allocate memory for this, but there is no way + // to create dummy GPUVM GEM objects for now. + // + // This is being discussed on Zulip. For now we have to waste 4k on + // this. + let dummy_obj = gem::new_kernel_object(&tdev.clone(), 4096)?; + + let va_bits = gpu_info.va_bits(); + let pa_bits = gpu_info.pa_bits(); + + pr_info!( + "Creating VM with VA bits: {}, PA bits: {}\n", + va_bits, + pa_bits + ); + + let full_va_range = 1u64 << va_bits; + + let va_range = if for_mcu { 0..SZ_4G } else { 0..full_va_range }; + + let mm = mm::Allocator::new(va_range.start, va_range.end, ())?; + + let page_table = ARM64LPAES1::new( + pdev.as_ref(), + io_pgtable::Config { + pgsize_bitmap: (SZ_4K | SZ_2M) as usize, + ias: va_bits as usize, + oas: pa_bits as usize, + coherent_walk: false, + quirks: 0, + }, + (), + )?; + + let memattr = mair_to_memattr(page_table.cfg().mair); + + Ok(Vm { + dummy_obj: dummy_obj.gem.clone(), + inner: gpuvm::GpuVm::new( + c_str!("Tyr::GpuVm"), + &tdev.clone(), + &*(dummy_obj.gem), + va_range.clone(), + 0..0, + init!(VmInner { + tdev: tdev.clone(), + va_range, + is_kernel, + page_table, + mm, + }), + )?, + binding: None, + memattr, + }) + } + + fn alloc_va_range(&mut self, range: Range<u64>) -> Result<mm::Node<(), ()>> { + let mut inner = self.inner.exec_lock(None)?; + inner.mm.insert_node_in_range( + (), + 4096, + 4096, + 0, + range.start, + range.end, + mm::InsertMode::Best, + ) + } + + pub(crate) fn bind_object( + &mut self, + tdev: ARef<TyrDevice>, + bo: &gem::Object, + range: Range<u64>, + vm_map_flags: VmMapFlags, + ) -> Result { + // XXX: do not rearrange this or it 
will deadlock. + // + // Sadly, `inner` will lock the reservation for `bo`, and we need + // `inner` to produce `vm_bo`. + // + // In the natural drop order, the `ARef` for `vm_bo` will attempt to + // lock the reservation to decrement the refcount, but it's already + // locked by the call that produced `inner`. + // + // We can prove the above by just enabling lockdep. + // + // This means that it's trivially easy to deadlock when obtain_bo() is + // called if the drop order is not inverted. A solution to this will + // probably be beyond the scope of this driver. This problem also + // apparently predates Rust4Linux, from what I could gather. + // + // Here we just move `vm_bo` into `ctx`, to make sure it gets dropped + // after `inner`, on top of it also being needed in the `step_map` + // callback. + // + // Note that sg_table() will also lock the reservation, so it too needs + // to come before `inner`. + let mut ctx = StepContext { + vm_bo: None, + vm_map_flags, + tdev, + }; + let sgt = bo.sg_table()?; + let node: Pin<Box<mm::NodeData<(), ()>, kernel::alloc::allocator::Kmalloc>> = + self.alloc_va_range(range.clone())?; + let mut inner = self.inner.exec_lock(Some(bo))?; + + let vm_bo = inner.obtain_bo()?; + + let mut vm_bo_guard = vm_bo.inner().sgt.lock(); + if vm_bo_guard.is_none() { + *vm_bo_guard = Some(sgt); + } + core::mem::drop(vm_bo_guard); + + ctx.vm_bo = Some(vm_bo); + inner.sm_map(&mut ctx, node.start(), range.end - range.start, 0) + } +} + +fn decode_faults(mut status: u32, iomem: &Devres<IoMem>) -> Result { + while status != 0 { + let as_index = (status | (status >> 16)).trailing_zeros(); + let mask = kernel::bits::bit_u32(as_index) as u32; + + let mut addr: u64; + let fault_status: u32; + let exception_type: u32; + let access_type: u32; + let source_id: u32; + + fault_status = as_faultstatus(as_index as usize) + .unwrap() + .read(iomem) + .unwrap_or_default(); + addr = as_faultaddress_lo(as_index as usize) + .unwrap() + .read(iomem) + 
.unwrap_or_default() as u64; + addr |= (as_faultaddress_hi(as_index as usize) + .unwrap() + .read(iomem) + .unwrap_or_default() as u64) + << 32; + + exception_type = fault_status & 0xff; + access_type = (fault_status >> 8) & 0x3; + source_id = fault_status >> 16; + + pr_err!( + "Unhandled Page fault in AS{} at VA 0x{:016X}\n\ + raw fault status: 0x{:X}\n\ + decoded fault status: {}\n\ + exception type 0x{:X}: {}\n\ + access type 0x{:X}: {}\n\ + source id 0x{:X}\n", + as_index, + addr, + fault_status, + if fault_status & (1 << 10) != 0 { + "DECODER FAULT" + } else { + "SLAVE FAULT" + }, + exception_type, + get_exception_name(exception_type), + access_type, + access_type_name(fault_status), + source_id + ); + + // Update status to process the next fault + status &= !mask; + } + + Ok(()) +} + +pub(crate) struct MmuIrqHandler { + tdev: ARef<TyrDevice>, + iomem: Arc<Devres<IoMem>>, +} + +impl IrqHandler for MmuIrqHandler { + fn handle_irq(&self) -> IrqReturn { + let rawstat = MMU_INT_RAWSTAT.read(&self.iomem).unwrap_or_default(); + + pr_info!("Acknowledging MMU_INT_RAWSTAT: {:#x}\n", rawstat); + let _ = MMU_INT_CLEAR.write(&self.iomem, rawstat); + + let status = rawstat & kernel::bits::genmask_u32(15, 0); + + let _ = decode_faults(status, &self.iomem); + + IrqReturn::Handled + } +} + +pub(crate) fn mmu_irq_init( + tdev: ARef<TyrDevice>, + pdev: platform::Device, + iomem: Arc<Devres<IoMem>>, +) -> Result<impl PinInit<IrqRegistration<MmuIrqHandler>, Error>> { + let mmu_irq = pdev.irq_by_name(c_str!("mmu"))?; + + let irq_handler = MmuIrqHandler { + tdev, + iomem: iomem.clone(), + }; + + MMU_INT_MASK.write(&iomem, core::u32::MAX)?; + + Ok(IrqRegistration::register( + mmu_irq, + irq::request::flags::SHARED, + c_str!("tyr-mmu"), + irq_handler, + )) +} + +/// Flags to be used when mapping memory. +/// +/// They can be combined with the operators `|`, `&`, and `!`. +/// +/// Values can be used from the [`flags`] module. 
+#[derive(Clone, Copy, PartialEq, Debug)] +pub(crate) struct VmMapFlags(u32); + +impl VmMapFlags { + /// Check whether `flags` is contained in `self`. + pub(crate) fn contains(self, flags: VmMapFlags) -> bool { + (self & flags) == flags + } +} + +impl core::ops::BitOr for VmMapFlags { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +impl core::ops::BitAnd for VmMapFlags { + type Output = Self; + fn bitand(self, rhs: Self) -> Self::Output { + Self(self.0 & rhs.0) + } +} + +impl core::ops::Not for VmMapFlags { + type Output = Self; + fn not(self) -> Self::Output { + Self(!self.0) + } +} + +impl core::ops::BitOrAssign for VmMapFlags { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0; + } +} + +impl core::fmt::Display for VmMapFlags { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{}", self.0) + } +} + +pub(crate) mod vm_map_flags { + use super::VmMapFlags; + use kernel::bits::bit_u32; + + pub(crate) const NONE: VmMapFlags = VmMapFlags(bit_u32(0)); + pub(crate) const READONLY: VmMapFlags = VmMapFlags(bit_u32(1)); + pub(crate) const NOEXEC: VmMapFlags = VmMapFlags(bit_u32(2)); + pub(crate) const UNCACHED: VmMapFlags = VmMapFlags(bit_u32(3)); +} diff --git a/drivers/gpu/drm/tyr/regs.rs b/drivers/gpu/drm/tyr/regs.rs new file mode 100644 index 0000000000000000000000000000000000000000..8ea89eab9b68f2f0befdb309b7d617b69848f272 --- /dev/null +++ b/drivers/gpu/drm/tyr/regs.rs @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +#![allow(dead_code)] + +use kernel::bits::bit_u64; +use kernel::devres::Devres; +use kernel::io::mem::IoMem; +use kernel::{bits::bit_u32, prelude::*}; + +/// Represents a register in the Register Set +pub(crate) struct Register<const OFFSET: usize>; + +impl<const OFFSET: usize> Register<OFFSET> { + #[inline] + pub(crate) fn read(&self, iomem: &Devres<IoMem>) -> Result<u32> { + (*iomem).try_access().ok_or(ENODEV)?.try_readl(OFFSET) + } + + 
#[inline] + pub(crate) fn write(&self, iomem: &Devres<IoMem>, value: u32) -> Result<()> { + (*iomem) + .try_access() + .ok_or(ENODEV)? + .try_writel(value, OFFSET) + } +} + +pub(crate) const GPU_ID: Register<0x0> = Register; +pub(crate) const GPU_L2_FEATURES: Register<0x4> = Register; +pub(crate) const GPU_CORE_FEATURES: Register<0x8> = Register; +pub(crate) const GPU_CSF_ID: Register<0x1c> = Register; +pub(crate) const GPU_REVID: Register<0x280> = Register; +pub(crate) const GPU_TILER_FEATURES: Register<0xc> = Register; +pub(crate) const GPU_MEM_FEATURES: Register<0x10> = Register; +pub(crate) const GPU_MMU_FEATURES: Register<0x14> = Register; +pub(crate) const GPU_AS_PRESENT: Register<0x18> = Register; +pub(crate) const GPU_INT_RAWSTAT: Register<0x20> = Register; + +pub(crate) const GPU_INT_RAWSTAT_FAULT: u32 = bit_u32(0); +pub(crate) const GPU_INT_RAWSTAT_PROTECTED_FAULT: u32 = bit_u32(1); +pub(crate) const GPU_INT_RAWSTAT_RESET_COMPLETED: u32 = bit_u32(8); +pub(crate) const GPU_INT_RAWSTAT_POWER_CHANGED_SINGLE: u32 = bit_u32(9); +pub(crate) const GPU_INT_RAWSTAT_POWER_CHANGED_ALL: u32 = bit_u32(10); +pub(crate) const GPU_INT_RAWSTAT_CLEAN_CACHES_COMPLETED: u32 = bit_u32(17); +pub(crate) const GPU_INT_RAWSTAT_DOORBELL_STATUS: u32 = bit_u32(18); +pub(crate) const GPU_INT_RAWSTAT_MCU_STATUS: u32 = bit_u32(19); + +pub(crate) const GPU_INT_CLEAR: Register<0x24> = Register; +pub(crate) const GPU_INT_MASK: Register<0x28> = Register; +pub(crate) const GPU_CMD: Register<0x30> = Register; +pub(crate) const GPU_THREAD_FEATURES: Register<0xac> = Register; +pub(crate) const GPU_THREAD_MAX_THREADS: Register<0xa0> = Register; +pub(crate) const GPU_THREAD_MAX_WORKGROUP_SIZE: Register<0xa4> = Register; +pub(crate) const GPU_THREAD_MAX_BARRIER_SIZE: Register<0xa8> = Register; +pub(crate) const GPU_TEXTURE_FEATURES0: Register<0xb0> = Register; +pub(crate) const GPU_SHADER_PRESENT_LO: Register<0x100> = Register; +pub(crate) const GPU_SHADER_PRESENT_HI: Register<0x104> = Register; 
+pub(crate) const GPU_TILER_PRESENT_LO: Register<0x110> = Register; +pub(crate) const GPU_TILER_PRESENT_HI: Register<0x114> = Register; +pub(crate) const GPU_L2_PRESENT_LO: Register<0x120> = Register; +pub(crate) const GPU_L2_PRESENT_HI: Register<0x124> = Register; +pub(crate) const L2_READY_LO: Register<0x160> = Register; +pub(crate) const L2_READY_HI: Register<0x164> = Register; +pub(crate) const L2_PWRON_LO: Register<0x1a0> = Register; +pub(crate) const L2_PWRON_HI: Register<0x1a4> = Register; +pub(crate) const L2_PWRTRANS_LO: Register<0x220> = Register; +pub(crate) const L2_PWRTRANS_HI: Register<0x224> = Register; // HI word follows LO (0x220) by 4 bytes, like every other pair +pub(crate) const L2_PWRACTIVE_LO: Register<0x260> = Register; +pub(crate) const L2_PWRACTIVE_HI: Register<0x264> = Register; + +pub(crate) const MCU_CONTROL: Register<0x700> = Register; +pub(crate) const MCU_CONTROL_ENABLE: u32 = 1; +pub(crate) const MCU_CONTROL_AUTO: u32 = 2; +pub(crate) const MCU_CONTROL_DISABLE: u32 = 0; + +pub(crate) const MCU_STATUS: Register<0x704> = Register; +pub(crate) const MCU_STATUS_DISABLED: u32 = 0; +pub(crate) const MCU_STATUS_ENABLED: u32 = 1; +pub(crate) const MCU_STATUS_HALT: u32 = 2; +pub(crate) const MCU_STATUS_FATAL: u32 = 3; + +pub(crate) const GPU_COHERENCY_FEATURES: Register<0x300> = Register; + +pub(crate) const JOB_INT_RAWSTAT: Register<0x1000> = Register; +pub(crate) const JOB_INT_CLEAR: Register<0x1004> = Register; +pub(crate) const JOB_INT_MASK: Register<0x1008> = Register; + +pub(crate) const JOB_INT_GLOBAL_IF: u32 = bit_u32(31); + +pub(crate) const MMU_INT_RAWSTAT: Register<0x2000> = Register; +pub(crate) const MMU_INT_CLEAR: Register<0x2004> = Register; +pub(crate) const MMU_INT_MASK: Register<0x2008> = Register; + +pub(crate) const AS_TRANSCFG_ADRMODE_UNMAPPED: u64 = bit_u64(0); +pub(crate) const AS_TRANSCFG_ADRMODE_IDENTITY: u64 = bit_u64(1); +pub(crate) const AS_TRANSCFG_ADRMODE_AARCH64_4K: u64 = bit_u64(2) | bit_u64(1); +pub(crate) const AS_TRANSCFG_ADRMODE_AARCH64_64K: u64 = bit_u64(3); 
+pub(crate) const fn as_transcfg_ina_bits(x: u64) -> u64 { + x << 6 +} +pub(crate) const fn as_transcfg_outa_bits(x: u64) -> u64 { + x << 14 +} +pub(crate) const AS_TRANSCFG_SL_CONCAT: u64 = bit_u64(22); +pub(crate) const AS_TRANSCFG_PTW_MEMATTR_NC: u64 = bit_u64(24); +pub(crate) const AS_TRANSCFG_PTW_MEMATTR_WB: u64 = bit_u64(25); +pub(crate) const AS_TRANSCFG_PTW_SH_NS: u64 = 0 << 28; +pub(crate) const AS_TRANSCFG_PTW_SH_OS: u64 = bit_u64(29); +pub(crate) const AS_TRANSCFG_PTW_SH_IS: u64 = bit_u64(29) | bit_u64(28); +pub(crate) const AS_TRANSCFG_PTW_RA: u64 = bit_u64(30); +pub(crate) const AS_TRANSCFG_DISABLE_HIER_AP: u64 = bit_u64(33); +pub(crate) const AS_TRANSCFG_DISABLE_AF_FAULT: u64 = bit_u64(34); +pub(crate) const AS_TRANSCFG_WXN: u64 = bit_u64(35); + +pub(crate) const MMU_BASE: usize = 0x2400; +pub(crate) const MMU_AS_SHIFT: usize = 6; + +const fn mmu_as(as_nr: usize) -> usize { + MMU_BASE + (as_nr << MMU_AS_SHIFT) +} + +pub(crate) struct AsRegister(usize); + +impl AsRegister { + fn new(as_nr: usize, offset: usize) -> Result<Self> { + if as_nr >= 32 { + Err(EINVAL) + } else { + Ok(AsRegister(mmu_as(as_nr) + offset)) + } + } + + #[inline] + pub(crate) fn read(&self, iomem: &Devres<IoMem>) -> Result<u32> { + (*iomem) + .try_access() + .ok_or(ENODEV)? + .try_readl(self.0 as usize) + } + + #[inline] + pub(crate) fn write(&self, iomem: &Devres<IoMem>, value: u32) -> Result<()> { + (*iomem) + .try_access() + .ok_or(ENODEV)? 
+ .try_writel(value, self.0 as usize) + } +} + +pub(crate) fn as_transtab_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x0) +} + +pub(crate) fn as_transtab_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x4) +} + +pub(crate) fn as_memattr_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x8) +} + +pub(crate) fn as_memattr_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0xc) +} + +pub(crate) fn as_lockaddr_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x10) +} + +pub(crate) fn as_lockaddr_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x14) +} + +pub(crate) fn as_command(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x18) +} + +pub(crate) fn as_faultstatus(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x1c) +} + +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_MASK: u32 = 0x3 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: u32 = 0x0 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_EX: u32 = 0x1 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_READ: u32 = 0x2 << 8; +pub(crate) const AS_FAULTSTATUS_ACCESS_TYPE_WRITE: u32 = 0x3 << 8; + +pub(crate) fn as_faultaddress_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x20) +} + +pub(crate) fn as_faultaddress_hi(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x24) +} + +pub(crate) const AS_COMMAND_NOP: u32 = 0; +pub(crate) const AS_COMMAND_UPDATE: u32 = 1; +pub(crate) const AS_COMMAND_LOCK: u32 = 2; +pub(crate) const AS_COMMAND_UNLOCK: u32 = 3; +pub(crate) const AS_COMMAND_FLUSH_PT: u32 = 4; +pub(crate) const AS_COMMAND_FLUSH_MEM: u32 = 5; + +pub(crate) fn as_status(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x28) +} + +pub(crate) const AS_STATUS_ACTIVE: u32 = bit_u32(0); + +pub(crate) fn as_transcfg_lo(as_nr: usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x30) +} +pub(crate) fn as_transcfg_hi(as_nr: 
usize) -> Result<AsRegister> { + AsRegister::new(as_nr, 0x34) +} + +pub(crate) const AS_LOCK_REGION_MIN_SIZE: u32 = bit_u32(15); + +pub(crate) const AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL: u32 = 2 << 2; + +pub(crate) fn as_memattr_aarch64_inner_alloc_expl(w: bool, r: bool) -> u32 { + (3 << 2) | ((w as u32) << 0) | ((r as u32) << 1) +} +pub(crate) const AS_MEMATTR_AARCH64_SH_MIDGARD_INNER: u32 = 0 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SH_CPU_INNER: u32 = 1 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH: u32 = 2 << 4; +pub(crate) const AS_MEMATTR_AARCH64_SHARED: u32 = 0 << 6; +pub(crate) const AS_MEMATTR_AARCH64_INNER_OUTER_NC: u32 = 1 << 6; +pub(crate) const AS_MEMATTR_AARCH64_INNER_OUTER_WB: u32 = 2 << 6; +pub(crate) const AS_MEMATTR_AARCH64_FAULT: u32 = 3 << 6; diff --git a/drivers/gpu/drm/tyr/tyr.rs b/drivers/gpu/drm/tyr/tyr.rs new file mode 100644 index 0000000000000000000000000000000000000000..a1c8a52c8792e4b094d4f8d836405818478207f7 --- /dev/null +++ b/drivers/gpu/drm/tyr/tyr.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT + +//! Rust driver for ARM Mali CSF-based GPUs +//! +//! The skeleton is basically taken from Nova and also rust_platform_driver.rs. +//! +//! So far, this is just a very early-stage experiment, but it looks promising: +//! +//! - A UAPI template is implemented. +//! +//! - The DRM registration and IOCTL boilerplate is implemented. +//! +//! - Basic iomem and register set implementation, so it's possible to program +//! the device. We can even attest that it works already. +//! +//! - IRQ handling, so we can receive notifications from the device. +//! +//! Notably missing (apart from literally everything else): +//! - Job submission logic through drm_scheduler and completion through dma_fences +//! - VM_BIND support through drm_gpuvm +//! - Devfreq, pm_idle, etc. +//! +//! The name "Tyr" is inspired by Norse mythology, reflecting ARM's tradition of 
+//! 
naming their GPUs after Nordic mythological figures and places. + +use crate::driver::TyrDriver; + +mod driver; +mod file; +mod fw; +mod gem; +mod gpu; +mod mmu; +mod regs; + +kernel::module_platform_driver! { + type: TyrDriver, + name: "tyr", + author: "The Tyr driver authors", + description: "Rust driver for ARM Mali CSF-based GPUs", + license: "Dual MIT/GPL", +} diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index 87c9cb555dd1d19ba7e042203d38de0e74f69e05..3a798dbbfcc6a24446b0b29fb0488ccb5e05b221 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -1010,6 +1010,11 @@ struct drm_panthor_tiler_heap_destroy { __u32 pad; }; +// XXX: this kludge is required for bindgen to pick up the symbol for Rust. +enum { + DRM_IOCTL_TYR_DEV_QUERY = DRM_IOCTL_PANTHOR_DEV_QUERY +}; + #if defined(__cplusplus) } #endif diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 18d1affbf96fdef82b46ab9481e8fb4b5a513402..320b40a0b6420abcd79395bcaa21d33445d833fc 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -51,6 +51,8 @@ #include <linux/workqueue.h> #include <trace/events/rust_sample.h> + + /* `bindgen` gets confused at certain things. */ const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN; const size_t RUST_CONST_HELPER_PAGE_SIZE = PAGE_SIZE; diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 22a4e8fe5ddbf202430e861d62eacd9c2a00aa0d..96a1157e74b6aa14e136e694b81d6ccb354cd15a 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -60,7 +60,7 @@ impl Device { } /// Obtain the raw `struct device *`. 
- pub(crate) fn as_raw(&self) -> *mut bindings::device { + pub fn as_raw(&self) -> *mut bindings::device { self.0.get() } diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs index 680cbe8df2e1410895f6aa17688b424829b20a98..34e6c5b25c2103af897e9d428910e0117c3206c6 100644 --- a/rust/kernel/platform.rs +++ b/rust/kernel/platform.rs @@ -192,7 +192,8 @@ impl Device { Self(dev) } - fn as_raw(&self) -> *mut bindings::platform_device { + /// Obtain the raw `struct platform_device *`. + pub fn as_raw(&self) -> *mut bindings::platform_device { // SAFETY: By the type invariant `self.0.as_raw` is a pointer to the `struct device` // embedded in `struct platform_device`. unsafe { container_of!(self.0.as_raw(), bindings::platform_device, dev) }.cast_mut() diff --git a/rust/uapi/uapi_helper.h b/rust/uapi/uapi_helper.h index 1409441359f510236256bc17851f9aac65c45c4e..d4a239cf2a64fce964b28959ff807ee187b2610d 100644 --- a/rust/uapi/uapi_helper.h +++ b/rust/uapi/uapi_helper.h @@ -9,6 +9,7 @@ #include <uapi/asm-generic/ioctl.h> #include <uapi/drm/drm.h> #include <uapi/drm/nova_drm.h> +#include <uapi/drm/panthor_drm.h> #include <uapi/linux/mdio.h> #include <uapi/linux/mii.h> #include <uapi/linux/ethtool.h>