Commit 96d7356d authored by Daniel Almeida

Introduce Tyr


Rust driver for ARM Mali CSF-based GPUs

The skeleton is largely taken from Nova and from rust_platform_driver.rs.

So far, this is just a very early-stage experiment, but it looks promising:

- We use the same uAPI as Panthor, although this needs a bit of work, since
  bindgen cannot translate #defines into Rust.

- The DRM registration and a few IOCTLs are implemented. There is an igt
  branch with tests.

- Basic iomem and register set implementation, so it's possible to program
  the device.

- IRQ handling, so we can receive notifications from the device.

- We can boot the firmware.

- We can communicate with CSF using the global interface. We can submit
  requests and the MCU will appropriately respond in the ack field.

- There is GEM_CREATE and VM_BIND support.

- We can send a PING request to CSF, and it will acknowledge it
  successfully.

Notably missing (apart from literally everything else):
- Job submission logic through drm_scheduler and completion through dma_fences
- Devfreq, pm_idle, etc.

The name "Tyr" is inspired by Norse mythology, reflecting ARM's tradition of
naming their GPUs after Nordic mythological figures and places.

Co-developed-by: Alice Ryhl <alice.ryhl@google.com>
Signed-off-by: Alice Ryhl <alice.ryhl@google.com>
Signed-off-by: Daniel Almeida <daniel.almeida@collabora.com>
parent 4fc4b8d4
@@ -511,6 +511,8 @@ source "drivers/gpu/drm/sprd/Kconfig"
source "drivers/gpu/drm/imagination/Kconfig"
source "drivers/gpu/drm/tyr/Kconfig"
config DRM_HYPERV
tristate "DRM Support for Hyper-V synthetic video device"
depends on DRM && PCI && MMU && HYPERV
@@ -229,3 +229,4 @@ obj-y += solomon/
obj-$(CONFIG_DRM_SPRD) += sprd/
obj-$(CONFIG_DRM_LOONGSON) += loongson/
obj-$(CONFIG_DRM_POWERVR) += imagination/
obj-$(CONFIG_DRM_TYR) += tyr/
# SPDX-License-Identifier: GPL-2.0 or MIT
config TYR_DRM_GEM_SHMEM_HELPER
bool
select DRM_GEM_SHMEM_HELPER
config TYR_DRM_GPUVM
bool
select DRM_GPUVM
config DRM_TYR
tristate "Tyr (Rust DRM support for ARM Mali CSF-based GPUs)"
depends on DRM=y
depends on RUST
depends on RUST_FW_LOADER_ABSTRACTIONS
depends on ARM || ARM64 || COMPILE_TEST
depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE
depends on MMU
select TYR_DRM_GPUVM
select TYR_DRM_GEM_SHMEM_HELPER
select IOMMU_IO_PGTABLE_LPAE
depends on IOMMU_SUPPORT
help
Rust DRM driver for ARM Mali CSF-based GPUs.
This driver is for Mali (or Immortalis) Valhall Gxxx GPUs.
Note that the Mali-G68 and Mali-G78, while Valhall architecture, will
be supported with the panfrost driver as they are not CSF GPUs.
If M is selected, the module will be called tyr.
# SPDX-License-Identifier: GPL-2.0 or MIT
obj-$(CONFIG_DRM_TYR) += tyr.o
// SPDX-License-Identifier: GPL-2.0 or MIT
use kernel::bits::bit_u32;
use kernel::c_str;
use kernel::clk::Clk;
use kernel::devres::Devres;
use kernel::drm;
use kernel::drm::drv;
use kernel::drm::ioctl;
use kernel::io;
use kernel::io::mem::IoMem;
use kernel::irq::request::Registration as IrqRegistration;
use kernel::new_mutex;
use kernel::of;
use kernel::platform;
use kernel::prelude::*;
use kernel::regulator::Regulator;
use kernel::sync::Arc;
use kernel::sync::Mutex;
use kernel::time;
use kernel::types::ARef;
use crate::file::File;
use crate::fw;
use crate::fw::irq::JobIrqHandler;
use crate::fw::Firmware;
use crate::gpu;
use crate::gpu::irq::GpuIrqHandler;
use crate::gpu::wait::PowerOnWait;
use crate::gpu::GpuInfo;
use crate::mmu;
use crate::mmu::irq::MmuIrqHandler;
use crate::mmu::Mmu;
use crate::regs::*;
use core::pin::Pin;
/// Convenience type alias for the DRM device type for this driver
pub(crate) type TyrDevice = drm::device::Device<TyrDriver>;
#[pin_data(PinnedDrop)]
pub(crate) struct TyrDriver {
device: ARef<TyrDevice>,
#[pin]
gpu_irq: IrqRegistration<GpuIrqHandler>,
#[pin]
mmu_irq: IrqRegistration<MmuIrqHandler>,
#[pin]
job_irq: IrqRegistration<JobIrqHandler>,
}
impl TyrDriver {
// We have switched to polling for now until SpinLockIrq advances a bit.
fn wait_for_mcu_to_boot(&self) -> Result {
pr_info!("Polling on JOB_INT_GLOBAL_IF\n");
let iomem = &self.device.data().iomem;
let op = || JOB_INT_RAWSTAT.read(iomem);
let cond = |raw_stat: &u32| -> bool { *raw_stat & JOB_INT_GLOBAL_IF != 0 };
io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(0),
Some(time::Delta::from_millis(100)),
)?;
JOB_INT_CLEAR.write(iomem, JOB_INT_GLOBAL_IF)?;
pr_info!("MCU booted\n");
Ok(())
}
// We have switched to polling for now until SpinLockIrq advances a bit.
fn wait_for_poweron(&self, poweron_wait: Arc<PowerOnWait>) -> Result {
pr_info!("Polling on RESET_COMPLETED\n");
let iomem = &self.device.data().iomem;
let op = || GPU_INT_RAWSTAT.read(iomem);
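// 0x700 covers GPU_INT_RAWSTAT bits 8..=10, which going by the Mali register
// layout should be RESET_COMPLETED plus the two POWER_CHANGED bits (assumption).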
let cond = |raw_stat: &u32| -> bool { *raw_stat == 0x700 };
io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(0),
Some(time::Delta::from_millis(100)),
)?;
GPU_INT_CLEAR.write(iomem, 0x700)?;
pr_info!("GPU has powered on\n");
Ok(())
}
// fn wait_for_mcu_to_boot(&self) -> Result {
// let data = self.device.data();
// let op = || Ok(*data.fw.booted.lock());
// let cond = |booted: &bool| *booted;
// data.fw
// .req_wait
// .wait_interruptible_timeout(1000, op, &cond, true)
// }
// fn wait_for_poweron(&self, poweron_wait: Arc<PowerOnWait>) -> Result {
// let mut powered_on = poweron_wait.powered_on.lock();
// while !*powered_on {
// match poweron_wait
// .wait
// .wait_interruptible_timeout(&mut powered_on, msecs_to_jiffies(100))
// {
// CondVarTimeoutResult::Timeout => return Err(ETIMEDOUT),
// _ => {}
// }
// }
// Ok(())
// }
}
#[pin_data]
pub(crate) struct TyrData {
pub(crate) pdev: platform::Device,
#[pin]
clks: Mutex<Clocks>,
#[pin]
regulators: Mutex<Regulators>,
/// Some information about the GPU. This is mainly queried by userspace (Mesa).
pub(crate) gpu_info: GpuInfo,
/// The firmware running on the MCU.
#[pin]
pub(crate) fw: Firmware,
/// True if the CPU/GPU are memory coherent.
pub(crate) coherent: bool,
/// MMU management.
pub(crate) mmu: Pin<KBox<Mutex<Mmu>>>,
/// The MMIO region.
pub(crate) iomem: Arc<Devres<IoMem>>,
}
unsafe impl Send for TyrData {}
unsafe impl Sync for TyrData {}
fn issue_soft_reset(iomem: &Devres<IoMem<0>>) -> Result<()> {
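// Assumption: this mirrors Panthor's GPU_CMD encoding, where the low byte is
// the command type (1 == reset) and bit 8 carries a soft-reset payload.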
let soft_reset_cmd = 1 | bit_u32(8);
GPU_CMD.write(iomem, soft_reset_cmd)?;
let op = || GPU_INT_RAWSTAT.read(iomem);
let cond = |raw_stat: &u32| -> bool { (*raw_stat >> 8) & 1 == 1 };
let res = io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(100),
Some(time::Delta::from_micros(20000)),
);
if let Err(e) = res {
pr_err!("GPU reset failed with errno {}\n", e.to_errno());
pr_err!("GPU_INT_RAWSTAT is {}\n", GPU_INT_RAWSTAT.read(iomem)?);
}
Ok(())
}
kernel::of_device_table!(
OF_TABLE,
MODULE_OF_TABLE,
<TyrDriver as platform::Driver>::IdInfo,
[
(of::DeviceId::new(c_str!("rockchip,rk3588-mali")), ()),
(of::DeviceId::new(c_str!("arm,mali-valhall-csf")), ())
]
);
impl platform::Driver for TyrDriver {
type IdInfo = ();
const OF_ID_TABLE: Option<of::IdTable<Self::IdInfo>> = Some(&OF_TABLE);
fn probe(pdev: &mut platform::Device, _info: Option<&Self::IdInfo>) -> Result<Pin<KBox<Self>>> {
dev_dbg!(pdev.as_ref(), "Probed Tyr\n");
let core_clk = Clk::new(pdev.as_ref(), Some(c_str!("core")))?;
let stacks_clk = Clk::new(pdev.as_ref(), Some(c_str!("stacks")))?;
let coregroup_clk = Clk::new(pdev.as_ref(), Some(c_str!("coregroup")))?;
core_clk.prepare_enable()?;
stacks_clk.prepare_enable()?;
coregroup_clk.prepare_enable()?;
let mali_regulator = Regulator::get(pdev.as_ref(), c_str!("mali"))?;
let sram_regulator = Regulator::get(pdev.as_ref(), c_str!("sram"))?;
mali_regulator.enable()?;
sram_regulator.enable()?;
let resource = pdev.resource(0).ok_or(EINVAL)?;
let iomem = Arc::new(pdev.ioremap_resource(resource)?, GFP_KERNEL)?;
// Disable all interrupts for now.
JOB_INT_MASK.write(&iomem, 0)?;
MMU_INT_MASK.write(&iomem, 0)?;
GPU_INT_MASK.write(&iomem, 0)?;
issue_soft_reset(&iomem)?;
gpu::l2_power_on(&iomem)?;
let gpu_info = GpuInfo::new(&iomem)?;
gpu_info.log(pdev);
pdev.as_ref().dma_set_max_seg_size(u32::MAX);
pdev.as_ref()
.dma_set_mask_and_coherent(u64::from(gpu_info.pa_bits()))?;
let platform = pdev.clone();
let tdev: ARef<TyrDevice> = drm::device::Device::new_no_data(pdev.as_ref())?;
let mmu = KBox::pin_init(new_mutex!(Mmu::new()?), GFP_KERNEL)?;
let fw = Firmware::init(&tdev, pdev.clone(), &gpu_info, mmu.as_ref(), iomem.clone())?;
// Ideally we'd find a way around this useless clone too...
let t = tdev.clone();
let p = platform.clone();
let i = iomem.clone();
let data = Arc::pin_init(
try_pin_init!(TyrData {
pdev: p.clone(),
clks <- new_mutex!(Clocks {
core: core_clk,
stacks: stacks_clk,
coregroup: coregroup_clk,
}),
regulators <- new_mutex!(Regulators {
mali: mali_regulator,
sram: sram_regulator,
}),
gpu_info,
fw <- fw,
coherent: false, // TODO. The GPU is not IO coherent on rk3588, which is what I am testing on.
mmu,
iomem: i.clone(),
}),
GFP_KERNEL,
)?;
// We must find a way around this. It's being discussed on Zulip already.
//
// Note that this is a problem, because if we fail at probe, then the
// drop code expects the data to be set, which leads to a crash.
unsafe { tdev.clone().init_data(data) };
drm::drv::Registration::new_foreign_owned(tdev.clone(), 0)?;
let poweron_wait = PowerOnWait::new()?;
let pow = poweron_wait.clone();
let i = iomem.clone();
let driver = KBox::pin_init(
try_pin_init!(TyrDriver {
device: t.clone(),
gpu_irq <- gpu::irq::gpu_irq_init(t.clone(), platform.clone(), i.clone(), pow)?,
mmu_irq <- mmu::irq::mmu_irq_init(t.clone(), platform.clone(), i.clone())?,
job_irq <- fw::irq::job_irq_init(t.clone(), platform.clone(), i.clone())?,
}),
GFP_KERNEL,
)?;
driver.wait_for_poweron(poweron_wait.clone())?;
pr_info!("Booting the MCU");
MCU_CONTROL.write(&iomem, MCU_CONTROL_AUTO)?;
driver.wait_for_mcu_to_boot()?;
let data = tdev.data();
let gpu_info = &data.gpu_info;
let core_clk = &data.clks.lock().core;
data.fw.enable(iomem.clone(), gpu_info, core_clk)?;
dev_info!(pdev.as_ref(), "Tyr initialized correctly.\n");
Ok(driver)
}
}
#[pinned_drop]
impl PinnedDrop for TyrDriver {
fn drop(self: Pin<&mut Self>) {
// XXX: we will not have the `data` field here if we failed the
// initialization, i.e.: if probe failed.
//
// We need to figure out with the community how to properly split the
// creation of a DRM device from the place where the data is set and
// from the place where it is registered to overcome this.
//
// The current solution, i.e.: `new_from_closure` is just a hack, and it
// shows its shortcomings here, for example.
//
// dev_dbg!(self.device.data().pdev.as_ref(), "Removed Tyr.\n");
}
}
const INFO: drm::drv::DriverInfo = drm::drv::DriverInfo {
major: 0,
minor: 0,
patchlevel: 0,
name: c_str!("tyr"),
desc: c_str!("ARM Mali CSF-based GPU driver"),
date: c_str!("20250125"),
};
#[vtable]
impl drm::drv::Driver for TyrDriver {
type Data = Arc<TyrData>;
type File = File;
type Object = crate::gem::Object;
const INFO: drm::drv::DriverInfo = INFO;
const FEATURES: u32 = drv::FEAT_GEM | drv::FEAT_GEM_GPUVA;
kernel::declare_drm_ioctls! {
(TYR_DEV_QUERY, drm_panthor_dev_query, ioctl::RENDER_ALLOW, File::dev_query),
(TYR_VM_CREATE, drm_panthor_vm_create, ioctl::RENDER_ALLOW, File::vm_create),
(TYR_VM_DESTROY, drm_panthor_vm_destroy, ioctl::RENDER_ALLOW, File::vm_destroy),
(TYR_VM_BIND, drm_panthor_vm_bind, ioctl::RENDER_ALLOW, File::vm_bind),
(TYR_VM_GET_STATE, drm_panthor_vm_get_state, ioctl::RENDER_ALLOW, File::vm_get_state),
(TYR_BO_CREATE, drm_panthor_bo_create, ioctl::RENDER_ALLOW, File::bo_create),
(TYR_BO_MMAP_OFFSET, drm_panthor_bo_mmap_offset, ioctl::RENDER_ALLOW, File::bo_mmap_offset),
}
}
#[pin_data]
struct Clocks {
core: Clk,
stacks: Clk,
coregroup: Clk,
}
#[pin_data]
struct Regulators {
mali: Regulator,
sram: Regulator,
}
// SPDX-License-Identifier: GPL-2.0 or MIT
use kernel::alloc::flags::*;
use kernel::drm;
use kernel::drm::device::Device as DrmDevice;
use kernel::drm::gem::BaseObject;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::transmute::FromBytes;
use kernel::uaccess::UserSlice;
use kernel::uapi;
use crate::driver::TyrDevice;
use crate::driver::TyrDriver;
use crate::gem;
use crate::mmu::vm;
use crate::mmu::vm::pool::Pool;
use crate::mmu::vm::VmLayout;
use crate::mmu::vm::VmUserSize;
pub(crate) struct File {
/// A pool storing our VMs for this particular context.
vm_pool: Pool,
}
/// Convenience type alias for our DRM `File` type
pub(crate) type DrmFile = drm::file::File<File>;
impl drm::file::DriverFile for File {
type Driver = TyrDriver;
fn open(dev: &DrmDevice<Self::Driver>) -> Result<Pin<KBox<Self>>> {
dev_dbg!(dev.as_ref(), "drm::device::Device::open\n");
let file = Self {
vm_pool: Pool::create(),
};
Ok(KBox::new(file, GFP_KERNEL)?.into())
}
}
impl File {
pub(crate) fn dev_query(
tdev: &TyrDevice,
devquery: &mut uapi::drm_panthor_dev_query,
_file: &DrmFile,
) -> Result<u32> {
if devquery.pointer == 0 {
match devquery.type_ {
uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => {
devquery.size = core::mem::size_of_val(&tdev.data().gpu_info) as u32;
Ok(0)
}
_ => Err(EINVAL),
}
} else {
match devquery.type_ {
uapi::drm_panthor_dev_query_type_DRM_PANTHOR_DEV_QUERY_GPU_INFO => {
let mut writer =
UserSlice::new(devquery.pointer as usize, devquery.size as usize).writer();
writer.write(&tdev.data().gpu_info)?;
Ok(0)
}
_ => Err(EINVAL),
}
}
}
pub(crate) fn vm_create(
tdev: &TyrDevice,
vmcreate: &mut uapi::drm_panthor_vm_create,
file: &DrmFile,
) -> Result<u32> {
let id = file.inner().vm_pool().create_vm(
tdev,
VmLayout::from_user_sz(tdev, VmUserSize::Custom(vmcreate.user_va_range)),
)?;
vmcreate.id = id as u32;
Ok(0)
}
pub(crate) fn vm_destroy(
tdev: &TyrDevice,
vmdestroy: &mut uapi::drm_panthor_vm_destroy,
file: &DrmFile,
) -> Result<u32> {
let iomem = tdev.data().iomem.clone();
file.inner()
.vm_pool()
.destroy_vm(vmdestroy.id as usize, iomem)?;
Ok(0)
}
pub(crate) fn vm_bind(
tdev: &TyrDevice,
vmbind: &mut uapi::drm_panthor_vm_bind,
file: &DrmFile,
) -> Result<u32> {
if vmbind.flags & uapi::drm_panthor_vm_bind_flags_DRM_PANTHOR_VM_BIND_ASYNC != 0 {
dev_info!(tdev.as_ref(), "We do not support async VM_BIND yet");
return Err(ENOTSUPP);
}
if vmbind.ops.stride as usize != core::mem::size_of::<uapi::drm_panthor_vm_bind_op>() {
dev_info!(
tdev.as_ref(),
"We cannot graciously handle stride mismatches yet"
);
return Err(ENOTSUPP);
}
let stride = vmbind.ops.stride as usize;
let count = vmbind.ops.count as usize;
let mut reader = UserSlice::new(vmbind.ops.array as usize, stride * count).reader();
let iomem = tdev.data().iomem.clone();
for i in 0..count {
let res = {
let op: VmBindOp = reader.read()?;
let mask = uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MASK;
match op.0.flags as i32 & mask {
uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_MAP => {
let bo = gem::lookup_handle(file, op.0.bo_handle)?;
let range = op.0.va..op.0.va + op.0.size;
let vm = file
.inner()
.vm_pool()
.get_vm(vmbind.vm_id as usize)
.ok_or(EINVAL)?;
vm.lock().bind_gem(
iomem.clone(),
&bo.gem,
op.0.bo_offset,
range,
vm::map_flags::Flags::try_from(op.0.flags & 0b111)?,
)?;
}
uapi::drm_panthor_vm_bind_op_flags_DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP => {
if op.0.bo_handle != 0 || op.0.bo_offset != 0 {
return Err(EINVAL);
}
let range = op.0.va..op.0.va + op.0.size;
let vm = file
.inner()
.vm_pool()
.get_vm(vmbind.vm_id as usize)
.ok_or(EINVAL)?;
vm.lock().unmap_range(iomem.clone(), range)?;
}
_ => return Err(ENOTSUPP),
}
Ok(0)
};
if let Err(e) = res {
vmbind.ops.count = i as u32;
return Err(e);
}
}
Ok(0)
}
pub(crate) fn vm_get_state(
_tdev: &TyrDevice,
_vmgetstate: &mut uapi::drm_panthor_vm_get_state,
_file: &DrmFile,
) -> Result<u32> {
Err(ENOTSUPP)
}
pub(crate) fn bo_create(
tdev: &TyrDevice,
bocreate: &mut uapi::drm_panthor_bo_create,
file: &DrmFile,
) -> Result<u32> {
if bocreate.flags & !uapi::drm_panthor_bo_flags_DRM_PANTHOR_BO_NO_MMAP != 0 {
dev_err!(
tdev.as_ref(),
"bo_create: invalid flags {}\n",
bocreate.flags
);
return Err(EINVAL);
}
let bo = gem::new_object(tdev, bocreate.size as usize, bocreate.flags)?;
let handle = bo.gem.create_handle(file)?;
bocreate.handle = handle;
bocreate.size = bo.gem.size() as u64;
Ok(0)
}
pub(crate) fn bo_mmap_offset(
_tdev: &TyrDevice,
bommap: &mut uapi::drm_panthor_bo_mmap_offset,
file: &DrmFile,
) -> Result<u32> {
let bo = gem::lookup_handle(file, bommap.handle)?;
bommap.offset = bo.gem.create_mmap_offset()?;
Ok(0)
}
fn vm_pool(self: Pin<&Self>) -> Pin<&Pool> {
// SAFETY: Field projection, we never move out of this field.
unsafe { self.map_unchecked(|f| &f.vm_pool) }
}
}
#[repr(transparent)]
struct VmBindOp(uapi::drm_panthor_vm_bind_op);
// XXX: we cannot implement this trait for the uapi type directly, hence the
// wrapper.
// SAFETY: this struct is safe to be transmuted from a byte slice.
unsafe impl FromBytes for VmBindOp {}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! A general flags type adapted from the WIP work by Felipe Xavier.
//!
//! This will be replaced by his patch once it's ready.
#[macro_export]
/// Creates a new flags type.
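///
/// A minimal usage sketch (illustrative, not part of this patch):
///
/// ```ignore
/// impl_flags!(MyFlags, MyFlag, u32);
/// const READ: MyFlag = MyFlag(1 << 0);
/// const WRITE: MyFlag = MyFlag(1 << 1);
///
/// let flags = MyFlags::from(READ) | WRITE;
/// assert!(flags.contains(READ) && flags.contains(WRITE));
/// ```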
macro_rules! impl_flags {
($flags:ident, $flag:ident, $ty:ty) => {
#[allow(missing_docs)]
#[repr(transparent)]
#[derive(Copy, Clone, Default, Debug, PartialEq, Eq)]
pub struct $flags($ty);
#[allow(missing_docs)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct $flag($ty);
impl From<$flag> for $flags {
#[inline]
fn from(value: $flag) -> Self {
Self(value.0)
}
}
impl From<$flags> for $ty {
#[inline]
fn from(value: $flags) -> Self {
value.0
}
}
impl core::ops::BitOr for $flags {
type Output = Self;
#[inline]
fn bitor(self, rhs: Self) -> Self::Output {
Self(self.0 | rhs.0)
}
}
impl core::ops::BitOrAssign for $flags {
#[inline]
fn bitor_assign(&mut self, rhs: Self) {
*self = *self | rhs;
}
}
impl core::ops::BitAnd for $flags {
type Output = Self;
#[inline]
fn bitand(self, rhs: Self) -> Self::Output {
Self(self.0 & rhs.0)
}
}
impl core::ops::BitAndAssign for $flags {
#[inline]
fn bitand_assign(&mut self, rhs: Self) {
*self = *self & rhs;
}
}
impl core::ops::BitOr<$flag> for $flags {
type Output = Self;
#[inline]
fn bitor(self, rhs: $flag) -> Self::Output {
self | Self::from(rhs)
}
}
impl core::ops::BitOrAssign<$flag> for $flags {
#[inline]
fn bitor_assign(&mut self, rhs: $flag) {
*self = *self | rhs;
}
}
impl core::ops::BitAnd<$flag> for $flags {
type Output = Self;
#[inline]
fn bitand(self, rhs: $flag) -> Self::Output {
self & Self::from(rhs)
}
}
impl core::ops::BitAndAssign<$flag> for $flags {
#[inline]
fn bitand_assign(&mut self, rhs: $flag) {
*self = *self & rhs;
}
}
impl core::ops::BitXor for $flags {
type Output = Self;
#[inline]
fn bitxor(self, rhs: Self) -> Self::Output {
Self(self.0 ^ rhs.0)
}
}
impl core::ops::BitXorAssign for $flags {
#[inline]
fn bitxor_assign(&mut self, rhs: Self) {
*self = *self ^ rhs;
}
}
impl core::ops::Not for $flags {
type Output = Self;
#[inline]
fn not(self) -> Self::Output {
Self(!self.0)
}
}
impl $flags {
/// Returns an empty instance with no flags set.
#[inline]
pub const fn empty() -> Self {
Self(0)
}
/// Checks if a specific flag is set.
#[inline]
pub fn contains(self, flag: $flag) -> bool {
(self.0 & flag.0) == flag.0
}
}
};
}
// SPDX-License-Identifier: GPL-2.0 or MIT
use global::GlobalInterface;
use kernel::bindings::SZ_1G;
use kernel::clk::Clk;
use kernel::devres::Devres;
use kernel::io::mem::IoMem;
use kernel::new_mutex;
use kernel::new_spinlock_irq;
use kernel::platform;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::Mutex;
use kernel::sync::SpinLockIrq;
use parse::Section;
use wait::ReqWait;
use crate::driver::TyrDevice;
use crate::gpu::GpuInfo;
use crate::mmu::vm::Vm;
use crate::mmu::vm::VmLayout;
use crate::mmu::Mmu;
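// Going by the values below, the FW <-> host shared region is a 64 MiB window
// at MCU VA 0x04000000 (0x04000000 bytes == 64 MiB).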
const CSF_MCU_SHARED_REGION_START: u32 = 0x04000000;
const CSF_MCU_SHARED_REGION_SIZE: u32 = 0x04000000;
mod global;
pub(crate) mod irq;
mod parse;
pub(crate) mod wait;
#[repr(transparent)]
#[derive(Debug, Clone, Copy)]
/// An offset into the shared section that is known to be valid.
///
/// This can be obtained via a call to [`Firmware::kmap_offset`].
///
/// # Invariants
///
/// `self.0` is a valid offset into the shared section. This means that it can
/// safely be dereferenced by the CPU.
struct CheckedKmapOffset(usize);
impl CheckedKmapOffset {
fn as_mut_ptr(&self, shared_section: &mut Section) -> Result<*mut core::ffi::c_void> {
let vmap = shared_section.mem.vmap()?;
let vmap = vmap.as_mut_ptr();
// SAFETY: safe by the type invariant.
let offset = unsafe { vmap.add(self.0) };
Ok(offset)
}
fn read<T>(&self, shared_section: &mut Section) -> Result<T> {
let ptr = self.as_mut_ptr(shared_section)?;
// SAFETY: we know that this pointer is aligned and valid for reads for
// at least size_of::<T>() bytes.
Ok(unsafe { core::ptr::read_volatile(ptr as *const T) })
}
fn write<T>(&self, shared_section: &mut Section, value: T) -> Result {
let ptr = self.as_mut_ptr(shared_section)?;
// SAFETY: we know that this pointer is aligned and valid for writes for
// at least size_of::<T>() bytes.
unsafe {
core::ptr::write_volatile(ptr as *mut T, value);
}
Ok(())
}
}
#[repr(transparent)]
/// An offset into the shared section that is known to point to the request field.
///
/// It is more convenient to use this type than reading or writing the memory
/// areas directly if all you want is to change the request field.
struct ReqKmapOffset(CheckedKmapOffset);
impl ReqKmapOffset {
/// Toggle acknowledge bits to send an event to the FW
///
/// The Host -> FW event/message passing was designed to be lockless, with each side of
/// the channel having its own writeable section. Events are signaled as a difference between
/// the host and FW side in the req/ack registers (when a bit differs, there's an event
/// pending; when they are the same, nothing needs attention).
///
/// This helper allows one to update the req register based on the current value of the
/// ack register managed by the FW. Toggling a specific bit will flag an event. In order
/// for events to be re-evaluated, the interface doorbell needs to be rung.
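///
/// For example (illustrative), with `reqs = GLB_PING`: if the FW's ack bit is
/// currently 1, the computation below writes 0 into the matching req bit; req
/// and ack now differ, so the event stays pending until the FW toggles ack.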
fn toggle_reqs(
&self,
shared_section: &mut Section,
ack: CheckedKmapOffset,
reqs: u32,
) -> Result {
let cur_req_val = self.0.read::<u32>(shared_section)?;
let ack_val = ack.read::<u32>(shared_section)?;
let new_val = ((ack_val ^ reqs) & reqs) | (cur_req_val & !reqs);
self.0.write::<u32>(shared_section, new_val)
}
/// Update bits to reflect a configuration change
///
/// Some configuration values get passed through req registers that are also used to
/// send events to the FW.
fn update_reqs(&self, shared_section: &mut Section, val: u32, reqs: u32) -> Result {
let cur_req_val = self.0.read::<u32>(shared_section)?;
let new_val = (cur_req_val & !reqs) | (val & reqs);
self.0.write::<u32>(shared_section, new_val)
}
/// Returns whether any requests are pending.
///
/// Requests are pending when the value of the given bit in the req differs
/// from the one in ack.
fn pending_reqs(
&self,
shared_section: &mut Section,
ack: CheckedKmapOffset,
reqs: u32,
) -> Result<bool> {
let cur_req_val = self.0.read::<u32>(shared_section)? & reqs;
let cur_ack_val = ack.read::<u32>(shared_section)? & reqs;
Ok((cur_req_val ^ cur_ack_val) != 0)
}
}
/// Our interface to the MCU.
#[pin_data]
pub(crate) struct Firmware {
#[pin]
/// The sections read from the firmware binary. These sections are loaded
/// into GPU memory via BOs.
sections: Mutex<KVec<Section>>,
/// The global FW interface.
global_iface: Arc<GlobalInterface>,
/// The VM that we load the firmware into. This VM is always bound to AS0.
vm: Arc<Mutex<Vm>>,
#[pin]
/// A condvar representing a wait on an MCU request.
///
/// We notify all waiters on every interrupt.
pub(crate) req_wait: Arc<ReqWait>,
#[pin]
/// Whether the MCU has booted.
pub(crate) booted: SpinLockIrq<bool>,
}
impl Firmware {
pub(crate) fn init(
tdev: &TyrDevice,
pdev: platform::Device,
gpu_info: &GpuInfo,
mmu: Pin<&Mutex<Mmu>>,
iomem: Arc<Devres<IoMem>>,
) -> Result<impl PinInit<Self>> {
let vm = {
let auto_kernel_va = CSF_MCU_SHARED_REGION_START as u64
..CSF_MCU_SHARED_REGION_START as u64 + CSF_MCU_SHARED_REGION_SIZE as u64;
let mut mmu = mmu.lock();
// Create the FW VM. This will be used to communicate between the CPU
// and the MCU.
let vm = mmu.create_vm(
tdev,
pdev.clone(),
gpu_info,
true,
VmLayout {
user: 0..0,
kernel: 0..4 * SZ_1G as u64,
},
auto_kernel_va,
)?;
mmu.bind_vm(vm.clone(), gpu_info, &iomem)?;
vm
};
let (sections, shared_section) =
Self::read_sections(tdev, iomem.clone(), gpu_info, vm.clone())?;
let req_wait = ReqWait::new()?;
let global_iface = GlobalInterface::new(shared_section, iomem.clone(), req_wait.clone())?;
Ok(pin_init!(Self {
sections <- new_mutex!(sections),
global_iface,
vm,
req_wait,
booted <- new_spinlock_irq!(false),
}))
}
/// Enables the FW interfaces.
pub(crate) fn enable(
&self,
iomem: Arc<Devres<IoMem>>,
gpu_info: &GpuInfo,
core_clk: &Clk,
) -> Result {
self.global_iface.enable(iomem, gpu_info, core_clk)?;
self.global_iface.ping_once()?;
// self.global_iface.arm_watchdog()?;
// TODO: enable other interfaces in the future.
Ok(())
}
// Wait for acks from the MCU.
//
// `req_ptr` and `ack_ptr` are pointers to memory regions shared with the
// MCU.
//
// Plain references are not used because the underlying shared memory can
// be mutated at any time, violating Rust assumptions about its contents.
//
// # Safety
//
// - Callers must ensure that `req_ptr` and `ack_ptr` are valid pointers.
// unsafe fn wait_acks(
// req_ptr: *const u32,
// ack_ptr: *mut u32,
// wait: &CondVar,
// req_mask: u32,
// timeout_ms: u32,
// ) -> Result<u32> {
// // SAFETY: safe as per the safety requirements.
// let req = unsafe { *req_ptr } & req_mask;
// let acked = req_mask;
// // SAFETY: safe as per the safety requirements.
// let op = || unsafe { *ack_ptr };
// let cond = |acked: &u32| *acked & req_mask == req;
// let poll_res = io::poll::read_poll_timeout(
// op,
// cond,
// time::Delta::from_millis(100),
// Some(time::Delta::from_millis(200)),
// )?;
// if let Err(ETIMEDOUT) = poll_res {
// wait.wait_interruptible_timeout(guard, jiffies)
// } else {
// poll_res
// }
// }
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! Code to control the global interface of the CSF firmware.
use kernel::bits::genmask_u32;
use kernel::clk::Clk;
use kernel::devres::Devres;
use kernel::impl_has_work;
use kernel::interrupt::interrupt_disable;
use kernel::io;
use kernel::io::mem::IoMem;
use kernel::new_spinlock_irq;
use kernel::new_work;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::SpinLockIrq;
use kernel::time;
use kernel::time::Delta;
use kernel::workqueue;
use kernel::workqueue::Work;
use kernel::workqueue::WorkItem;
use crate::fw::ReqKmapOffset;
use crate::gpu::GpuInfo;
use crate::regs::Doorbell;
use crate::regs::CSF_GLB_DOORBELL_ID;
use super::{CheckedKmapOffset, ReqWait, Section};
#[allow(dead_code)]
mod constants {
use kernel::bits::{bit_u32, genmask_u32};
pub(super) const GLB_TIMER_SOURCE_GPU_COUNTER: u32 = bit_u32(31);
pub(super) const PROGRESS_TIMEOUT_CYCLES: u32 = 5 * 500 * 1024 * 1024;
pub(super) const PROGRESS_TIMEOUT_SCALE_SHIFT: u32 = 10;
pub(super) const IDLE_HYSTERESIS_US: u32 = 800;
pub(super) const PWROFF_HYSTERESIS_US: u32 = 10000;
pub(super) const GLB_HALT: u32 = bit_u32(0);
pub(super) const GLB_CFG_PROGRESS_TIMER: u32 = bit_u32(1);
pub(super) const GLB_CFG_ALLOC_EN: u32 = bit_u32(2);
pub(super) const GLB_CFG_POWEROFF_TIMER: u32 = bit_u32(3);
pub(super) const GLB_PROTM_ENTER: u32 = bit_u32(4);
pub(super) const GLB_PERFCNT_EN: u32 = bit_u32(5);
pub(super) const GLB_PERFCNT_SAMPLE: u32 = bit_u32(6);
pub(super) const GLB_COUNTER_EN: u32 = bit_u32(7);
pub(super) const GLB_PING: u32 = bit_u32(8);
pub(super) const GLB_FWCFG_UPDATE: u32 = bit_u32(9);
pub(super) const GLB_IDLE_EN: u32 = bit_u32(10);
pub(super) const GLB_SLEEP: u32 = bit_u32(12);
pub(super) const GLB_INACTIVE_COMPUTE: u32 = bit_u32(20);
pub(super) const GLB_INACTIVE_FRAGMENT: u32 = bit_u32(21);
pub(super) const GLB_INACTIVE_TILER: u32 = bit_u32(22);
pub(super) const GLB_PROTM_EXIT: u32 = bit_u32(23);
pub(super) const GLB_PERFCNT_THRESHOLD: u32 = bit_u32(24);
pub(super) const GLB_PERFCNT_OVERFLOW: u32 = bit_u32(25);
pub(super) const GLB_IDLE: u32 = bit_u32(26);
pub(super) const GLB_DBG_CSF: u32 = bit_u32(30);
pub(super) const GLB_DBG_HOST: u32 = bit_u32(31);
pub(super) const GLB_REQ_MASK: u32 = genmask_u32(10, 0);
pub(super) const GLB_EVT_MASK: u32 = genmask_u32(26, 20);
pub(super) const PING_INTERVAL_MS: i64 = 12000;
}
use constants::*;
fn glb_timer_val(val: u32) -> u32 {
val & genmask_u32(30, 0)
}
#[repr(transparent)]
/// A value that is valid to pass for timeout fields in the global interface.
struct TimeoutCycles(u32);
impl TimeoutCycles {
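/// Converts a timeout in microseconds into GLB timer cycles.
///
/// Worked example (illustrative): with a 500 MHz core clock and
/// `timeout_us = 10000`, this computes
/// `ceil(10000 * 500000000 / (1000000 << 10)) = 4883` cycles; the `<< 10`
/// presumably reflects the timer's 1024-cycle granularity.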
fn from_micro(core_clk: &Clk, timeout_us: u32) -> Result<Self> {
let timer_rate = core_clk.rate() as u64;
if timer_rate == 0 {
return Err(EINVAL);
}
let mut mod_cycles = (u64::from(timeout_us) * timer_rate).div_ceil(1000000 << 10);
if mod_cycles > glb_timer_val(u32::MAX).into() {
pr_err!("Invalid timeout computed\n");
mod_cycles = glb_timer_val(u32::MAX).into();
}
let mod_cycles = u32::try_from(mod_cycles)?;
Ok(Self(
glb_timer_val(mod_cycles) | GLB_TIMER_SOURCE_GPU_COUNTER,
))
}
}
impl From<TimeoutCycles> for u32 {
fn from(value: TimeoutCycles) -> Self {
value.0
}
}
/// The global control interface.
#[repr(C)]
pub(crate) struct Control {
pub(crate) version: u32,
pub(crate) features: u32,
pub(crate) input_va: u32,
pub(crate) output_va: u32,
pub(crate) group_num: u32,
pub(crate) group_stride: u32,
pub(crate) perfcnt_size: u32,
pub(crate) instr_features: u32,
}
impl Control {
/// CSF major version.
pub(crate) fn version_major(&self) -> u32 {
self.version >> 24
}
/// CSF minor version.
pub(crate) fn version_minor(&self) -> u32 {
(self.version >> 16) & 0xff
}
/// CSF patch version.
pub(crate) fn version_patch(&self) -> u32 {
self.version & 0xffff
}
}
#[repr(C)]
#[derive(Debug)]
/// The input area for the global interface
pub(crate) struct Input {
pub(crate) req: u32,
pub(crate) ack_irq_mask: u32,
pub(crate) doorbell_req: u32,
pub(crate) reserved1: u32,
pub(crate) progress_timer: u32,
pub(crate) poweroff_timer: u32,
pub(crate) core_en_mask: u32,
pub(crate) reserved2: u32,
pub(crate) perfcnt_as: u32,
pub(crate) perfcnt_base: u64,
pub(crate) perfcnt_extract: u32,
pub(crate) reserved3: [u32; 3],
pub(crate) perfcnt_config: u32,
pub(crate) perfcnt_csg_select: u32,
pub(crate) perfcnt_fw_enable: u32,
pub(crate) perfcnt_csg_enable: u32,
pub(crate) perfcnt_csf_enable: u32,
pub(crate) perfcnt_shader_enable: u32,
pub(crate) perfcnt_tiler_enable: u32,
pub(crate) perfcnt_mmu_l2_enable: u32,
pub(crate) reserved4: [u32; 8],
pub(crate) idle_timer: u32,
}
#[repr(C)]
#[derive(Debug)]
/// The output area for the global interface
pub(crate) struct Output {
pub(crate) ack: u32,
pub(crate) reserved1: u32,
pub(crate) doorbell_ack: u32,
pub(crate) reserved2: u32,
pub(crate) halt_status: u32,
pub(crate) perfcnt_status: u32,
pub(crate) perfcnt_insert: u32,
}
macro_rules! impl_shared_section_rw {
($type:ty) => {
impl $type {
/// Reads this structure from the shared section at the given offset.
///
/// Note that the area pointed to by `ptr` is shared with the MCU, so we
/// cannot simply parse it or cast it to &Self.
///
/// Merely taking a reference to it would be UB, as the MCU can change the
/// underlying memory at any time, as it is a core running its own code.
pub(super) fn read(
shared_section: &mut Section,
offset: CheckedKmapOffset,
) -> Result<Self> {
let ptr = offset.as_mut_ptr(shared_section)?;
// SAFETY: we know that this pointer is aligned and valid for reads for
// at least size_of::<Self>() bytes.
Ok(unsafe { core::ptr::read_volatile(ptr as *mut Self) })
}
/// Writes this structure to the shared section at the given offset.
///
/// Note that the area pointed to by `ptr` is shared with the MCU, so we
/// cannot simply parse it or cast it to &mut Self.
///
/// Merely taking a reference to it would be UB, as the MCU can change the
/// underlying memory at any time, as it is a core running its own code.
pub(super) fn write(
self,
shared_section: &mut Section,
offset: CheckedKmapOffset,
) -> Result<()> {
let ptr = offset.as_mut_ptr(shared_section)?;
// SAFETY: we know that this pointer is aligned and valid for writes for
// at least size_of::<Self>() bytes.
unsafe {
core::ptr::write_volatile(ptr as *mut Self, self);
}
Ok(())
}
}
};
}
impl_shared_section_rw!(Control);
impl_shared_section_rw!(Input);
impl_shared_section_rw!(Output);
enum GlobalInterfaceState {
Disabled,
Enabled {
control_offset: CheckedKmapOffset,
input_offset: CheckedKmapOffset,
output_offset: CheckedKmapOffset,
},
}
#[pin_data]
/// The global interface.
pub(super) struct GlobalInterface {
#[pin]
state: SpinLockIrq<GlobalInterfaceState>,
#[pin]
ping_work: Work<Self>,
iomem: Arc<Devres<IoMem>>,
#[pin]
shared_section: SpinLockIrq<Section>,
req_wait: Arc<ReqWait>,
}
impl GlobalInterface {
pub(super) fn new(
shared_section: Section,
iomem: Arc<Devres<IoMem>>,
req_wait: Arc<ReqWait>,
) -> Result<Arc<Self>> {
let init = pin_init!(Self {
state <- new_spinlock_irq!(GlobalInterfaceState::Disabled),
ping_work <- new_work!("TyrFwPingWork"),
iomem,
shared_section <- new_spinlock_irq!(shared_section),
req_wait,
});
Arc::pin_init(init, GFP_KERNEL)
}
pub(super) fn enable(
self: &Arc<Self>,
iomem: Arc<Devres<IoMem>>,
gpu_info: &GpuInfo,
core_clk: &Clk,
) -> Result {
// This takes a mutex internally in clk_prepare().
let poweroff_timer = TimeoutCycles::from_micro(core_clk, PWROFF_HYSTERESIS_US)?.into();
let interrupt_disable = interrupt_disable();
let mut shared_section = self.shared_section.lock_with(&interrupt_disable);
let control_offset = CheckedKmapOffset(0);
let op = || Control::read(&mut shared_section, control_offset);
let cond = |control: &Control| -> bool { control.version != 0 };
let _ = io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(0),
Some(time::Delta::from_millis(200)),
);
let control = Control::read(&mut shared_section, control_offset)?;
if control.version == 0 {
pr_err!("MCU firmware version is 0. Firmware may have failed to boot\n");
return Err(EINVAL);
}
let input_offset = super::Firmware::kmap_offset(&shared_section, control.input_va.into())?;
let output_offset =
super::Firmware::kmap_offset(&shared_section, control.output_va.into())?;
pr_info!(
"CSF FW using interface v.{}.{}.{}, Features {} Instrumentation features {}\n",
control.version_major(),
control.version_minor(),
control.version_patch(),
control.features,
control.instr_features
);
let mut input = Input::read(&mut shared_section, input_offset)?;
// Enable all shader cores.
input.core_en_mask = gpu_info.shader_present as u32;
// Setup timers.
input.poweroff_timer = poweroff_timer;
input.progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
input.idle_timer = IDLE_HYSTERESIS_US;
// Enable the interrupts we care about.
input.ack_irq_mask =
GLB_CFG_ALLOC_EN | GLB_PING | GLB_CFG_POWEROFF_TIMER | GLB_IDLE_EN | GLB_IDLE;
input.write(&mut shared_section, input_offset)?;
// Req is the first field of the input area.
let req = ReqKmapOffset(input_offset);
req.update_reqs(&mut shared_section, GLB_IDLE_EN, GLB_IDLE_EN)?;
// Ack is the first field of the output area.
let ack_offset = output_offset;
let reqs = GLB_CFG_ALLOC_EN | GLB_CFG_POWEROFF_TIMER | GLB_CFG_PROGRESS_TIMER;
req.toggle_reqs(&mut shared_section, ack_offset, reqs)?;
// Make lockdep happy, and also make sure that this lock is not held
// when the interrupt fires, which can be immediately after the doorbell
// is rung.
drop(shared_section);
Doorbell::new(CSF_GLB_DOORBELL_ID).write(&iomem, 1)?;
let mut state = self.state.lock();
*state = GlobalInterfaceState::Enabled {
control_offset,
input_offset,
output_offset,
};
Ok(())
}
pub(crate) fn arm_watchdog(self: &Arc<Self>) -> Result {
workqueue::system_unbound()
.enqueue(self.clone())
.map_err(|_| EINVAL)
}
pub(crate) fn ping_once(&self) -> Result {
// Have this here until we support delayed work. Instead of a
// heartbeat, we get a one-shot ping, but that is OK for now.
kernel::time::delay::fsleep(Delta::from_millis(100));
// let interrupt_disable = interrupt_disable();
// let mut state = this.state.lock_with(&interrupt_disable);
// Unfortunately, we cannot have both interrupts disabled and CondVar at
// the same time for now.
//
// TODO: I don't think we access GlobalInterfaceState from IRQ context,
// so we can possibly switch to a regular SpinLock or even a Mutex.
let state = self.state.lock();
if let &GlobalInterfaceState::Enabled {
input_offset,
output_offset,
..
} = &*state
{
pr_info!("Pinging the CSF global interface\n");
// Req is the first field of the input area.
let req = ReqKmapOffset(input_offset);
// Ack is the first field of the output area.
let ack = output_offset;
req.toggle_reqs(&mut self.shared_section.lock(), ack, GLB_PING)?;
Doorbell::new(CSF_GLB_DOORBELL_ID).write(&self.iomem, 1)?;
if !req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING)? {
pr_info!("CSF has responded to a ping request\n");
} else {
let op = || req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING);
let cond = |pending_reqs: &bool| !*pending_reqs;
io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(0),
Some(time::Delta::from_millis(100)),
)?;
if !req.pending_reqs(&mut self.shared_section.lock(), ack, GLB_PING)? {
pr_info!("CSF has responded to a ping request\n");
} else {
pr_err!("CSF has not responded to a ping request\n");
return Err(ETIMEDOUT);
}
}
}
Ok(())
}
}
impl_has_work! {
impl HasWork<Self> for GlobalInterface {
self.ping_work
}
}
impl WorkItem for GlobalInterface {
type Pointer = Arc<Self>;
fn run(_this: Self::Pointer) {
/* TODO: we need support for delayed_work */
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! The IRQ handling for the Job IRQs.
//!
//! The Job IRQ controls our interactions with the MCU.
use kernel::c_str;
use kernel::devres::Devres;
use kernel::io::mem::IoMem;
use kernel::irq;
use kernel::irq::request::Handler as IrqHandler;
use kernel::irq::request::IrqReturn;
use kernel::irq::request::Registration as IrqRegistration;
use kernel::platform;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::types::ARef;
use crate::driver::TyrDevice;
use crate::regs;
pub(crate) struct JobIrqHandler {
tdev: ARef<TyrDevice>,
iomem: Arc<Devres<IoMem>>,
}
impl IrqHandler for JobIrqHandler {
fn handle_irq(&self) -> IrqReturn {
let rawstat = regs::JOB_INT_RAWSTAT.read(&self.iomem).unwrap_or_default();
dev_info!(self.tdev.as_ref(), "Acknowledging job IRQ\n");
let _ = regs::JOB_INT_CLEAR.write(&self.iomem, rawstat);
let data = self.tdev.data();
let mut booted = data.fw.booted.lock();
if !*booted && rawstat & regs::JOB_INT_GLOBAL_IF != 0 {
*booted = true;
dev_info!(self.tdev.as_ref(), "GPU is ready to accept requests\n");
}
// Notify everyone waiting on a response from CSF.
data.fw.req_wait.notify_all();
IrqReturn::Handled
}
}
pub(crate) fn job_irq_init(
tdev: ARef<TyrDevice>,
pdev: platform::Device,
iomem: Arc<Devres<IoMem>>,
) -> Result<impl PinInit<IrqRegistration<JobIrqHandler>, Error>> {
let job_irq = pdev.irq_by_name(c_str!("job"))?;
let irq_handler = JobIrqHandler {
tdev,
iomem: iomem.clone(),
};
// Let's disable IRQs in favor of explicit polling for now due to issues with
// SpinLockIrq and CondVar.
//
// JOB_INT_MASK.write(&iomem, u32::MAX)?;
regs::JOB_INT_MASK.write(&iomem, 0)?;
Ok(IrqRegistration::register(
job_irq,
irq::request::flags::SHARED,
c_str!("tyr-job"),
irq_handler,
))
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! Code to parse the firmware binary.
use core::ops::Range;
use cursor::Cursor;
use kernel::alloc::KVec;
use kernel::bits::bit_u32;
use kernel::c_str;
use kernel::devres::Devres;
use kernel::fmt;
use kernel::io::mem::IoMem;
use kernel::prelude::*;
use kernel::str::CString;
use kernel::sync::Arc;
use kernel::sync::Mutex;
use crate::driver::TyrDevice;
use crate::fw::CheckedKmapOffset;
use crate::fw::Firmware;
use crate::fw::CSF_MCU_SHARED_REGION_START;
use crate::gem;
use crate::gem::KernelVaPlacement;
use crate::gpu::GpuId;
use crate::gpu::GpuInfo;
use crate::mmu::vm;
use crate::mmu::vm::Vm;
mod cursor;
const FW_BINARY_MAGIC: u32 = 0xc3f13a6e;
const FW_BINARY_MAJOR_MAX: u8 = 0;
mod flags {
use kernel::bits::bit_u32;
use kernel::bits::genmask_u32;
use kernel::prelude::*;
use crate::impl_flags;
impl_flags!(Flags, Flag, u32);
const CACHE_MODE_MASK: Flags = Flags(genmask_u32(4, 3));
impl Flags {
pub(crate) fn cache_mode(&self) -> Flags {
*self & CACHE_MODE_MASK
}
}
impl TryFrom<u32> for Flags {
type Error = Error;
fn try_from(value: u32) -> Result<Self, Self::Error> {
if value & valid_flags().0 != value {
Err(EINVAL)
} else {
Ok(Self(value))
}
}
}
pub(crate) fn valid_flags() -> Flags {
Flags::from(READ)
| Flags::from(WRITE)
| Flags::from(EXEC)
| CACHE_MODE_MASK
| Flags::from(PROT)
| Flags::from(SHARED)
| Flags::from(ZERO)
}
pub(crate) const READ: Flag = Flag(bit_u32(0));
pub(crate) const WRITE: Flag = Flag(bit_u32(1));
pub(crate) const EXEC: Flag = Flag(bit_u32(2));
pub(crate) const CACHE_MODE_NONE: Flag = Flag(0 << 3);
pub(crate) const CACHE_MODE_CACHED: Flag = Flag(1 << 3);
pub(crate) const CACHE_MODE_UNCACHED_COHERENT: Flag = Flag(2 << 3);
pub(crate) const CACHE_MODE_CACHED_COHERENT: Flag = Flag(3 << 3);
pub(crate) const PROT: Flag = Flag(bit_u32(5));
pub(crate) const SHARED: Flag = Flag(bit_u32(30));
pub(crate) const ZERO: Flag = Flag(bit_u32(31));
}
struct BuildInfoHeader(Range<u32>);
/// A parsed section of the firmware binary.
pub(super) struct Section {
/// Flags for this section.
flags: flags::Flags,
/// The name of the section in the binary, if any.
name: Option<CString>,
/// The raw parsed data for reset purposes.
data: KVec<u8>,
/// The BO that this section was loaded into.
pub(super) mem: gem::ObjectRef,
/// The VA range for this section.
///
/// The MCU expects the firmware to be loaded at specific addresses.
va: Range<u32>,
/// The flags used to map this section.
vm_map_flags: vm::map_flags::Flags,
}
/// The firmware header.
struct BinaryHeader {
/// Magic value to check binary validity.
magic: u32,
/// Minor FW version.
minor: u8,
/// Major FW version.
major: u8,
/// Padding. Must be set to zero.
_padding1: u16,
/// FW Version hash
version_hash: u32,
/// Padding. Must be set to zero.
_padding2: u32,
/// FW binary size
size: u32,
}
impl BinaryHeader {
fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> {
let magic = cursor.read_u32(tdev)?;
if magic != FW_BINARY_MAGIC {
dev_err!(tdev.as_ref(), "Invalid firmware magic");
return Err(EINVAL);
}
let minor = cursor.read_u8(tdev)?;
let major = cursor.read_u8(tdev)?;
let padding1 = cursor.read_u16(tdev)?;
let version_hash = cursor.read_u32(tdev)?;
let padding2 = cursor.read_u32(tdev)?;
let size = cursor.read_u32(tdev)?;
if padding1 != 0 || padding2 != 0 {
dev_err!(
tdev.as_ref(),
"Invalid firmware file: header padding is not zero"
);
return Err(EINVAL);
}
Ok(Self {
magic,
minor,
major,
_padding1: padding1,
version_hash,
_padding2: padding2,
size,
})
}
}
#[derive(Clone, Copy, Debug)]
enum BinaryEntryType {
/// Host <-> FW interface.
Iface = 0,
/// FW config.
Config = 1,
/// Unit tests.
FutfTest = 2,
/// Trace buffer interface.
TraceBuffer = 3,
/// Timeline metadata interface.
TimelineMetadata = 4,
/// Metadata about how the FW binary was built
BuildInfoMetadata = 6,
}
impl TryFrom<u8> for BinaryEntryType {
type Error = Error;
fn try_from(value: u8) -> Result<Self, Self::Error> {
match value {
0 => Ok(BinaryEntryType::Iface),
1 => Ok(BinaryEntryType::Config),
2 => Ok(BinaryEntryType::FutfTest),
3 => Ok(BinaryEntryType::TraceBuffer),
4 => Ok(BinaryEntryType::TimelineMetadata),
6 => Ok(BinaryEntryType::BuildInfoMetadata),
_ => Err(EINVAL),
}
}
}
#[derive(Debug)]
struct BinarySectionEntryHeader {
/// Section flags
flags: flags::Flags,
/// MCU virtual range to map this binary section to.
va: Range<u32>,
/// References the data in the FW binary.
data: Range<u32>,
}
impl BinarySectionEntryHeader {
fn new(tdev: &TyrDevice, cursor: &mut Cursor<'_>) -> Result<Self> {
let flags = cursor.read_u32(tdev)?;
let flags = flags::Flags::try_from(flags)?;
let va_start = cursor.read_u32(tdev)?;
let va_end = cursor.read_u32(tdev)?;
let va = va_start..va_end;
if va.is_empty() {
dev_err!(
tdev.as_ref(),
"Invalid firmware file: empty VA range at pos {}\n",
cursor.pos(),
);
return Err(EINVAL);
}
let data_start = cursor.read_u32(tdev)?;
let data_end = cursor.read_u32(tdev)?;
let data = data_start..data_end;
Ok(Self { flags, va, data })
}
}
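/// A raw entry header from the firmware binary.
///
/// Going by the accessors below: bits 0..8 hold the entry type, bits 8..16 the
/// entry size in bytes, and bit 31 marks the entry as optional.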
struct BinaryEntryHeader(u32);
impl BinaryEntryHeader {
/// The entry type.
fn entry_ty(&self) -> Result<BinaryEntryType> {
let v = (self.0 & 0xff) as u8;
BinaryEntryType::try_from(v)
}
/// Whether this entry is optional.
fn optional(&self) -> bool {
self.0 & bit_u32(31) != 0
}
/// The size of the entry.
fn size(&self) -> u32 {
self.0 >> 8 & 0xff
}
}
struct BinaryEntrySection {
hdr: BinaryEntryHeader,
inner: Option<Section>,
}
impl Firmware {
/// Parses the firmware sections from the binary.
pub(super) fn read_sections(
tdev: &TyrDevice,
iomem: Arc<Devres<IoMem>>,
gpu_info: &GpuInfo,
vm: Arc<Mutex<Vm>>,
) -> Result<(KVec<Section>, Section)> {
let gpu_id = GpuId::from(gpu_info.gpu_id);
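// E.g. "arm/mali/arch10.8/mali_csffw.bin" for the Mali-G610 found on rk3588
// (assuming the usual arch major/minor encoding in GPU_ID).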
let fw_path = CString::try_from_fmt(fmt!(
"arm/mali/arch{}.{}/mali_csffw.bin",
gpu_id.arch_major,
gpu_id.arch_minor
))?;
let fw = kernel::firmware::Firmware::request(&fw_path, tdev.as_ref())?;
let mut cursor = Cursor::new(fw.data());
dev_info!(
tdev.as_ref(),
"Requested {} bytes of firmware successfully\n",
fw.data().len()
);
let fw_bin_hdr = match BinaryHeader::new(tdev, &mut cursor) {
Ok(fw_bin_hdr) => fw_bin_hdr,
Err(e) => {
dev_err!(tdev.as_ref(), "Invalid firmware file: {}", e.to_errno());
return Err(e);
}
};
if fw_bin_hdr.magic != FW_BINARY_MAGIC {
dev_err!(tdev.as_ref(), "Invalid firmware magic");
return Err(EINVAL);
}
if fw_bin_hdr.major > FW_BINARY_MAJOR_MAX {
dev_err!(
tdev.as_ref(),
"Unsupported firmware binary version: {}.{}",
fw_bin_hdr.major,
fw_bin_hdr.minor
);
return Err(EINVAL);
}
if fw_bin_hdr.size > cursor.len() as u32 {
dev_err!(tdev.as_ref(), "Firmware image is truncated");
return Err(EINVAL);
}
let mut sections = Vec::new();
let mut shared_section = None;
while (cursor.pos() as u32) < fw_bin_hdr.size {
let entry = Self::read_entry(&mut cursor, tdev, iomem.clone(), &fw, vm.clone())?;
cursor.advance((entry.hdr.size() - 4) as usize)?;
if let Some(section) = entry.inner {
// TODO: refactor this.
if section.flags.contains(flags::SHARED) {
shared_section = Some(section);
} else {
sections.push(section, GFP_KERNEL)?;
}
}
}
let shared_section = shared_section.ok_or_else(|| {
dev_err!(tdev.as_ref(), "No shared section found in firmware");
EINVAL
})?;
Ok((sections, shared_section))
}
fn read_entry(
cursor: &mut Cursor<'_>,
tdev: &TyrDevice,
iomem: Arc<Devres<IoMem>>,
fw: &kernel::firmware::Firmware,
vm: Arc<Mutex<Vm>>,
) -> Result<BinaryEntrySection> {
let section = BinaryEntrySection {
hdr: BinaryEntryHeader(cursor.read_u32(tdev)?),
inner: None,
};
let section_size = section.hdr.size() as usize - core::mem::size_of::<BinaryEntryHeader>();
let entry_ty = match section.hdr.entry_ty() {
Ok(entry_ty) => entry_ty,
Err(e) => {
if section.hdr.optional() {
dev_info!(
tdev.as_ref(),
"Skipping unknown optional firmware entry type: {}",
e.to_errno()
);
return Ok(section);
} else {
dev_err!(
tdev.as_ref(),
"Invalid firmware entry type: {}",
e.to_errno()
);
return Err(EINVAL);
}
}
};
if cursor.pos() % core::mem::size_of::<u32>() != 0 {
dev_err!(
tdev.as_ref(),
"Invalid firmware file: entry not aligned to 4 bytes at pos {}\n",
cursor.pos()
);
return Err(EINVAL);
}
let mut entry_cursor = cursor.view(cursor.pos()..cursor.pos() + section_size)?;
match entry_ty {
BinaryEntryType::Iface => Ok(BinaryEntrySection {
hdr: section.hdr,
inner: Self::read_section(tdev, iomem, &mut entry_cursor, fw, vm.clone())?,
}),
BinaryEntryType::BuildInfoMetadata => {
// TODO: Read build metadata
Ok(section)
}
BinaryEntryType::Config
| BinaryEntryType::FutfTest
| BinaryEntryType::TraceBuffer
| BinaryEntryType::TimelineMetadata => Ok(section),
}
}
fn read_section(
tdev: &TyrDevice,
iomem: Arc<Devres<IoMem>>,
cursor: &mut Cursor<'_>,
fw: &kernel::firmware::Firmware,
vm: Arc<Mutex<Vm>>,
) -> Result<Option<Section>> {
let hdr = BinarySectionEntryHeader::new(tdev, cursor)?;
if hdr.flags.contains(flags::PROT) {
dev_warn!(
tdev.as_ref(),
"Firmware protected mode entry not supported, ignoring"
);
return Ok(None);
}
if hdr.va.start == CSF_MCU_SHARED_REGION_START && !hdr.flags.contains(flags::SHARED) {
dev_err!(
tdev.as_ref(),
"Interface at 0x{:x} must be shared",
CSF_MCU_SHARED_REGION_START
);
return Err(EINVAL);
}
let name_len = cursor.len() - cursor.pos();
let name_bytes = cursor.read(tdev, name_len)?;
let mut name = KVec::with_capacity(name_bytes.len() + 1, GFP_KERNEL)?;
name.extend_from_slice(name_bytes, GFP_KERNEL)?;
name.push(0, GFP_KERNEL)?;
let name = CStr::from_bytes_with_nul(&name)
.ok()
.and_then(|name| CString::try_from(name).ok());
let fw = fw.data();
let section_start = hdr.data.start as usize;
let section_end = hdr.data.end as usize;
let mut data = KVec::new();
data.extend_from_slice(&fw[section_start..section_end], GFP_KERNEL)?;
let bo_len = (hdr.va.end - hdr.va.start) as usize;
let cache_mode = hdr.flags.cache_mode();
let mut vm_map_flags = vm::map_flags::Flags::empty();
if !hdr.flags.contains(flags::WRITE) {
vm_map_flags |= vm::map_flags::READONLY;
}
if !hdr.flags.contains(flags::EXEC) {
vm_map_flags |= vm::map_flags::NOEXEC;
}
if cache_mode != flags::CACHE_MODE_CACHED.into() {
vm_map_flags |= vm::map_flags::UNCACHED;
}
let mut mem = gem::new_kernel_object(
tdev,
iomem,
bo_len,
vm,
KernelVaPlacement::At(hdr.va.start as u64..hdr.va.end as u64),
vm_map_flags,
)?;
let vmap = mem.vmap()?;
let vmap = vmap.as_mut_slice();
vmap[0..data.len()].copy_from_slice(&data);
if hdr.flags.contains(flags::ZERO) {
vmap[data.len()..].fill(0);
}
dev_info!(
tdev.as_ref(),
"Copied firmware data to BO {:p} of size {} with flags {}\n",
&mem.gem,
bo_len,
vm_map_flags
);
Ok(Some(Section {
flags: hdr.flags,
name,
data,
mem,
va: hdr.va,
vm_map_flags,
}))
}
fn read_build_info(cursor: &mut Cursor<'_>, tdev: &TyrDevice) -> Result<()> {
let meta_start = cursor.read_u32(tdev)? as usize;
let meta_end = cursor.read_u32(tdev)? as usize;
let expected_hdr = b"git_sha: ";
let hdr = cursor.read(tdev, expected_hdr.len())?;
if hdr != expected_hdr {
dev_warn!(tdev.as_ref(), "Firmware's git sha is missing\n");
return Ok(());
}
let sz = meta_end - meta_start - expected_hdr.len();
let sha = cursor.read(tdev, sz)?;
if sha[sha.len() - 1] != 0 {
dev_warn!(tdev.as_ref(), "Firmware's git sha is not NULL terminated\n");
return Ok(()); // Don't treat as fatal
}
let sha = CStr::from_bytes_with_nul(sha).unwrap_or(c_str!(""));
dev_info!(
tdev.as_ref(),
"Firmware git sha: {}\n",
sha.to_str().unwrap_or("")
);
Ok(())
}
/// Computes the offset into the shared section for a given VA in the shared
/// area.
///
/// The result is an offset that can be safely dereferenced by the CPU.
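///
/// For example (illustrative): with the shared section at MCU VA `0x04000000`,
/// an `mcu_va` of `0x04000010` maps to offset `0x10`.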
pub(super) fn kmap_offset(shared_section: &Section, mcu_va: u64) -> Result<CheckedKmapOffset> {
let shared_mem_start = u64::from(shared_section.va.start);
let shared_mem_end = u64::from(shared_section.va.end);
if mcu_va < shared_mem_start || mcu_va >= shared_mem_end {
Err(EINVAL)
} else {
let offset = (mcu_va - shared_mem_start) as usize;
Ok(CheckedKmapOffset(offset))
}
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! A bare-bones std::io::Cursor<[u8]> clone to keep track of the current
//! position in the firmware binary.
use core::ops::Range;
use kernel::prelude::*;
use crate::driver::TyrDevice;
pub(crate) struct Cursor<'a> {
data: &'a [u8],
pos: usize,
}
impl<'a> Cursor<'a> {
pub(crate) fn new(data: &'a [u8]) -> Self {
Self { data, pos: 0 }
}
pub(super) fn len(&self) -> usize {
self.data.len()
}
pub(super) fn pos(&self) -> usize {
self.pos
}
pub(super) fn advance(&mut self, nbytes: usize) -> Result {
if self.pos + nbytes > self.data.len() {
return Err(EINVAL);
}
self.pos += nbytes;
Ok(())
}
/// Returns a view into the cursor's data.
///
/// This spawns a new cursor, leaving the current cursor unchanged.
pub(super) fn view(&self, range: Range<usize>) -> Result<Cursor<'_>> {
if range.start < self.pos || range.end > self.data.len() {
pr_err!(
"Invalid cursor range {:?} for data of length {}",
range,
self.data.len()
);
Err(EINVAL)
} else {
Ok(Self {
data: &self.data[range],
pos: 0,
})
}
}
pub(super) fn read(&mut self, tdev: &TyrDevice, nbytes: usize) -> Result<&[u8]> {
let start = self.pos;
let end = start + nbytes;
if end > self.data.len() {
dev_err!(
tdev.as_ref(),
"Invalid firmware file: read of size {} at position {} is out of bounds",
nbytes,
start,
);
return Err(EINVAL);
}
self.pos += nbytes;
Ok(&self.data[start..end])
}
pub(super) fn read_u8(&mut self, tdev: &TyrDevice) -> Result<u8> {
let bytes = self.read(tdev, 1)?;
Ok(bytes[0])
}
pub(super) fn read_u16(&mut self, tdev: &TyrDevice) -> Result<u16> {
let bytes = self.read(tdev, 2)?;
Ok(u16::from_le_bytes(bytes.try_into().unwrap()))
}
pub(super) fn read_u32(&mut self, tdev: &TyrDevice) -> Result<u32> {
let bytes = self.read(tdev, 4)?;
Ok(u32::from_le_bytes(bytes.try_into().unwrap()))
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! Code to wait on CSF responses.
use kernel::io;
use kernel::new_condvar;
use kernel::new_spinlock_irq;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::CondVar;
use kernel::sync::SpinLockIrq;
use kernel::time;
use kernel::time::msecs_to_jiffies;
#[pin_data]
/// Represents a wait on a request made to CSF.
pub(crate) struct ReqWait {
#[pin]
/// The actual wait/signal mechanism.
cond: CondVar,
#[pin]
/// The lock backing the condvar.
///
/// This deviates a bit from the general Rust pattern of having the lock
/// wrap the data. That is because the "data" is actually a shared section
/// of GPU memory whose layout has to match the firmware's expectations.
lock: SpinLockIrq<()>,
}
impl ReqWait {
/// A convenience function to initialize the `ReqWait` struct.
///
/// There is only one instance of this struct in the entire driver.
///
/// Code that needs to wait on a given CSF request takes a reference to
/// `ReqWait`. A `notify_all()` is called every time the job IRQ is
/// triggered.
pub(crate) fn new() -> Result<Arc<Self>> {
Arc::pin_init(
pin_init!(Self {
cond <- new_condvar!(),
lock <- new_spinlock_irq!(()),
}),
GFP_KERNEL,
)
}
/// Waits for the completion of a previously submitted CSF request.
///
/// It's usually useful to busy-wait for a short period of time before going
/// to sleep, as some requests can be answered extremely quickly.
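///
/// A minimal usage sketch (illustrative; `read_ack` stands in for a closure
/// that reads the FW ack word, and `GLB_PING` for the bit being waited on):
///
/// ```ignore
/// req_wait.wait_interruptible_timeout(100, read_ack, |ack| *ack & GLB_PING != 0, true)?;
/// ```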
pub(crate) fn wait_interruptible_timeout<Op, Cond, T>(
&self,
timeout_ms: u32,
mut op: Op,
mut cond: Cond,
busy_wait_first: bool,
) -> Result
where
Op: FnMut() -> Result<T>,
Cond: FnMut(&T) -> bool,
{
if busy_wait_first {
let res = io::poll::read_poll_timeout(
&mut op,
&mut cond,
time::Delta::from_millis(0),
Some(time::Delta::from_micros(10)),
);
if res.is_ok() {
return Ok(());
}
}
let mut guard = self.lock.lock();
let mut remaining_time = msecs_to_jiffies(timeout_ms);
loop {
match self
.cond
.wait_interruptible_timeout(&mut guard, remaining_time)
{
kernel::sync::CondVarTimeoutResult::Woken { jiffies } => {
let op = op()?;
if cond(&op) {
return Ok(());
} else {
remaining_time -= jiffies
}
}
kernel::sync::CondVarTimeoutResult::Timeout => return Err(ETIMEDOUT),
kernel::sync::CondVarTimeoutResult::Signal { .. } => return Err(ERESTARTSYS),
}
}
}
pub(crate) fn notify_one(&self) {
let _guard = self.lock.lock();
self.cond.notify_one();
}
pub(crate) fn notify_all(&self) {
let _guard = self.lock.lock();
self.cond.notify_all();
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
use core::ops::Range;
use kernel::devres::Devres;
use kernel::drm::gem::shmem;
use kernel::drm::gem::BaseObject;
use kernel::drm::gem::{self};
use kernel::drm::mm;
use kernel::io::mem::IoMem;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::Mutex;
use crate::driver::TyrDevice;
use crate::driver::TyrDriver;
use crate::file::DrmFile;
use crate::mmu::vm;
use crate::mmu::vm::Vm;
/// GEM Object inner driver data
#[pin_data]
pub(crate) struct DriverObject {
/// Whether this is a kernel or user BO.
ty: ObjectType,
/// The flags received at BO creation time.
flags: u32,
}
enum ObjectType {
Kernel {
// Kernel objects have their VA managed by the MM allocator. This node
// represents the allocation.
node: mm::Node<(), ()>,
},
User,
}
/// Type alias for the GEM object type for this driver.
pub(crate) type Object = gem::shmem::Object<DriverObject>;
/// Type alias for the SGTable type for this driver.
pub(crate) type SGTable = shmem::SGTable<DriverObject>;
impl gem::BaseDriverObject<Object> for DriverObject {
fn new(dev: &TyrDevice, _size: usize) -> impl PinInit<Self, Error> {
dev_dbg!(dev.as_ref(), "DriverObject::new\n");
DriverObject {
ty: ObjectType::User,
flags: 0,
}
}
}
impl gem::shmem::DriverObject for DriverObject {
type Driver = TyrDriver;
}
/// A shared reference to a GEM object for this driver.
pub(crate) struct ObjectRef {
/// The underlying GEM object reference
pub(crate) gem: gem::ObjectRef<shmem::Object<DriverObject>>,
/// The kernel-side VMap of this object, if any.
vmap: Option<shmem::VMap<DriverObject>>,
}
impl ObjectRef {
/// Create a new wrapper for a raw GEM object reference.
pub(crate) fn new(gem: gem::ObjectRef<shmem::Object<DriverObject>>) -> ObjectRef {
ObjectRef { gem, vmap: None }
}
/// Return the `VMap` for this object, creating it if necessary.
pub(crate) fn vmap(&mut self) -> Result<&mut shmem::VMap<DriverObject>> {
if self.vmap.is_none() {
self.vmap = Some(self.gem.vmap()?);
}
Ok(self.vmap.as_mut().unwrap())
}
/// Returns the size of the object in bytes.
pub(crate) fn size(&self) -> usize {
self.gem.size()
}
}
/// Create a new DRM GEM object.
pub(crate) fn new_object(dev: &TyrDevice, size: usize, flags: u32) -> Result<ObjectRef> {
// Round the size up to the page size, rejecting zero-sized and
// overflowing requests.
if size == 0 {
return Err(EINVAL);
}
let aligned_size = size.checked_next_multiple_of(1 << 12).ok_or(EINVAL)?;
let mut gem = Object::new(dev, aligned_size)?;
gem.set_wc(true);
gem.flags = flags;
Ok(ObjectRef::new(gem.into_ref()))
}
/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it.
pub(crate) fn lookup_handle(file: &DrmFile, handle: u32) -> Result<ObjectRef> {
Ok(ObjectRef::new(shmem::Object::lookup_handle(file, handle)?))
}
/// Create a new kernel-owned GEM object.
pub(crate) fn new_kernel_object(
tdev: &TyrDevice,
iomem: Arc<Devres<IoMem>>,
size: usize,
vm: Arc<Mutex<Vm>>,
va: KernelVaPlacement,
flags: vm::map_flags::Flags,
) -> Result<ObjectRef> {
let aligned_size = size.checked_next_multiple_of(1 << 12).ok_or(EINVAL)?;
let mut gem: gem::UniqueObjectRef<shmem::Object<DriverObject>> =
shmem::Object::<DriverObject>::new(tdev, aligned_size)?;
gem.set_wc(true);
let node = vm.lock().alloc_kernel_range(va)?;
let range = node.start()..node.start() + node.size();
gem.ty = ObjectType::Kernel { node };
let gem = ObjectRef::new(gem.into_ref());
vm.lock().bind_gem(iomem, &gem.gem, 0, range, flags)?;
Ok(gem)
}
/// Creates a dummy GEM object to serve as the root of a GPUVM.
pub(crate) fn new_dummy_object(tdev: &TyrDevice) -> Result<ObjectRef> {
let mut gem = Object::new(tdev, 4096)?;
gem.set_wc(true);
Ok(ObjectRef::new(gem.into_ref()))
}
/// Controls the VA range assigned to a kernel-owned GEM object.
pub(crate) enum KernelVaPlacement {
/// Automatically place this object in a free spot in the kernel VA range.
Auto,
/// Place this object at a given address.
At(Range<u64>),
}
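// A usage sketch for kernel BO creation, assuming `tdev`, `iomem` and `vm`
// are already available from the probe path and `flags` is a suitable
// `vm::map_flags::Flags` value:
//
//     let bo = new_kernel_object(
//         tdev,
//         iomem.clone(),
//         4096,
//         vm.clone(),
//         KernelVaPlacement::Auto,
//         flags,
//     )?;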
// SPDX-License-Identifier: GPL-2.0 or MIT
use crate::regs::*;
use kernel::bits;
use kernel::bits::genmask_u32;
use kernel::devres::Devres;
use kernel::io;
use kernel::io::mem::IoMem;
use kernel::platform;
use kernel::prelude::*;
use kernel::time;
use kernel::transmute::AsBytes;
pub(crate) mod irq;
pub(crate) mod wait;
/// Information about the GPU. This can be queried by userspace.
#[repr(C)]
pub(crate) struct GpuInfo {
pub(crate) gpu_id: u32,
pub(crate) csf_id: u32,
pub(crate) gpu_rev: u32,
pub(crate) core_features: u32,
pub(crate) l2_features: u32,
pub(crate) tiler_features: u32,
pub(crate) mem_features: u32,
pub(crate) mmu_features: u32,
pub(crate) thread_features: u32,
pub(crate) max_threads: u32,
pub(crate) thread_max_workgroup_size: u32,
pub(crate) thread_max_barrier_size: u32,
pub(crate) coherency_features: u32,
pub(crate) texture_features: [u32; 4],
pub(crate) as_present: u32,
pub(crate) shader_present: u64,
pub(crate) tiler_present: u64,
pub(crate) l2_present: u64,
}
impl GpuInfo {
pub(crate) fn new(iomem: &Devres<IoMem>) -> Result<Self> {
let gpu_id = GPU_ID.read(iomem)?;
let csf_id = GPU_CSF_ID.read(iomem)?;
let gpu_rev = GPU_REVID.read(iomem)?;
let core_features = GPU_CORE_FEATURES.read(iomem)?;
let l2_features = GPU_L2_FEATURES.read(iomem)?;
let tiler_features = GPU_TILER_FEATURES.read(iomem)?;
let mem_features = GPU_MEM_FEATURES.read(iomem)?;
let mmu_features = GPU_MMU_FEATURES.read(iomem)?;
let thread_features = GPU_THREAD_FEATURES.read(iomem)?;
let max_threads = GPU_THREAD_MAX_THREADS.read(iomem)?;
let thread_max_workgroup_size = GPU_THREAD_MAX_WORKGROUP_SIZE.read(iomem)?;
let thread_max_barrier_size = GPU_THREAD_MAX_BARRIER_SIZE.read(iomem)?;
let coherency_features = GPU_COHERENCY_FEATURES.read(iomem)?;
let texture_features = GPU_TEXTURE_FEATURES0.read(iomem)?;
let as_present = GPU_AS_PRESENT.read(iomem)?;
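// The *_PRESENT registers are 64-bit bitmasks split across LO/HI pairs,
// so assemble each one from two 32-bit reads.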
let shader_present = GPU_SHADER_PRESENT_LO.read(iomem)? as u64;
let shader_present = shader_present | (GPU_SHADER_PRESENT_HI.read(iomem)? as u64) << 32;
let tiler_present = GPU_TILER_PRESENT_LO.read(iomem)? as u64;
let tiler_present = tiler_present | (GPU_TILER_PRESENT_HI.read(iomem)? as u64) << 32;
let l2_present = GPU_L2_PRESENT_LO.read(iomem)? as u64;
let l2_present = l2_present | (GPU_L2_PRESENT_HI.read(iomem)? as u64) << 32;
Ok(Self {
gpu_id,
csf_id,
gpu_rev,
core_features,
l2_features,
tiler_features,
mem_features,
mmu_features,
thread_features,
max_threads,
thread_max_workgroup_size,
thread_max_barrier_size,
coherency_features,
texture_features: [texture_features, 0, 0, 0],
as_present,
shader_present,
tiler_present,
l2_present,
})
}
pub(crate) fn log(&self, pdev: &platform::Device) {
let major = (self.gpu_id >> 16) & 0xff;
let minor = (self.gpu_id >> 8) & 0xff;
let status = self.gpu_id & 0xff;
let model_name = if let Some(model) = GPU_MODELS
.iter()
.find(|&f| f.major == major && f.minor == minor)
{
model.name
} else {
"unknown"
};
dev_info!(
pdev.as_ref(),
"mali-{} id 0x{:x} major 0x{:x} minor 0x{:x} status 0x{:x}",
model_name,
self.gpu_id >> 16,
major,
minor,
status
);
dev_info!(
pdev.as_ref(),
"Features: L2:{:#x} Tiler:{:#x} Mem:{:#x} MMU:{:#x} AS:{:#x}",
self.l2_features,
self.tiler_features,
self.mem_features,
self.mmu_features,
self.as_present
);
dev_info!(
pdev.as_ref(),
"shader_present=0x{:016x} l2_present=0x{:016x} tiler_present=0x{:016x}",
self.shader_present,
self.l2_present,
self.tiler_present
);
}
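/// Returns the number of virtual address bits, encoded in bits [7:0] of
/// `mmu_features`. For a hypothetical `mmu_features` of 0x2830 this is
/// 0x30, i.e. 48-bit VAs.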
pub(crate) fn va_bits(&self) -> u32 {
self.mmu_features & bits::genmask_u32(7, 0)
}
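/// Returns the number of physical address bits, encoded in bits [15:8] of
/// `mmu_features`. For the same hypothetical 0x2830 this is 0x28, i.e.
/// 40-bit PAs.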
pub(crate) fn pa_bits(&self) -> u32 {
(self.mmu_features >> 8) & bits::genmask_u32(7, 0)
}
}
// SAFETY:
//
// This type is the same type exposed by Panthor's uAPI. As it's declared as
// #[repr(C)], we can be sure that the layout is the same. Therefore, it is safe
// to expose this to userspace.
unsafe impl AsBytes for GpuInfo {}
struct GpuModels {
name: &'static str,
major: u32,
minor: u32,
}
const GPU_MODELS: [GpuModels; 1] = [GpuModels {
name: "g610",
major: 10,
minor: 7,
}];
#[allow(dead_code)]
pub(crate) struct GpuId {
pub(crate) arch_major: u32,
pub(crate) arch_minor: u32,
pub(crate) arch_rev: u32,
pub(crate) prod_major: u32,
pub(crate) ver_major: u32,
pub(crate) ver_minor: u32,
pub(crate) ver_status: u32,
}
impl From<u32> for GpuId {
fn from(value: u32) -> Self {
GpuId {
arch_major: (value & genmask_u32(31, 28)) >> 28,
arch_minor: (value & genmask_u32(27, 24)) >> 24,
arch_rev: (value & genmask_u32(23, 20)) >> 20,
prod_major: (value & genmask_u32(19, 16)) >> 16,
ver_major: (value & genmask_u32(15, 12)) >> 12,
ver_minor: (value & genmask_u32(11, 4)) >> 4,
ver_status: value & genmask_u32(3, 0),
}
}
}
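// A worked decode, assuming a hypothetical raw GPU_ID value of 0xA867_0010:
//
//     let id = GpuId::from(0xA867_0010);
//     assert_eq!(id.arch_major, 0xa); // bits [31:28]
//     assert_eq!(id.prod_major, 0x7); // bits [19:16]
//     assert_eq!(id.ver_minor, 0x01); // bits [11:4]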
/// Powers on the L2 cache block.
pub(crate) fn l2_power_on(iomem: &Devres<IoMem>) -> Result<()> {
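// Wait for any in-flight L2 power transition to settle before requesting
// power-on.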
let op = || L2_PWRTRANS_LO.read(iomem);
let cond = |pwr_trans: &u32| *pwr_trans == 0;
let _ = io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(100),
Some(time::Delta::from_millis(200)),
)?;
L2_PWRON_LO.write(iomem, 1)?;
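// Power-on has been requested for core 0's L2; poll until the hardware
// reports it ready.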
let op = || L2_READY_LO.read(iomem);
let cond = |l2_ready: &u32| *l2_ready == 1;
let _ = io::poll::read_poll_timeout(
op,
cond,
time::Delta::from_millis(100),
Some(time::Delta::from_millis(200)),
)?;
Ok(())
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! GPU IRQ handler.
use kernel::bits::bit_u32;
use kernel::c_str;
use kernel::devres::Devres;
use kernel::io::mem::IoMem;
use kernel::irq;
use kernel::irq::request::Handler as IrqHandler;
use kernel::irq::request::IrqReturn;
use kernel::irq::request::Registration as IrqRegistration;
use kernel::platform;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::types::ARef;
use crate::driver::TyrDevice;
use crate::gpu::wait::PowerOnWait;
use crate::regs;
const RESET_COMPLETED: u32 = bit_u32(8);
const POWER_CHANGED_SINGLE: u32 = bit_u32(9);
const POWER_CHANGED_ALL: u32 = bit_u32(10);
pub(crate) struct GpuIrqHandler {
_tdev: ARef<TyrDevice>,
iomem: Arc<Devres<IoMem>>,
poweron_wait: Arc<PowerOnWait>,
}
impl IrqHandler for GpuIrqHandler {
fn handle_irq(&self) -> IrqReturn {
let int_stat = regs::GPU_INT_RAWSTAT.read(&self.iomem).unwrap_or_default();
pr_info!("Acknowledging GPU_INT_RAWSTAT: {:#x}\n", int_stat);
let _ = regs::GPU_INT_CLEAR.write(&self.iomem, int_stat);
if int_stat == RESET_COMPLETED | POWER_CHANGED_SINGLE | POWER_CHANGED_ALL {
*self.poweron_wait.powered_on.lock() = true;
self.poweron_wait.wait.notify_one();
}
IrqReturn::Handled
}
}
pub(crate) fn gpu_irq_init(
tdev: ARef<TyrDevice>,
pdevice: platform::Device,
iomem: Arc<Devres<IoMem>>,
poweron_wait: Arc<PowerOnWait>,
) -> Result<impl PinInit<IrqRegistration<GpuIrqHandler>, Error>> {
let gpu_irq = pdevice.irq_by_name(c_str!("gpu"))?;
let irq_handler = GpuIrqHandler {
_tdev: tdev,
iomem: iomem.clone(),
poweron_wait,
};
// Let's disable IRQs in favor of explicit polling for now due to issues with
// SpinLockIrq and CondVar.
//
// GPU_INT_MASK.write(&iomem, core::u32::MAX)?;
regs::GPU_INT_MASK.write(&iomem, 0)?;
Ok(IrqRegistration::register(
gpu_irq,
irq::request::flags::SHARED,
c_str!("tyr-gpu"),
irq_handler,
))
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! A wait on GPU events. Currently only used for power on.
use kernel::new_condvar;
use kernel::new_spinlock;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::CondVar;
use kernel::sync::SpinLock;
#[pin_data]
pub(crate) struct PowerOnWait {
#[pin]
pub(crate) wait: CondVar,
#[pin]
pub(crate) powered_on: SpinLock<bool>,
}
impl PowerOnWait {
pub(crate) fn new() -> Result<Arc<Self>> {
Arc::pin_init(
pin_init!(PowerOnWait {
wait <- new_condvar!(),
powered_on <- new_spinlock!(false),
}),
GFP_KERNEL,
)
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
use core::ops::Range;
use as_lock::AsLockToken;
use faults::decode_faults;
use kernel::devres::Devres;
use kernel::io;
use kernel::io::mem::IoMem;
use kernel::io_pgtable;
use kernel::new_mutex;
use kernel::platform;
use kernel::prelude::*;
use kernel::sync::Arc;
use kernel::sync::Mutex;
use kernel::time::Delta;
use kernel::types::ForeignOwnable;
use vm::Vm;
use vm::VmLayout;
use crate::driver::TyrDevice;
use crate::gpu::GpuInfo;
use crate::regs::*;
mod as_lock;
mod faults;
pub(crate) mod irq;
pub(crate) mod vm;
pub(crate) struct Mmu {
/// List containing all VMs.
vms: KVec<Arc<Mutex<Vm>>>,
/// Bitmask tracking which of the 32 AS slots are free: a set bit means
/// the slot is available.
free_slots: usize,
// slot_allocator: Arc<Mutex<SlotAllocator>>,
}
impl Mmu {
pub(crate) fn new() -> Result<Self> {
Ok(Self {
vms: KVec::new(),
// slot_allocator: Arc::pin_init(
// new_mutex!(SlotAllocator {
// free_mask: u32::MAX,
// }),
// GFP_KERNEL,
// )?,
free_slots: usize::MAX,
})
}
pub(crate) fn create_vm(
&mut self,
tdev: &TyrDevice,
pdev: platform::Device,
gpu_info: &GpuInfo,
for_mcu: bool,
layout: VmLayout,
auto_kernel_va: Range<u64>,
/* coherent: bool, */
) -> Result<Arc<Mutex<Vm>>> {
let vm = Vm::create(tdev, pdev, for_mcu, gpu_info, layout, auto_kernel_va)?;
let vm = Arc::pin_init(new_mutex!(vm), GFP_KERNEL)?;
self.vms.push(vm.clone(), GFP_KERNEL)?;
Ok(vm)
}
fn flush_range(iomem: &Devres<IoMem>, as_nr: usize, range: Range<u64>) -> Result {
Self::do_as_command(iomem, as_nr, AS_COMMAND_FLUSH_PT, range)
}
fn allocate_as(&mut self) -> Result<usize> {
let slot = self.free_slots.trailing_zeros();
if slot >= 32 {
return Err(EBUSY);
}
// Mark the slot as allocated by clearing its "free" bit.
self.free_slots &= !(1 << slot);
Ok(slot as usize)
}
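/// Waits until the given AS has no command in flight, i.e. until
/// AS_STATUS_ACTIVE is clear.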
fn wait_ready(iomem: &Devres<IoMem>, as_nr: usize) -> Result {
let op = || as_status(as_nr)?.read(iomem);
let cond = |status: &u32| -> bool { *status & AS_STATUS_ACTIVE == 0 };
let _ = io::poll::read_poll_timeout(
op,
cond,
Delta::from_millis(0),
Some(Delta::from_micros(10000)),
)?;
Ok(())
}
/// TODO: proper AS slot management is still missing.
fn free_as(&mut self, as_nr: usize) {
// Mark the slot as free again by setting its bit.
self.free_slots |= 1 << as_nr;
}
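/// Issues an AS command, locking the target region first for any command
/// other than UNLOCK.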
fn do_as_command(
iomem: &Devres<IoMem>,
as_nr: usize,
command: u32,
region: Range<u64>,
) -> Result {
if command == AS_COMMAND_UNLOCK {
as_command(as_nr)?.write(iomem, command)?;
} else {
let _lock = AsLockToken::lock_region(iomem, as_nr, region)?;
Self::wait_ready(iomem, as_nr)?;
as_command(as_nr)?.write(iomem, command)?;
Self::wait_ready(iomem, as_nr)?;
}
Ok(())
}
pub(crate) fn bind_vm(
&mut self,
vm: Arc<Mutex<Vm>>,
gpu_info: &GpuInfo,
iomem: &Devres<IoMem>,
) -> Result {
let mut vm = vm.lock();
let va_bits = gpu_info.va_bits();
let transtab = vm.gpuvm.exec_lock(None)?.page_table.cfg().ttbr;
let transcfg = AS_TRANSCFG_PTW_MEMATTR_WB
| AS_TRANSCFG_PTW_RA
| AS_TRANSCFG_ADRMODE_AARCH64_4K
| as_transcfg_ina_bits((55 - va_bits).into());
let memattr = vm.memattr;
let as_nr = if vm.for_mcu { 0 } else { self.allocate_as()? };
Self::enable_as(iomem, as_nr, transtab, transcfg.into(), memattr)?;
vm.address_space = Some(as_nr);
Ok(())
}
fn enable_as(
iomem: &Devres<IoMem>,
as_nr: usize,
transtab: u64,
transcfg: u64,
memattr: u64,
) -> Result {
let active = as_status(as_nr)?.read(iomem)? & AS_STATUS_ACTIVE != 0;
if active {
return Err(EBUSY);
}
Self::do_as_command(iomem, as_nr, AS_COMMAND_FLUSH_MEM, 0..u64::MAX)?;
let transtab_lo = (transtab & 0xffffffff) as u32;
let transtab_hi = (transtab >> 32) as u32;
let transcfg_lo = (transcfg & 0xffffffff) as u32;
let transcfg_hi = (transcfg >> 32) as u32;
let memattr_lo = (memattr & 0xffffffff) as u32;
let memattr_hi = (memattr >> 32) as u32;
as_transtab_lo(as_nr)?.write(iomem, transtab_lo)?;
as_transtab_hi(as_nr)?.write(iomem, transtab_hi)?;
as_transcfg_lo(as_nr)?.write(iomem, transcfg_lo)?;
as_transcfg_hi(as_nr)?.write(iomem, transcfg_hi)?;
as_memattr_lo(as_nr)?.write(iomem, memattr_lo)?;
as_memattr_hi(as_nr)?.write(iomem, memattr_hi)?;
as_command(as_nr)?.write(iomem, AS_COMMAND_UPDATE)?;
let op = || as_status(as_nr)?.read(iomem);
let cond = |status: &u32| -> bool { *status & AS_STATUS_ACTIVE == 0 };
let _ = io::poll::read_poll_timeout(
op,
cond,
Delta::from_millis(0),
Some(Delta::from_micros(200)),
)?;
Ok(())
}
}
/* Dummy TLB ops, the real TLB flush happens in Mmu::flush_range() */
impl io_pgtable::FlushOps for Mmu {
type Data = ();
fn tlb_flush_all(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {}
fn tlb_flush_walk(
_data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
_iova: usize,
_size: usize,
_granule: usize,
) {
}
fn tlb_add_page(
_data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
_iova: usize,
_granule: usize,
) {
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! Address space locking.
use core::ops::Range;
use kernel::bits::genmask_u64;
use kernel::devres::Devres;
use kernel::io::mem::IoMem;
use kernel::prelude::*;
use crate::mmu::Mmu;
use crate::regs::*;
/// A token type that represents a lock on a region of a given address space.
pub(super) struct AsLockToken<'a> {
iomem: &'a Devres<IoMem>,
as_nr: usize,
}
impl<'a> AsLockToken<'a> {
/// Lock a `region` of `as_nr`.
pub(super) fn lock_region(
iomem: &'a Devres<IoMem>,
as_nr: usize,
region: Range<u64>,
) -> Result<Self> {
if region.end - region.start == 0 {
return Err(EINVAL);
}
// The locked region is a naturally aligned power of 2 block encoded as
// its log2 minus 1.
//
// Calculate the desired start/end and look for the highest bit which
// differs. The smallest naturally aligned block must include this bit
// change, the desired region starts with this bit (and subsequent bits)
// zeroed and ends with the bit (and subsequent bits) set to one.
let region_width = core::cmp::max(
64 - (region.start ^ (region.end - 1)).leading_zeros(),
AS_LOCK_REGION_MIN_SIZE.trailing_zeros(),
) as u8 - 1;
// Mask off the low bits of region.start, which would be ignored by the
// hardware anyway.
let region_start = region.start & genmask_u64(63, region_width as u32);
let region = (region_width as u64) | region_start;
let region_lo = (region & 0xffffffff) as u32;
let region_hi = (region >> 32) as u32;
// Lock the region that needs to be updated.
as_lockaddr_lo(as_nr)?.write(iomem, region_lo)?;
as_lockaddr_hi(as_nr)?.write(iomem, region_hi)?;
as_command(as_nr)?.write(iomem, AS_COMMAND_LOCK)?;
Ok(Self { iomem, as_nr })
}
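// As a worked example: locking the hypothetical range 0x8000..0x18000 gives
// start ^ (end - 1) = 0x1FFFF, whose highest set bit is bit 16 (fls = 17).
// Assuming AS_LOCK_REGION_MIN_SIZE is no larger than 0x10000, region_width
// is 17 - 1 = 16, which encodes a naturally aligned 2^17-byte block.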
}
impl Drop for AsLockToken<'_> {
fn drop(&mut self) {
let as_cmd = as_command(self.as_nr);
match as_cmd {
Ok(as_cmd) => {
if let Err(err) = Mmu::wait_ready(self.iomem, self.as_nr) {
pr_err!("MMU is busy for AS{}: {:?}\n", self.as_nr, err);
return;
}
if let Err(err) = as_cmd.write(self.iomem, AS_COMMAND_FLUSH_PT) {
pr_err!(
"Failed to flush page tables for AS{}: {:?}\n",
self.as_nr,
err
);
return;
}
if let Err(err) = Mmu::wait_ready(self.iomem, self.as_nr) {
pr_err!("MMU is busy for AS{}: {:?}\n", self.as_nr, err);
}
}
Err(err) => {
pr_err!("Failed to unlock AS{}: {:?}\n", self.as_nr, err);
}
}
}
}
// SPDX-License-Identifier: GPL-2.0 or MIT
//! Fault reporting.
use crate::regs::*;
use kernel::c_str;
use kernel::devres::Devres;
use kernel::io::mem::IoMem;
use kernel::prelude::*;
use kernel::str::CStr;
pub(crate) const EXCEPTION_MAP: &[(u32, &CStr)] = &[
(0x00, c_str!("OK")),
(0x04, c_str!("TERMINATED")),
(0x05, c_str!("KABOOM")),
(0x06, c_str!("EUREKA")),
(0x08, c_str!("ACTIVE")),
(0x0f, c_str!("CS_RES_TERM")),
(0x3f, c_str!("MAX_NON_FAULT")),
(0x40, c_str!("CS_CONFIG_FAULT")),
(0x41, c_str!("CS_UNRECOVERABLE")),
(0x44, c_str!("CS_ENDPOINT_FAULT")),
(0x48, c_str!("CS_BUS_FAULT")),
(0x49, c_str!("CS_INSTR_INVALID")),
(0x4a, c_str!("CS_CALL_STACK_OVERFLOW")),
(0x4b, c_str!("CS_INHERIT_FAULT")),
(0x50, c_str!("INSTR_INVALID_PC")),
(0x51, c_str!("INSTR_INVALID_ENC")),
(0x55, c_str!("INSTR_BARRIER_FAULT")),
(0x58, c_str!("DATA_INVALID_FAULT")),
(0x59, c_str!("TILE_RANGE_FAULT")),
(0x5a, c_str!("ADDR_RANGE_FAULT")),
(0x5b, c_str!("IMPRECISE_FAULT")),
(0x60, c_str!("OOM")),
(0x68, c_str!("CSF_FW_INTERNAL_ERROR")),
(0x69, c_str!("CSF_RES_EVICTION_TIMEOUT")),
(0x80, c_str!("GPU_BUS_FAULT")),
(0x88, c_str!("GPU_SHAREABILITY_FAULT")),
(0x89, c_str!("SYS_SHAREABILITY_FAULT")),
(0x8a, c_str!("GPU_CACHEABILITY_FAULT")),
(0xc0, c_str!("TRANSLATION_FAULT_0")),
(0xc1, c_str!("TRANSLATION_FAULT_1")),
(0xc2, c_str!("TRANSLATION_FAULT_2")),
(0xc3, c_str!("TRANSLATION_FAULT_3")),
(0xc4, c_str!("TRANSLATION_FAULT_4")),
(0xc8, c_str!("PERM_FAULT_0")),
(0xc9, c_str!("PERM_FAULT_1")),
(0xca, c_str!("PERM_FAULT_2")),
(0xcb, c_str!("PERM_FAULT_3")),
(0xd9, c_str!("ACCESS_FLAG_1")),
(0xda, c_str!("ACCESS_FLAG_2")),
(0xdb, c_str!("ACCESS_FLAG_3")),
(0xe0, c_str!("ADDR_SIZE_FAULT_IN")),
(0xe4, c_str!("ADDR_SIZE_FAULT_OUT0")),
(0xe5, c_str!("ADDR_SIZE_FAULT_OUT1")),
(0xe6, c_str!("ADDR_SIZE_FAULT_OUT2")),
(0xe7, c_str!("ADDR_SIZE_FAULT_OUT3")),
(0xe8, c_str!("MEM_ATTR_FAULT_0")),
(0xe9, c_str!("MEM_ATTR_FAULT_1")),
(0xea, c_str!("MEM_ATTR_FAULT_2")),
(0xeb, c_str!("MEM_ATTR_FAULT_3")),
];
pub(crate) fn get_exception_name(code: u32) -> &'static CStr {
for &(exception_code, name) in EXCEPTION_MAP {
if exception_code == code {
return name;
}
}
c_str!("UNKNOWN")
}
pub(crate) fn access_type_name(fault_status: u32) -> &'static str {
match fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK {
AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC => "ATOMIC",
AS_FAULTSTATUS_ACCESS_TYPE_READ => "READ",
AS_FAULTSTATUS_ACCESS_TYPE_WRITE => "WRITE",
AS_FAULTSTATUS_ACCESS_TYPE_EX => "EXECUTE",
_ => "UNKNOWN",
}
}
/// Decodes MMU faults, printing a message to the kernel log for each one.
pub(super) fn decode_faults(mut status: u32, iomem: &Devres<IoMem>) -> Result {
while status != 0 {
let as_index = (status | (status >> 16)).trailing_zeros();
// Clear both halves of this AS's status bits at the end of the
// iteration so that the loop always makes progress, even if only the
// upper half was set.
let mask = kernel::bits::bit_u32(as_index) | kernel::bits::bit_u32(as_index + 16);
let fault_status: u32 = as_faultstatus(as_index as usize)?.read(iomem)?;
let mut addr = as_faultaddress_lo(as_index as usize)?.read(iomem)? as u64;
addr |= (as_faultaddress_hi(as_index as usize)?.read(iomem)? as u64) << 32;
let exception_type: u32 = fault_status & 0xff;
let access_type: u32 = (fault_status >> 8) & 0x3;
let source_id: u32 = fault_status >> 16;
pr_err!(
"Unhandled Page fault in AS{} at VA 0x{:016X}\n\
raw fault status: 0x{:X}\n\
decoded fault status: {}\n\
exception type 0x{:X}: {}\n\
access type 0x{:X}: {}\n\
source id 0x{:X}\n",
as_index,
addr,
fault_status,
if fault_status & (1 << 10) != 0 {
"DECODER FAULT"
} else {
"SLAVE FAULT"
},
exception_type,
get_exception_name(exception_type),
access_type,
access_type_name(fault_status),
source_id
);
// Update status to process the next fault
status &= !mask;
}
Ok(())
}