drm/amdgpu: override the PTE MTYPE per page for system memory on APUs

Summary of what this patch does (derived from the hunks below):

* amdgpu_gmc.h: adds an optional override_vm_pte_flags() callback to
  struct amdgpu_gmc_funcs and an amdgpu_gmc_override_vm_pte_flags()
  wrapper macro.  The macro calls through the function pointer without
  a NULL check, so callers must test the pointer first.

* amdgpu_vm_pt.c: amdgpu_vm_pte_update_flags() caches params->adev in a
  local and, before handing the update to the backend, considers the
  override only when all of the following hold: the flags carry
  AMDGPU_PTE_SYSTEM, the device has AMD_IS_APU set, the callback is
  populated and num_possible_nodes() > 1.  The callback is then invoked
  only if params->pages_addr is NULL; otherwise a debug message is
  logged and the flags are left alone.

* gmc_v9_0.c: gmc_v9_0_override_vm_pte_flags() returns without touching
  *flags unless the GC IP version is 9.4.3, adev->ram_is_direct_mapped
  is set, the current MTYPE field is MTYPE_NC, adev->gmc.is_app_apu is
  set and page_is_ram() accepts addr >> PAGE_SHIFT.  If the NUMA node
  reported by pfn_to_nid() for that pfn equals
  adev->gmc.mem_partitions[0].numa.node, the MTYPE field is replaced by
  MTYPE_CC when amdgpu_use_mtype_cc_wa is set and by MTYPE_RW otherwise.
  The vm argument is not used yet; the memory partition index is
  hard-coded to 0 (see the TODO in the function).

Review notes: the hunks were restored to one record per line, and the
first parameter of the new callback prototype is named adev to match
the neighbouring callbacks in struct amdgpu_gmc_funcs.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 43357d699e6ee787a8e45ce37daeebc96c37c510..6794edd1d2d2aeb637d643f361f2a20b5b52aee1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -148,6 +148,10 @@ struct amdgpu_gmc_funcs {
 	void (*get_vm_pte)(struct amdgpu_device *adev,
 			   struct amdgpu_bo_va_mapping *mapping,
 			   uint64_t *flags);
+	/* override per-page pte flags */
+	void (*override_vm_pte_flags)(struct amdgpu_device *adev,
+				      struct amdgpu_vm *vm,
+				      uint64_t addr, uint64_t *flags);
 	/* get the amount of memory used by the vbios for pre-OS console */
 	unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 
@@ -336,6 +340,9 @@ struct amdgpu_gmc {
 #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
 #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
 #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags))
+#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags)	\
+	(adev)->gmc.gmc_funcs->override_vm_pte_flags			\
+		((adev), (vm), (addr), (pte_flags))
 #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index bc5d126b600b4ed0f00fef2c1768c62aeda4e9cc..60b1da93b06dd00d12217190ef9820fc27e5164c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -786,13 +786,14 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
 				       uint64_t pe, uint64_t addr,
 				       unsigned int count, uint32_t incr,
 				       uint64_t flags)
-
 {
+	struct amdgpu_device *adev = params->adev;
+
 	if (level != AMDGPU_VM_PTB) {
 		flags |= AMDGPU_PDE_PTE;
-		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+		amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
 
-	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
+	} else if (adev->asic_type >= CHIP_VEGA10 &&
 		   !(flags & AMDGPU_PTE_VALID) &&
 		   !(flags & AMDGPU_PTE_PRT)) {
 
@@ -800,6 +801,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
 		flags |= AMDGPU_PTE_EXECUTABLE;
 	}
 
+	/* APUs mapping system memory may need different MTYPEs on different
+	 * NUMA nodes. Only do this for contiguous ranges that can be assumed
+	 * to be on the same NUMA node.
+	 */
+	if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) &&
+	    adev->gmc.gmc_funcs->override_vm_pte_flags &&
+	    num_possible_nodes() > 1) {
+		if (!params->pages_addr)
+			amdgpu_gmc_override_vm_pte_flags(adev, params->vm,
+							 addr, &flags);
+		else
+			dev_dbg(adev->dev,
+				"override_vm_pte_flags skipped: non-contiguous\n");
+	}
+
 	params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
 					 flags);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index e69d2e82794e2302a788beb28eb50c2097ee215e..b502452a893353746b9ef67c605f14874469ebb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1302,6 +1302,69 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 					     mapping, flags);
 }
 
+static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
+					   struct amdgpu_vm *vm,
+					   uint64_t addr, uint64_t *flags)
+{
+	int local_node, nid;
+
+	/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
+	 * memory can use more efficient MTYPEs.
+	 */
+	if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3))
+		return;
+
+	/* Only direct-mapped memory allows us to determine the NUMA node from
+	 * the DMA address.
+	 */
+	if (!adev->ram_is_direct_mapped) {
+		dev_dbg(adev->dev, "RAM is not direct mapped\n");
+		return;
+	}
+
+	/* Only override mappings with MTYPE_NC, which is the safe default for
+	 * cacheable memory.
+	 */
+	if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
+	    AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
+		dev_dbg(adev->dev, "MTYPE is not NC\n");
+		return;
+	}
+
+	/* TODO: memory partitions. mem_id is hard-coded to 0 for now.
+	 * FIXME: Only supported on native mode for now. For carve-out, the
+	 * NUMA affinity of the GPU/VM needs to come from the PCI info because
+	 * memory partitions are not associated with different NUMA nodes.
+	 */
+	if (adev->gmc.is_app_apu) {
+		local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node;
+	} else {
+		dev_dbg(adev->dev, "Only native mode APU is supported.\n");
+		return;
+	}
+
+	/* Only handle real RAM. Mappings of PCIe resources don't have struct
+	 * page or NUMA nodes.
+	 */
+	if (!page_is_ram(addr >> PAGE_SHIFT)) {
+		dev_dbg(adev->dev, "Page is not RAM.\n");
+		return;
+	}
+	nid = pfn_to_nid(addr >> PAGE_SHIFT);
+	dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n",
+		/*vm->mem_id*/0, local_node, nid);
+	if (nid == local_node) {
+		unsigned int mtype_local =
+			amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
+		uint64_t old_flags = *flags;
+
+		*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+			AMDGPU_PTE_MTYPE_VG10(mtype_local);
+		dev_dbg(adev->dev, "flags updated from %llx to %llx\n",
+			old_flags, *flags);
+	}
+}
+
 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
 {
 	u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
@@ -1373,6 +1436,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
 	.map_mtype = gmc_v9_0_map_mtype,
 	.get_vm_pde = gmc_v9_0_get_vm_pde,
 	.get_vm_pte = gmc_v9_0_get_vm_pte,
+	.override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
 	.get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
 	.query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
 };