Commit df06b37f authored by Keith Busch; committed by Linus Torvalds

mm/gup: cache dev_pagemap while pinning pages

Getting pages from ZONE_DEVICE memory needs to check the backing device's
live-ness, which is tracked in the device's dev_pagemap metadata.  This
metadata is stored in a radix tree and looking it up adds measurable
software overhead.

This patch avoids repeating this relatively costly operation when
dev_pagemap is used by caching the last dev_pagemap while getting user
pages.  The gup_benchmark kernel self test reports this reduces time to
get user pages to as low as 1/3 of the previous time.

Link: http://lkml.kernel.org/r/20181012173040.15669-1-keith.busch@intel.com
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9fd61bc9
...@@ -213,9 +213,9 @@ static inline int hpage_nr_pages(struct page *page) ...@@ -213,9 +213,9 @@ static inline int hpage_nr_pages(struct page *page)
} }
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, int flags); pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t *pud, int flags); pud_t *pud, int flags, struct dev_pagemap **pgmap);
extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
...@@ -344,13 +344,13 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm) ...@@ -344,13 +344,13 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
} }
static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd, int flags) unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{ {
return NULL; return NULL;
} }
static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pud, int flags) unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
{ {
return NULL; return NULL;
} }
......
...@@ -2536,16 +2536,8 @@ static inline vm_fault_t vmf_error(int err) ...@@ -2536,16 +2536,8 @@ static inline vm_fault_t vmf_error(int err)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
struct page *follow_page_mask(struct vm_area_struct *vma, struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned long address, unsigned int foll_flags, unsigned int foll_flags);
unsigned int *page_mask);
static inline struct page *follow_page(struct vm_area_struct *vma,
unsigned long address, unsigned int foll_flags)
{
unsigned int unused_page_mask;
return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
}
#define FOLL_WRITE 0x01 /* check pte is writable */ #define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */ #define FOLL_TOUCH 0x02 /* mark page accessed */
......
...@@ -20,6 +20,11 @@ ...@@ -20,6 +20,11 @@
#include "internal.h" #include "internal.h"
struct follow_page_context {
struct dev_pagemap *pgmap;
unsigned int page_mask;
};
static struct page *no_page_table(struct vm_area_struct *vma, static struct page *no_page_table(struct vm_area_struct *vma,
unsigned int flags) unsigned int flags)
{ {
...@@ -71,10 +76,10 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) ...@@ -71,10 +76,10 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
} }
static struct page *follow_page_pte(struct vm_area_struct *vma, static struct page *follow_page_pte(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags) unsigned long address, pmd_t *pmd, unsigned int flags,
struct dev_pagemap **pgmap)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct dev_pagemap *pgmap = NULL;
struct page *page; struct page *page;
spinlock_t *ptl; spinlock_t *ptl;
pte_t *ptep, pte; pte_t *ptep, pte;
...@@ -116,8 +121,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, ...@@ -116,8 +121,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
* Only return device mapping pages in the FOLL_GET case since * Only return device mapping pages in the FOLL_GET case since
* they are only valid while holding the pgmap reference. * they are only valid while holding the pgmap reference.
*/ */
pgmap = get_dev_pagemap(pte_pfn(pte), NULL); *pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
if (pgmap) if (*pgmap)
page = pte_page(pte); page = pte_page(pte);
else else
goto no_page; goto no_page;
...@@ -152,15 +157,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, ...@@ -152,15 +157,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
goto retry; goto retry;
} }
if (flags & FOLL_GET) { if (flags & FOLL_GET)
get_page(page); get_page(page);
/* drop the pgmap reference now that we hold the page */
if (pgmap) {
put_dev_pagemap(pgmap);
pgmap = NULL;
}
}
if (flags & FOLL_TOUCH) { if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) && if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page)) !pte_dirty(pte) && !PageDirty(page))
...@@ -210,7 +208,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, ...@@ -210,7 +208,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
static struct page *follow_pmd_mask(struct vm_area_struct *vma, static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp, unsigned long address, pud_t *pudp,
unsigned int flags, unsigned int *page_mask) unsigned int flags,
struct follow_page_context *ctx)
{ {
pmd_t *pmd, pmdval; pmd_t *pmd, pmdval;
spinlock_t *ptl; spinlock_t *ptl;
...@@ -258,13 +257,13 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, ...@@ -258,13 +257,13 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
} }
if (pmd_devmap(pmdval)) { if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd); ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags); page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
spin_unlock(ptl); spin_unlock(ptl);
if (page) if (page)
return page; return page;
} }
if (likely(!pmd_trans_huge(pmdval))) if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
if ((flags & FOLL_NUMA) && pmd_protnone(pmdval)) if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
return no_page_table(vma, flags); return no_page_table(vma, flags);
...@@ -284,7 +283,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, ...@@ -284,7 +283,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
} }
if (unlikely(!pmd_trans_huge(*pmd))) { if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl); spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
} }
if (flags & FOLL_SPLIT) { if (flags & FOLL_SPLIT) {
int ret; int ret;
...@@ -307,18 +306,18 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, ...@@ -307,18 +306,18 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
} }
return ret ? ERR_PTR(ret) : return ret ? ERR_PTR(ret) :
follow_page_pte(vma, address, pmd, flags); follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
} }
page = follow_trans_huge_pmd(vma, address, pmd, flags); page = follow_trans_huge_pmd(vma, address, pmd, flags);
spin_unlock(ptl); spin_unlock(ptl);
*page_mask = HPAGE_PMD_NR - 1; ctx->page_mask = HPAGE_PMD_NR - 1;
return page; return page;
} }
static struct page *follow_pud_mask(struct vm_area_struct *vma, static struct page *follow_pud_mask(struct vm_area_struct *vma,
unsigned long address, p4d_t *p4dp, unsigned long address, p4d_t *p4dp,
unsigned int flags, unsigned int *page_mask) unsigned int flags,
struct follow_page_context *ctx)
{ {
pud_t *pud; pud_t *pud;
spinlock_t *ptl; spinlock_t *ptl;
...@@ -344,7 +343,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma, ...@@ -344,7 +343,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
} }
if (pud_devmap(*pud)) { if (pud_devmap(*pud)) {
ptl = pud_lock(mm, pud); ptl = pud_lock(mm, pud);
page = follow_devmap_pud(vma, address, pud, flags); page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
spin_unlock(ptl); spin_unlock(ptl);
if (page) if (page)
return page; return page;
...@@ -352,13 +351,13 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma, ...@@ -352,13 +351,13 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
if (unlikely(pud_bad(*pud))) if (unlikely(pud_bad(*pud)))
return no_page_table(vma, flags); return no_page_table(vma, flags);
return follow_pmd_mask(vma, address, pud, flags, page_mask); return follow_pmd_mask(vma, address, pud, flags, ctx);
} }
static struct page *follow_p4d_mask(struct vm_area_struct *vma, static struct page *follow_p4d_mask(struct vm_area_struct *vma,
unsigned long address, pgd_t *pgdp, unsigned long address, pgd_t *pgdp,
unsigned int flags, unsigned int *page_mask) unsigned int flags,
struct follow_page_context *ctx)
{ {
p4d_t *p4d; p4d_t *p4d;
struct page *page; struct page *page;
...@@ -378,7 +377,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, ...@@ -378,7 +377,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
return page; return page;
return no_page_table(vma, flags); return no_page_table(vma, flags);
} }
return follow_pud_mask(vma, address, p4d, flags, page_mask); return follow_pud_mask(vma, address, p4d, flags, ctx);
} }
/** /**
...@@ -396,13 +395,13 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, ...@@ -396,13 +395,13 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
*/ */
struct page *follow_page_mask(struct vm_area_struct *vma, struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags, unsigned long address, unsigned int flags,
unsigned int *page_mask) struct follow_page_context *ctx)
{ {
pgd_t *pgd; pgd_t *pgd;
struct page *page; struct page *page;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
*page_mask = 0; ctx->page_mask = 0;
/* make this handle hugepd */ /* make this handle hugepd */
page = follow_huge_addr(mm, address, flags & FOLL_WRITE); page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
...@@ -431,7 +430,19 @@ struct page *follow_page_mask(struct vm_area_struct *vma, ...@@ -431,7 +430,19 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
return no_page_table(vma, flags); return no_page_table(vma, flags);
} }
return follow_p4d_mask(vma, address, pgd, flags, page_mask); return follow_p4d_mask(vma, address, pgd, flags, ctx);
}
struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned int foll_flags)
{
struct follow_page_context ctx = { NULL };
struct page *page;
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
return page;
} }
static int get_gate_page(struct mm_struct *mm, unsigned long address, static int get_gate_page(struct mm_struct *mm, unsigned long address,
...@@ -659,9 +670,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -659,9 +670,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking) struct vm_area_struct **vmas, int *nonblocking)
{ {
long i = 0; long ret = 0, i = 0;
unsigned int page_mask;
struct vm_area_struct *vma = NULL; struct vm_area_struct *vma = NULL;
struct follow_page_context ctx = { NULL };
if (!nr_pages) if (!nr_pages)
return 0; return 0;
...@@ -691,12 +702,14 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -691,12 +702,14 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pages ? &pages[i] : NULL); pages ? &pages[i] : NULL);
if (ret) if (ret)
return i ? : ret; return i ? : ret;
page_mask = 0; ctx.page_mask = 0;
goto next_page; goto next_page;
} }
if (!vma || check_vma_flags(vma, gup_flags)) if (!vma || check_vma_flags(vma, gup_flags)) {
return i ? : -EFAULT; ret = -EFAULT;
goto out;
}
if (is_vm_hugetlb_page(vma)) { if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas, i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i, &start, &nr_pages, i,
...@@ -709,23 +722,26 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -709,23 +722,26 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
* If we have a pending SIGKILL, don't keep faulting pages and * If we have a pending SIGKILL, don't keep faulting pages and
* potentially allocating memory. * potentially allocating memory.
*/ */
if (unlikely(fatal_signal_pending(current))) if (unlikely(fatal_signal_pending(current))) {
return i ? i : -ERESTARTSYS; ret = -ERESTARTSYS;
goto out;
}
cond_resched(); cond_resched();
page = follow_page_mask(vma, start, foll_flags, &page_mask);
page = follow_page_mask(vma, start, foll_flags, &ctx);
if (!page) { if (!page) {
int ret;
ret = faultin_page(tsk, vma, start, &foll_flags, ret = faultin_page(tsk, vma, start, &foll_flags,
nonblocking); nonblocking);
switch (ret) { switch (ret) {
case 0: case 0:
goto retry; goto retry;
case -EBUSY:
ret = 0;
/* FALLTHRU */
case -EFAULT: case -EFAULT:
case -ENOMEM: case -ENOMEM:
case -EHWPOISON: case -EHWPOISON:
return i ? i : ret; goto out;
case -EBUSY:
return i;
case -ENOENT: case -ENOENT:
goto next_page; goto next_page;
} }
...@@ -737,27 +753,31 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -737,27 +753,31 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
*/ */
goto next_page; goto next_page;
} else if (IS_ERR(page)) { } else if (IS_ERR(page)) {
return i ? i : PTR_ERR(page); ret = PTR_ERR(page);
goto out;
} }
if (pages) { if (pages) {
pages[i] = page; pages[i] = page;
flush_anon_page(vma, page, start); flush_anon_page(vma, page, start);
flush_dcache_page(page); flush_dcache_page(page);
page_mask = 0; ctx.page_mask = 0;
} }
next_page: next_page:
if (vmas) { if (vmas) {
vmas[i] = vma; vmas[i] = vma;
page_mask = 0; ctx.page_mask = 0;
} }
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
if (page_increm > nr_pages) if (page_increm > nr_pages)
page_increm = nr_pages; page_increm = nr_pages;
i += page_increm; i += page_increm;
start += page_increm * PAGE_SIZE; start += page_increm * PAGE_SIZE;
nr_pages -= page_increm; nr_pages -= page_increm;
} while (nr_pages); } while (nr_pages);
return i; out:
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
return i ? i : ret;
} }
static bool vma_permits_fault(struct vm_area_struct *vma, static bool vma_permits_fault(struct vm_area_struct *vma,
......
...@@ -852,11 +852,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, ...@@ -852,11 +852,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
} }
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd, int flags) pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{ {
unsigned long pfn = pmd_pfn(*pmd); unsigned long pfn = pmd_pfn(*pmd);
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct dev_pagemap *pgmap;
struct page *page; struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd)); assert_spin_locked(pmd_lockptr(mm, pmd));
...@@ -886,12 +885,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, ...@@ -886,12 +885,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
return ERR_PTR(-EEXIST); return ERR_PTR(-EEXIST);
pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
pgmap = get_dev_pagemap(pfn, NULL); *pgmap = get_dev_pagemap(pfn, *pgmap);
if (!pgmap) if (!*pgmap)
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
get_page(page); get_page(page);
put_dev_pagemap(pgmap);
return page; return page;
} }
...@@ -1000,11 +998,10 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr, ...@@ -1000,11 +998,10 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
} }
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t *pud, int flags) pud_t *pud, int flags, struct dev_pagemap **pgmap)
{ {
unsigned long pfn = pud_pfn(*pud); unsigned long pfn = pud_pfn(*pud);
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct dev_pagemap *pgmap;
struct page *page; struct page *page;
assert_spin_locked(pud_lockptr(mm, pud)); assert_spin_locked(pud_lockptr(mm, pud));
...@@ -1028,12 +1025,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, ...@@ -1028,12 +1025,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
return ERR_PTR(-EEXIST); return ERR_PTR(-EEXIST);
pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
pgmap = get_dev_pagemap(pfn, NULL); *pgmap = get_dev_pagemap(pfn, *pgmap);
if (!pgmap) if (!*pgmap)
return ERR_PTR(-EFAULT); return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn); page = pfn_to_page(pfn);
get_page(page); get_page(page);
put_dev_pagemap(pgmap);
return page; return page;
} }
......
...@@ -1709,11 +1709,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, ...@@ -1709,11 +1709,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
return ret; return ret;
} }
struct page *follow_page_mask(struct vm_area_struct *vma, struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
unsigned long address, unsigned int flags, unsigned int foll_flags)
unsigned int *page_mask)
{ {
*page_mask = 0;
return NULL; return NULL;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment