Commit f3f0e1d2 authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Linus Torvalds

khugepaged: add support of collapse for tmpfs/shmem pages

This patch extends khugepaged to support collapse of tmpfs/shmem pages.
We share fair amount of infrastructure with anon-THP collapse.

Few design points:

  - First we are looking for VMA which can be suitable for mapping huge
    page;

  - If the VMA maps shmem file, the rest scan/collapse operations
    operates on page cache, not on page tables as in anon VMA case.

  - khugepaged_scan_shmem() finds a range which is suitable for huge
    page. The scan is lockless and shouldn't disturb system too much.

  - once the candidate for collapse is found, collapse_shmem() attempts
    to create a huge page:

      + scan over radix tree, making the range point to new huge page;

      + new huge page is not-uptodate, locked and freezed (refcount
        is 0), so nobody can touch them until we say so.

      + we swap in pages during the scan. khugepaged_scan_shmem()
        filters out ranges with more than khugepaged_max_ptes_swap
	swapped out pages. It's HPAGE_PMD_NR/8 by default.

      + old pages are isolated, unmapped and put to local list in case
        to be restored back if collapse failed.

  - if collapse succeed, we retract pte page tables from VMAs where huge
    pages mapping is possible. The huge page will be mapped as PMD on
    next minor fault into the range.

Link: http://lkml.kernel.org/r/1466021202-61880-35-git-send-email-kirill.shutemov@linux.intel.comSigned-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 4595ef88
...@@ -54,6 +54,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, ...@@ -54,6 +54,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags); unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user); extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
extern bool shmem_mapping(struct address_space *mapping); extern bool shmem_mapping(struct address_space *mapping);
extern bool shmem_huge_enabled(struct vm_area_struct *vma);
extern void shmem_unlock_mapping(struct address_space *mapping); extern void shmem_unlock_mapping(struct address_space *mapping);
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask); pgoff_t index, gfp_t gfp_mask);
...@@ -64,6 +65,19 @@ extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); ...@@ -64,6 +65,19 @@ extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end); pgoff_t start, pgoff_t end);
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */
SGP_HUGE, /* like SGP_CACHE, huge pages preferred */
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
extern int shmem_getpage(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp);
static inline struct page *shmem_read_mapping_page( static inline struct page *shmem_read_mapping_page(
struct address_space *mapping, pgoff_t index) struct address_space *mapping, pgoff_t index)
{ {
...@@ -71,6 +85,15 @@ static inline struct page *shmem_read_mapping_page( ...@@ -71,6 +85,15 @@ static inline struct page *shmem_read_mapping_page(
mapping_gfp_mask(mapping)); mapping_gfp_mask(mapping));
} }
static inline bool shmem_file(struct file *file)
{
if (!IS_ENABLED(CONFIG_SHMEM))
return false;
if (!file || !file->f_mapping)
return false;
return shmem_mapping(file->f_mapping);
}
extern bool shmem_charge(struct inode *inode, long pages); extern bool shmem_charge(struct inode *inode, long pages);
extern void shmem_uncharge(struct inode *inode, long pages); extern void shmem_uncharge(struct inode *inode, long pages);
......
...@@ -29,7 +29,8 @@ ...@@ -29,7 +29,8 @@
EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\
EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \
EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \ EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \
EMe( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \
EMe(SCAN_TRUNCATED, "truncated") \
#undef EM #undef EM
#undef EMe #undef EMe
......
This diff is collapsed.
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/export.h> #include <linux/export.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/khugepaged.h>
static struct vfsmount *shm_mnt; static struct vfsmount *shm_mnt;
...@@ -97,16 +98,6 @@ struct shmem_falloc { ...@@ -97,16 +98,6 @@ struct shmem_falloc {
pgoff_t nr_unswapped; /* how often writepage refused to swap out */ pgoff_t nr_unswapped; /* how often writepage refused to swap out */
}; };
/* Flag allocation requirements to shmem_getpage */
enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */
SGP_HUGE, /* like SGP_CACHE, huge pages preferred */
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void) static unsigned long shmem_default_max_blocks(void)
{ {
...@@ -126,7 +117,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, ...@@ -126,7 +117,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp, struct page **pagep, enum sgp_type sgp,
gfp_t gfp, struct mm_struct *fault_mm, int *fault_type); gfp_t gfp, struct mm_struct *fault_mm, int *fault_type);
static inline int shmem_getpage(struct inode *inode, pgoff_t index, int shmem_getpage(struct inode *inode, pgoff_t index,
struct page **pagep, enum sgp_type sgp) struct page **pagep, enum sgp_type sgp)
{ {
return shmem_getpage_gfp(inode, index, pagep, sgp, return shmem_getpage_gfp(inode, index, pagep, sgp,
...@@ -1899,6 +1890,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -1899,6 +1890,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{ {
file_accessed(file); file_accessed(file);
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_vm_ops;
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
(vma->vm_end & HPAGE_PMD_MASK)) {
khugepaged_enter(vma, vma->vm_flags);
}
return 0; return 0;
} }
...@@ -3803,6 +3799,37 @@ static ssize_t shmem_enabled_store(struct kobject *kobj, ...@@ -3803,6 +3799,37 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
struct kobj_attribute shmem_enabled_attr = struct kobj_attribute shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store); __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
bool shmem_huge_enabled(struct vm_area_struct *vma)
{
struct inode *inode = file_inode(vma->vm_file);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
loff_t i_size;
pgoff_t off;
if (shmem_huge == SHMEM_HUGE_FORCE)
return true;
if (shmem_huge == SHMEM_HUGE_DENY)
return false;
switch (sbinfo->huge) {
case SHMEM_HUGE_NEVER:
return false;
case SHMEM_HUGE_ALWAYS:
return true;
case SHMEM_HUGE_WITHIN_SIZE:
off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
i_size = round_up(i_size_read(inode), PAGE_SIZE);
if (i_size >= HPAGE_PMD_SIZE &&
i_size >> PAGE_SHIFT >= off)
return true;
case SHMEM_HUGE_ADVISE:
/* TODO: implement fadvise() hints */
return (vma->vm_flags & VM_HUGEPAGE);
default:
VM_BUG_ON(1);
return false;
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
#else /* !CONFIG_SHMEM */ #else /* !CONFIG_SHMEM */
...@@ -3982,6 +4009,13 @@ int shmem_zero_setup(struct vm_area_struct *vma) ...@@ -3982,6 +4009,13 @@ int shmem_zero_setup(struct vm_area_struct *vma)
fput(vma->vm_file); fput(vma->vm_file);
vma->vm_file = file; vma->vm_file = file;
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_vm_ops;
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
((vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK) <
(vma->vm_end & HPAGE_PMD_MASK)) {
khugepaged_enter(vma, vma->vm_flags);
}
return 0; return 0;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment