summaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c274
1 files changed, 160 insertions, 114 deletions
diff --git a/mm/memory.c b/mm/memory.c
index a78814413ac0..df678fa30cdb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>
+#include <linux/kmsan.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/export.h>
@@ -66,6 +67,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
#include <linux/dax.h>
@@ -74,6 +76,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
+#include <linux/sched/sysctl.h>
#include <trace/events/kmem.h>
@@ -125,18 +128,6 @@ int randomize_va_space __read_mostly =
2;
#endif
-#ifndef arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
-{
- /*
- * Those arches which don't have hw access flag feature need to
- * implement their own helper. By default, "true" means pagefault
- * will be hit on old pte.
- */
- return true;
-}
-#endif
-
#ifndef arch_wants_old_prefaulted_pte
static inline bool arch_wants_old_prefaulted_pte(void)
{
@@ -402,12 +393,21 @@ void free_pgd_range(struct mmu_gather *tlb,
} while (pgd++, addr = next, addr != end);
}
-void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
- unsigned long floor, unsigned long ceiling)
+void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
+ struct vm_area_struct *vma, unsigned long floor,
+ unsigned long ceiling)
{
- while (vma) {
- struct vm_area_struct *next = vma->vm_next;
+ MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
+
+ do {
unsigned long addr = vma->vm_start;
+ struct vm_area_struct *next;
+
+ /*
+ * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
+ * be 0. This will underflow and is okay.
+ */
+ next = mas_find(&mas, ceiling - 1);
/*
* Hide vma from rmap and truncate_pagecache before freeing
@@ -426,7 +426,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
&& !is_vm_hugetlb_page(next)) {
vma = next;
- next = vma->vm_next;
+ next = mas_find(&mas, ceiling - 1);
unlink_anon_vmas(vma);
unlink_file_vma(vma);
}
@@ -434,7 +434,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
floor, next ? next->vm_start : ceiling);
}
vma = next;
- }
+ } while (vma);
}
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte)
@@ -1685,10 +1685,8 @@ static void unmap_single_vma(struct mmu_gather *tlb,
if (vma->vm_file) {
zap_flags_t zap_flags = details ?
details->zap_flags : 0;
- i_mmap_lock_write(vma->vm_file->f_mapping);
__unmap_hugepage_range_final(tlb, vma, start, end,
NULL, zap_flags);
- i_mmap_unlock_write(vma->vm_file->f_mapping);
}
} else
unmap_page_range(tlb, vma, start, end, details);
@@ -1698,6 +1696,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlb: address of the caller's struct mmu_gather
+ * @mt: the maple tree
* @vma: the starting vma
* @start_addr: virtual address at which to start unmapping
* @end_addr: virtual address at which to end unmapping
@@ -1713,7 +1712,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-void unmap_vmas(struct mmu_gather *tlb,
+void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr)
{
@@ -1723,12 +1722,14 @@ void unmap_vmas(struct mmu_gather *tlb,
/* Careful - we need to zap private pages too! */
.even_cows = true,
};
+ MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
start_addr, end_addr);
mmu_notifier_invalidate_range_start(&range);
- for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
+ do {
unmap_single_vma(tlb, vma, start_addr, end_addr, &details);
+ } while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
mmu_notifier_invalidate_range_end(&range);
}
@@ -1743,8 +1744,11 @@ void unmap_vmas(struct mmu_gather *tlb,
void zap_page_range(struct vm_area_struct *vma, unsigned long start,
unsigned long size)
{
+ struct maple_tree *mt = &vma->vm_mm->mm_mt;
+ unsigned long end = start + size;
struct mmu_notifier_range range;
struct mmu_gather tlb;
+ MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
lru_add_drain();
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@@ -1752,8 +1756,9 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
tlb_gather_mmu(&tlb, vma->vm_mm);
update_hiwater_rss(vma->vm_mm);
mmu_notifier_invalidate_range_start(&range);
- for ( ; vma && vma->vm_start < range.end; vma = vma->vm_next)
+ do {
unmap_single_vma(&tlb, vma, start, range.end, NULL);
+ } while ((vma = mas_find(&mas, end - 1)) != NULL);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
}
@@ -2870,7 +2875,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
* On architectures with software "accessed" bits, we would
* take a double page fault, so mark it accessed here.
*/
- if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) {
+ if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
pte_t entry;
vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
@@ -3128,6 +3133,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
delayacct_wpcopy_end();
return 0;
}
+ kmsan_copy_page_meta(new_page, old_page);
}
if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL))
@@ -3362,6 +3368,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
{
const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
struct vm_area_struct *vma = vmf->vma;
+ struct folio *folio;
VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE));
VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE));
@@ -3408,48 +3415,47 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
* Take out anonymous pages first, anonymous shared vmas are
* not dirty accountable.
*/
- if (PageAnon(vmf->page)) {
- struct page *page = vmf->page;
-
+ folio = page_folio(vmf->page);
+ if (folio_test_anon(folio)) {
/*
* If the page is exclusive to this process we must reuse the
* page without further checks.
*/
- if (PageAnonExclusive(page))
+ if (PageAnonExclusive(vmf->page))
goto reuse;
/*
- * We have to verify under page lock: these early checks are
- * just an optimization to avoid locking the page and freeing
+ * We have to verify under folio lock: these early checks are
+ * just an optimization to avoid locking the folio and freeing
* the swapcache if there is little hope that we can reuse.
*
- * PageKsm() doesn't necessarily raise the page refcount.
+ * KSM doesn't necessarily raise the folio refcount.
*/
- if (PageKsm(page) || page_count(page) > 3)
+ if (folio_test_ksm(folio) || folio_ref_count(folio) > 3)
goto copy;
- if (!PageLRU(page))
+ if (!folio_test_lru(folio))
/*
* Note: We cannot easily detect+handle references from
- * remote LRU pagevecs or references to PageLRU() pages.
+ * remote LRU pagevecs or references to LRU folios.
*/
lru_add_drain();
- if (page_count(page) > 1 + PageSwapCache(page))
+ if (folio_ref_count(folio) > 1 + folio_test_swapcache(folio))
goto copy;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto copy;
- if (PageSwapCache(page))
- try_to_free_swap(page);
- if (PageKsm(page) || page_count(page) != 1) {
- unlock_page(page);
+ if (folio_test_swapcache(folio))
+ folio_free_swap(folio);
+ if (folio_test_ksm(folio) || folio_ref_count(folio) != 1) {
+ folio_unlock(folio);
goto copy;
}
/*
- * Ok, we've got the only page reference from our mapping
- * and the page is locked, it's dark out, and we're wearing
+ * Ok, we've got the only folio reference from our mapping
+ * and the folio is locked, it's dark out, and we're wearing
* sunglasses. Hit it.
*/
- page_move_anon_rmap(page, vma);
- unlock_page(page);
+ page_move_anon_rmap(vmf->page, vma);
+ folio_unlock(folio);
reuse:
if (unlikely(unshare)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -3612,11 +3618,11 @@ EXPORT_SYMBOL(unmap_mapping_range);
*/
static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct vm_area_struct *vma = vmf->vma;
struct mmu_notifier_range range;
- if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags))
+ if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags))
return VM_FAULT_RETRY;
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,
vma->vm_mm, vmf->address & PAGE_MASK,
@@ -3626,23 +3632,23 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
&vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
- restore_exclusive_pte(vma, page, vmf->address, vmf->pte);
+ restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- unlock_page(page);
+ folio_unlock(folio);
mmu_notifier_invalidate_range_end(&range);
return 0;
}
-static inline bool should_try_to_free_swap(struct page *page,
+static inline bool should_try_to_free_swap(struct folio *folio,
struct vm_area_struct *vma,
unsigned int fault_flags)
{
- if (!PageSwapCache(page))
+ if (!folio_test_swapcache(folio))
return false;
- if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) ||
- PageMlocked(page))
+ if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) ||
+ folio_test_mlocked(folio))
return true;
/*
* If we want to map a page that's in the swapcache writable, we
@@ -3650,8 +3656,8 @@ static inline bool should_try_to_free_swap(struct page *page,
* user. Try freeing the swapcache to get rid of the swapcache
* reference only in case it's likely that we'll be the exlusive user.
*/
- return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) &&
- page_count(page) == 2;
+ return (fault_flags & FAULT_FLAG_WRITE) && !folio_test_ksm(folio) &&
+ folio_ref_count(folio) == 2;
}
static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
@@ -3718,7 +3724,8 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
vm_fault_t do_swap_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- struct page *page = NULL, *swapcache;
+ struct folio *swapcache, *folio = NULL;
+ struct page *page;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
bool exclusive = false;
@@ -3760,21 +3767,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
- page = lookup_swap_cache(entry, vma, vmf->address);
- swapcache = page;
+ folio = swap_cache_get_folio(entry, vma, vmf->address);
+ if (folio)
+ page = folio_file_page(folio, swp_offset(entry));
+ swapcache = folio;
- if (!page) {
+ if (!folio) {
if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
__swap_count(entry) == 1) {
/* skip swapcache */
- page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
- vmf->address);
- if (page) {
- __SetPageLocked(page);
- __SetPageSwapBacked(page);
-
- if (mem_cgroup_swapin_charge_page(page,
- vma->vm_mm, GFP_KERNEL, entry)) {
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0,
+ vma, vmf->address, false);
+ page = &folio->page;
+ if (folio) {
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
+
+ if (mem_cgroup_swapin_charge_folio(folio,
+ vma->vm_mm, GFP_KERNEL,
+ entry)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -3782,23 +3793,24 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
shadow = get_shadow_from_swap_cache(entry);
if (shadow)
- workingset_refault(page_folio(page),
- shadow);
+ workingset_refault(folio, shadow);
- lru_cache_add(page);
+ folio_add_lru(folio);
/* To provide entry to swap_readpage() */
- set_page_private(page, entry.val);
+ folio_set_swap_entry(folio, entry);
swap_readpage(page, true, NULL);
- set_page_private(page, 0);
+ folio->private = NULL;
}
} else {
page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
vmf);
- swapcache = page;
+ if (page)
+ folio = page_folio(page);
+ swapcache = folio;
}
- if (!page) {
+ if (!folio) {
/*
* Back out if somebody else faulted in this pte
* while we released the pte lock.
@@ -3823,7 +3835,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
goto out_release;
}
- locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
+ locked = folio_lock_or_retry(folio, vma->vm_mm, vmf->flags);
if (!locked) {
ret |= VM_FAULT_RETRY;
@@ -3832,13 +3844,13 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (swapcache) {
/*
- * Make sure try_to_free_swap or swapoff did not release the
+ * Make sure folio_free_swap() or swapoff did not release the
* swapcache from under us. The page pin, and pte_same test
* below, are not enough to exclude that. Even if it is still
* swapcache, we need to check that the page's swap has not
* changed.
*/
- if (unlikely(!PageSwapCache(page) ||
+ if (unlikely(!folio_test_swapcache(folio) ||
page_private(page) != entry.val))
goto out_page;
@@ -3850,9 +3862,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
page = ksm_might_need_to_copy(page, vma, vmf->address);
if (unlikely(!page)) {
ret = VM_FAULT_OOM;
- page = swapcache;
goto out_page;
}
+ folio = page_folio(page);
/*
* If we want to map a page that's in the swapcache writable, we
@@ -3860,8 +3872,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* owner. Try removing the extra reference from the local LRU
* pagevecs if required.
*/
- if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache &&
- !PageKsm(page) && !PageLRU(page))
+ if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache &&
+ !folio_test_ksm(folio) && !folio_test_lru(folio))
lru_add_drain();
}
@@ -3875,7 +3887,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
goto out_nomap;
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
ret = VM_FAULT_SIGBUS;
goto out_nomap;
}
@@ -3888,26 +3900,26 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* check after taking the PT lock and making sure that nobody
* concurrently faulted in this page and set PG_anon_exclusive.
*/
- BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
- BUG_ON(PageAnon(page) && PageAnonExclusive(page));
+ BUG_ON(!folio_test_anon(folio) && folio_test_mappedtodisk(folio));
+ BUG_ON(folio_test_anon(folio) && PageAnonExclusive(page));
/*
* Check under PT lock (to protect against concurrent fork() sharing
* the swap entry concurrently) for certainly exclusive pages.
*/
- if (!PageKsm(page)) {
+ if (!folio_test_ksm(folio)) {
/*
* Note that pte_swp_exclusive() == false for architectures
* without __HAVE_ARCH_PTE_SWP_EXCLUSIVE.
*/
exclusive = pte_swp_exclusive(vmf->orig_pte);
- if (page != swapcache) {
+ if (folio != swapcache) {
/*
* We have a fresh page that is not exposed to the
* swapcache -> certainly exclusive.
*/
exclusive = true;
- } else if (exclusive && PageWriteback(page) &&
+ } else if (exclusive && folio_test_writeback(folio) &&
data_race(si->flags & SWP_STABLE_WRITES)) {
/*
* This is tricky: not all swap backends support
@@ -3937,8 +3949,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* yet.
*/
swap_free(entry);
- if (should_try_to_free_swap(page, vma, vmf->flags))
- try_to_free_swap(page);
+ if (should_try_to_free_swap(folio, vma, vmf->flags))
+ folio_free_swap(folio);
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
@@ -3950,7 +3962,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* exposing them to the swapcache or because the swap entry indicates
* exclusivity.
*/
- if (!PageKsm(page) && (exclusive || page_count(page) == 1)) {
+ if (!folio_test_ksm(folio) &&
+ (exclusive || folio_ref_count(folio) == 1)) {
if (vmf->flags & FAULT_FLAG_WRITE) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
vmf->flags &= ~FAULT_FLAG_WRITE;
@@ -3968,19 +3981,20 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vmf->orig_pte = pte;
/* ksm created a completely new copy */
- if (unlikely(page != swapcache && swapcache)) {
+ if (unlikely(folio != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ folio_add_lru_vma(folio, vma);
} else {
page_add_anon_rmap(page, vma, vmf->address, rmap_flags);
}
- VM_BUG_ON(!PageAnon(page) || (pte_write(pte) && !PageAnonExclusive(page)));
+ VM_BUG_ON(!folio_test_anon(folio) ||
+ (pte_write(pte) && !PageAnonExclusive(page)));
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
- unlock_page(page);
- if (page != swapcache && swapcache) {
+ folio_unlock(folio);
+ if (folio != swapcache && swapcache) {
/*
* Hold the lock to avoid the swap entry to be reused
* until we take the PT lock for the pte_same() check
@@ -3989,8 +4003,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
* so that the swap count won't change under a
* parallel locked swapcache.
*/
- unlock_page(swapcache);
- put_page(swapcache);
+ folio_unlock(swapcache);
+ folio_put(swapcache);
}
if (vmf->flags & FAULT_FLAG_WRITE) {
@@ -4011,12 +4025,12 @@ out:
out_nomap:
pte_unmap_unlock(vmf->pte, vmf->ptl);
out_page:
- unlock_page(page);
+ folio_unlock(folio);
out_release:
- put_page(page);
- if (page != swapcache && swapcache) {
- unlock_page(swapcache);
- put_page(swapcache);
+ folio_put(folio);
+ if (folio != swapcache && swapcache) {
+ folio_unlock(swapcache);
+ folio_put(swapcache);
}
if (si)
put_swap_device(si);
@@ -4731,8 +4745,16 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
flags |= TNF_SHARED;
- last_cpupid = page_cpupid_last(page);
page_nid = page_to_nid(page);
+ /*
+ * For memory tiering mode, cpupid of slow memory page is used
+ * to record page access time. So use default value.
+ */
+ if ((sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) &&
+ !node_is_toptier(page_nid))
+ last_cpupid = (-1 & LAST_CPUPID_MASK);
+ else
+ last_cpupid = page_cpupid_last(page);
target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
&flags);
if (target_nid == NUMA_NO_NODE) {
@@ -4991,7 +5013,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
return VM_FAULT_OOM;
retry_pud:
if (pud_none(*vmf.pud) &&
- hugepage_vma_check(vma, vm_flags, false, true)) {
+ hugepage_vma_check(vma, vm_flags, false, true, true)) {
ret = create_huge_pud(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@@ -5025,7 +5047,7 @@ retry_pud:
goto retry_pud;
if (pmd_none(*vmf.pmd) &&
- hugepage_vma_check(vma, vm_flags, false, true)) {
+ hugepage_vma_check(vma, vm_flags, false, true, true)) {
ret = create_huge_pmd(&vmf);
if (!(ret & VM_FAULT_FALLBACK))
return ret;
@@ -5120,6 +5142,27 @@ static inline void mm_account_fault(struct pt_regs *regs,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
}
+#ifdef CONFIG_LRU_GEN
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+ /* the LRU algorithm doesn't apply to sequential or random reads */
+ current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
+}
+
+static void lru_gen_exit_fault(void)
+{
+ current->in_lru_fault = false;
+}
+#else
+static void lru_gen_enter_fault(struct vm_area_struct *vma)
+{
+}
+
+static void lru_gen_exit_fault(void)
+{
+}
+#endif /* CONFIG_LRU_GEN */
+
/*
* By the time we get here, we already hold the mm semaphore
*
@@ -5151,11 +5194,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
if (flags & FAULT_FLAG_USER)
mem_cgroup_enter_user_fault();
+ lru_gen_enter_fault(vma);
+
if (unlikely(is_vm_hugetlb_page(vma)))
ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
else
ret = __handle_mm_fault(vma, address, flags);
+ lru_gen_exit_fault();
+
if (flags & FAULT_FLAG_USER) {
mem_cgroup_exit_user_fault();
/*
@@ -5643,11 +5690,11 @@ static void clear_gigantic_page(struct page *page,
unsigned int pages_per_huge_page)
{
int i;
- struct page *p = page;
+ struct page *p;
might_sleep();
- for (i = 0; i < pages_per_huge_page;
- i++, p = mem_map_next(p, page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ p = nth_page(page, i);
cond_resched();
clear_user_highpage(p, addr + i * PAGE_SIZE);
}
@@ -5683,13 +5730,12 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
struct page *dst_base = dst;
struct page *src_base = src;
- for (i = 0; i < pages_per_huge_page; ) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ dst = nth_page(dst_base, i);
+ src = nth_page(src_base, i);
+
cond_resched();
copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
}
}
@@ -5736,10 +5782,10 @@ long copy_huge_page_from_user(struct page *dst_page,
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
- struct page *subpage = dst_page;
+ struct page *subpage;
- for (i = 0; i < pages_per_huge_page;
- i++, subpage = mem_map_next(subpage, dst_page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ subpage = nth_page(dst_page, i);
if (allow_pagefault)
page_kaddr = kmap(subpage);
else