summaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-05-26 12:32:41 -0700
commit98931dd95fd489fcbfa97da563505a6f071d7c77 (patch)
tree44683fc4a92efa614acdca2742a7ff19d26da1e3 /mm/mempolicy.c
parentdf202b452fe6c6d6f1351bad485e2367ef1e644e (diff)
parentf403f22f8ccb12860b2b62fec3173c6ccd45938b (diff)
downloadlinux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.gz
linux-98931dd95fd489fcbfa97da563505a6f071d7c77.tar.bz2
linux-98931dd95fd489fcbfa97da563505a6f071d7c77.zip
Merge tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: "Almost all of MM here. A few things are still getting finished off, reviewed, etc. - Yang Shi has improved the behaviour of khugepaged collapsing of readonly file-backed transparent hugepages. - Johannes Weiner has arranged for zswap memory use to be tracked and managed on a per-cgroup basis. - Munchun Song adds a /proc knob ("hugetlb_optimize_vmemmap") for runtime enablement of the recent huge page vmemmap optimization feature. - Baolin Wang contributes a series to fix some issues around hugetlb pagetable invalidation. - Zhenwei Pi has fixed some interactions between hwpoisoned pages and virtualization. - Tong Tiangen has enabled the use of the presently x86-only page_table_check debugging feature on arm64 and riscv. - David Vernet has done some fixup work on the memcg selftests. - Peter Xu has taught userfaultfd to handle write protection faults against shmem- and hugetlbfs-backed files. - More DAMON development from SeongJae Park - adding online tuning of the feature and support for monitoring of fixed virtual address ranges. Also easier discovery of which monitoring operations are available. - Nadav Amit has done some optimization of TLB flushing during mprotect(). - Neil Brown continues to labor away at improving our swap-over-NFS support. - David Hildenbrand has some fixes to anon page COWing versus get_user_pages(). - Peng Liu fixed some errors in the core hugetlb code. - Joao Martins has reduced the amount of memory consumed by device-dax's compound devmaps. - Some cleanups of the arch-specific pagemap code from Anshuman Khandual. - Muchun Song has found and fixed some errors in the TLB flushing of transparent hugepages. - Roman Gushchin has done more work on the memcg selftests. ... and, of course, many smaller fixes and cleanups. Notably, the customary million cleanup serieses from Miaohe Lin" * tag 'mm-stable-2022-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (381 commits) mm: kfence: use PAGE_ALIGNED helper selftests: vm: add the "settings" file with timeout variable selftests: vm: add "test_hmm.sh" to TEST_FILES selftests: vm: check numa_available() before operating "merge_across_nodes" in ksm_tests selftests: vm: add migration to the .gitignore selftests/vm/pkeys: fix typo in comment ksm: fix typo in comment selftests: vm: add process_mrelease tests Revert "mm/vmscan: never demote for memcg reclaim" mm/kfence: print disabling or re-enabling message include/trace/events/percpu.h: cleanup for "percpu: improve percpu_alloc_percpu event trace" include/trace/events/mmflags.h: cleanup for "tracing: incorrect gfp_t conversion" mm: fix a potential infinite loop in start_isolate_page_range() MAINTAINERS: add Muchun as co-maintainer for HugeTLB zram: fix Kconfig dependency warning mm/shmem: fix shmem folio swapoff hang cgroup: fix an error handling path in alloc_pagecache_max_30M() mm: damon: use HPAGE_PMD_SIZE tracing: incorrect isolate_mote_t cast in mm_vmscan_lru_isolate nodemask.h: fix compilation error with GCC12 ...
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c74
1 files changed, 37 insertions, 37 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8c74107a2b15..d39b01fd52fe 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -104,6 +104,7 @@
#include <linux/swapops.h>
#include <asm/tlbflush.h>
+#include <asm/tlb.h>
#include <linux/uaccess.h>
#include "internal.h"
@@ -350,7 +351,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
*/
static void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
- if (!pol)
+ if (!pol || pol->mode == MPOL_LOCAL)
return;
if (!mpol_store_user_nodemask(pol) &&
nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
@@ -440,12 +441,11 @@ static inline bool queue_pages_required(struct page *page,
}
/*
- * queue_pages_pmd() has four possible return values:
+ * queue_pages_pmd() has three possible return values:
* 0 - pages are placed on the right node or queued successfully, or
* special page is met, i.e. huge zero page.
* 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
* specified.
- * 2 - THP was split.
* -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
* existing page was already on a node that does not follow the
* policy.
@@ -507,18 +507,13 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
struct page *page;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
- int ret;
bool has_unmovable = false;
pte_t *pte, *mapped_pte;
spinlock_t *ptl;
ptl = pmd_trans_huge_lock(pmd, vma);
- if (ptl) {
- ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
- if (ret != 2)
- return ret;
- }
- /* THP was split, fall through to pte walk */
+ if (ptl)
+ return queue_pages_pmd(pmd, ptl, addr, end, walk);
if (pmd_trans_unstable(pmd))
return 0;
@@ -636,12 +631,18 @@ unlock:
unsigned long change_prot_numa(struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
+ struct mmu_gather tlb;
int nr_updated;
- nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA);
+ tlb_gather_mmu(&tlb, vma->vm_mm);
+
+ nr_updated = change_protection(&tlb, vma, addr, end, PAGE_NONE,
+ MM_CP_PROT_NUMA);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
+ tlb_finish_mmu(&tlb);
+
return nr_updated;
}
#else
@@ -2135,44 +2136,55 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
}
/**
- * alloc_pages_vma - Allocate a page for a VMA.
+ * vma_alloc_folio - Allocate a folio for a VMA.
* @gfp: GFP flags.
- * @order: Order of the GFP allocation.
+ * @order: Order of the folio.
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual address of the allocation. Must be inside @vma.
* @hugepage: For hugepages try only the preferred node if possible.
*
- * Allocate a page for a specific address in @vma, using the appropriate
+ * Allocate a folio for a specific address in @vma, using the appropriate
* NUMA policy. When @vma is not NULL the caller must hold the mmap_lock
* of the mm_struct of the VMA to prevent it from going away. Should be
- * used for all allocations for pages that will be mapped into user space.
+ * used for all allocations for folios that will be mapped into user space.
*
- * Return: The page on success or NULL if allocation fails.
+ * Return: The folio on success or NULL if allocation fails.
*/
-struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage)
{
struct mempolicy *pol;
int node = numa_node_id();
- struct page *page;
+ struct folio *folio;
int preferred_nid;
nodemask_t *nmask;
pol = get_vma_policy(vma, addr);
if (pol->mode == MPOL_INTERLEAVE) {
+ struct page *page;
unsigned nid;
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
mpol_cond_put(pol);
+ gfp |= __GFP_COMP;
page = alloc_page_interleave(gfp, order, nid);
+ if (page && order > 1)
+ prep_transhuge_page(page);
+ folio = (struct folio *)page;
goto out;
}
if (pol->mode == MPOL_PREFERRED_MANY) {
+ struct page *page;
+
node = policy_node(gfp, pol, node);
+ gfp |= __GFP_COMP;
page = alloc_pages_preferred_many(gfp, order, node, pol);
mpol_cond_put(pol);
+ if (page && order > 1)
+ prep_transhuge_page(page);
+ folio = (struct folio *)page;
goto out;
}
@@ -2199,8 +2211,8 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* First, try to allocate THP only on local node, but
* don't reclaim unnecessarily, just compact.
*/
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE | __GFP_NORETRY, order);
+ folio = __folio_alloc_node(gfp | __GFP_THISNODE |
+ __GFP_NORETRY, order, hpage_node);
/*
* If hugepage allocations are configured to always
@@ -2208,8 +2220,9 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* to prefer hugepage backing, retry allowing remote
* memory with both reclaim and compact as well.
*/
- if (!page && (gfp & __GFP_DIRECT_RECLAIM))
- page = __alloc_pages(gfp, order, hpage_node, nmask);
+ if (!folio && (gfp & __GFP_DIRECT_RECLAIM))
+ folio = __folio_alloc(gfp, order, hpage_node,
+ nmask);
goto out;
}
@@ -2217,25 +2230,12 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node);
- page = __alloc_pages(gfp, order, preferred_nid, nmask);
+ folio = __folio_alloc(gfp, order, preferred_nid, nmask);
mpol_cond_put(pol);
out:
- return page;
-}
-EXPORT_SYMBOL(alloc_pages_vma);
-
-struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, bool hugepage)
-{
- struct folio *folio;
-
- folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr,
- hugepage);
- if (folio && order > 1)
- prep_transhuge_page(&folio->page);
-
return folio;
}
+EXPORT_SYMBOL(vma_alloc_folio);
/**
* alloc_pages - Allocate pages.