summaryrefslogtreecommitdiffstats
path: root/mm/mempolicy.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-15 20:37:06 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-15 20:37:06 +0200
commitf56caedaf94f9ced5dbfcdb0060a3e788d2078af (patch)
treee213532d1b3d32f9f0e81948f3b23804baff287d /mm/mempolicy.c
parenta33f5c380c4bd3fa5278d690421b72052456d9fe (diff)
parent76fd0285b447991267e838842c0be7395eb454bb (diff)
downloadlinux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.gz
linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.bz2
linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.zip
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "146 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak, dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap, memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb, userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp, ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits) mm/damon: hide kernel pointer from tracepoint event mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging mm/damon/dbgfs: remove an unnecessary variable mm/damon: move the implementation of damon_insert_region to damon.h mm/damon: add access checking for hugetlb pages Docs/admin-guide/mm/damon/usage: update for schemes statistics mm/damon/dbgfs: support all DAMOS stats Docs/admin-guide/mm/damon/reclaim: document statistics parameters mm/damon/reclaim: provide reclamation statistics mm/damon/schemes: account how many times quota limit has exceeded mm/damon/schemes: account scheme actions that successfully applied mm/damon: remove a mistakenly added comment for a future feature Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning Docs/admin-guide/mm/damon/usage: remove redundant information Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks mm/damon: convert macro functions to static inline functions mm/damon: modify damon_rand() macro to static inline function mm/damon: move damon_rand() definition into damon.h ...
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--mm/mempolicy.c95
1 files changed, 89 insertions, 6 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f6248affaf38..028e8dd82b44 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -134,6 +134,8 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
* @node: Node id to start the search
*
* Lookup the next closest node by distance if @nid is not online.
+ *
+ * Return: this @node if it is online, otherwise the closest node by distance
*/
int numa_map_to_online_node(int node)
{
@@ -296,6 +298,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
atomic_set(&policy->refcnt, 1);
policy->mode = mode;
policy->flags = flags;
+ policy->home_node = NUMA_NO_NODE;
return policy;
}
@@ -810,7 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
((vmstart - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
- new_pol, vma->vm_userfaultfd_ctx);
+ new_pol, vma->vm_userfaultfd_ctx,
+ vma_anon_name(vma));
if (prev) {
vma = prev;
next = vma->vm_next;
@@ -1477,6 +1481,77 @@ static long kernel_mbind(unsigned long start, unsigned long len,
return do_mbind(start, len, lmode, mode_flags, &nodes, flags);
}
+SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len,
+ unsigned long, home_node, unsigned long, flags)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ struct mempolicy *new;
+ unsigned long vmstart;
+ unsigned long vmend;
+ unsigned long end;
+ int err = -ENOENT;
+
+ start = untagged_addr(start);
+ if (start & ~PAGE_MASK)
+ return -EINVAL;
+ /*
+ * flags is used for future extension if any.
+ */
+ if (flags != 0)
+ return -EINVAL;
+
+ /*
+ * Check home_node is online to avoid accessing uninitialized
+ * NODE_DATA.
+ */
+ if (home_node >= MAX_NUMNODES || !node_online(home_node))
+ return -EINVAL;
+
+ len = (len + PAGE_SIZE - 1) & PAGE_MASK;
+ end = start + len;
+
+ if (end < start)
+ return -EINVAL;
+ if (end == start)
+ return 0;
+ mmap_write_lock(mm);
+ vma = find_vma(mm, start);
+ for (; vma && vma->vm_start < end; vma = vma->vm_next) {
+
+ vmstart = max(start, vma->vm_start);
+ vmend = min(end, vma->vm_end);
+ new = mpol_dup(vma_policy(vma));
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ break;
+ }
+ /*
+ * Only update home node if there is an existing vma policy
+ */
+ if (!new)
+ continue;
+
+ /*
+ * If any vma in the range got policy other than MPOL_BIND
+ * or MPOL_PREFERRED_MANY we return error. We don't reset
+ * the home node for vmas we already updated before.
+ */
+ if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ new->home_node = home_node;
+ err = mbind_range(mm, vmstart, vmend, new);
+ mpol_put(new);
+ if (err)
+ break;
+ }
+ mmap_write_unlock(mm);
+ return err;
+}
+
SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
unsigned long, mode, const unsigned long __user *, nmask,
unsigned long, maxnode, unsigned int, flags)
@@ -1801,6 +1876,11 @@ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd)
WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE));
}
+ if ((policy->mode == MPOL_BIND ||
+ policy->mode == MPOL_PREFERRED_MANY) &&
+ policy->home_node != NUMA_NO_NODE)
+ return policy->home_node;
+
return nd;
}
@@ -2061,7 +2141,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes);
if (!page)
- page = __alloc_pages(gfp, order, numa_node_id(), NULL);
+ page = __alloc_pages(gfp, order, nid, NULL);
return page;
}
@@ -2072,7 +2152,6 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
* @order: Order of the GFP allocation.
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual address of the allocation. Must be inside @vma.
- * @node: Which node to prefer for allocation (modulo policy).
* @hugepage: For hugepages try only the preferred node if possible.
*
* Allocate a page for a specific address in @vma, using the appropriate
@@ -2083,9 +2162,10 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
* Return: The page on success or NULL if allocation fails.
*/
struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, int node, bool hugepage)
+ unsigned long addr, bool hugepage)
{
struct mempolicy *pol;
+ int node = numa_node_id();
struct page *page;
int preferred_nid;
nodemask_t *nmask;
@@ -2102,6 +2182,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
}
if (pol->mode == MPOL_PREFERRED_MANY) {
+ node = policy_node(gfp, pol, node);
page = alloc_pages_preferred_many(gfp, order, node, pol);
mpol_cond_put(pol);
goto out;
@@ -2185,7 +2266,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
else if (pol->mode == MPOL_PREFERRED_MANY)
page = alloc_pages_preferred_many(gfp, order,
- numa_node_id(), pol);
+ policy_node(gfp, pol, numa_node_id()), pol);
else
page = __alloc_pages(gfp, order,
policy_node(gfp, pol, numa_node_id()),
@@ -2341,6 +2422,8 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
return false;
if (a->flags != b->flags)
return false;
+ if (a->home_node != b->home_node)
+ return false;
if (mpol_store_user_nodemask(a))
if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask))
return false;
@@ -2884,7 +2967,7 @@ static const char * const policy_modes[] =
* Format of input:
* <mode>[=<flags>][:<nodelist>]
*
- * On success, returns 0, else 1
+ * Return: %0 on success, else %1
*/
int mpol_parse_str(char *str, struct mempolicy **mpol)
{