diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-15 20:37:06 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-15 20:37:06 +0200 |
commit | f56caedaf94f9ced5dbfcdb0060a3e788d2078af (patch) | |
tree | e213532d1b3d32f9f0e81948f3b23804baff287d /mm/mempolicy.c | |
parent | a33f5c380c4bd3fa5278d690421b72052456d9fe (diff) | |
parent | 76fd0285b447991267e838842c0be7395eb454bb (diff) | |
download | linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.gz linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.bz2 linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.zip |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"146 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak,
dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap,
memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb,
userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp,
ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and
damon)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits)
mm/damon: hide kernel pointer from tracepoint event
mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log
mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging
mm/damon/dbgfs: remove an unnecessary variable
mm/damon: move the implementation of damon_insert_region to damon.h
mm/damon: add access checking for hugetlb pages
Docs/admin-guide/mm/damon/usage: update for schemes statistics
mm/damon/dbgfs: support all DAMOS stats
Docs/admin-guide/mm/damon/reclaim: document statistics parameters
mm/damon/reclaim: provide reclamation statistics
mm/damon/schemes: account how many times quota limit has exceeded
mm/damon/schemes: account scheme actions that successfully applied
mm/damon: remove a mistakenly added comment for a future feature
Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts
Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning
Docs/admin-guide/mm/damon/usage: remove redundant information
Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks
mm/damon: convert macro functions to static inline functions
mm/damon: modify damon_rand() macro to static inline function
mm/damon: move damon_rand() definition into damon.h
...
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r-- | mm/mempolicy.c | 95 |
1 files changed, 89 insertions, 6 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index f6248affaf38..028e8dd82b44 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -134,6 +134,8 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES]; * @node: Node id to start the search * * Lookup the next closest node by distance if @nid is not online. + * + * Return: this @node if it is online, otherwise the closest node by distance */ int numa_map_to_online_node(int node) { @@ -296,6 +298,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, atomic_set(&policy->refcnt, 1); policy->mode = mode; policy->flags = flags; + policy->home_node = NUMA_NO_NODE; return policy; } @@ -810,7 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, ((vmstart - vma->vm_start) >> PAGE_SHIFT); prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, - new_pol, vma->vm_userfaultfd_ctx); + new_pol, vma->vm_userfaultfd_ctx, + vma_anon_name(vma)); if (prev) { vma = prev; next = vma->vm_next; @@ -1477,6 +1481,77 @@ static long kernel_mbind(unsigned long start, unsigned long len, return do_mbind(start, len, lmode, mode_flags, &nodes, flags); } +SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len, + unsigned long, home_node, unsigned long, flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct mempolicy *new; + unsigned long vmstart; + unsigned long vmend; + unsigned long end; + int err = -ENOENT; + + start = untagged_addr(start); + if (start & ~PAGE_MASK) + return -EINVAL; + /* + * flags is used for future extension if any. + */ + if (flags != 0) + return -EINVAL; + + /* + * Check home_node is online to avoid accessing uninitialized + * NODE_DATA. + */ + if (home_node >= MAX_NUMNODES || !node_online(home_node)) + return -EINVAL; + + len = (len + PAGE_SIZE - 1) & PAGE_MASK; + end = start + len; + + if (end < start) + return -EINVAL; + if (end == start) + return 0; + mmap_write_lock(mm); + vma = find_vma(mm, start); + for (; vma && vma->vm_start < end; vma = vma->vm_next) { + + vmstart = max(start, vma->vm_start); + vmend = min(end, vma->vm_end); + new = mpol_dup(vma_policy(vma)); + if (IS_ERR(new)) { + err = PTR_ERR(new); + break; + } + /* + * Only update home node if there is an existing vma policy + */ + if (!new) + continue; + + /* + * If any vma in the range got policy other than MPOL_BIND + * or MPOL_PREFERRED_MANY we return error. We don't reset + * the home node for vmas we already updated before. + */ + if (new->mode != MPOL_BIND && new->mode != MPOL_PREFERRED_MANY) { + err = -EOPNOTSUPP; + break; + } + + new->home_node = home_node; + err = mbind_range(mm, vmstart, vmend, new); + mpol_put(new); + if (err) + break; + } + mmap_write_unlock(mm); + return err; +} + SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, unsigned long, mode, const unsigned long __user *, nmask, unsigned long, maxnode, unsigned int, flags) @@ -1801,6 +1876,11 @@ static int policy_node(gfp_t gfp, struct mempolicy *policy, int nd) WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE)); } + if ((policy->mode == MPOL_BIND || + policy->mode == MPOL_PREFERRED_MANY) && + policy->home_node != NUMA_NO_NODE) + return policy->home_node; + return nd; } @@ -2061,7 +2141,7 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL); page = __alloc_pages(preferred_gfp, order, nid, &pol->nodes); if (!page) - page = __alloc_pages(gfp, order, numa_node_id(), NULL); + page = __alloc_pages(gfp, order, nid, NULL); return page; } @@ -2072,7 +2152,6 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * @order: Order of the GFP allocation. * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual address of the allocation. Must be inside @vma. - * @node: Which node to prefer for allocation (modulo policy). * @hugepage: For hugepages try only the preferred node if possible. * * Allocate a page for a specific address in @vma, using the appropriate @@ -2083,9 +2162,10 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order, * Return: The page on success or NULL if allocation fails. */ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node, bool hugepage) + unsigned long addr, bool hugepage) { struct mempolicy *pol; + int node = numa_node_id(); struct page *page; int preferred_nid; nodemask_t *nmask; @@ -2102,6 +2182,7 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, } if (pol->mode == MPOL_PREFERRED_MANY) { + node = policy_node(gfp, pol, node); page = alloc_pages_preferred_many(gfp, order, node, pol); mpol_cond_put(pol); goto out; @@ -2185,7 +2266,7 @@ struct page *alloc_pages(gfp_t gfp, unsigned order) page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); else if (pol->mode == MPOL_PREFERRED_MANY) page = alloc_pages_preferred_many(gfp, order, - numa_node_id(), pol); + policy_node(gfp, pol, numa_node_id()), pol); else page = __alloc_pages(gfp, order, policy_node(gfp, pol, numa_node_id()), @@ -2341,6 +2422,8 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) return false; if (a->flags != b->flags) return false; + if (a->home_node != b->home_node) + return false; if (mpol_store_user_nodemask(a)) if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) return false; @@ -2884,7 +2967,7 @@ static const char * const policy_modes[] = * Format of input: * <mode>[=<flags>][:<nodelist>] * - * On success, returns 0, else 1 + * Return: %0 on success, else %1 */ int mpol_parse_str(char *str, struct mempolicy **mpol) { |