summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/debug.c10
-rw-r--r--mm/debug_vm_pgtable.c1
-rw-r--r--mm/filemap.c46
-rw-r--r--mm/frame_vector.c6
-rw-r--r--mm/gup.c73
-rw-r--r--mm/hmm.c2
-rw-r--r--mm/huge_memory.c8
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/init-mm.c4
-rw-r--r--mm/internal.h6
-rw-r--r--mm/khugepaged.c72
-rw-r--r--mm/ksm.c48
-rw-r--r--mm/maccess.c278
-rw-r--r--mm/madvise.c40
-rw-r--r--mm/memcontrol.c10
-rw-r--r--mm/memory.c61
-rw-r--r--mm/mempolicy.c36
-rw-r--r--mm/migrate.c16
-rw-r--r--mm/mincore.c6
-rw-r--r--mm/mlock.c22
-rw-r--r--mm/mmap.c74
-rw-r--r--mm/mmu_gather.c2
-rw-r--r--mm/mmu_notifier.c22
-rw-r--r--mm/mprotect.c22
-rw-r--r--mm/mremap.c14
-rw-r--r--mm/msync.c8
-rw-r--r--mm/nommu.c22
-rw-r--r--mm/oom_kill.c14
-rw-r--r--mm/page_io.c1
-rw-r--r--mm/page_reporting.h2
-rw-r--r--mm/pagewalk.c12
-rw-r--r--mm/pgtable-generic.c6
-rw-r--r--mm/process_vm_access.c4
-rw-r--r--mm/ptdump.c4
-rw-r--r--mm/rmap.c12
-rw-r--r--mm/shmem.c5
-rw-r--r--mm/sparse-vmemmap.c1
-rw-r--r--mm/sparse.c1
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/swapfile.c5
-rw-r--r--mm/userfaultfd.c26
-rw-r--r--mm/util.c12
-rw-r--r--mm/vmacache.c1
-rw-r--r--mm/zsmalloc.c2
44 files changed, 529 insertions, 496 deletions
diff --git a/mm/debug.c b/mm/debug.c
index f2ede2df585a..b5b1de8c71ac 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -120,9 +120,9 @@ void __dump_page(struct page *page, const char *reason)
* mapping can be invalid pointer and we don't want to crash
* accessing it, so probe everything depending on it carefully
*/
- if (probe_kernel_read_strict(&host, &mapping->host,
- sizeof(struct inode *)) ||
- probe_kernel_read_strict(&a_ops, &mapping->a_ops,
+ if (probe_kernel_read(&host, &mapping->host,
+ sizeof(struct inode *)) ||
+ probe_kernel_read(&a_ops, &mapping->a_ops,
sizeof(struct address_space_operations *))) {
pr_warn("failed to read mapping->host or a_ops, mapping not a valid kernel address?\n");
goto out_mapping;
@@ -133,7 +133,7 @@ void __dump_page(struct page *page, const char *reason)
goto out_mapping;
}
- if (probe_kernel_read_strict(&dentry_first,
+ if (probe_kernel_read(&dentry_first,
&host->i_dentry.first, sizeof(struct hlist_node *))) {
pr_warn("mapping->a_ops:%ps with invalid mapping->host inode address %px\n",
a_ops, host);
@@ -146,7 +146,7 @@ void __dump_page(struct page *page, const char *reason)
}
dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias);
- if (probe_kernel_read_strict(&dentry, dentry_ptr,
+ if (probe_kernel_read(&dentry, dentry_ptr,
sizeof(struct dentry))) {
pr_warn("mapping->aops:%ps with invalid mapping->host->i_dentry.first %px\n",
a_ops, dentry_ptr);
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 188c18908964..9ec59c38d6a2 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -28,7 +28,6 @@
#include <linux/start_kernel.h>
#include <linux/sched/mm.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#define VMFLAGS (VM_READ|VM_WRITE|VM_EXEC)
diff --git a/mm/filemap.c b/mm/filemap.c
index b1a41890d80e..f0ae9a6308cb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -76,16 +76,16 @@
* ->i_mutex
* ->i_mmap_rwsem (truncate->unmap_mapping_range)
*
- * ->mmap_sem
+ * ->mmap_lock
* ->i_mmap_rwsem
* ->page_table_lock or pte_lock (various, mainly in memory.c)
* ->i_pages lock (arch-dependent flush_dcache_mmap_lock)
*
- * ->mmap_sem
+ * ->mmap_lock
* ->lock_page (access_process_vm)
*
* ->i_mutex (generic_perform_write)
- * ->mmap_sem (fault_in_pages_readable->do_page_fault)
+ * ->mmap_lock (fault_in_pages_readable->do_page_fault)
*
* bdi->wb.list_lock
* sb_lock (fs/fs-writeback.c)
@@ -1371,27 +1371,27 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
/*
* Return values:
- * 1 - page is locked; mmap_sem is still held.
+ * 1 - page is locked; mmap_lock is still held.
* 0 - page is not locked.
- * mmap_sem has been released (up_read()), unless flags had both
+ * mmap_lock has been released (mmap_read_unlock(), unless flags had both
* FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
- * which case mmap_sem is still held.
+ * which case mmap_lock is still held.
*
* If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
- * with the page locked and the mmap_sem unperturbed.
+ * with the page locked and the mmap_lock unperturbed.
*/
int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags)
{
if (fault_flag_allow_retry_first(flags)) {
/*
- * CAUTION! In this case, mmap_sem is not released
+ * CAUTION! In this case, mmap_lock is not released
* even though return 0.
*/
if (flags & FAULT_FLAG_RETRY_NOWAIT)
return 0;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (flags & FAULT_FLAG_KILLABLE)
wait_on_page_locked_killable(page);
else
@@ -1403,7 +1403,7 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
ret = __lock_page_killable(page);
if (ret) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return 0;
}
} else
@@ -2313,14 +2313,14 @@ EXPORT_SYMBOL(generic_file_read_iter);
#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS (100)
/*
- * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_sem
+ * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
* @vmf - the vm_fault for this fault.
* @page - the page to lock.
* @fpin - the pointer to the file we may pin (or is already pinned).
*
- * This works similar to lock_page_or_retry in that it can drop the mmap_sem.
+ * This works similar to lock_page_or_retry in that it can drop the mmap_lock.
* It differs in that it actually returns the page locked if it returns 1 and 0
- * if it couldn't lock the page. If we did have to drop the mmap_sem then fpin
+ * if it couldn't lock the page. If we did have to drop the mmap_lock then fpin
* will point to the pinned file and needs to be fput()'ed at a later point.
*/
static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
@@ -2331,7 +2331,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
/*
* NOTE! This will make us return with VM_FAULT_RETRY, but with
- * the mmap_sem still held. That's how FAULT_FLAG_RETRY_NOWAIT
+ * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
* is supposed to work. We have way too many special cases..
*/
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -2341,13 +2341,13 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
if (vmf->flags & FAULT_FLAG_KILLABLE) {
if (__lock_page_killable(page)) {
/*
- * We didn't have the right flags to drop the mmap_sem,
+ * We didn't have the right flags to drop the mmap_lock,
* but all fault_handlers only check for fatal signals
* if we return VM_FAULT_RETRY, so we need to drop the
- * mmap_sem here and return 0 if we don't have a fpin.
+ * mmap_lock here and return 0 if we don't have a fpin.
*/
if (*fpin == NULL)
- up_read(&vmf->vma->vm_mm->mmap_sem);
+ mmap_read_unlock(vmf->vma->vm_mm);
return 0;
}
} else
@@ -2409,7 +2409,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
/*
* Asynchronous readahead happens when we find the page and PG_readahead,
* so we want to possibly extend the readahead further. We return the file that
- * was pinned if we have to drop the mmap_sem in order to do IO.
+ * was pinned if we have to drop the mmap_lock in order to do IO.
*/
static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
struct page *page)
@@ -2444,12 +2444,12 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*
- * vma->vm_mm->mmap_sem must be held on entry.
+ * vma->vm_mm->mmap_lock must be held on entry.
*
- * If our return value has VM_FAULT_RETRY set, it's because the mmap_sem
+ * If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
* may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
*
- * If our return value does not have VM_FAULT_RETRY set, the mmap_sem
+ * If our return value does not have VM_FAULT_RETRY set, the mmap_lock
* has not been released.
*
* We never return with VM_FAULT_RETRY and a bit from VM_FAULT_ERROR set.
@@ -2519,7 +2519,7 @@ retry_find:
goto page_not_uptodate;
/*
- * We've made it this far and we had to drop our mmap_sem, now is the
+ * We've made it this far and we had to drop our mmap_lock, now is the
* time to return to the upper layer and have it re-find the vma and
* redo the fault.
*/
@@ -2569,7 +2569,7 @@ page_not_uptodate:
out_retry:
/*
- * We dropped the mmap_sem, we need to return to the fault handler to
+ * We dropped the mmap_lock, we need to return to the fault handler to
* re-find the vma and come back and find our hopefully still populated
* page.
*/
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 4107dbca0056..10f82d5643b6 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -29,7 +29,7 @@
* different type underlying the specified range of virtual addresses.
* When the function isn't able to map a single page, it returns error.
*
- * This function takes care of grabbing mmap_sem as necessary.
+ * This function takes care of grabbing mmap_lock as necessary.
*/
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
unsigned int gup_flags, struct frame_vector *vec)
@@ -48,7 +48,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
start = untagged_addr(start);
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
locked = 1;
vma = find_vma_intersection(mm, start, start + 1);
if (!vma) {
@@ -102,7 +102,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
} while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
out:
if (locked)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (!ret)
ret = -EFAULT;
if (ret > 0)
diff --git a/mm/gup.c b/mm/gup.c
index 8bd090b36d1d..de9e36262ccb 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -19,7 +19,6 @@
#include <linux/sched/mm.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -593,7 +592,7 @@ retry:
pmdval = READ_ONCE(*pmd);
/*
* MADV_DONTNEED may convert the pmd to null because
- * mmap_sem is held in read mode
+ * mmap_lock is held in read mode
*/
if (pmd_none(pmdval))
return no_page_table(vma, flags);
@@ -856,8 +855,8 @@ unmap:
}
/*
- * mmap_sem must be held on entry. If @locked != NULL and *@flags
- * does not include FOLL_NOWAIT, the mmap_sem may be released. If it
+ * mmap_lock must be held on entry. If @locked != NULL and *@flags
+ * does not include FOLL_NOWAIT, the mmap_lock may be released. If it
* is, *@locked will be set to 0 and -EBUSY returned.
*/
static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
@@ -980,7 +979,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
- * @locked: whether we're still with the mmap_sem held
+ * @locked: whether we're still with the mmap_lock held
*
* Returns either number of pages pinned (which may be less than the
* number requested), or an error. Details about the return value:
@@ -993,9 +992,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
*
* The caller is responsible for releasing returned @pages, via put_page().
*
- * @vmas are valid only as long as mmap_sem is held.
+ * @vmas are valid only as long as mmap_lock is held.
*
- * Must be called with mmap_sem held. It may be released. See below.
+ * Must be called with mmap_lock held. It may be released. See below.
*
* __get_user_pages walks a process's page tables and takes a reference to
* each struct page that each user address corresponds to at a given
@@ -1016,12 +1015,12 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
* appropriate) must be called after the page is finished with, and
* before put_page is called.
*
- * If @locked != NULL, *@locked will be set to 0 when mmap_sem is
+ * If @locked != NULL, *@locked will be set to 0 when mmap_lock is
* released by an up_read(). That can happen if @gup_flags does not
* have FOLL_NOWAIT.
*
* A caller using such a combination of @locked and @gup_flags
- * must therefore hold the mmap_sem for reading only, and recognize
+ * must therefore hold the mmap_lock for reading only, and recognize
* when it's been released. Otherwise, it must be held for either
* reading or writing and will not be released.
*
@@ -1084,7 +1083,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (locked && *locked == 0) {
/*
* We've got a VM_FAULT_RETRY
- * and we've lost mmap_sem.
+ * and we've lost mmap_lock.
* We must stop here.
*/
BUG_ON(gup_flags & FOLL_NOWAIT);
@@ -1191,7 +1190,7 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
* @mm: mm_struct of target mm
* @address: user address
* @fault_flags:flags to pass down to handle_mm_fault()
- * @unlocked: did we unlock the mmap_sem while retrying, maybe NULL if caller
+ * @unlocked: did we unlock the mmap_lock while retrying, maybe NULL if caller
* does not allow retry. If NULL, the caller must guarantee
* that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY.
*
@@ -1212,8 +1211,8 @@ static bool vma_permits_fault(struct vm_area_struct *vma,
* such architectures, gup() will not be enough to make a subsequent access
* succeed.
*
- * This function will not return with an unlocked mmap_sem. So it has not the
- * same semantics wrt the @mm->mmap_sem as does filemap_fault().
+ * This function will not return with an unlocked mmap_lock. So it has not the
+ * same semantics wrt the @mm->mmap_lock as does filemap_fault().
*/
int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
@@ -1250,7 +1249,7 @@ retry:
}
if (ret & VM_FAULT_RETRY) {
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
*unlocked = true;
fault_flags |= FAULT_FLAG_TRIED;
goto retry;
@@ -1355,7 +1354,7 @@ retry:
break;
}
- ret = down_read_killable(&mm->mmap_sem);
+ ret = mmap_read_lock_killable(mm);
if (ret) {
BUG_ON(ret > 0);
if (!pages_done)
@@ -1390,7 +1389,7 @@ retry:
* We must let the caller know we temporarily dropped the lock
* and so the critical section protected by it was lost.
*/
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
*locked = 0;
}
return pages_done;
@@ -1401,13 +1400,13 @@ retry:
* @vma: target vma
* @start: start address
* @end: end address
- * @locked: whether the mmap_sem is still held
+ * @locked: whether the mmap_lock is still held
*
* This takes care of mlocking the pages too if VM_LOCKED is set.
*
* return 0 on success, negative error code on error.
*
- * vma->vm_mm->mmap_sem must be held.
+ * vma->vm_mm->mmap_lock must be held.
*
* If @locked is NULL, it may be held for read or write and will
* be unperturbed.
@@ -1426,7 +1425,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
VM_BUG_ON(end & ~PAGE_MASK);
VM_BUG_ON_VMA(start < vma->vm_start, vma);
VM_BUG_ON_VMA(end > vma->vm_end, vma);
- VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
+ mmap_assert_locked(mm);
gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
if (vma->vm_flags & VM_LOCKONFAULT)
@@ -1459,7 +1458,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
*
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
* flags. VMAs must be already marked with the desired vm_flags, and
- * mmap_sem must not be held.
+ * mmap_lock must not be held.
*/
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
@@ -1478,7 +1477,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
*/
if (!locked) {
locked = 1;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, nstart);
} else if (nstart >= vma->vm_end)
vma = vma->vm_next;
@@ -1510,7 +1509,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
ret = 0;
}
if (locked)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return ret; /* 0 or negative error code */
}
@@ -1526,7 +1525,7 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
* NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
* allowing a hole to be left in the corefile to save diskspace.
*
- * Called without mmap_sem, but after all other threads have been killed.
+ * Called without mmap_lock, but after all other threads have been killed.
*/
#ifdef CONFIG_ELF_CORE
struct page *get_dump_page(unsigned long addr)
@@ -1887,9 +1886,9 @@ static long __get_user_pages_remote(struct task_struct *tsk,
*
* The caller is responsible for releasing returned @pages, via put_page().
*
- * @vmas are valid only as long as mmap_sem is held.
+ * @vmas are valid only as long as mmap_lock is held.
*
- * Must be called with mmap_sem held for read or write.
+ * Must be called with mmap_lock held for read or write.
*
* get_user_pages_remote walks a process's page tables and takes a reference
* to each struct page that each user address corresponds to at a given
@@ -1994,19 +1993,19 @@ EXPORT_SYMBOL(get_user_pages);
/**
* get_user_pages_locked() is suitable to replace the form:
*
- * down_read(&mm->mmap_sem);
+ * mmap_read_lock(mm);
* do_something()
* get_user_pages(tsk, mm, ..., pages, NULL);
- * up_read(&mm->mmap_sem);
+ * mmap_read_unlock(mm);
*
* to:
*
* int locked = 1;
- * down_read(&mm->mmap_sem);
+ * mmap_read_lock(mm);
* do_something()
* get_user_pages_locked(tsk, mm, ..., pages, &locked);
* if (locked)
- * up_read(&mm->mmap_sem);
+ * mmap_read_unlock(mm);
*
* @start: starting user address
* @nr_pages: number of pages from start to pin
@@ -2051,9 +2050,9 @@ EXPORT_SYMBOL(get_user_pages_locked);
/*
* get_user_pages_unlocked() is suitable to replace the form:
*
- * down_read(&mm->mmap_sem);
+ * mmap_read_lock(mm);
* get_user_pages(tsk, mm, ..., pages, NULL);
- * up_read(&mm->mmap_sem);
+ * mmap_read_unlock(mm);
*
* with:
*
@@ -2079,11 +2078,11 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
return -EINVAL;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
ret = __get_user_pages_locked(current, mm, start, nr_pages, pages, NULL,
&locked, gup_flags | FOLL_TOUCH);
if (locked)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return ret;
}
EXPORT_SYMBOL(get_user_pages_unlocked);
@@ -2724,11 +2723,11 @@ static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
* get_user_pages_unlocked() (see comments in that function)
*/
if (gup_flags & FOLL_LONGTERM) {
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
ret = __gup_longterm_locked(current, current->mm,
start, nr_pages,
pages, NULL, gup_flags);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
} else {
ret = get_user_pages_unlocked(start, nr_pages,
pages, gup_flags);
@@ -2751,7 +2750,7 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
return -EINVAL;
if (!(gup_flags & FOLL_FAST_ONLY))
- might_lock_read(&current->mm->mmap_sem);
+ might_lock_read(&current->mm->mmap_lock);
start = untagged_addr(start) & PAGE_MASK;
addr = start;
@@ -2874,7 +2873,7 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long.
*
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * Attempt to pin user pages in memory without taking mm->mmap_lock.
* If not successful, it will fall back to taking the lock and
* calling get_user_pages().
*
diff --git a/mm/hmm.c b/mm/hmm.c
index 41673a6d8d46..e9a545751108 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -563,7 +563,7 @@ int hmm_range_fault(struct hmm_range *range)
struct mm_struct *mm = range->notifier->mm;
int ret;
- lockdep_assert_held(&mm->mmap_sem);
+ mmap_assert_locked(mm);
do {
/* If range is no longer valid force retry. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d9b2e0e0580a..78c84bee7e29 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1746,7 +1746,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
/*
* We don't have to worry about the ordering of src and dst
- * ptlocks because exclusive mmap_sem prevents deadlock.
+ * ptlocks because exclusive mmap_lock prevents deadlock.
*/
old_ptl = __pmd_trans_huge_lock(old_pmd, vma);
if (old_ptl) {
@@ -1833,9 +1833,9 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
goto unlock;
/*
- * In case prot_numa, we are under down_read(mmap_sem). It's critical
+ * In case prot_numa, we are under mmap_read_lock(mm). It's critical
* to not clear pmd intermittently to avoid race with MADV_DONTNEED
- * which is also under down_read(mmap_sem):
+ * which is also under mmap_read_lock(mm):
*
* CPU0: CPU1:
* change_huge_pmd(prot_numa=1)
@@ -2618,7 +2618,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (PageAnon(head)) {
/*
- * The caller does not necessarily hold an mmap_sem that would
+ * The caller does not necessarily hold an mmap_lock that would
* prevent the anon_vma disappearing so we first we take a
* reference to it and then lock the anon_vma for write. This
* is similar to page_lock_anon_vma_read except the write lock
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dcb34d7f5562..57ece74e3aae 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,7 +31,6 @@
#include <linux/cma.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <linux/io.h>
@@ -4696,7 +4695,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
(const void __user *) src_addr,
pages_per_huge_page(h), false);
- /* fallback to copy_from_user outside mmap_sem */
+ /* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
*pagep = page;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 19603302a77f..3a613c85f9ed 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -6,10 +6,10 @@
#include <linux/list.h>
#include <linux/cpumask.h>
#include <linux/mman.h>
+#include <linux/pgtable.h>
#include <linux/atomic.h>
#include <linux/user_namespace.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
#ifndef INIT_MM_CONTEXT
@@ -31,7 +31,7 @@ struct mm_struct init_mm = {
.pgd = swapper_pg_dir,
.mm_users = ATOMIC_INIT(2),
.mm_count = ATOMIC_INIT(1),
- .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+ MMAP_LOCK_INITIALIZER(init_mm)
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
diff --git a/mm/internal.h b/mm/internal.h
index 791e4b5a807c..9886db20d94f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -344,7 +344,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
}
/*
- * must be called with vma's mmap_sem held for read or write, and page locked.
+ * must be called with vma's mmap_lock held for read or write, and page locked.
*/
extern void mlock_vma_page(struct page *page);
extern unsigned int munlock_vma_page(struct page *page);
@@ -413,13 +413,13 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
/*
* FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
- * anything, so we only pin the file and drop the mmap_sem if only
+ * anything, so we only pin the file and drop the mmap_lock if only
* FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
*/
if (fault_flag_allow_retry_first(flags) &&
!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
fpin = get_file(vmf->vma->vm_file);
- up_read(&vmf->vma->vm_mm->mmap_sem);
+ mmap_read_unlock(vmf->vma->vm_mm);
}
return fpin;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3f032487825b..b043c40a21d4 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -534,10 +534,10 @@ void __khugepaged_exit(struct mm_struct *mm)
* under mmap sem read mode). Stop here (after we
* return all pagetables will be destroyed) until
* khugepaged has finished working on the pagetables
- * under the mmap_sem.
+ * under the mmap_lock.
*/
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+ mmap_write_unlock(mm);
}
}
@@ -933,8 +933,8 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, int node)
#endif
/*
- * If mmap_sem temporarily dropped, revalidate vma
- * before taking mmap_sem.
+ * If mmap_lock temporarily dropped, revalidate vma
+ * before taking mmap_lock.
* Return 0 if succeeds, otherwise return none-zero
* value (scan code).
*/
@@ -966,7 +966,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
* Only done if khugepaged_scan_pmd believes it is worthwhile.
*
* Called and returns without pte mapped or spinlocks held,
- * but with mmap_sem held to protect against vma changes.
+ * but with mmap_lock held to protect against vma changes.
*/
static bool __collapse_huge_page_swapin(struct mm_struct *mm,
@@ -993,9 +993,9 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
swapped_in++;
ret = do_swap_page(&vmf);
- /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
+ /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */
if (ret & VM_FAULT_RETRY) {
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
if (hugepage_vma_revalidate(mm, address, &vmf.vma)) {
/* vma is no longer available, don't continue to swapin */
trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
@@ -1047,12 +1047,12 @@ static void collapse_huge_page(struct mm_struct *mm,
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
/*
- * Before allocating the hugepage, release the mmap_sem read lock.
+ * Before allocating the hugepage, release the mmap_lock read lock.
* The allocation can take potentially a long time if it involves
- * sync compaction, and we do not need to hold the mmap_sem during
+ * sync compaction, and we do not need to hold the mmap_lock during
* that. We will recheck the vma after taking it again in write mode.
*/
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
new_page = khugepaged_alloc_page(hpage, gfp, node);
if (!new_page) {
result = SCAN_ALLOC_HUGE_PAGE_FAIL;
@@ -1065,38 +1065,38 @@ static void collapse_huge_page(struct mm_struct *mm,
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
result = hugepage_vma_revalidate(mm, address, &vma);
if (result) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
goto out_nolock;
}
pmd = mm_find_pmd(mm, address);
if (!pmd) {
result = SCAN_PMD_NULL;
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
goto out_nolock;
}
/*
- * __collapse_huge_page_swapin always returns with mmap_sem locked.
- * If it fails, we release mmap_sem and jump out_nolock.
+ * __collapse_huge_page_swapin always returns with mmap_lock locked.
+ * If it fails, we release mmap_lock and jump out_nolock.
* Continuing to collapse causes inconsistency.
*/
if (unmapped && !__collapse_huge_page_swapin(mm, vma, address,
pmd, referenced)) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
goto out_nolock;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
/*
* Prevent all access to pagetables with the exception of
* gup_fast later handled by the ptep_clear_flush and the VM
* handled by the anon_vma lock + PG_lock.
*/
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
result = SCAN_ANY_PROCESS;
if (!mmget_still_valid(mm))
goto out;
@@ -1184,7 +1184,7 @@ static void collapse_huge_page(struct mm_struct *mm,
khugepaged_pages_collapsed++;
result = SCAN_SUCCEED;
out_up_write:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
out_nolock:
if (!IS_ERR_OR_NULL(*hpage))
mem_cgroup_uncharge(*hpage);
@@ -1345,7 +1345,7 @@ out_unmap:
pte_unmap_unlock(pte, ptl);
if (ret) {
node = khugepaged_find_target_node();
- /* collapse_huge_page will return with the mmap_sem released */
+ /* collapse_huge_page will return with the mmap_lock released */
collapse_huge_page(mm, address, hpage, node,
referenced, unmapped);
}
@@ -1517,7 +1517,7 @@ static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
if (likely(mm_slot->nr_pte_mapped_thp == 0))
return 0;
- if (!down_write_trylock(&mm->mmap_sem))
+ if (!mmap_write_trylock(mm))
return -EBUSY;
if (unlikely(khugepaged_test_exit(mm)))
@@ -1528,7 +1528,7 @@ static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
out:
mm_slot->nr_pte_mapped_thp = 0;
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return 0;
}
@@ -1543,11 +1543,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
/*
* Check vma->anon_vma to exclude MAP_PRIVATE mappings that
* got written to. These VMAs are likely not worth investing
- * down_write(mmap_sem) as PMD-mapping is likely to be split
+ * mmap_write_lock(mm) as PMD-mapping is likely to be split
* later.
*
* Not that vma->anon_vma check is racy: it can be set up after
- * the check but before we took mmap_sem by the fault path.
+ * the check but before we took mmap_lock by the fault path.
* But page lock would prevent establishing any new ptes of the
* page, so we are safe.
*
@@ -1567,18 +1567,18 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
if (!pmd)
continue;
/*
- * We need exclusive mmap_sem to retract page table.
+ * We need exclusive mmap_lock to retract page table.
*
* We use trylock due to lock inversion: we need to acquire
- * mmap_sem while holding page lock. Fault path does it in
+ * mmap_lock while holding page lock. Fault path does it in
* reverse order. Trylock is a way to avoid deadlock.
*/
- if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
+ if (mmap_write_trylock(vma->vm_mm)) {
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
/* assume page table is clear */
_pmd = pmdp_collapse_flush(vma, addr, pmd);
spin_unlock(ptl);
- up_write(&vma->vm_mm->mmap_sem);
+ mmap_write_unlock(vma->vm_mm);
mm_dec_nr_ptes(vma->vm_mm);
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
} else {
@@ -2057,8 +2057,8 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
* the next mm on the list.
*/
vma = NULL;
- if (unlikely(!down_read_trylock(&mm->mmap_sem)))
- goto breakouterloop_mmap_sem;
+ if (unlikely(!mmap_read_trylock(mm)))
+ goto breakouterloop_mmap_lock;
if (likely(!khugepaged_test_exit(mm)))
vma = find_vma(mm, khugepaged_scan.address);
@@ -2102,7 +2102,7 @@ skip:
pgoff_t pgoff = linear_page_index(vma,
khugepaged_scan.address);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
ret = 1;
khugepaged_scan_file(mm, file, pgoff, hpage);
fput(file);
@@ -2115,15 +2115,15 @@ skip:
khugepaged_scan.address += HPAGE_PMD_SIZE;
progress += HPAGE_PMD_NR;
if (ret)
- /* we released mmap_sem so break loop */
- goto breakouterloop_mmap_sem;
+ /* we released mmap_lock so break loop */
+ goto breakouterloop_mmap_lock;
if (progress >= pages)
goto breakouterloop;
}
}
breakouterloop:
- up_read(&mm->mmap_sem); /* exit_mmap will destroy ptes after this */
-breakouterloop_mmap_sem:
+ mmap_read_unlock(mm); /* exit_mmap will destroy ptes after this */
+breakouterloop_mmap_lock:
spin_lock(&khugepaged_mm_lock);
VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
diff --git a/mm/ksm.c b/mm/ksm.c
index 18c5d005bd01..4102034cd55a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -442,7 +442,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
/*
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
* page tables after it has passed through ksm_exit() - which, if necessary,
- * takes mmap_sem briefly to serialize against them. ksm_exit() does not set
+ * takes mmap_lock briefly to serialize against them. ksm_exit() does not set
* a special flag: they can just back out as soon as mm_users goes to zero.
* ksm_test_exit() is used throughout to make this test for exit: in some
* places for correctness, in some places just to avoid unnecessary work.
@@ -542,11 +542,11 @@ static void break_cow(struct rmap_item *rmap_item)
*/
put_anon_vma(rmap_item->anon_vma);
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (vma)
break_ksm(vma, addr);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
static struct page *get_mergeable_page(struct rmap_item *rmap_item)
@@ -556,7 +556,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
struct vm_area_struct *vma;
struct page *page;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (!vma)
goto out;
@@ -572,7 +572,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
out:
page = NULL;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return page;
}
@@ -831,7 +831,7 @@ static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
* Though it's very tempting to unmerge rmap_items from stable tree rather
* than check every pte of a given vma, the locking doesn't quite work for
* that - an rmap_item is assigned to the stable tree after inserting ksm
- * page and upping mmap_sem. Nor does it fit with the way we skip dup'ing
+ * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing
* rmap_items from parent to child at fork time (so as not to waste time
* if exit comes before the next scan reaches it).
*
@@ -976,7 +976,7 @@ static int unmerge_and_remove_all_rmap_items(void)
for (mm_slot = ksm_scan.mm_slot;
mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) {
mm = mm_slot->mm;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (ksm_test_exit(mm))
break;
@@ -989,7 +989,7 @@ static int unmerge_and_remove_all_rmap_items(void)
}
remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
spin_lock(&ksm_mmlist_lock);
ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
@@ -1012,7 +1012,7 @@ static int unmerge_and_remove_all_rmap_items(void)
return 0;
error:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
spin_lock(&ksm_mmlist_lock);
ksm_scan.mm_slot = &ksm_mm_head;
spin_unlock(&ksm_mmlist_lock);
@@ -1280,7 +1280,7 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
struct vm_area_struct *vma;
int err = -EFAULT;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_mergeable_vma(mm, rmap_item->address);
if (!vma)
goto out;
@@ -1292,11 +1292,11 @@ static int try_to_merge_with_ksm_page(struct rmap_item *rmap_item,
/* Unstable nid is in union with stable anon_vma: remove first */
remove_rmap_item_from_tree(rmap_item);
- /* Must get reference to anon_vma while still holding mmap_sem */
+ /* Must get reference to anon_vma while still holding mmap_lock */
rmap_item->anon_vma = vma->anon_vma;
get_anon_vma(vma->anon_vma);
out:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return err;
}
@@ -2110,7 +2110,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
if (ksm_use_zero_pages && (checksum == zero_checksum)) {
struct vm_area_struct *vma;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_mergeable_vma(mm, rmap_item->address);
if (vma) {
err = try_to_merge_one_page(vma, page,
@@ -2122,7 +2122,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
*/
err = 0;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
/*
* In case of failure, the page was not really empty, so we
* need to continue. Otherwise we're done.
@@ -2285,7 +2285,7 @@ next_mm:
}
mm = slot->mm;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
if (ksm_test_exit(mm))
vma = NULL;
else
@@ -2319,7 +2319,7 @@ next_mm:
ksm_scan.address += PAGE_SIZE;
} else
put_page(*page);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return rmap_item;
}
put_page(*page);
@@ -2343,13 +2343,13 @@ next_mm:
struct mm_slot, mm_list);
if (ksm_scan.address == 0) {
/*
- * We've completed a full scan of all vmas, holding mmap_sem
+ * We've completed a full scan of all vmas, holding mmap_lock
* throughout, and found no VM_MERGEABLE: so do the same as
* __ksm_exit does to remove this mm from all our lists now.
* This applies either when cleaning up after __ksm_exit
* (but beware: we can reach here even before __ksm_exit),
* or when all VM_MERGEABLE areas have been unmapped (and
- * mmap_sem then protects against race with MADV_MERGEABLE).
+ * mmap_lock then protects against race with MADV_MERGEABLE).
*/
hash_del(&slot->link);
list_del(&slot->mm_list);
@@ -2357,12 +2357,12 @@ next_mm:
free_mm_slot(slot);
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
mmdrop(mm);
} else {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
/*
- * up_read(&mm->mmap_sem) first because after
+ * mmap_read_unlock(mm) first because after
* spin_unlock(&ksm_mmlist_lock) run, the "mm" may
* already have been freed under us by __ksm_exit()
* because the "mm_slot" is still hashed and
@@ -2536,7 +2536,7 @@ void __ksm_exit(struct mm_struct *mm)
* This process is exiting: if it's straightforward (as is the
* case when ksmd was never running), free mm_slot immediately.
* But if it's at the cursor or has rmap_items linked to it, use
- * mmap_sem to synchronize with any break_cows before pagetables
+ * mmap_lock to synchronize with any break_cows before pagetables
* are freed, and leave the mm_slot on the list for ksmd to free.
* Beware: ksm may already have noticed it exiting and freed the slot.
*/
@@ -2560,8 +2560,8 @@ void __ksm_exit(struct mm_struct *mm)
clear_bit(MMF_VM_MERGEABLE, &mm->flags);
mmdrop(mm);
} else if (mm_slot) {
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+ mmap_write_unlock(mm);
}
}
diff --git a/mm/maccess.c b/mm/maccess.c
index 3ca8d97e5010..88845eda5047 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -1,103 +1,128 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Access kernel memory without faulting.
+ * Access kernel or user memory without faulting.
*/
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
-static __always_inline long
-probe_read_common(void *dst, const void __user *src, size_t size)
+bool __weak probe_kernel_read_allowed(const void *unsafe_src, size_t size)
{
- long ret;
+ return true;
+}
+
+#ifdef HAVE_GET_KERNEL_NOFAULT
+
+#define probe_kernel_read_loop(dst, src, len, type, err_label) \
+ while (len >= sizeof(type)) { \
+ __get_kernel_nofault(dst, src, type, err_label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+long probe_kernel_read(void *dst, const void *src, size_t size)
+{
+ if (!probe_kernel_read_allowed(src, size))
+ return -ERANGE;
pagefault_disable();
- ret = __copy_from_user_inatomic(dst, src, size);
+ probe_kernel_read_loop(dst, src, size, u64, Efault);
+ probe_kernel_read_loop(dst, src, size, u32, Efault);
+ probe_kernel_read_loop(dst, src, size, u16, Efault);
+ probe_kernel_read_loop(dst, src, size, u8, Efault);
pagefault_enable();
+ return 0;
+Efault:
+ pagefault_enable();
+ return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(probe_kernel_read);
+
+#define probe_kernel_write_loop(dst, src, len, type, err_label) \
+ while (len >= sizeof(type)) { \
+ __put_kernel_nofault(dst, src, type, err_label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
- return ret ? -EFAULT : 0;
+long probe_kernel_write(void *dst, const void *src, size_t size)
+{
+ pagefault_disable();
+ probe_kernel_write_loop(dst, src, size, u64, Efault);
+ probe_kernel_write_loop(dst, src, size, u32, Efault);
+ probe_kernel_write_loop(dst, src, size, u16, Efault);
+ probe_kernel_write_loop(dst, src, size, u8, Efault);
+ pagefault_enable();
+ return 0;
+Efault:
+ pagefault_enable();
+ return -EFAULT;
}
-static __always_inline long
-probe_write_common(void __user *dst, const void *src, size_t size)
+long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
{
- long ret;
+ const void *src = unsafe_addr;
+
+ if (unlikely(count <= 0))
+ return 0;
+ if (!probe_kernel_read_allowed(unsafe_addr, count))
+ return -ERANGE;
pagefault_disable();
- ret = __copy_to_user_inatomic(dst, src, size);
+ do {
+ __get_kernel_nofault(dst, src, u8, Efault);
+ dst++;
+ src++;
+ } while (dst[-1] && src - unsafe_addr < count);
pagefault_enable();
- return ret ? -EFAULT : 0;
+ dst[-1] = '\0';
+ return src - unsafe_addr;
+Efault:
+ pagefault_enable();
+ dst[-1] = '\0';
+ return -EFAULT;
}
-
+#else /* HAVE_GET_KERNEL_NOFAULT */
/**
- * probe_kernel_read(): safely attempt to read from a kernel-space location
+ * probe_kernel_read(): safely attempt to read from kernel-space
* @dst: pointer to the buffer that shall take the data
* @src: address to read from
* @size: size of the data chunk
*
- * Safely read from address @src to the buffer at @dst. If a kernel fault
- * happens, handle that and return -EFAULT.
+ * Safely read from kernel address @src to the buffer at @dst. If a kernel
+ * fault happens, handle that and return -EFAULT. If @src is not a valid kernel
+ * address, return -ERANGE.
*
* We ensure that the copy_from_user is executed in atomic context so that
- * do_page_fault() doesn't attempt to take mmap_sem. This makes
+ * do_page_fault() doesn't attempt to take mmap_lock. This makes
* probe_kernel_read() suitable for use within regions where the caller
- * already holds mmap_sem, or other locks which nest inside mmap_sem.
- *
- * probe_kernel_read_strict() is the same as probe_kernel_read() except for
- * the case where architectures have non-overlapping user and kernel address
- * ranges: probe_kernel_read_strict() will additionally return -EFAULT for
- * probing memory on a user address range where probe_user_read() is supposed
- * to be used instead.
+ * already holds mmap_lock, or other locks which nest inside mmap_lock.
*/
-
-long __weak probe_kernel_read(void *dst, const void *src, size_t size)
- __attribute__((alias("__probe_kernel_read")));
-
-long __weak probe_kernel_read_strict(void *dst, const void *src, size_t size)
- __attribute__((alias("__probe_kernel_read")));
-
-long __probe_kernel_read(void *dst, const void *src, size_t size)
+long probe_kernel_read(void *dst, const void *src, size_t size)
{
long ret;
mm_segment_t old_fs = get_fs();
+ if (!probe_kernel_read_allowed(src, size))
+ return -ERANGE;
+
set_fs(KERNEL_DS);
- ret = probe_read_common(dst, (__force const void __user *)src, size);
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, (__force const void __user *)src,
+ size);
+ pagefault_enable();
set_fs(old_fs);
- return ret;
+ if (ret)
+ return -EFAULT;
+ return 0;
}
EXPORT_SYMBOL_GPL(probe_kernel_read);
/**
- * probe_user_read(): safely attempt to read from a user-space location
- * @dst: pointer to the buffer that shall take the data
- * @src: address to read from. This must be a user address.
- * @size: size of the data chunk
- *
- * Safely read from user address @src to the buffer at @dst. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
-
-long __weak probe_user_read(void *dst, const void __user *src, size_t size)
- __attribute__((alias("__probe_user_read")));
-
-long __probe_user_read(void *dst, const void __user *src, size_t size)
-{
- long ret = -EFAULT;
- mm_segment_t old_fs = get_fs();
-
- set_fs(USER_DS);
- if (access_ok(src, size))
- ret = probe_read_common(dst, src, size);
- set_fs(old_fs);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(probe_user_read);
-
-/**
* probe_kernel_write(): safely attempt to write to a location
* @dst: address to write to
* @src: pointer to the data that shall be written
@@ -106,52 +131,25 @@ EXPORT_SYMBOL_GPL(probe_user_read);
* Safely write to address @dst from the buffer at @src. If a kernel fault
* happens, handle that and return -EFAULT.
*/
-
-long __weak probe_kernel_write(void *dst, const void *src, size_t size)
- __attribute__((alias("__probe_kernel_write")));
-
-long __probe_kernel_write(void *dst, const void *src, size_t size)
+long probe_kernel_write(void *dst, const void *src, size_t size)
{
long ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
- ret = probe_write_common((__force void __user *)dst, src, size);
- set_fs(old_fs);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(probe_kernel_write);
-
-/**
- * probe_user_write(): safely attempt to write to a user-space location
- * @dst: address to write to
- * @src: pointer to the data that shall be written
- * @size: size of the data chunk
- *
- * Safely write to address @dst from the buffer at @src. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
-
-long __weak probe_user_write(void __user *dst, const void *src, size_t size)
- __attribute__((alias("__probe_user_write")));
-
-long __probe_user_write(void __user *dst, const void *src, size_t size)
-{
- long ret = -EFAULT;
- mm_segment_t old_fs = get_fs();
-
- set_fs(USER_DS);
- if (access_ok(dst, size))
- ret = probe_write_common(dst, src, size);
+ pagefault_disable();
+ ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+ pagefault_enable();
set_fs(old_fs);
- return ret;
+ if (ret)
+ return -EFAULT;
+ return 0;
}
-EXPORT_SYMBOL_GPL(probe_user_write);
/**
- * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
+ * strncpy_from_kernel_nofault: - Copy a NUL terminated string from unsafe
+ * address.
* @dst: Destination address, in kernel space. This buffer must be at
* least @count bytes long.
* @unsafe_addr: Unsafe address.
@@ -161,27 +159,14 @@ EXPORT_SYMBOL_GPL(probe_user_write);
*
* On success, returns the length of the string INCLUDING the trailing NUL.
*
- * If access fails, returns -EFAULT (some data may have been copied
- * and the trailing NUL added).
+ * If access fails, returns -EFAULT (some data may have been copied and the
+ * trailing NUL added). If @unsafe_addr is not a valid kernel address, return
+ * -ERANGE.
*
* If @count is smaller than the length of the string, copies @count-1 bytes,
* sets the last byte of @dst buffer to NUL and returns @count.
- *
- * strncpy_from_unsafe_strict() is the same as strncpy_from_unsafe() except
- * for the case where architectures have non-overlapping user and kernel address
- * ranges: strncpy_from_unsafe_strict() will additionally return -EFAULT for
- * probing memory on a user address range where strncpy_from_unsafe_user() is
- * supposed to be used instead.
*/
-
-long __weak strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
- __attribute__((alias("__strncpy_from_unsafe")));
-
-long __weak strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
- long count)
- __attribute__((alias("__strncpy_from_unsafe")));
-
-long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
+long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr, long count)
{
mm_segment_t old_fs = get_fs();
const void *src = unsafe_addr;
@@ -189,6 +174,8 @@ long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
if (unlikely(count <= 0))
return 0;
+ if (!probe_kernel_read_allowed(unsafe_addr, count))
+ return -ERANGE;
set_fs(KERNEL_DS);
pagefault_disable();
@@ -203,9 +190,66 @@ long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
return ret ? -EFAULT : src - unsafe_addr;
}
+#endif /* HAVE_GET_KERNEL_NOFAULT */
+
+/**
+ * probe_user_read(): safely attempt to read from a user-space location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from. This must be a user address.
+ * @size: size of the data chunk
+ *
+ * Safely read from user address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_user_read(void *dst, const void __user *src, size_t size)
+{
+ long ret = -EFAULT;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(USER_DS);
+ if (access_ok(src, size)) {
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst, src, size);
+ pagefault_enable();
+ }
+ set_fs(old_fs);
+
+ if (ret)
+ return -EFAULT;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(probe_user_read);
+
+/**
+ * probe_user_write(): safely attempt to write to a user-space location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_user_write(void __user *dst, const void *src, size_t size)
+{
+ long ret = -EFAULT;
+ mm_segment_t old_fs = get_fs();
+
+ set_fs(USER_DS);
+ if (access_ok(dst, size)) {
+ pagefault_disable();
+ ret = __copy_to_user_inatomic(dst, src, size);
+ pagefault_enable();
+ }
+ set_fs(old_fs);
+
+ if (ret)
+ return -EFAULT;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(probe_user_write);
/**
- * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user
+ * strncpy_from_user_nofault: - Copy a NUL terminated string from unsafe user
* address.
* @dst: Destination address, in kernel space. This buffer must be at
* least @count bytes long.
@@ -222,7 +266,7 @@ long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
* If @count is smaller than the length of the string, copies @count-1 bytes,
* sets the last byte of @dst buffer to NUL and returns @count.
*/
-long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
+long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr,
long count)
{
mm_segment_t old_fs = get_fs();
@@ -248,7 +292,7 @@ long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
}
/**
- * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL.
+ * strnlen_user_nofault: - Get the size of a user string INCLUDING final NUL.
* @unsafe_addr: The string to measure.
* @count: Maximum count (including NUL)
*
@@ -263,7 +307,7 @@ long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
* Unlike strnlen_user, this can be used from IRQ handler etc. because
* it disables pagefaults.
*/
-long strnlen_unsafe_user(const void __user *unsafe_addr, long count)
+long strnlen_user_nofault(const void __user *unsafe_addr, long count)
{
mm_segment_t old_fs = get_fs();
int ret;
diff --git a/mm/madvise.c b/mm/madvise.c
index 8cbd8c1bfe15..dd1d43cf026d 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -40,7 +40,7 @@ struct madvise_walk_private {
/*
* Any behaviour which results in changes to the vma->vm_flags needs to
- * take mmap_sem for writing. Others, which simply traverse vmas, need
+ * take mmap_lock for writing. Others, which simply traverse vmas, need
* to only take it for reading.
*/
static int madvise_need_mmap_write(int behavior)
@@ -165,7 +165,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
success:
/*
- * vm_flags is protected by the mmap_sem held in write mode.
+ * vm_flags is protected by the mmap_lock held in write mode.
*/
vma->vm_flags = new_flags;
@@ -285,16 +285,16 @@ static long madvise_willneed(struct vm_area_struct *vma,
* Filesystem's fadvise may need to take various locks. We need to
* explicitly grab a reference because the vma (and hence the
* vma's reference to the file) can go away as soon as we drop
- * mmap_sem.
+ * mmap_lock.
*/
- *prev = NULL; /* tell sys_madvise we drop mmap_sem */
+ *prev = NULL; /* tell sys_madvise we drop mmap_lock */
get_file(file);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
offset = (loff_t)(start - vma->vm_start)
+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
fput(file);
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
return 0;
}
@@ -768,9 +768,9 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
return -EINVAL;
if (!userfaultfd_remove(vma, start, end)) {
- *prev = NULL; /* mmap_sem has been dropped, prev is stale */
+ *prev = NULL; /* mmap_lock has been dropped, prev is stale */
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
vma = find_vma(current->mm, start);
if (!vma)
return -ENOMEM;
@@ -791,7 +791,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (end > vma->vm_end) {
/*
* Don't fail if end > vma->vm_end. If the old
- * vma was splitted while the mmap_sem was
+ * vma was splitted while the mmap_lock was
* released the effect of the concurrent
* operation may not cause madvise() to
* have an undefined result. There may be an
@@ -826,7 +826,7 @@ static long madvise_remove(struct vm_area_struct *vma,
int error;
struct file *f;
- *prev = NULL; /* tell sys_madvise we drop mmap_sem */
+ *prev = NULL; /* tell sys_madvise we drop mmap_lock */
if (vma->vm_flags & VM_LOCKED)
return -EINVAL;
@@ -847,18 +847,18 @@ static long madvise_remove(struct vm_area_struct *vma,
* Filesystem's fallocate may need to take i_mutex. We need to
* explicitly grab a reference because the vma (and hence the
* vma's reference to the file) can go away as soon as we drop
- * mmap_sem.
+ * mmap_lock.
*/
get_file(f);
if (userfaultfd_remove(vma, start, end)) {
- /* mmap_sem was not released by userfaultfd_remove() */
- up_read(&current->mm->mmap_sem);
+ /* mmap_lock was not released by userfaultfd_remove() */
+ mmap_read_unlock(current->mm);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
fput(f);
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
return error;
}
@@ -1089,7 +1089,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
write = madvise_need_mmap_write(behavior);
if (write) {
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
/*
@@ -1105,11 +1105,11 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
* model.
*/
if (!mmget_still_valid(current->mm)) {
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return -EINTR;
}
} else {
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
}
/*
@@ -1153,15 +1153,15 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
goto out;
if (prev)
vma = prev->vm_next;
- else /* madvise_remove dropped mmap_sem */
+ else /* madvise_remove dropped mmap_lock */
vma = find_vma(current->mm, start);
}
out:
blk_finish_plug(&plug);
if (write)
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
else
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
return error;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3dde78f5b918..0b38b6ad547d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5614,9 +5614,9 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
{
unsigned long precharge;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
precharge = mc.precharge;
mc.precharge = 0;
@@ -5899,9 +5899,9 @@ static void mem_cgroup_move_charge(void)
atomic_inc(&mc.from->moving_account);
synchronize_rcu();
retry:
- if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
+ if (unlikely(!mmap_read_trylock(mc.mm))) {
/*
- * Someone who are holding the mmap_sem might be waiting in
+ * Someone who are holding the mmap_lock might be waiting in
* waitq. So we cancel all extra charges, wake up all waiters,
* and retry. Because we cancel precharges, we might not be able
* to move enough charges, but moving charge is a best-effort
@@ -5918,7 +5918,7 @@ retry:
walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
NULL);
- up_read(&mc.mm->mmap_sem);
+ mmap_read_unlock(mc.mm);
atomic_dec(&mc.from->moving_account);
}
diff --git a/mm/memory.c b/mm/memory.c
index d97e8848892d..dc7f3543b1fd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -80,7 +80,6 @@
#include <linux/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
#include "internal.h"
@@ -1186,7 +1185,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
* Here there can be other concurrent MADV_DONTNEED or
* trans huge page faults running, and if the pmd is
* none or trans huge it can change under us. This is
- * because MADV_DONTNEED holds the mmap_sem in read
+ * because MADV_DONTNEED holds the mmap_lock in read
* mode.
*/
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
@@ -1212,7 +1211,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
next = pud_addr_end(addr, end);
if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
if (next - addr != HPAGE_PUD_SIZE) {
- VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma);
+ mmap_assert_locked(tlb->mm);
split_huge_pud(vma, pud, addr);
} else if (zap_huge_pud(tlb, vma, pud, addr))
goto next;
@@ -1593,7 +1592,7 @@ int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || end_addr >= vma->vm_end)
return -EFAULT;
if (!(vma->vm_flags & VM_MIXEDMAP)) {
- BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(vma->vm_mm));
BUG_ON(vma->vm_flags & VM_PFNMAP);
vma->vm_flags |= VM_MIXEDMAP;
}
@@ -1637,7 +1636,7 @@ EXPORT_SYMBOL(vm_insert_pages);
* The page does not need to be reserved.
*
* Usually this function is called from f_op->mmap() handler
- * under mm->mmap_sem write-lock, so it can change vma->vm_flags.
+ * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
* Caller must set VM_MIXEDMAP on vma if it wants to call this
* function from other places, for example from page-fault handler.
*
@@ -1651,7 +1650,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
if (!page_count(page))
return -EINVAL;
if (!(vma->vm_flags & VM_MIXEDMAP)) {
- BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(vma->vm_mm));
BUG_ON(vma->vm_flags & VM_PFNMAP);
vma->vm_flags |= VM_MIXEDMAP;
}
@@ -2574,7 +2573,7 @@ static vm_fault_t fault_dirty_shared_page(struct vm_fault *vmf)
* mapping may be NULL here because some device drivers do not
* set page.mapping but still dirty their pages
*
- * Drop the mmap_sem before waiting on IO, if we can. The file
+ * Drop the mmap_lock before waiting on IO, if we can. The file
* is pinning the mapping, as per above.
*/
if ((dirtied || page_mkwrite) && mapping) {
@@ -2624,7 +2623,7 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
/*
* Handle the case of a page which we actually need to copy to a new page.
*
- * Called with mmap_sem locked and the old page referenced, but
+ * Called with mmap_lock locked and the old page referenced, but
* without the ptl held.
*
* High level logic flow:
@@ -2888,9 +2887,9 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf)
* change only once the write actually happens. This avoids a few races,
* and potentially makes it more efficient.
*
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), with pte both mapped and locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with mmap_lock still held, but pte unmapped and unlocked.
*/
static vm_fault_t do_wp_page(struct vm_fault *vmf)
__releases(vmf->ptl)
@@ -3079,11 +3078,11 @@ void unmap_mapping_range(struct address_space *mapping,
EXPORT_SYMBOL(unmap_mapping_range);
/*
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with pte unmapped and unlocked.
*
- * We return with the mmap_sem locked or unlocked in the same cases
+ * We return with the mmap_lock locked or unlocked in the same cases
* as does filemap_fault().
*/
vm_fault_t do_swap_page(struct vm_fault *vmf)
@@ -3304,9 +3303,9 @@ out_release:
}
/*
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with mmap_lock still held, but pte unmapped and unlocked.
*/
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
@@ -3324,10 +3323,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
* pte_offset_map() on pmds where a huge pmd might be created
* from a different thread.
*
- * pte_alloc_map() is safe to use under down_write(mmap_sem) or when
+ * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
* parallel threads are excluded by other means.
*
- * Here we only have down_read(mmap_sem).
+ * Here we only have mmap_read_lock(mm).
*/
if (pte_alloc(vma->vm_mm, vmf->pmd))
return VM_FAULT_OOM;
@@ -3420,7 +3419,7 @@ oom:
}
/*
- * The mmap_sem must have been held on entry, and may have been
+ * The mmap_lock must have been held on entry, and may have been
* released depending on flags and vma->vm_ops->fault() return value.
* See filemap_fault() and __lock_page_retry().
*/
@@ -3929,11 +3928,11 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
}
/*
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults).
- * The mmap_sem may have been released depending on flags and our
+ * The mmap_lock may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
- * If mmap_sem is released, vma may become invalid (for example
+ * If mmap_lock is released, vma may become invalid (for example
* by other thread calling munmap()).
*/
static vm_fault_t do_fault(struct vm_fault *vmf)
@@ -4162,10 +4161,10 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
* with external mmu caches can use to update those (ie the Sparc or
* PowerPC hashed page tables that act as extended TLBs).
*
- * We enter with non-exclusive mmap_sem (to exclude vma changes, but allow
+ * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
* concurrent faults).
*
- * The mmap_sem may have been released depending on flags and our return value.
+ * The mmap_lock may have been released depending on flags and our return value.
* See filemap_fault() and __lock_page_or_retry().
*/
static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
@@ -4187,7 +4186,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
/*
* A regular pmd is established and it can't morph into a huge
* pmd from under us anymore at this point because we hold the
- * mmap_sem read mode and khugepaged takes it in write mode.
+ * mmap_lock read mode and khugepaged takes it in write mode.
* So now it's safe to run pte_offset_map().
*/
vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
@@ -4255,7 +4254,7 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
*
- * The mmap_sem may have been released depending on flags and our
+ * The mmap_lock may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
@@ -4350,7 +4349,7 @@ retry_pud:
/*
* By the time we get here, we already hold the mm semaphore
*
- * The mmap_sem may have been released depending on flags and our
+ * The mmap_lock may have been released depending on flags and our
* return value. See filemap_fault() and __lock_page_or_retry().
*/
vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
@@ -4659,7 +4658,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
void *old_buf = buf;
int write = gup_flags & FOLL_WRITE;
- if (down_read_killable(&mm->mmap_sem))
+ if (mmap_read_lock_killable(mm))
return 0;
/* ignore errors, just check how much was successfully transferred */
@@ -4710,7 +4709,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
buf += bytes;
addr += bytes;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return buf - old_buf;
}
@@ -4767,7 +4766,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
/*
* we might be running from an atomic context so we cannot sleep
*/
- if (!down_read_trylock(&mm->mmap_sem))
+ if (!mmap_read_trylock(mm))
return;
vma = find_vma(mm, ip);
@@ -4786,7 +4785,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
free_page((unsigned long)buf);
}
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
@@ -4794,7 +4793,7 @@ void __might_fault(const char *file, int line)
{
/*
* Some code (nfs/sunrpc) uses socket ops on kernel memory while
- * holding the mmap_sem, this is safe because kernel memory doesn't
+ * holding the mmap_lock, this is safe because kernel memory doesn't
* get paged out, therefore we'll never actually fault, and the
* below annotations will generate false positives.
*/
@@ -4805,7 +4804,7 @@ void __might_fault(const char *file, int line)
__might_sleep(file, line, 0);
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
if (current->mm)
- might_lock_read(&current->mm->mmap_sem);
+ might_lock_read(&current->mm->mmap_lock);
#endif
}
EXPORT_SYMBOL(__might_fault);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1965e2681877..381320671677 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -224,7 +224,7 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
* handle an empty nodemask with MPOL_PREFERRED here.
*
* Must be called holding task's alloc_lock to protect task's mems_allowed
- * and mempolicy. May also be called holding the mmap_semaphore for write.
+ * and mempolicy. May also be called holding the mmap_lock for write.
*/
static int mpol_set_nodemask(struct mempolicy *pol,
const nodemask_t *nodes, struct nodemask_scratch *nsc)
@@ -368,7 +368,7 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
/*
* mpol_rebind_policy - Migrate a policy to a different set of nodes
*
- * Per-vma policies are protected by mmap_sem. Allocations using per-task
+ * Per-vma policies are protected by mmap_lock. Allocations using per-task
* policies are protected by task->mems_allowed_seq to prevent a premature
* OOM/allocation failure due to parallel nodemask modification.
*/
@@ -398,17 +398,17 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
/*
* Rebind each vma in mm to new nodemask.
*
- * Call holding a reference to mm. Takes mm->mmap_sem during call.
+ * Call holding a reference to mm. Takes mm->mmap_lock during call.
*/
void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
struct vm_area_struct *vma;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next)
mpol_rebind_policy(vma->vm_policy, new);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
}
static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
@@ -764,7 +764,7 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
/*
* Apply policy to a single VMA
- * This must be called with the mmap_sem held for writing.
+ * This must be called with the mmap_lock held for writing.
*/
static int vma_replace_policy(struct vm_area_struct *vma,
struct mempolicy *pol)
@@ -789,7 +789,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
}
old = vma->vm_policy;
- vma->vm_policy = new; /* protected by mmap_sem */
+ vma->vm_policy = new; /* protected by mmap_lock */
mpol_put(old);
return 0;
@@ -932,7 +932,7 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
put_page(p);
}
if (locked)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return err;
}
@@ -965,10 +965,10 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
* vma/shared policy at addr is NULL. We
* want to return MPOL_DEFAULT in this case.
*/
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma_intersection(mm, addr, addr+1);
if (!vma) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return -EFAULT;
}
if (vma->vm_ops && vma->vm_ops->get_policy)
@@ -985,7 +985,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
if (flags & MPOL_F_ADDR) {
/*
* Take a refcount on the mpol, lookup_node()
- * wil drop the mmap_sem, so after calling
+ * wil drop the mmap_lock, so after calling
* lookup_node() only "pol" remains valid, "vma"
* is stale.
*/
@@ -1027,7 +1027,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
out:
mpol_cond_put(pol);
if (vma)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (pol_refcount)
mpol_put(pol_refcount);
return err;
@@ -1136,7 +1136,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
if (err)
return err;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
/*
* Find a 'source' bit set in 'tmp' whose corresponding 'dest'
@@ -1217,7 +1217,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
if (err < 0)
break;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (err < 0)
return err;
return busy;
@@ -1340,12 +1340,12 @@ static long do_mbind(unsigned long start, unsigned long len,
{
NODEMASK_SCRATCH(scratch);
if (scratch) {
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
task_lock(current);
err = mpol_set_nodemask(new, nmask, scratch);
task_unlock(current);
if (err)
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
} else
err = -ENOMEM;
NODEMASK_SCRATCH_FREE(scratch);
@@ -1382,7 +1382,7 @@ up_out:
putback_movable_pages(&pagelist);
}
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
mpol_out:
mpol_put(new);
return err;
@@ -2185,7 +2185,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
*
* This function allocates a page from the kernel page pool and applies
* a NUMA policy associated with the VMA or the current process.
- * When VMA is not NULL caller must hold down_read on the mmap_sem of the
+ * When VMA is not NULL caller must read-lock the mmap_lock of the
* mm_struct of the VMA to prevent it from going away. Should be used for
* all allocations for pages that will be mapped into user space. Returns
* NULL when no page can be allocated.
diff --git a/mm/migrate.c b/mm/migrate.c
index 7bfd0962149e..f37729673558 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1555,7 +1555,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
unsigned int follflags;
int err;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
err = -EFAULT;
vma = find_vma(mm, addr);
if (!vma || addr < vma->vm_start || !vma_migratable(vma))
@@ -1608,7 +1608,7 @@ out_putpage:
*/
put_page(page);
out:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return err;
}
@@ -1733,7 +1733,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
{
unsigned long i;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (i = 0; i < nr_pages; i++) {
unsigned long addr = (unsigned long)(*pages);
@@ -1760,7 +1760,7 @@ set_status:
status++;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
}
/*
@@ -2120,7 +2120,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
* pmd before doing set_pmd_at(), nor to flush the TLB after
* set_pmd_at(). Clearing the pmd here would introduce a race
* condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
- * mmap_sem for reading. If the pmd is set to NULL at any given time,
+ * mmap_lock for reading. If the pmd is set to NULL at any given time,
* MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
* pmd.
*/
@@ -2675,7 +2675,7 @@ restore:
* have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
*
* It is safe to update device page table after migrate_vma_pages() because
- * both destination and source page are still locked, and the mmap_sem is held
+ * both destination and source page are still locked, and the mmap_lock is held
* in read mode (hence no one can unmap the range being migrated).
*
* Once the caller is done cleaning up things and updating its page table (if it
@@ -2772,10 +2772,10 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
* pte_offset_map() on pmds where a huge pmd might be created
* from a different thread.
*
- * pte_alloc_map() is safe to use under down_write(mmap_sem) or when
+ * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
* parallel threads are excluded by other means.
*
- * Here we only have down_read(mmap_sem).
+ * Here we only have mmap_read_lock(mm).
*/
if (pte_alloc(mm, pmdp))
goto abort;
diff --git a/mm/mincore.c b/mm/mincore.c
index 0e6dd9948f1a..453ff112470f 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -17,9 +17,9 @@
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
#include <linux/hugetlb.h>
+#include <linux/pgtable.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
unsigned long end, struct mm_walk *walk)
@@ -284,9 +284,9 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
* Do at most PAGE_SIZE entries per iteration, due to
* the temporary buffer size.
*/
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (retval <= 0)
break;
diff --git a/mm/mlock.c b/mm/mlock.c
index a72c1eeded77..f8736136fad7 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(can_do_mlock);
* When lazy mlocking via vmscan, it is important to ensure that the
* vma's VM_LOCKED status is not concurrently being modified, otherwise we
* may have mlocked a page that is being munlocked. So lazy mlock must take
- * the mmap_sem for read, and verify that the vma really is locked
+ * the mmap_lock for read, and verify that the vma really is locked
* (see mm/rmap.c).
*/
@@ -381,7 +381,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
/*
* Initialize pte walk starting at the already pinned page where we
* are sure that there is a pte, as it was pinned under the same
- * mmap_sem write op.
+ * mmap_lock write op.
*/
pte = get_locked_pte(vma->vm_mm, start, &ptl);
/* Make sure we do not cross the page table boundary */
@@ -565,7 +565,7 @@ success:
mm->locked_vm += nr_pages;
/*
- * vm_flags is protected by the mmap_sem held in write mode.
+ * vm_flags is protected by the mmap_lock held in write mode.
* It's okay if try_to_unmap_one unmaps a page just after we
* set VM_LOCKED, populate_vma_page_range will bring it back.
*/
@@ -686,7 +686,7 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
lock_limit >>= PAGE_SHIFT;
locked = len >> PAGE_SHIFT;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
locked += current->mm->locked_vm;
@@ -705,7 +705,7 @@ static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
error = apply_vma_lock_flags(start, len, flags);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (error)
return error;
@@ -742,10 +742,10 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
len = PAGE_ALIGN(len + (offset_in_page(start)));
start &= PAGE_MASK;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = apply_vma_lock_flags(start, len, 0);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
@@ -811,14 +811,14 @@ SYSCALL_DEFINE1(mlockall, int, flags)
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = -ENOMEM;
if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
capable(CAP_IPC_LOCK))
ret = apply_mlockall_flags(flags);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (!ret && (flags & MCL_CURRENT))
mm_populate(0, TASK_SIZE);
@@ -829,10 +829,10 @@ SYSCALL_DEFINE0(munlockall)
{
int ret;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
ret = apply_mlockall_flags(0);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 39bd60c20a82..59a4682ebf3f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -132,7 +132,7 @@ void vma_set_page_prot(struct vm_area_struct *vma)
vm_flags &= ~VM_SHARED;
vm_page_prot = vm_pgprot_modify(vm_page_prot, vm_flags);
}
- /* remove_protection_ptes reads vma->vm_page_prot without mmap_sem */
+ /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */
WRITE_ONCE(vma->vm_page_prot, vm_page_prot);
}
@@ -198,7 +198,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
bool downgraded = false;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
origbrk = mm->brk;
@@ -238,14 +238,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
/*
* Always allow shrinking brk.
- * __do_munmap() may downgrade mmap_sem to read.
+ * __do_munmap() may downgrade mmap_lock to read.
*/
if (brk <= mm->brk) {
int ret;
/*
- * mm->brk must to be protected by write mmap_sem so update it
- * before downgrading mmap_sem. When __do_munmap() fails,
+ * mm->brk must to be protected by write mmap_lock so update it
+ * before downgrading mmap_lock. When __do_munmap() fails,
* mm->brk will be restored from origbrk.
*/
mm->brk = brk;
@@ -272,9 +272,9 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
success:
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
if (downgraded)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(oldbrk, newbrk - oldbrk);
@@ -282,7 +282,7 @@ success:
out:
retval = origbrk;
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return retval;
}
@@ -505,7 +505,7 @@ static __always_inline void vma_rb_erase(struct vm_area_struct *vma,
* After the update, the vma will be reinserted using
* anon_vma_interval_tree_post_update_vma().
*
- * The entire update must be protected by exclusive mmap_sem and by
+ * The entire update must be protected by exclusive mmap_lock and by
* the root anon_vma's mutex.
*/
static inline void
@@ -1361,7 +1361,7 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
}
/*
- * The caller must hold down_write(&current->mm->mmap_sem).
+ * The caller must write-lock current->mm->mmap_lock.
*/
unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot,
@@ -2371,7 +2371,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2389,7 +2389,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2451,7 +2451,7 @@ int expand_downwards(struct vm_area_struct *vma,
/*
* vma->vm_start/vm_end cannot change under us because the caller
- * is required to hold the mmap_sem in read mode. We need the
+ * is required to hold the mmap_lock in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
anon_vma_lock_write(vma->anon_vma);
@@ -2469,7 +2469,7 @@ int expand_downwards(struct vm_area_struct *vma,
if (!error) {
/*
* vma_gap_update() doesn't support concurrent
- * updates, but we only hold a shared mmap_sem
+ * updates, but we only hold a shared mmap_lock
* lock here, so we need to protect against
* concurrent vma expansions.
* anon_vma_lock_write() doesn't help here, as
@@ -2828,7 +2828,7 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
detach_vmas_to_be_unmapped(mm, vma, prev, end);
if (downgrade)
- downgrade_write(&mm->mmap_sem);
+ mmap_write_downgrade(mm);
unmap_region(mm, vma, prev, start, end);
@@ -2850,20 +2850,20 @@ static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = __do_munmap(mm, start, len, &uf, downgrade);
/*
- * Returning 1 indicates mmap_sem is downgraded.
+ * Returning 1 indicates mmap_lock is downgraded.
* But 1 is not legal return value of vm_munmap() and munmap(), reset
* it to 0 before return.
*/
if (ret == 1) {
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
ret = 0;
} else
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
return ret;
@@ -2911,7 +2911,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (pgoff + (size >> PAGE_SHIFT) < pgoff)
return ret;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
vma = find_vma(mm, start);
@@ -2974,7 +2974,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
prot, flags, pgoff, &populate, NULL);
fput(file);
out:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
if (populate)
mm_populate(ret, populate);
if (!IS_ERR_VALUE(ret))
@@ -3074,12 +3074,12 @@ int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
if (!len)
return 0;
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = do_brk_flags(addr, len, flags, &uf);
populate = ((mm->def_flags & VM_LOCKED) != 0);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate && !ret)
mm_populate(addr, len);
@@ -3107,12 +3107,12 @@ void exit_mmap(struct mm_struct *mm)
/*
* Manually reap the mm to free as much memory as possible.
* Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
- * this mm from further consideration. Taking mm->mmap_sem for
+ * this mm from further consideration. Taking mm->mmap_lock for
* write after setting MMF_OOM_SKIP will guarantee that the oom
- * reaper will not run on this mm again after mmap_sem is
+ * reaper will not run on this mm again after mmap_lock is
* dropped.
*
- * Nothing can be holding mm->mmap_sem here and the above call
+ * Nothing can be holding mm->mmap_lock here and the above call
* to mmu_notifier_release(mm) ensures mmu notifier callbacks in
* __oom_reap_task_mm() will not block.
*
@@ -3123,8 +3123,8 @@ void exit_mmap(struct mm_struct *mm)
(void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+ mmap_write_unlock(mm);
}
if (mm->locked_vm) {
@@ -3437,7 +3437,7 @@ bool vma_is_special_mapping(const struct vm_area_struct *vma,
}
/*
- * Called with mm->mmap_sem held for writing.
+ * Called with mm->mmap_lock held for writing.
* Insert a new vma covering the given region, with the given flags.
* Its pages are supplied by the given array of struct page *.
* The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
@@ -3474,7 +3474,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
/*
* We can safely modify head.next after taking the
* anon_vma->root->rwsem. If some other vma in this mm shares
@@ -3504,7 +3504,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
*/
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
- down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_sem);
+ down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
}
}
@@ -3513,11 +3513,11 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
* operations that could ever happen on a certain mm. This includes
* vmtruncate, try_to_unmap, and all page faults.
*
- * The caller must take the mmap_sem in write mode before calling
+ * The caller must take the mmap_lock in write mode before calling
* mm_take_all_locks(). The caller isn't allowed to release the
- * mmap_sem until mm_drop_all_locks() returns.
+ * mmap_lock until mm_drop_all_locks() returns.
*
- * mmap_sem in write mode is required in order to block all operations
+ * mmap_lock in write mode is required in order to block all operations
* that could modify pagetables and free pages without need of
* altering the vma layout. It's also needed in write mode to avoid new
* anon_vmas to be associated with existing vmas.
@@ -3550,7 +3550,7 @@ int mm_take_all_locks(struct mm_struct *mm)
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
mutex_lock(&mm_all_locks_mutex);
@@ -3622,7 +3622,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
}
/*
- * The mmap_sem cannot be released by the caller until
+ * The mmap_lock cannot be released by the caller until
* mm_drop_all_locks() returns.
*/
void mm_drop_all_locks(struct mm_struct *mm)
@@ -3630,7 +3630,7 @@ void mm_drop_all_locks(struct mm_struct *mm)
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
- BUG_ON(down_read_trylock(&mm->mmap_sem));
+ BUG_ON(mmap_read_trylock(mm));
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
for (vma = mm->mmap; vma; vma = vma->vm_next) {
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index a3538cb2bcbe..03c33c93a582 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -301,7 +301,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
{
/*
* If there are parallel threads are doing PTE changes on same range
- * under non-exclusive lock (e.g., mmap_sem read-side) but defer TLB
+ * under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB
* flush by batching, one thread may end up seeing inconsistent PTEs
* and result in having stale TLB entries. So flush TLB forcefully
* if we detect parallel PTE batching threads.
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 06852b896fa6..352bb9f3ecc0 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -599,7 +599,7 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
}
/*
- * Same as mmu_notifier_register but here the caller must hold the mmap_sem in
+ * Same as mmu_notifier_register but here the caller must hold the mmap_lock in
* write mode. A NULL mn signals the notifier is being registered for itree
* mode.
*/
@@ -609,7 +609,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
struct mmu_notifier_subscriptions *subscriptions = NULL;
int ret;
- lockdep_assert_held_write(&mm->mmap_sem);
+ mmap_assert_write_locked(mm);
BUG_ON(atomic_read(&mm->mm_users) <= 0);
if (IS_ENABLED(CONFIG_LOCKDEP)) {
@@ -623,7 +623,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
/*
* kmalloc cannot be called under mm_take_all_locks(), but we
* know that mm->notifier_subscriptions can't change while we
- * hold the write side of the mmap_sem.
+ * hold the write side of the mmap_lock.
*/
subscriptions = kzalloc(
sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL);
@@ -655,7 +655,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
* readers. acquire can only be used while holding the mmgrab or
* mmget, and is safe because once created the
* mmu_notifier_subscriptions is not freed until the mm is destroyed.
- * As above, users holding the mmap_sem or one of the
+ * As above, users holding the mmap_lock or one of the
* mm_take_all_locks() do not need to use acquire semantics.
*/
if (subscriptions)
@@ -689,7 +689,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register);
* @mn: The notifier to attach
* @mm: The mm to attach the notifier to
*
- * Must not hold mmap_sem nor any other VM related lock when calling
+ * Must not hold mmap_lock nor any other VM related lock when calling
* this registration function. Must also ensure mm_users can't go down
* to zero while this runs to avoid races with mmu_notifier_release,
* so mm has to be current->mm or the mm should be pinned safely such
@@ -708,9 +708,9 @@ int mmu_notifier_register(struct mmu_notifier *subscription,
{
int ret;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = __mmu_notifier_register(subscription, mm);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);
@@ -750,7 +750,7 @@ find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
* are the same.
*
* Each call to mmu_notifier_get() must be paired with a call to
- * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock.
*
* While the caller has a mmu_notifier get the mm pointer will remain valid,
* and can be converted to an active mm pointer via mmget_not_zero().
@@ -761,7 +761,7 @@ struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
struct mmu_notifier *subscription;
int ret;
- lockdep_assert_held_write(&mm->mmap_sem);
+ mmap_assert_write_locked(mm);
if (mm->notifier_subscriptions) {
subscription = find_get_mmu_notifier(mm, ops);
@@ -983,7 +983,7 @@ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
struct mmu_notifier_subscriptions *subscriptions;
int ret;
- might_lock(&mm->mmap_sem);
+ might_lock(&mm->mmap_lock);
subscriptions = smp_load_acquire(&mm->notifier_subscriptions);
if (!subscriptions || !subscriptions->has_itree) {
@@ -1006,7 +1006,7 @@ int mmu_interval_notifier_insert_locked(
mm->notifier_subscriptions;
int ret;
- lockdep_assert_held_write(&mm->mmap_sem);
+ mmap_assert_write_locked(mm);
if (!subscriptions || !subscriptions->has_itree) {
ret = __mmu_notifier_register(NULL, mm);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 494192ca954b..ce8b8a5eacbb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -28,7 +28,7 @@
#include <linux/ksm.h>
#include <linux/uaccess.h>
#include <linux/mm_inline.h>
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -49,7 +49,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
/*
- * Can be called with only the mmap_sem for reading by
+ * Can be called with only the mmap_lock for reading by
* prot_numa so we must check the pmd isn't constantly
* changing from under us from pmd_none to pmd_trans_huge
* and/or the other way around.
@@ -59,7 +59,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
/*
* The pmd points to a regular pte so the pmd can't change
- * from under us even if the mmap_sem is only hold for
+ * from under us even if the mmap_lock is only hold for
* reading.
*/
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
@@ -228,7 +228,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
next = pmd_addr_end(addr, end);
/*
- * Automatic NUMA balancing walks the tables with mmap_sem
+ * Automatic NUMA balancing walks the tables with mmap_lock
* held for read. It's possible a parallel update to occur
* between pmd_trans_huge() and a pmd_none_or_clear_bad()
* check leading to a false positive and clearing.
@@ -477,7 +477,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
success:
/*
- * vm_flags and vm_page_prot are protected by the mmap_sem
+ * vm_flags and vm_page_prot are protected by the mmap_lock
* held in write mode.
*/
vma->vm_flags = newflags;
@@ -538,7 +538,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
reqprot = prot;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
/*
@@ -628,7 +628,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
prot = reqprot;
}
out:
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return error;
}
@@ -658,7 +658,7 @@ SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
if (init_val & ~PKEY_ACCESS_MASK)
return -EINVAL;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
pkey = mm_pkey_alloc(current->mm);
ret = -ENOSPC;
@@ -672,7 +672,7 @@ SYSCALL_DEFINE2(pkey_alloc, unsigned long, flags, unsigned long, init_val)
}
ret = pkey;
out:
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
@@ -680,9 +680,9 @@ SYSCALL_DEFINE1(pkey_free, int, pkey)
{
int ret;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
ret = mm_pkey_free(current->mm, pkey);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
/*
* We could provie warnings or errors if any VMA still
diff --git a/mm/mremap.c b/mm/mremap.c
index 7d69e3fe51aa..5dd572d57ca9 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -146,7 +146,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
/*
* We don't have to worry about the ordering of src and dst
- * pte locks because exclusive mmap_sem prevents deadlock.
+ * pte locks because exclusive mmap_lock prevents deadlock.
*/
old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
new_pte = pte_offset_map(new_pmd, new_addr);
@@ -213,7 +213,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
/*
* We don't have to worry about the ordering of src and dst
- * ptlocks because exclusive mmap_sem prevents deadlock.
+ * ptlocks because exclusive mmap_lock prevents deadlock.
*/
old_ptl = pmd_lock(vma->vm_mm, old_pmd);
new_ptl = pmd_lockptr(mm, new_pmd);
@@ -696,7 +696,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (!new_len)
return ret;
- if (down_write_killable(&current->mm->mmap_sem))
+ if (mmap_write_lock_killable(current->mm))
return -EINTR;
if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
@@ -710,7 +710,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
* __do_munmap does all the needed commit accounting, and
- * downgrades mmap_sem to read if so directed.
+ * downgrades mmap_lock to read if so directed.
*/
if (old_len >= new_len) {
int retval;
@@ -720,7 +720,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
if (retval < 0 && old_len != new_len) {
ret = retval;
goto out;
- /* Returning 1 indicates mmap_sem is downgraded to read. */
+ /* Returning 1 indicates mmap_lock is downgraded to read. */
} else if (retval == 1)
downgraded = true;
ret = addr;
@@ -788,9 +788,9 @@ out:
locked = false;
}
if (downgraded)
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
else
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
userfaultfd_unmap_complete(mm, &uf_unmap_early);
diff --git a/mm/msync.c b/mm/msync.c
index c3bd3e75f687..69c6d2029531 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -57,7 +57,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
* If the interval [start,end) covers some unmapped address ranges,
* just ignore them, but return -ENOMEM at the end.
*/
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, start);
for (;;) {
struct file *file;
@@ -88,12 +88,12 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
if ((flags & MS_SYNC) && file &&
(vma->vm_flags & VM_SHARED)) {
get_file(file);
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
error = vfs_fsync_range(file, fstart, fend, 1);
fput(file);
if (error || start >= end)
goto out;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
vma = find_vma(mm, start);
} else {
if (start >= end) {
@@ -104,7 +104,7 @@ SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
}
}
out_unlock:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out:
return error ? : unmapped_error;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 062dc1c90d21..cdcad5d61dd1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -172,11 +172,11 @@ static void *__vmalloc_user_flags(unsigned long size, gfp_t flags)
if (ret) {
struct vm_area_struct *vma;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
vma = find_vma(current->mm, (unsigned long)ret);
if (vma)
vma->vm_flags |= VM_USERMAP;
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
}
return ret;
@@ -582,7 +582,7 @@ static void put_nommu_region(struct vm_region *region)
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
* page
- * - should be called with mm->mmap_sem held writelocked
+ * - should be called with mm->mmap_lock held writelocked
*/
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
@@ -696,7 +696,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
/*
* look up the first VMA in which addr resides, NULL if none
- * - should be called with mm->mmap_sem at least held readlocked
+ * - should be called with mm->mmap_lock at least held readlocked
*/
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
@@ -742,7 +742,7 @@ int expand_stack(struct vm_area_struct *vma, unsigned long address)
/*
* look up the first VMA exactly that exactly matches addr
- * - should be called with mm->mmap_sem at least held readlocked
+ * - should be called with mm->mmap_lock at least held readlocked
*/
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
unsigned long addr,
@@ -1542,9 +1542,9 @@ int vm_munmap(unsigned long addr, size_t len)
struct mm_struct *mm = current->mm;
int ret;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = do_munmap(mm, addr, len, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
EXPORT_SYMBOL(vm_munmap);
@@ -1631,9 +1631,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
{
unsigned long ret;
- down_write(&current->mm->mmap_sem);
+ mmap_write_lock(current->mm);
ret = do_mremap(addr, old_len, new_len, flags, new_addr);
- up_write(&current->mm->mmap_sem);
+ mmap_write_unlock(current->mm);
return ret;
}
@@ -1705,7 +1705,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
struct vm_area_struct *vma;
int write = gup_flags & FOLL_WRITE;
- if (down_read_killable(&mm->mmap_sem))
+ if (mmap_read_lock_killable(mm))
return 0;
/* the access must start within one of the target process's mappings */
@@ -1728,7 +1728,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
len = 0;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return len;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4daedf7b91f6..b4e9491cb320 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -569,7 +569,7 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
- if (!down_read_trylock(&mm->mmap_sem)) {
+ if (!mmap_read_trylock(mm)) {
trace_skip_task_reaping(tsk->pid);
return false;
}
@@ -577,8 +577,8 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
/*
* MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
* work on the mm anymore. The check for MMF_OOM_SKIP must run
- * under mmap_sem for reading because it serializes against the
- * down_write();up_write() cycle in exit_mmap().
+ * under mmap_lock for reading because it serializes against the
+ * mmap_write_lock();mmap_write_unlock() cycle in exit_mmap().
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
trace_skip_task_reaping(tsk->pid);
@@ -600,7 +600,7 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
out_finish:
trace_finish_task_reaping(tsk->pid);
out_unlock:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return ret;
}
@@ -611,7 +611,7 @@ static void oom_reap_task(struct task_struct *tsk)
int attempts = 0;
struct mm_struct *mm = tsk->signal->oom_mm;
- /* Retry the down_read_trylock(mmap_sem) a few times */
+ /* Retry the mmap_read_trylock(mm) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
@@ -629,7 +629,7 @@ done:
/*
* Hide this mm from OOM killer because it has been either reaped or
- * somebody can't call up_write(mmap_sem).
+ * somebody can't call mmap_write_unlock(mm).
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
@@ -898,7 +898,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
/*
* Kill all user processes sharing victim->mm in other thread groups, if
* any. They don't get access to memory reserves, though, to avoid
- * depletion of all memory. This prevents mm->mmap_sem livelock when an
+ * depletion of all memory. This prevents mm->mmap_lock livelock when an
* oom killed thread cannot exit because it requires the semaphore and
* its contended by another thread trying to allocate memory itself.
* That thread will now get access to memory reserves since it has a
diff --git a/mm/page_io.c b/mm/page_io.c
index 76965be1d40e..e8726f3e3820 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -25,7 +25,6 @@
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
-#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags,
struct page *page, bio_end_io_t end_io)
diff --git a/mm/page_reporting.h b/mm/page_reporting.h
index aa6d37f4dc22..2c385dd4ddbd 100644
--- a/mm/page_reporting.h
+++ b/mm/page_reporting.h
@@ -7,7 +7,7 @@
#include <linux/page-isolation.h>
#include <linux/jump_label.h>
#include <linux/slab.h>
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#include <linux/scatterlist.h>
#define PAGE_REPORTING_MIN_ORDER pageblock_order
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 928df1638c30..e81640d9f177 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -373,7 +373,7 @@ static int __walk_page_range(unsigned long start, unsigned long end,
* caller-specific data to callbacks, @private should be helpful.
*
* Locking:
- * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem,
+ * Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_lock,
* because these function traverse vma list and/or access to vma's data.
*/
int walk_page_range(struct mm_struct *mm, unsigned long start,
@@ -395,7 +395,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (!walk.mm)
return -EINVAL;
- lockdep_assert_held(&walk.mm->mmap_sem);
+ mmap_assert_locked(walk.mm);
vma = find_vma(walk.mm, start);
do {
@@ -453,7 +453,7 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
if (start >= end || !walk.mm)
return -EINVAL;
- lockdep_assert_held(&walk.mm->mmap_sem);
+ mmap_assert_locked(walk.mm);
return __walk_page_range(start, end, &walk);
}
@@ -472,7 +472,7 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
if (!walk.mm)
return -EINVAL;
- lockdep_assert_held(&walk.mm->mmap_sem);
+ mmap_assert_locked(walk.mm);
err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
if (err > 0)
@@ -498,11 +498,11 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
* Also see walk_page_range() for additional information.
*
* Locking:
- * This function can't require that the struct mm_struct::mmap_sem is held,
+ * This function can't require that the struct mm_struct::mmap_lock is held,
* since @mapping may be mapped by multiple processes. Instead
* @mapping->i_mmap_rwsem must be held. This might have implications in the
* callbacks, and it's up tho the caller to ensure that the
- * struct mm_struct::mmap_sem is not needed.
+ * struct mm_struct::mmap_lock is not needed.
*
* Also this means that a caller can't rely on the struct
* vm_area_struct::vm_flags to be constant across a call,
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index d18f0e1b6792..9578db83e312 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -2,15 +2,15 @@
/*
* mm/pgtable-generic.c
*
- * Generic pgtable methods declared in asm-generic/pgtable.h
+ * Generic pgtable methods declared in linux/pgtable.h
*
* Copyright (C) 2010 Linus Torvalds
*/
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
+#include <linux/pgtable.h>
#include <asm/tlb.h>
-#include <asm-generic/pgtable.h>
/*
* If a p?d_bad entry is found while walking page tables, report
@@ -53,7 +53,7 @@ void pmd_clear_bad(pmd_t *pmd)
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
- * Only sets the access flags (dirty, accessed), as well as write
+ * Only sets the access flags (dirty, accessed), as well as write
* permission. Furthermore, we know it always gets set to a "more
* permissive" setting, which allows most architectures to optimize
* this. We return whether the PTE actually changed, which in turn
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 74e957e302fe..cc85ce81914a 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -104,12 +104,12 @@ static int process_vm_rw_single_vec(unsigned long addr,
* access remotely because task/mm might not
* current/current->mm
*/
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
pinned_pages = pin_user_pages_remote(task, mm, pa, pinned_pages,
flags, process_pages,
NULL, &locked);
if (locked)
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
if (pinned_pages <= 0)
return -EFAULT;
diff --git a/mm/ptdump.c b/mm/ptdump.c
index f4ce916f5602..ba88ec43ff21 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -141,13 +141,13 @@ void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
const struct ptdump_range *range = st->range;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
while (range->start != range->end) {
walk_page_range_novma(mm, range->start, range->end,
&ptdump_ops, pgd, st);
range++;
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
/* Flush out the last page */
st->note_page(st, 0, -1, 0);
diff --git a/mm/rmap.c b/mm/rmap.c
index ad4a0fdcc94c..5fe2dedce1fc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -21,7 +21,7 @@
* Lock ordering in mm:
*
* inode->i_mutex (while writing or truncating, not reading or faulting)
- * mm->mmap_sem
+ * mm->mmap_lock
* page->flags PG_locked (lock_page) * (see huegtlbfs below)
* hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
* mapping->i_mmap_rwsem
@@ -177,7 +177,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
* to do any locking for the common case of already having
* an anon_vma.
*
- * This must be called with the mmap_sem held for reading.
+ * This must be called with the mmap_lock held for reading.
*/
int __anon_vma_prepare(struct vm_area_struct *vma)
{
@@ -1444,7 +1444,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if (!PageTransCompound(page)) {
/*
* Holding pte lock, we do *not* need
- * mmap_sem here
+ * mmap_lock here
*/
mlock_vma_page(page);
}
@@ -1817,7 +1817,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
/*
* Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
* because that depends on page_mapped(); but not all its usages
- * are holding mmap_sem. Users without mmap_sem are required to
+ * are holding mmap_lock. Users without mmap_lock are required to
* take a reference count to prevent the anon_vma disappearing
*/
anon_vma = page_anon_vma(page);
@@ -1837,7 +1837,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the anon_vma struct it points to.
*
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
@@ -1889,7 +1889,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
* Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to.
*
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * When called from try_to_munlock(), the mmap_lock of the mm containing the vma
* where the page was found will be held for write. So, we won't recheck
* vm_flags for that VMA. That should be OK, because that vma shouldn't be
* LOCKED.
diff --git a/mm/shmem.c b/mm/shmem.c
index ea95a3e46fbb..a0dbe62f8042 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -82,7 +82,6 @@ static struct vfsmount *shm_mnt;
#include <linux/uuid.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include "internal.h"
@@ -2320,7 +2319,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
PAGE_SIZE);
kunmap_atomic(page_kaddr);
- /* fallback to copy_from_user outside mmap_sem */
+ /* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
*pagep = page;
shmem_inode_unacct_blocks(inode, 1);
@@ -4137,7 +4136,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
loff_t size = vma->vm_end - vma->vm_start;
/*
- * Cloning a new file under mmap_sem leads to a lock ordering conflict
+ * Cloning a new file under mmap_lock leads to a lock ordering conflict
* between XFS directory reading and selinux: since this file is only
* accessible to the user through its mapping, use S_PRIVATE flag to
* bypass file security, in the same way as shmem_kernel_file_setup().
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 200aef686722..0db7738d76e9 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -29,7 +29,6 @@
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
/*
* Allocate a block of memory to be used to back the virtual memory map
diff --git a/mm/sparse.c b/mm/sparse.c
index 6284328cd9f2..b2b9a3e34696 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -17,7 +17,6 @@
#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
/*
* Permanent SPARSEMEM data:
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 9d20b00627af..e98ff460e9e9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -22,7 +22,6 @@
#include <linux/swap_slots.h>
#include <linux/huge_mm.h>
-#include <asm/pgtable.h>
/*
* swapper_space is a fiction, retained to simplify the path through
@@ -553,7 +552,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
* This has been extended to use the NUMA policies from the mm triggering
* the readahead.
*
- * Caller must hold read mmap_sem if vmf->vma is not NULL.
+ * Caller must hold read mmap_lock if vmf->vma is not NULL.
*/
struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
struct vm_fault *vmf)
@@ -735,7 +734,7 @@ static void swap_ra_info(struct vm_fault *vmf,
* Primitive swap readahead code. We simply read in a few pages whoes
* virtual addresses are around the fault address in the same vma.
*
- * Caller must hold read mmap_sem if vmf->vma is not NULL.
+ * Caller must hold read mmap_lock if vmf->vma is not NULL.
*
*/
static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a3d191e205f2..987276c557d1 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -40,7 +40,6 @@
#include <linux/swap_slots.h>
#include <linux/sort.h>
-#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
#include <linux/swap_cgroup.h>
@@ -2101,7 +2100,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
struct vm_area_struct *vma;
int ret = 0;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma) {
ret = unuse_vma(vma, type, frontswap,
@@ -2111,7 +2110,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type,
}
cond_resched();
}
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
return ret;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7f5194046b01..b80419320c7d 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -76,7 +76,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
PAGE_SIZE);
kunmap_atomic(page_kaddr);
- /* fallback to copy_from_user outside mmap_sem */
+ /* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
*pagep = page;
@@ -200,7 +200,7 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
#ifdef CONFIG_HUGETLB_PAGE
/*
* __mcopy_atomic processing for HUGETLB vmas. Note that this routine is
- * called with mmap_sem held, it will release mmap_sem before returning.
+ * called with mmap_lock held, it will release mmap_lock before returning.
*/
static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma,
@@ -228,7 +228,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
* feature is not supported.
*/
if (zeropage) {
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
return -EINVAL;
}
@@ -247,7 +247,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
retry:
/*
- * On routine entry dst_vma is set. If we had to drop mmap_sem and
+ * On routine entry dst_vma is set. If we had to drop mmap_lock and
* retry, dst_vma will be set to NULL and we must lookup again.
*/
if (!dst_vma) {
@@ -315,7 +315,7 @@ retry:
cond_resched();
if (unlikely(err == -ENOENT)) {
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
BUG_ON(!page);
err = copy_huge_page_from_user(page,
@@ -326,7 +326,7 @@ retry:
err = -EFAULT;
goto out;
}
- down_read(&dst_mm->mmap_sem);
+ mmap_read_lock(dst_mm);
dst_vma = NULL;
goto retry;
@@ -346,7 +346,7 @@ retry:
}
out_unlock:
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
out:
if (page) {
/*
@@ -357,7 +357,7 @@ out:
* private and shared mappings. See the routine
* restore_reserve_on_error for details. Unfortunately, we
* can not call restore_reserve_on_error now as it would
- * require holding mmap_sem.
+ * require holding mmap_lock.
*
* If a reservation for the page existed in the reservation
* map of a private mapping, the map was modified to indicate
@@ -485,7 +485,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
copied = 0;
page = NULL;
retry:
- down_read(&dst_mm->mmap_sem);
+ mmap_read_lock(dst_mm);
/*
* If memory mappings are changing because of non-cooperative
@@ -583,7 +583,7 @@ retry:
if (unlikely(err == -ENOENT)) {
void *page_kaddr;
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
BUG_ON(!page);
page_kaddr = kmap(page);
@@ -612,7 +612,7 @@ retry:
}
out_unlock:
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
out:
if (page)
put_page(page);
@@ -652,7 +652,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
/* Does the address range wrap, or is the span zero-sized? */
BUG_ON(start + len <= start);
- down_read(&dst_mm->mmap_sem);
+ mmap_read_lock(dst_mm);
/*
* If memory mappings are changing because of non-cooperative
@@ -686,6 +686,6 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
err = 0;
out_unlock:
- up_read(&dst_mm->mmap_sem);
+ mmap_read_unlock(dst_mm);
return err;
}
diff --git a/mm/util.c b/mm/util.c
index cd62e6fb5318..c63c8e47be57 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -425,7 +425,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
* @bypass_rlim: %true if checking RLIMIT_MEMLOCK should be skipped
*
* Assumes @task and @mm are valid (i.e. at least one reference on each), and
- * that mmap_sem is held as writer.
+ * that mmap_lock is held as writer.
*
* Return:
* * 0 on success
@@ -437,7 +437,7 @@ int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
unsigned long locked_vm, limit;
int ret = 0;
- lockdep_assert_held_write(&mm->mmap_sem);
+ mmap_assert_write_locked(mm);
locked_vm = mm->locked_vm;
if (inc) {
@@ -481,10 +481,10 @@ int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc)
if (pages == 0 || !mm)
return 0;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = __account_locked_vm(mm, pages, inc, current,
capable(CAP_IPC_LOCK));
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
@@ -501,11 +501,11 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
ret = security_mmap_file(file, prot, flag);
if (!ret) {
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
&populate, &uf);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
if (populate)
mm_populate(ret, populate);
diff --git a/mm/vmacache.c b/mm/vmacache.c
index cdc32a3b02fa..d9092814c772 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -6,7 +6,6 @@
#include <linux/sched/task.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
-#include <asm/pgtable.h>
/*
* Hash based on the pmd of addr if configured with MMU, which provides a good
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f6dc0673e62c..952a01e45c6a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -39,8 +39,8 @@
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/slab.h>
+#include <linux/pgtable.h>
#include <asm/tlbflush.h>
-#include <asm/pgtable.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>