summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-09 09:54:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-09 09:54:46 -0700
commita5ad5742f671de906adbf29fbedf0a04705cebad (patch)
tree88d1a4c18e2025a5a8335dbbc9dea8bebeba5789 /include
parent013b2deba9a6b80ca02f4fafd7dedf875e9b4450 (diff)
parent4fa7252338a56fbc90220e6330f136a379175a7a (diff)
downloadlinux-a5ad5742f671de906adbf29fbedf0a04705cebad.tar.gz
linux-a5ad5742f671de906adbf29fbedf0a04705cebad.tar.bz2
linux-a5ad5742f671de906adbf29fbedf0a04705cebad.zip
Merge branch 'akpm' (patches from Andrew)
Merge even more updates from Andrew Morton: - a kernel-wide sweep of show_stack() - pagetable cleanups - abstract out accesses to mmap_sem - prep for mmap_sem scalability work - hch's user acess work Subsystems affected by this patch series: debug, mm/pagemap, mm/maccess, mm/documentation. * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (93 commits) include/linux/cache.h: expand documentation over __read_mostly maccess: return -ERANGE when probe_kernel_read() fails x86: use non-set_fs based maccess routines maccess: allow architectures to provide kernel probing directly maccess: move user access routines together maccess: always use strict semantics for probe_kernel_read maccess: remove strncpy_from_unsafe tracing/kprobes: handle mixed kernel/userspace probes better bpf: rework the compat kernel probe handling bpf:bpf_seq_printf(): handle potentially unsafe format string better bpf: handle the compat string in bpf_trace_copy_string better bpf: factor out a bpf_trace_copy_string helper maccess: unify the probe kernel arch hooks maccess: remove probe_read_common and probe_write_common maccess: rename strnlen_unsafe_user to strnlen_user_nofault maccess: rename strncpy_from_unsafe_strict to strncpy_from_kernel_nofault maccess: rename strncpy_from_unsafe_user to strncpy_from_user_nofault maccess: update the top of file comment maccess: clarify kerneldoc comments maccess: remove duplicate kerneldoc comments ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/io.h2
-rw-r--r--include/asm-generic/pgtable-nopmd.h1
-rw-r--r--include/asm-generic/pgtable-nopud.h1
-rw-r--r--include/linux/cache.h10
-rw-r--r--include/linux/crash_dump.h3
-rw-r--r--include/linux/dax.h1
-rw-r--r--include/linux/dma-noncoherent.h2
-rw-r--r--include/linux/fs.h4
-rw-r--r--include/linux/hmm.h2
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/hugetlb.h2
-rw-r--r--include/linux/io-mapping.h2
-rw-r--r--include/linux/kallsyms.h4
-rw-r--r--include/linux/kasan.h2
-rw-r--r--include/linux/mempolicy.h2
-rw-r--r--include/linux/mm.h13
-rw-r--r--include/linux/mm_types.h4
-rw-r--r--include/linux/mmap_lock.h90
-rw-r--r--include/linux/mmu_notifier.h13
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/pgtable.h (renamed from include/asm-generic/pgtable.h)128
-rw-r--r--include/linux/rmap.h2
-rw-r--r--include/linux/sched/debug.h3
-rw-r--r--include/linux/sched/mm.h10
-rw-r--r--include/linux/uaccess.h60
-rw-r--r--include/xen/arm/page.h2
26 files changed, 271 insertions, 96 deletions
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 3a7871130112..8b1e020e9a03 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -972,7 +972,7 @@ static inline void iounmap(void __iomem *addr)
}
#endif
#elif defined(CONFIG_GENERIC_IOREMAP)
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
void iounmap(volatile void __iomem *addr);
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index 0d9b28cba16d..3e13acd019ae 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -45,6 +45,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address)
{
return (pmd_t *)pud;
}
+#define pmd_offset pmd_offset
#define pmd_val(x) (pud_val((x).pud))
#define __pmd(x) ((pmd_t) { __pud(x) } )
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index ad05c1684bfc..a9d751fbda9e 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -43,6 +43,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
return (pud_t *)p4d;
}
+#define pud_offset pud_offset
#define pud_val(x) (p4d_val((x).p4d))
#define __pud(x) ((pud_t) { __p4d(x) })
diff --git a/include/linux/cache.h b/include/linux/cache.h
index 750621e41d1c..1aa8009f6d06 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -15,8 +15,14 @@
/*
* __read_mostly is used to keep rarely changing variables out of frequently
- * updated cachelines. If an architecture doesn't support it, ignore the
- * hint.
+ * updated cachelines. Its use should be reserved for data that is used
+ * frequently in hot paths. Performance traces can help decide when to use
+ * this. You want __read_mostly data to be tightly packed, so that in the
+ * best case multiple frequently read variables for a hot path will be next
+ * to each other in order to reduce the number of cachelines needed to
+ * execute a critical path. We should be mindful and selective of its use.
+ * ie: if you're going to use it please supply a *good* justification in your
+ * commit log
*/
#ifndef __read_mostly
#define __read_mostly
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index bc156285d097..a5192b718dbe 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -5,9 +5,10 @@
#include <linux/kexec.h>
#include <linux/proc_fs.h>
#include <linux/elf.h>
+#include <linux/pgtable.h>
#include <uapi/linux/vmcore.h>
-#include <asm/pgtable.h> /* for pgprot_t */
+#include <linux/pgtable.h> /* for pgprot_t */
#ifdef CONFIG_CRASH_DUMP
#define ELFCORE_ADDR_MAX (-1ULL)
diff --git a/include/linux/dax.h b/include/linux/dax.h
index d7af5d243f24..6904d4e0b2e0 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -5,7 +5,6 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/radix-tree.h>
-#include <asm/pgtable.h>
/* Flag for synchronous flush */
#define DAXDEV_F_SYNC (1UL << 0)
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index b59f1b6be3e9..ca09a4e07d2d 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -3,7 +3,7 @@
#define _LINUX_DMA_NONCOHERENT_H 1
#include <linux/dma-mapping.h>
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
#include <asm/dma-coherence.h>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 01f5d296f9bb..0b026329dbed 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1679,10 +1679,10 @@ static inline int sb_start_write_trylock(struct super_block *sb)
*
* Since page fault freeze protection behaves as a lock, users have to preserve
* ordering of freeze protection and other filesystem locks. It is advised to
- * put sb_start_pagefault() close to mmap_sem in lock ordering. Page fault
+ * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
* handling code implies lock dependency:
*
- * mmap_sem
+ * mmap_lock
* -> sb_start_pagefault
*/
static inline void sb_start_pagefault(struct super_block *sb)
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index e912b9dc4633..f4a09ed223ac 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -10,7 +10,7 @@
#define LINUX_HMM_H
#include <linux/kconfig.h>
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#include <linux/device.h>
#include <linux/migrate.h>
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index cfbb0a87c5f0..71f20776b06c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -248,7 +248,7 @@ static inline int is_swap_pmd(pmd_t pmd)
return !pmd_none(pmd) && !pmd_present(pmd);
}
-/* mmap_sem must be held on entry */
+/* mmap_lock must be held on entry */
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0cced410e0bd..50650d0d01b9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -9,7 +9,7 @@
#include <linux/cgroup.h>
#include <linux/list.h>
#include <linux/kref.h>
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
struct ctl_table;
struct user_struct;
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index b336622612f3..0beaa3eba155 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/bug.h>
#include <linux/io.h>
+#include <linux/pgtable.h>
#include <asm/page.h>
/*
@@ -99,7 +100,6 @@ io_mapping_unmap(void __iomem *vaddr)
#else
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
/* Create the io_mapping object*/
static inline struct io_mapping *
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 657a83b943f0..98338dc6b5d2 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -165,9 +165,9 @@ static inline int kallsyms_show_value(void)
#endif /*CONFIG_KALLSYMS*/
-static inline void print_ip_sym(unsigned long ip)
+static inline void print_ip_sym(const char *loglvl, unsigned long ip)
{
- printk("[<%px>] %pS\n", (void *) ip, (void *) ip);
+ printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip);
}
#endif /*_LINUX_KALLSYMS_H*/
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 31314ca7c635..82522e996c76 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -11,8 +11,8 @@ struct task_struct;
#ifdef CONFIG_KASAN
+#include <linux/pgtable.h>
#include <asm/kasan.h>
-#include <asm/pgtable.h>
extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE];
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 8165278c348a..ea9c15b60a96 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -31,7 +31,7 @@ struct mm_struct;
* Locking policy for interlave:
* In process context there is no locking because only the process accesses
* its own state. All vma manipulation is somewhat protected by a down_read on
- * mmap_sem.
+ * mmap_lock.
*
* Freeing policy:
* Mempolicy objects are reference counted. A mempolicy will be freed when
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9d6042178ca7..dc7b87310c10 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
#include <linux/atomic.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
#include <linux/range.h>
#include <linux/pfn.h>
#include <linux/percpu-refcount.h>
@@ -28,6 +29,7 @@
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/sched.h>
+#include <linux/pgtable.h>
struct mempolicy;
struct anon_vma;
@@ -92,7 +94,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
#endif
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
/*
@@ -401,7 +402,7 @@ extern pgprot_t protection_map[16];
* @FAULT_FLAG_WRITE: Fault was a write fault.
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
- * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
* @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
* @FAULT_FLAG_TRIED: The fault has been tried once.
* @FAULT_FLAG_USER: The fault originated in userspace.
@@ -451,10 +452,10 @@ extern pgprot_t protection_map[16];
* fault_flag_allow_retry_first - check ALLOW_RETRY the first time
*
* This is mostly used for places where we want to try to avoid taking
- * the mmap_sem for too long a time when waiting for another condition
+ * the mmap_lock for too long a time when waiting for another condition
* to change, in which case we can try to be polite to release the
- * mmap_sem in the first round to avoid potential starvation of other
- * processes that would also want the mmap_sem.
+ * mmap_lock in the first round to avoid potential starvation of other
+ * processes that would also want the mmap_lock.
*
* Return: true if the page fault allows retry and this is the first
* attempt of the fault handling; false otherwise.
@@ -581,7 +582,7 @@ struct vm_operations_struct {
* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
* in mm/mempolicy.c will do this automatically.
* get_policy() must NOT add a ref if the policy at (vma,addr) is not
- * marked as MPOL_SHARED. vma policies are protected by the mmap_sem.
+ * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
* If no [shared/vma] mempolicy exists at the addr, get_policy() op
* must return NULL--i.e., do not "fallback" to task or system default
* policy.
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ef6d3aface8a..64ede5f150dc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -344,7 +344,7 @@ struct vm_area_struct {
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
* or brk vma (with NULL file) can only be in an anon_vma list.
*/
- struct list_head anon_vma_chain; /* Serialized by mmap_sem &
+ struct list_head anon_vma_chain; /* Serialized by mmap_lock &
* page_table_lock */
struct anon_vma *anon_vma; /* Serialized by page_table_lock */
@@ -440,7 +440,7 @@ struct mm_struct {
spinlock_t page_table_lock; /* Protects page tables and some
* counters
*/
- struct rw_semaphore mmap_sem;
+ struct rw_semaphore mmap_lock;
struct list_head mmlist; /* List of maybe swapped mm's. These
* are globally strung together off
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
new file mode 100644
index 000000000000..0707671851a8
--- /dev/null
+++ b/include/linux/mmap_lock.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_MMAP_LOCK_H
+#define _LINUX_MMAP_LOCK_H
+
+#include <linux/mmdebug.h>
+
+#define MMAP_LOCK_INITIALIZER(name) \
+ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
+
+static inline void mmap_init_lock(struct mm_struct *mm)
+{
+ init_rwsem(&mm->mmap_lock);
+}
+
+static inline void mmap_write_lock(struct mm_struct *mm)
+{
+ down_write(&mm->mmap_lock);
+}
+
+static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
+{
+ down_write_nested(&mm->mmap_lock, subclass);
+}
+
+static inline int mmap_write_lock_killable(struct mm_struct *mm)
+{
+ return down_write_killable(&mm->mmap_lock);
+}
+
+static inline bool mmap_write_trylock(struct mm_struct *mm)
+{
+ return down_write_trylock(&mm->mmap_lock) != 0;
+}
+
+static inline void mmap_write_unlock(struct mm_struct *mm)
+{
+ up_write(&mm->mmap_lock);
+}
+
+static inline void mmap_write_downgrade(struct mm_struct *mm)
+{
+ downgrade_write(&mm->mmap_lock);
+}
+
+static inline void mmap_read_lock(struct mm_struct *mm)
+{
+ down_read(&mm->mmap_lock);
+}
+
+static inline int mmap_read_lock_killable(struct mm_struct *mm)
+{
+ return down_read_killable(&mm->mmap_lock);
+}
+
+static inline bool mmap_read_trylock(struct mm_struct *mm)
+{
+ return down_read_trylock(&mm->mmap_lock) != 0;
+}
+
+static inline void mmap_read_unlock(struct mm_struct *mm)
+{
+ up_read(&mm->mmap_lock);
+}
+
+static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
+{
+ if (down_read_trylock(&mm->mmap_lock)) {
+ rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
+ return true;
+ }
+ return false;
+}
+
+static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
+{
+ up_read_non_owner(&mm->mmap_lock);
+}
+
+static inline void mmap_assert_locked(struct mm_struct *mm)
+{
+ lockdep_assert_held(&mm->mmap_lock);
+ VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+}
+
+static inline void mmap_assert_write_locked(struct mm_struct *mm)
+{
+ lockdep_assert_held_write(&mm->mmap_lock);
+ VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm);
+}
+
+#endif /* _LINUX_MMAP_LOCK_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 736f6918335e..fc68f3570e19 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -5,6 +5,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
#include <linux/srcu.h>
#include <linux/interval_tree.h>
@@ -121,7 +122,7 @@ struct mmu_notifier_ops {
/*
* invalidate_range_start() and invalidate_range_end() must be
- * paired and are called only when the mmap_sem and/or the
+ * paired and are called only when the mmap_lock and/or the
* locks protecting the reverse maps are held. If the subsystem
* can't guarantee that no additional references are taken to
* the pages in the range, it has to implement the
@@ -212,13 +213,13 @@ struct mmu_notifier_ops {
};
/*
- * The notifier chains are protected by mmap_sem and/or the reverse map
+ * The notifier chains are protected by mmap_lock and/or the reverse map
* semaphores. Notifier chains are only changed when all reverse maps and
- * the mmap_sem locks are taken.
+ * the mmap_lock locks are taken.
*
* Therefore notifier chains can only be traversed when either
*
- * 1. mmap_sem is held.
+ * 1. mmap_lock is held.
* 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
* 3. No other concurrent thread can access the list (release)
*/
@@ -277,9 +278,9 @@ mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
{
struct mmu_notifier *ret;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
ret = mmu_notifier_get_locked(ops, mm);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return ret;
}
void mmu_notifier_put(struct mmu_notifier *subscription);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8e085713150c..cf2468da68e9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -538,7 +538,7 @@ static inline int lock_page_killable(struct page *page)
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
*
- * Return value and mmap_sem implications depend on flags; see
+ * Return value and mmap_lock implications depend on flags; see
* __lock_page_or_retry().
*/
static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
diff --git a/include/asm-generic/pgtable.h b/include/linux/pgtable.h
index 0a9329656ae6..32b6c52d41b9 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1,8 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_GENERIC_PGTABLE_H
-#define _ASM_GENERIC_PGTABLE_H
+#ifndef _LINUX_PGTABLE_H
+#define _LINUX_PGTABLE_H
#include <linux/pfn.h>
+#include <asm/pgtable.h>
#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU
@@ -27,6 +28,121 @@
#define USER_PGTABLES_CEILING 0UL
#endif
+/*
+ * A page table page can be thought of an array like this: pXd_t[PTRS_PER_PxD]
+ *
+ * The pXx_index() functions return the index of the entry in the page
+ * table page which would control the given virtual address
+ *
+ * As these functions may be used by the same code for different levels of
+ * the page table folding, they are always available, regardless of
+ * CONFIG_PGTABLE_LEVELS value. For the folded levels they simply return 0
+ * because in such cases PTRS_PER_PxD equals 1.
+ */
+
+static inline unsigned long pte_index(unsigned long address)
+{
+ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+}
+
+#ifndef pmd_index
+static inline unsigned long pmd_index(unsigned long address)
+{
+ return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+}
+#define pmd_index pmd_index
+#endif
+
+#ifndef pud_index
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+#define pud_index pud_index
+#endif
+
+#ifndef pgd_index
+/* Must be a compile-time constant, so implement it as a macro */
+#define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+#endif
+
+#ifndef pte_offset_kernel
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
+{
+ return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
+}
+#define pte_offset_kernel pte_offset_kernel
+#endif
+
+#if defined(CONFIG_HIGHPTE)
+#define pte_offset_map(dir, address) \
+ ((pte_t *)kmap_atomic(pmd_page(*(dir))) + \
+ pte_index((address)))
+#define pte_unmap(pte) kunmap_atomic((pte))
+#else
+#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
+#define pte_unmap(pte) ((void)(pte)) /* NOP */
+#endif
+
+/* Find an entry in the second-level page table.. */
+#ifndef pmd_offset
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+{
+ return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
+}
+#define pmd_offset pmd_offset
+#endif
+
+#ifndef pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+}
+#define pud_offset pud_offset
+#endif
+
+static inline pgd_t *pgd_offset_pgd(pgd_t *pgd, unsigned long address)
+{
+ return (pgd + pgd_index(address));
+};
+
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#ifndef pgd_offset
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
+#endif
+
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
+
+/*
+ * In many cases it is known that a virtual address is mapped at PMD or PTE
+ * level, so instead of traversing all the page table levels, we can get a
+ * pointer to the PMD entry in user or kernel page table or translate a virtual
+ * address to the pointer in the PTE in the kernel page tables with simple
+ * helpers.
+ */
+static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
+{
+ return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), va);
+}
+
+static inline pmd_t *pmd_off_k(unsigned long va)
+{
+ return pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va);
+}
+
+static inline pte_t *virt_to_kpte(unsigned long vaddr)
+{
+ pmd_t *pmd = pmd_off_k(vaddr);
+
+ return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr);
+}
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
@@ -1018,11 +1134,11 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
#endif
/*
* This function is meant to be used by sites walking pagetables with
- * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
+ * the mmap_lock held in read mode to protect against MADV_DONTNEED and
* transhuge page faults. MADV_DONTNEED can convert a transhuge pmd
* into a null pmd and the transhuge page fault can convert a null pmd
* into an hugepmd or into a regular pmd (if the hugepage allocation
- * fails). While holding the mmap_sem in read mode the pmd becomes
+ * fails). While holding the mmap_lock in read mode the pmd becomes
* stable and stops changing under us only if it's not null and not a
* transhuge pmd. When those races occurs and this function makes a
* difference vs the standard pmd_none_or_clear_bad, the result is
@@ -1032,7 +1148,7 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
*
* For 32bit kernels with a 64bit large pmd_t this automatically takes
* care of reading the pmd atomically to avoid SMP race conditions
- * against pmd_populate() when the mmap_sem is hold for reading by the
+ * against pmd_populate() when the mmap_lock is hold for reading by the
* caller (a special atomic read not done by "gcc" as in the generic
* version above, is also needed when THP is disabled because the page
* fault can populate the pmd from under us).
@@ -1319,4 +1435,4 @@ typedef unsigned int pgtbl_mod_mask;
#define pmd_leaf(x) 0
#endif
-#endif /* _ASM_GENERIC_PGTABLE_H */
+#endif /* _LINUX_PGTABLE_H */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 988d176472df..3a6adfa70fb0 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -77,7 +77,7 @@ struct anon_vma {
struct anon_vma_chain {
struct vm_area_struct *vma;
struct anon_vma *anon_vma;
- struct list_head same_vma; /* locked by mmap_sem & page_table_lock */
+ struct list_head same_vma; /* locked by mmap_lock & page_table_lock */
struct rb_node rb; /* locked by anon_vma->rwsem */
unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index 95fb9e025247..00c45a0e6abe 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -30,7 +30,8 @@ extern void show_regs(struct pt_regs *);
* task), SP is the stack pointer of the first frame that should be shown in the back
* trace (or NULL if the entire call-chain of the task should be shown).
*/
-extern void show_stack(struct task_struct *task, unsigned long *sp);
+extern void show_stack(struct task_struct *task, unsigned long *sp,
+ const char *loglvl);
extern void sched_show_task(struct task_struct *p);
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a132d875d351..480a4d1b7dd8 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -53,7 +53,7 @@ void mmdrop(struct mm_struct *mm);
/*
* This has to be called after a get_task_mm()/mmget_not_zero()
- * followed by taking the mmap_sem for writing before modifying the
+ * followed by taking the mmap_lock for writing before modifying the
* vmas or anything the coredump pretends not to change from under it.
*
* It also has to be called when mmgrab() is used in the context of
@@ -61,14 +61,14 @@ void mmdrop(struct mm_struct *mm);
* the context of the process to run down_write() on that pinned mm.
*
* NOTE: find_extend_vma() called from GUP context is the only place
- * that can modify the "mm" (notably the vm_start/end) under mmap_sem
+ * that can modify the "mm" (notably the vm_start/end) under mmap_lock
* for reading and outside the context of the process, so it is also
- * the only case that holds the mmap_sem for reading that must call
- * this function. Generally if the mmap_sem is hold for reading
+ * the only case that holds the mmap_lock for reading that must call
+ * this function. Generally if the mmap_lock is hold for reading
* there's no need of this check after get_task_mm()/mmget_not_zero().
*
* This function can be obsoleted and the check can be removed, after
- * the coredump code will hold the mmap_sem for writing before
+ * the coredump code will hold the mmap_lock for writing before
* invoking the ->core_dump methods.
*/
static inline bool mmget_still_valid(struct mm_struct *mm)
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 9861c89f93be..dac1db05bf7e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -301,62 +301,20 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
return 0;
}
-/*
- * probe_kernel_read(): safely attempt to read from a location
- * @dst: pointer to the buffer that shall take the data
- * @src: address to read from
- * @size: size of the data chunk
- *
- * Safely read from address @src to the buffer at @dst. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
-extern long probe_kernel_read(void *dst, const void *src, size_t size);
-extern long probe_kernel_read_strict(void *dst, const void *src, size_t size);
-extern long __probe_kernel_read(void *dst, const void *src, size_t size);
+bool probe_kernel_read_allowed(const void *unsafe_src, size_t size);
-/*
- * probe_user_read(): safely attempt to read from a location in user space
- * @dst: pointer to the buffer that shall take the data
- * @src: address to read from
- * @size: size of the data chunk
- *
- * Safely read from address @src to the buffer at @dst. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
+extern long probe_kernel_read(void *dst, const void *src, size_t size);
extern long probe_user_read(void *dst, const void __user *src, size_t size);
-extern long __probe_user_read(void *dst, const void __user *src, size_t size);
-/*
- * probe_kernel_write(): safely attempt to write to a location
- * @dst: address to write to
- * @src: pointer to the data that shall be written
- * @size: size of the data chunk
- *
- * Safely write to address @dst from the buffer at @src. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
-extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
-
-/*
- * probe_user_write(): safely attempt to write to a location in user space
- * @dst: address to write to
- * @src: pointer to the data that shall be written
- * @size: size of the data chunk
- *
- * Safely write to address @dst from the buffer at @src. If a kernel fault
- * happens, handle that and return -EFAULT.
- */
extern long notrace probe_user_write(void __user *dst, const void *src, size_t size);
-extern long notrace __probe_user_write(void __user *dst, const void *src, size_t size);
-
-extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
-extern long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr,
- long count);
-extern long __strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
-extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
- long count);
-extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
+
+long strncpy_from_kernel_nofault(char *dst, const void *unsafe_addr,
+ long count);
+
+long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr,
+ long count);
+long strnlen_user_nofault(const void __user *unsafe_addr, long count);
/**
* probe_kernel_address(): safely attempt to read from a location
diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index f77dcbcba5a6..d7f6af50e200 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -3,11 +3,11 @@
#define _ASM_ARM_XEN_PAGE_H
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <linux/pfn.h>
#include <linux/types.h>
#include <linux/dma-mapping.h>
+#include <linux/pgtable.h>
#include <xen/xen.h>
#include <xen/interface/grant_table.h>