summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-02-04 07:24:48 +0000
committerLinus Torvalds <torvalds@linux-foundation.org>2020-02-04 07:24:48 +0000
commitcc12071ff39060fc2e47c58b43e249fe0d0061ee (patch)
tree085789dec88100d2ac12d9f2c990c00b909d7ee2 /mm
parent9717c1cea16e3eae81ca226f4c3670bb799b61ad (diff)
parentf3cc4e1d44a813a0685f2e558b78ace3db559722 (diff)
downloadlinux-cc12071ff39060fc2e47c58b43e249fe0d0061ee.tar.gz
linux-cc12071ff39060fc2e47c58b43e249fe0d0061ee.tar.bz2
linux-cc12071ff39060fc2e47c58b43e249fe0d0061ee.zip
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: "The rest of MM and the rest of everything else: hotfixes, ipc, misc, procfs, lib, cleanups, arm" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (67 commits) ARM: dma-api: fix max_pfn off-by-one error in __dma_supported() treewide: remove redundant IS_ERR() before error code check include/linux/cpumask.h: don't calculate length of the input string lib: new testcases for bitmap_parse{_user} lib: rework bitmap_parse() lib: make bitmap_parse_user a wrapper on bitmap_parse lib: add test for bitmap_parse() bitops: more BITS_TO_* macros lib/string: add strnchrnul() proc: convert everything to "struct proc_ops" proc: decouple proc from VFS with "struct proc_ops" asm-generic/tlb: provide MMU_GATHER_TABLE_FREE asm-generic/tlb: rename HAVE_MMU_GATHER_NO_GATHER asm-generic/tlb: rename HAVE_MMU_GATHER_PAGE_SIZE asm-generic/tlb: rename HAVE_RCU_TABLE_FREE asm-generic/tlb: add missing CONFIG symbol asm-gemeric/tlb: remove stray function declarations asm-generic/tlb: avoid potential double flush mm/mmu_gather: invalidate TLB correctly on batch allocation failure and flush powerpc/mmu_gather: enable RCU_TABLE_FREE even for !SMP case ...
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig.debug21
-rw-r--r--mm/Makefile1
-rw-r--r--mm/gup.c2
-rw-r--r--mm/hmm.c62
-rw-r--r--mm/memory_hotplug.c104
-rw-r--r--mm/memremap.c2
-rw-r--r--mm/migrate.c5
-rw-r--r--mm/mincore.c1
-rw-r--r--mm/mmu_gather.c134
-rw-r--r--mm/page_alloc.c71
-rw-r--r--mm/pagewalk.c163
-rw-r--r--mm/ptdump.c139
-rw-r--r--mm/slab_common.c37
-rw-r--r--mm/sparse.c10
-rw-r--r--mm/swapfile.c14
15 files changed, 514 insertions, 252 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 327b3ebf23bf..0271b22e063f 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -117,3 +117,24 @@ config DEBUG_RODATA_TEST
depends on STRICT_KERNEL_RWX
---help---
This option enables a testcase for the setting rodata read-only.
+
+config GENERIC_PTDUMP
+ bool
+
+config PTDUMP_CORE
+ bool
+
+config PTDUMP_DEBUGFS
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ depends on DEBUG_FS
+ depends on GENERIC_PTDUMP
+ select PTDUMP_CORE
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+
+ If in doubt, say N.
diff --git a/mm/Makefile b/mm/Makefile
index 32f08e22e824..272e66039e70 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -109,3 +109,4 @@ obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
+obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
diff --git a/mm/gup.c b/mm/gup.c
index e13f4d211475..1b521e0ac1de 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1792,7 +1792,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* Before activating this code, please be aware that the following assumptions
* are currently made:
*
- * *) Either HAVE_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
+ * *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
* free pages containing page tables or TLB flushing requires IPI broadcast.
*
* *) ptes can be read atomically by the architecture.
diff --git a/mm/hmm.c b/mm/hmm.c
index d379cb6496ae..72e5a6d9a417 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -186,7 +186,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
}
static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+ __always_unused int depth, struct mm_walk *walk)
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
@@ -380,7 +380,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
again:
pmd = READ_ONCE(*pmdp);
if (pmd_none(pmd))
- return hmm_vma_walk_hole(start, end, walk);
+ return hmm_vma_walk_hole(start, end, -1, walk);
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
bool fault, write_fault;
@@ -474,23 +474,32 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
{
struct hmm_vma_walk *hmm_vma_walk = walk->private;
struct hmm_range *range = hmm_vma_walk->range;
- unsigned long addr = start, next;
- pmd_t *pmdp;
+ unsigned long addr = start;
pud_t pud;
- int ret;
+ int ret = 0;
+ spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);
+
+ if (!ptl)
+ return 0;
+
+ /* Normally we don't want to split the huge page */
+ walk->action = ACTION_CONTINUE;
-again:
pud = READ_ONCE(*pudp);
- if (pud_none(pud))
- return hmm_vma_walk_hole(start, end, walk);
+ if (pud_none(pud)) {
+ ret = hmm_vma_walk_hole(start, end, -1, walk);
+ goto out_unlock;
+ }
if (pud_huge(pud) && pud_devmap(pud)) {
unsigned long i, npages, pfn;
uint64_t *pfns, cpu_flags;
bool fault, write_fault;
- if (!pud_present(pud))
- return hmm_vma_walk_hole(start, end, walk);
+ if (!pud_present(pud)) {
+ ret = hmm_vma_walk_hole(start, end, -1, walk);
+ goto out_unlock;
+ }
i = (addr - range->start) >> PAGE_SHIFT;
npages = (end - addr) >> PAGE_SHIFT;
@@ -499,16 +508,20 @@ again:
cpu_flags = pud_to_hmm_pfn_flags(range, pud);
hmm_range_need_fault(hmm_vma_walk, pfns, npages,
cpu_flags, &fault, &write_fault);
- if (fault || write_fault)
- return hmm_vma_walk_hole_(addr, end, fault,
- write_fault, walk);
+ if (fault || write_fault) {
+ ret = hmm_vma_walk_hole_(addr, end, fault,
+ write_fault, walk);
+ goto out_unlock;
+ }
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
for (i = 0; i < npages; ++i, ++pfn) {
hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
hmm_vma_walk->pgmap);
- if (unlikely(!hmm_vma_walk->pgmap))
- return -EBUSY;
+ if (unlikely(!hmm_vma_walk->pgmap)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
cpu_flags;
}
@@ -517,22 +530,15 @@ again:
hmm_vma_walk->pgmap = NULL;
}
hmm_vma_walk->last = end;
- return 0;
+ goto out_unlock;
}
- split_huge_pud(walk->vma, pudp, addr);
- if (pud_none(*pudp))
- goto again;
+ /* Ask for the PUD to be split */
+ walk->action = ACTION_SUBTREE;
- pmdp = pmd_offset(pudp, addr);
- do {
- next = pmd_addr_end(addr, end);
- ret = hmm_vma_walk_pmd(pmdp, addr, next, walk);
- if (ret)
- return ret;
- } while (pmdp++, addr = next, addr != end);
-
- return 0;
+out_unlock:
+ spin_unlock(ptl);
+ return ret;
}
#else
#define hmm_vma_walk_pud NULL
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 36d80915ddc2..0a54ffac8c68 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -355,7 +355,7 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
if (unlikely(pfn_to_nid(start_pfn) != nid))
continue;
- if (zone && zone != page_zone(pfn_to_page(start_pfn)))
+ if (zone != page_zone(pfn_to_page(start_pfn)))
continue;
return start_pfn;
@@ -380,7 +380,7 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
if (unlikely(pfn_to_nid(pfn) != nid))
continue;
- if (zone && zone != page_zone(pfn_to_page(pfn)))
+ if (zone != page_zone(pfn_to_page(pfn)))
continue;
return pfn;
@@ -392,14 +392,11 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
unsigned long end_pfn)
{
- unsigned long zone_start_pfn = zone->zone_start_pfn;
- unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
- unsigned long zone_end_pfn = z;
unsigned long pfn;
int nid = zone_to_nid(zone);
zone_span_writelock(zone);
- if (zone_start_pfn == start_pfn) {
+ if (zone->zone_start_pfn == start_pfn) {
/*
* If the section is smallest section in the zone, it need
* shrink zone->zone_start_pfn and zone->zone_spanned_pages.
@@ -407,50 +404,30 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
* for shrinking zone.
*/
pfn = find_smallest_section_pfn(nid, zone, end_pfn,
- zone_end_pfn);
+ zone_end_pfn(zone));
if (pfn) {
+ zone->spanned_pages = zone_end_pfn(zone) - pfn;
zone->zone_start_pfn = pfn;
- zone->spanned_pages = zone_end_pfn - pfn;
+ } else {
+ zone->zone_start_pfn = 0;
+ zone->spanned_pages = 0;
}
- } else if (zone_end_pfn == end_pfn) {
+ } else if (zone_end_pfn(zone) == end_pfn) {
/*
* If the section is biggest section in the zone, it need
* shrink zone->spanned_pages.
* In this case, we find second biggest valid mem_section for
* shrinking zone.
*/
- pfn = find_biggest_section_pfn(nid, zone, zone_start_pfn,
+ pfn = find_biggest_section_pfn(nid, zone, zone->zone_start_pfn,
start_pfn);
if (pfn)
- zone->spanned_pages = pfn - zone_start_pfn + 1;
- }
-
- /*
- * The section is not biggest or smallest mem_section in the zone, it
- * only creates a hole in the zone. So in this case, we need not
- * change the zone. But perhaps, the zone has only hole data. Thus
- * it check the zone has only hole or not.
- */
- pfn = zone_start_pfn;
- for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_to_online_page(pfn)))
- continue;
-
- if (page_zone(pfn_to_page(pfn)) != zone)
- continue;
-
- /* Skip range to be removed */
- if (pfn >= start_pfn && pfn < end_pfn)
- continue;
-
- /* If we find valid section, we have nothing to do */
- zone_span_writeunlock(zone);
- return;
+ zone->spanned_pages = pfn - zone->zone_start_pfn + 1;
+ else {
+ zone->zone_start_pfn = 0;
+ zone->spanned_pages = 0;
+ }
}
-
- /* The zone has no valid section */
- zone->zone_start_pfn = 0;
- zone->spanned_pages = 0;
zone_span_writeunlock(zone);
}
@@ -490,6 +467,9 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
+ /* Poison struct pages because they are now uninitialized again. */
+ page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+
#ifdef CONFIG_ZONE_DEVICE
/*
* Zone shrinking code cannot properly deal with ZONE_DEVICE. So
@@ -536,25 +516,20 @@ static void __remove_section(unsigned long pfn, unsigned long nr_pages,
void __remove_pages(unsigned long pfn, unsigned long nr_pages,
struct vmem_altmap *altmap)
{
+ const unsigned long end_pfn = pfn + nr_pages;
+ unsigned long cur_nr_pages;
unsigned long map_offset = 0;
- unsigned long nr, start_sec, end_sec;
map_offset = vmem_altmap_offset(altmap);
if (check_pfn_span(pfn, nr_pages, "remove"))
return;
- start_sec = pfn_to_section_nr(pfn);
- end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
- for (nr = start_sec; nr <= end_sec; nr++) {
- unsigned long pfns;
-
+ for (; pfn < end_pfn; pfn += cur_nr_pages) {
cond_resched();
- pfns = min(nr_pages, PAGES_PER_SECTION
- - (pfn & ~PAGE_SECTION_MASK));
- __remove_section(pfn, pfns, map_offset, altmap);
- pfn += pfns;
- nr_pages -= pfns;
+ /* Select all remaining pages up to the next section boundary */
+ cur_nr_pages = min(end_pfn - pfn, -(pfn | PAGE_SECTION_MASK));
+ __remove_section(pfn, cur_nr_pages, map_offset, altmap);
map_offset = 0;
}
}
@@ -1197,14 +1172,13 @@ bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
}
/*
- * Confirm all pages in a range [start, end) belong to the same zone.
- * When true, return its valid [start, end).
+ * Confirm all pages in a range [start, end) belong to the same zone (skipping
+ * memory holes). When true, return the zone.
*/
-int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
- unsigned long *valid_start, unsigned long *valid_end)
+struct zone *test_pages_in_a_zone(unsigned long start_pfn,
+ unsigned long end_pfn)
{
unsigned long pfn, sec_end_pfn;
- unsigned long start, end;
struct zone *zone = NULL;
struct page *page;
int i;
@@ -1225,24 +1199,15 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
continue;
/* Check if we got outside of the zone */
if (zone && !zone_spans_pfn(zone, pfn + i))
- return 0;
+ return NULL;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
- return 0;
- if (!zone)
- start = pfn + i;
+ return NULL;
zone = page_zone(page);
- end = pfn + MAX_ORDER_NR_PAGES;
}
}
- if (zone) {
- *valid_start = start;
- *valid_end = min(end, end_pfn);
- return 1;
- } else {
- return 0;
- }
+ return zone;
}
/*
@@ -1487,7 +1452,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
unsigned long offlined_pages = 0;
int ret, node, nr_isolate_pageblock;
unsigned long flags;
- unsigned long valid_start, valid_end;
struct zone *zone;
struct memory_notify arg;
char *reason;
@@ -1512,14 +1476,12 @@ static int __ref __offline_pages(unsigned long start_pfn,
/* This makes hotplug much easier...and readable.
we assume this for now. .*/
- if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
- &valid_end)) {
+ zone = test_pages_in_a_zone(start_pfn, end_pfn);
+ if (!zone) {
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
}
-
- zone = page_zone(pfn_to_page(valid_start));
node = zone_to_nid(zone);
/* set above range as isolated */
diff --git a/mm/memremap.c b/mm/memremap.c
index 4c723d2049d5..09b5b7adc773 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -120,6 +120,8 @@ void memunmap_pages(struct dev_pagemap *pgmap)
nid = page_to_nid(first_page);
mem_hotplug_begin();
+ remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)));
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
__remove_pages(PHYS_PFN(res->start),
PHYS_PFN(resource_size(res)), NULL);
diff --git a/mm/migrate.c b/mm/migrate.c
index edf42ed90030..b1092876e537 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2151,6 +2151,7 @@ out_unlock:
#ifdef CONFIG_DEVICE_PRIVATE
static int migrate_vma_collect_hole(unsigned long start,
unsigned long end,
+ __always_unused int depth,
struct mm_walk *walk)
{
struct migrate_vma *migrate = walk->private;
@@ -2195,7 +2196,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
again:
if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end, walk);
+ return migrate_vma_collect_hole(start, end, -1, walk);
if (pmd_trans_huge(*pmdp)) {
struct page *page;
@@ -2228,7 +2229,7 @@ again:
return migrate_vma_collect_skip(start, end,
walk);
if (pmd_none(*pmdp))
- return migrate_vma_collect_hole(start, end,
+ return migrate_vma_collect_hole(start, end, -1,
walk);
}
}
diff --git a/mm/mincore.c b/mm/mincore.c
index 49b6fa2f6aa1..0e6dd9948f1a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -112,6 +112,7 @@ static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
}
static int mincore_unmapped_range(unsigned long addr, unsigned long end,
+ __always_unused int depth,
struct mm_walk *walk)
{
walk->private += __mincore_unmapped_range(addr, end,
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 7d70e5c78f97..a3538cb2bcbe 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -11,7 +11,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
-#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
static bool tlb_next_batch(struct mmu_gather *tlb)
{
@@ -69,7 +69,7 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
VM_BUG_ON(!tlb->end);
-#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
VM_WARN_ON(tlb->page_size != page_size);
#endif
@@ -89,58 +89,108 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
return false;
}
-#endif /* HAVE_MMU_GATHER_NO_GATHER */
+#endif /* MMU_GATHER_NO_GATHER */
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#ifdef CONFIG_MMU_GATHER_TABLE_FREE
-/*
- * See the comment near struct mmu_table_batch.
- */
+static void __tlb_remove_table_free(struct mmu_table_batch *batch)
+{
+ int i;
+
+ for (i = 0; i < batch->nr; i++)
+ __tlb_remove_table(batch->tables[i]);
+
+ free_page((unsigned long)batch);
+}
+
+#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
/*
- * If we want tlb_remove_table() to imply TLB invalidates.
+ * Semi RCU freeing of the page directories.
+ *
+ * This is needed by some architectures to implement software pagetable walkers.
+ *
+ * gup_fast() and other software pagetable walkers do a lockless page-table
+ * walk and therefore needs some synchronization with the freeing of the page
+ * directories. The chosen means to accomplish that is by disabling IRQs over
+ * the walk.
+ *
+ * Architectures that use IPIs to flush TLBs will then automagically DTRT,
+ * since we unlink the page, flush TLBs, free the page. Since the disabling of
+ * IRQs delays the completion of the TLB flush we can never observe an already
+ * freed page.
+ *
+ * Architectures that do not have this (PPC) need to delay the freeing by some
+ * other means, this is that means.
+ *
+ * What we do is batch the freed directory pages (tables) and RCU free them.
+ * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
+ * holds off grace periods.
+ *
+ * However, in order to batch these pages we need to allocate storage, this
+ * allocation is deep inside the MM code and can thus easily fail on memory
+ * pressure. To guarantee progress we fall back to single table freeing, see
+ * the implementation of tlb_remove_table_one().
+ *
*/
-static inline void tlb_table_invalidate(struct mmu_gather *tlb)
-{
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
- /*
- * Invalidate page-table caches used by hardware walkers. Then we still
- * need to RCU-sched wait while freeing the pages because software
- * walkers can still be in-flight.
- */
- tlb_flush_mmu_tlbonly(tlb);
-#endif
-}
static void tlb_remove_table_smp_sync(void *arg)
{
/* Simply deliver the interrupt */
}
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_sync_one(void)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
* assumed to be actually RCU-freed.
*
* It is however sufficient for software page-table walkers that rely on
- * IRQ disabling. See the comment near struct mmu_table_batch.
+ * IRQ disabling.
*/
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
}
static void tlb_remove_table_rcu(struct rcu_head *head)
{
- struct mmu_table_batch *batch;
- int i;
+ __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
+}
+
+static void tlb_remove_table_free(struct mmu_table_batch *batch)
+{
+ call_rcu(&batch->rcu, tlb_remove_table_rcu);
+}
- batch = container_of(head, struct mmu_table_batch, rcu);
+#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
+static void tlb_remove_table_sync_one(void) { }
- free_page((unsigned long)batch);
+static void tlb_remove_table_free(struct mmu_table_batch *batch)
+{
+ __tlb_remove_table_free(batch);
+}
+
+#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
+/*
+ * If we want tlb_remove_table() to imply TLB invalidates.
+ */
+static inline void tlb_table_invalidate(struct mmu_gather *tlb)
+{
+ if (tlb_needs_table_invalidate()) {
+ /*
+ * Invalidate page-table caches used by hardware walkers. Then
+ * we still need to RCU-sched wait while freeing the pages
+ * because software walkers can still be in-flight.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
+}
+
+static void tlb_remove_table_one(void *table)
+{
+ tlb_remove_table_sync_one();
+ __tlb_remove_table(table);
}
static void tlb_table_flush(struct mmu_gather *tlb)
@@ -149,7 +199,7 @@ static void tlb_table_flush(struct mmu_gather *tlb)
if (*batch) {
tlb_table_invalidate(tlb);
- call_rcu(&(*batch)->rcu, tlb_remove_table_rcu);
+ tlb_remove_table_free(*batch);
*batch = NULL;
}
}
@@ -173,14 +223,22 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
tlb_table_flush(tlb);
}
-#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+static inline void tlb_table_init(struct mmu_gather *tlb)
+{
+ tlb->batch = NULL;
+}
+
+#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
+
+static inline void tlb_table_flush(struct mmu_gather *tlb) { }
+static inline void tlb_table_init(struct mmu_gather *tlb) { }
+
+#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb_table_flush(tlb);
-#endif
-#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
tlb_batch_pages_flush(tlb);
#endif
}
@@ -211,7 +269,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
/* Is it from 0 to ~0? */
tlb->fullmm = !(start | (end+1));
-#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
tlb->need_flush_all = 0;
tlb->local.next = NULL;
tlb->local.nr = 0;
@@ -220,10 +278,8 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
tlb->batch_count = 0;
#endif
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
-#endif
-#ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
+ tlb_table_init(tlb);
+#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
tlb->page_size = 0;
#endif
@@ -271,7 +327,7 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
tlb_flush_mmu(tlb);
-#ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
tlb_batch_list_free(tlb);
#endif
dec_tlb_flush_pending(tlb->mm);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 15e908ad933b..3c4eb750a199 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5852,18 +5852,11 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
/* Skip PFNs that belong to non-present sections */
static inline __meminit unsigned long next_pfn(unsigned long pfn)
{
- unsigned long section_nr;
+ const unsigned long section_nr = pfn_to_section_nr(++pfn);
- section_nr = pfn_to_section_nr(++pfn);
if (present_section_nr(section_nr))
return pfn;
-
- while (++section_nr <= __highest_present_section_nr) {
- if (present_section_nr(section_nr))
- return section_nr_to_pfn(section_nr);
- }
-
- return -1;
+ return section_nr_to_pfn(next_present_section_nr(section_nr));
}
#else
static inline __meminit unsigned long next_pfn(unsigned long pfn)
@@ -5905,18 +5898,20 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
}
#endif
- for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ for (pfn = start_pfn; pfn < end_pfn; ) {
/*
* There can be holes in boot-time mem_map[]s handed to this
* function. They do not exist on hotplugged memory.
*/
if (context == MEMMAP_EARLY) {
if (!early_pfn_valid(pfn)) {
- pfn = next_pfn(pfn) - 1;
+ pfn = next_pfn(pfn);
continue;
}
- if (!early_pfn_in_nid(pfn, nid))
+ if (!early_pfn_in_nid(pfn, nid)) {
+ pfn++;
continue;
+ }
if (overlap_memmap_init(zone, &pfn))
continue;
if (defer_init(nid, pfn, end_pfn))
@@ -5944,16 +5939,17 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
cond_resched();
}
+ pfn++;
}
}
#ifdef CONFIG_ZONE_DEVICE
void __ref memmap_init_zone_device(struct zone *zone,
unsigned long start_pfn,
- unsigned long size,
+ unsigned long nr_pages,
struct dev_pagemap *pgmap)
{
- unsigned long pfn, end_pfn = start_pfn + size;
+ unsigned long pfn, end_pfn = start_pfn + nr_pages;
struct pglist_data *pgdat = zone->zone_pgdat;
struct vmem_altmap *altmap = pgmap_altmap(pgmap);
unsigned long zone_idx = zone_idx(zone);
@@ -5970,7 +5966,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
*/
if (altmap) {
start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
- size = end_pfn - start_pfn;
+ nr_pages = end_pfn - start_pfn;
}
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
@@ -6017,7 +6013,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
}
pr_info("%s initialised %lu pages in %ums\n", __func__,
- size, jiffies_to_msecs(jiffies - start));
+ nr_pages, jiffies_to_msecs(jiffies - start));
}
#endif
@@ -6916,10 +6912,10 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
/*
- * Zero all valid struct pages in range [spfn, epfn), return number of struct
- * pages zeroed
+ * Initialize all valid struct pages in the range [spfn, epfn) and mark them
+ * PageReserved(). Return the number of struct pages that were initialized.
*/
-static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
+static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
{
unsigned long pfn;
u64 pgcnt = 0;
@@ -6930,7 +6926,13 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
+ pageblock_nr_pages - 1;
continue;
}
- mm_zero_struct_page(pfn_to_page(pfn));
+ /*
+ * Use a fake node/zone (0) for now. Some of these pages
+ * (in memblock.reserved but not in memblock.memory) will
+ * get re-initialized via reserve_bootmem_region() later.
+ */
+ __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
+ __SetPageReserved(pfn_to_page(pfn));
pgcnt++;
}
@@ -6942,14 +6944,15 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn)
* initialized by going through __init_single_page(). But, there are some
* struct pages which are reserved in memblock allocator and their fields
* may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly zero those struct pages.
+ * flags). We must explicitly initialize those struct pages.
*
* This function also addresses a similar issue where struct pages are left
* uninitialized because the physical address range is not covered by
* memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=.
+ * layout is manually configured via memmap=, or when the highest physical
+ * address (max_pfn) does not end on a section boundary.
*/
-void __init zero_resv_unavail(void)
+static void __init init_unavailable_mem(void)
{
phys_addr_t start, end;
u64 i, pgcnt;
@@ -6962,10 +6965,20 @@ void __init zero_resv_unavail(void)
for_each_mem_range(i, &memblock.memory, NULL,
NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
if (next < start)
- pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start));
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ PFN_UP(start));
next = end;
}
- pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn);
+
+ /*
+ * Early sections always have a fully populated memmap for the whole
+ * section - see pfn_valid(). If the last section has holes at the
+ * end and that section is marked "online", the memmap will be
+ * considered initialized. Make sure that memmap has a well defined
+ * state.
+ */
+ pgcnt += init_unavailable_range(PFN_DOWN(next),
+ round_up(max_pfn, PAGES_PER_SECTION));
/*
* Struct pages that do not have backing memory. This could be because
@@ -6974,6 +6987,10 @@ void __init zero_resv_unavail(void)
if (pgcnt)
pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
}
+#else
+static inline void __init init_unavailable_mem(void)
+{
+}
#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
@@ -7403,7 +7420,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Initialise every node */
mminit_verify_pageflags_layout();
setup_nr_node_ids();
- zero_resv_unavail();
+ init_unavailable_mem();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, NULL,
@@ -7598,7 +7615,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
void __init free_area_init(unsigned long *zones_size)
{
- zero_resv_unavail();
+ init_unavailable_mem();
free_area_init_node(0, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index ea0b9e606ad1..928df1638c30 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -4,26 +4,57 @@
#include <linux/sched.h>
#include <linux/hugetlb.h>
-static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- struct mm_walk *walk)
+/*
+ * We want to know the real level where a entry is located ignoring any
+ * folding of levels which may be happening. For example if p4d is folded then
+ * a missing entry found at level 1 (p4d) is actually at level 0 (pgd).
+ */
+static int real_depth(int depth)
+{
+ if (depth == 3 && PTRS_PER_PMD == 1)
+ depth = 2;
+ if (depth == 2 && PTRS_PER_PUD == 1)
+ depth = 1;
+ if (depth == 1 && PTRS_PER_P4D == 1)
+ depth = 0;
+ return depth;
+}
+
+static int walk_pte_range_inner(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
{
- pte_t *pte;
- int err = 0;
const struct mm_walk_ops *ops = walk->ops;
- spinlock_t *ptl;
+ int err = 0;
- pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (;;) {
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err)
break;
- addr += PAGE_SIZE;
- if (addr == end)
+ if (addr >= end - PAGE_SIZE)
break;
+ addr += PAGE_SIZE;
pte++;
}
+ return err;
+}
+
+static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ pte_t *pte;
+ int err = 0;
+ spinlock_t *ptl;
+
+ if (walk->no_vma) {
+ pte = pte_offset_map(pmd, addr);
+ err = walk_pte_range_inner(pte, addr, end, walk);
+ pte_unmap(pte);
+ } else {
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ err = walk_pte_range_inner(pte, addr, end, walk);
+ pte_unmap_unlock(pte, ptl);
+ }
- pte_unmap_unlock(pte, ptl);
return err;
}
@@ -34,18 +65,22 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(3);
pmd = pmd_offset(pud, addr);
do {
again:
next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || !walk->vma) {
+ if (pmd_none(*pmd) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
}
+
+ walk->action = ACTION_SUBTREE;
+
/*
* This implies that each ->pmd_entry() handler
* needs to know about pmd_trans_huge() pmds
@@ -55,16 +90,24 @@ again:
if (err)
break;
+ if (walk->action == ACTION_AGAIN)
+ goto again;
+
/*
* Check this here so we only break down trans_huge
* pages when we _need_ to
*/
- if (!ops->pte_entry)
+ if ((!walk->vma && (pmd_leaf(*pmd) || !pmd_present(*pmd))) ||
+ walk->action == ACTION_CONTINUE ||
+ !(ops->pte_entry))
continue;
- split_huge_pmd(walk->vma, pmd, addr);
- if (pmd_trans_unstable(pmd))
- goto again;
+ if (walk->vma) {
+ split_huge_pmd(walk->vma, pmd, addr);
+ if (pmd_trans_unstable(pmd))
+ goto again;
+ }
+
err = walk_pte_range(pmd, addr, next, walk);
if (err)
break;
@@ -80,37 +123,41 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(2);
pud = pud_offset(p4d, addr);
do {
again:
next = pud_addr_end(addr, end);
- if (pud_none(*pud) || !walk->vma) {
+ if (pud_none(*pud) || (!walk->vma && !walk->no_vma)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
}
- if (ops->pud_entry) {
- spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
+ walk->action = ACTION_SUBTREE;
- if (ptl) {
- err = ops->pud_entry(pud, addr, next, walk);
- spin_unlock(ptl);
- if (err)
- break;
- continue;
- }
- }
+ if (ops->pud_entry)
+ err = ops->pud_entry(pud, addr, next, walk);
+ if (err)
+ break;
+
+ if (walk->action == ACTION_AGAIN)
+ goto again;
- split_huge_pud(walk->vma, pud, addr);
+ if ((!walk->vma && (pud_leaf(*pud) || !pud_present(*pud))) ||
+ walk->action == ACTION_CONTINUE ||
+ !(ops->pmd_entry || ops->pte_entry))
+ continue;
+
+ if (walk->vma)
+ split_huge_pud(walk->vma, pud, addr);
if (pud_none(*pud))
goto again;
- if (ops->pmd_entry || ops->pte_entry)
- err = walk_pmd_range(pud, addr, next, walk);
+ err = walk_pmd_range(pud, addr, next, walk);
if (err)
break;
} while (pud++, addr = next, addr != end);
@@ -125,18 +172,24 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long next;
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
+ int depth = real_depth(1);
p4d = p4d_offset(pgd, addr);
do {
next = p4d_addr_end(addr, end);
if (p4d_none_or_clear_bad(p4d)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, depth, walk);
if (err)
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->p4d_entry) {
+ err = ops->p4d_entry(p4d, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
err = walk_pud_range(p4d, addr, next, walk);
if (err)
break;
@@ -153,17 +206,26 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
const struct mm_walk_ops *ops = walk->ops;
int err = 0;
- pgd = pgd_offset(walk->mm, addr);
+ if (walk->pgd)
+ pgd = walk->pgd + pgd_index(addr);
+ else
+ pgd = pgd_offset(walk->mm, addr);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd)) {
if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, 0, walk);
if (err)
break;
continue;
}
- if (ops->pmd_entry || ops->pte_entry)
+ if (ops->pgd_entry) {
+ err = ops->pgd_entry(pgd, addr, next, walk);
+ if (err)
+ break;
+ }
+ if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
+ ops->pte_entry)
err = walk_p4d_range(pgd, addr, next, walk);
if (err)
break;
@@ -199,7 +261,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
if (pte)
err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
else if (ops->pte_hole)
- err = ops->pte_hole(addr, next, walk);
+ err = ops->pte_hole(addr, next, -1, walk);
if (err)
break;
@@ -243,7 +305,7 @@ static int walk_page_test(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_PFNMAP) {
int err = 1;
if (ops->pte_hole)
- err = ops->pte_hole(start, end, walk);
+ err = ops->pte_hole(start, end, -1, walk);
return err ? err : 1;
}
return 0;
@@ -369,6 +431,33 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
return err;
}
+/*
+ * Similar to walk_page_range() but can walk any page tables even if they are
+ * not backed by VMAs. Because 'unusual' entries may be walked this function
+ * will also not lock the PTEs for the pte_entry() callback. This is useful for
+ * walking the kernel pages tables or page tables for firmware.
+ */
+int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
+ unsigned long end, const struct mm_walk_ops *ops,
+ pgd_t *pgd,
+ void *private)
+{
+ struct mm_walk walk = {
+ .ops = ops,
+ .mm = mm,
+ .pgd = pgd,
+ .private = private,
+ .no_vma = true
+ };
+
+ if (start >= end || !walk.mm)
+ return -EINVAL;
+
+ lockdep_assert_held(&walk.mm->mmap_sem);
+
+ return __walk_page_range(start, end, &walk);
+}
+
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private)
{
diff --git a/mm/ptdump.c b/mm/ptdump.c
new file mode 100644
index 000000000000..26208d0d03b7
--- /dev/null
+++ b/mm/ptdump.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pagewalk.h>
+#include <linux/ptdump.h>
+#include <linux/kasan.h>
+
+#ifdef CONFIG_KASAN
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_early_shadow_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline int note_kasan_page_table(struct mm_walk *walk,
+ unsigned long addr)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 4, pte_val(kasan_early_shadow_pte[0]));
+
+ walk->action = ACTION_CONTINUE;
+
+ return 0;
+}
+#endif
+
+static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pgd_t val = READ_ONCE(*pgd);
+
+#if CONFIG_PGTABLE_LEVELS > 4 && defined(CONFIG_KASAN)
+ if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d)))
+ return note_kasan_page_table(walk, addr);
+#endif
+
+ if (pgd_leaf(val))
+ st->note_page(st, addr, 0, pgd_val(val));
+
+ return 0;
+}
+
+static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ p4d_t val = READ_ONCE(*p4d);
+
+#if CONFIG_PGTABLE_LEVELS > 3 && defined(CONFIG_KASAN)
+ if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud)))
+ return note_kasan_page_table(walk, addr);
+#endif
+
+ if (p4d_leaf(val))
+ st->note_page(st, addr, 1, p4d_val(val));
+
+ return 0;
+}
+
+static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pud_t val = READ_ONCE(*pud);
+
+#if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_KASAN)
+ if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd)))
+ return note_kasan_page_table(walk, addr);
+#endif
+
+ if (pud_leaf(val))
+ st->note_page(st, addr, 2, pud_val(val));
+
+ return 0;
+}
+
+static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+ pmd_t val = READ_ONCE(*pmd);
+
+#if defined(CONFIG_KASAN)
+ if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte)))
+ return note_kasan_page_table(walk, addr);
+#endif
+
+ if (pmd_leaf(val))
+ st->note_page(st, addr, 3, pmd_val(val));
+
+ return 0;
+}
+
+static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, 4, pte_val(READ_ONCE(*pte)));
+
+ return 0;
+}
+
+static int ptdump_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ struct ptdump_state *st = walk->private;
+
+ st->note_page(st, addr, depth, 0);
+
+ return 0;
+}
+
+static const struct mm_walk_ops ptdump_ops = {
+ .pgd_entry = ptdump_pgd_entry,
+ .p4d_entry = ptdump_p4d_entry,
+ .pud_entry = ptdump_pud_entry,
+ .pmd_entry = ptdump_pmd_entry,
+ .pte_entry = ptdump_pte_entry,
+ .pte_hole = ptdump_hole,
+};
+
+void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
+{
+ const struct ptdump_range *range = st->range;
+
+ down_read(&mm->mmap_sem);
+ while (range->start != range->end) {
+ walk_page_range_novma(mm, range->start, range->end,
+ &ptdump_ops, pgd, st);
+ range++;
+ }
+ up_read(&mm->mmap_sem);
+
+ /* Flush out the last page */
+ st->note_page(st, 0, -1, 0);
+}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 0d95ddea13b0..1907cb2903c7 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1580,18 +1580,17 @@ static int slabinfo_open(struct inode *inode, struct file *file)
return seq_open(file, &slabinfo_op);
}
-static const struct file_operations proc_slabinfo_operations = {
- .open = slabinfo_open,
- .read = seq_read,
- .write = slabinfo_write,
- .llseek = seq_lseek,
- .release = seq_release,
+static const struct proc_ops slabinfo_proc_ops = {
+ .proc_open = slabinfo_open,
+ .proc_read = seq_read,
+ .proc_write = slabinfo_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
};
static int __init slab_proc_init(void)
{
- proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
- &proc_slabinfo_operations);
+ proc_create("slabinfo", SLABINFO_RIGHTS, NULL, &slabinfo_proc_ops);
return 0;
}
module_init(slab_proc_init);
@@ -1677,28 +1676,6 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
}
/**
- * __krealloc - like krealloc() but don't free @p.
- * @p: object to reallocate memory for.
- * @new_size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- *
- * This function is like krealloc() except it never frees the originally
- * allocated buffer. Use this if you don't want to free the buffer immediately
- * like, for example, with RCU.
- *
- * Return: pointer to the allocated memory or %NULL in case of error
- */
-void *__krealloc(const void *p, size_t new_size, gfp_t flags)
-{
- if (unlikely(!new_size))
- return ZERO_SIZE_PTR;
-
- return __do_krealloc(p, new_size, flags);
-
-}
-EXPORT_SYMBOL(__krealloc);
-
-/**
* krealloc - reallocate memory. The contents will remain unchanged.
* @p: object to reallocate memory for.
* @new_size: how many bytes of memory are required.
diff --git a/mm/sparse.c b/mm/sparse.c
index 3918fc3eaef1..c184b69460b7 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -198,16 +198,6 @@ static void section_mark_present(struct mem_section *ms)
ms->section_mem_map |= SECTION_MARKED_PRESENT;
}
-static inline unsigned long next_present_section_nr(unsigned long section_nr)
-{
- do {
- section_nr++;
- if (present_section_nr(section_nr))
- return section_nr;
- } while ((section_nr <= __highest_present_section_nr));
-
- return -1;
-}
#define for_each_present_section_nr(start, section_nr) \
for (section_nr = next_present_section_nr(start-1); \
((section_nr != -1) && \
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6febae9ad3cd..2c33ff456ed5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2796,17 +2796,17 @@ static int swaps_open(struct inode *inode, struct file *file)
return 0;
}
-static const struct file_operations proc_swaps_operations = {
- .open = swaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
- .poll = swaps_poll,
+static const struct proc_ops swaps_proc_ops = {
+ .proc_open = swaps_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_release = seq_release,
+ .proc_poll = swaps_poll,
};
static int __init procswaps_init(void)
{
- proc_create("swaps", 0, NULL, &proc_swaps_operations);
+ proc_create("swaps", 0, NULL, &swaps_proc_ops);
return 0;
}
__initcall(procswaps_init);