diff options
Diffstat (limited to 'arch')
62 files changed, 441 insertions, 662 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 48b5e103bdb0..98de654b79b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -393,18 +393,23 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_JUMP_LABEL_RELATIVE bool -config HAVE_RCU_TABLE_FREE +config MMU_GATHER_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE +config MMU_GATHER_RCU_TABLE_FREE bool + select MMU_GATHER_TABLE_FREE -config HAVE_MMU_GATHER_PAGE_SIZE +config MMU_GATHER_PAGE_SIZE bool -config HAVE_MMU_GATHER_NO_GATHER +config MMU_GATHER_NO_RANGE bool +config MMU_GATHER_NO_GATHER + bool + depends on MMU_GATHER_TABLE_FREE + config ARCH_HAVE_NMI_SAFE_CMPXCHG bool diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index 7268222cf4e1..528d2be58182 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -119,13 +119,12 @@ static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, return res; } -static const struct file_operations srm_env_proc_fops = { - .owner = THIS_MODULE, - .open = srm_env_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = srm_env_proc_write, +static const struct proc_ops srm_env_proc_ops = { + .proc_open = srm_env_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = srm_env_proc_write, }; static int __init @@ -182,7 +181,7 @@ srm_env_init(void) entry = srm_named_entries; while (entry->name && entry->id) { if (!proc_create_data(entry->name, 0644, named_dir, - &srm_env_proc_fops, (void *)entry->id)) + &srm_env_proc_ops, (void *)entry->id)) goto cleanup; entry++; } @@ -194,7 +193,7 @@ srm_env_init(void) char name[4]; sprintf(name, "%ld", var_num); if (!proc_create_data(name, 0644, numbered_dir, - &srm_env_proc_fops, (void *)var_num)) + &srm_env_proc_ops, (void *)var_num)) goto cleanup; } diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 9019ed9f9c94..12be7e1b7cc0 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -273,6 +273,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) #define pmd_present(x) (pmd_val(x)) +#define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0b1b1c66bce9..497e0e4702b7 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -102,7 +102,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP && ARM_LPAE + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ select HAVE_STACKPROTECTOR diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 51beec41d48c..0d3ea35c97fe 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -189,6 +189,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) } #define pmd_large(pmd) (pmd_val(pmd) & 2) +#define pmd_leaf(pmd) (pmd_val(pmd) & 2) #define pmd_bad(pmd) (pmd_val(pmd) & 2) #define pmd_present(pmd) (pmd_val(pmd)) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 5b18295021a0..ad55ab068dbf 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -134,6 +134,7 @@ #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) #define pmd_large(pmd) pmd_sect(pmd) +#define pmd_leaf(pmd) pmd_sect(pmd) #define pud_clear(pudp) \ do { \ diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 669474add486..4d4e7b6aabff 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -37,10 +37,6 @@ static inline void __tlb_remove_table(void *_table) #include <asm-generic/tlb.h> -#ifndef CONFIG_HAVE_RCU_TABLE_FREE -#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry) -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c index 312cb89ec364..4247ebf4b893 100644 --- a/arch/arm/kernel/atags_proc.c +++ b/arch/arm/kernel/atags_proc.c @@ -17,9 +17,9 @@ static ssize_t atags_read(struct file *file, char __user *buf, return simple_read_from_buffer(buf, count, ppos, b->data, b->size); } -static const struct file_operations atags_fops = { - .read = atags_read, - .llseek = default_llseek, +static const struct proc_ops atags_proc_ops = { + .proc_read = atags_read, + .proc_lseek = default_llseek, }; #define BOOT_PARAMS_SIZE 1536 @@ -61,7 +61,7 @@ static int __init init_atags_procfs(void) b->size = size; memcpy(b->data, atags_copy, size); - tags_entry = proc_create_data("atags", 0400, NULL, &atags_fops, b); + tags_entry = proc_create_data("atags", 0400, NULL, &atags_proc_ops, b); if (!tags_entry) goto nomem; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 788c5cf46de5..84718eddae60 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -162,12 +162,12 @@ static ssize_t alignment_proc_write(struct file *file, const char __user *buffer return count; } -static const struct file_operations alignment_proc_fops = { - .open = alignment_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = alignment_proc_write, +static const struct proc_ops alignment_proc_ops = { + .proc_open = alignment_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = alignment_proc_write, }; #endif /* CONFIG_PROC_FS */ @@ -1016,7 +1016,7 @@ static int __init alignment_init(void) struct proc_dir_entry *res; res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, - &alignment_proc_fops); + &alignment_proc_ops); if (!res) return -ENOMEM; #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e822af0d9219..9414d72f664b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. This diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index de238b59d9eb..0b30e884e088 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -104,6 +104,7 @@ config ARM64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP + select GENERIC_PTDUMP select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER @@ -164,7 +165,7 @@ config ARM64 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX - select HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cf09010d825f..1c906d932d6b 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -1,22 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -config ARM64_PTDUMP_CORE - def_bool n - -config ARM64_PTDUMP_DEBUGFS - bool "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select ARM64_PTDUMP_CORE - select DEBUG_FS - help - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - - If in doubt, say N. - config PID_IN_CONTEXTIDR bool "Write the current PID to the CONTEXTIDR register" help @@ -42,7 +25,7 @@ config ARM64_RANDOMIZE_TEXT_OFFSET config DEBUG_WX bool "Warn on W+X mappings at boot" - select ARM64_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index cd5de0e40bfa..538c85e62f86 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,6 +441,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) +#define pmd_leaf(pmd) pmd_sect(pmd) #if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -525,6 +526,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_leaf(pud) pud_sect(pud) #define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 0b8e7269ec82..38187f74e089 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,7 +5,7 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H -#ifdef CONFIG_ARM64_PTDUMP_CORE +#ifdef CONFIG_PTDUMP_CORE #include <linux/mm_types.h> #include <linux/seq_file.h> @@ -21,15 +21,15 @@ struct ptdump_info { unsigned long base_addr; }; -void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); -#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS +void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +#ifdef CONFIG_PTDUMP_DEBUGFS void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } #endif void ptdump_check_wx(void); -#endif /* CONFIG_ARM64_PTDUMP_CORE */ +#endif /* CONFIG_PTDUMP_CORE */ #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_check_wx() diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 849c1df3d214..d91030f0ffee 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,8 +4,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o -obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 0a920b538a89..860c00ec8bd3 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ptdump.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -75,10 +76,11 @@ static struct addr_marker address_markers[] = { * dumps out a description of the range. */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; - unsigned level; + int level; u64 current_prot; bool check_wx; unsigned long wx_pages; @@ -174,11 +176,14 @@ struct pg_level { }; static struct pg_level pg_level[] = { - { - }, { /* pgd */ + { /* pgd */ .name = "PGD", .bits = pte_bits, .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), }, { /* pud */ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", .bits = pte_bits, @@ -241,13 +246,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page(struct pg_state *st, unsigned long addr, unsigned level, - u64 val) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); static const char units[] = "KMGTPE"; - u64 prot = val & pg_level[level].mask; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; - if (!st->level) { + if (st->level == -1) { st->level = level; st->current_prot = prot; st->start_address = addr; @@ -260,21 +269,22 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, if (st->current_prot) { note_prot_uxn(st, addr); note_prot_wx(st, addr); - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); if (addr >= st->marker[1].start_address) { st->marker++; @@ -293,85 +303,27 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, - unsigned long end) -{ - unsigned long addr = start; - pte_t *ptep = pte_offset_kernel(pmdp, start); - - do { - note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } while (ptep++, addr += PAGE_SIZE, addr != end); -} - -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, - unsigned long end) -{ - unsigned long next, addr = start; - pmd_t *pmdp = pmd_offset(pudp, start); - - do { - pmd_t pmd = READ_ONCE(*pmdp); - next = pmd_addr_end(addr, end); - - if (pmd_none(pmd) || pmd_sect(pmd)) { - note_page(st, addr, 3, pmd_val(pmd)); - } else { - BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr, next); - } - } while (pmdp++, addr = next, addr != end); -} - -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, - unsigned long end) +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { - unsigned long next, addr = start; - pud_t *pudp = pud_offset(pgdp, start); - - do { - pud_t pud = READ_ONCE(*pudp); - next = pud_addr_end(addr, end); - - if (pud_none(pud) || pud_sect(pud)) { - note_page(st, addr, 2, pud_val(pud)); - } else { - BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr, next); - } - } while (pudp++, addr = next, addr != end); -} + unsigned long end = ~0UL; + struct pg_state st; -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, - unsigned long start) -{ - unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; - unsigned long next, addr = start; - pgd_t *pgdp = pgd_offset(mm, start); - - do { - pgd_t pgd = READ_ONCE(*pgdp); - next = pgd_addr_end(addr, end); - - if (pgd_none(pgd)) { - note_page(st, addr, 1, pgd_val(pgd)); - } else { - BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr, next); - } - } while (pgdp++, addr = next, addr != end); -} + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; -void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) -{ - struct pg_state st = { - .seq = m, + st = (struct pg_state){ + .seq = s, .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } }; - walk_pgd(&st, info->mm, info->base_addr); - - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } static void ptdump_initialize(void) @@ -398,11 +350,19 @@ void ptdump_check_wx(void) { 0, NULL}, { -1, NULL}, }, + .level = -1, .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } }; - walk_pgd(&st, &init_mm, PAGE_OFFSET); - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", st.wx_pages, st.uxn_pages); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 40797cbfba2d..128f70852bf3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -943,13 +943,13 @@ int __init arch_ioremap_pud_supported(void) * SW table walks can't handle removal of intermediate entries. */ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int __init arch_ioremap_pmd_supported(void) { /* See arch_ioremap_pud_supported() */ - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 064163f25592..1f2eae3e988b 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { struct ptdump_info *info = m->private; - ptdump_walk_pgd(m, info); + ptdump_walk(m, info); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index b392c0a50346..a25ab9b37953 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -331,10 +331,10 @@ retry: return size; } -static const struct file_operations salinfo_event_fops = { - .open = salinfo_event_open, - .read = salinfo_event_read, - .llseek = noop_llseek, +static const struct proc_ops salinfo_event_proc_ops = { + .proc_open = salinfo_event_open, + .proc_read = salinfo_event_read, + .proc_lseek = noop_llseek, }; static int @@ -534,12 +534,12 @@ salinfo_log_write(struct file *file, const char __user *buffer, size_t count, lo return count; } -static const struct file_operations salinfo_data_fops = { - .open = salinfo_log_open, - .release = salinfo_log_release, - .read = salinfo_log_read, - .write = salinfo_log_write, - .llseek = default_llseek, +static const struct proc_ops salinfo_data_proc_ops = { + .proc_open = salinfo_log_open, + .proc_release = salinfo_log_release, + .proc_read = salinfo_log_read, + .proc_write = salinfo_log_write, + .proc_lseek = default_llseek, }; static int salinfo_cpu_online(unsigned int cpu) @@ -617,13 +617,13 @@ salinfo_init(void) continue; entry = proc_create_data("event", S_IRUSR, dir, - &salinfo_event_fops, data); + &salinfo_event_proc_ops, data); if (!entry) continue; *sdir++ = entry; entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, - &salinfo_data_fops, data); + &salinfo_data_proc_ops, data); if (!entry) continue; *sdir++ = entry; diff --git a/arch/m68k/kernel/bootinfo_proc.c b/arch/m68k/kernel/bootinfo_proc.c index 3b9cab84917d..857fa2aaa748 100644 --- a/arch/m68k/kernel/bootinfo_proc.c +++ b/arch/m68k/kernel/bootinfo_proc.c @@ -26,9 +26,9 @@ static ssize_t bootinfo_read(struct file *file, char __user *buf, bootinfo_size); } -static const struct file_operations bootinfo_fops = { - .read = bootinfo_read, - .llseek = default_llseek, +static const struct proc_ops bootinfo_proc_ops = { + .proc_read = bootinfo_read, + .proc_lseek = default_llseek, }; void __init save_bootinfo(const struct bi_record *bi) @@ -67,7 +67,7 @@ static int __init init_bootinfo_procfs(void) if (!bootinfo_copy) return -ENOMEM; - pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_fops, NULL); + pde = proc_create_data("bootinfo", 0400, NULL, &bootinfo_proc_ops, NULL); if (!pde) { kfree(bootinfo_copy); return -ENOMEM; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 91b89aab1787..aef5378f909c 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -639,6 +639,11 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#ifdef _PAGE_HUGE +#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0) +#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0) +#endif + #define gup_fast_permitted(start, end) (!cpu_has_dc_aliases) #include <asm-generic/pgtable.h> diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index 8126f15b8e09..61c033494af5 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -89,13 +89,12 @@ static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations pvc_line_proc_fops = { - .owner = THIS_MODULE, - .open = pvc_line_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = pvc_line_proc_write, +static const struct proc_ops pvc_line_proc_ops = { + .proc_open = pvc_line_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = pvc_line_proc_write, }; static ssize_t pvc_scroll_proc_write(struct file *file, const char __user *buf, @@ -148,13 +147,12 @@ static int pvc_scroll_proc_open(struct inode *inode, struct file *file) return single_open(file, pvc_scroll_proc_show, NULL); } -static const struct file_operations pvc_scroll_proc_fops = { - .owner = THIS_MODULE, - .open = pvc_scroll_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = pvc_scroll_proc_write, +static const struct proc_ops pvc_scroll_proc_ops = { + .proc_open = pvc_scroll_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = pvc_scroll_proc_write, }; void pvc_proc_timerfunc(struct timer_list *unused) @@ -189,12 +187,11 @@ static int __init pvc_proc_init(void) } for (i = 0; i < PVC_NLINES; i++) { proc_entry = proc_create_data(pvc_linename[i], 0644, dir, - &pvc_line_proc_fops, &pvc_linedata[i]); + &pvc_line_proc_ops, &pvc_linedata[i]); if (proc_entry == NULL) goto error; } - proc_entry = proc_create("scroll", 0644, dir, - &pvc_scroll_proc_fops); + proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_ops); if (proc_entry == NULL) goto error; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c150a9d49343..bf2b538aba12 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -222,9 +222,8 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE - select HAVE_MMU_GATHER_PAGE_SIZE + select MMU_GATHER_RCU_TABLE_FREE + select MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 998317702630..dc5c039eb28e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index f6968c811026..a41e91bd0580 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index b01624e5c467..201a69e6a355 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1355,18 +1355,21 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va * Like pmd_huge() and pmd_large(), but works regardless of config options */ #define pmd_is_leaf pmd_is_leaf +#define pmd_leaf pmd_is_leaf static inline bool pmd_is_leaf(pmd_t pmd) { return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); } #define pud_is_leaf pud_is_leaf +#define pud_leaf pud_is_leaf static inline bool pud_is_leaf(pud_t pud) { return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); } #define pgd_is_leaf pgd_is_leaf +#define pgd_leaf pgd_is_leaf static inline bool pgd_is_leaf(pgd_t pgd) { return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb..29c43665a753 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929d..7f3a8b902325 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing, Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include <asm-generic/tlb.h> diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index be3758d54e59..877817471e3c 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -39,10 +39,10 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return 0; } -static const struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap +static const struct proc_ops page_map_proc_ops = { + .proc_lseek = page_map_seek, + .proc_read = page_map_read, + .proc_mmap = page_map_mmap, }; @@ -51,7 +51,7 @@ static int __init proc_ppc64_init(void) struct proc_dir_entry *pde; pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_fops, vdso_data); + &page_map_proc_ops, vdso_data); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 487dcd8da4de..2d33f342a293 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -159,12 +159,12 @@ static int poweron_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_poweron_show, NULL); } -static const struct file_operations ppc_rtas_poweron_operations = { - .open = poweron_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_poweron_write, - .release = single_release, +static const struct proc_ops ppc_rtas_poweron_proc_ops = { + .proc_open = poweron_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_poweron_write, + .proc_release = single_release, }; static int progress_open(struct inode *inode, struct file *file) @@ -172,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_progress_show, NULL); } -static const struct file_operations ppc_rtas_progress_operations = { - .open = progress_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_progress_write, - .release = single_release, +static const struct proc_ops ppc_rtas_progress_proc_ops = { + .proc_open = progress_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_progress_write, + .proc_release = single_release, }; static int clock_open(struct inode *inode, struct file *file) @@ -185,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_clock_show, NULL); } -static const struct file_operations ppc_rtas_clock_operations = { - .open = clock_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_clock_write, - .release = single_release, +static const struct proc_ops ppc_rtas_clock_proc_ops = { + .proc_open = clock_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_clock_write, + .proc_release = single_release, }; static int tone_freq_open(struct inode *inode, struct file *file) @@ -198,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_freq_show, NULL); } -static const struct file_operations ppc_rtas_tone_freq_operations = { - .open = tone_freq_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_tone_freq_write, - .release = single_release, +static const struct proc_ops ppc_rtas_tone_freq_proc_ops = { + .proc_open = tone_freq_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_tone_freq_write, + .proc_release = single_release, }; static int tone_volume_open(struct inode *inode, struct file *file) @@ -211,12 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file) return single_open(file, ppc_rtas_tone_volume_show, NULL); } -static const struct file_operations ppc_rtas_tone_volume_operations = { - .open = tone_volume_open, - .read = seq_read, - .llseek = seq_lseek, - .write = ppc_rtas_tone_volume_write, - .release = single_release, +static const struct proc_ops ppc_rtas_tone_volume_proc_ops = { + .proc_open = tone_volume_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = ppc_rtas_tone_volume_write, + .proc_release = single_release, }; static int ppc_rtas_find_all_sensors(void); @@ -238,17 +238,17 @@ static int __init proc_rtas_init(void) return -ENODEV; proc_create("powerpc/rtas/progress", 0644, NULL, - &ppc_rtas_progress_operations); + &ppc_rtas_progress_proc_ops); proc_create("powerpc/rtas/clock", 0644, NULL, - &ppc_rtas_clock_operations); + &ppc_rtas_clock_proc_ops); proc_create("powerpc/rtas/poweron", 0644, NULL, - &ppc_rtas_poweron_operations); + &ppc_rtas_poweron_proc_ops); proc_create_single("powerpc/rtas/sensors", 0444, NULL, ppc_rtas_sensors_show); proc_create("powerpc/rtas/frequency", 0644, NULL, - &ppc_rtas_tone_freq_operations); + &ppc_rtas_tone_freq_proc_ops); proc_create("powerpc/rtas/volume", 0644, NULL, - &ppc_rtas_tone_volume_operations); + &ppc_rtas_tone_volume_proc_ops); proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL, ppc_rtas_rmo_buf_show); return 0; diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 84f794782c62..a99179d83538 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -655,7 +655,7 @@ struct rtas_flash_file { const char *filename; const char *rtas_call_name; int *status; - const struct file_operations fops; + const struct proc_ops ops; }; static const struct rtas_flash_file rtas_flash_files[] = { @@ -663,36 +663,36 @@ static const struct rtas_flash_file rtas_flash_files[] = { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, .rtas_call_name = "ibm,update-flash-64-and-reboot", .status = &rtas_update_flash_data.status, - .fops.read = rtas_flash_read_msg, - .fops.write = rtas_flash_write, - .fops.release = rtas_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = rtas_flash_read_msg, + .ops.proc_write = rtas_flash_write, + .ops.proc_release = rtas_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, .rtas_call_name = "ibm,update-flash-64-and-reboot", .status = &rtas_update_flash_data.status, - .fops.read = rtas_flash_read_num, - .fops.write = rtas_flash_write, - .fops.release = rtas_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = rtas_flash_read_num, + .ops.proc_write = rtas_flash_write, + .ops.proc_release = rtas_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, .rtas_call_name = "ibm,validate-flash-image", .status = &rtas_validate_flash_data.status, - .fops.read = validate_flash_read, - .fops.write = validate_flash_write, - .fops.release = validate_flash_release, - .fops.llseek = default_llseek, + .ops.proc_read = validate_flash_read, + .ops.proc_write = validate_flash_write, + .ops.proc_release = validate_flash_release, + .ops.proc_lseek = default_llseek, }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, .rtas_call_name = "ibm,manage-flash-image", .status = &rtas_manage_flash_data.status, - .fops.read = manage_flash_read, - .fops.write = manage_flash_write, - .fops.llseek = default_llseek, + .ops.proc_read = manage_flash_read, + .ops.proc_write = manage_flash_write, + .ops.proc_lseek = default_llseek, } }; @@ -723,7 +723,7 @@ static int __init rtas_flash_init(void) const struct rtas_flash_file *f = &rtas_flash_files[i]; int token; - if (!proc_create(f->filename, 0600, NULL, &f->fops)) + if (!proc_create(f->filename, 0600, NULL, &f->ops)) goto enomem; /* diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 8d02e047f96a..89b798f8f656 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -385,12 +385,12 @@ static __poll_t rtas_log_poll(struct file *file, poll_table * wait) return 0; } -static const struct file_operations proc_rtas_log_operations = { - .read = rtas_log_read, - .poll = rtas_log_poll, - .open = rtas_log_open, - .release = rtas_log_release, - .llseek = noop_llseek, +static const struct proc_ops rtas_log_proc_ops = { + .proc_read = rtas_log_read, + .proc_poll = rtas_log_poll, + .proc_open = rtas_log_open, + .proc_release = rtas_log_release, + .proc_lseek = noop_llseek, }; static int enable_surveillance(int timeout) @@ -572,7 +572,7 @@ static int __init rtas_init(void) return -ENODEV; entry = proc_create("powerpc/rtas/error_log", 0400, NULL, - &proc_rtas_log_operations); + &rtas_log_proc_ops); if (!entry) printk(KERN_ERR "Failed to create error_log proc entry\n"); diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75483b40fcb1..2bf7e1b4fd82 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 50d68d21ddcc..3c7dec70cda0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1616,11 +1616,11 @@ static ssize_t topology_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations topology_ops = { - .read = seq_read, - .write = topology_write, - .open = topology_open, - .release = single_release +static const struct proc_ops topology_proc_ops = { + .proc_read = seq_read, + .proc_write = topology_write, + .proc_open = topology_open, + .proc_release = single_release, }; static int topology_update_init(void) @@ -1630,7 +1630,7 @@ static int topology_update_init(void) if (vphn_enabled) topology_schedule_update(); - if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) + if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops)) return -ENOMEM; topology_inited = 1; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 60cb29ae4739..3c3da25b445c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -582,12 +582,12 @@ static int vcpudispatch_stats_open(struct inode *inode, struct file *file) return single_open(file, vcpudispatch_stats_display, NULL); } -static const struct file_operations vcpudispatch_stats_proc_ops = { - .open = vcpudispatch_stats_open, - .read = seq_read, - .write = vcpudispatch_stats_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops vcpudispatch_stats_proc_ops = { + .proc_open = vcpudispatch_stats_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static ssize_t vcpudispatch_stats_freq_write(struct file *file, @@ -626,12 +626,12 @@ static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file) return single_open(file, vcpudispatch_stats_freq_display, NULL); } -static const struct file_operations vcpudispatch_stats_freq_proc_ops = { - .open = vcpudispatch_stats_freq_open, - .read = seq_read, - .write = vcpudispatch_stats_freq_write, - .llseek = seq_lseek, - .release = single_release, +static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { + .proc_open = vcpudispatch_stats_freq_open, + .proc_read = seq_read, + .proc_write = vcpudispatch_stats_freq_write, + .proc_lseek = seq_lseek, + .proc_release = single_release, }; static int __init vcpudispatch_stats_procfs_init(void) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc4b69b..f43e778dd7c8 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -698,12 +698,12 @@ static int lparcfg_open(struct inode *inode, struct file *file) return single_open(file, lparcfg_data, NULL); } -static const struct file_operations lparcfg_fops = { - .read = seq_read, - .write = lparcfg_write, - .open = lparcfg_open, - .release = single_release, - .llseek = seq_lseek, +static const struct proc_ops lparcfg_proc_ops = { + .proc_read = seq_read, + .proc_write = lparcfg_write, + .proc_open = lparcfg_open, + .proc_release = single_release, + .proc_lseek = seq_lseek, }; static int __init lparcfg_init(void) @@ -714,7 +714,7 @@ static int __init lparcfg_init(void) if (firmware_has_feature(FW_FEATURE_SPLPAR)) mode |= 0200; - if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops)) { + if (!proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops)) { printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 8a9c4fb95b8b..7f7369fec46b 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -391,9 +391,9 @@ out: return rv ? rv : count; } -static const struct file_operations ofdt_fops = { - .write = ofdt_write, - .llseek = noop_llseek, +static const struct proc_ops ofdt_proc_ops = { + .proc_write = ofdt_write, + .proc_lseek = noop_llseek, }; /* create /proc/powerpc/ofdt write-only by root */ @@ -401,7 +401,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_fops); + ent = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops); if (ent) proc_set_size(ent, 0); diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index a0001280503c..2879c4f0ceb7 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -152,13 +152,12 @@ static int scanlog_release(struct inode * inode, struct file * file) return 0; } -static const struct file_operations scanlog_fops = { - .owner = THIS_MODULE, - .read = scanlog_read, - .write = scanlog_write, - .open = scanlog_open, - .release = scanlog_release, - .llseek = noop_llseek, +static const struct proc_ops scanlog_proc_ops = { + .proc_read = scanlog_read, + .proc_write = scanlog_write, + .proc_open = scanlog_open, + .proc_release = scanlog_release, + .proc_lseek = noop_llseek, }; static int __init scanlog_init(void) @@ -176,7 +175,7 @@ static int __init scanlog_init(void) goto err; ent = proc_create("powerpc/rtas/scan-log-dump", 0400, NULL, - &scanlog_fops); + &scanlog_proc_ops); if (!ent) goto err; return 0; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 36e638d1dfe4..b15f70a1fdfa 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -43,6 +43,13 @@ static inline int pud_bad(pud_t pud) return !pud_present(pud); } +#define pud_leaf pud_leaf +static inline int pud_leaf(pud_t pud) +{ + return pud_present(pud) && + (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + static inline void set_pud(pud_t *pudp, pud_t pud) { *pudp = pud; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f66b87314fa2..e43041519edd 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -130,6 +130,13 @@ static inline int pmd_bad(pmd_t pmd) return !pmd_present(pmd); } +#define pmd_leaf pmd_leaf +static inline int pmd_leaf(pmd_t pmd) +{ + return pmd_present(pmd) && + (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { *pmdp = pmd; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 287714d51b47..734996784d03 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -163,13 +163,13 @@ config S390 select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP - select HAVE_MMU_GATHER_NO_GATHER + select MMU_GATHER_NO_GATHER select HAVE_MOD_ARCH_SPECIFIC select HAVE_NOP_MCOUNT select HAVE_OPROFILE select HAVE_PCI select HAVE_PERF_EVENTS - select HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7b03037a8475..137a3920ca36 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -673,6 +673,7 @@ static inline int pud_none(pud_t pud) return pud_val(pud) == _REGION3_ENTRY_EMPTY; } +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) @@ -690,6 +691,7 @@ static inline unsigned long pud_pfn(pud_t pud) return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; } +#define pmd_leaf pmd_large static inline int pmd_large(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c index ec2b25302427..fb517b82a87b 100644 --- a/arch/sh/mm/alignment.c +++ b/arch/sh/mm/alignment.c @@ -152,13 +152,12 @@ static ssize_t alignment_proc_write(struct file *file, return count; } -static const struct file_operations alignment_proc_fops = { - .owner = THIS_MODULE, - .open = alignment_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = alignment_proc_write, +static const struct proc_ops alignment_proc_ops = { + .proc_open = alignment_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = alignment_proc_write, }; /* @@ -176,12 +175,12 @@ static int __init alignment_init(void) return -ENOMEM; res = proc_create_data("alignment", S_IWUSR | S_IRUGO, dir, - &alignment_proc_fops, &se_usermode); + &alignment_proc_ops, &se_usermode); if (!res) return -ENOMEM; res = proc_create_data("kernel_alignment", S_IWUSR | S_IRUGO, dir, - &alignment_proc_fops, &se_kernmode_warn); + &alignment_proc_ops, &se_kernmode_warn); if (!res) return -ENOMEM; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e8c3ea01c12f..c1dd6dd642f4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -64,8 +64,7 @@ config SPARC64 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_KRETPROBES select HAVE_KPROBES - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select MMU_GATHER_RCU_TABLE_FREE if SMP select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 34ff3b43afbb..65494c3a420e 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -683,6 +683,7 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +#define pmd_leaf pmd_large static inline unsigned long pmd_large(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); @@ -867,6 +868,7 @@ static inline unsigned long pud_page_vaddr(pud_t pud) /* only used by the stubbed out hugetlb gup code, should never be called */ #define p4d_page(p4d) NULL +#define pud_leaf pud_large static inline unsigned long pud_large(pud_t pud) { pte_t pte = __pte(pud_val(pud)); diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36..6820d357581c 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. + */ + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include <asm-generic/tlb.h> #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index a6292f8ed180..bd48575172c3 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -104,13 +104,12 @@ static ssize_t led_proc_write(struct file *file, const char __user *buffer, return count; } -static const struct file_operations led_proc_fops = { - .owner = THIS_MODULE, - .open = led_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = led_proc_write, +static const struct proc_ops led_proc_ops = { + .proc_open = led_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = led_proc_write, }; static struct proc_dir_entry *led; @@ -121,7 +120,7 @@ static int __init led_init(void) { timer_setup(&led_blink_timer, led_blink, 0); - led = proc_create("led", 0, NULL, &led_proc_fops); + led = proc_create("led", 0, NULL, &led_proc_ops); if (!led) return -ENOMEM; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 0117489e9b30..b80a1d616e4e 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -752,10 +752,9 @@ static ssize_t mconsole_proc_write(struct file *file, return count; } -static const struct file_operations mconsole_proc_fops = { - .owner = THIS_MODULE, - .write = mconsole_proc_write, - .llseek = noop_llseek, +static const struct proc_ops mconsole_proc_ops = { + .proc_write = mconsole_proc_write, + .proc_lseek = noop_llseek, }; static int create_proc_mconsole(void) @@ -765,7 +764,7 @@ static int create_proc_mconsole(void) if (notify_socket == NULL) return 0; - ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_fops); + ent = proc_create("mconsole", 0200, NULL, &mconsole_proc_ops); if (ent == NULL) { printk(KERN_INFO "create_proc_mconsole : proc_create failed\n"); return 0; diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 369fd844e195..43edc2aa57e4 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -55,20 +55,19 @@ static ssize_t exitcode_proc_write(struct file *file, return count; } -static const struct file_operations exitcode_proc_fops = { - .owner = THIS_MODULE, - .open = exitcode_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = exitcode_proc_write, +static const struct proc_ops exitcode_proc_ops = { + .proc_open = exitcode_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = exitcode_proc_write, }; static int make_proc_exitcode(void) { struct proc_dir_entry *ent; - ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_fops); + ent = proc_create("exitcode", 0600, NULL, &exitcode_proc_ops); if (ent == NULL) { printk(KERN_WARNING "make_proc_exitcode : Failed to register " "/proc/exitcode\n"); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 17045e7211bf..56a094182bf5 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -348,13 +348,12 @@ static ssize_t sysemu_proc_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations sysemu_proc_fops = { - .owner = THIS_MODULE, - .open = sysemu_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = sysemu_proc_write, +static const struct proc_ops sysemu_proc_ops = { + .proc_open = sysemu_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = sysemu_proc_write, }; int __init make_proc_sysemu(void) @@ -363,7 +362,7 @@ int __init make_proc_sysemu(void) if (!sysemu_supported) return 0; - ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_fops); + ent = proc_create("sysemu", 0600, NULL, &sysemu_proc_ops); if (ent == NULL) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 44d279698c0f..beea77046f9b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -120,6 +120,7 @@ config X86 select GENERIC_IRQ_RESERVATION_MODE select GENERIC_IRQ_SHOW select GENERIC_PENDING_IRQ if SMP + select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER @@ -202,7 +203,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION select HAVE_FUNCTION_ARG_ACCESS_API diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c4eab8ed33a3..2e74690b028a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,26 +62,10 @@ config EARLY_PRINTK_USB_XDBC config MCSAFE_TEST def_bool n -config X86_PTDUMP_CORE - def_bool n - -config X86_PTDUMP - tristate "Export kernel pagetable layout to userspace via debugfs" - depends on DEBUG_KERNEL - select DEBUG_FS - select X86_PTDUMP_CORE - ---help--- - Say Y here if you want to show the kernel pagetable layout in a - debugfs file. This information is only useful for kernel developers - who are working in architecture specific areas of the kernel. - It is probably not a good idea to enable this feature in a production - kernel. - If in doubt, say "N" - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Enable this if you want to dump the EFI page table before enabling virtual mode. This can be used to debug miscellaneous @@ -90,7 +74,7 @@ config EFI_PGT_DUMP config DEBUG_WX bool "Warn on W+X mappings at boot" - select X86_PTDUMP_CORE + select PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index ad97dc155195..7e118660bbd9 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -29,8 +29,9 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user); +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm); +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user); void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); @@ -239,6 +240,7 @@ static inline unsigned long pgd_pfn(pgd_t pgd) return (pgd_val(pgd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +#define p4d_leaf p4d_large static inline int p4d_large(p4d_t p4d) { /* No 512 GiB pages yet */ @@ -247,6 +249,7 @@ static inline int p4d_large(p4d_t p4d) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) +#define pmd_leaf pmd_large static inline int pmd_large(pmd_t pte) { return pmd_flags(pte) & _PAGE_PSE; @@ -874,6 +877,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == @@ -885,6 +889,7 @@ static inline int pud_bad(pud_t pud) return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; } #else +#define pud_leaf pud_large static inline int pud_large(pud_t pud) { return 0; @@ -1233,6 +1238,7 @@ static inline bool pgdp_maps_userspace(void *__ptr) return (((ptr & ~PAGE_MASK) / sizeof(pgd_t)) < PGD_KERNEL_START); } +#define pgd_leaf pgd_large static inline int pgd_large(pgd_t pgd) { return 0; } #ifdef CONFIG_PAGE_TABLE_ISOLATION diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h index f23e7aaff4cd..820082bd6880 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -29,8 +29,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) * shootdown, enablement code for several hypervisors overrides * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing * a hypercall. To keep software pagetable walkers safe in this case we - * switch to RCU based table free (HAVE_RCU_TABLE_FREE). See the comment - * below 'ifdef CONFIG_HAVE_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment + * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h * for more details. */ static inline void __tlb_remove_table(void *table) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index da532f656a7b..a5c506f6da7f 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -396,15 +396,16 @@ static int mtrr_open(struct inode *inode, struct file *file) return single_open(file, mtrr_seq_show, NULL); } -static const struct file_operations mtrr_fops = { - .owner = THIS_MODULE, - .open = mtrr_open, - .read = seq_read, - .llseek = seq_lseek, - .write = mtrr_write, - .unlocked_ioctl = mtrr_ioctl, - .compat_ioctl = mtrr_ioctl, - .release = mtrr_close, +static const struct proc_ops mtrr_proc_ops = { + .proc_open = mtrr_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = mtrr_write, + .proc_ioctl = mtrr_ioctl, +#ifdef CONFIG_COMPAT + .proc_compat_ioctl = mtrr_ioctl, +#endif + .proc_release = mtrr_close, }; static int __init mtrr_if_init(void) @@ -417,7 +418,7 @@ static int __init mtrr_if_init(void) (!cpu_has(c, X86_FEATURE_CENTAUR_MCR))) return -ENODEV; - proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); + proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_proc_ops); return 0; } arch_initcall(mtrr_if_init); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 98aecb14fbcc..98f7c6fa2eaa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -28,8 +28,8 @@ CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_X86_PTDUMP_CORE) += dump_pagetables.o -obj-$(CONFIG_X86_PTDUMP) += debug_pagetables.o +obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += debug_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 39001a401eff..4a3b62f780b4 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { - ptdump_walk_pgd_level_debugfs(m, NULL, false); + ptdump_walk_pgd_level_debugfs(m, &init_mm, false); return 0; } @@ -15,11 +15,8 @@ DEFINE_SHOW_ATTRIBUTE(ptdump); static int ptdump_curknl_show(struct seq_file *m, void *v) { - if (current->mm->pgd) { - down_read(¤t->mm->mmap_sem); - ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false); - up_read(¤t->mm->mmap_sem); - } + if (current->mm->pgd) + ptdump_walk_pgd_level_debugfs(m, current->mm, false); return 0; } @@ -28,11 +25,8 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curknl); #ifdef CONFIG_PAGE_TABLE_ISOLATION static int ptdump_curusr_show(struct seq_file *m, void *v) { - if (current->mm->pgd) { - down_read(¤t->mm->mmap_sem); - ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true); - up_read(¤t->mm->mmap_sem); - } + if (current->mm->pgd) + ptdump_walk_pgd_level_debugfs(m, current->mm, true); return 0; } @@ -43,7 +37,7 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr); static int ptdump_efi_show(struct seq_file *m, void *v) { if (efi_mm.pgd) - ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false); + ptdump_walk_pgd_level_debugfs(m, &efi_mm, false); return 0; } diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ab67822fd2f4..64229dad7eab 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -16,6 +16,7 @@ #include <linux/seq_file.h> #include <linux/highmem.h> #include <linux/pci.h> +#include <linux/ptdump.h> #include <asm/e820/types.h> #include <asm/pgtable.h> @@ -26,16 +27,18 @@ * when a "break" in the continuity is found. */ struct pg_state { + struct ptdump_state ptdump; int level; - pgprot_t current_prot; + pgprotval_t current_prot; pgprotval_t effective_prot; + pgprotval_t prot_levels[5]; unsigned long start_address; - unsigned long current_address; const struct addr_marker *marker; unsigned long lines; bool to_dmesg; bool check_wx; unsigned long wx_pages; + struct seq_file *seq; }; struct addr_marker { @@ -174,11 +177,10 @@ static struct addr_marker address_markers[] = { /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) +static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) { - pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = - { "cr3", "pgd", "p4d", "pud", "pmd", "pte" }; + { "pgd", "p4d", "pud", "pmd", "pte" }; if (!(pr & _PAGE_PRESENT)) { /* Not present */ @@ -202,12 +204,12 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, " "); /* Bit 7 has a different meaning on level 3 vs 4 */ - if (level <= 4 && pr & _PAGE_PSE) + if (level <= 3 && pr & _PAGE_PSE) pt_dump_cont_printf(m, dmsg, "PSE "); else pt_dump_cont_printf(m, dmsg, " "); - if ((level == 5 && pr & _PAGE_PAT) || - ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE)) + if ((level == 4 && pr & _PAGE_PAT) || + ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) pt_dump_cont_printf(m, dmsg, "PAT "); else pt_dump_cont_printf(m, dmsg, " "); @@ -223,24 +225,11 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } -/* - * On 64 bits, sign-extend the 48 bit address to 64 bit - */ -static unsigned long normalize_addr(unsigned long u) -{ - int shift; - if (!IS_ENABLED(CONFIG_X86_64)) - return u; - - shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); - return (signed long)(u << shift) >> shift; -} - -static void note_wx(struct pg_state *st) +static void note_wx(struct pg_state *st, unsigned long addr) { unsigned long npages; - npages = (st->current_address - st->start_address) / PAGE_SIZE; + npages = (addr - st->start_address) / PAGE_SIZE; #ifdef CONFIG_PCI_BIOS /* @@ -248,7 +237,7 @@ static void note_wx(struct pg_state *st) * Inform about it, but avoid the warning. */ if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && - st->current_address <= PAGE_OFFSET + BIOS_END) { + addr <= PAGE_OFFSET + BIOS_END) { pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); return; } @@ -260,27 +249,47 @@ static void note_wx(struct pg_state *st) (void *)st->start_address); } +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + /* * This function gets called on a break in a continuous series * of PTE entries; the next one is different so we need to * print what we collected so far. */ -static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, pgprotval_t new_eff, int level) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { - pgprotval_t prot, cur, eff; + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t new_prot, new_eff; + pgprotval_t cur, eff; static const char units[] = "BKMGTPE"; + struct seq_file *m = st->seq; + + new_prot = val & PTE_FLAGS_MASK; + + if (level > 0) { + new_eff = effective_prot(st->prot_levels[level - 1], + new_prot); + } else { + new_eff = new_prot; + } + + if (level >= 0) + st->prot_levels[level] = new_eff; /* * If we have a "break" in the series, we need to flush the state that * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot); - cur = pgprot_val(st->current_prot); + cur = st->current_prot; eff = st->effective_prot; - if (!st->level) { + if (st->level == -1) { /* First entry */ st->current_prot = new_prot; st->effective_prot = new_eff; @@ -289,14 +298,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || new_eff != eff || level != st->level || - st->current_address >= st->marker[1].start_address) { + } else if (new_prot != cur || new_eff != eff || level != st->level || + addr >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) - note_wx(st); + note_wx(st, addr); /* * Now print the actual finished series @@ -306,9 +315,9 @@ static void note_page(struct seq_file *m, struct pg_state *st, pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", width, st->start_address, - width, st->current_address); + width, addr); - delta = st->current_address - st->start_address; + delta = addr - st->start_address; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; @@ -325,7 +334,7 @@ static void note_page(struct seq_file *m, struct pg_state *st, * such as the start of vmalloc space etc. * This helps in the interpretation. */ - if (st->current_address >= st->marker[1].start_address) { + if (addr >= st->marker[1].start_address) { if (st->marker->max_lines && st->lines > st->marker->max_lines) { unsigned long nskip = @@ -341,222 +350,45 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->marker->name); } - st->start_address = st->current_address; + st->start_address = addr; st->current_prot = new_prot; st->effective_prot = new_eff; st->level = level; } } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) -{ - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); -} - -static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pte_t *pte; - pgprotval_t prot, eff; - - for (i = 0; i < PTRS_PER_PTE; i++) { - st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - pte = pte_offset_map(&addr, st->current_address); - prot = pte_flags(*pte); - eff = effective_prot(eff_in, prot); - note_page(m, st, __pgprot(prot), eff, 5); - pte_unmap(pte); - } -} -#ifdef CONFIG_KASAN - -/* - * This is an optimization for KASAN=y case. Since all kasan page tables - * eventually point to the kasan_early_shadow_page we could call note_page() - * right away without walking through lower level page tables. This saves - * us dozens of seconds (minutes for 5-level config) while checking for - * W+X mapping or reading kernel_page_tables debugfs file. - */ -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - if (__pa(pt) == __pa(kasan_early_shadow_pmd) || - (pgtable_l5_enabled() && - __pa(pt) == __pa(kasan_early_shadow_p4d)) || - __pa(pt) == __pa(kasan_early_shadow_pud)) { - pgprotval_t prot = pte_flags(kasan_early_shadow_pte[0]); - note_page(m, st, __pgprot(prot), 0, 5); - return true; - } - return false; -} -#else -static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, - void *pt) -{ - return false; -} -#endif - -#if PTRS_PER_PMD > 1 - -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pmd_t *start, *pmd_start; - pgprotval_t prot, eff; - - pmd_start = start = (pmd_t *)pud_page_vaddr(addr); - for (i = 0; i < PTRS_PER_PMD; i++) { - st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); - if (!pmd_none(*start)) { - prot = pmd_flags(*start); - eff = effective_prot(eff_in, prot); - if (pmd_large(*start) || !pmd_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 4); - } else if (!kasan_page_table(m, st, pmd_start)) { - walk_pte_level(m, st, *start, eff, - P + i * PMD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 4); - start++; - } -} - -#else -#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) -#define pud_large(a) pmd_large(__pmd(pud_val(a))) -#define pud_none(a) pmd_none(__pmd(pud_val(a))) -#endif - -#if PTRS_PER_PUD > 1 - -static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - pud_t *start, *pud_start; - pgprotval_t prot, eff; - - pud_start = start = (pud_t *)p4d_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_PUD; i++) { - st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { - prot = pud_flags(*start); - eff = effective_prot(eff_in, prot); - if (pud_large(*start) || !pud_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 3); - } else if (!kasan_page_table(m, st, pud_start)) { - walk_pmd_level(m, st, *start, eff, - P + i * PUD_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 3); - - start++; - } -} - -#else -#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) -#define p4d_large(a) pud_large(__pud(p4d_val(a))) -#define p4d_none(a) pud_none(__pud(p4d_val(a))) -#endif - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, - pgprotval_t eff_in, unsigned long P) -{ - int i; - p4d_t *start, *p4d_start; - pgprotval_t prot, eff; - - if (PTRS_PER_P4D == 1) - return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); - - p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); - - for (i = 0; i < PTRS_PER_P4D; i++) { - st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); - if (!p4d_none(*start)) { - prot = p4d_flags(*start); - eff = effective_prot(eff_in, prot); - if (p4d_large(*start) || !p4d_present(*start)) { - note_page(m, st, __pgprot(prot), eff, 2); - } else if (!kasan_page_table(m, st, p4d_start)) { - walk_pud_level(m, st, *start, eff, - P + i * P4D_LEVEL_MULT); - } - } else - note_page(m, st, __pgprot(0), 0, 2); - - start++; - } -} - -#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) -#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) - -static inline bool is_hypervisor_range(int idx) -{ -#ifdef CONFIG_X86_64 - /* - * A hole in the beginning of kernel address space reserved - * for a hypervisor. - */ - return (idx >= pgd_index(GUARD_HOLE_BASE_ADDR)) && - (idx < pgd_index(GUARD_HOLE_END_ADDR)); -#else - return false; -#endif -} - -static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, +static void ptdump_walk_pgd_level_core(struct seq_file *m, + struct mm_struct *mm, pgd_t *pgd, bool checkwx, bool dmesg) { - pgd_t *start = INIT_PGD; - pgprotval_t prot, eff; - int i; - struct pg_state st = {}; - - if (pgd) { - start = pgd; - st.to_dmesg = dmesg; - } + const struct ptdump_range ptdump_ranges[] = { +#ifdef CONFIG_X86_64 - st.check_wx = checkwx; - if (checkwx) - st.wx_pages = 0; +#define normalize_addr_shift (64 - (__VIRTUAL_MASK_SHIFT + 1)) +#define normalize_addr(u) ((signed long)((u) << normalize_addr_shift) >> \ + normalize_addr_shift) - for (i = 0; i < PTRS_PER_PGD; i++) { - st.current_address = normalize_addr(i * PGD_LEVEL_MULT); - if (!pgd_none(*start) && !is_hypervisor_range(i)) { - prot = pgd_flags(*start); -#ifdef CONFIG_X86_PAE - eff = _PAGE_USER | _PAGE_RW; + {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, + {normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT / 2), ~0UL}, #else - eff = prot; + {0, ~0UL}, #endif - if (pgd_large(*start) || !pgd_present(*start)) { - note_page(m, &st, __pgprot(prot), eff, 1); - } else { - walk_p4d_level(m, &st, *start, eff, - i * PGD_LEVEL_MULT); - } - } else - note_page(m, &st, __pgprot(0), 0, 1); + {0, 0} +}; - cond_resched(); - start++; - } + struct pg_state st = { + .ptdump = { + .note_page = note_page, + .range = ptdump_ranges + }, + .level = -1, + .to_dmesg = dmesg, + .check_wx = checkwx, + .seq = m + }; + + ptdump_walk_pgd(&st.ptdump, mm, pgd); - /* Flush out the last page */ - st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(m, &st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) @@ -566,18 +398,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); } -void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) +void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm) { - ptdump_walk_pgd_level_core(m, pgd, false, true); + ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true); } -void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user) +void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, + bool user) { + pgd_t *pgd = mm->pgd; #ifdef CONFIG_PAGE_TABLE_ISOLATION if (user && boot_cpu_has(X86_FEATURE_PTI)) pgd = kernel_to_user_pgdp(pgd); #endif - ptdump_walk_pgd_level_core(m, pgd, false, false); + ptdump_walk_pgd_level_core(m, mm, pgd, false, false); } EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); @@ -592,13 +426,13 @@ void ptdump_walk_user_pgd_level_checkwx(void) pr_info("x86/mm: Checking user space page tables\n"); pgd = kernel_to_user_pgdp(pgd); - ptdump_walk_pgd_level_core(NULL, pgd, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false); #endif } void ptdump_walk_pgd_level_checkwx(void) { - ptdump_walk_pgd_level_core(NULL, NULL, true, false); + ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); } static int __init pt_dump_init(void) diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 71dddd1620f9..081d466002c9 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -49,7 +49,7 @@ void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - ptdump_walk_pgd_level(NULL, swapper_pg_dir); + ptdump_walk_pgd_level(NULL, &init_mm); #endif } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e2accfe636bd..fa8506e76bbe 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -471,9 +471,9 @@ void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP if (efi_have_uv1_memmap()) - ptdump_walk_pgd_level(NULL, swapper_pg_dir); + ptdump_walk_pgd_level(NULL, &init_mm); else - ptdump_walk_pgd_level(NULL, efi_mm.pgd); + ptdump_walk_pgd_level(NULL, &efi_mm); #endif } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 5f0a96bf27a1..1fd321f37f1b 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1668,12 +1668,12 @@ static int tunables_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations proc_uv_ptc_operations = { - .open = ptc_proc_open, - .read = seq_read, - .write = ptc_proc_write, - .llseek = seq_lseek, - .release = seq_release, +static const struct proc_ops uv_ptc_proc_ops = { + .proc_open = ptc_proc_open, + .proc_read = seq_read, + .proc_write = ptc_proc_write, + .proc_lseek = seq_lseek, + .proc_release = seq_release, }; static const struct file_operations tunables_fops = { @@ -1691,7 +1691,7 @@ static int __init uv_ptc_init(void) return 0; proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, - &proc_uv_ptc_operations); + &uv_ptc_proc_ops); if (!proc_uv_ptc) { pr_err("unable to create %s proc entry\n", UV_PTC_BASENAME); diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index f9cd45860bee..833109880165 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -251,10 +251,10 @@ out_free: return err; } -static const struct file_operations fops = { - .read = proc_read_simdisk, - .write = proc_write_simdisk, - .llseek = default_llseek, +static const struct proc_ops simdisk_proc_ops = { + .proc_read = proc_read_simdisk, + .proc_write = proc_write_simdisk, + .proc_lseek = default_llseek, }; static int __init simdisk_setup(struct simdisk *dev, int which, @@ -290,7 +290,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, set_capacity(dev->gd, 0); add_disk(dev->gd); - dev->procfile = proc_create_data(tmp, 0644, procdir, &fops, dev); + dev->procfile = proc_create_data(tmp, 0644, procdir, &simdisk_proc_ops, dev); return 0; out_alloc_disk: |