diff options
-rw-r--r-- | arch/s390/boot/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/boot/boot.h | 39 | ||||
-rw-r--r-- | arch/s390/boot/ipl_report.c | 100 | ||||
-rw-r--r-- | arch/s390/boot/kaslr.c | 115 | ||||
-rw-r--r-- | arch/s390/boot/mem_detect.c | 191 | ||||
-rw-r--r-- | arch/s390/boot/pgm_check_info.c | 5 | ||||
-rw-r--r-- | arch/s390/boot/physmem_info.c | 323 | ||||
-rw-r--r-- | arch/s390/boot/startup.c | 96 | ||||
-rw-r--r-- | arch/s390/boot/vmem.c | 258 | ||||
-rw-r--r-- | arch/s390/boot/vmlinux.lds.S | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/kasan.h | 31 | ||||
-rw-r--r-- | arch/s390/include/asm/mem_detect.h | 117 | ||||
-rw-r--r-- | arch/s390/include/asm/physmem_info.h | 171 | ||||
-rw-r--r-- | arch/s390/include/asm/setup.h | 11 | ||||
-rw-r--r-- | arch/s390/kernel/early.c | 23 | ||||
-rw-r--r-- | arch/s390/kernel/head64.S | 3 | ||||
-rw-r--r-- | arch/s390/kernel/setup.c | 96 | ||||
-rw-r--r-- | arch/s390/kernel/vmlinux.lds.S | 7 | ||||
-rw-r--r-- | arch/s390/mm/Makefile | 3 | ||||
-rw-r--r-- | arch/s390/mm/kasan_init.c | 301 | ||||
-rw-r--r-- | arch/s390/mm/pageattr.c | 2 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 11 | ||||
-rw-r--r-- | drivers/s390/char/sclp_early_core.c | 8 |
23 files changed, 911 insertions, 1004 deletions
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index cebd4ca16916..c7c81e5f9218 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -35,7 +35,7 @@ endif CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char -obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o +obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 58ce701d6110..872963c8a0ab 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -8,6 +8,8 @@ #ifndef __ASSEMBLY__ +#include <asm/physmem_info.h> + struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; @@ -30,24 +32,44 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; +#ifdef CONFIG_KASAN + unsigned long kasan_early_shadow_page_off; + unsigned long kasan_early_shadow_pte_off; + unsigned long kasan_early_shadow_pmd_off; + unsigned long kasan_early_shadow_pud_off; + unsigned long kasan_early_shadow_p4d_off; +#endif }; void startup_kernel(void); -unsigned long detect_memory(unsigned long *safe_addr); -void mem_detect_set_usable_limit(unsigned long limit); +unsigned long detect_max_physmem_end(void); +void detect_physmem_online_ranges(unsigned long max_physmem_end); +void physmem_set_usable_limit(unsigned long limit); +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size); +void physmem_free(enum reserved_range_type type); +/* for continuous/multiple allocations per type */ +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align); +/* for single allocations, 1 per type */ +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom); +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start); bool is_ipl_block_dump(void); void store_ipl_parmblock(void); -unsigned long read_ipl_report(unsigned long safe_addr); +int read_ipl_report(void); +void save_ipl_cert_comp_list(void); void setup_boot_command_line(void); void parse_boot_command_line(void); void verify_facilities(void); void print_missing_facilities(void); void sclp_early_setup_buffer(void); void print_pgm_check_info(void); -unsigned long get_random_base(unsigned long safe_addr); +unsigned long get_random_base(void); void setup_vmem(unsigned long asce_limit); -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total); void __printf(1, 2) decompressor_printk(const char *fmt, ...); +void print_stacktrace(unsigned long sp); void error(char *m); extern struct machine_info machine; @@ -62,7 +84,7 @@ extern char __boot_data_start[], __boot_data_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _stack_start[], _stack_end[]; -extern char _end[]; +extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; @@ -70,5 +92,10 @@ extern struct vmlinux_info _vmlinux_info; #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) +static inline bool intersects(unsigned long addr0, unsigned long size0, + unsigned long addr1, unsigned long size1) +{ + return addr0 + size0 > addr1 && addr1 + size1 > addr0; +} #endif /* __ASSEMBLY__ */ #endif /* BOOT_BOOT_H */ diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c index 9b14045065b6..1803035e68d2 100644 --- a/arch/s390/boot/ipl_report.c +++ b/arch/s390/boot/ipl_report.c @@ -5,6 +5,7 @@ #include <asm/sclp.h> #include <asm/sections.h> #include <asm/boot_data.h> +#include <asm/physmem_info.h> #include <uapi/asm/ipl.h> #include "boot.h" @@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size); unsigned long __bootdata(early_ipl_comp_list_addr); unsigned long __bootdata(early_ipl_comp_list_size); +static struct ipl_rb_certificates *certs; +static struct ipl_rb_components *comps; +static bool ipl_report_needs_saving; + #define for_each_rb_entry(entry, rb) \ for (entry = rb->entries; \ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ entry++) -static inline bool intersects(unsigned long addr0, unsigned long size0, - unsigned long addr1, unsigned long size1) -{ - return addr0 + size0 > addr1 && addr1 + size1 > addr0; -} - -static unsigned long find_bootdata_space(struct ipl_rb_components *comps, - struct ipl_rb_certificates *certs, - unsigned long safe_addr) +static unsigned long get_cert_comp_list_size(void) { struct ipl_rb_certificate_entry *cert; struct ipl_rb_component_entry *comp; @@ -44,36 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, ipl_cert_list_size = 0; for_each_rb_entry(cert, certs) ipl_cert_list_size += sizeof(unsigned int) + cert->len; - size = ipl_cert_list_size + early_ipl_comp_list_size; + return ipl_cert_list_size + early_ipl_comp_list_size; +} - /* - * Start from safe_addr to find a free memory area large - * enough for the IPL report boot data. This area is used - * for ipl_cert_list_addr/ipl_cert_list_size and - * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must - * not overlap with any component or any certificate. - */ -repeat: - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && - intersects(initrd_data.start, initrd_data.size, safe_addr, size)) - safe_addr = initrd_data.start + initrd_data.size; - for_each_rb_entry(comp, comps) - if (intersects(safe_addr, size, comp->addr, comp->len)) { - safe_addr = comp->addr + comp->len; - goto repeat; - } - for_each_rb_entry(cert, certs) - if (intersects(safe_addr, size, cert->addr, cert->len)) { - safe_addr = cert->addr + cert->len; - goto repeat; - } - early_ipl_comp_list_addr = safe_addr; - ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size; +bool ipl_report_certs_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + struct ipl_rb_certificate_entry *cert; - return safe_addr + size; + if (!ipl_report_needs_saving) + return false; + + for_each_rb_entry(cert, certs) { + if (intersects(addr, size, cert->addr, cert->len)) { + *intersection_start = cert->addr; + return true; + } + } + return false; } -static void copy_components_bootdata(struct ipl_rb_components *comps) +static void copy_components_bootdata(void) { struct ipl_rb_component_entry *comp, *ptr; @@ -82,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps) memcpy(ptr++, comp, sizeof(*ptr)); } -static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) +static void copy_certificates_bootdata(void) { struct ipl_rb_certificate_entry *cert; void *ptr; @@ -96,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) } } -unsigned long read_ipl_report(unsigned long safe_addr) +int read_ipl_report(void) { - struct ipl_rb_certificates *certs; - struct ipl_rb_components *comps; struct ipl_pl_hdr *pl_hdr; struct ipl_rl_hdr *rl_hdr; struct ipl_rb_hdr *rb_hdr; @@ -112,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) */ if (!ipl_block_valid || !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) - return safe_addr; + return -1; ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); /* * There is an IPL report, to find it load the pointer to the @@ -150,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr) * With either the component list or the certificate list * missing the kernel will stay ignorant of secure IPL. */ - if (!comps || !certs) - return safe_addr; + if (!comps || !certs) { + certs = NULL; + return -1; + } - /* - * Copy component and certificate list to a safe area - * where the decompressed kernel can find them. - */ - safe_addr = find_bootdata_space(comps, certs, safe_addr); - copy_components_bootdata(comps); - copy_certificates_bootdata(certs); + ipl_report_needs_saving = true; + physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr, + (unsigned long)rl_end - (unsigned long)pl_hdr); + return 0; +} + +void save_ipl_cert_comp_list(void) +{ + unsigned long size; + + if (!ipl_report_needs_saving) + return; + + size = get_cert_comp_list_size(); + early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int)); + ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size; - return safe_addr; + copy_components_bootdata(); + copy_certificates_bootdata(); + physmem_free(RR_IPLREPORT); + ipl_report_needs_saving = false; } diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 3e3d846400b4..71f75f03f800 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -3,7 +3,7 @@ * Copyright IBM Corp. 2019 */ #include <linux/pgtable.h> -#include <asm/mem_detect.h> +#include <asm/physmem_info.h> #include <asm/cpacf.h> #include <asm/timex.h> #include <asm/sclp.h> @@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value) return 0; } -/* - * To randomize kernel base address we have to consider several facts: - * 1. physical online memory might not be continuous and have holes. mem_detect - * info contains list of online memory ranges we should consider. - * 2. we have several memory regions which are occupied and we should not - * overlap and destroy them. Currently safe_addr tells us the border below - * which all those occupied regions are. We are safe to use anything above - * safe_addr. - * 3. the upper limit might apply as well, even if memory above that limit is - * online. Currently those limitations are: - * 3.1. Limit set by "mem=" kernel command line option - * 3.2. memory reserved at the end for kasan initialization. - * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size). - * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages - * (16 pages when the kernel is built with kasan enabled) - * Assumptions: - * 1. kernel size (including .bss size) and upper memory limit are page aligned. - * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE - * aligned (in practice memory configurations granularity on z/VM and LPAR - * is 1mb). - * - * To guarantee uniform distribution of kernel base address among all suitable - * addresses we generate random value just once. For that we need to build a - * continuous range in which every value would be suitable. We can build this - * range by simply counting all suitable addresses (let's call them positions) - * which would be valid as kernel base address. To count positions we iterate - * over online memory ranges. For each range which is big enough for the - * kernel image we count all suitable addresses we can put the kernel image at - * that is - * (end - start - kernel_size) / THREAD_SIZE + 1 - * Two functions count_valid_kernel_positions and position_to_address help - * to count positions in memory range given and then convert position back - * to address. - */ -static unsigned long count_valid_kernel_positions(unsigned long kernel_size, - unsigned long _min, - unsigned long _max) -{ - unsigned long start, end, pos = 0; - int i; - - for_each_mem_detect_usable_block(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - pos += (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return pos; -} - -static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size, - unsigned long _min, unsigned long _max) -{ - unsigned long start, end; - int i; - - for_each_mem_detect_usable_block(i, &start, &end) { - if (_min >= end) - continue; - if (start >= _max) - break; - start = max(_min, start); - end = min(_max, end); - if (end - start < kernel_size) - continue; - if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos) - return start + (pos - 1) * THREAD_SIZE; - pos -= (end - start - kernel_size) / THREAD_SIZE + 1; - } - - return 0; -} - -unsigned long get_random_base(unsigned long safe_addr) +unsigned long get_random_base(void) { - unsigned long usable_total = get_mem_detect_usable_total(); - unsigned long memory_limit = get_mem_detect_end(); - unsigned long base_pos, max_pos, kernel_size; - int i; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size; + unsigned long random; - /* - * Avoid putting kernel in the end of physical memory - * which vmem and kasan code will use for shadow memory and - * pgtable mapping allocations. - */ - memory_limit -= kasan_estimate_memory_needs(usable_total); - memory_limit -= vmem_estimate_memory_needs(usable_total); - - safe_addr = ALIGN(safe_addr, THREAD_SIZE); - kernel_size = vmlinux.image_size + vmlinux.bss_size; - if (safe_addr + kernel_size > memory_limit) + /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */ + if (get_random(physmem_info.usable - minimal_pos, &random)) return 0; - max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); - if (!max_pos) { - sclp_early_printk("KASLR disabled: not enough memory\n"); - return 0; - } - - /* we need a value in the range [1, base_pos] inclusive */ - if (get_random(max_pos, &base_pos)) - return 0; - return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit); + return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE, + vmlinux.default_lma, minimal_pos + random, false); } diff --git a/arch/s390/boot/mem_detect.c b/arch/s390/boot/mem_detect.c deleted file mode 100644 index 35f4ba11f7fd..000000000000 --- a/arch/s390/boot/mem_detect.c +++ /dev/null @@ -1,191 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/errno.h> -#include <linux/init.h> -#include <asm/setup.h> -#include <asm/processor.h> -#include <asm/sclp.h> -#include <asm/sections.h> -#include <asm/mem_detect.h> -#include <asm/sparsemem.h> -#include "decompressor.h" -#include "boot.h" - -struct mem_detect_info __bootdata(mem_detect); - -/* up to 256 storage elements, 1020 subincrements each */ -#define ENTRIES_EXTENDED_MAX \ - (256 * (1020 / 2) * sizeof(struct mem_detect_block)) - -static struct mem_detect_block *__get_mem_detect_block_ptr(u32 n) -{ - if (n < MEM_INLINED_ENTRIES) - return &mem_detect.entries[n]; - return &mem_detect.entries_extended[n - MEM_INLINED_ENTRIES]; -} - -/* - * sequential calls to add_mem_detect_block with adjacent memory areas - * are merged together into single memory block. - */ -void add_mem_detect_block(u64 start, u64 end) -{ - struct mem_detect_block *block; - - if (mem_detect.count) { - block = __get_mem_detect_block_ptr(mem_detect.count - 1); - if (block->end == start) { - block->end = end; - return; - } - } - - block = __get_mem_detect_block_ptr(mem_detect.count); - block->start = start; - block->end = end; - mem_detect.count++; -} - -static int __diag260(unsigned long rx1, unsigned long rx2) -{ - unsigned long reg1, reg2, ry; - union register_pair rx; - psw_t old; - int rc; - - rx.even = rx1; - rx.odd = rx2; - ry = 0x10; /* storage configuration */ - rc = -1; /* fail */ - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " diag %[rx],%[ry],0x260\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - [ry] "+&d" (ry), - "+Q" (S390_lowcore.program_new_psw), - "=Q" (old) - : [rx] "d" (rx.pair), - [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw) - : "cc", "memory"); - return rc == 0 ? ry : -1; -} - -static int diag260(void) -{ - int rc, i; - - struct { - unsigned long start; - unsigned long end; - } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ - - memset(storage_extents, 0, sizeof(storage_extents)); - rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); - if (rc == -1) - return -1; - - for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) - add_mem_detect_block(storage_extents[i].start, storage_extents[i].end + 1); - return 0; -} - -static int tprot(unsigned long addr) -{ - unsigned long reg1, reg2; - int rc = -EFAULT; - psw_t old; - - asm volatile( - " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" - " epsw %[reg1],%[reg2]\n" - " st %[reg1],0(%[psw_pgm])\n" - " st %[reg2],4(%[psw_pgm])\n" - " larl %[reg1],1f\n" - " stg %[reg1],8(%[psw_pgm])\n" - " tprot 0(%[addr]),0\n" - " ipm %[rc]\n" - " srl %[rc],28\n" - "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" - : [reg1] "=&d" (reg1), - [reg2] "=&a" (reg2), - [rc] "+&d" (rc), - "=Q" (S390_lowcore.program_new_psw.addr), - "=Q" (old) - : [psw_old] "a" (&old), - [psw_pgm] "a" (&S390_lowcore.program_new_psw), - [addr] "a" (addr) - : "cc", "memory"); - return rc; -} - -static unsigned long search_mem_end(void) -{ - unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ - unsigned long offset = 0; - unsigned long pivot; - - while (range > 1) { - range >>= 1; - pivot = offset + range; - if (!tprot(pivot << 20)) - offset = pivot; - } - return (offset + 1) << 20; -} - -unsigned long detect_memory(unsigned long *safe_addr) -{ - unsigned long max_physmem_end = 0; - - sclp_early_get_memsize(&max_physmem_end); - mem_detect.entries_extended = (struct mem_detect_block *)ALIGN(*safe_addr, sizeof(u64)); - - if (!sclp_early_read_storage_info()) { - mem_detect.info_source = MEM_DETECT_SCLP_STOR_INFO; - } else if (!diag260()) { - mem_detect.info_source = MEM_DETECT_DIAG260; - max_physmem_end = max_physmem_end ?: get_mem_detect_end(); - } else if (max_physmem_end) { - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_SCLP_READ_INFO; - } else { - max_physmem_end = search_mem_end(); - add_mem_detect_block(0, max_physmem_end); - mem_detect.info_source = MEM_DETECT_BIN_SEARCH; - } - - if (mem_detect.count > MEM_INLINED_ENTRIES) { - *safe_addr += (mem_detect.count - MEM_INLINED_ENTRIES) * - sizeof(struct mem_detect_block); - } - - return max_physmem_end; -} - -void mem_detect_set_usable_limit(unsigned long limit) -{ - struct mem_detect_block *block; - int i; - - /* make sure mem_detect.usable ends up within online memory block */ - for (i = 0; i < mem_detect.count; i++) { - block = __get_mem_detect_block_ptr(i); - if (block->start >= limit) - break; - if (block->end >= limit) { - mem_detect.usable = limit; - break; - } - mem_detect.usable = block->end; - } -} diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c index c2a1defc79da..0861e3c403f8 100644 --- a/arch/s390/boot/pgm_check_info.c +++ b/arch/s390/boot/pgm_check_info.c @@ -123,11 +123,10 @@ out: sclp_early_printk(buf); } -static noinline void print_stacktrace(void) +void print_stacktrace(unsigned long sp) { struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, (unsigned long)_stack_end }; - unsigned long sp = S390_lowcore.gpregs_save_area[15]; bool first = true; decompressor_printk("Call Trace:\n"); @@ -173,7 +172,7 @@ void print_pgm_check_info(void) gpregs[8], gpregs[9], gpregs[10], gpregs[11]); decompressor_printk(" %016lx %016lx %016lx %016lx\n", gpregs[12], gpregs[13], gpregs[14], gpregs[15]); - print_stacktrace(); + print_stacktrace(S390_lowcore.gpregs_save_area[15]); decompressor_printk("Last Breaking-Event-Address:\n"); decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, (void *)S390_lowcore.pgm_last_break); diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c new file mode 100644 index 000000000000..4ee9b7381142 --- /dev/null +++ b/arch/s390/boot/physmem_info.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/processor.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <asm/physmem_info.h> +#include <asm/stacktrace.h> +#include <asm/boot_data.h> +#include <asm/sparsemem.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sclp.h> +#include <asm/uv.h> +#include "decompressor.h" +#include "boot.h" + +struct physmem_info __bootdata(physmem_info); +static unsigned int physmem_alloc_ranges; +static unsigned long physmem_alloc_pos; + +/* up to 256 storage elements, 1020 subincrements each */ +#define ENTRIES_EXTENDED_MAX \ + (256 * (1020 / 2) * sizeof(struct physmem_range)) + +static struct physmem_range *__get_physmem_range_ptr(u32 n) +{ + if (n < MEM_INLINED_ENTRIES) + return &physmem_info.online[n]; + if (unlikely(!physmem_info.online_extended)) { + physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range( + RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0, + physmem_alloc_pos, true); + } + return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; +} + +/* + * sequential calls to add_physmem_online_range with adjacent memory ranges + * are merged together into single memory range. + */ +void add_physmem_online_range(u64 start, u64 end) +{ + struct physmem_range *range; + + if (physmem_info.range_count) { + range = __get_physmem_range_ptr(physmem_info.range_count - 1); + if (range->end == start) { + range->end = end; + return; + } + } + + range = __get_physmem_range_ptr(physmem_info.range_count); + range->start = start; + range->end = end; + physmem_info.range_count++; +} + +static int __diag260(unsigned long rx1, unsigned long rx2) +{ + unsigned long reg1, reg2, ry; + union register_pair rx; + psw_t old; + int rc; + + rx.even = rx1; + rx.odd = rx2; + ry = 0x10; /* storage configuration */ + rc = -1; /* fail */ + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " diag %[rx],%[ry],0x260\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + [ry] "+&d" (ry), + "+Q" (S390_lowcore.program_new_psw), + "=Q" (old) + : [rx] "d" (rx.pair), + [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw) + : "cc", "memory"); + return rc == 0 ? ry : -1; +} + +static int diag260(void) +{ + int rc, i; + + struct { + unsigned long start; + unsigned long end; + } storage_extents[8] __aligned(16); /* VM supports up to 8 extends */ + + memset(storage_extents, 0, sizeof(storage_extents)); + rc = __diag260((unsigned long)storage_extents, sizeof(storage_extents)); + if (rc == -1) + return -1; + + for (i = 0; i < min_t(int, rc, ARRAY_SIZE(storage_extents)); i++) + add_physmem_online_range(storage_extents[i].start, storage_extents[i].end + 1); + return 0; +} + +static int tprot(unsigned long addr) +{ + unsigned long reg1, reg2; + int rc = -EFAULT; + psw_t old; + + asm volatile( + " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n" + " epsw %[reg1],%[reg2]\n" + " st %[reg1],0(%[psw_pgm])\n" + " st %[reg2],4(%[psw_pgm])\n" + " larl %[reg1],1f\n" + " stg %[reg1],8(%[psw_pgm])\n" + " tprot 0(%[addr]),0\n" + " ipm %[rc]\n" + " srl %[rc],28\n" + "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n" + : [reg1] "=&d" (reg1), + [reg2] "=&a" (reg2), + [rc] "+&d" (rc), + "=Q" (S390_lowcore.program_new_psw.addr), + "=Q" (old) + : [psw_old] "a" (&old), + [psw_pgm] "a" (&S390_lowcore.program_new_psw), + [addr] "a" (addr) + : "cc", "memory"); + return rc; +} + +static unsigned long search_mem_end(void) +{ + unsigned long range = 1 << (MAX_PHYSMEM_BITS - 20); /* in 1MB blocks */ + unsigned long offset = 0; + unsigned long pivot; + + while (range > 1) { + range >>= 1; + pivot = offset + range; + if (!tprot(pivot << 20)) + offset = pivot; + } + return (offset + 1) << 20; +} + +unsigned long detect_max_physmem_end(void) +{ + unsigned long max_physmem_end = 0; + + if (!sclp_early_get_memsize(&max_physmem_end)) { + physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO; + } else { + max_physmem_end = search_mem_end(); + physmem_info.info_source = MEM_DETECT_BIN_SEARCH; + } + return max_physmem_end; +} + +void detect_physmem_online_ranges(unsigned long max_physmem_end) +{ + if (!sclp_early_read_storage_info()) { + physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; + } else if (!diag260()) { + physmem_info.info_source = MEM_DETECT_DIAG260; + } else if (max_physmem_end) { + add_physmem_online_range(0, max_physmem_end); + } +} + +void physmem_set_usable_limit(unsigned long limit) +{ + physmem_info.usable = limit; + physmem_alloc_pos = limit; +} + +static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max) +{ + unsigned long start, end, total_mem = 0, total_reserved_mem = 0; + struct reserved_range *range; + enum reserved_range_type t; + int i; + + decompressor_printk("Linux version %s\n", kernel_version); + if (!is_prot_virt_guest() && early_command_line[0]) + decompressor_printk("Kernel command line: %s\n", early_command_line); + decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n", + size, align, min, max); + decompressor_printk("Reserved memory ranges:\n"); + for_each_physmem_reserved_range(t, range, &start, &end) { + decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t)); + total_reserved_mem += end - start; + } + decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n", + get_physmem_info_source(), physmem_info.info_source); + for_each_physmem_usable_range(i, &start, &end) { + decompressor_printk("%016lx %016lx\n", start, end); + total_mem += end - start; + } + decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n", + total_mem, total_reserved_mem, + total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0); + print_stacktrace(current_frame_address()); + sclp_early_printk("\n\n -- System halted\n"); + disabled_wait(); +} + +void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size) +{ + physmem_info.reserved[type].start = addr; + physmem_info.reserved[type].end = addr + size; +} + +void physmem_free(enum reserved_range_type type) +{ + physmem_info.reserved[type].start = 0; + physmem_info.reserved[type].end = 0; +} + +static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size, + unsigned long *intersection_start) +{ + unsigned long res_addr, res_size; + int t; + + for (t = 0; t < RR_MAX; t++) { + if (!get_physmem_reserved(t, &res_addr, &res_size)) + continue; + if (intersects(addr, size, res_addr, res_size)) { + *intersection_start = res_addr; + return true; + } + } + return ipl_report_certs_intersects(addr, size, intersection_start); +} + +static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align, + unsigned long min, unsigned long max, + unsigned int from_ranges, unsigned int *ranges_left, + bool die_on_oom) +{ + unsigned int nranges = from_ranges ?: physmem_info.range_count; + unsigned long range_start, range_end; + unsigned long intersection_start; + unsigned long addr, pos = max; + + align = max(align, 8UL); + while (nranges) { + __get_physmem_range(nranges - 1, &range_start, &range_end, false); + pos = min(range_end, pos); + + if (round_up(min, align) + size > pos) + break; + addr = round_down(pos - size, align); + if (range_start > addr) { + nranges--; + continue; + } + if (__physmem_alloc_intersects(addr, size, &intersection_start)) { + pos = intersection_start; + continue; + } + + if (ranges_left) + *ranges_left = nranges; + return addr; + } + if (die_on_oom) + die_oom(size, align, min, max); + return 0; +} + +unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size, + unsigned long align, unsigned long min, unsigned long max, + bool die_on_oom) +{ + unsigned long addr; + + max = min(max, physmem_alloc_pos); + addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom); + if (addr) + physmem_reserve(type, addr, size); + return addr; +} + +unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size, + unsigned long align) +{ + struct reserved_range *range = &physmem_info.reserved[type]; + struct reserved_range *new_range; + unsigned int ranges_left; + unsigned long addr; + + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges, + &ranges_left, true); + /* if not a consecutive allocation of the same type or first allocation */ + if (range->start != addr + size) { + if (range->end) { + physmem_alloc_pos = __physmem_alloc_range( + sizeof(struct reserved_range), 0, 0, physmem_alloc_pos, + physmem_alloc_ranges, &ranges_left, true); + new_range = (struct reserved_range *)physmem_alloc_pos; + *new_range = *range; + range->chain = new_range; + addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, + ranges_left, &ranges_left, true); + } + range->end = addr + size; + } + range->start = addr; + physmem_alloc_pos = addr; + physmem_alloc_ranges = ranges_left; + return addr; +} diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 11413f0baabc..bdf305a93987 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -12,7 +12,7 @@ #include <asm/diag.h> #include <asm/uv.h> #include <asm/abs_lowcore.h> -#include <asm/mem_detect.h> +#include <asm/physmem_info.h> #include "decompressor.h" #include "boot.h" #include "uv.h" @@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__memcpy_real_area); pte_t *__bootdata_preserved(memcpy_real_ptep); -unsigned long __bootdata(__amode31_base); unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_END); struct page *__bootdata_preserved(vmemmap); @@ -29,8 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata(ident_map_size); -int __bootdata(is_full_image) = 1; -struct initrd_data __bootdata(initrd_data); u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]); @@ -76,17 +73,20 @@ unsigned long mem_safe_offset(void) } #endif -static unsigned long rescue_initrd(unsigned long safe_addr) +static void rescue_initrd(unsigned long min, unsigned long max) { + unsigned long old_addr, addr, size; + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) - return safe_addr; - if (!initrd_data.start || !initrd_data.size) - return safe_addr; - if (initrd_data.start < safe_addr) { - memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size); - initrd_data.start = safe_addr; - } - return initrd_data.start + initrd_data.size; + return; + if (!get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + if (addr >= min && addr + size <= max) + return; + old_addr = addr; + physmem_free(RR_INITRD); + addr = physmem_alloc_top_down(RR_INITRD, size, 0); + memmove((void *)addr, (void *)old_addr, size); } static void copy_bootdata(void) @@ -140,7 +140,7 @@ static void handle_relocs(unsigned long offset) * * Consider the following factors: * 1. max_physmem_end - end of physical memory online or standby. - * Always <= end of the last online memory block (get_mem_detect_end()). + * Always >= end of the last online memory range (get_physmem_online_end()). * 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the * kernel is able to support. * 3. "mem=" kernel command line option which limits physical memory usage. @@ -266,48 +266,57 @@ static void offset_vmlinux_info(unsigned long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; -} - -static unsigned long reserve_amode31(unsigned long safe_addr) -{ - __amode31_base = PAGE_ALIGN(safe_addr); - return __amode31_base + vmlinux.amode31_size; +#ifdef CONFIG_KASAN + vmlinux.kasan_early_shadow_page_off += offset; + vmlinux.kasan_early_shadow_pte_off += offset; + vmlinux.kasan_early_shadow_pmd_off += offset; + vmlinux.kasan_early_shadow_pud_off += offset; + vmlinux.kasan_early_shadow_p4d_off += offset; +#endif } void startup_kernel(void) { unsigned long max_physmem_end; unsigned long random_lma; - unsigned long safe_addr; unsigned long asce_limit; + unsigned long safe_addr; void *img; psw_t psw; - initrd_data.start = parmarea.initrd_start; - initrd_data.size = parmarea.initrd_size; + setup_lpp(); + safe_addr = mem_safe_offset(); + /* + * reserve decompressor memory together with decompression heap, buffer and + * memory which might be occupied by uncompressed kernel at default 1Mb + * position (if KASLR is off or failed). + */ + physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr); + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size) + physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size); oldmem_data.start = parmarea.oldmem_base; oldmem_data.size = parmarea.oldmem_size; - setup_lpp(); store_ipl_parmblock(); - safe_addr = mem_safe_offset(); - safe_addr = reserve_amode31(safe_addr); - safe_addr = read_ipl_report(safe_addr); + read_ipl_report(); uv_query_info(); - safe_addr = rescue_initrd(safe_addr); sclp_early_read_info(); setup_boot_command_line(); parse_boot_command_line(); detect_facilities(); sanitize_prot_virt_host(); - max_physmem_end = detect_memory(&safe_addr); + max_physmem_end = detect_max_physmem_end(); setup_ident_map_size(max_physmem_end); setup_vmalloc_size(); asce_limit = setup_kernel_memory_layout(); - mem_detect_set_usable_limit(ident_map_size); + /* got final ident_map_size, physmem allocations could be performed now */ + physmem_set_usable_limit(ident_map_size); + detect_physmem_online_ranges(max_physmem_end); + save_ipl_cert_comp_list(); + rescue_initrd(safe_addr, ident_map_size); if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { - random_lma = get_random_base(safe_addr); + random_lma = get_random_base(); if (random_lma) { __kaslr_offset = random_lma - vmlinux.default_lma; img = (void *)vmlinux.default_lma; @@ -318,8 +327,16 @@ void startup_kernel(void) if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { img = decompress_kernel(); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); - } else if (__kaslr_offset) + } else if (__kaslr_offset) { memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); + memset(img, 0, vmlinux.image_size); + } + + /* vmlinux decompression is done, shrink reserved low memory */ + physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); + if (!__kaslr_offset) + physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size); + physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true); /* * The order of the following operations is important: @@ -339,16 +356,11 @@ void startup_kernel(void) setup_vmem(asce_limit); copy_bootdata(); - if (__kaslr_offset) { - /* - * Save KASLR offset for early dumps, before vmcore_info is set. - * Mark as uneven to distinguish from real vmcore_info pointer. - */ - S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL; - /* Clear non-relocated kernel */ - if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) - memset(img, 0, vmlinux.image_size); - } + /* + * Save KASLR offset for early dumps, before vmcore_info is set. + * Mark as uneven to distinguish from real vmcore_info pointer. + */ + S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0; /* * Jump to the decompressed kernel entry point and switch DAT mode on. diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 4d1d0d8e99cb..b01ea2abda03 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -1,81 +1,213 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/sched/task.h> #include <linux/pgtable.h> +#include <linux/kasan.h> #include <asm/pgalloc.h> #include <asm/facility.h> #include <asm/sections.h> -#include <asm/mem_detect.h> +#include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/abs_lowcore.h> #include "decompressor.h" #include "boot.h" +unsigned long __bootdata_preserved(s390_invalid_asce); + #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) #define swapper_pg_dir vmlinux.swapper_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off -/* - * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. - */ -static inline pte_t *__virt_to_kpte(unsigned long va) -{ - return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); -} - -unsigned long __bootdata_preserved(s390_invalid_asce); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); - enum populate_mode { POPULATE_NONE, POPULATE_ONE2ONE, POPULATE_ABS_LOWCORE, +#ifdef CONFIG_KASAN + POPULATE_KASAN_MAP_SHADOW, + POPULATE_KASAN_ZERO_SHADOW, + POPULATE_KASAN_SHALLOW +#endif }; -static void boot_check_oom(void) +static void pgtable_populate(unsigned long addr, unsigned long end, enum populate_mode mode); + +#ifdef CONFIG_KASAN + +#define kasan_early_shadow_page vmlinux.kasan_early_shadow_page_off +#define kasan_early_shadow_pte ((pte_t *)vmlinux.kasan_early_shadow_pte_off) +#define kasan_early_shadow_pmd ((pmd_t *)vmlinux.kasan_early_shadow_pmd_off) +#define kasan_early_shadow_pud ((pud_t *)vmlinux.kasan_early_shadow_pud_off) +#define kasan_early_shadow_p4d ((p4d_t *)vmlinux.kasan_early_shadow_p4d_off) +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) + +static pte_t pte_z; + +static void kasan_populate_shadow(void) +{ + pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); + pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); + p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); + unsigned long untracked_end; + unsigned long start, end; + int i; + + pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); + if (!machine.has_nx) + pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); + crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); + crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); + memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); + + /* + * Current memory layout: + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan | + * | | / | zero page | + * +- vmalloc area -+ / | mapping | + * | vmalloc_size | / | (untracked) | + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ | unmapped | allocated per module + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + * + * Current memory layout (KASAN_VMALLOC): + * +- 0 -------------+ +- shadow start -+ + * |1:1 ident mapping| /|1/8 of ident map| + * | | / | | + * +-end of ident map+ / +----------------+ + * | ... gap ... | / | kasan zero page| (untracked) + * | | / | mapping | + * +- vmalloc area -+ / +----------------+ + * | vmalloc_size | / |shallow populate| + * +- modules vaddr -+ / +----------------+ + * | 2Gb |/ |shallow populate| + * +- shadow start -+ +----------------+ + * | 1/8 addr space | | zero pg mapping| (untracked) + * +- shadow end ----+---------+- shadow end ---+ + */ + + for_each_physmem_usable_range(i, &start, &end) + pgtable_populate(__sha(start), __sha(end), POPULATE_KASAN_MAP_SHADOW); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + untracked_end = VMALLOC_START; + /* shallowly populate kasan shadow for vmalloc and modules */ + pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), POPULATE_KASAN_SHALLOW); + } else { + untracked_end = MODULES_VADDR; + } + /* populate kasan shadow for untracked memory */ + pgtable_populate(__sha(ident_map_size), __sha(untracked_end), POPULATE_KASAN_ZERO_SHADOW); + pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), POPULATE_KASAN_ZERO_SHADOW); +} + +static bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) { + pgd_populate(&init_mm, pgd, kasan_early_shadow_p4d); + return true; + } + return false; +} + +static bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, P4D_SIZE) && end - addr >= P4D_SIZE) { + p4d_populate(&init_mm, p4d, kasan_early_shadow_pud); + return true; + } + return false; +} + +static bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { + pud_populate(&init_mm, pud, kasan_early_shadow_pmd); + return true; + } + return false; +} + +static bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) { - if (pgalloc_pos < pgalloc_low) - error("out of memory on boot\n"); + if (mode == POPULATE_KASAN_ZERO_SHADOW && + IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { + pmd_populate(&init_mm, pmd, kasan_early_shadow_pte); + return true; + } + return false; } -static void pgtable_populate_init(void) +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) { - unsigned long initrd_end; - unsigned long kernel_end; - - kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size; - pgalloc_low = round_up(kernel_end, PAGE_SIZE); - if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) { - initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE); - pgalloc_low = max(pgalloc_low, initrd_end); + pte_t entry; + + if (mode == POPULATE_KASAN_ZERO_SHADOW) { + set_pte(pte, pte_z); + return true; } + return false; +} +#else - pgalloc_end = round_down(get_mem_detect_end(), PAGE_SIZE); - pgalloc_pos = pgalloc_end; +static inline void kasan_populate_shadow(void) {} - boot_check_oom(); +static inline bool kasan_pgd_populate_zero_shadow(pgd_t *pgd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; } -static void *boot_alloc_pages(unsigned int order) +static inline bool kasan_p4d_populate_zero_shadow(p4d_t *p4d, unsigned long addr, + unsigned long end, enum populate_mode mode) { - unsigned long size = PAGE_SIZE << order; + return false; +} - pgalloc_pos -= size; - pgalloc_pos = round_down(pgalloc_pos, size); +static inline bool kasan_pud_populate_zero_shadow(pud_t *pud, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} - boot_check_oom(); +static inline bool kasan_pmd_populate_zero_shadow(pmd_t *pmd, unsigned long addr, + unsigned long end, enum populate_mode mode) +{ + return false; +} + +static bool kasan_pte_populate_zero_shadow(pte_t *pte, enum populate_mode mode) +{ + return false; +} + +#endif - return (void *)pgalloc_pos; +/* + * Mimic virt_to_kpte() in lack of init_mm symbol. Skip pmd NULL check though. + */ +static inline pte_t *__virt_to_kpte(unsigned long va) +{ + return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); } static void *boot_crst_alloc(unsigned long val) { + unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER; unsigned long *table; - table = boot_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); + table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); + crst_table_init(table, val); return table; } @@ -84,20 +216,23 @@ static pte_t *boot_pte_alloc(void) static void *pte_leftover; pte_t *pte; - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - + /* + * handling pte_leftovers this way helps to avoid memory fragmentation + * during POPULATE_KASAN_MAP_SHADOW when EDAT is off + */ if (!pte_leftover) { - pte_leftover = boot_alloc_pages(0); + pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); pte = pte_leftover + _PAGE_TABLE_SIZE; } else { pte = pte_leftover; pte_leftover = NULL; } + memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); return pte; } -static unsigned long _pa(unsigned long addr, enum populate_mode mode) +static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_mode mode) { switch (mode) { case POPULATE_NONE: @@ -106,6 +241,12 @@ static unsigned long _pa(unsigned long addr, enum populate_mode mode) return addr; case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); +#ifdef CONFIG_KASAN + case POPULATE_KASAN_MAP_SHADOW: + addr = physmem_alloc_top_down(RR_VMEM, size, size); + memset((void *)addr, 0, size); + return addr; +#endif default: return -1; } @@ -126,13 +267,14 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, enum populate_mode mode) { - unsigned long next; pte_t *pte, entry; pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (pte_none(*pte)) { - entry = __pte(_pa(addr, mode)); + if (kasan_pte_populate_zero_shadow(pte, mode)) + continue; + entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL_EXEC); set_pte(pte, entry); } @@ -150,8 +292,10 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) { + if (kasan_pmd_populate_zero_shadow(pmd, addr, next, mode)) + continue; if (can_large_pmd(pmd, addr, next)) { - entry = __pmd(_pa(addr, mode)); + entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC); set_pmd(pmd, entry); continue; @@ -176,8 +320,10 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); if (pud_none(*pud)) { + if (kasan_pud_populate_zero_shadow(pud, addr, next, mode)) + continue; if (can_large_pud(pud, addr, next)) { - entry = __pud(_pa(addr, mode)); + entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL_EXEC); set_pud(pud, entry); continue; @@ -202,6 +348,8 @@ static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long e for (; addr < end; addr = next, p4d++) { next = p4d_addr_end(addr, end); if (p4d_none(*p4d)) { + if (kasan_p4d_populate_zero_shadow(p4d, addr, next, mode)) + continue; pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY); p4d_populate(&init_mm, p4d, pud); } @@ -219,9 +367,15 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat for (; addr < end; addr = next, pgd++) { next = pgd_addr_end(addr, end); if (pgd_none(*pgd)) { + if (kasan_pgd_populate_zero_shadow(pgd, addr, next, mode)) + continue; p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY); pgd_populate(&init_mm, pgd, p4d); } +#ifdef CONFIG_KASAN + if (mode == POPULATE_KASAN_SHALLOW) + continue; +#endif pgtable_p4d_populate(pgd, addr, next, mode); } } @@ -250,9 +404,8 @@ void setup_vmem(unsigned long asce_limit) * To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate_init(); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) pgtable_populate(start, end, POPULATE_ONE2ONE); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); @@ -260,6 +413,8 @@ void setup_vmem(unsigned long asce_limit) POPULATE_NONE); memcpy_real_ptep = __virt_to_kpte(__memcpy_real_area); + kasan_populate_shadow(); + S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits; S390_lowcore.user_asce = s390_invalid_asce; @@ -269,10 +424,3 @@ void setup_vmem(unsigned long asce_limit) init_mm.context.asce = S390_lowcore.kernel_asce; } - -unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total) -{ - unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE); - - return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; -} diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index fa9d33b01b85..389df0e0d9e5 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -93,6 +93,8 @@ SECTIONS _decompressor_syms_end = .; } + _decompressor_end = .; + #ifdef CONFIG_KERNEL_UNCOMPRESSED . = 0x100000; #else diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h index e5cfc81d5b61..0cffead0f2f2 100644 --- a/arch/s390/include/asm/kasan.h +++ b/arch/s390/include/asm/kasan.h @@ -2,7 +2,7 @@ #ifndef __ASM_KASAN_H #define __ASM_KASAN_H -#include <asm/pgtable.h> +#include <linux/const.h> #ifdef CONFIG_KASAN @@ -13,35 +13,6 @@ #define KASAN_SHADOW_START KASAN_SHADOW_OFFSET #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -extern void kasan_early_init(void); - -/* - * Estimate kasan memory requirements, which it will reserve - * at the very end of available physical memory. To estimate - * that, we take into account that kasan would require - * 1/8 of available physical memory (for shadow memory) + - * creating page tables for the shadow memory region. - * To keep page tables estimates simple take the double of - * combined ptes size. - * - * physmem parameter has to be already adjusted if not entire physical memory - * would be used (e.g. due to effect of "mem=" option). - */ -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) -{ - unsigned long kasan_needs; - unsigned long pages; - /* for shadow memory */ - kasan_needs = round_up(physmem / 8, PAGE_SIZE); - /* for paging structures */ - pages = DIV_ROUND_UP(kasan_needs, PAGE_SIZE); - kasan_needs += DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2; - - return kasan_needs; -} -#else -static inline void kasan_early_init(void) { } -static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; } #endif #endif diff --git a/arch/s390/include/asm/mem_detect.h b/arch/s390/include/asm/mem_detect.h deleted file mode 100644 index f9e7354036d2..000000000000 --- a/arch/s390/include/asm/mem_detect.h +++ /dev/null @@ -1,117 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_MEM_DETECT_H -#define _ASM_S390_MEM_DETECT_H - -#include <linux/types.h> - -enum mem_info_source { - MEM_DETECT_NONE = 0, - MEM_DETECT_SCLP_STOR_INFO, - MEM_DETECT_DIAG260, - MEM_DETECT_SCLP_READ_INFO, - MEM_DETECT_BIN_SEARCH -}; - -struct mem_detect_block { - u64 start; - u64 end; -}; - -/* - * Storage element id is defined as 1 byte (up to 256 storage elements). - * In practise only storage element id 0 and 1 are used). - * According to architecture one storage element could have as much as - * 1020 subincrements. 255 mem_detect_blocks are embedded in mem_detect_info. - * If more mem_detect_blocks are required, a block of memory from already - * known mem_detect_block is taken (entries_extended points to it). - */ -#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ - -struct mem_detect_info { - u32 count; - u8 info_source; - unsigned long usable; - struct mem_detect_block entries[MEM_INLINED_ENTRIES]; - struct mem_detect_block *entries_extended; -}; -extern struct mem_detect_info mem_detect; - -void add_mem_detect_block(u64 start, u64 end); - -static inline int __get_mem_detect_block(u32 n, unsigned long *start, - unsigned long *end, bool respect_usable_limit) -{ - if (n >= mem_detect.count) { - *start = 0; - *end = 0; - return -1; - } - - if (n < MEM_INLINED_ENTRIES) { - *start = (unsigned long)mem_detect.entries[n].start; - *end = (unsigned long)mem_detect.entries[n].end; - } else { - *start = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].start; - *end = (unsigned long)mem_detect.entries_extended[n - MEM_INLINED_ENTRIES].end; - } - - if (respect_usable_limit && mem_detect.usable) { - if (*start >= mem_detect.usable) - return -1; - if (*end > mem_detect.usable) - *end = mem_detect.usable; - } - return 0; -} - -/** - * for_each_mem_detect_usable_block - early online memory range iterator - * @i: an integer used as loop variable - * @p_start: ptr to unsigned long for start address of the range - * @p_end: ptr to unsigned long for end address of the range - * - * Walks over detected online memory ranges below usable limit. - */ -#define for_each_mem_detect_usable_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, true); i++) - -/* Walks over all detected online memory ranges disregarding usable limit. */ -#define for_each_mem_detect_block(i, p_start, p_end) \ - for (i = 0; !__get_mem_detect_block(i, p_start, p_end, false); i++) - -static inline unsigned long get_mem_detect_usable_total(void) -{ - unsigned long start, end, total = 0; - int i; - - for_each_mem_detect_usable_block(i, &start, &end) - total += end - start; - - return total; -} - -static inline void get_mem_detect_reserved(unsigned long *start, - unsigned long *size) -{ - *start = (unsigned long)mem_detect.entries_extended; - if (mem_detect.count > MEM_INLINED_ENTRIES) - *size = (mem_detect.count - MEM_INLINED_ENTRIES) * sizeof(struct mem_detect_block); - else - *size = 0; -} - -static inline unsigned long get_mem_detect_end(void) -{ - unsigned long start; - unsigned long end; - - if (mem_detect.usable) - return mem_detect.usable; - if (mem_detect.count) { - __get_mem_detect_block(mem_detect.count - 1, &start, &end, false); - return end; - } - return 0; -} - -#endif diff --git a/arch/s390/include/asm/physmem_info.h b/arch/s390/include/asm/physmem_info.h new file mode 100644 index 000000000000..8e9c582592b3 --- /dev/null +++ b/arch/s390/include/asm/physmem_info.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_MEM_DETECT_H +#define _ASM_S390_MEM_DETECT_H + +#include <linux/types.h> + +enum physmem_info_source { + MEM_DETECT_NONE = 0, + MEM_DETECT_SCLP_STOR_INFO, + MEM_DETECT_DIAG260, + MEM_DETECT_SCLP_READ_INFO, + MEM_DETECT_BIN_SEARCH +}; + +struct physmem_range { + u64 start; + u64 end; +}; + +enum reserved_range_type { + RR_DECOMPRESSOR, + RR_INITRD, + RR_VMLINUX, + RR_AMODE31, + RR_IPLREPORT, + RR_CERT_COMP_LIST, + RR_MEM_DETECT_EXTENDED, + RR_VMEM, + RR_MAX +}; + +struct reserved_range { + unsigned long start; + unsigned long end; + struct reserved_range *chain; +}; + +/* + * Storage element id is defined as 1 byte (up to 256 storage elements). + * In practise only storage element id 0 and 1 are used). + * According to architecture one storage element could have as much as + * 1020 subincrements. 255 physmem_ranges are embedded in physmem_info. + * If more physmem_ranges are required, a block of memory from already + * known physmem_range is taken (online_extended points to it). + */ +#define MEM_INLINED_ENTRIES 255 /* (PAGE_SIZE - 16) / 16 */ + +struct physmem_info { + u32 range_count; + u8 info_source; + unsigned long usable; + struct reserved_range reserved[RR_MAX]; + struct physmem_range online[MEM_INLINED_ENTRIES]; + struct physmem_range *online_extended; +}; + +extern struct physmem_info physmem_info; + +void add_physmem_online_range(u64 start, u64 end); + +static inline int __get_physmem_range(u32 n, unsigned long *start, + unsigned long *end, bool respect_usable_limit) +{ + if (n >= physmem_info.range_count) { + *start = 0; + *end = 0; + return -1; + } + + if (n < MEM_INLINED_ENTRIES) { + *start = (unsigned long)physmem_info.online[n].start; + *end = (unsigned long)physmem_info.online[n].end; + } else { + *start = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].start; + *end = (unsigned long)physmem_info.online_extended[n - MEM_INLINED_ENTRIES].end; + } + + if (respect_usable_limit && physmem_info.usable) { + if (*start >= physmem_info.usable) + return -1; + if (*end > physmem_info.usable) + *end = physmem_info.usable; + } + return 0; +} + +/** + * for_each_physmem_usable_range - early online memory range iterator + * @i: an integer used as loop variable + * @p_start: ptr to unsigned long for start address of the range + * @p_end: ptr to unsigned long for end address of the range + * + * Walks over detected online memory ranges below usable limit. + */ +#define for_each_physmem_usable_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, true); i++) + +/* Walks over all detected online memory ranges disregarding usable limit. */ +#define for_each_physmem_online_range(i, p_start, p_end) \ + for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) + +static inline const char *get_physmem_info_source(void) +{ + switch (physmem_info.info_source) { + case MEM_DETECT_SCLP_STOR_INFO: + return "sclp storage info"; + case MEM_DETECT_DIAG260: + return "diag260"; + case MEM_DETECT_SCLP_READ_INFO: + return "sclp read info"; + case MEM_DETECT_BIN_SEARCH: + return "binary search"; + } + return "none"; +} + +#define RR_TYPE_NAME(t) case RR_ ## t: return #t +static inline const char *get_rr_type_name(enum reserved_range_type t) +{ + switch (t) { + RR_TYPE_NAME(DECOMPRESSOR); + RR_TYPE_NAME(INITRD); + RR_TYPE_NAME(VMLINUX); + RR_TYPE_NAME(AMODE31); + RR_TYPE_NAME(IPLREPORT); + RR_TYPE_NAME(CERT_COMP_LIST); + RR_TYPE_NAME(MEM_DETECT_EXTENDED); + RR_TYPE_NAME(VMEM); + default: + return "UNKNOWN"; + } +} + +#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \ + for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \ + range && range->end; range = range->chain, \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) + +static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t, + struct reserved_range *range) +{ + if (!range) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + if (range->chain) + return range->chain; + while (++*t < RR_MAX) { + range = &physmem_info.reserved[*t]; + if (range->end) + return range; + } + return NULL; +} + +#define for_each_physmem_reserved_range(t, range, p_start, p_end) \ + for (t = 0, range = __physmem_reserved_next(&t, NULL), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0; \ + range; range = __physmem_reserved_next(&t, range), \ + *p_start = range ? range->start : 0, *p_end = range ? range->end : 0) + +static inline unsigned long get_physmem_reserved(enum reserved_range_type type, + unsigned long *addr, unsigned long *size) +{ + *addr = physmem_info.reserved[type].start; + *size = physmem_info.reserved[type].end - physmem_info.reserved[type].start; + return *size; +} + +#endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 5271bb278cfa..b28d250efbaa 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; -extern unsigned long pgalloc_pos; -extern unsigned long pgalloc_end; -extern unsigned long pgalloc_low; -extern unsigned long __amode31_base; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; @@ -150,13 +146,6 @@ static inline unsigned long kaslr_offset(void) return __kaslr_offset; } -extern int is_full_image; - -struct initrd_data { - unsigned long start; - unsigned long size; -}; -extern struct initrd_data initrd_data; struct oldmem_data { unsigned long start; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d26f02495636..2dd5976a55ac 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -34,8 +34,6 @@ #include <asm/switch_to.h> #include "entry.h" -int __bootdata(is_full_image); - #define decompressor_handled_param(param) \ static int __init ignore_decompressor_param_##param(char *s) \ { \ @@ -53,6 +51,14 @@ decompressor_handled_param(nokaslr); decompressor_handled_param(prot_virt); #endif +static void __init kasan_early_init(void) +{ +#ifdef CONFIG_KASAN + init_task.kasan_depth = 0; + sclp_early_printk("KernelAddressSanitizer initialized\n"); +#endif +} + static void __init reset_tod_clock(void) { union tod_clock clk; @@ -288,17 +294,6 @@ static void __init setup_boot_command_line(void) strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); } -static void __init check_image_bootable(void) -{ - if (is_full_image) - return; - - sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); - sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); - sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); - disabled_wait(); -} - static void __init sort_amode31_extable(void) { sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); @@ -306,8 +301,8 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { + kasan_early_init(); reset_tod_clock(); - check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3b3bf8329e6c..f68be3951103 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,9 +26,6 @@ ENTRY(startup_continue) stg %r14,__LC_CURRENT larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE brasl %r14,sclp_early_adjust_va # allow sclp_early_printk -#ifdef CONFIG_KASAN - brasl %r14,kasan_early_init -#endif brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code # diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 8ec5cdf9dadc..d25425b8d0c0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -74,7 +74,7 @@ #include <asm/numa.h> #include <asm/alternative.h> #include <asm/nospec-branch.h> -#include <asm/mem_detect.h> +#include <asm/physmem_info.h> #include <asm/maccess.h> #include <asm/uv.h> #include <asm/asm-offsets.h> @@ -147,14 +147,9 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); -struct mem_detect_info __bootdata(mem_detect); -struct initrd_data __bootdata(initrd_data); -unsigned long __bootdata(pgalloc_pos); -unsigned long __bootdata(pgalloc_end); -unsigned long __bootdata(pgalloc_low); +struct physmem_info __bootdata(physmem_info); unsigned long __bootdata_preserved(__kaslr_offset); -unsigned long __bootdata(__amode31_base); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = { */ static void __init reserve_pgtables(void) { - memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos); + unsigned long start, end; + struct reserved_range *range; + + for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end) + memblock_reserve(start, end - start); } /* @@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void) */ static void __init reserve_initrd(void) { -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_data.start || !initrd_data.size) + unsigned long addr, size; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size)) return; - initrd_start = (unsigned long)__va(initrd_data.start); - initrd_end = initrd_start + initrd_data.size; - memblock_reserve(initrd_data.start, initrd_data.size); -#endif + initrd_start = (unsigned long)__va(addr); + initrd_end = initrd_start + size; + memblock_reserve(addr, size); } /* @@ -730,72 +729,40 @@ static void __init reserve_certificate_list(void) memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); } -static void __init reserve_mem_detect_info(void) +static void __init reserve_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_mem_detect_reserved(&start, &size); - if (size) - memblock_reserve(start, size); + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_reserve(addr, size); } -static void __init free_mem_detect_info(void) +static void __init free_physmem_info(void) { - unsigned long start, size; + unsigned long addr, size; - get_mem_detect_reserved(&start, &size); - if (size) - memblock_phys_free(start, size); -} - -static const char * __init get_mem_info_source(void) -{ - switch (mem_detect.info_source) { - case MEM_DETECT_SCLP_STOR_INFO: - return "sclp storage info"; - case MEM_DETECT_DIAG260: - return "diag260"; - case MEM_DETECT_SCLP_READ_INFO: - return "sclp read info"; - case MEM_DETECT_BIN_SEARCH: - return "binary search"; - } - return "none"; + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_phys_free(addr, size); } -static void __init memblock_add_mem_detect_info(void) +static void __init memblock_add_physmem_info(void) { unsigned long start, end; int i; pr_debug("physmem info source: %s (%hhd)\n", - get_mem_info_source(), mem_detect.info_source); + get_physmem_info_source(), physmem_info.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_usable_block(i, &start, &end) + for_each_physmem_usable_range(i, &start, &end) memblock_add(start, end - start); - for_each_mem_detect_block(i, &start, &end) + for_each_physmem_online_range(i, &start, &end) memblock_physmem_add(start, end - start); memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } /* - * Check for initrd being in usable memory - */ -static void __init check_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_data.start && initrd_data.size && - !memblock_is_region_memory(initrd_data.start, initrd_data.size)) { - pr_err("The initial RAM disk does not fit into the memory\n"); - memblock_phys_free(initrd_data.start, initrd_data.size); - initrd_start = initrd_end = 0; - } -#endif -} - -/* * Reserve memory used for lowcore/command line/kernel image. */ static void __init reserve_kernel(void) @@ -803,7 +770,7 @@ static void __init reserve_kernel(void) memblock_reserve(0, STARTUP_NORMAL_OFFSET); memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); - memblock_reserve(__amode31_base, __eamode31 - __samode31); + memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(_stext), _end - _stext); } @@ -825,13 +792,13 @@ static void __init setup_memory(void) static void __init relocate_amode31_section(void) { unsigned long amode31_size = __eamode31 - __samode31; - long amode31_offset = __amode31_base - __samode31; + long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31; long *ptr; pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); /* Move original AMODE31 section to the new one */ - memmove((void *)__amode31_base, (void *)__samode31, amode31_size); + memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size); /* Zero out the old AMODE31 section to catch invalid accesses within it */ memset((void *)__samode31, 0, amode31_size); @@ -997,14 +964,14 @@ void __init setup_arch(char **cmdline_p) reserve_kernel(); reserve_initrd(); reserve_certificate_list(); - reserve_mem_detect_info(); + reserve_physmem_info(); memblock_set_current_limit(ident_map_size); memblock_allow_resize(); /* Get information about *all* installed memory */ - memblock_add_mem_detect_info(); + memblock_add_physmem_info(); - free_mem_detect_info(); + free_physmem_info(); setup_memory_end(); memblock_dump_all(); setup_memory(); @@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p) if (MACHINE_HAS_EDAT2) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); - check_initrd(); reserve_crashkernel(); #ifdef CONFIG_CRASH_DUMP /* diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b653ba8d51e6..8d2288a5ba25 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -219,6 +219,13 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) +#ifdef CONFIG_KASAN + QUAD(kasan_early_shadow_page) + QUAD(kasan_early_shadow_pte) + QUAD(kasan_early_shadow_pmd) + QUAD(kasan_early_shadow_pud) + QUAD(kasan_early_shadow_p4d) +#endif } :NONE /* Debugging sections. */ diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 57e4f3a24829..d90db06a8af5 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,6 +10,3 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o - -KASAN_SANITIZE_kasan_init.o := n -obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c deleted file mode 100644 index ef89a5f26853..000000000000 --- a/arch/s390/mm/kasan_init.c +++ /dev/null @@ -1,301 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kasan.h> -#include <linux/sched/task.h> -#include <linux/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/kasan.h> -#include <asm/mem_detect.h> -#include <asm/processor.h> -#include <asm/sclp.h> -#include <asm/facility.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/uv.h> - -static unsigned long segment_pos __initdata; -static unsigned long segment_low __initdata; -static bool has_edat __initdata; -static bool has_nx __initdata; - -#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) - -static void __init kasan_early_panic(const char *reason) -{ - sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n"); - sclp_early_printk(reason); - disabled_wait(); -} - -static void * __init kasan_early_alloc_segment(void) -{ - segment_pos -= _SEGMENT_SIZE; - - if (segment_pos < segment_low) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(segment_pos); -} - -static void * __init kasan_early_alloc_pages(unsigned int order) -{ - pgalloc_pos -= (PAGE_SIZE << order); - - if (pgalloc_pos < pgalloc_low) - kasan_early_panic("out of memory during initialisation\n"); - - return __va(pgalloc_pos); -} - -static void * __init kasan_early_crst_alloc(unsigned long val) -{ - unsigned long *table; - - table = kasan_early_alloc_pages(CRST_ALLOC_ORDER); - if (table) - crst_table_init(table, val); - return table; -} - -static pte_t * __init kasan_early_pte_alloc(void) -{ - static void *pte_leftover; - pte_t *pte; - - BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); - - if (!pte_leftover) { - pte_leftover = kasan_early_alloc_pages(0); - pte = pte_leftover + _PAGE_TABLE_SIZE; - } else { - pte = pte_leftover; - pte_leftover = NULL; - } - memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); - return pte; -} - -enum populate_mode { - POPULATE_MAP, - POPULATE_ZERO_SHADOW, - POPULATE_SHALLOW -}; - -static inline pgprot_t pgprot_clear_bit(pgprot_t pgprot, unsigned long bit) -{ - return __pgprot(pgprot_val(pgprot) & ~bit); -} - -static void __init kasan_early_pgtable_populate(unsigned long address, - unsigned long end, - enum populate_mode mode) -{ - pgprot_t pgt_prot_zero = PAGE_KERNEL_RO; - pgprot_t pgt_prot = PAGE_KERNEL; - pgprot_t sgt_prot = SEGMENT_KERNEL; - pgd_t *pg_dir; - p4d_t *p4_dir; - pud_t *pu_dir; - pmd_t *pm_dir; - pte_t *pt_dir; - pmd_t pmd; - pte_t pte; - - if (!has_nx) { - pgt_prot_zero = pgprot_clear_bit(pgt_prot_zero, _PAGE_NOEXEC); - pgt_prot = pgprot_clear_bit(pgt_prot, _PAGE_NOEXEC); - sgt_prot = pgprot_clear_bit(sgt_prot, _SEGMENT_ENTRY_NOEXEC); - } - - while (address < end) { - pg_dir = pgd_offset_k(address); - if (pgd_none(*pg_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PGDIR_SIZE) && - end - address >= PGDIR_SIZE) { - pgd_populate(&init_mm, pg_dir, - kasan_early_shadow_p4d); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - continue; - } - p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY); - pgd_populate(&init_mm, pg_dir, p4_dir); - } - - if (mode == POPULATE_SHALLOW) { - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - - p4_dir = p4d_offset(pg_dir, address); - if (p4d_none(*p4_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, P4D_SIZE) && - end - address >= P4D_SIZE) { - p4d_populate(&init_mm, p4_dir, - kasan_early_shadow_pud); - address = (address + P4D_SIZE) & P4D_MASK; - continue; - } - pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY); - p4d_populate(&init_mm, p4_dir, pu_dir); - } - - pu_dir = pud_offset(p4_dir, address); - if (pud_none(*pu_dir)) { - if (mode == POPULATE_ZERO_SHADOW && - IS_ALIGNED(address, PUD_SIZE) && - end - address >= PUD_SIZE) { - pud_populate(&init_mm, pu_dir, - kasan_early_shadow_pmd); - address = (address + PUD_SIZE) & PUD_MASK; - continue; - } - pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY); - pud_populate(&init_mm, pu_dir, pm_dir); - } - - pm_dir = pmd_offset(pu_dir, address); - if (pmd_none(*pm_dir)) { - if (IS_ALIGNED(address, PMD_SIZE) && - end - address >= PMD_SIZE) { - if (mode == POPULATE_ZERO_SHADOW) { - pmd_populate(&init_mm, pm_dir, kasan_early_shadow_pte); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } else if (has_edat) { - void *page = kasan_early_alloc_segment(); - - memset(page, 0, _SEGMENT_SIZE); - pmd = __pmd(__pa(page)); - pmd = set_pmd_bit(pmd, sgt_prot); - set_pmd(pm_dir, pmd); - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - } - pt_dir = kasan_early_pte_alloc(); - pmd_populate(&init_mm, pm_dir, pt_dir); - } else if (pmd_large(*pm_dir)) { - address = (address + PMD_SIZE) & PMD_MASK; - continue; - } - - pt_dir = pte_offset_kernel(pm_dir, address); - if (pte_none(*pt_dir)) { - void *page; - - switch (mode) { - case POPULATE_MAP: - page = kasan_early_alloc_pages(0); - memset(page, 0, PAGE_SIZE); - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot); - set_pte(pt_dir, pte); - break; - case POPULATE_ZERO_SHADOW: - page = kasan_early_shadow_page; - pte = __pte(__pa(page)); - pte = set_pte_bit(pte, pgt_prot_zero); - set_pte(pt_dir, pte); - break; - case POPULATE_SHALLOW: - /* should never happen */ - break; - } - } - address += PAGE_SIZE; - } -} - -static void __init kasan_early_detect_facilities(void) -{ - if (test_facility(8)) { - has_edat = true; - __ctl_set_bit(0, 23); - } - if (!noexec_disabled && test_facility(130)) { - has_nx = true; - __ctl_set_bit(0, 20); - } -} - -void __init kasan_early_init(void) -{ - pte_t pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); - pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); - p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); - unsigned long untracked_end = MODULES_VADDR; - unsigned long shadow_alloc_size; - unsigned long start, end; - int i; - - kasan_early_detect_facilities(); - if (!has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); - - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE)); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE)); - - /* init kasan zero shadow */ - crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); - crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); - memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); - - if (has_edat) { - shadow_alloc_size = get_mem_detect_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; - segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE); - segment_low = segment_pos - shadow_alloc_size; - segment_low = round_down(segment_low, _SEGMENT_SIZE); - pgalloc_pos = segment_low; - } - /* - * Current memory layout: - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan | - * | | / | zero page | - * +- vmalloc area -+ / | mapping | - * | vmalloc_size | / | (untracked) | - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ | unmapped | allocated per module - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - * - * Current memory layout (KASAN_VMALLOC): - * +- 0 -------------+ +- shadow start -+ - * |1:1 ident mapping| /|1/8 of ident map| - * | | / | | - * +-end of ident map+ / +----------------+ - * | ... gap ... | / | kasan zero page| (untracked) - * | | / | mapping | - * +- vmalloc area -+ / +----------------+ - * | vmalloc_size | / |shallow populate| - * +- modules vaddr -+ / +----------------+ - * | 2Gb |/ |shallow populate| - * +- shadow start -+ +----------------+ - * | 1/8 addr space | | zero pg mapping| (untracked) - * +- shadow end ----+---------+- shadow end ---+ - */ - /* populate kasan shadow (for identity mapping and zero page mapping) */ - for_each_mem_detect_usable_block(i, &start, &end) - kasan_early_pgtable_populate(__sha(start), __sha(end), POPULATE_MAP); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_early_pgtable_populate(__sha(VMALLOC_START), __sha(MODULES_END), - POPULATE_SHALLOW); - } - /* populate kasan shadow for untracked memory */ - kasan_early_pgtable_populate(__sha(ident_map_size), __sha(untracked_end), - POPULATE_ZERO_SHADOW); - kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE), - POPULATE_ZERO_SHADOW); - /* enable kasan */ - init_task.kasan_depth = 0; - sclp_early_printk("KernelAddressSanitizer initialized\n"); -} diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 85195c18b2e8..7838e9c70000 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -300,8 +300,6 @@ static int change_page_attr(unsigned long addr, unsigned long end, if (addr == end) return 0; - if (end >= MODULES_END) - return -EINVAL; mutex_lock(&cpa_mutex); pgdp = pgd_offset_k(addr); do { diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 4113a7ffa149..242f95aa9801 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -5,6 +5,7 @@ #include <linux/memory_hotplug.h> #include <linux/memblock.h> +#include <linux/kasan.h> #include <linux/pfn.h> #include <linux/mm.h> #include <linux/init.h> @@ -664,6 +665,9 @@ static void __init memblock_region_swap(void *a, void *b, int size) swap(*(struct memblock_region *)a, *(struct memblock_region *)b); } +#ifdef CONFIG_KASAN +#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x)) +#endif /* * map whole physical memory to virtual memory (identity mapping) * we reserve enough space in the vmalloc area for vmemmap to hotplug @@ -733,6 +737,13 @@ void __init vmem_map_init(void) SET_MEMORY_RW | SET_MEMORY_NX); } +#ifdef CONFIG_KASAN + for_each_mem_range(i, &base, &end) + __set_memory(__sha(base), + (__sha(end) - __sha(base)) >> PAGE_SHIFT, + SET_MEMORY_RW | SET_MEMORY_NX); +#endif + __set_memory((unsigned long)_stext, (unsigned long)(_etext - _stext) >> PAGE_SHIFT, SET_MEMORY_RO | SET_MEMORY_X); diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c index ac1d00980fa6..dbd5c53d8edf 100644 --- a/drivers/s390/char/sclp_early_core.c +++ b/drivers/s390/char/sclp_early_core.c @@ -10,7 +10,7 @@ #include <asm/ebcdic.h> #include <asm/irq.h> #include <asm/sections.h> -#include <asm/mem_detect.h> +#include <asm/physmem_info.h> #include <asm/facility.h> #include "sclp.h" #include "sclp_rw.h" @@ -336,7 +336,7 @@ int __init sclp_early_get_hsa_size(unsigned long *hsa_size) #define SCLP_STORAGE_INFO_FACILITY 0x0000400000000000UL -void __weak __init add_mem_detect_block(u64 start, u64 end) {} +void __weak __init add_physmem_online_range(u64 start, u64 end) {} int __init sclp_early_read_storage_info(void) { struct read_storage_sccb *sccb = (struct read_storage_sccb *)sclp_early_sccb; @@ -369,7 +369,7 @@ int __init sclp_early_read_storage_info(void) if (!sccb->entries[sn]) continue; rn = sccb->entries[sn] >> 16; - add_mem_detect_block((rn - 1) * rzm, rn * rzm); + add_physmem_online_range((rn - 1) * rzm, rn * rzm); } break; case 0x0310: @@ -382,6 +382,6 @@ int __init sclp_early_read_storage_info(void) return 0; fail: - mem_detect.count = 0; + physmem_info.range_count = 0; return -EIO; } |