diff options
Diffstat (limited to 'arch/s390')
67 files changed, 1483 insertions, 574 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4fe5b2affa23..515240576930 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -145,6 +145,7 @@ config S390 select HAVE_KERNEL_LZ4 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_KERNEL_UNCOMPRESSED select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 68a690442be0..eee6703093c3 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -14,8 +14,18 @@ LD_BFD := elf64-s390 LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_CFLAGS += -m64 KBUILD_AFLAGS += -m64 +KBUILD_CFLAGS += -m64 +aflags_dwarf := -Wa,-gdwarf-2 +KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__ +KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf)) +KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2 +KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY +KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float +KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables +KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) +KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) UTS_MACHINE := s390x STACK_SIZE := 16384 CHECKFLAGS += -D__s390__ -D__s390x__ @@ -52,18 +62,14 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -# old style option for packed stacks -ifeq ($(call cc-option-yn,-mkernel-backchain),y) -cflags-$(CONFIG_PACK_STACK) += -mkernel-backchain -D__PACK_STACK -aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK -endif - -# new style option for packed stacks ifeq ($(call cc-option-yn,-mpacked-stack),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif +KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) +KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) + ifeq ($(call cc-option-yn,-mstack-size=8192 -mstack-guard=128),y) cflags-$(CONFIG_CHECK_STACK) += -mstack-size=$(STACK_SIZE) ifneq ($(call cc-option-yn,-mstack-size=8192),y) @@ -71,8 +77,11 @@ cflags-$(CONFIG_CHECK_STACK) += -mstack-guard=$(CONFIG_STACK_GUARD) endif endif -ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) -cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack +ifdef CONFIG_WARN_DYNAMIC_STACK + ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) + KBUILD_CFLAGS += -mwarn-dynamicstack + KBUILD_CFLAGS_DECOMPRESSOR += -mwarn-dynamicstack + endif endif ifdef CONFIG_EXPOLINE @@ -82,6 +91,7 @@ ifdef CONFIG_EXPOLINE CC_FLAGS_EXPOLINE += -mindirect-branch-table export CC_FLAGS_EXPOLINE cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE + aflags-y += -DCC_USING_EXPOLINE endif endif @@ -102,11 +112,12 @@ KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi) KBUILD_AFLAGS += $(aflags-y) $(cfi) +export KBUILD_AFLAGS_DECOMPRESSOR +export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary -head-y := arch/s390/kernel/head.o -head-y += arch/s390/kernel/head64.o +head-y := arch/s390/kernel/head64.o # See arch/s390/Kbuild for content of core part of the kernel core-y += arch/s390/ @@ -121,7 +132,7 @@ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls tools := arch/s390/tools -all: image bzImage +all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage @@ -129,7 +140,7 @@ KBUILD_IMAGE := $(boot)/bzImage install: vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -image bzImage: vmlinux +bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zfcpdump: @@ -152,8 +163,7 @@ archprepare: # Don't use tabs in echo arguments define archhelp - echo '* image - Kernel image for IPL ($(boot)/image)' - echo '* bzImage - Compressed kernel image for IPL ($(boot)/bzImage)' + echo '* bzImage - Kernel image for IPL ($(boot)/bzImage)' echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee6a9c387c87..9bf8489df6e6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -206,35 +206,28 @@ static int appldata_timer_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - char buf[2]; + int timer_active = appldata_timer_active; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &timer_active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, appldata_timer_active ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; spin_lock(&appldata_timer_lock); - if (buf[0] == '1') + if (timer_active) __appldata_vtimer_setup(APPLDATA_ADD_TIMER); - else if (buf[0] == '0') + else __appldata_vtimer_setup(APPLDATA_DEL_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -248,37 +241,24 @@ static int appldata_interval_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; - int interval; - char buf[16]; + int interval = appldata_interval; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &interval, + .maxlen = sizeof(int), + .extra1 = &one, + }; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - len = sprintf(buf, "%i\n", appldata_interval); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - interval = 0; - sscanf(buf, "%i", &interval); - if (interval <= 0) - return -EINVAL; + rc = proc_dointvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; spin_lock(&appldata_timer_lock); appldata_interval = interval; __appldata_vtimer_setup(APPLDATA_MOD_TIMER); spin_unlock(&appldata_timer_lock); -out: - *lenp = len; - *ppos += len; return 0; } @@ -293,10 +273,17 @@ appldata_generic_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; - unsigned int len; - int rc, found; - char buf[2]; struct list_head *lh; + int rc, found; + int active; + int zero = 0; + int one = 1; + struct ctl_table ctl_entry = { + .data = &active, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; found = 0; mutex_lock(&appldata_ops_mutex); @@ -317,31 +304,15 @@ appldata_generic_handler(struct ctl_table *ctl, int write, } mutex_unlock(&appldata_ops_mutex); - if (!*lenp || *ppos) { - *lenp = 0; + active = ops->active; + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) { module_put(ops->owner); - return 0; - } - if (!write) { - strncpy(buf, ops->active ? "1\n" : "0\n", ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) { - module_put(ops->owner); - return -EFAULT; - } - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) { - module_put(ops->owner); - return -EFAULT; + return rc; } mutex_lock(&appldata_ops_mutex); - if ((buf[0] == '1') && (ops->active == 0)) { + if (active && (ops->active == 0)) { // protect work queue callback if (!try_module_get(ops->owner)) { mutex_unlock(&appldata_ops_mutex); @@ -359,7 +330,7 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } else ops->active = 1; - } else if ((buf[0] == '0') && (ops->active == 1)) { + } else if (!active && (ops->active == 1)) { ops->active = 0; rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, (unsigned long) ops->data, ops->size, @@ -370,9 +341,6 @@ appldata_generic_handler(struct ctl_table *ctl, int write, module_put(ops->owner); } mutex_unlock(&appldata_ops_mutex); -out: - *lenp = len; - *ppos += len; module_put(ops->owner); return 0; } diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index d1fa37fcce83..9e6668ee93de 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -3,19 +3,52 @@ # Makefile for the linux s390-specific parts of the memory manager. # -targets := image -targets += bzImage -subdir- := compressed +KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n -$(obj)/image: vmlinux FORCE - $(call if_changed,objcopy) +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) + +# +# Use -march=z900 for als.c to be able to print an error +# message if the kernel is started on a machine which is too old +# +ifneq ($(CC_FLAGS_MARCH),-march=z900) +AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) +AFLAGS_head.o += -march=z900 +AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH) +AFLAGS_mem.o += -march=z900 +CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) +CFLAGS_als.o += -march=z900 +CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp_early_core.o += -march=z900 +endif + +CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char + +obj-y := head.o als.o ebcdic.o sclp_early_core.o mem.o +targets := bzImage startup.a $(obj-y) +subdir- := compressed + +OBJECTS := $(addprefix $(obj)/,$(obj-y)) $(obj)/bzImage: $(obj)/compressed/vmlinux FORCE $(call if_changed,objcopy) -$(obj)/compressed/vmlinux: FORCE +$(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ +quiet_cmd_ar = AR $@ + cmd_ar = rm -f $@; $(AR) rcsTP$(KBUILD_ARFLAGS) $@ $(filter $(OBJECTS), $^) + +$(obj)/startup.a: $(OBJECTS) FORCE + $(call if_changed,ar) + install: $(CONFIGURE) $(obj)/bzImage sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" + +chkbss := $(OBJECTS) +chkbss-target := $(obj)/startup.a +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/als.c b/arch/s390/boot/als.c index d1892bf36cab..d592e0d90d9f 100644 --- a/arch/s390/kernel/als.c +++ b/arch/s390/boot/als.c @@ -3,12 +3,10 @@ * Copyright IBM Corp. 2016 */ #include <linux/kernel.h> -#include <linux/init.h> #include <asm/processor.h> #include <asm/facility.h> #include <asm/lowcore.h> #include <asm/sclp.h> -#include "entry.h" /* * The code within this file will be called very early. It may _not_ @@ -18,9 +16,9 @@ * For temporary objects the stack (16k) should be used. */ -static unsigned long als[] __initdata = { FACILITIES_ALS }; +static unsigned long als[] = { FACILITIES_ALS }; -static void __init u16_to_hex(char *str, u16 val) +static void u16_to_hex(char *str, u16 val) { int i, num; @@ -33,9 +31,9 @@ static void __init u16_to_hex(char *str, u16 val) *str = '\0'; } -static void __init print_machine_type(void) +static void print_machine_type(void) { - static char mach_str[80] __initdata = "Detected machine-type number: "; + static char mach_str[80] = "Detected machine-type number: "; char type_str[5]; struct cpuid id; @@ -46,7 +44,7 @@ static void __init print_machine_type(void) sclp_early_printk(mach_str); } -static void __init u16_to_decimal(char *str, u16 val) +static void u16_to_decimal(char *str, u16 val) { int div = 1; @@ -60,9 +58,9 @@ static void __init u16_to_decimal(char *str, u16 val) *str = '\0'; } -static void __init print_missing_facilities(void) +static void print_missing_facilities(void) { - static char als_str[80] __initdata = "Missing facilities: "; + static char als_str[80] = "Missing facilities: "; unsigned long val; char val_str[6]; int i, j, first; @@ -95,7 +93,7 @@ static void __init print_missing_facilities(void) sclp_early_printk("See Principles of Operations for facility bits\n"); } -static void __init facility_mismatch(void) +static void facility_mismatch(void) { sclp_early_printk("The Linux kernel requires more recent processor hardware\n"); print_machine_type(); @@ -103,7 +101,7 @@ static void __init facility_mismatch(void) disabled_wait(0x8badcccc); } -void __init verify_facilities(void) +void verify_facilities(void) { int i; diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore index 2088cc140629..45aeb4f08752 100644 --- a/arch/s390/boot/compressed/.gitignore +++ b/arch/s390/boot/compressed/.gitignore @@ -1,4 +1,5 @@ sizes.h vmlinux vmlinux.lds +vmlinux.scr.lds vmlinux.bin.full diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 5766f7b9b271..04609478d18b 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -6,39 +6,29 @@ # KCOV_INSTRUMENT := n +GCOV_PROFILE := n +UBSAN_SANITIZE := n +obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,head.o misc.o) piggy.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 -targets += misc.o piggy.o sizes.h head.o - -KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2 -KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) -KBUILD_CFLAGS += $(call cc-option,-ffreestanding) +targets += vmlinux.scr.lds $(obj-y) $(if $(CONFIG_KERNEL_UNCOMPRESSED),,sizes.h) -GCOV_PROFILE := n -UBSAN_SANITIZE := n +KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR) +KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR) -OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o ebcdic.o als.o) -OBJECTS += $(objtree)/drivers/s390/char/sclp_early_core.o -OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o +OBJECTS := $(addprefix $(obj)/,$(obj-y)) LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T -$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) +$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) $(call if_changed,ld) -TRIM_HEAD_SIZE := 0x11000 - -sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p' +# extract required uncompressed vmlinux symbols and adjust them to reflect offsets inside vmlinux.bin +sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - 0x100000)/p' quiet_cmd_sizes = GEN $@ cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@ -quiet_cmd_trim_head = TRIM $@ - cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@ - $(obj)/sizes.h: vmlinux $(call if_changed,sizes) @@ -48,21 +38,18 @@ $(obj)/head.o: $(obj)/sizes.h CFLAGS_misc.o += -I$(objtree)/$(obj) $(obj)/misc.o: $(obj)/sizes.h -OBJCOPYFLAGS_vmlinux.bin.full := -R .comment -S -$(obj)/vmlinux.bin.full: vmlinux +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux $(call if_changed,objcopy) -$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full - $(call if_changed,trim_head) - vmlinux.bin.all-y := $(obj)/vmlinux.bin -suffix-$(CONFIG_KERNEL_GZIP) := gz -suffix-$(CONFIG_KERNEL_BZIP2) := bz2 -suffix-$(CONFIG_KERNEL_LZ4) := lz4 -suffix-$(CONFIG_KERNEL_LZMA) := lzma -suffix-$(CONFIG_KERNEL_LZO) := lzo -suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_GZIP) := .gz +suffix-$(CONFIG_KERNEL_BZIP2) := .bz2 +suffix-$(CONFIG_KERNEL_LZ4) := .lz4 +suffix-$(CONFIG_KERNEL_LZMA) := .lzma +suffix-$(CONFIG_KERNEL_LZO) := .lzo +suffix-$(CONFIG_KERNEL_XZ) := .xz $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) $(call if_changed,gzip) @@ -78,5 +65,9 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) $(call if_changed,xzkern) LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T -$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) +$(obj)/piggy.o: $(obj)/vmlinux.scr.lds $(obj)/vmlinux.bin$(suffix-y) $(call if_changed,ld) + +chkbss := $(filter-out $(obj)/misc.o $(obj)/piggy.o,$(OBJECTS)) +chkbss-target := $(obj)/vmlinux.bin +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index 9f94eca0f467..df8dbbc17bcc 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -15,7 +15,7 @@ #include "sizes.h" __HEAD -ENTRY(startup_continue) +ENTRY(startup_decompressor) basr %r13,0 # get base .LPG1: # setup stack @@ -23,7 +23,7 @@ ENTRY(startup_continue) aghi %r15,-160 brasl %r14,decompress_kernel # Set up registers for memory mover. We move the decompressed image to - # 0x11000, where startup_continue of the decompressed image is supposed + # 0x100000, where startup_continue of the decompressed image is supposed # to be. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) @@ -33,7 +33,7 @@ ENTRY(startup_continue) la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) # When the memory mover is done we pass control to - # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # arch/s390/kernel/head64.S:startup_continue which lives at 0x100000 in # the decompressed image. lgr %r6,%r2 br %r1 @@ -47,6 +47,6 @@ mover_end: .Lstack: .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) .Loffset: - .quad 0x11000 + .quad 0x100000 .Lmvsize: .quad SZ__bss_start diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 511b2cc9b91a..f66ad73c205b 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -71,43 +71,6 @@ static int puts(const char *s) return 0; } -void *memset(void *s, int c, size_t n) -{ - char *xs; - - xs = s; - while (n--) - *xs++ = c; - return s; -} - -void *memcpy(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - while (n--) - *d++ = *s++; - return dest; -} - -void *memmove(void *dest, const void *src, size_t n) -{ - const char *s = src; - char *d = dest; - - if (d <= s) { - while (n--) - *d++ = *s++; - } else { - d += n; - s += n; - while (n--) - *--d = *--s; - } - return dest; -} - static void error(char *x) { unsigned long long psw = 0x000a0000deadbeefULL; diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S index d43c2db12d30..b16ac8b3c439 100644 --- a/arch/s390/boot/compressed/vmlinux.lds.S +++ b/arch/s390/boot/compressed/vmlinux.lds.S @@ -23,13 +23,10 @@ SECTIONS *(.text.*) _etext = . ; } - .rodata.compressed : { - *(.rodata.compressed) - } .rodata : { _rodata = . ; *(.rodata) /* read-only data */ - *(.rodata.*) + *(EXCLUDE_FILE (*piggy.o) .rodata.compressed) _erodata = . ; } .data : { @@ -38,6 +35,15 @@ SECTIONS *(.data.*) _edata = . ; } + startup_continue = 0x100000; +#ifdef CONFIG_KERNEL_UNCOMPRESSED + . = 0x100000; +#else + . = ALIGN(8); +#endif + .rodata.compressed : { + *(.rodata.compressed) + } . = ALIGN(256); .bss : { _bss = . ; @@ -54,5 +60,6 @@ SECTIONS *(.eh_frame) *(__ex_table) *(*__ksymtab*) + *(___kcrctab*) } } diff --git a/arch/s390/boot/compressed/vmlinux.scr b/arch/s390/boot/compressed/vmlinux.scr.lds.S index 42a242597f34..ff01d18c9222 100644 --- a/arch/s390/boot/compressed/vmlinux.scr +++ b/arch/s390/boot/compressed/vmlinux.scr.lds.S @@ -2,10 +2,14 @@ SECTIONS { .rodata.compressed : { +#ifndef CONFIG_KERNEL_UNCOMPRESSED input_len = .; LONG(input_data_end - input_data) input_data = .; +#endif *(.data) +#ifndef CONFIG_KERNEL_UNCOMPRESSED output_len = . - 4; input_data_end = .; +#endif } } diff --git a/arch/s390/boot/ebcdic.c b/arch/s390/boot/ebcdic.c new file mode 100644 index 000000000000..7391e7d36086 --- /dev/null +++ b/arch/s390/boot/ebcdic.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../kernel/ebcdic.c" diff --git a/arch/s390/kernel/head.S b/arch/s390/boot/head.S index 5c42f16a54c4..f721913b73f1 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/boot/head.S @@ -272,14 +272,14 @@ iplstart: .org 0x10000 ENTRY(startup) j .Lep_startup_normal - .org 0x10008 + .org EP_OFFSET # # This is a list of s390 kernel entry points. At address 0x1000f the number of # valid entry points is stored. # # IMPORTANT: Do not change this table, it is s390 kernel ABI! # - .ascii "S390EP" + .ascii EP_STRING .byte 0x00,0x01 # # kdump startup-code at 0x10010, running in 64 bit absolute addressing mode @@ -310,10 +310,11 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities -# For uncompressed images, continue in -# arch/s390/kernel/head64.S. For compressed images, continue in -# arch/s390/boot/compressed/head.S. +#ifdef CONFIG_KERNEL_UNCOMPRESSED jg startup_continue +#else + jg startup_decompressor +#endif .Lstack: .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)) diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/boot/head_kdump.S index 174d6959bf5b..174d6959bf5b 100644 --- a/arch/s390/kernel/head_kdump.S +++ b/arch/s390/boot/head_kdump.S diff --git a/arch/s390/boot/mem.S b/arch/s390/boot/mem.S new file mode 100644 index 000000000000..b33463633f03 --- /dev/null +++ b/arch/s390/boot/mem.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../lib/mem.S" diff --git a/arch/s390/boot/sclp_early_core.c b/arch/s390/boot/sclp_early_core.c new file mode 100644 index 000000000000..5a19fd7020b5 --- /dev/null +++ b/arch/s390/boot/sclp_early_core.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../drivers/s390/char/sclp_early_core.c" diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index a2945b289a29..3452e18bb1ca 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -497,7 +497,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) } diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) @@ -544,7 +544,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_RET(rc); + return PTR_ERR_OR_ZERO(rc); } static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 06b513d192b9..c681329fdeec 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -36,7 +36,7 @@ struct hypfs_sb_info { kuid_t uid; /* uid used for files and dirs */ kgid_t gid; /* gid used for files and dirs */ struct dentry *update_file; /* file to trigger update */ - time_t last_update; /* last update time in secs since 1970 */ + time64_t last_update; /* last update, CLOCK_MONOTONIC time */ struct mutex lock; /* lock to protect update process */ }; @@ -52,7 +52,7 @@ static void hypfs_update_update(struct super_block *sb) struct hypfs_sb_info *sb_info = sb->s_fs_info; struct inode *inode = d_inode(sb_info->update_file); - sb_info->last_update = get_seconds(); + sb_info->last_update = ktime_get_seconds(); inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); } @@ -179,7 +179,7 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) * to restart data collection in this case. */ mutex_lock(&fs_info->lock); - if (fs_info->last_update == get_seconds()) { + if (fs_info->last_update == ktime_get_seconds()) { rc = -EBUSY; goto out; } diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index c1bedb4c8de0..046e044a48d0 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -47,6 +47,50 @@ struct ap_queue_status { }; /** + * ap_intructions_available() - Test if AP instructions are available. + * + * Returns 0 if the AP instructions are installed. + */ +static inline int ap_instructions_available(void) +{ + register unsigned long reg0 asm ("0") = AP_MKQID(0, 0); + register unsigned long reg1 asm ("1") = -ENODEV; + register unsigned long reg2 asm ("2"); + + asm volatile( + " .long 0xb2af0000\n" /* PQAP(TAPQ) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_tapq(): Test adjunct processor queue. + * @qid: The AP queue number + * @info: Pointer to queue descriptor + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_tapq(ap_qid_t qid, unsigned long *info) +{ + register unsigned long reg0 asm ("0") = qid; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "=d" (reg1), "=d" (reg2) + : "d" (reg0) + : "cc"); + if (info) + *info = reg2; + return reg1; +} + +/** * ap_test_queue(): Test adjunct processor queue. * @qid: The AP queue number * @tbit: Test facilities bit @@ -54,10 +98,57 @@ struct ap_queue_status { * * Returns AP queue status structure. */ -struct ap_queue_status ap_test_queue(ap_qid_t qid, - int tbit, - unsigned long *info); +static inline struct ap_queue_status ap_test_queue(ap_qid_t qid, + int tbit, + unsigned long *info) +{ + if (tbit) + qid |= 1UL << 23; /* set T bit*/ + return ap_tapq(qid, info); +} +/** + * ap_pqap_rapq(): Reset adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_rapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (1UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(RAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * ap_pqap_zapq(): Reset and zeroize adjunct processor queue. + * @qid: The AP queue number + * + * Returns AP queue status structure. + */ +static inline struct ap_queue_status ap_zapq(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | (2UL << 24); + register struct ap_queue_status reg1 asm ("1"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(ZAPQ) */ + : "=d" (reg1) + : "d" (reg0) + : "cc"); + return reg1; +} + +/** + * struct ap_config_info - convenience struct for AP crypto + * config info as returned by the ap_qci() function. + */ struct ap_config_info { unsigned int apsc : 1; /* S bit */ unsigned int apxa : 1; /* N bit */ @@ -74,50 +165,189 @@ struct ap_config_info { unsigned char _reserved4[16]; } __aligned(8); -/* - * ap_query_configuration(): Fetch cryptographic config info +/** + * ap_qci(): Get AP configuration data * - * Returns the ap configuration info fetched via PQAP(QCI). - * On success 0 is returned, on failure a negative errno - * is returned, e.g. if the PQAP(QCI) instruction is not - * available, the return value will be -EOPNOTSUPP. + * Returns 0 on success, or -EOPNOTSUPP. */ -int ap_query_configuration(struct ap_config_info *info); +static inline int ap_qci(struct ap_config_info *config) +{ + register unsigned long reg0 asm ("0") = 4UL << 24; + register unsigned long reg1 asm ("1") = -EOPNOTSUPP; + register struct ap_config_info *reg2 asm ("2") = config; + + asm volatile( + ".long 0xb2af0000\n" /* PQAP(QCI) */ + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b, 1b) + : "+d" (reg1) + : "d" (reg0), "d" (reg2) + : "cc", "memory"); + + return reg1; +} /* * struct ap_qirq_ctrl - convenient struct for easy invocation - * of the ap_queue_irq_ctrl() function. This struct is passed - * as GR1 parameter to the PQAP(AQIC) instruction. For details - * please see the AR documentation. + * of the ap_aqic() function. This struct is passed as GR1 + * parameter to the PQAP(AQIC) instruction. For details please + * see the AR documentation. */ struct ap_qirq_ctrl { unsigned int _res1 : 8; - unsigned int zone : 8; /* zone info */ - unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ + unsigned int zone : 8; /* zone info */ + unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */ unsigned int _res2 : 4; - unsigned int gisc : 3; /* guest isc field */ + unsigned int gisc : 3; /* guest isc field */ unsigned int _res3 : 6; - unsigned int gf : 2; /* gisa format */ + unsigned int gf : 2; /* gisa format */ unsigned int _res4 : 1; - unsigned int gisa : 27; /* gisa origin */ + unsigned int gisa : 27; /* gisa origin */ unsigned int _res5 : 1; - unsigned int isc : 3; /* irq sub class */ + unsigned int isc : 3; /* irq sub class */ }; /** - * ap_queue_irq_ctrl(): Control interruption on a AP queue. + * ap_aqic(): Control interruption for a specific AP. * @qid: The AP queue number - * @qirqctrl: struct ap_qirq_ctrl, see above + * @qirqctrl: struct ap_qirq_ctrl (64 bit value) * @ind: The notification indicator byte * * Returns AP queue status. + */ +static inline struct ap_queue_status ap_aqic(ap_qid_t qid, + struct ap_qirq_ctrl qirqctrl, + void *ind) +{ + register unsigned long reg0 asm ("0") = qid | (3UL << 24); + register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl; + register struct ap_queue_status reg1_out asm ("1"); + register void *reg2 asm ("2") = ind; + + asm volatile( + ".long 0xb2af0000" /* PQAP(AQIC) */ + : "=d" (reg1_out) + : "d" (reg0), "d" (reg1_in), "d" (reg2) + : "cc"); + return reg1_out; +} + +/* + * union ap_qact_ap_info - used together with the + * ap_aqic() function to provide a convenient way + * to handle the ap info needed by the qact function. + */ +union ap_qact_ap_info { + unsigned long val; + struct { + unsigned int : 3; + unsigned int mode : 3; + unsigned int : 26; + unsigned int cat : 8; + unsigned int : 8; + unsigned char ver[2]; + }; +}; + +/** + * ap_qact(): Query AP combatibility type. + * @qid: The AP queue number + * @apinfo: On input the info about the AP queue. On output the + * alternate AP queue info provided by the qact function + * in GR2 is stored in. * - * Control interruption on the given AP queue. - * Just a simple wrapper function for the low level PQAP(AQIC) - * instruction available for other kernel modules. + * Returns AP queue status. Check response_code field for failures. */ -struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid, - struct ap_qirq_ctrl qirqctrl, - void *ind); +static inline struct ap_queue_status ap_qact(ap_qid_t qid, int ifbit, + union ap_qact_ap_info *apinfo) +{ + register unsigned long reg0 asm ("0") = qid | (5UL << 24) + | ((ifbit & 0x01) << 22); + register unsigned long reg1_in asm ("1") = apinfo->val; + register struct ap_queue_status reg1_out asm ("1"); + register unsigned long reg2 asm ("2"); + + asm volatile( + ".long 0xb2af0000" /* PQAP(QACT) */ + : "+d" (reg1_in), "=d" (reg1_out), "=d" (reg2) + : "d" (reg0) + : "cc"); + apinfo->val = reg2; + return reg1_out; +} + +/** + * ap_nqap(): Send message to adjunct processor queue. + * @qid: The AP queue number + * @psmid: The program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on NQAP can't happen because the L bit is 1. + * Condition code 2 on NQAP also means the send is incomplete, + * because a segment boundary was reached. The NQAP is repeated. + */ +static inline struct ap_queue_status ap_nqap(ap_qid_t qid, + unsigned long long psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm ("0") = qid | 0x40000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm ("2") = (unsigned long) msg; + register unsigned long reg3 asm ("3") = (unsigned long) length; + register unsigned long reg4 asm ("4") = (unsigned int) (psmid >> 32); + register unsigned long reg5 asm ("5") = psmid & 0xffffffff; + + asm volatile ( + "0: .long 0xb2ad0042\n" /* NQAP */ + " brc 2,0b" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), "+d" (reg3) + : "d" (reg4), "d" (reg5) + : "cc", "memory"); + return reg1; +} + +/** + * ap_dqap(): Receive message from adjunct processor queue. + * @qid: The AP queue number + * @psmid: Pointer to program supplied message identifier + * @msg: The message text + * @length: The message length + * + * Returns AP queue status structure. + * Condition code 1 on DQAP means the receive has taken place + * but only partially. The response is incomplete, hence the + * DQAP is repeated. + * Condition code 2 on DQAP also means the receive is incomplete, + * this time because a segment boundary was reached. Again, the + * DQAP is repeated. + * Note that gpr2 is used by the DQAP instruction to keep track of + * any 'residual' length, in case the instruction gets interrupted. + * Hence it gets zeroed before the instruction. + */ +static inline struct ap_queue_status ap_dqap(ap_qid_t qid, + unsigned long long *psmid, + void *msg, size_t length) +{ + register unsigned long reg0 asm("0") = qid | 0x80000000UL; + register struct ap_queue_status reg1 asm ("1"); + register unsigned long reg2 asm("2") = 0UL; + register unsigned long reg4 asm("4") = (unsigned long) msg; + register unsigned long reg5 asm("5") = (unsigned long) length; + register unsigned long reg6 asm("6") = 0UL; + register unsigned long reg7 asm("7") = 0UL; + + + asm volatile( + "0: .long 0xb2ae0064\n" /* DQAP */ + " brc 6,0b\n" + : "+d" (reg0), "=d" (reg1), "+d" (reg2), + "+d" (reg4), "+d" (reg5), "+d" (reg6), "+d" (reg7) + : : "cc", "memory"); + *psmid = (((unsigned long long) reg6) << 32) + reg7; + return reg1; +} #endif /* _ASM_S390_AP_H_ */ diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index de023a9a88ca..bf2cbff926ef 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -2,7 +2,7 @@ /* * CPU-measurement facilities * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> * Jan Glauber <jang@linux.vnet.ibm.com> */ @@ -139,8 +139,14 @@ struct hws_trailer_entry { unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ - unsigned long long progusage1; /* 48 - reserved for programming use */ - unsigned long long progusage2; /* */ + union { /* 48 - reserved for programming use */ + struct { + unsigned int clock_base:1; /* in progusage2 */ + unsigned long long progusage1:63; + unsigned long long progusage2; + }; + unsigned long long progusage[2]; + }; } __packed; /* Load program parameter */ diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index e07cce88dfb0..fcbd638fb9f4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -9,6 +9,14 @@ #ifndef _ASM_S390_GMAP_H #define _ASM_S390_GMAP_H +/* Generic bits for GMAP notification on DAT table entry changes. */ +#define GMAP_NOTIFY_SHADOW 0x2 +#define GMAP_NOTIFY_MPROT 0x1 + +/* Status bits only for huge segment entries */ +#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ + /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list @@ -132,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *, int gmap_mprotect_notify(struct gmap *, unsigned long start, unsigned long len, int prot); +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], + unsigned long gaddr, unsigned long vmaddr); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 9c5fc50204dd..2d1afa58a4b6 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,7 +37,10 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -#define arch_clear_hugepage_flags(page) do { } while (0) +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_arch_1, &page->flags); +} static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep, unsigned long sz) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5bc888841eaf..406d940173ab 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -185,7 +185,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ -} __packed; +} __packed __aligned(8192); #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index f5ff9dbad8ac..f31a15044c24 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -24,6 +24,8 @@ typedef struct { unsigned int uses_skeys:1; /* The mmu context uses CMM. */ unsigned int uses_cmm:1; + /* The gmaps associated with this context are allowed to use huge pages. */ + unsigned int allow_gmap_hpage_1m:1; } mm_context_t; #define INIT_MM_CONTEXT(name) \ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index d16bc79c30bb..0717ee76885d 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 0; mm->context.uses_skeys = 0; mm->context.uses_cmm = 0; + mm->context.allow_gmap_hpage_1m = 0; #endif switch (mm->context.asce_limit) { case _REGION2_SIZE: diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index a01f81186e86..123dac3717b3 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_EXPOLINE +#ifdef CC_USING_EXPOLINE _LC_BR_R1 = __LC_BR_R1 @@ -189,7 +189,7 @@ _LC_BR_R1 = __LC_BR_R1 .macro BASR_EX rsave,rtarget,ruse=%r1 basr \rsave,\rtarget .endm -#endif +#endif /* CC_USING_EXPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 94f8db468c9b..10fe982f2b4b 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -51,6 +51,10 @@ struct zpci_fmb_fmt2 { u64 max_work_units; }; +struct zpci_fmb_fmt3 { + u64 tx_bytes; +}; + struct zpci_fmb { u32 format : 8; u32 fmt_ind : 24; @@ -66,6 +70,7 @@ struct zpci_fmb { struct zpci_fmb_fmt0 fmt0; struct zpci_fmb_fmt1 fmt1; struct zpci_fmb_fmt2 fmt2; + struct zpci_fmb_fmt3 fmt3; }; } __packed __aligned(128); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ab636089c60..0e7cb0dc9c33 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -268,8 +268,10 @@ static inline int is_module_addr(void *addr) #define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL /* Bits in the segment table entry */ -#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL -#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL +#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL +#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* page table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */ @@ -1101,7 +1103,8 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *sptep, pte_t *tptep, pte_t pte); void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep); -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long address, + pte_t *ptep); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, @@ -1116,6 +1119,10 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long addr, int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste); +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr); +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index 6090670df51f..e297bcfc476f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,11 +13,5 @@ int verify_sha256_digest(void); -extern u64 kernel_entry; -extern u64 kernel_type; - -extern u64 crash_start; -extern u64 crash_size; - #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index de11ecc99c7c..9c9970a5dfb1 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -262,7 +262,6 @@ struct qdio_outbuf_state { void *user; }; -#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 #define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 #define CHSC_AC1_INITIATE_INPUTQ 0x80 diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index 54f81f8ed662..724faede8ac5 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -4,6 +4,4 @@ #include <asm-generic/sections.h> -extern char _ehead[]; - #endif diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 9c30ebe046f3..1d66016f4170 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -9,8 +9,10 @@ #include <linux/const.h> #include <uapi/asm/setup.h> - +#define EP_OFFSET 0x10008 +#define EP_STRING "S390EP" #define PARMAREA 0x10400 +#define PARMAREA_END 0x11000 /* * Machine features detected in early.c diff --git a/arch/s390/include/uapi/asm/chsc.h b/arch/s390/include/uapi/asm/chsc.h index dc329aa03f76..83a574e95b3a 100644 --- a/arch/s390/include/uapi/asm/chsc.h +++ b/arch/s390/include/uapi/asm/chsc.h @@ -23,29 +23,29 @@ struct chsc_async_header { __u32 key : 4; __u32 : 28; struct subchannel_id sid; -} __attribute__ ((packed)); +}; struct chsc_async_area { struct chsc_async_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_async_header)]; -} __attribute__ ((packed)); +}; struct chsc_header { __u16 length; __u16 code; -} __attribute__ ((packed)); +}; struct chsc_sync_area { struct chsc_header header; __u8 data[CHSC_SIZE - sizeof(struct chsc_header)]; -} __attribute__ ((packed)); +}; struct chsc_response_struct { __u16 length; __u16 code; __u32 parms; __u8 data[CHSC_SIZE - 2 * sizeof(__u16) - sizeof(__u32)]; -} __attribute__ ((packed)); +}; struct chsc_chp_cd { struct chp_id chpid; diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 2fed39b26b42..dbfd1730e631 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -9,39 +9,21 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code -CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif -GCOV_PROFILE_als.o := n GCOV_PROFILE_early.o := n GCOV_PROFILE_early_nobss.o := n -KCOV_INSTRUMENT_als.o := n KCOV_INSTRUMENT_early.o := n KCOV_INSTRUMENT_early_nobss.o := n -UBSAN_SANITIZE_als.o := n UBSAN_SANITIZE_early.o := n UBSAN_SANITIZE_early_nobss.o := n # -# Use -march=z900 for als.c to be able to print an error -# message if the kernel is started on a machine which is too old -# -ifneq ($(CC_FLAGS_MARCH),-march=z900) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH) -CFLAGS_REMOVE_als.o += $(CC_FLAGS_EXPOLINE) -CFLAGS_als.o += -march=z900 -AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH) -AFLAGS_head.o += -march=z900 -endif - -CFLAGS_als.o += -D__NO_FORTIFY - -# # Passing null pointers is ok for smp code, since we access the lowcore here. # CFLAGS_smp.o := -Wno-nonnull @@ -61,13 +43,13 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o -extra-y += head.o head64.o vmlinux.lds +extra-y += head64.o vmlinux.lds obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) @@ -99,5 +81,5 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ -chkbss := head.o head64.o als.o early_nobss.o +chkbss := head64.o early_nobss.o include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 9f5ea9d87069..c3620bafc374 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -306,6 +306,15 @@ static void *kzalloc_panic(int len) return rc; } +static const char *nt_name(Elf64_Word type) +{ + const char *name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + name = KEXEC_CORE_NOTE_NAME; + return name; +} + /* * Initialize ELF note */ @@ -332,11 +341,26 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) { - const char *note_name = "LINUX"; + return nt_init_name(buf, type, desc, d_len, nt_name(type)); +} + +/* + * Calculate the size of ELF note + */ +static size_t nt_size_name(int d_len, const char *name) +{ + size_t size; - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - note_name = KEXEC_CORE_NOTE_NAME; - return nt_init_name(buf, type, desc, d_len, note_name); + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; +} + +static inline size_t nt_size(Elf64_Word type, int d_len) +{ + return nt_size_name(d_len, nt_name(type)); } /* @@ -375,6 +399,29 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) } /* + * Calculate size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); + size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); + size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); + size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); + size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); + size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); + size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); + size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + } + + return size; +} + +/* * Initialize prpsinfo note (new kernel) */ static void *nt_prpsinfo(void *ptr) @@ -429,6 +476,30 @@ static void *nt_vmcoreinfo(void *ptr) return nt_init_name(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); } +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = "VMCOREINFO"; + char nt_name[11]; + Elf64_Nhdr note; + void *addr; + + if (copy_oldmem_kernel(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return 0; + + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return 0; + + memset(nt_name, 0, sizeof(nt_name)); + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) + return 0; + + if (strcmp(nt_name, name) != 0) + return 0; + + return nt_size_name(note.n_descsz, name); +} + /* * Initialize final note (needed for /proc/vmcore code) */ @@ -539,6 +610,27 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) return ptr; } +static size_t get_elfcorehdr_size(int mem_chunk_cnt) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ + size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += mem_chunk_cnt * sizeof(Elf64_Phdr); + + return size; +} + /* * Create ELF core header (new kernel) */ @@ -566,8 +658,8 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) mem_chunk_cnt = get_mem_chunk_cnt(); - alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + - mem_chunk_cnt * sizeof(Elf64_Phdr); + alloc_size = get_elfcorehdr_size(mem_chunk_cnt); + hdr = kzalloc_panic(alloc_size); /* Init elf header */ ptr = ehdr_init(hdr, mem_chunk_cnt); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 827699eb48fa..5b28b434f8a1 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -331,8 +331,20 @@ static void __init setup_boot_command_line(void) append_to_cmdline(append_ipl_scpdata); } +static void __init check_image_bootable(void) +{ + if (!memcmp(EP_STRING, (void *)EP_OFFSET, strlen(EP_STRING))) + return; + + sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n"); + sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n"); + sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n"); + disabled_wait(0xbadb007); +} + void __init startup_init(void) { + check_image_bootable(); time_early_init(); init_kernel_storage_key(); lockdep_off(); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 961abfac2c5f..472fa2f1a4a5 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -83,7 +83,6 @@ long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user DECLARE_PER_CPU(u64, mt_cycles[8]); -void verify_facilities(void); void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 791cb9000e86..6d14ad42ba88 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -48,11 +48,23 @@ ENTRY(startup_continue) # Early machine initialization and detection functions. # brasl %r14,startup_init - lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, - # virtual and never return ... + +# check control registers + stctg %c0,%c15,0(%r15) + oi 6(%r15),0x60 # enable sigp emergency & external call + oi 4(%r15),0x10 # switch on low address proctection + lctlg %c0,%c15,0(%r15) + + lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess + brasl %r14,start_kernel # go to C code +# +# We returned from start_kernel ?!? PANIK +# + basr %r13,0 + lpswe .Ldw-.(%r13) # load disabled wait psw + .align 16 .LPG1: -.Lentry:.quad 0x0000000180000000,_stext .Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space .quad 0 # cr1: primary space segment table .quad .Lduct # cr2: dispatchable unit control table @@ -85,30 +97,5 @@ ENTRY(startup_continue) .endr .Llinkage_stack: .long 0,0,0x89000000,0,0,0,0x8a000000,0 - -ENTRY(_ehead) - - .org 0x100000 - 0x11000 # head.o ends at 0x11000 -# -# startup-code, running in absolute addressing mode -# -ENTRY(_stext) - basr %r13,0 # get base -.LPG3: -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG3(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code -# -# We returned from start_kernel ?!? PANIK -# - basr %r13,0 - lpswe .Ldw-.(%r13) # load disabled wait psw - - .align 8 .Ldw: .quad 0x0002000180000000,0x0000000000000000 .Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 18ae7b9c71d6..bdddaae96559 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -35,6 +35,8 @@ early_param("nospec", nospec_setup_early); static int __init nospec_report(void) { + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) @@ -56,7 +58,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early); void __init nospec_auto_detect(void) { - if (IS_ENABLED(CC_USING_EXPOLINE)) { + if (test_facility(156)) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (IS_ENABLED(CC_USING_EXPOLINE)) + nospec_disable = 1; + __clear_facility(82, S390_lowcore.alt_stfle_fac_list); + } else if (IS_ENABLED(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 8affad5f18cb..e30e580ae362 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -13,6 +13,8 @@ ssize_t cpu_show_spectre_v1(struct device *dev, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + if (test_facility(156)) + return sprintf(buf, "Mitigation: etokens\n"); if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) return sprintf(buf, "Mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0292d68e7dde..cb198d4a6dca 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -2,7 +2,7 @@ /* * Performance event support for the System z CPU-measurement Sampling Facility * - * Copyright IBM Corp. 2013 + * Copyright IBM Corp. 2013, 2018 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> */ #define KMSG_COMPONENT "cpum_sf" @@ -1587,6 +1587,17 @@ static void aux_buffer_free(void *data) "%lu SDBTs\n", num_sdbt); } +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + memcpy(&te->progusage2, &tod_clock_base[1], 8); +} + /* * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling * @cpu: On which to allocate, -1 means current @@ -1666,6 +1677,7 @@ static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, /* Tail is the entry in a SDBT */ *tail = (unsigned long)pages[i]; aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); } sfb->num_sdb = nr_pages; diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 54e2d634b849..4352a504f235 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -12,9 +12,6 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) { freg_t fp; - if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) - return 0; - if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) return regs->gprs[idx]; @@ -33,7 +30,8 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) if (idx == PERF_REG_S390_PC) return regs->psw.addr; - return regs->gprs[idx]; + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; } #define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index d82a9ec64ea9..c637c12f9e37 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -674,12 +674,12 @@ static void __init reserve_kernel(void) #ifdef CONFIG_DMA_API_DEBUG /* * DMA_API_DEBUG code stumbles over addresses from the - * range [_ehead, _stext]. Mark the memory as reserved + * range [PARMAREA_END, _stext]. Mark the memory as reserved * so it is not used for CONFIG_DMA_API_DEBUG=y. */ memblock_reserve(0, PFN_PHYS(start_pfn)); #else - memblock_reserve(0, (unsigned long)_ehead); + memblock_reserve(0, PARMAREA_END); memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn) - (unsigned long)_stext); #endif diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 54f5496913fa..12f80d1f0415 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -59,6 +59,8 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2) } EXPORT_SYMBOL(stsi); +#ifdef CONFIG_PROC_FS + static bool convert_ext_name(unsigned char encoding, char *name, size_t len) { switch (encoding) { @@ -301,6 +303,8 @@ static int __init sysinfo_create_proc(void) } device_initcall(sysinfo_create_proc); +#endif /* CONFIG_PROC_FS */ + /* * Service levels interface. */ diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b6e0397f66d..e8184a15578a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -579,41 +579,33 @@ early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int len; + int enabled = topology_is_enabled(); int new_mode; - char buf[2]; + int zero = 0; + int one = 1; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = &zero, + .extra2 = &one, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (!*lenp || *ppos) { - *lenp = 0; - return 0; - } - if (!write) { - strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", - ARRAY_SIZE(buf)); - len = strnlen(buf, ARRAY_SIZE(buf)); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - goto out; - } - len = *lenp; - if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - if (buf[0] != '0' && buf[0] != '1') - return -EINVAL; mutex_lock(&smp_cpu_state_mutex); - new_mode = topology_get_mode(buf[0] == '1'); + new_mode = topology_get_mode(enabled); if (topology_mode != new_mode) { topology_mode = new_mode; topology_schedule_update(); } mutex_unlock(&smp_cpu_state_mutex); topology_flush_work(); -out: - *lenp = len; - *ppos += len; - return 0; + + return rc; } static struct ctl_table topology_ctl_table[] = { diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 09abae40f917..3031cc6dd0ab 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -47,7 +47,7 @@ static struct page **vdso64_pagelist; */ unsigned int __read_mostly vdso_enabled = 1; -static int vdso_fault(const struct vm_special_mapping *sm, +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page **vdso_pagelist; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index f0414f52817b..b43f8d33a369 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -19,7 +19,7 @@ OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) -ENTRY(startup) +ENTRY(startup_continue) jiffies = jiffies_64; PHDRS { @@ -30,16 +30,12 @@ PHDRS { SECTIONS { - . = 0x00000000; + . = 0x100000; + _stext = .; /* Start of text section */ .text : { /* Text and read-only data */ + _text = .; HEAD_TEXT - /* - * E.g. perf doesn't like symbols starting at address zero, - * therefore skip the initial PSW and channel program located - * at address zero and let _text start at 0x200. - */ - _text = 0x200; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT @@ -47,6 +43,7 @@ SECTIONS KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + *(.text.*_indirect_*) *(.fixup) *(.gnu.warning) } :text = 0x0700 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3b7a5151b6a5..f9d90337e64a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -172,6 +172,10 @@ static int nested; module_param(nested, int, S_IRUGO); MODULE_PARM_DESC(nested, "Nested virtualization support"); +/* allow 1m huge page guest backing, if !nested */ +static int hpage; +module_param(hpage, int, 0444); +MODULE_PARM_DESC(hpage, "1m huge page backing support"); /* * For now we handle at most 16 double words as this is what the s390 base @@ -475,6 +479,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_AIS_MIGRATION: r = 1; break; + case KVM_CAP_S390_HPAGE_1M: + r = 0; + if (hpage) + r = 1; + break; case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; @@ -511,19 +520,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) } static void kvm_s390_sync_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot) + struct kvm_memory_slot *memslot) { + int i; gfn_t cur_gfn, last_gfn; - unsigned long address; + unsigned long gaddr, vmaddr; struct gmap *gmap = kvm->arch.gmap; + DECLARE_BITMAP(bitmap, _PAGE_ENTRIES); - /* Loop over all guest pages */ + /* Loop over all guest segments */ + cur_gfn = memslot->base_gfn; last_gfn = memslot->base_gfn + memslot->npages; - for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { - address = gfn_to_hva_memslot(memslot, cur_gfn); + for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) { + gaddr = gfn_to_gpa(cur_gfn); + vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); + if (kvm_is_error_hva(vmaddr)) + continue; + + bitmap_zero(bitmap, _PAGE_ENTRIES); + gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); + for (i = 0; i < _PAGE_ENTRIES; i++) { + if (test_bit(i, bitmap)) + mark_page_dirty(kvm, cur_gfn + i); + } - if (test_and_clear_guest_dirty(gmap->mm, address)) - mark_page_dirty(kvm, cur_gfn); if (fatal_signal_pending(current)) return; cond_resched(); @@ -667,6 +687,27 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_HPAGE_1M: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) + r = -EBUSY; + else if (!hpage || kvm->arch.use_cmma) + r = -EINVAL; + else { + r = 0; + kvm->mm->context.allow_gmap_hpage_1m = 1; + /* + * We might have to create fake 4k page + * tables. To avoid that the hardware works on + * stale PGSTEs, we emulate these instructions. + */ + kvm->arch.use_skf = 0; + kvm->arch.use_pfmfi = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -714,10 +755,13 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (!sclp.has_cmma) break; - ret = -EBUSY; VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support"); mutex_lock(&kvm->lock); - if (!kvm->created_vcpus) { + if (kvm->created_vcpus) + ret = -EBUSY; + else if (kvm->mm->context.allow_gmap_hpage_1m) + ret = -EINVAL; + else { kvm->arch.use_cmma = 1; /* Not compatible with cmma. */ kvm->arch.use_pfmfi = 0; @@ -1540,6 +1584,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) uint8_t *keys; uint64_t hva; int srcu_idx, i, r = 0; + bool unlocked; if (args->flags != 0) return -EINVAL; @@ -1564,9 +1609,11 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + i = 0; down_read(¤t->mm->mmap_sem); srcu_idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < args->count; i++) { + while (i < args->count) { + unlocked = false; hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; @@ -1580,8 +1627,14 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) } r = set_guest_storage_key(current->mm, hva, keys[i], 0); - if (r) - break; + if (r) { + r = fixup_user_fault(current, current->mm, hva, + FAULT_FLAG_WRITE, &unlocked); + if (r) + break; + } + if (!r) + i++; } srcu_read_unlock(&kvm->srcu, srcu_idx); up_read(¤t->mm->mmap_sem); @@ -4082,6 +4135,11 @@ static int __init kvm_s390_init(void) return -ENODEV; } + if (nested && hpage) { + pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + return -EINVAL; + } + for (i = 0; i < 16; i++) kvm_s390_fac_base[i] |= S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index eb0eb60c7be6..cfc5a62329f6 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -246,9 +246,10 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) static int handle_iske(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long gaddr, vmaddr; unsigned char key; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_iske++; @@ -262,18 +263,28 @@ static int handle_iske(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = get_guest_storage_key(current->mm, addr, &key); - up_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, vmaddr, &key); + + if (rc) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + up_read(¤t->mm->mmap_sem); vcpu->run->s.regs.gprs[reg1] &= ~0xff; vcpu->run->s.regs.gprs[reg1] |= key; return 0; @@ -281,8 +292,9 @@ static int handle_iske(struct kvm_vcpu *vcpu) static int handle_rrbe(struct kvm_vcpu *vcpu) { - unsigned long addr; + unsigned long vmaddr, gaddr; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_rrbe++; @@ -296,19 +308,27 @@ static int handle_rrbe(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, ®1, ®2); - addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; - addr = kvm_s390_logical_to_effective(vcpu, addr); - addr = kvm_s390_real_to_abs(vcpu, addr); - addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); - if (kvm_is_error_hva(addr)) + gaddr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + gaddr = kvm_s390_logical_to_effective(vcpu, gaddr); + gaddr = kvm_s390_real_to_abs(vcpu, gaddr); + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - +retry: + unlocked = false; down_read(¤t->mm->mmap_sem); - rc = reset_guest_reference_bit(current->mm, addr); - up_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, vmaddr); + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + if (!rc) { + up_read(¤t->mm->mmap_sem); + goto retry; + } + } if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - + up_read(¤t->mm->mmap_sem); kvm_s390_set_psw_cc(vcpu, rc); return 0; } @@ -323,6 +343,7 @@ static int handle_sske(struct kvm_vcpu *vcpu) unsigned long start, end; unsigned char key, oldkey; int reg1, reg2; + bool unlocked; int rc; vcpu->stat.instruction_sske++; @@ -355,19 +376,28 @@ static int handle_sske(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unsigned long vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + unlocked = false; - if (kvm_is_error_hva(addr)) + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey, m3 & SSKE_NQ, m3 & SSKE_MR, m3 & SSKE_MC); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - start += PAGE_SIZE; + + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; } if (m3 & (SSKE_MC | SSKE_MR)) { @@ -948,15 +978,16 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } while (start != end) { - unsigned long useraddr; + unsigned long vmaddr; + bool unlocked = false; /* Translate guest address to host address */ - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); - if (kvm_is_error_hva(useraddr)) + vmaddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + if (kvm_is_error_hva(vmaddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (clear_user((void __user *)useraddr, PAGE_SIZE)) + if (clear_user((void __user *)vmaddr, PAGE_SIZE)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } @@ -966,14 +997,20 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(¤t->mm->mmap_sem); - rc = cond_set_guest_storage_key(current->mm, useraddr, + rc = cond_set_guest_storage_key(current->mm, vmaddr, key, NULL, nq, mr, mc); - up_read(¤t->mm->mmap_sem); - if (rc < 0) + if (rc < 0) { + rc = fixup_user_fault(current, current->mm, vmaddr, + FAULT_FLAG_WRITE, &unlocked); + rc = !rc ? -EAGAIN : rc; + } + if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } - start += PAGE_SIZE; + up_read(¤t->mm->mmap_sem); + if (rc >= 0) + start += PAGE_SIZE; + } } if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) { diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 2311f15be9cf..40c4d59c926e 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -17,7 +17,7 @@ ENTRY(memmove) ltgr %r4,%r4 lgr %r1,%r2 - bzr %r14 + jz .Lmemmove_exit aghi %r4,-1 clgr %r2,%r3 jnh .Lmemmove_forward @@ -36,6 +36,7 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) +.Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) @@ -65,7 +66,7 @@ EXPORT_SYMBOL(memmove) */ ENTRY(memset) ltgr %r4,%r4 - bzr %r14 + jz .Lmemset_exit ltgr %r3,%r3 jnz .Lmemset_fill aghi %r4,-1 @@ -80,6 +81,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) +.Lmemset_exit: BR_EX %r14 .Lmemset_fill: cghi %r4,1 @@ -115,7 +117,7 @@ EXPORT_SYMBOL(memset) */ ENTRY(memcpy) ltgr %r4,%r4 - bzr %r14 + jz .Lmemcpy_exit aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 @@ -124,6 +126,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) +.Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) @@ -145,9 +148,9 @@ EXPORT_SYMBOL(memcpy) .macro __MEMSET bits,bytes,insn ENTRY(__memset\bits) ltgr %r4,%r4 - bzr %r14 + jz .L__memset_exit\bits cghi %r4,\bytes - je .L__memset_exit\bits + je .L__memset_store\bits aghi %r4,-(\bytes+1) srlg %r5,%r4,8 ltgr %r5,%r5 @@ -163,8 +166,9 @@ ENTRY(__memset\bits) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) BR_EX %r14 -.L__memset_exit\bits: +.L__memset_store\bits: \insn %r3,0(%r2) +.L__memset_exit\bits: BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 6cf024eb2085..510a18299196 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -191,12 +191,7 @@ static void cmm_set_timer(void) del_timer(&cmm_timer); return; } - if (timer_pending(&cmm_timer)) { - if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) - return; - } - cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; - add_timer(&cmm_timer); + mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ); } static void cmm_timer_fn(struct timer_list *unused) @@ -251,45 +246,42 @@ static int cmm_skip_blanks(char *cp, char **endp) return str != cp; } -static struct ctl_table cmm_table[]; - static int cmm_pages_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char buf[16], *p; - unsigned int len; - long nr; + long nr = cmm_get_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; - if (!*lenp || (*ppos && !write)) { - *lenp = 0; - return 0; - } + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; - if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) - return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; - cmm_skip_blanks(buf, &p); - nr = simple_strtoul(p, &p, 0); - if (ctl == &cmm_table[0]) - cmm_set_pages(nr); - else - cmm_add_timed_pages(nr); - } else { - if (ctl == &cmm_table[0]) - nr = cmm_get_pages(); - else - nr = cmm_get_timed_pages(); - len = sprintf(buf, "%ld\n", nr); - if (len > *lenp) - len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; - } - *lenp = len; - *ppos += len; + cmm_set_pages(nr); + return 0; +} + +static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + long nr = cmm_get_timed_pages(); + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &nr, + .maxlen = sizeof(long), + }; + int rc; + + rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + cmm_add_timed_pages(nr); return 0; } @@ -338,7 +330,7 @@ static struct ctl_table cmm_table[] = { { .procname = "cmm_timed_pages", .mode = 0644, - .proc_handler = cmm_pages_handler, + .proc_handler = cmm_timed_pages_handler, }, { .procname = "cmm_timeout", diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 6ad15d3fab81..84111a43ea29 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -80,7 +80,7 @@ struct qin64 { struct dcss_segment { struct list_head list; char dcss_name[8]; - char res_name[15]; + char res_name[16]; unsigned long start_addr; unsigned long end; atomic_t ref_count; @@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long memcpy(&seg->res_name, seg->dcss_name, 8); EBCASC(seg->res_name, 8); seg->res_name[8] = '\0'; - strncat(seg->res_name, " (DCSS)", 7); + strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name)); seg->res->name = seg->res_name; rc = seg->vm_segtype; if (rc == SEG_TYPE_SC || diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e074480d3598..4cc3f06b0ab3 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -502,6 +502,8 @@ retry: /* No reason to continue if interrupted by SIGKILL. */ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { fault = VM_FAULT_SIGNAL; + if (flags & FAULT_FLAG_RETRY_NOWAIT) + goto out_up; goto out; } if (unlikely(fault & VM_FAULT_ERROR)) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index bc56ec8abcf7..bb44990c8212 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2,8 +2,10 @@ /* * KVM guest address space mapping code * - * Copyright IBM Corp. 2007, 2016 + * Copyright IBM Corp. 2007, 2016, 2018 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> */ #include <linux/kernel.h> @@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table, rcu_read_unlock(); } +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new, + unsigned long gaddr); + /** * gmap_link - set up shadow page tables to connect a host to a guest address * @gmap: pointer to guest mapping meta data structure @@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) p4d_t *p4d; pud_t *pud; pmd_t *pmd; + u64 unprot; int rc; BUG_ON(gmap_is_shadow(gmap)); @@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) return -EFAULT; pmd = pmd_offset(pud, vmaddr); VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) + /* Are we allowed to use huge pages? */ + if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m) return -EFAULT; /* Link gmap segment table entry location to page table. */ rc = radix_tree_preload(GFP_KERNEL); @@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; + if (!rc) { + if (pmd_large(*pmd)) { + *table = (pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) + | _SEGMENT_ENTRY_GMAP_UC; + } else + *table = pmd_val(*pmd) & + _SEGMENT_ENTRY_HARDWARE_BITS; + } + } else if (*table & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) { + unprot = (u64)*table; + unprot &= ~_SEGMENT_ENTRY_PROTECT; + unprot |= _SEGMENT_ENTRY_GMAP_UC; + gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr); + } spin_unlock(&gmap->guest_table_lock); spin_unlock(ptl); radix_tree_preload_end(); @@ -690,6 +708,12 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) vmaddr |= gaddr & ~PMD_MASK; /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); + /* + * We do not discard pages that are backed by + * hugetlbfs, so we don't have to refault them. + */ + if (vma && is_vm_hugetlb_page(vma)) + continue; size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); zap_page_range(vma, vmaddr, size); } @@ -864,7 +888,128 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr, */ static void gmap_pte_op_end(spinlock_t *ptl) { - spin_unlock(ptl); + if (ptl) + spin_unlock(ptl); +} + +/** + * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock + * and return the pmd pointer + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * + * Returns a pointer to the pmd for a guest address, or NULL + */ +static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) +{ + pmd_t *pmdp; + + BUG_ON(gmap_is_shadow(gmap)); + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); + + if (!pmdp || pmd_none(*pmdp)) { + spin_unlock(&gmap->guest_table_lock); + return NULL; + } + + /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ + if (!pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); + return pmdp; +} + +/** + * gmap_pmd_op_end - release the guest_table_lock if needed + * @gmap: pointer to the guest mapping meta data structure + * @pmdp: pointer to the pmd + */ +static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) +{ + if (pmd_large(*pmdp)) + spin_unlock(&gmap->guest_table_lock); +} + +/* + * gmap_protect_pmd - remove access rights to memory and set pmd notification bits + * @pmdp: pointer to the pmd to be protected + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns: + * 0 if successfully protected + * -EAGAIN if a fixup is needed + * -EINVAL if unsupported notifier bits have been specified + * + * Expected to be called with sg->mm->mmap_sem in read and + * guest_table_lock held. + */ +static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; + int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; + pmd_t new = *pmdp; + + /* Fixup needed */ + if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) + return -EAGAIN; + + if (prot == PROT_NONE && !pmd_i) { + pmd_val(new) |= _SEGMENT_ENTRY_INVALID; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (prot == PROT_READ && !pmd_p) { + pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID; + pmd_val(new) |= _SEGMENT_ENTRY_PROTECT; + gmap_pmdp_xchg(gmap, pmdp, new, gaddr); + } + + if (bits & GMAP_NOTIFY_MPROT) + pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; + + /* Shadow GMAP protection needs split PMDs */ + if (bits & GMAP_NOTIFY_SHADOW) + return -EINVAL; + + return 0; +} + +/* + * gmap_protect_pte - remove access rights to memory and set pgste bits + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @pmdp: pointer to the pmd associated with the pte + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: notification bits to set + * + * Returns 0 if successfully protected, -ENOMEM if out of memory and + * -EAGAIN if a fixup is needed. + * + * Expected to be called with sg->mm->mmap_sem in read + */ +static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, + pmd_t *pmdp, int prot, unsigned long bits) +{ + int rc; + pte_t *ptep; + spinlock_t *ptl = NULL; + unsigned long pbits = 0; + + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return -EAGAIN; + + ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl); + if (!ptep) + return -ENOMEM; + + pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0; + pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0; + /* Protect and unlock. */ + rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits); + gmap_pte_op_end(ptl); + return rc; } /* @@ -883,30 +1028,45 @@ static void gmap_pte_op_end(spinlock_t *ptl) static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, unsigned long len, int prot, unsigned long bits) { - unsigned long vmaddr; - spinlock_t *ptl; - pte_t *ptep; + unsigned long vmaddr, dist; + pmd_t *pmdp; int rc; BUG_ON(gmap_is_shadow(gmap)); while (len) { rc = -EAGAIN; - ptep = gmap_pte_op_walk(gmap, gaddr, &ptl); - if (ptep) { - rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits); - gmap_pte_op_end(ptl); + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (pmdp) { + if (!pmd_large(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + len -= PAGE_SIZE; + gaddr += PAGE_SIZE; + } + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, + bits); + if (!rc) { + dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); + len = len < dist ? 0 : len - dist; + gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; + } + } + gmap_pmd_op_end(gmap, pmdp); } if (rc) { + if (rc == -EINVAL) + return rc; + + /* -EAGAIN, fixup of userspace mm and gmap */ vmaddr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(vmaddr)) return vmaddr; rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); if (rc) return rc; - continue; } - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; } return 0; } @@ -935,7 +1095,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, if (!MACHINE_HAS_ESOP && prot == PROT_READ) return -EINVAL; down_read(&gmap->mm->mmap_sem); - rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT); + rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); up_read(&gmap->mm->mmap_sem); return rc; } @@ -1474,6 +1634,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, unsigned long limit; int rc; + BUG_ON(parent->mm->context.allow_gmap_hpage_1m); BUG_ON(gmap_is_shadow(parent)); spin_lock(&parent->shadow_lock); sg = gmap_find_shadow(parent, asce, edat_level); @@ -1526,7 +1687,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, down_read(&parent->mm->mmap_sem); rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, PGSTE_VSIE_BIT); + PROT_READ, GMAP_NOTIFY_SHADOW); up_read(&parent->mm->mmap_sem); spin_lock(&parent->shadow_lock); new->initialized = true; @@ -2092,6 +2253,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, } EXPORT_SYMBOL_GPL(ptep_notify); +static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); +} + +/** + * gmap_pmdp_xchg - exchange a gmap pmd with another + * @gmap: pointer to the guest address space structure + * @pmdp: pointer to the pmd entry + * @new: replacement entry + * @gaddr: the affected guest address + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, + unsigned long gaddr) +{ + gaddr &= HPAGE_MASK; + pmdp_notify_gmap(gmap, pmdp, gaddr); + pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN; + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, + IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *pmdp = new; +} + +static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, + int purge) +{ + pmd_t *pmdp; + struct gmap *gmap; + unsigned long gaddr; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (pmdp) { + gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (purge) + __pmdp_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} + +/** + * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without + * flushing + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 0); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate); + +/** + * gmap_pmdp_csp - csp all affected guest pmd entries + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr) +{ + gmap_pmdp_clear(mm, vmaddr, 1); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_csp); + +/** + * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_LOCAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); + +/** + * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry + * @mm: pointer to the process mm_struct + * @vmaddr: virtual address in the process address space + */ +void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) +{ + unsigned long *entry, gaddr; + struct gmap *gmap; + pmd_t *pmdp; + + rcu_read_lock(); + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (entry) { + pmdp = (pmd_t *)entry; + gaddr = __gmap_segment_gaddr(entry); + pmdp_notify_gmap(gmap, pmdp, gaddr); + WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC)); + if (MACHINE_HAS_TLB_GUEST) + __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, + gmap->asce, IDTE_GLOBAL); + else if (MACHINE_HAS_IDTE) + __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); + else + __pmdp_csp(pmdp); + *entry = _SEGMENT_ENTRY_EMPTY; + } + spin_unlock(&gmap->guest_table_lock); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global); + +/** + * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status + * @gmap: pointer to guest address space + * @pmdp: pointer to the pmd to be tested + * @gaddr: virtual address in the guest address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp, + unsigned long gaddr) +{ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) + return false; + + /* Already protected memory, which did not change is clean */ + if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT && + !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC)) + return false; + + /* Clear UC indication and reset protection */ + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC; + gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0); + return true; +} + +/** + * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment + * @gmap: pointer to guest address space + * @bitmap: dirty bitmap for this pmd + * @gaddr: virtual address in the guest address space + * @vmaddr: virtual address in the host address space + * + * This function is assumed to be called with the guest_table_lock + * held. + */ +void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], + unsigned long gaddr, unsigned long vmaddr) +{ + int i; + pmd_t *pmdp; + pte_t *ptep; + spinlock_t *ptl; + + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return; + + if (pmd_large(*pmdp)) { + if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr)) + bitmap_fill(bitmap, _PAGE_ENTRIES); + } else { + for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) { + ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl); + if (!ptep) + continue; + if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep)) + set_bit(i, bitmap); + spin_unlock(ptl); + } + } + gmap_pmd_op_end(gmap, pmdp); +} +EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); + static inline void thp_split_mm(struct mm_struct *mm) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -2168,17 +2548,45 @@ EXPORT_SYMBOL_GPL(s390_enable_sie); * Enable storage key handling from now on and initialize the storage * keys with the default key. */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) +static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) { /* Clear storage key */ ptep_zap_key(walk->mm, addr, pte); return 0; } +static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, + unsigned long hmask, unsigned long next, + struct mm_walk *walk) +{ + pmd_t *pmd = (pmd_t *)pte; + unsigned long start, end; + struct page *page = pmd_page(*pmd); + + /* + * The write check makes sure we do not set a key on shared + * memory. This is needed as the walker does not differentiate + * between actual guest memory and the process executable or + * shared libraries. + */ + if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID || + !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE)) + return 0; + + start = pmd_val(*pmd) & HPAGE_MASK; + end = start + HPAGE_SIZE - 1; + __storage_key_init_range(start, end); + set_bit(PG_arch_1, &page->flags); + return 0; +} + int s390_enable_skey(void) { - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_walk walk = { + .hugetlb_entry = __s390_enable_skey_hugetlb, + .pte_entry = __s390_enable_skey_pte, + }; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int rc = 0; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470..b0246c705a19 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste) return pte; } +static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste) +{ + struct page *page; + unsigned long size, paddr; + + if (!mm_uses_skeys(mm) || + rste & _SEGMENT_ENTRY_INVALID) + return; + + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { + page = pud_page(__pud(rste)); + size = PUD_SIZE; + paddr = rste & PUD_MASK; + } else { + page = pmd_page(__pmd(rste)); + size = PMD_SIZE; + paddr = rste & PMD_MASK; + } + + if (!test_and_set_bit(PG_arch_1, &page->flags)) + __storage_key_init_range(paddr, paddr + size - 1); +} + void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { @@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE; else rste |= _SEGMENT_ENTRY_LARGE; + clear_huge_pte_skeys(mm, rste); pte_val(*ptep) = rste; } diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index 382153ff17e3..dc3cede7f2ec 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable) list_for_each(l, &zone->free_area[order].free_list[t]) { page = list_entry(l, struct page, lru); if (make_stable) - set_page_stable_dat(page, 0); + set_page_stable_dat(page, order); else set_page_unused(page, order); } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index c44171588d08..f8c6faab41f4 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -14,7 +14,7 @@ static inline unsigned long sske_frame(unsigned long addr, unsigned char skey) { - asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0" + asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0" : [addr] "+a" (addr) : [skey] "d" (skey)); return addr; } @@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end) { unsigned long boundary, size; - if (!PAGE_DEFAULT_KEY) - return; while (start < end) { if (MACHINE_HAS_EDAT1) { /* set storage keys for a 1MB frame */ @@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end) continue; } } - page_set_storage_key(start, PAGE_DEFAULT_KEY, 0); + page_set_storage_key(start, PAGE_DEFAULT_KEY, 1); start += PAGE_SIZE; } } diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index e3bd5627afef..76d89ee8b428 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -28,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = { .data = &page_table_allocate_pgste, .maxlen = sizeof(int), .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = &page_table_allocate_pgste_min, .extra2 = &page_table_allocate_pgste_max, }, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 301e466e4263..f2cc7da473e4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm, mm->context.asce, IDTE_LOCAL); else __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_local(mm, addr); } static inline void pmdp_idte_global(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) { - if (MACHINE_HAS_TLB_GUEST) + if (MACHINE_HAS_TLB_GUEST) { __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE, mm->context.asce, IDTE_GLOBAL); - else if (MACHINE_HAS_IDTE) + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else if (MACHINE_HAS_IDTE) { __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL); - else + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_idte_global(mm, addr); + } else { __pmdp_csp(pmdp); + if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m) + gmap_pmdp_csp(mm, addr); + } } static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, @@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, cpumask_of(smp_processor_id()))) { pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; + if (mm_has_pgste(mm)) + gmap_pmdp_invalidate(mm, addr); } else { pmdp_idte_global(mm, addr, pmdp); } @@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + pmd = pmd_alloc(mm, pud, addr); + return pmd; +} + pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) { @@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) /* * Test and reset if a guest page is dirty */ -bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) +bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - spinlock_t *ptl; - pgd_t *pgd; - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; pgste_t pgste; - pte_t *ptep; pte_t pte; bool dirty; int nodat; - pgd = pgd_offset(mm, addr); - p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return false; - pud = pud_alloc(mm, p4d, addr); - if (!pud) - return false; - pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return false; - /* We can't run guests backed by huge pages, but userspace can - * still set them up and then try to migrate them without any - * migration support. - */ - if (pmd_large(*pmd)) - return true; - - ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (unlikely(!ptep)) - return false; - pgste = pgste_get_lock(ptep); dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); pgste_val(pgste) &= ~PGSTE_UC_BIT; @@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) *ptep = pte; } pgste_set_unlock(ptep, pgste); - - spin_unlock(ptl); return dirty; } -EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); +EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq) { - unsigned long keyul; + unsigned long keyul, paddr; spinlock_t *ptl; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + /* + * Huge pmds need quiescing operations, they are + * always mapped. + */ + page_set_storage_key(paddr, key, 1); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits, skey; + unsigned long bits, skey; - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); + paddr = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(paddr); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, skey, !nq); + page_set_storage_key(paddr, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; } @@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key); int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + unsigned long paddr; pgste_t old, new; + pmd_t *pmdp; pte_t *ptep; int cc = 0; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + spin_unlock(ptl); + return -EFAULT; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + cc = page_reset_referenced(paddr); + spin_unlock(ptl); + return cc; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pgste_val(new) &= ~PGSTE_GR_BIT; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + paddr = pte_val(*ptep) & PAGE_MASK; + cc = page_reset_referenced(paddr); /* Merge real referenced bit into host-set */ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; } @@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { + unsigned long paddr; spinlock_t *ptl; pgste_t pgste; + pmd_t *pmdp; pte_t *ptep; - ptep = get_locked_pte(mm, addr, &ptl); + pmdp = pmd_alloc_map(mm, addr); + if (unlikely(!pmdp)) + return -EFAULT; + + ptl = pmd_lock(mm, pmdp); + if (!pmd_present(*pmdp)) { + /* Not yet mapped memory has a zero key */ + spin_unlock(ptl); + *key = 0; + return 0; + } + + if (pmd_large(*pmdp)) { + paddr = pmd_val(*pmdp) & HPAGE_MASK; + paddr |= addr & ~HPAGE_MASK; + *key = page_get_storage_key(paddr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + + ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl); if (unlikely(!ptep)) return -EFAULT; pgste = pgste_get_lock(ptep); *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + paddr = pte_val(*ptep) & PAGE_MASK; if (!(pte_val(*ptep) & _PAGE_INVALID)) - *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(paddr); /* Reflect guest's logical view, not physical */ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5f0234ec8038..d7052cbe984f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -485,8 +485,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) /* br %r1 */ _EMIT2(0x07f1); } else { - /* larl %r1,.+14 */ - EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); /* ex 0,S390_lowcore.br_r1_tampoline */ EMIT4_DISP(0x44000000, REG_0, REG_0, offsetof(struct lowcore, br_r1_trampoline)); diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c index 06a80434cfe6..5bd374491f94 100644 --- a/arch/s390/numa/numa.c +++ b/arch/s390/numa/numa.c @@ -134,6 +134,8 @@ void __init numa_setup(void) { pr_info("NUMA mode: %s\n", mode->name); nodes_clear(node_possible_map); + /* Initially attach all possible CPUs to node 0. */ + cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); if (mode->setup) mode->setup(); numa_setup_memory(); @@ -141,20 +143,6 @@ void __init numa_setup(void) } /* - * numa_init_early() - Initialization initcall - * - * This runs when only one CPU is online and before the first - * topology update is called for by the scheduler. - */ -static int __init numa_init_early(void) -{ - /* Attach all possible CPUs to node 0 for now. */ - cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask); - return 0; -} -early_initcall(numa_init_early); - -/* * numa_init_late() - Initialization initcall * * Register NUMA nodes. diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index b482e95b6249..57f7cdac70a3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -48,6 +48,10 @@ static char *pci_fmt2_names[] = { "Maximum work units", }; +static char *pci_fmt3_names[] = { + "Transmitted bytes", +}; + static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", @@ -112,6 +116,10 @@ static int pci_perf_show(struct seq_file *m, void *v) pci_fmb_show(m, pci_fmt2_names, ARRAY_SIZE(pci_fmt2_names), &zdev->fmb->fmt2.consumed_work_units); break; + case 3: + pci_fmb_show(m, pci_fmt3_names, ARRAY_SIZE(pci_fmt3_names), + &zdev->fmb->fmt3.tx_bytes); + break; default: seq_puts(m, "Unknown format\n"); } diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index 1ace023cbdce..8d61218a71aa 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -7,13 +7,13 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o targets += $(purgatory-y) purgatory.ro kexec-purgatory.c PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) -$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S +$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) -$(obj)/string.o: $(srctree)/arch/s390/lib/string.c +$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib @@ -21,8 +21,9 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/s390/purgatory/head.S b/arch/s390/purgatory/head.S index 660c96a05a9b..2e3707b12edd 100644 --- a/arch/s390/purgatory/head.S +++ b/arch/s390/purgatory/head.S @@ -243,33 +243,26 @@ gprregs: .quad 0 .endr -purgatory_sha256_digest: - .global purgatory_sha256_digest - .rept 32 /* SHA256_DIGEST_SIZE */ - .byte 0 - .endr - -purgatory_sha_regions: - .global purgatory_sha_regions - .rept 16 * __KEXEC_SHA_REGION_SIZE /* KEXEC_SEGMENTS_MAX */ - .byte 0 - .endr - -kernel_entry: - .global kernel_entry - .quad 0 - -kernel_type: - .global kernel_type - .quad 0 - -crash_start: - .global crash_start - .quad 0 +/* Macro to define a global variable with name and size (in bytes) to be + * shared with C code. + * + * Add the .size and .type attribute to satisfy checks on the Elf_Sym during + * purgatory load. + */ +.macro GLOBAL_VARIABLE name,size +\name: + .global \name + .size \name,\size + .type \name,object + .skip \size,0 +.endm -crash_size: - .global crash_size - .quad 0 +GLOBAL_VARIABLE purgatory_sha256_digest,32 +GLOBAL_VARIABLE purgatory_sha_regions,16*__KEXEC_SHA_REGION_SIZE +GLOBAL_VARIABLE kernel_entry,8 +GLOBAL_VARIABLE kernel_type,8 +GLOBAL_VARIABLE crash_start,8 +GLOBAL_VARIABLE crash_size,8 .align PAGE_SIZE stack: diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 4e2beb3c29b7..3528e6da4e87 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -12,15 +12,6 @@ #include <linux/string.h> #include <asm/purgatory.h> -struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; -u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; - -u64 kernel_entry; -u64 kernel_type; - -u64 crash_start; -u64 crash_size; - int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss index d92f2d94a5d9..9bba2c14e0ca 100644 --- a/arch/s390/scripts/Makefile.chkbss +++ b/arch/s390/scripts/Makefile.chkbss @@ -2,13 +2,22 @@ quiet_cmd_chkbss = CHKBSS $< define cmd_chkbss + rm -f $@; \ if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ echo "error: $< .bss section is not empty" >&2; exit 1; \ fi; \ touch $@; endef -$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +chkbss-target ?= $(obj)/built-in.a +ifneq (,$(findstring /,$(chkbss))) +chkbss-files := $(patsubst %, %.chkbss, $(chkbss)) +else +chkbss-files := $(patsubst %, $(obj)/%.chkbss, $(chkbss)) +endif + +$(chkbss-target): $(chkbss-files) +targets += $(notdir $(chkbss-files)) %.o.chkbss: %.o $(call cmd,chkbss) diff --git a/arch/s390/tools/gen_opcode_table.c b/arch/s390/tools/gen_opcode_table.c index 259aa0680d1a..a1bc02b29c81 100644 --- a/arch/s390/tools/gen_opcode_table.c +++ b/arch/s390/tools/gen_opcode_table.c @@ -257,7 +257,7 @@ static void add_to_group(struct gen_opcode *desc, struct insn *insn, int offset) if (!desc->group) exit(EXIT_FAILURE); group = &desc->group[desc->nr_groups - 1]; - strncpy(group->opcode, insn->opcode, 2); + memcpy(group->opcode, insn->opcode, 2); group->type = insn->type; group->offset = offset; group->count = 1; @@ -283,7 +283,7 @@ static void print_opcode_table(struct gen_opcode *desc) continue; add_to_group(desc, insn, offset); if (strncmp(opcode, insn->opcode, 2)) { - strncpy(opcode, insn->opcode, 2); + memcpy(opcode, insn->opcode, 2); printf("\t/* %.2s */ \\\n", opcode); } print_opcode(insn, offset); |