summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig28
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/appldata/appldata_base.c10
-rw-r--r--arch/s390/boot/Makefile6
-rw-r--r--arch/s390/boot/alternative.c3
-rw-r--r--arch/s390/boot/boot.h4
-rwxr-xr-xarch/s390/boot/install.sh2
-rw-r--r--arch/s390/boot/ipl_parm.c7
-rw-r--r--arch/s390/boot/ipl_report.c2
-rw-r--r--arch/s390/boot/kmsan.c6
-rw-r--r--arch/s390/boot/pgm_check_info.c18
-rw-r--r--arch/s390/boot/physmem_info.c8
-rw-r--r--arch/s390/boot/startup.c117
-rw-r--r--arch/s390/boot/string.c16
-rw-r--r--arch/s390/boot/uv.c8
-rw-r--r--arch/s390/boot/uv.h13
-rw-r--r--arch/s390/boot/vmem.c49
-rw-r--r--arch/s390/boot/vmlinux.lds.S6
-rw-r--r--arch/s390/configs/debug_defconfig49
-rw-r--r--arch/s390/configs/defconfig46
-rw-r--r--arch/s390/configs/zfcpdump_defconfig5
-rw-r--r--arch/s390/crypto/crc32-vx.c1
-rw-r--r--arch/s390/hypfs/hypfs_dbfs.c4
-rw-r--r--arch/s390/hypfs/hypfs_diag.c17
-rw-r--r--arch/s390/include/asm/abs_lowcore.h8
-rw-r--r--arch/s390/include/asm/alternative-asm.h57
-rw-r--r--arch/s390/include/asm/alternative.h154
-rw-r--r--arch/s390/include/asm/arch_hweight.h76
-rw-r--r--arch/s390/include/asm/atomic_ops.h79
-rw-r--r--arch/s390/include/asm/bitops.h3
-rw-r--r--arch/s390/include/asm/ccwdev.h2
-rw-r--r--arch/s390/include/asm/checksum.h2
-rw-r--r--arch/s390/include/asm/cpacf.h3
-rw-r--r--arch/s390/include/asm/cpu_mf.h6
-rw-r--r--arch/s390/include/asm/current.h2
-rw-r--r--arch/s390/include/asm/dat-bits.h170
-rw-r--r--arch/s390/include/asm/diag.h8
-rw-r--r--arch/s390/include/asm/elf.h8
-rw-r--r--arch/s390/include/asm/entry-common.h2
-rw-r--r--arch/s390/include/asm/facility.h5
-rw-r--r--arch/s390/include/asm/ftrace.h1
-rw-r--r--arch/s390/include/asm/hardirq.h6
-rw-r--r--arch/s390/include/asm/hugetlb.h4
-rw-r--r--arch/s390/include/asm/irqflags.h17
-rw-r--r--arch/s390/include/asm/kmsan.h59
-rw-r--r--arch/s390/include/asm/kvm_host.h10
-rw-r--r--arch/s390/include/asm/lowcore.h35
-rw-r--r--arch/s390/include/asm/mmu_context.h8
-rw-r--r--arch/s390/include/asm/nospec-branch.h9
-rw-r--r--arch/s390/include/asm/page.h10
-rw-r--r--arch/s390/include/asm/pai.h17
-rw-r--r--arch/s390/include/asm/percpu.h2
-rw-r--r--arch/s390/include/asm/pgtable.h45
-rw-r--r--arch/s390/include/asm/preempt.h30
-rw-r--r--arch/s390/include/asm/processor.h32
-rw-r--r--arch/s390/include/asm/runtime-const.h77
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/setup.h36
-rw-r--r--arch/s390/include/asm/smp.h5
-rw-r--r--arch/s390/include/asm/softirq_stack.h2
-rw-r--r--arch/s390/include/asm/spinlock.h4
-rw-r--r--arch/s390/include/asm/stacktrace.h1
-rw-r--r--arch/s390/include/asm/string.h20
-rw-r--r--arch/s390/include/asm/thread_info.h3
-rw-r--r--arch/s390/include/asm/timex.h10
-rw-r--r--arch/s390/include/asm/uaccess.h121
-rw-r--r--arch/s390/include/asm/unistd.h1
-rw-r--r--arch/s390/include/asm/uv.h43
-rw-r--r--arch/s390/include/asm/vtime.h16
-rw-r--r--arch/s390/kernel/Makefile3
-rw-r--r--arch/s390/kernel/abs_lowcore.c1
-rw-r--r--arch/s390/kernel/alternative.c75
-rw-r--r--arch/s390/kernel/asm-offsets.c6
-rw-r--r--arch/s390/kernel/crash_dump.c54
-rw-r--r--arch/s390/kernel/debug.c2
-rw-r--r--arch/s390/kernel/diag.c22
-rw-r--r--arch/s390/kernel/dumpstack.c8
-rw-r--r--arch/s390/kernel/early.c45
-rw-r--r--arch/s390/kernel/entry.S259
-rw-r--r--arch/s390/kernel/fpu.c2
-rw-r--r--arch/s390/kernel/ftrace.c2
-rw-r--r--arch/s390/kernel/head64.S8
-rw-r--r--arch/s390/kernel/idle.c11
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/irq.c18
-rw-r--r--arch/s390/kernel/machine_kexec.c6
-rw-r--r--arch/s390/kernel/nmi.c31
-rw-r--r--arch/s390/kernel/nospec-branch.c16
-rw-r--r--arch/s390/kernel/nospec-sysfs.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c14
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c2
-rw-r--r--arch/s390/kernel/perf_pai_crypto.c183
-rw-r--r--arch/s390/kernel/perf_pai_ext.c146
-rw-r--r--arch/s390/kernel/process.c6
-rw-r--r--arch/s390/kernel/processor.c20
-rw-r--r--arch/s390/kernel/reipl.S26
-rw-r--r--arch/s390/kernel/setup.c50
-rw-r--r--arch/s390/kernel/smp.c221
-rw-r--r--arch/s390/kernel/sthyi.c95
-rw-r--r--arch/s390/kernel/syscall.c31
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kernel/time.c22
-rw-r--r--arch/s390/kernel/topology.c10
-rw-r--r--arch/s390/kernel/traps.c34
-rw-r--r--arch/s390/kernel/unwind_bc.c4
-rw-r--r--arch/s390/kernel/uv.c242
-rw-r--r--arch/s390/kernel/vmlinux.lds.S24
-rw-r--r--arch/s390/kernel/vtime.c82
-rw-r--r--arch/s390/kvm/gaccess.c163
-rw-r--r--arch/s390/kvm/kvm-s390.c20
-rw-r--r--arch/s390/kvm/kvm-s390.h22
-rw-r--r--arch/s390/kvm/priv.c32
-rw-r--r--arch/s390/kvm/vsie.c24
-rw-r--r--arch/s390/lib/spinlock.c8
-rw-r--r--arch/s390/lib/test_kprobes.c1
-rw-r--r--arch/s390/lib/test_modules.c1
-rw-r--r--arch/s390/lib/test_unwind.c3
-rw-r--r--arch/s390/lib/uaccess.c4
-rw-r--r--arch/s390/mm/cmm.c7
-rw-r--r--arch/s390/mm/dump_pagetables.c156
-rw-r--r--arch/s390/mm/fault.c33
-rw-r--r--arch/s390/mm/gmap.c16
-rw-r--r--arch/s390/mm/hugetlbpage.c12
-rw-r--r--arch/s390/mm/init.c12
-rw-r--r--arch/s390/mm/maccess.c4
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgalloc.c8
-rw-r--r--arch/s390/mm/vmem.c13
-rw-r--r--arch/s390/net/bpf_jit_comp.c489
-rw-r--r--arch/s390/pci/pci.c2
-rw-r--r--arch/s390/pci/pci_irq.c112
-rw-r--r--arch/s390/tools/relocs.c2
132 files changed, 2928 insertions, 1642 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c59d2b54df49..c60e699e99f5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -21,7 +21,7 @@ config ARCH_PROC_KCORE_TEXT
def_bool y
config GENERIC_HWEIGHT
- def_bool y
+ def_bool !HAVE_MARCH_Z196_FEATURES
config GENERIC_BUG
def_bool y if BUG
@@ -142,6 +142,7 @@ config S390
select FUNCTION_ALIGNMENT_8B if CC_IS_GCC
select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC
select GENERIC_ALLOCATOR
+ select GENERIC_CPU_DEVICES
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_ENTRY
@@ -158,6 +159,7 @@ config S390
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC
select HAVE_ARCH_KCSAN
+ select HAVE_ARCH_KMSAN
select HAVE_ARCH_KFENCE
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
@@ -602,6 +604,19 @@ config RANDOMIZE_BASE
as a security feature that deters exploit attempts relying on
knowledge of the location of kernel internals.
+config RANDOMIZE_IDENTITY_BASE
+ bool "Randomize the address of the identity mapping base"
+ depends on RANDOMIZE_BASE
+ default DEBUG_VM
+ help
+ The identity mapping base address is pinned to zero by default.
+ Allow randomization of that base to expose otherwise missed
+ notion of physical and virtual addresses of data structures.
+ That does not have any impact on the base address at which the
+ kernel image is loaded.
+
+ If unsure, say N
+
config KERNEL_IMAGE_BASE
hex "Kernel image base address"
range 0x100000 0x1FFFFFE0000000 if !KASAN
@@ -797,17 +812,6 @@ config HAVE_PNETID
menu "Virtualization"
-config PROTECTED_VIRTUALIZATION_GUEST
- def_bool n
- prompt "Protected virtualization guest support"
- help
- Select this option, if you want to be able to run this
- kernel as a protected virtualization KVM guest.
- Protected virtualization capable machines have a mini hypervisor
- located at machine level (an ultravisor). With help of the
- Ultravisor, KVM will be able to run "protected" VMs, special
- VMs whose memory and management data are unavailable to KVM.
-
config PFAULT
def_bool y
prompt "Pseudo page fault support"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f2b21c7a70ef..7fd57398221e 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -36,7 +36,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
UTS_MACHINE := s390x
-STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
+STACK_SIZE := $(if $(CONFIG_KASAN),65536,$(if $(CONFIG_KMSAN),65536,16384))
CHECKFLAGS += -D__s390__ -D__s390x__
export LD_BFD
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index c2978cb03b36..91a30e017d65 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -46,9 +46,9 @@
* /proc entries (sysctl)
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(struct ctl_table *ctl, int write,
+static int appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-static int appldata_interval_handler(struct ctl_table *ctl, int write,
+static int appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
@@ -199,7 +199,7 @@ static void __appldata_vtimer_setup(int cmd)
* Start/Stop timer, show status of timer (0 = not active, 1 = active)
*/
static int
-appldata_timer_handler(struct ctl_table *ctl, int write,
+appldata_timer_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int timer_active = appldata_timer_active;
@@ -232,7 +232,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
* current timer interval.
*/
static int
-appldata_interval_handler(struct ctl_table *ctl, int write,
+appldata_interval_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int interval = appldata_interval;
@@ -262,7 +262,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
* monitoring (0 = not in process, 1 = in process)
*/
static int
-appldata_generic_handler(struct ctl_table *ctl, int write,
+appldata_generic_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 070c9b2e905f..4f476884d340 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -3,11 +3,13 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
+# Tooling runtimes are unavailable and cannot be linked for early boot code
KCOV_INSTRUMENT := n
GCOV_PROFILE := n
UBSAN_SANITIZE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
@@ -37,11 +39,11 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
+obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
+obj-$(CONFIG_KMSAN) += kmsan.o
obj-all := $(obj-y) piggy.o syms.o
targets := bzImage section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c
new file mode 100644
index 000000000000..abc08d2c873d
--- /dev/null
+++ b/arch/s390/boot/alternative.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "../kernel/alternative.c"
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 18027fdc92b0..83e2ce050b6c 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -30,6 +30,8 @@ struct vmlinux_info {
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
+ unsigned long alt_instructions;
+ unsigned long alt_instructions_end;
#ifdef CONFIG_KASAN
unsigned long kasan_early_shadow_page_off;
unsigned long kasan_early_shadow_pte_off;
@@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
+
#define vmlinux _vmlinux_info
+#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore))
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index a13dd2f2aa1c..fa41486258ee 100755
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -15,6 +15,8 @@
# $3 - kernel map file
# $4 - default install path (blank if root directory)
+set -e
+
echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
"bootloader config required" >&2
if [ -f "$4/vmlinuz-$1" ]; then mv -- "$4/vmlinuz-$1" "$4/vmlinuz-$1.old"; fi
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index b24de9aabf7d..1773b72a6a7b 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
@@ -51,11 +52,11 @@ static inline int __diag308(unsigned long subcode, void *addr)
: [r1] "+&d" (r1.pair),
[reg1] "=&d" (reg1),
[reg2] "=&a" (reg2),
- "+Q" (S390_lowcore.program_new_psw),
+ "+Q" (get_lowcore()->program_new_psw),
"=Q" (old)
: [subcode] "d" (subcode),
[psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+ [psw_pgm] "a" (&get_lowcore()->program_new_psw)
: "cc", "memory");
return r1.odd;
}
@@ -310,5 +311,7 @@ void parse_boot_command_line(void)
prot_virt_host = 1;
}
#endif
+ if (!strcmp(param, "relocate_lowcore") && test_facility(193))
+ relocate_lowcore = 1;
}
}
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 1803035e68d2..d00898852a88 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -106,7 +106,7 @@ int read_ipl_report(void)
* the IPL parameter list, then align the address to a double
* word boundary.
*/
- tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ tmp = (unsigned long)get_lowcore()->ipl_parmblock_ptr;
pl_hdr = (struct ipl_pl_hdr *) tmp;
tmp = (tmp + pl_hdr->len + 7) & -8UL;
rl_hdr = (struct ipl_rl_hdr *) tmp;
diff --git a/arch/s390/boot/kmsan.c b/arch/s390/boot/kmsan.c
new file mode 100644
index 000000000000..e7b3ac48143e
--- /dev/null
+++ b/arch/s390/boot/kmsan.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kmsan-checks.h>
+
+void kmsan_unpoison_memory(const void *address, size_t size)
+{
+}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index ea96275b0380..5352b3d356da 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -145,22 +145,22 @@ void print_stacktrace(unsigned long sp)
void print_pgm_check_info(void)
{
- unsigned long *gpregs = (unsigned long *)S390_lowcore.gpregs_save_area;
- struct psw_bits *psw = &psw_bits(S390_lowcore.psw_save_area);
+ unsigned long *gpregs = (unsigned long *)get_lowcore()->gpregs_save_area;
+ struct psw_bits *psw = &psw_bits(get_lowcore()->psw_save_area);
decompressor_printk("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
decompressor_printk("Kernel command line: %s\n", early_command_line);
decompressor_printk("Kernel fault: interruption code %04x ilc:%x\n",
- S390_lowcore.pgm_code, S390_lowcore.pgm_ilc >> 1);
+ get_lowcore()->pgm_code, get_lowcore()->pgm_ilc >> 1);
if (kaslr_enabled()) {
decompressor_printk("Kernel random base: %lx\n", __kaslr_offset);
decompressor_printk("Kernel random base phys: %lx\n", __kaslr_offset_phys);
}
decompressor_printk("PSW : %016lx %016lx (%pS)\n",
- S390_lowcore.psw_save_area.mask,
- S390_lowcore.psw_save_area.addr,
- (void *)S390_lowcore.psw_save_area.addr);
+ get_lowcore()->psw_save_area.mask,
+ get_lowcore()->psw_save_area.addr,
+ (void *)get_lowcore()->psw_save_area.addr);
decompressor_printk(
" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x P:%x AS:%x CC:%x PM:%x RI:%x EA:%x\n",
psw->per, psw->dat, psw->io, psw->ext, psw->key, psw->mcheck,
@@ -174,8 +174,8 @@ void print_pgm_check_info(void)
gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
decompressor_printk(" %016lx %016lx %016lx %016lx\n",
gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
- print_stacktrace(S390_lowcore.gpregs_save_area[15]);
+ print_stacktrace(get_lowcore()->gpregs_save_area[15]);
decompressor_printk("Last Breaking-Event-Address:\n");
- decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
- (void *)S390_lowcore.pgm_last_break);
+ decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)get_lowcore()->pgm_last_break,
+ (void *)get_lowcore()->pgm_last_break);
}
diff --git a/arch/s390/boot/physmem_info.c b/arch/s390/boot/physmem_info.c
index 0cf79826eef9..4c9ad8258f7e 100644
--- a/arch/s390/boot/physmem_info.c
+++ b/arch/s390/boot/physmem_info.c
@@ -81,11 +81,11 @@ static int __diag260(unsigned long rx1, unsigned long rx2)
[reg2] "=&a" (reg2),
[rc] "+&d" (rc),
[ry] "+&d" (ry),
- "+Q" (S390_lowcore.program_new_psw),
+ "+Q" (get_lowcore()->program_new_psw),
"=Q" (old)
: [rx] "d" (rx.pair),
[psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw)
+ [psw_pgm] "a" (&get_lowcore()->program_new_psw)
: "cc", "memory");
return rc == 0 ? ry : -1;
}
@@ -129,10 +129,10 @@ static int tprot(unsigned long addr)
: [reg1] "=&d" (reg1),
[reg2] "=&a" (reg2),
[rc] "+&d" (rc),
- "=Q" (S390_lowcore.program_new_psw.addr),
+ "=Q" (get_lowcore()->program_new_psw.addr),
"=Q" (old)
: [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [psw_pgm] "a" (&get_lowcore()->program_new_psw),
[addr] "a" (addr)
: "cc", "memory");
return rc;
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 182aac6a0f77..c73b5118ad42 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
+int __bootdata_preserved(relocate_lowcore);
u64 __bootdata_preserved(stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
@@ -78,10 +79,10 @@ static int cmma_test_essa(void)
[reg2] "=&a" (reg2),
[rc] "+&d" (rc),
[tmp] "=&d" (tmp),
- "+Q" (S390_lowcore.program_new_psw),
+ "+Q" (get_lowcore()->program_new_psw),
"=Q" (old)
: [psw_old] "a" (&old),
- [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [psw_pgm] "a" (&get_lowcore()->program_new_psw),
[cmd] "i" (ESSA_GET_STATE)
: "cc", "memory");
return rc;
@@ -101,10 +102,10 @@ static void cmma_init(void)
static void setup_lpp(void)
{
- S390_lowcore.current_pid = 0;
- S390_lowcore.lpp = LPP_MAGIC;
+ get_lowcore()->current_pid = 0;
+ get_lowcore()->lpp = LPP_MAGIC;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
@@ -161,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
loc = (long)*reloc + phys_offset;
if (loc < min_addr || loc > max_addr)
error("64-bit relocation outside of kernel!\n");
- *(u64 *)loc += offset - __START_KERNEL;
+ *(u64 *)loc += offset;
}
}
@@ -170,11 +171,14 @@ static void kaslr_adjust_got(unsigned long offset)
u64 *entry;
/*
- * Even without -fPIE, Clang still uses a global offset table for some
- * reason. Adjust the GOT entries.
+ * Adjust GOT entries, except for ones for undefined weak symbols
+ * that resolved to zero. This also skips the first three reserved
+ * entries on s390x that are zero.
*/
- for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
- *entry += offset - __START_KERNEL;
+ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) {
+ if (*entry)
+ *entry += offset;
+ }
}
/*
@@ -248,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
/* choose kernel address space layout: 4 or 3 levels. */
- BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
@@ -301,11 +305,18 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
MODULES_VADDR = MODULES_END - MODULES_LEN;
VMALLOC_END = MODULES_VADDR;
+ if (IS_ENABLED(CONFIG_KMSAN))
+ VMALLOC_END -= MODULES_LEN * 2;
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
vsize = round_down(vsize, _SEGMENT_SIZE);
vmalloc_size = min(vmalloc_size, vsize);
+ if (IS_ENABLED(CONFIG_KMSAN)) {
+ /* take 2/3 of vmalloc area for KMSAN shadow and origins */
+ vmalloc_size = round_down(vmalloc_size / 3, _SEGMENT_SIZE);
+ VMALLOC_END -= vmalloc_size * 2;
+ }
VMALLOC_START = VMALLOC_END - vmalloc_size;
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
@@ -330,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
max_mappable = min(max_mappable, vmemmap_start);
- __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE))
+ __identity_base = round_down(vmemmap_start - max_mappable, rte_size);
return asce_limit;
}
@@ -366,6 +378,8 @@ static void kaslr_adjust_vmlinux_info(long offset)
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
+ vmlinux.alt_instructions += offset;
+ vmlinux.alt_instructions_end += offset;
#ifdef CONFIG_KASAN
vmlinux.kasan_early_shadow_page_off += offset;
vmlinux.kasan_early_shadow_pte_off += offset;
@@ -375,25 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset)
#endif
}
-static void fixup_vmlinux_info(void)
-{
- vmlinux.entry -= __START_KERNEL;
- kaslr_adjust_vmlinux_info(-__START_KERNEL);
-}
-
void startup_kernel(void)
{
- unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
- unsigned long nokaslr_offset_phys = mem_safe_offset();
- unsigned long amode31_lma = 0;
+ unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
+ unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0;
+ unsigned long kernel_size = TEXT_OFFSET + vmlinux_size;
+ unsigned long kaslr_large_page_offset;
unsigned long max_physmem_end;
unsigned long asce_limit;
unsigned long safe_addr;
psw_t psw;
- fixup_vmlinux_info();
setup_lpp();
- safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
+
+ /*
+ * Non-randomized kernel physical start address must be _SEGMENT_SIZE
+ * aligned (see blow).
+ */
+ nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE);
+ safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size);
/*
* Reserve decompressor memory together with decompression heap,
@@ -425,13 +439,39 @@ void startup_kernel(void)
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
- if (kaslr_enabled())
- __kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size);
- if (!__kaslr_offset_phys)
- __kaslr_offset_phys = nokaslr_offset_phys;
+ /*
+ * __kaslr_offset_phys must be _SEGMENT_SIZE aligned, so the lower
+ * 20 bits (the offset within a large page) are zero. Copy the last
+ * 20 bits of __kaslr_offset, which is THREAD_SIZE aligned, to
+ * __kaslr_offset_phys.
+ *
+ * With this the last 20 bits of __kaslr_offset_phys and __kaslr_offset
+ * are identical, which is required to allow for large mappings of the
+ * kernel image.
+ */
+ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK;
+ if (kaslr_enabled()) {
+ unsigned long size = vmlinux_size + kaslr_large_page_offset;
+
+ text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size);
+ }
+ if (!text_lma)
+ text_lma = nokaslr_text_lma;
+ text_lma |= kaslr_large_page_offset;
+
+ /*
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is
+ * never accessed via the kernel image mapping as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, this region could be used for something else and does
+ * not need to be reserved. See how it is skipped in setup_vmem().
+ */
+ __kaslr_offset_phys = text_lma - TEXT_OFFSET;
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
- physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
- deploy_kernel((void *)__kaslr_offset_phys);
+ physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size);
+ deploy_kernel((void *)text_lma);
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
@@ -447,10 +487,14 @@ void startup_kernel(void)
* before the kernel started. Therefore, in case the two sections
* overlap there is no risk of corrupting any data.
*/
- if (kaslr_enabled())
- amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
+ if (kaslr_enabled()) {
+ unsigned long amode31_min;
+
+ amode31_min = (unsigned long)_decompressor_end;
+ amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
+ }
if (!amode31_lma)
- amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
+ amode31_lma = text_lma - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
/*
@@ -466,18 +510,21 @@ void startup_kernel(void)
* - copy_bootdata() must follow setup_vmem() to propagate changes
* to bootdata made by setup_vmem()
*/
- clear_bss_section(__kaslr_offset_phys);
- kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
+ clear_bss_section(text_lma);
+ kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size,
__kaslr_offset, __kaslr_offset_phys);
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
copy_bootdata();
+ __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
+ (struct alt_instr *)_vmlinux_info.alt_instructions_end,
+ ALT_CTX_EARLY);
/*
* Save KASLR offset for early dumps, before vmcore_info is set.
* Mark as uneven to distinguish from real vmcore_info pointer.
*/
- S390_lowcore.vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
+ get_lowcore()->vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
/*
* Jump to the decompressed kernel entry point and switch DAT mode on.
diff --git a/arch/s390/boot/string.c b/arch/s390/boot/string.c
index faccb33b462c..f6b9b1df48a8 100644
--- a/arch/s390/boot/string.c
+++ b/arch/s390/boot/string.c
@@ -1,11 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
+#define IN_BOOT_STRING_C 1
#include <linux/ctype.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#undef CONFIG_KASAN
#undef CONFIG_KASAN_GENERIC
+#undef CONFIG_KMSAN
#include "../lib/string.c"
+/*
+ * Duplicate some functions from the common lib/string.c
+ * instead of fully including it.
+ */
+
int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;
@@ -22,6 +29,15 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}
+void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+ uint64_t *xs = s;
+
+ while (count--)
+ *xs++ = v;
+ return s;
+}
+
char *skip_spaces(const char *str)
{
while (isspace(*str))
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 1e66d2cbb096..318e6ba95bfd 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -8,12 +8,8 @@
#include "uv.h"
/* will be used in arch/s390/kernel/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
-#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
@@ -53,14 +49,11 @@ void uv_query_info(void)
uv_info.max_secrets = uvcb.max_secrets;
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
-#endif
}
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
@@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void)
{
prot_virt_host = is_prot_virt_host_capable();
}
-#endif
diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h
index 0f3070856f8d..da4a4a8d48e0 100644
--- a/arch/s390/boot/uv.h
+++ b/arch/s390/boot/uv.h
@@ -2,21 +2,8 @@
#ifndef BOOT_UV_H
#define BOOT_UV_H
-#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
-#else
-static inline unsigned long adjust_to_uv_max(unsigned long limit)
-{
- return limit;
-}
-static inline void sanitize_prot_virt_host(void) {}
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
void uv_query_info(void);
-#else
-static inline void uv_query_info(void) {}
-#endif
#endif /* BOOT_UV_H */
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 96d48b7112d4..145035f84a0e 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
+ POPULATE_LOWCORE,
POPULATE_ABS_LOWCORE,
POPULATE_IDENTITY,
POPULATE_KERNEL,
@@ -89,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern
}
memgap_start = end;
}
- kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW);
+ kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW);
kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW);
kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW);
if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
@@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return -1;
case POPULATE_DIRECT:
return addr;
+ case POPULATE_LOWCORE:
+ return __lowcore_pa(addr);
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
case POPULATE_KERNEL:
@@ -261,21 +264,27 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
static bool large_allowed(enum populate_mode mode)
{
- return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY);
+ return (mode == POPULATE_DIRECT) || (mode == POPULATE_IDENTITY) || (mode == POPULATE_KERNEL);
}
static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
+ unsigned long size = end - addr;
+
return machine.has_edat2 && large_allowed(mode) &&
- IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+ IS_ALIGNED(addr, PUD_SIZE) && (size >= PUD_SIZE) &&
+ IS_ALIGNED(_pa(addr, size, mode), PUD_SIZE);
}
static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end,
enum populate_mode mode)
{
+ unsigned long size = end - addr;
+
return machine.has_edat1 && large_allowed(mode) &&
- IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+ IS_ALIGNED(addr, PMD_SIZE) && (size >= PMD_SIZE) &&
+ IS_ALIGNED(_pa(addr, size, mode), PMD_SIZE);
}
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -412,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
+ unsigned long lowcore_address = 0;
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
@@ -449,18 +459,33 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ if (relocate_lowcore)
+ lowcore_address = LOWCORE_ALT_ADDRESS;
+
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
- pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
+ pgtable_populate(lowcore_address,
+ lowcore_address + sizeof(struct lowcore),
+ POPULATE_LOWCORE);
for_each_physmem_usable_range(i, &start, &end) {
pgtable_populate((unsigned long)__identity_va(start),
(unsigned long)__identity_va(end),
POPULATE_IDENTITY);
}
- pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL);
+
+ /*
+ * [kernel_start..kernel_start + TEXT_OFFSET] region is never
+ * accessed as per the linker script:
+ *
+ * . = TEXT_OFFSET;
+ *
+ * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to
+ * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region.
+ */
+ pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL);
pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT);
pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore),
POPULATE_ABS_LOWCORE);
@@ -470,13 +495,13 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
kasan_populate_shadow(kernel_start, kernel_end);
- S390_lowcore.kernel_asce.val = swapper_pg_dir | asce_bits;
- S390_lowcore.user_asce = s390_invalid_asce;
+ get_lowcore()->kernel_asce.val = swapper_pg_dir | asce_bits;
+ get_lowcore()->user_asce = s390_invalid_asce;
- local_ctl_load(1, &S390_lowcore.kernel_asce);
- local_ctl_load(7, &S390_lowcore.user_asce);
- local_ctl_load(13, &S390_lowcore.kernel_asce);
+ local_ctl_load(1, &get_lowcore()->kernel_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
+ local_ctl_load(13, &get_lowcore()->kernel_asce);
- init_mm.context.asce = S390_lowcore.kernel_asce.val;
+ init_mm.context.asce = get_lowcore()->kernel_asce.val;
init_mm.pgd = init_mm_pgd;
}
diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S
index 1fe5a1d3ff60..66670212a361 100644
--- a/arch/s390/boot/vmlinux.lds.S
+++ b/arch/s390/boot/vmlinux.lds.S
@@ -109,6 +109,12 @@ SECTIONS
#ifdef CONFIG_KERNEL_UNCOMPRESSED
. = ALIGN(PAGE_SIZE);
. += AMODE31_SIZE; /* .amode31 section */
+
+ /*
+ * Make sure the location counter is not less than TEXT_OFFSET.
+ * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead.
+ */
+ . = MAX(TEXT_OFFSET, ALIGN(1 << 20));
#else
. = ALIGN(8);
#endif
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 145342e46ea8..ea63a7342f5f 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -43,7 +43,6 @@ CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
-CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -51,11 +50,11 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
+# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
@@ -76,6 +75,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -100,7 +100,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
CONFIG_CMA_SYSFS=y
CONFIG_CMA_AREAS=7
@@ -119,6 +118,7 @@ CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -133,7 +133,6 @@ CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
@@ -167,6 +166,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
@@ -183,17 +183,39 @@ CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -206,8 +228,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -216,6 +240,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -230,6 +255,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -247,6 +273,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -302,7 +329,6 @@ CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
@@ -373,7 +399,6 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -462,6 +487,7 @@ CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
@@ -574,18 +600,16 @@ CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
-# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
@@ -645,7 +669,6 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -663,6 +686,7 @@ CONFIG_SQUASHFS_XZ=y
CONFIG_SQUASHFS_ZSTD=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
@@ -879,6 +903,5 @@ CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
-CONFIG_STRING_SELFTEST=y
CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index dc237896f99d..d8b28ff8ff45 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -41,7 +41,6 @@ CONFIG_PROFILING=y
CONFIG_KEXEC=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
-CONFIG_CRASH_DUMP=y
CONFIG_LIVEPATCH=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=512
@@ -49,11 +48,11 @@ CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_CERT_STORE=y
CONFIG_EXPOLINE=y
+# CONFIG_EXPOLINE_EXTERN is not set
CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
-CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y
@@ -71,6 +70,7 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODULE_UNLOAD_TAINT_TRACKING=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -110,6 +110,7 @@ CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC_DIAG=m
+CONFIG_SMC_LO=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
@@ -124,7 +125,6 @@ CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
@@ -158,6 +158,7 @@ CONFIG_BRIDGE_NETFILTER=m
CONFIG_NETFILTER_NETLINK_HOOK=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
@@ -174,17 +175,39 @@ CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
+CONFIG_NF_CT_NETLINK_HELPER=m
+CONFIG_NETFILTER_NETLINK_GLUE_CT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NFT_NUMGEN=m
CONFIG_NFT_CT=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_CONNLIMIT=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
+CONFIG_NFT_TUNNEL=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
-CONFIG_NETFILTER_XTABLES_COMPAT=y
+CONFIG_NFT_XFRM=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_OSF=m
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_NFT_DUP_NETDEV=m
+CONFIG_NFT_FWD_NETDEV=m
+CONFIG_NFT_FIB_NETDEV=m
+CONFIG_NFT_REJECT_NETDEV=m
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_PROCFS=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -197,8 +220,10 @@ CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NETFILTER_XT_TARGET_NETMAP=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
@@ -207,6 +232,7 @@ CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_CGROUP=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
@@ -221,6 +247,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
+CONFIG_NETFILTER_XT_MATCH_IPCOMP=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
@@ -238,6 +265,7 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
+CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
@@ -293,7 +321,6 @@ CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
-CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_FIB_IPV6=m
@@ -363,7 +390,6 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -452,6 +478,7 @@ CONFIG_DM_VERITY=m
CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG=y
CONFIG_DM_SWITCH=m
CONFIG_DM_INTEGRITY=m
+CONFIG_DM_VDO=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
@@ -564,17 +591,16 @@ CONFIG_WATCHDOG_CORE=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
+CONFIG_DRM=m
+CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
# CONFIG_FB_DEVICE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
-CONFIG_SYNC_FILE=y
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_MLX5_VFIO_PCI=m
@@ -630,7 +656,6 @@ CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -649,6 +674,7 @@ CONFIG_SQUASHFS_XZ=y
CONFIG_SQUASHFS_ZSTD=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
+CONFIG_NFS_V2=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index c51f3ec4eb28..8c2b61363bab 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -9,25 +9,22 @@ CONFIG_BPF_SYSCALL=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_KEXEC=y
-CONFIG_CRASH_DUMP=y
CONFIG_MARCH_Z13=y
CONFIG_NR_CPUS=2
CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
+# CONFIG_AP is not set
# CONFIG_PFAULT is not set
# CONFIG_S390_HYPFS is not set
# CONFIG_VIRTUALIZATION is not set
# CONFIG_S390_GUEST is not set
# CONFIG_SECCOMP is not set
-# CONFIG_GCC_PLUGINS is not set
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_SWAP is not set
# CONFIG_COMPAT_BRK is not set
-# CONFIG_COMPACTION is not set
-# CONFIG_MIGRATION is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
# CONFIG_PCPU_DEV_REFCNT is not set
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 74f17c905d12..89a10337e6ea 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -297,6 +297,7 @@ module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
module_exit(crc_vx_mod_exit);
MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32");
diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c
index 4024599eb448..0e855c5e91c5 100644
--- a/arch/s390/hypfs/hypfs_dbfs.c
+++ b/arch/s390/hypfs/hypfs_dbfs.c
@@ -39,7 +39,9 @@ static ssize_t dbfs_read(struct file *file, char __user *buf,
return 0;
df = file_inode(file)->i_private;
- mutex_lock(&df->lock);
+ if (mutex_lock_interruptible(&df->lock))
+ return -ERESTARTSYS;
+
data = hypfs_dbfs_data_alloc(df);
if (!data) {
mutex_unlock(&df->lock);
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 279b7bba4d43..26a009f9c49e 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -140,11 +140,22 @@ fail_alloc:
int diag204_store(void *buf, int pages)
{
+ unsigned long subcode;
int rc;
- rc = diag204((unsigned long)diag204_store_sc |
- (unsigned long)diag204_get_info_type(), pages, buf);
- return rc < 0 ? -EOPNOTSUPP : 0;
+ subcode = diag204_get_info_type();
+ subcode |= diag204_store_sc;
+ if (diag204_has_bif())
+ subcode |= DIAG204_BIF_BIT;
+ while (1) {
+ rc = diag204(subcode, pages, buf);
+ if (rc != -EBUSY)
+ break;
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+ schedule_timeout_interruptible(DIAG204_BUSY_WAIT);
+ }
+ return rc < 0 ? rc : 0;
}
struct dbfs_d204_hdr {
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
index 6f264b79e377..d20df8c923fc 100644
--- a/arch/s390/include/asm/abs_lowcore.h
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -2,6 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
+#include <asm/sections.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
@@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
+extern int __bootdata_preserved(relocate_lowcore);
+
+static inline int have_relocated_lowcore(void)
+{
+ return relocate_lowcore;
+}
+
#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
deleted file mode 100644
index 608f6287ca9c..000000000000
--- a/arch/s390/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_S390_ALTERNATIVE_ASM_H
-#define _ASM_S390_ALTERNATIVE_ASM_H
-
-#ifdef __ASSEMBLY__
-
-/*
- * Issue one struct alt_instr descriptor entry (need to put it into
- * the section .altinstructions, see below). This entry contains
- * enough information for the alternatives patching code to patch an
- * instruction. See apply_alternatives().
- */
-.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
- .long \orig_start - .
- .long \alt_start - .
- .word \feature
- .byte \orig_end - \orig_start
- .org . - ( \orig_end - \orig_start ) & 1
- .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
- .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE oldinstr, newinstr, feature
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr
-771: .popsection
-772: \oldinstr
-773: .pushsection .altinstructions,"a"
- alt_entry 772b, 773b, 770b, 771b, \feature
- .popsection
-.endm
-
-/*
- * Define an alternative between two instructions. If @feature is
- * present, early code in apply_alternatives() replaces @oldinstr with
- * @newinstr.
- */
-.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
- .pushsection .altinstr_replacement,"ax"
-770: \newinstr1
-771: \newinstr2
-772: .popsection
-773: \oldinstr
-774: .pushsection .altinstructions,"a"
- alt_entry 773b, 774b, 770b, 771b,\feature1
- alt_entry 773b, 774b, 771b, 772b,\feature2
- .popsection
-.endm
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index dd93b92c3ab6..de980c938a3e 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -2,6 +2,58 @@
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
+/*
+ * Each alternative comes with a 32 bit feature field:
+ * union {
+ * u32 feature;
+ * struct {
+ * u32 ctx : 4;
+ * u32 type : 8;
+ * u32 data : 20;
+ * };
+ * }
+ *
+ * @ctx is a bitfield, where only one bit must be set. Each bit defines
+ * in which context an alternative is supposed to be applied to the
+ * kernel image:
+ *
+ * - from the decompressor before the kernel itself is executed
+ * - from early kernel code from within the kernel
+ *
+ * @type is a number which defines the type and with that the type
+ * specific alternative patching.
+ *
+ * @data is additional type specific information which defines if an
+ * alternative should be applied.
+ */
+
+#define ALT_CTX_EARLY 1
+#define ALT_CTX_LATE 2
+#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE)
+
+#define ALT_TYPE_FACILITY 0
+#define ALT_TYPE_SPEC 1
+#define ALT_TYPE_LOWCORE 2
+
+#define ALT_DATA_SHIFT 0
+#define ALT_TYPE_SHIFT 20
+#define ALT_CTX_SHIFT 28
+
+#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
+ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
+ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \
+ (facility) << ALT_DATA_SHIFT)
+
+#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
+ ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT)
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -11,12 +63,30 @@
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
- u16 facility; /* facility bit set for replacement */
+ union {
+ u32 feature; /* feature required for replacement */
+ struct {
+ u32 ctx : 4; /* context */
+ u32 type : 8; /* type of alternative */
+ u32 data : 20; /* patching information */
+ };
+ };
u8 instrlen; /* length of original instruction */
} __packed;
-void apply_alternative_instructions(void);
-void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx);
+
+static inline void apply_alternative_instructions(void)
+{
+ __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE);
+}
+
+static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+ __apply_alternatives(start, end, ALT_CTX_ALL);
+}
/*
* +---------------------------------+
@@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define OLDINSTR(oldinstr) \
"661:\n\t" oldinstr "\n662:\n"
-#define ALTINSTR_ENTRY(facility, num) \
+#define ALTINSTR_ENTRY(feature, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
- "\t.word " __stringify(facility) "\n" /* facility bit */ \
+ "\t.long " __stringify(feature) "\n" /* feature */ \
"\t.byte " oldinstr_len "\n" /* instruction len */ \
"\t.org . - (" oldinstr_len ") & 1\n" \
"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
@@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
-#define ALTERNATIVE(oldinstr, altinstr, facility) \
+#define ALTERNATIVE(oldinstr, altinstr, feature) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility, 1) \
+ ALTINSTR_ENTRY(feature, 1) \
".popsection\n"
-#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
+#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(facility1, 1) \
- ALTINSTR_ENTRY(facility2, 2) \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
".popsection\n"
/*
@@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
-#define alternative(oldinstr, altinstr, facility) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+#define alternative(oldinstr, altinstr, feature) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory")
-#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
- altinstr2, facility2) ::: "memory")
+#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \
+ altinstr2, feature2) ::: "memory")
/* Alternative inline assembly with input. */
#define alternative_input(oldinstr, newinstr, feature, input...) \
@@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
: : input)
/* Like alternative_input, but with a single output argument */
-#define alternative_io(oldinstr, altinstr, facility, output, input...) \
- asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \
+#define alternative_io(oldinstr, altinstr, feature, output, input...) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \
: output : input)
/* Use this macro if more than one output parameter is needed. */
@@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/* Use this macro if clobbers are needed without inputs. */
#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
+#else /* __ASSEMBLY__ */
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .long \feature
+ .byte \orig_end - \orig_start
+ .org . - ( \orig_end - \orig_start ) & 1
+ .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
+ .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: .pushsection .altinstructions,"a"
+ alt_entry 772b, 773b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: .pushsection .altinstructions,"a"
+ alt_entry 773b, 774b, 770b, 771b,\feature1
+ alt_entry 773b, 774b, 771b, 772b,\feature2
+ .popsection
+.endm
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_ALTERNATIVE_H */
diff --git a/arch/s390/include/asm/arch_hweight.h b/arch/s390/include/asm/arch_hweight.h
new file mode 100644
index 000000000000..50e23ce854e5
--- /dev/null
+++ b/arch/s390/include/asm/arch_hweight.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_ARCH_HWEIGHT_H
+#define _ASM_S390_ARCH_HWEIGHT_H
+
+#include <linux/types.h>
+
+static __always_inline unsigned long popcnt_z196(unsigned long w)
+{
+ unsigned long cnt;
+
+ asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],0,0"
+ : [cnt] "=d" (cnt)
+ : [w] "d" (w)
+ : "cc");
+ return cnt;
+}
+
+static __always_inline unsigned long popcnt_z15(unsigned long w)
+{
+ unsigned long cnt;
+
+ asm volatile(".insn rrf,0xb9e10000,%[cnt],%[w],8,0"
+ : [cnt] "=d" (cnt)
+ : [w] "d" (w)
+ : "cc");
+ return cnt;
+}
+
+static __always_inline unsigned long __arch_hweight64(__u64 w)
+{
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))
+ return popcnt_z15(w);
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 32;
+ w += w >> 16;
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight64(w);
+}
+
+static __always_inline unsigned int __arch_hweight32(unsigned int w)
+{
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))
+ return popcnt_z15(w);
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 16;
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight32(w);
+}
+
+static __always_inline unsigned int __arch_hweight16(unsigned int w)
+{
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z15_FEATURES))
+ return popcnt_z15((unsigned short)w);
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES)) {
+ w = popcnt_z196(w);
+ w += w >> 8;
+ return w & 0xff;
+ }
+ return __sw_hweight16(w);
+}
+
+static __always_inline unsigned int __arch_hweight8(unsigned int w)
+{
+ if (IS_ENABLED(CONFIG_HAVE_MARCH_Z196_FEATURES))
+ return popcnt_z196((unsigned char)w);
+ return __sw_hweight8(w);
+}
+
+#endif /* _ASM_S390_ARCH_HWEIGHT_H */
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
index 7fa5f96a553a..742c7919cbcd 100644
--- a/arch/s390/include/asm/atomic_ops.h
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -8,21 +8,29 @@
#ifndef __ARCH_S390_ATOMIC_OPS__
#define __ARCH_S390_ATOMIC_OPS__
+#include <linux/limits.h>
+
static __always_inline int __atomic_read(const atomic_t *v)
{
int c;
asm volatile(
- " l %0,%1\n"
- : "=d" (c) : "R" (v->counter));
+ " l %[c],%[counter]\n"
+ : [c] "=d" (c) : [counter] "R" (v->counter));
return c;
}
static __always_inline void __atomic_set(atomic_t *v, int i)
{
- asm volatile(
- " st %1,%0\n"
- : "=R" (v->counter) : "d" (i));
+ if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
+ asm volatile(
+ " mvhi %[counter], %[i]\n"
+ : [counter] "=Q" (v->counter) : [i] "K" (i));
+ } else {
+ asm volatile(
+ " st %[i],%[counter]\n"
+ : [counter] "=R" (v->counter) : [i] "d" (i));
+ }
}
static __always_inline s64 __atomic64_read(const atomic64_t *v)
@@ -30,16 +38,22 @@ static __always_inline s64 __atomic64_read(const atomic64_t *v)
s64 c;
asm volatile(
- " lg %0,%1\n"
- : "=d" (c) : "RT" (v->counter));
+ " lg %[c],%[counter]\n"
+ : [c] "=d" (c) : [counter] "RT" (v->counter));
return c;
}
static __always_inline void __atomic64_set(atomic64_t *v, s64 i)
{
- asm volatile(
- " stg %1,%0\n"
- : "=RT" (v->counter) : "d" (i));
+ if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) {
+ asm volatile(
+ " mvghi %[counter], %[i]\n"
+ : [counter] "=Q" (v->counter) : [i] "K" (i));
+ } else {
+ asm volatile(
+ " stg %[i],%[counter]\n"
+ : [counter] "=RT" (v->counter) : [i] "d" (i));
+ }
}
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
@@ -164,26 +178,55 @@ static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new)
return old;
}
+static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+{
+ asm volatile(
+ " csg %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [new] "d" (new)
+ : "cc", "memory");
+ return old;
+}
+
+/* GCC versions before 14.2.0 may die with an ICE in some configurations. */
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200))
+
static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{
- int old_expected = old;
+ int cc;
asm volatile(
" cs %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+Q" (*ptr)
+ : [old] "+d" (old), [ptr] "+Q" (*ptr), "=@cc" (cc)
: [new] "d" (new)
- : "cc", "memory");
- return old == old_expected;
+ : "memory");
+ return cc == 0;
}
-static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
{
+ int cc;
+
asm volatile(
" csg %[old],%[new],%[ptr]"
- : [old] "+d" (old), [ptr] "+QS" (*ptr)
+ : [old] "+d" (old), [ptr] "+QS" (*ptr), "=@cc" (cc)
+ : [new] "d" (new)
+ : "memory");
+ return cc == 0;
+}
+
+#else /* __GCC_ASM_FLAG_OUTPUTS__ */
+
+static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
+{
+ int old_expected = old;
+
+ asm volatile(
+ " cs %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+Q" (*ptr)
: [new] "d" (new)
: "cc", "memory");
- return old;
+ return old == old_expected;
}
static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new)
@@ -198,4 +241,6 @@ static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long ne
return old == old_expected;
}
+#endif /* __GCC_ASM_FLAG_OUTPUTS__ */
+
#endif /* __ARCH_S390_ATOMIC_OPS__ */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index c467dffa8c12..54a079cd39ed 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -379,8 +379,9 @@ static inline int fls(unsigned int word)
return fls64(word);
}
+#include <asm/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/ffz.h>
-#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 436365ff6c19..e3afcece375e 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -210,7 +210,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *);
#define get_ccwdev_lock(x) (x)->ccwlock
#define to_ccwdev(n) container_of(n, struct ccw_device, dev)
-#define to_ccwdrv(n) container_of(n, struct ccw_driver, driver)
+#define to_ccwdrv(n) container_of_const(n, struct ccw_driver, driver)
extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
extern void ccw_device_destroy_console(struct ccw_device *);
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index b89159591ca0..46f5c9660616 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -13,6 +13,7 @@
#define _S390_CHECKSUM_H
#include <linux/instrumented.h>
+#include <linux/kmsan-checks.h>
#include <linux/in6.h>
static inline __wsum cksm(const void *buff, int len, __wsum sum)
@@ -23,6 +24,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
};
instrument_read(buff, len);
+ kmsan_check_memory(buff, len);
asm volatile("\n"
"0: cksm %[sum],%[rp]\n"
" jo 0b\n"
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index c786538e397c..dae8843b164f 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -12,6 +12,7 @@
#define _ASM_S390_CPACF_H
#include <asm/facility.h>
+#include <linux/kmsan-checks.h>
/*
* Instruction opcodes for the CPACF instructions
@@ -542,6 +543,8 @@ static inline void cpacf_trng(u8 *ucbuf, unsigned long ucbuf_len,
: [ucbuf] "+&d" (u.pair), [cbuf] "+&d" (c.pair)
: [fc] "K" (CPACF_PRNO_TRNG), [opc] "i" (CPACF_PRNO)
: "cc", "memory", "0");
+ kmsan_unpoison_memory(ucbuf, ucbuf_len);
+ kmsan_unpoison_memory(cbuf, cbuf_len);
}
/**
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index a0de5b9b02ea..9e4bbc3e53f8 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -10,6 +10,7 @@
#define _ASM_S390_CPU_MF_H
#include <linux/errno.h>
+#include <linux/kmsan-checks.h>
#include <asm/asm-extable.h>
#include <asm/facility.h>
@@ -239,6 +240,11 @@ static __always_inline int stcctm(enum stcctm_ctr_set set, u64 range, u64 *dest)
: "=d" (cc)
: "Q" (*dest), "d" (range), "i" (set)
: "cc", "memory");
+ /*
+ * If cc == 2, less than RANGE counters are stored, but it's not easy
+ * to tell how many. Always unpoison the whole range for simplicity.
+ */
+ kmsan_unpoison_memory(dest, range * sizeof(u64));
return cc;
}
diff --git a/arch/s390/include/asm/current.h b/arch/s390/include/asm/current.h
index 68f84315277c..d03a922c641e 100644
--- a/arch/s390/include/asm/current.h
+++ b/arch/s390/include/asm/current.h
@@ -14,6 +14,6 @@
struct task_struct;
-#define current ((struct task_struct *const)S390_lowcore.current_task)
+#define current ((struct task_struct *const)get_lowcore()->current_task)
#endif /* !(_S390_CURRENT_H) */
diff --git a/arch/s390/include/asm/dat-bits.h b/arch/s390/include/asm/dat-bits.h
new file mode 100644
index 000000000000..8d65eec2f124
--- /dev/null
+++ b/arch/s390/include/asm/dat-bits.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAT table and related structures
+ *
+ * Copyright IBM Corp. 2024
+ *
+ */
+
+#ifndef _S390_DAT_BITS_H
+#define _S390_DAT_BITS_H
+
+union asce {
+ unsigned long val;
+ struct {
+ unsigned long rsto: 52;/* Region- or Segment-Table Origin */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group control */
+ unsigned long p : 1; /* Private Space control */
+ unsigned long s : 1; /* Storage-Alteration-Event control */
+ unsigned long x : 1; /* Space-Switch-Event control */
+ unsigned long r : 1; /* Real-Space control */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type control */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum {
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 {
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+ unsigned long rfaa: 33;/* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc : 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry {
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc: 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr: 1; /* Common-Region Bit */
+ unsigned long tt: 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_table_entry_fc0 {
+ unsigned long pto: 53;/* Page-Table Origin */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_table_entry_fc1 {
+ unsigned long sfaa: 44;/* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc : 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry {
+ unsigned long val;
+ struct segment_table_entry_fc0 fc0;
+ struct segment_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc: 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs: 1; /* Common-Segment Bit */
+ unsigned long tt: 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+union page_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long pfra: 52;/* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long iep : 1; /* Instruction-Execution-Protection */
+ unsigned long : 8;
+ };
+};
+
+enum {
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+#endif /* _S390_DAT_BITS_H */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 20b94220113b..c0d43512f4fc 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -12,6 +12,7 @@
#include <linux/if_ether.h>
#include <linux/percpu.h>
#include <asm/asm-extable.h>
+#include <asm/sclp.h>
#include <asm/cio.h>
enum diag_stat_enum {
@@ -117,6 +118,8 @@ enum diag204_sc {
};
#define DIAG204_SUBCODE_MASK 0xffff
+#define DIAG204_BIF_BIT 0x80000000
+#define DIAG204_BUSY_WAIT (HZ / 10)
/* The two available diag 204 data formats */
enum diag204_format {
@@ -326,6 +329,11 @@ union diag318_info {
};
};
+static inline bool diag204_has_bif(void)
+{
+ return sclp.has_diag204_bif;
+}
+
int diag204(unsigned long subcode, unsigned long size, void *addr);
int diag224(void *ptr);
int diag26c(void *req, void *resp, enum diag26c_sc subcode);
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 70a30ae258b7..8f2c23cc52b6 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -91,6 +91,14 @@
/* Keep this the last entry. */
#define R_390_NUM 61
+/*
+ * HWCAP flags - for AT_HWCAP
+ *
+ * Bits 32-63 are reserved for use by libc.
+ * Bit 31 is reserved and will be used by libc to determine if a second
+ * argument is passed to IFUNC resolvers. This will be implemented when
+ * there is a need for AT_HWCAP2.
+ */
enum {
HWCAP_NR_ESAN3 = 0,
HWCAP_NR_ZARCH = 1,
diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h
index 7f5004065e8a..35555c944630 100644
--- a/arch/s390/include/asm/entry-common.h
+++ b/arch/s390/include/asm/entry-common.h
@@ -54,7 +54,7 @@ static __always_inline void arch_exit_to_user_mode(void)
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
unsigned long ti_work)
{
- choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+ choose_random_kstack_offset(get_tod_clock_fast());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index 796007125dff..b7d234838a36 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -20,7 +20,6 @@
#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
extern u64 stfle_fac_list[16];
-extern u64 alt_stfle_fac_list[16];
static inline void __set_facility(unsigned long nr, void *facilities)
{
@@ -92,8 +91,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size)
asm volatile(
" stfl 0(0)\n"
- : "=m" (S390_lowcore.stfl_fac_list));
- stfl_fac_list = S390_lowcore.stfl_fac_list;
+ : "=m" (get_lowcore()->stfl_fac_list));
+ stfl_fac_list = get_lowcore()->stfl_fac_list;
memcpy(stfle_fac_list, &stfl_fac_list, 4);
nr = 4; /* bytes stored by stfl */
if (stfl_fac_list & 0x01000000) {
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 77e479d44f1e..fbadca645af7 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -2,7 +2,6 @@
#ifndef _ASM_S390_FTRACE_H
#define _ASM_S390_FTRACE_H
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
#define ARCH_SUPPORTS_FTRACE_OPS 1
#define MCOUNT_INSN_SIZE 6
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index 58668ffb5488..a5b45388c91f 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -13,9 +13,9 @@
#include <asm/lowcore.h>
-#define local_softirq_pending() (S390_lowcore.softirq_pending)
-#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x))
-#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x))
+#define local_softirq_pending() (get_lowcore()->softirq_pending)
+#define set_softirq_pending(x) (get_lowcore()->softirq_pending = (x))
+#define or_softirq_pending(x) (get_lowcore()->softirq_pending |= (x))
#define __ARCH_IRQ_STAT
#define __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index ce5f4fe8be4d..cf1b5d6fb1a6 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -19,7 +19,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned long sz);
void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
-pte_t huge_ptep_get(pte_t *ptep);
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
@@ -64,7 +64,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t pte, int dirty)
{
- int changed = !pte_same(huge_ptep_get(ptep), pte);
+ int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte);
if (changed) {
huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
__set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
diff --git a/arch/s390/include/asm/irqflags.h b/arch/s390/include/asm/irqflags.h
index 02427b205c11..bcab456dfb80 100644
--- a/arch/s390/include/asm/irqflags.h
+++ b/arch/s390/include/asm/irqflags.h
@@ -37,12 +37,18 @@ static __always_inline void __arch_local_irq_ssm(unsigned long flags)
asm volatile("ssm %0" : : "Q" (flags) : "memory");
}
-static __always_inline unsigned long arch_local_save_flags(void)
+#ifdef CONFIG_KMSAN
+#define arch_local_irq_attributes noinline notrace __no_sanitize_memory __maybe_unused
+#else
+#define arch_local_irq_attributes __always_inline
+#endif
+
+static arch_local_irq_attributes unsigned long arch_local_save_flags(void)
{
return __arch_local_irq_stnsm(0xff);
}
-static __always_inline unsigned long arch_local_irq_save(void)
+static arch_local_irq_attributes unsigned long arch_local_irq_save(void)
{
return __arch_local_irq_stnsm(0xfc);
}
@@ -52,7 +58,12 @@ static __always_inline void arch_local_irq_disable(void)
arch_local_irq_save();
}
-static __always_inline void arch_local_irq_enable(void)
+static arch_local_irq_attributes void arch_local_irq_enable_external(void)
+{
+ __arch_local_irq_stosm(0x01);
+}
+
+static arch_local_irq_attributes void arch_local_irq_enable(void)
{
__arch_local_irq_stosm(0x03);
}
diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h
new file mode 100644
index 000000000000..f73e181d09ae
--- /dev/null
+++ b/arch/s390/include/asm/kmsan.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_KMSAN_H
+#define _ASM_S390_KMSAN_H
+
+#include <asm/lowcore.h>
+#include <asm/page.h>
+#include <linux/kmsan.h>
+#include <linux/mmzone.h>
+#include <linux/stddef.h>
+
+#ifndef MODULE
+
+static inline bool is_lowcore_addr(void *addr)
+{
+ return addr >= (void *)get_lowcore() &&
+ addr < (void *)(get_lowcore() + 1);
+}
+
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+ if (is_lowcore_addr(addr)) {
+ /*
+ * Different lowcores accessed via S390_lowcore are described
+ * by the same struct page. Resolve the prefix manually in
+ * order to get a distinct struct page.
+ */
+ addr += (void *)lowcore_ptr[raw_smp_processor_id()] -
+ (void *)get_lowcore();
+ if (KMSAN_WARN_ON(is_lowcore_addr(addr)))
+ return NULL;
+ return kmsan_get_metadata(addr, is_origin);
+ }
+ return NULL;
+}
+
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+ bool ret;
+
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this sacrifices occasionally breaking scheduling guarantees.
+ * Although, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = virt_addr_valid(addr);
+ preempt_enable_no_resched();
+
+ return ret;
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_S390_KMSAN_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 95990461888f..8e77afbed58e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -15,7 +15,6 @@
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_types.h>
-#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
@@ -427,6 +426,7 @@ struct kvm_vcpu_stat {
u64 instruction_io_other;
u64 instruction_lpsw;
u64 instruction_lpswe;
+ u64 instruction_lpswey;
u64 instruction_pfmf;
u64 instruction_ptff;
u64 instruction_sck;
@@ -1029,11 +1029,12 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
-int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa);
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa,
+ unsigned long gasce);
-static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa, unsigned long gasce)
{
- return __sie64a(virt_to_phys(sie_block), sie_block, rsa);
+ return __sie64a(virt_to_phys(sie_block), sie_block, rsa, gasce);
}
extern char sie_exit;
@@ -1045,7 +1046,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 8c5f16857539..183ac29afaf8 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -14,10 +14,15 @@
#include <asm/ctlreg.h>
#include <asm/cpu.h>
#include <asm/types.h>
+#include <asm/alternative.h>
#define LC_ORDER 1
#define LC_PAGES 2
+#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL)
+
+#ifndef __ASSEMBLY__
+
struct pgm_tdb {
u64 data[32];
};
@@ -97,8 +102,7 @@ struct lowcore {
__u64 save_area_async[8]; /* 0x0240 */
__u64 save_area_restart[1]; /* 0x0280 */
- /* CPU flags. */
- __u64 cpu_flags; /* 0x0288 */
+ __u64 pcpu; /* 0x0288 */
/* Return psws. */
psw_t return_psw; /* 0x0290 */
@@ -213,7 +217,17 @@ struct lowcore {
__u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed __aligned(8192);
-#define S390_lowcore (*((struct lowcore *) 0))
+static __always_inline struct lowcore *get_lowcore(void)
+{
+ struct lowcore *lc;
+
+ if (__is_defined(__DECOMPRESSOR))
+ return NULL;
+ asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE)
+ : [lc] "=d" (lc)
+ : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
+ return lc;
+}
extern struct lowcore *lowcore_ptr[];
@@ -222,4 +236,19 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
+#else /* __ASSEMBLY__ */
+
+.macro GET_LC reg
+ ALTERNATIVE "llilh \reg,0", \
+ __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \
+ ALT_LOWCORE
+.endm
+
+.macro STMG_LC start, end, savearea
+ ALTERNATIVE "stmg \start, \end, \savearea", \
+ __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \
+ ALT_LOWCORE
+.endm
+
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_LOWCORE_H */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index a7789a9f6218..d56eb0a1f37b 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -76,9 +76,9 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *
int cpu = smp_processor_id();
if (next == &init_mm)
- S390_lowcore.user_asce = s390_invalid_asce;
+ get_lowcore()->user_asce = s390_invalid_asce;
else
- S390_lowcore.user_asce.val = next->context.asce;
+ get_lowcore()->user_asce.val = next->context.asce;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
/* Clear previous user-ASCE from CR7 */
local_ctl_load(7, &s390_invalid_asce);
@@ -111,7 +111,7 @@ static inline void finish_arch_post_lock_switch(void)
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
- local_ctl_load(7, &S390_lowcore.user_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
}
#define activate_mm activate_mm
@@ -120,7 +120,7 @@ static inline void activate_mm(struct mm_struct *prev,
{
switch_mm(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
- local_ctl_load(7, &S390_lowcore.user_asce);
+ local_ctl_load(7, &get_lowcore()->user_asce);
}
#include <asm-generic/mmu_context.h>
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index b9c1f3cae842..192835a3e24d 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -5,8 +5,17 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <asm/facility.h>
extern int nospec_disable;
+extern int nobp;
+
+static inline bool nobp_enabled(void)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return false;
+ return nobp && test_facility(82);
+}
void nospec_init_branches(void);
void nospec_auto_detect(void);
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 224ff9d433ea..16e4caa931f1 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -162,6 +162,7 @@ static inline int page_reset_referenced(unsigned long addr)
#define _PAGE_ACC_BITS 0xf0 /* HW access control bits */
struct page;
+struct folio;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
@@ -173,10 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
-#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_folio_accessible(struct folio *folio);
+#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
int arch_make_page_accessible(struct page *page);
#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-#endif
struct vm_layout {
unsigned long kaslr_offset;
@@ -247,7 +248,9 @@ static inline unsigned long __phys_addr(unsigned long x, bool is_31bit)
#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
+#define phys_to_folio(phys) page_folio(phys_to_page(phys))
#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+#define folio_to_phys(page) pfn_to_phys(folio_pfn(folio))
static inline void *pfn_to_virt(unsigned long pfn)
{
@@ -276,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr)
#define AMODE31_SIZE (3 * PAGE_SIZE)
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#define __START_KERNEL 0x100000
#define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE
#define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE)
+#define TEXT_OFFSET 0x100000
+
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 3f609565734b..25f2077ba3c9 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -55,11 +55,11 @@ static __always_inline void pai_kernel_enter(struct pt_regs *regs)
return;
if (!static_branch_unlikely(&pai_key))
return;
- if (!S390_lowcore.ccd)
+ if (!get_lowcore()->ccd)
return;
if (!user_mode(regs))
return;
- WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd | PAI_CRYPTO_KERNEL_OFFSET);
+ WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd | PAI_CRYPTO_KERNEL_OFFSET);
}
static __always_inline void pai_kernel_exit(struct pt_regs *regs)
@@ -68,18 +68,15 @@ static __always_inline void pai_kernel_exit(struct pt_regs *regs)
return;
if (!static_branch_unlikely(&pai_key))
return;
- if (!S390_lowcore.ccd)
+ if (!get_lowcore()->ccd)
return;
if (!user_mode(regs))
return;
- WRITE_ONCE(S390_lowcore.ccd, S390_lowcore.ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
+ WRITE_ONCE(get_lowcore()->ccd, get_lowcore()->ccd & ~PAI_CRYPTO_KERNEL_OFFSET);
}
-enum paievt_mode {
- PAI_MODE_NONE,
- PAI_MODE_SAMPLING,
- PAI_MODE_COUNTING,
-};
-
#define PAI_SAVE_AREA(x) ((x)->hw.event_base)
+#define PAI_CPU_MASK(x) ((x)->hw.addr_filters)
+#define PAI_SWLIST(x) (&(x)->hw.tp_list)
+
#endif
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index 264095dd84bc..89a28740b6ab 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -9,7 +9,7 @@
* s390 uses its own implementation for per cpu data, the offset of
* the cpu local data area is cached in the cpu's lowcore memory.
*/
-#define __my_cpu_offset S390_lowcore.percpu_offset
+#define __my_cpu_offset get_lowcore()->percpu_offset
/*
* For 64 bit module code, the module may be more than 4G above the
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 70b6ee557eb2..3fa280d0672a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -107,6 +107,18 @@ static inline int is_module_addr(void *addr)
return 1;
}
+#ifdef CONFIG_KMSAN
+#define KMSAN_VMALLOC_SIZE (VMALLOC_END - VMALLOC_START)
+#define KMSAN_VMALLOC_SHADOW_START VMALLOC_END
+#define KMSAN_VMALLOC_SHADOW_END (KMSAN_VMALLOC_SHADOW_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_VMALLOC_ORIGIN_START KMSAN_VMALLOC_SHADOW_END
+#define KMSAN_VMALLOC_ORIGIN_END (KMSAN_VMALLOC_ORIGIN_START + KMSAN_VMALLOC_SIZE)
+#define KMSAN_MODULES_SHADOW_START KMSAN_VMALLOC_ORIGIN_END
+#define KMSAN_MODULES_SHADOW_END (KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#define KMSAN_MODULES_ORIGIN_START KMSAN_MODULES_SHADOW_END
+#define KMSAN_MODULES_ORIGIN_END (KMSAN_MODULES_ORIGIN_START + MODULES_LEN)
+#endif
+
#ifdef CONFIG_RANDOMIZE_BASE
#define KASLR_LEN (1UL << 31)
#else
@@ -609,7 +621,15 @@ static inline void csp(unsigned int *ptr, unsigned int old, unsigned int new)
: "cc");
}
-static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new)
+/**
+ * cspg() - Compare and Swap and Purge (CSPG)
+ * @ptr: Pointer to the value to be exchanged
+ * @old: The expected old value
+ * @new: The new value
+ *
+ * Return: True if compare and swap was successful, otherwise false.
+ */
+static inline bool cspg(unsigned long *ptr, unsigned long old, unsigned long new)
{
union register_pair r1 = { .even = old, .odd = new, };
unsigned long address = (unsigned long)ptr | 1;
@@ -619,6 +639,7 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
: [r1] "+&d" (r1.pair), "+m" (*ptr)
: [address] "d" (address)
: "cc");
+ return old == r1.even;
}
#define CRDTE_DTT_PAGE 0x00UL
@@ -627,7 +648,18 @@ static inline void cspg(unsigned long *ptr, unsigned long old, unsigned long new
#define CRDTE_DTT_REGION2 0x18UL
#define CRDTE_DTT_REGION1 0x1cUL
-static inline void crdte(unsigned long old, unsigned long new,
+/**
+ * crdte() - Compare and Replace DAT Table Entry
+ * @old: The expected old value
+ * @new: The new value
+ * @table: Pointer to the value to be exchanged
+ * @dtt: Table type of the table to be exchanged
+ * @address: The address mapped by the entry to be replaced
+ * @asce: The ASCE of this entry
+ *
+ * Return: True if compare and replace was successful, otherwise false.
+ */
+static inline bool crdte(unsigned long old, unsigned long new,
unsigned long *table, unsigned long dtt,
unsigned long address, unsigned long asce)
{
@@ -638,6 +670,7 @@ static inline void crdte(unsigned long old, unsigned long new,
: [r1] "+&d" (r1.pair)
: [r2] "d" (r2.pair), [asce] "a" (asce)
: "memory", "cc");
+ return old == r1.even;
}
/*
@@ -1167,7 +1200,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
@@ -1185,7 +1218,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
/* At this point the reference through the mapping is still present */
if (mm_is_protected(vma->vm_mm) && pte_present(res))
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
@@ -1217,14 +1250,14 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
* The notifier should have destroyed all protected vCPUs at this
* point, so the destroy should be successful.
*/
- if (full && !uv_destroy_owned_page(pte_val(res) & PAGE_MASK))
+ if (full && !uv_destroy_pte(res))
return res;
/*
* If something went wrong and the page could not be destroyed, or
* if this is not a mm teardown, the slower export is used as
* fallback instead.
*/
- uv_convert_owned_from_secure(pte_val(res) & PAGE_MASK);
+ uv_convert_from_secure_pte(res);
return res;
}
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 0e3da500e98c..3ae5f31c665d 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -14,7 +14,7 @@
static __always_inline int preempt_count(void)
{
- return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
@@ -22,26 +22,26 @@ static __always_inline void preempt_count_set(int pc)
int old, new;
do {
- old = READ_ONCE(S390_lowcore.preempt_count);
+ old = READ_ONCE(get_lowcore()->preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
- } while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
+ } while (__atomic_cmpxchg(&get_lowcore()->preempt_count,
old, new) != old);
}
static __always_inline void set_preempt_need_resched(void)
{
- __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+ __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
}
static __always_inline void clear_preempt_need_resched(void)
{
- __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+ __atomic_or(PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(READ_ONCE(get_lowcore()->preempt_count) & PREEMPT_NEED_RESCHED);
}
static __always_inline void __preempt_count_add(int val)
@@ -52,11 +52,11 @@ static __always_inline void __preempt_count_add(int val)
*/
if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) {
if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) {
- __atomic_add_const(val, &S390_lowcore.preempt_count);
+ __atomic_add_const(val, &get_lowcore()->preempt_count);
return;
}
}
- __atomic_add(val, &S390_lowcore.preempt_count);
+ __atomic_add(val, &get_lowcore()->preempt_count);
}
static __always_inline void __preempt_count_sub(int val)
@@ -66,12 +66,12 @@ static __always_inline void __preempt_count_sub(int val)
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+ return __atomic_add(-1, &get_lowcore()->preempt_count) == 1;
}
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
+ return unlikely(READ_ONCE(get_lowcore()->preempt_count) ==
preempt_offset);
}
@@ -81,12 +81,12 @@ static __always_inline bool should_resched(int preempt_offset)
static __always_inline int preempt_count(void)
{
- return READ_ONCE(S390_lowcore.preempt_count);
+ return READ_ONCE(get_lowcore()->preempt_count);
}
static __always_inline void preempt_count_set(int pc)
{
- S390_lowcore.preempt_count = pc;
+ get_lowcore()->preempt_count = pc;
}
static __always_inline void set_preempt_need_resched(void)
@@ -104,17 +104,17 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
- S390_lowcore.preempt_count += val;
+ get_lowcore()->preempt_count += val;
}
static __always_inline void __preempt_count_sub(int val)
{
- S390_lowcore.preempt_count -= val;
+ get_lowcore()->preempt_count -= val;
}
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return !--S390_lowcore.preempt_count && tif_need_resched();
+ return !--get_lowcore()->preempt_count && tif_need_resched();
}
static __always_inline bool should_resched(int preempt_offset)
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 07ad5a1df878..5ecd442535b9 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,13 +14,11 @@
#include <linux/bits.h>
-#define CIF_SIE 0 /* CPU needs SIE exit cleanup */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
-#define _CIF_SIE BIT(CIF_SIE)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
@@ -42,21 +40,37 @@
#include <asm/irqflags.h>
#include <asm/alternative.h>
+struct pcpu {
+ unsigned long ec_mask; /* bit mask for ec_xxx functions */
+ unsigned long ec_clk; /* sigp timestamp for ec_xxx */
+ unsigned long flags; /* per CPU flags */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
+ u16 address; /* physical cpu address */
+};
+
+DECLARE_PER_CPU(struct pcpu, pcpu_devices);
+
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
+static __always_inline struct pcpu *this_pcpu(void)
+{
+ return (struct pcpu *)(get_lowcore()->pcpu);
+}
+
static __always_inline void set_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags |= (1UL << flag);
+ this_pcpu()->flags |= (1UL << flag);
}
static __always_inline void clear_cpu_flag(int flag)
{
- S390_lowcore.cpu_flags &= ~(1UL << flag);
+ this_pcpu()->flags &= ~(1UL << flag);
}
static __always_inline bool test_cpu_flag(int flag)
{
- return S390_lowcore.cpu_flags & (1UL << flag);
+ return this_pcpu()->flags & (1UL << flag);
}
static __always_inline bool test_and_set_cpu_flag(int flag)
@@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag)
*/
static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
- struct lowcore *lc = lowcore_ptr[cpu];
-
- return lc->cpu_flags & (1UL << flag);
+ return per_cpu(pcpu_devices, cpu).flags & (1UL << flag);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@@ -269,7 +281,7 @@ static __always_inline unsigned long __current_stack_pointer(void)
static __always_inline bool on_thread_stack(void)
{
- unsigned long ksp = S390_lowcore.kernel_stack;
+ unsigned long ksp = get_lowcore()->kernel_stack;
return !((ksp ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
}
@@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
static __always_inline void bpon(void)
{
- asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82));
+ asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)));
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h
new file mode 100644
index 000000000000..17878b1d048c
--- /dev/null
+++ b/arch/s390/include/asm/runtime-const.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_RUNTIME_CONST_H
+#define _ASM_S390_RUNTIME_CONST_H
+
+#include <linux/uaccess.h>
+
+#define runtime_const_ptr(sym) \
+({ \
+ typeof(sym) __ret; \
+ \
+ asm_inline( \
+ "0: iihf %[__ret],%[c1]\n" \
+ " iilf %[__ret],%[c2]\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "=d" (__ret) \
+ : [c1] "i" (0x01234567UL), \
+ [c2] "i" (0x89abcdefUL)); \
+ __ret; \
+})
+
+#define runtime_const_shift_right_32(val, sym) \
+({ \
+ unsigned int __ret = (val); \
+ \
+ asm_inline( \
+ "0: srl %[__ret],12\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n" \
+ ".long 0b - .\n" \
+ ".popsection" \
+ : [__ret] "+d" (__ret)); \
+ __ret; \
+})
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 32-bit immediate for iihf and iilf in bits in I2 field */
+static inline void __runtime_fixup_32(u32 *p, unsigned int val)
+{
+ s390_kernel_write(p, &val, sizeof(val));
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __runtime_fixup_32(where + 2, val >> 32);
+ __runtime_fixup_32(where + 8, val);
+}
+
+/* Immediate value is lower 12 bits of D2 field of srl */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ u32 insn = *(u32 *)where;
+
+ insn &= 0xfffff000;
+ insn |= (val & 63);
+ s390_kernel_write(where, &insn, sizeof(insn));
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif /* _ASM_S390_RUNTIME_CONST_H */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 5742d23bba13..da3dad18fe50 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -84,6 +84,7 @@ struct sclp_info {
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
unsigned char has_kss : 1;
+ unsigned char has_diag204_bif : 1;
unsigned char has_gisaf : 1;
unsigned char has_diag318 : 1;
unsigned char has_diag320 : 1;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 32f70873e2b7..8505737712ee 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -77,24 +77,24 @@ extern unsigned long max_mappable;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
-#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
-#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
-#define MACHINE_IS_LPAR (S390_lowcore.machine_flags & MACHINE_FLAG_LPAR)
-
-#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
-#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
-#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
-#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
-#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
-#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
-#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
-#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
-#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
-#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
-#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
-#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
+#define MACHINE_IS_VM (get_lowcore()->machine_flags & MACHINE_FLAG_VM)
+#define MACHINE_IS_KVM (get_lowcore()->machine_flags & MACHINE_FLAG_KVM)
+#define MACHINE_IS_LPAR (get_lowcore()->machine_flags & MACHINE_FLAG_LPAR)
+
+#define MACHINE_HAS_DIAG9C (get_lowcore()->machine_flags & MACHINE_FLAG_DIAG9C)
+#define MACHINE_HAS_ESOP (get_lowcore()->machine_flags & MACHINE_FLAG_ESOP)
+#define MACHINE_HAS_IDTE (get_lowcore()->machine_flags & MACHINE_FLAG_IDTE)
+#define MACHINE_HAS_EDAT1 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT1)
+#define MACHINE_HAS_EDAT2 (get_lowcore()->machine_flags & MACHINE_FLAG_EDAT2)
+#define MACHINE_HAS_TOPOLOGY (get_lowcore()->machine_flags & MACHINE_FLAG_TOPOLOGY)
+#define MACHINE_HAS_TE (get_lowcore()->machine_flags & MACHINE_FLAG_TE)
+#define MACHINE_HAS_TLB_LC (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_TLB_GUEST (get_lowcore()->machine_flags & MACHINE_FLAG_TLB_GUEST)
+#define MACHINE_HAS_NX (get_lowcore()->machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS (get_lowcore()->machine_flags & MACHINE_FLAG_GS)
+#define MACHINE_HAS_SCC (get_lowcore()->machine_flags & MACHINE_FLAG_SCC)
+#define MACHINE_HAS_PCI_MIO (get_lowcore()->machine_flags & MACHINE_FLAG_PCI_MIO)
+#define MACHINE_HAS_RDP (get_lowcore()->machine_flags & MACHINE_FLAG_RDP)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 6e5b1b4b19a9..cd835f4fb11a 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -11,7 +11,7 @@
#include <asm/lowcore.h>
#include <asm/processor.h>
-#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
+#define raw_smp_processor_id() (get_lowcore()->cpu_nr)
extern struct mutex smp_cpu_state_mutex;
extern unsigned int smp_cpu_mt_shift;
@@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-extern void smp_call_online_cpu(void (*func)(void *), void *);
extern void smp_call_ipl_cpu(void (*func)(void *), void *);
extern void smp_emergency_stop(void);
@@ -59,7 +58,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
{
}
-extern int smp_rescan_cpus(void);
+extern int smp_rescan_cpus(bool early);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
diff --git a/arch/s390/include/asm/softirq_stack.h b/arch/s390/include/asm/softirq_stack.h
index 1ac5115d3115..42d61296bbad 100644
--- a/arch/s390/include/asm/softirq_stack.h
+++ b/arch/s390/include/asm/softirq_stack.h
@@ -8,7 +8,7 @@
#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
static inline void do_softirq_own_stack(void)
{
- call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq);
+ call_on_stack(0, get_lowcore()->async_stack, void, __do_softirq);
}
#endif
#endif /* __ASM_S390_SOFTIRQ_STACK_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 37127cd7749e..77d5e804af93 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -16,7 +16,7 @@
#include <asm/processor.h>
#include <asm/alternative.h>
-#define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
+#define SPINLOCK_LOCKVAL (get_lowcore()->spinlock_lockval)
extern int spin_retry;
@@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
typecheck(int, lp->lock);
kcsan_release();
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
" sth %1,%0\n"
: "=R" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 85b6738b826a..1d5ca13dc90f 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -65,6 +65,7 @@ struct stack_frame {
unsigned long sie_reason;
unsigned long sie_flags;
unsigned long sie_control_block_phys;
+ unsigned long sie_guest_asce;
};
};
unsigned long gprs[10];
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 351685de53d2..2ab868cbae6c 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -15,15 +15,12 @@
#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
-#define __HAVE_ARCH_MEMSET16 /* arch function */
-#define __HAVE_ARCH_MEMSET32 /* arch function */
-#define __HAVE_ARCH_MEMSET64 /* arch function */
void *memcpy(void *dest, const void *src, size_t n);
void *memset(void *s, int c, size_t n);
void *memmove(void *dest, const void *src, size_t n);
-#ifndef CONFIG_KASAN
+#if !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN)
#define __HAVE_ARCH_MEMCHR /* inline & arch function */
#define __HAVE_ARCH_MEMCMP /* arch function */
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
@@ -36,6 +33,9 @@ void *memmove(void *dest, const void *src, size_t n);
#define __HAVE_ARCH_STRNCPY /* arch function */
#define __HAVE_ARCH_STRNLEN /* inline & arch function */
#define __HAVE_ARCH_STRSTR /* arch function */
+#define __HAVE_ARCH_MEMSET16 /* arch function */
+#define __HAVE_ARCH_MEMSET32 /* arch function */
+#define __HAVE_ARCH_MEMSET64 /* arch function */
/* Prototypes for non-inlined arch strings functions. */
int memcmp(const void *s1, const void *s2, size_t n);
@@ -44,7 +44,7 @@ size_t strlcat(char *dest, const char *src, size_t n);
char *strncat(char *dest, const char *src, size_t n);
char *strncpy(char *dest, const char *src, size_t n);
char *strstr(const char *s1, const char *s2);
-#endif /* !CONFIG_KASAN */
+#endif /* !defined(CONFIG_KASAN) && !defined(CONFIG_KMSAN) */
#undef __HAVE_ARCH_STRCHR
#undef __HAVE_ARCH_STRNCHR
@@ -74,20 +74,30 @@ void *__memset16(uint16_t *s, uint16_t v, size_t count);
void *__memset32(uint32_t *s, uint32_t v, size_t count);
void *__memset64(uint64_t *s, uint64_t v, size_t count);
+#ifdef __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
{
return __memset16(s, v, count * sizeof(v));
}
+#endif
+#ifdef __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
{
return __memset32(s, v, count * sizeof(v));
}
+#endif
+#ifdef __HAVE_ARCH_MEMSET64
+#ifdef IN_BOOT_STRING_C
+void *memset64(uint64_t *s, uint64_t v, size_t count);
+#else
static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
{
return __memset64(s, v, count * sizeof(v));
}
+#endif
+#endif
#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a674c7d25da5..00ac01874a12 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -16,7 +16,7 @@
/*
* General size of kernel stacks
*/
-#ifdef CONFIG_KASAN
+#if defined(CONFIG_KASAN) || defined(CONFIG_KMSAN)
#define THREAD_SIZE_ORDER 4
#else
#define THREAD_SIZE_ORDER 2
@@ -40,6 +40,7 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
unsigned int cpu; /* current CPU */
+ unsigned char sie; /* running in SIE context */
};
/*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 4d646659a5f5..640901f2fbc3 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -161,16 +161,16 @@ static inline unsigned long local_tick_disable(void)
{
unsigned long old;
- old = S390_lowcore.clock_comparator;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ old = get_lowcore()->clock_comparator;
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return old;
}
static inline void local_tick_enable(unsigned long comp)
{
- S390_lowcore.clock_comparator = comp;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = comp;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 81ae8a98e7ec..a81f897a81ce 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -18,6 +18,7 @@
#include <asm/extable.h>
#include <asm/facility.h>
#include <asm-generic/access_ok.h>
+#include <linux/instrumented.h>
void debug_user_asce(int exit);
@@ -78,13 +79,24 @@ union oac {
int __noreturn __put_user_bad(void);
-#define __put_user_asm(to, from, size) \
-({ \
+#ifdef CONFIG_KMSAN
+#define get_put_user_noinstr_attributes \
+ noinline __maybe_unused __no_sanitize_memory
+#else
+#define get_put_user_noinstr_attributes __always_inline
+#endif
+
+#define DEFINE_PUT_USER(type) \
+static get_put_user_noinstr_attributes int \
+__put_user_##type##_noinstr(unsigned type __user *to, \
+ unsigned type *from, \
+ unsigned long size) \
+{ \
union oac __oac_spec = { \
.oac1.as = PSW_BITS_AS_SECONDARY, \
.oac1.a = 1, \
}; \
- int __rc; \
+ int rc; \
\
asm volatile( \
" lr 0,%[spec]\n" \
@@ -93,12 +105,28 @@ int __noreturn __put_user_bad(void);
"2:\n" \
EX_TABLE_UA_STORE(0b, 2b, %[rc]) \
EX_TABLE_UA_STORE(1b, 2b, %[rc]) \
- : [rc] "=&d" (__rc), [_to] "+Q" (*(to)) \
+ : [rc] "=&d" (rc), [_to] "+Q" (*(to)) \
: [_size] "d" (size), [_from] "Q" (*(from)), \
[spec] "d" (__oac_spec.val) \
: "cc", "0"); \
- __rc; \
-})
+ return rc; \
+} \
+ \
+static __always_inline int \
+__put_user_##type(unsigned type __user *to, unsigned type *from, \
+ unsigned long size) \
+{ \
+ int rc; \
+ \
+ rc = __put_user_##type##_noinstr(to, from, size); \
+ instrument_put_user(*from, to, size); \
+ return rc; \
+}
+
+DEFINE_PUT_USER(char);
+DEFINE_PUT_USER(short);
+DEFINE_PUT_USER(int);
+DEFINE_PUT_USER(long);
static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
@@ -106,24 +134,24 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
switch (size) {
case 1:
- rc = __put_user_asm((unsigned char __user *)ptr,
- (unsigned char *)x,
- size);
+ rc = __put_user_char((unsigned char __user *)ptr,
+ (unsigned char *)x,
+ size);
break;
case 2:
- rc = __put_user_asm((unsigned short __user *)ptr,
- (unsigned short *)x,
- size);
+ rc = __put_user_short((unsigned short __user *)ptr,
+ (unsigned short *)x,
+ size);
break;
case 4:
- rc = __put_user_asm((unsigned int __user *)ptr,
+ rc = __put_user_int((unsigned int __user *)ptr,
(unsigned int *)x,
size);
break;
case 8:
- rc = __put_user_asm((unsigned long __user *)ptr,
- (unsigned long *)x,
- size);
+ rc = __put_user_long((unsigned long __user *)ptr,
+ (unsigned long *)x,
+ size);
break;
default:
__put_user_bad();
@@ -134,13 +162,17 @@ static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned lon
int __noreturn __get_user_bad(void);
-#define __get_user_asm(to, from, size) \
-({ \
+#define DEFINE_GET_USER(type) \
+static get_put_user_noinstr_attributes int \
+__get_user_##type##_noinstr(unsigned type *to, \
+ unsigned type __user *from, \
+ unsigned long size) \
+{ \
union oac __oac_spec = { \
.oac2.as = PSW_BITS_AS_SECONDARY, \
.oac2.a = 1, \
}; \
- int __rc; \
+ int rc; \
\
asm volatile( \
" lr 0,%[spec]\n" \
@@ -149,13 +181,29 @@ int __noreturn __get_user_bad(void);
"2:\n" \
EX_TABLE_UA_LOAD_MEM(0b, 2b, %[rc], %[_to], %[_ksize]) \
EX_TABLE_UA_LOAD_MEM(1b, 2b, %[rc], %[_to], %[_ksize]) \
- : [rc] "=&d" (__rc), "=Q" (*(to)) \
+ : [rc] "=&d" (rc), "=Q" (*(to)) \
: [_size] "d" (size), [_from] "Q" (*(from)), \
[spec] "d" (__oac_spec.val), [_to] "a" (to), \
[_ksize] "K" (size) \
: "cc", "0"); \
- __rc; \
-})
+ return rc; \
+} \
+ \
+static __always_inline int \
+__get_user_##type(unsigned type *to, unsigned type __user *from, \
+ unsigned long size) \
+{ \
+ int rc; \
+ \
+ rc = __get_user_##type##_noinstr(to, from, size); \
+ instrument_get_user(*to); \
+ return rc; \
+}
+
+DEFINE_GET_USER(char);
+DEFINE_GET_USER(short);
+DEFINE_GET_USER(int);
+DEFINE_GET_USER(long);
static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
@@ -163,24 +211,24 @@ static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsign
switch (size) {
case 1:
- rc = __get_user_asm((unsigned char *)x,
- (unsigned char __user *)ptr,
- size);
+ rc = __get_user_char((unsigned char *)x,
+ (unsigned char __user *)ptr,
+ size);
break;
case 2:
- rc = __get_user_asm((unsigned short *)x,
- (unsigned short __user *)ptr,
- size);
+ rc = __get_user_short((unsigned short *)x,
+ (unsigned short __user *)ptr,
+ size);
break;
case 4:
- rc = __get_user_asm((unsigned int *)x,
+ rc = __get_user_int((unsigned int *)x,
(unsigned int __user *)ptr,
size);
break;
case 8:
- rc = __get_user_asm((unsigned long *)x,
- (unsigned long __user *)ptr,
- size);
+ rc = __get_user_long((unsigned long *)x,
+ (unsigned long __user *)ptr,
+ size);
break;
default:
__get_user_bad();
@@ -284,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
-void *s390_kernel_write(void *dst, const void *src, size_t size);
+void *__s390_kernel_write(void *dst, const void *src, size_t size);
+
+static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
+{
+ if (__is_defined(__DECOMPRESSOR))
+ return memcpy(dst, src, size);
+ return __s390_kernel_write(dst, src, size);
+}
int __noreturn __put_kernel_bad(void);
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index 4260bc5ce7f8..70fc671397da 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -35,6 +35,5 @@
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
-#define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_S390_UNISTD_H_ */
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index 0e7bd3873907..153d93468b77 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit)
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
}
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
static inline int is_prot_virt_guest(void)
@@ -442,7 +441,10 @@ static inline int share(unsigned long addr, u16 cmd)
if (!uv_call(0, (u64)&uvcb))
return 0;
- return -EINVAL;
+ pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n",
+ uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare",
+ uvcb.header.rc, uvcb.header.rrc);
+ panic("System security cannot be guaranteed unless the system panics now.\n");
}
/*
@@ -466,13 +468,6 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-#else
-#define is_prot_virt_guest() 0
-static inline int uv_set_shared(unsigned long addr) { return 0; }
-static inline int uv_remove_shared(unsigned long addr) { return 0; }
-#endif
-
-#if IS_ENABLED(CONFIG_KVM)
extern int prot_virt_host;
static inline int is_prot_virt_host(void)
@@ -483,35 +478,11 @@ static inline int is_prot_virt_host(void)
int uv_pin_shared(unsigned long paddr);
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-int uv_destroy_owned_page(unsigned long paddr);
-int uv_convert_from_secure(unsigned long paddr);
-int uv_convert_owned_from_secure(unsigned long paddr);
+int uv_destroy_folio(struct folio *folio);
+int uv_destroy_pte(pte_t pte);
+int uv_convert_from_secure_pte(pte_t pte);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
void setup_uv(void);
-#else
-#define is_prot_virt_host() 0
-static inline void setup_uv(void) {}
-
-static inline int uv_pin_shared(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_destroy_owned_page(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_convert_from_secure(unsigned long paddr)
-{
- return 0;
-}
-
-static inline int uv_convert_owned_from_secure(unsigned long paddr)
-{
- return 0;
-}
-#endif
#endif /* _ASM_S390_UV_H */
diff --git a/arch/s390/include/asm/vtime.h b/arch/s390/include/asm/vtime.h
index 561c91c1a87c..9d25fb35a042 100644
--- a/arch/s390/include/asm/vtime.h
+++ b/arch/s390/include/asm/vtime.h
@@ -4,16 +4,20 @@
static inline void update_timer_sys(void)
{
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
- S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.sys_enter_timer;
- S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+ struct lowcore *lc = get_lowcore();
+
+ lc->system_timer += lc->last_update_timer - lc->exit_timer;
+ lc->user_timer += lc->exit_timer - lc->sys_enter_timer;
+ lc->last_update_timer = lc->sys_enter_timer;
}
static inline void update_timer_mcck(void)
{
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - S390_lowcore.exit_timer;
- S390_lowcore.user_timer += S390_lowcore.exit_timer - S390_lowcore.mcck_enter_timer;
- S390_lowcore.last_update_timer = S390_lowcore.mcck_enter_timer;
+ struct lowcore *lc = get_lowcore();
+
+ lc->system_timer += lc->last_update_timer - lc->exit_timer;
+ lc->user_timer += lc->exit_timer - lc->mcck_enter_timer;
+ lc->last_update_timer = lc->mcck_enter_timer;
}
#endif /* _S390_VTIME_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7241fa194709..e47a4be54ff8 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
+obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o
extra-y += vmlinux.lds
@@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
-obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
index f9efc54ec4b7..09cd24cbe74e 100644
--- a/arch/s390/kernel/abs_lowcore.c
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -4,6 +4,7 @@
#include <asm/abs_lowcore.h>
unsigned long __bootdata_preserved(__abs_lowcore);
+int __bootdata_preserved(relocate_lowcore);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 1ac5f707dd70..8d5d0de35de0 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -1,68 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/text-patching.h>
+
+#include <linux/uaccess.h>
+#include <asm/nospec-branch.h>
+#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
-#include <asm/nospec-branch.h>
-
-static int __initdata_or_module alt_instr_disabled;
-
-static int __init disable_alternative_instructions(char *str)
-{
- alt_instr_disabled = 1;
- return 0;
-}
-
-early_param("noaltinstr", disable_alternative_instructions);
-static void __init_or_module __apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
+void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
- struct alt_instr *a;
u8 *instr, *replacement;
+ struct alt_instr *a;
+ bool replace;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
+ if (!(a->ctx & ctx))
+ continue;
+ switch (a->type) {
+ case ALT_TYPE_FACILITY:
+ replace = test_facility(a->data);
+ break;
+ case ALT_TYPE_SPEC:
+ replace = nobp_enabled();
+ break;
+ case ALT_TYPE_LOWCORE:
+ replace = have_relocated_lowcore();
+ break;
+ default:
+ replace = false;
+ }
+ if (!replace)
+ continue;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
-
- if (!__test_facility(a->facility, alt_stfle_fac_list))
- continue;
s390_kernel_write(instr, replacement, a->instrlen);
}
}
-
-void __init_or_module apply_alternatives(struct alt_instr *start,
- struct alt_instr *end)
-{
- if (!alt_instr_disabled)
- __apply_alternatives(start, end);
-}
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-void __init apply_alternative_instructions(void)
-{
- apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static void do_sync_core(void *info)
-{
- sync_core();
-}
-
-void text_poke_sync(void)
-{
- on_each_cpu(do_sync_core, NULL, 1);
-}
-
-void text_poke_sync_lock(void)
-{
- cpus_read_lock();
- text_poke_sync();
- cpus_read_unlock();
-}
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f55979f64d49..ffa0dd2dbaac 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -28,6 +28,7 @@ int main(void)
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
+ OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@@ -63,6 +64,7 @@ int main(void)
OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
+ OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
BLANK();
OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
@@ -113,7 +115,7 @@ int main(void)
OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
- OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
+ OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@@ -185,5 +187,7 @@ int main(void)
#endif
OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
+
+ OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 9863ebe75019..edae13416196 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -451,7 +451,7 @@ static void *nt_final(void *ptr)
/*
* Initialize ELF header (new kernel)
*/
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
{
memset(ehdr, 0, sizeof(*ehdr));
memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -465,11 +465,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
ehdr->e_phoff = sizeof(Elf64_Ehdr);
ehdr->e_ehsize = sizeof(Elf64_Ehdr);
ehdr->e_phentsize = sizeof(Elf64_Phdr);
- /*
- * Number of memory chunk PT_LOAD program headers plus one kernel
- * image PT_LOAD program header plus one PT_NOTE program header.
- */
- ehdr->e_phnum = mem_chunk_cnt + 1 + 1;
+ /* Number of PT_LOAD program headers plus PT_NOTE program header */
+ ehdr->e_phnum = phdr_count + 1;
return ehdr + 1;
}
@@ -503,12 +500,14 @@ static int get_mem_chunk_cnt(void)
/*
* Initialize ELF loads (new kernel)
*/
-static void loads_init(Elf64_Phdr *phdr)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
{
- unsigned long old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ unsigned long old_identity_base = 0;
phys_addr_t start, end;
u64 idx;
+ if (os_info_has_vm)
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
phdr->p_type = PT_LOAD;
phdr->p_vaddr = old_identity_base + start;
@@ -522,6 +521,11 @@ static void loads_init(Elf64_Phdr *phdr)
}
}
+static bool os_info_has_vm(void)
+{
+ return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
/*
* Prepare PT_LOAD type program header for kernel image region
*/
@@ -566,7 +570,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
return ptr;
}
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
{
size_t size;
@@ -581,10 +585,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
size += nt_vmcoreinfo_size();
/* nt_final */
size += sizeof(Elf64_Nhdr);
- /* PT_LOAD type program header for kernel text region */
- size += sizeof(Elf64_Phdr);
/* PT_LOADS */
- size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+ size += phdr_count * sizeof(Elf64_Phdr);
return size;
}
@@ -595,8 +597,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
{
Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+ int mem_chunk_cnt, phdr_text_cnt;
size_t alloc_size;
- int mem_chunk_cnt;
void *ptr, *hdr;
u64 hdr_off;
@@ -615,12 +617,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
}
mem_chunk_cnt = get_mem_chunk_cnt();
+ phdr_text_cnt = os_info_has_vm() ? 1 : 0;
- alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+ alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
hdr = kzalloc(alloc_size, GFP_KERNEL);
- /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+ /*
+ * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
* a dump with this crash kernel will fail. Panic now to allow other
* dump mechanisms to take over.
*/
@@ -628,21 +632,23 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
panic("s390 kdump allocating elfcorehdr failed");
/* Init elf header */
- ptr = ehdr_init(hdr, mem_chunk_cnt);
+ phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
/* Init program headers */
- phdr_notes = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_text = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
- phdr_loads = ptr;
- ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+ if (phdr_text_cnt) {
+ phdr_text = phdr_notes + 1;
+ phdr_loads = phdr_text + 1;
+ } else {
+ phdr_loads = phdr_notes + 1;
+ }
+ ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
/* Init notes */
hdr_off = PTR_DIFF(ptr, hdr);
ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
/* Init kernel text program header */
- text_init(phdr_text);
+ if (phdr_text_cnt)
+ text_init(phdr_text);
/* Init loads */
- loads_init(phdr_loads);
+ loads_init(phdr_loads, phdr_text_cnt);
/* Finalize program headers */
hdr_off = PTR_DIFF(ptr, hdr);
*addr = (unsigned long long) hdr;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 85328a0ef3b6..bce50ca75ea7 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -954,7 +954,7 @@ static int debug_active = 1;
* always allow read, allow write only if debug_stoppable is set or
* if debug_active is already off
*/
-static int s390dbf_procactive(struct ctl_table *table, int write,
+static int s390dbf_procactive(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 8dee9aa0ec95..ac7b8c8e3133 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -185,6 +185,8 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
}
EXPORT_SYMBOL(diag14);
+#define DIAG204_BUSY_RC 8
+
static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr)
{
union register_pair rp = { .even = *subcode, .odd = size };
@@ -215,16 +217,18 @@ int diag204(unsigned long subcode, unsigned long size, void *addr)
{
if (addr) {
if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
- return -1;
+ return -EINVAL;
if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
- return -1;
+ return -EINVAL;
}
if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
diag_stat_inc(DIAG_STAT_X204);
size = __diag204(&subcode, size, addr);
- if (subcode)
- return -1;
+ if (subcode == DIAG204_BUSY_RC)
+ return -EBUSY;
+ else if (subcode)
+ return -EOPNOTSUPP;
return size;
}
EXPORT_SYMBOL(diag204);
@@ -278,12 +282,14 @@ int diag224(void *ptr)
int rc = -EOPNOTSUPP;
diag_stat_inc(DIAG_STAT_X224);
- asm volatile(
- " diag %1,%2,0x224\n"
- "0: lhi %0,0x0\n"
+ asm volatile("\n"
+ " diag %[type],%[addr],0x224\n"
+ "0: lhi %[rc],0\n"
"1:\n"
EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (addr) : "memory");
+ : [rc] "+d" (rc)
+ , "=m" (*(struct { char buf[PAGE_SIZE]; } *)ptr)
+ : [type] "d" (0), [addr] "d" (addr));
return rc;
}
EXPORT_SYMBOL(diag224);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index d2012635b093..1ecd0580561f 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -61,28 +61,28 @@ static bool in_task_stack(unsigned long sp, struct task_struct *task,
static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->async_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_IRQ, stack);
}
static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->nodat_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_NODAT, stack);
}
static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->mcck_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_MCCK, stack);
}
static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET;
+ unsigned long stack = get_lowcore()->restart_stack - STACK_INIT_OFFSET;
return in_stack(sp, info, STACK_TYPE_RESTART, stack);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index c666271433fb..14d324865e33 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
+decompressor_handled_param(relocate_lowcore);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@@ -72,7 +73,7 @@ static void __init reset_tod_clock(void)
memset(&tod_clock_base, 0, sizeof(tod_clock_base));
tod_clock_base.tod = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+ get_lowcore()->last_update_clock = TOD_UNIX_EPOCH;
}
/*
@@ -99,7 +100,7 @@ static noinline __init void detect_machine_type(void)
/* Check current-configuration-level */
if (stsi(NULL, 0, 0, 0) <= 2) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_LPAR;
return;
}
/* Get virtual-machine cpu information. */
@@ -108,9 +109,9 @@ static noinline __init void detect_machine_type(void)
/* Detect known hypervisors */
if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_KVM;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_KVM;
else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4))
- S390_lowcore.machine_flags |= MACHINE_FLAG_VM;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_VM;
}
/* Remove leading, trailing and double whitespace. */
@@ -166,7 +167,7 @@ static __init void setup_topology(void)
if (!test_facility(11))
return;
- S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TOPOLOGY;
for (max_mnest = 6; max_mnest > 1; max_mnest--) {
if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0)
break;
@@ -186,15 +187,8 @@ static noinline __init void setup_lowcore_early(void)
psw.addr = (unsigned long)early_pgm_check_handler;
psw.mask = PSW_KERNEL_BITS;
- S390_lowcore.program_new_psw = psw;
- S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
-}
-
-static noinline __init void setup_facility_list(void)
-{
- memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
- if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
- __clear_facility(82, alt_stfle_fac_list);
+ get_lowcore()->program_new_psw = psw;
+ get_lowcore()->preempt_count = INIT_PREEMPT_COUNT;
}
static __init void detect_diag9c(void)
@@ -211,43 +205,43 @@ static __init void detect_diag9c(void)
EX_TABLE(0b,1b)
: "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc");
if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_DIAG9C;
}
static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT1;
system_ctl_set_bit(0, CR0_EDAT_BIT);
}
if (test_facility(78))
- S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_EDAT2;
if (test_facility(3))
- S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_IDTE;
if (test_facility(50) && test_facility(73)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TE;
system_ctl_set_bit(0, CR0_TRANSACTIONAL_EXECUTION_BIT);
}
if (test_facility(51))
- S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_LC;
if (test_facility(129))
system_ctl_set_bit(0, CR0_VECTOR_BIT);
if (test_facility(130))
- S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_NX;
if (test_facility(133))
- S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_GS;
if (test_facility(139) && (tod_clock_base.tod >> 63)) {
/* Enabled signed clock comparator comparisons */
- S390_lowcore.machine_flags |= MACHINE_FLAG_SCC;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_SCC;
clock_comparator_max = -1ULL >> 1;
system_ctl_set_bit(0, CR0_CLOCK_COMPARATOR_SIGN_BIT);
}
if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) {
- S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_PCI_MIO;
/* the control bit is set during PCI initialization */
}
if (test_facility(194))
- S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
+ get_lowcore()->machine_flags |= MACHINE_FLAG_RDP;
}
static inline void save_vector_registers(void)
@@ -291,7 +285,6 @@ void __init startup_init(void)
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
- setup_facility_list();
detect_machine_type();
setup_arch_string();
setup_boot_command_line();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 60cf917a7122..749410cfdbc0 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@@ -28,49 +28,54 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
+#include <asm/lowcore.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
- ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
+ ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
- .macro LPSWEY address,lpswe
- ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
+ .macro LPSWEY address, lpswe
+ ALTERNATIVE_2 "b \lpswe;nopr", \
+ ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \
+ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
+ ALT_LOWCORE
.endm
- .macro MBEAR reg
- ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+ .macro MBEAR reg, lowcore
+ ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
+ ALT_FACILITY(193)
.endm
- .macro CHECK_STACK savearea
+ .macro CHECK_STACK savearea, lowcore
#ifdef CONFIG_CHECK_STACK
tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
- lghi %r14,\savearea
+ la %r14,\savearea(\lowcore)
jz stack_overflow
#endif
.endm
- .macro CHECK_VMAP_STACK savearea,oklabel
+ .macro CHECK_VMAP_STACK savearea, lowcore, oklabel
#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
- clg %r14,__LC_KERNEL_STACK
+ clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_ASYNC_STACK
+ clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_MCCK_STACK
+ clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_NODAT_STACK
+ clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
- clg %r14,__LC_RESTART_STACK
+ clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
- lghi %r14,\savearea
+ la %r14,\savearea(\lowcore)
j stack_overflow
#else
j \oklabel
@@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
- ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
+ ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
- "j .+12; nop; nop", 82
+ "j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
- "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
+ "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
- .macro SIEEXIT sie_control
- lg %r9,\sie_control # get control block pointer
- ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ .macro SIEEXIT sie_control,lowcore
+ lg %r9,\sie_control # get control block pointer
+ ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
+ lg %r9,__LC_CURRENT(\lowcore)
+ mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
@@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm)
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
- stg %r3,__LC_CURRENT # store task struct of next
- stg %r15,__LC_KERNEL_STACK # store end of kernel stack
+ GET_LC %r13
+ stg %r3,__LC_CURRENT(%r13) # store task struct of next
+ stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
aghi %r3,__TASK_pid
- mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
+ mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
SYM_FUNC_END(__switch_to_asm)
@@ -179,22 +186,21 @@ SYM_FUNC_END(__switch_to_asm)
* %r2 pointer to sie control block phys
* %r3 pointer to sie control block virt
* %r4 guest register save area
+ * %r5 guest asce
*/
SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
- lg %r12,__LC_CURRENT
+ GET_LC %r13
+ lg %r14,__LC_CURRENT(%r13)
stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
+ stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
- mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
+ mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
- lg %r14,__LC_GMAP # get gmap pointer
- ltgr %r14,%r14
- jz .Lsie_gmap
- oi __LC_CPU_FLAGS+7,_CIF_SIE
- lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce
-.Lsie_gmap:
+ mvi __TI_sie(%r14),1
+ lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
tm __SIE_PROG20+3(%r14),3 # last exit...
@@ -212,8 +218,10 @@ SYM_FUNC_START(__sie64a)
.Lsie_skip:
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
- lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
- ni __LC_CPU_FLAGS+7,255-_CIF_SIE
+ GET_LC %r14
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
+ lg %r14,__LC_CURRENT(%r14)
+ mvi __TI_sie(%r14),0
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
@@ -256,14 +264,15 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
- stpt __LC_SYS_ENTER_TIMER
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
- STBEAR __LC_LAST_BREAK
- lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+ STBEAR __LC_LAST_BREAK(%r13)
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
# clear user controlled register to prevent speculative use
@@ -278,17 +287,17 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
- mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
- MBEAR %r2
+ mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13)
+ MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)
@@ -299,12 +308,13 @@ SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ GET_LC %r13
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
+ stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)
@@ -313,39 +323,40 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
- stpt __LC_SYS_ENTER_TIMER
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
+ GET_LC %r13
+ stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lgr %r10,%r15
- lmg %r8,%r9,__LC_PGM_OLD_PSW
+ lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
tmhh %r8,0x0001 # coming from user space?
jno .Lpgm_skip_asce
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
j 3f # -> fault in user space
.Lpgm_skip_asce:
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
- tm __LC_PGM_ILC+3,0x80 # check for per exception
+ tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-2: CHECK_STACK __LC_SAVE_AREA_SYNC
+2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-3: lg %r15,__LC_KERNEL_STACK
+ CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f
+3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
- mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13)
+ mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
- ltg %r12,__LC_GMAP
+ ltg %r12,__LC_GMAP(%r13)
jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r10)
+ SIEEXIT __SF_SIE_CONTROL(%r10),%r13
#endif
5: stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
@@ -361,11 +372,11 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
- mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+ mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -374,11 +385,11 @@ SYM_CODE_START(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
- mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
+ mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
- stg %r14,__LC_RETURN_PSW+8
+ stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
- LBEAR __LC_PGM_LAST_BREAK
+ LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)
@@ -387,25 +398,27 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
- stckf __LC_INT_CLOCK
- stpt __LC_SYS_ENTER_TIMER
- STBEAR __LC_LAST_BREAK
+ STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC
+ GET_LC %r13
+ stckf __LC_INT_CLOCK(%r13)
+ stpt __LC_SYS_ENTER_TIMER(%r13)
+ STBEAR __LC_LAST_BREAK(%r13)
BPOFF
- stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
- lmg %r8,%r9,\lc_old_psw
+ lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz 0f
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
-0: CHECK_STACK __LC_SAVE_AREA_ASYNC
+0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
-1: lctlg %c1,%c1,__LC_KERNEL_ASCE
- lg %r15,__LC_KERNEL_STACK
+1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
+ lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -419,18 +432,18 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
- mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
- MBEAR %r11
+ mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13)
+ MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
+ mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
- lctlg %c1,%c1,__LC_USER_ASCE
+ lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
- stpt __LC_EXIT_TIMER
+ stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@@ -445,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
- lmg %r8,%r9,__LC_MCK_OLD_PSW
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
+ GET_LC %r13
+ lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
ptlb
- lghi %r14,__LC_CPU_TIMER_SAVE_AREA
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
+ lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
- la %r14,__LC_SYS_ENTER_TIMER
- clc 0(8,%r14),__LC_EXIT_TIMER
+ la %r14,__LC_SYS_ENTER_TIMER(%r13)
+ clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
- la %r14,__LC_EXIT_TIMER
-1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_EXIT_TIMER(%r13)
+1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
- la %r14,__LC_LAST_UPDATE_TIMER
+ la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
- mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
-3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
+ mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
+3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
jnz .Lmcck_user
- TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
+ TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
- TSTMSK __LC_CPU_FLAGS,_CIF_SIE
+ lg %r10,__LC_CURRENT(%r13)
+ tm __TI_sie(%r10),0xff
jz .Lmcck_user
- # Need to compare the address instead of a CIF_SIE* flag.
+ # Need to compare the address instead of __TI_SIE flag.
# Otherwise there would be a race between setting the flag
# and entering SIE (or leaving and clearing the flag). This
# would cause machine checks targeted at the guest to be
@@ -482,18 +497,19 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+ lg %r10,__LC_PCPU
+ oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
- SIEEXIT __SF_SIE_CONTROL(%r15)
+ SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
- lg %r15,__LC_MCCK_STACK
+ lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stctg %c1,%c1,__PT_CR1(%r11)
- lctlg %c1,%c1,__LC_KERNEL_ASCE
+ lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
- lghi %r14,__LC_GPREGS_SAVE_AREA+64
- stmg %r0,%r7,__PT_R0(%r11)
+ lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
+ mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@@ -503,7 +519,6 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
- mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@@ -511,12 +526,13 @@ SYM_CODE_START(mcck_int_handler)
brasl %r14,s390_do_machine_check
lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
- mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
- tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
+ mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
+ tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
BPON
- stpt __LC_EXIT_TIMER
-0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+ stpt __LC_EXIT_TIMER(%r13)
+0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
+ ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@@ -552,7 +568,7 @@ SYM_CODE_START(mcck_int_handler)
SYM_CODE_END(mcck_int_handler)
SYM_CODE_START(restart_int_handler)
- ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
+ ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
@@ -560,15 +576,17 @@ SYM_CODE_START(restart_int_handler)
0: larl %r15,daton_psw
lpswe 0(%r15) # turn dat on, keep irqs off
.Ldaton:
- lg %r15,__LC_RESTART_STACK
+ GET_LC %r15
+ lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
- mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
- mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
+ GET_LC %r13
+ mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
+ mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
- lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
- lg %r2,__LC_RESTART_DATA
- lgf %r3,__LC_RESTART_SOURCE
+ lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
+ lg %r2,__LC_RESTART_DATA(%r13)
+ lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@@ -590,7 +608,8 @@ SYM_CODE_END(restart_int_handler)
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
SYM_CODE_START(stack_overflow)
- lg %r15,__LC_NODAT_STACK # change to panic stack
+ GET_LC %r15
+ lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index fa90bbdc5ef9..6f2e87920288 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags)
int mask;
if (flags & KERNEL_FPC)
- fpu_lfpc(&state->fpc);
+ fpu_lfpc_safe(&state->fpc);
if (!cpu_has_vx()) {
if (flags & KERNEL_VXR_V0V7)
load_fp_regs_vx(state->vxrs);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index ddf2ee47cb87..0bd6adc40a34 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -12,6 +12,7 @@
#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/kmsan-checks.h>
#include <linux/kprobes.h>
#include <linux/execmem.h>
#include <trace/syscall.h>
@@ -303,6 +304,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
if (bit < 0)
return;
+ kmsan_unpoison_memory(fregs, sizeof(*fregs));
regs = ftrace_get_regs(fregs);
p = get_kprobe((kprobe_opcode_t *)ip);
if (!regs || unlikely(!p) || kprobe_disabled(p))
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 45413b04efc5..396034b2fe67 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -18,14 +19,15 @@
__HEAD
SYM_CODE_START(startup_continue)
larl %r1,tod_clock_base
- mvc 0(16,%r1),__LC_BOOT_CLOCK
+ GET_LC %r2
+ mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2)
#
# Setup stack
#
larl %r14,init_task
- stg %r14,__LC_CURRENT
+ stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
- stg %r15,__LC_KERNEL_STACK
+ stg %r15,__LC_KERNEL_STACK(%r2)
brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index af9c97c0ad73..39cb8d0ae348 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -24,6 +24,7 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
void account_idle_time_irq(void)
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ struct lowcore *lc = get_lowcore();
unsigned long idle_time;
u64 cycles_new[8];
int i;
@@ -34,13 +35,13 @@ void account_idle_time_irq(void)
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
}
- idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
+ idle_time = lc->int_clock - idle->clock_idle_enter;
- S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
- S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+ lc->steal_timer += idle->clock_idle_enter - lc->last_update_clock;
+ lc->last_update_clock = lc->int_clock;
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
- S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
+ lc->system_timer += lc->last_update_timer - idle->timer_idle_enter;
+ lc->last_update_timer = lc->sys_enter_timer;
/* Account time spent with enabled wait psw loaded as idle time. */
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 3a7d6e172211..f17bb7bf9392 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -2112,7 +2112,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
- smp_call_online_cpu(__do_restart, arg);
+ smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 9acc6630abd3..1af5a08d72ab 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -100,8 +100,8 @@ static const struct irq_class irqclass_sub_desc[] = {
static void do_IRQ(struct pt_regs *regs, int irq)
{
- if (tod_after_eq(S390_lowcore.int_clock,
- S390_lowcore.clock_comparator))
+ if (tod_after_eq(get_lowcore()->int_clock,
+ get_lowcore()->clock_comparator))
/* Serve timer interrupts first. */
clock_comparator_work();
generic_handle_irq(irq);
@@ -111,7 +111,7 @@ static int on_async_stack(void)
{
unsigned long frame = current_frame_address();
- return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
+ return ((get_lowcore()->async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0;
}
static void do_irq_async(struct pt_regs *regs, int irq)
@@ -119,7 +119,7 @@ static void do_irq_async(struct pt_regs *regs, int irq)
if (on_async_stack()) {
do_IRQ(regs, irq);
} else {
- call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ,
+ call_on_stack(2, get_lowcore()->async_stack, void, do_IRQ,
struct pt_regs *, regs, int, irq);
}
}
@@ -153,8 +153,8 @@ void noinstr do_io_irq(struct pt_regs *regs)
set_cpu_flag(CIF_NOHZ_DELAY);
do {
- regs->tpi_info = S390_lowcore.tpi_info;
- if (S390_lowcore.tpi_info.adapter_IO)
+ regs->tpi_info = get_lowcore()->tpi_info;
+ if (get_lowcore()->tpi_info.adapter_IO)
do_irq_async(regs, THIN_INTERRUPT);
else
do_irq_async(regs, IO_INTERRUPT);
@@ -183,9 +183,9 @@ void noinstr do_ext_irq(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- regs->int_code = S390_lowcore.ext_int_code_addr;
- regs->int_parm = S390_lowcore.ext_params;
- regs->int_parm_long = S390_lowcore.ext_params2;
+ regs->int_code = get_lowcore()->ext_int_code_addr;
+ regs->int_parm = get_lowcore()->ext_params;
+ regs->int_parm_long = get_lowcore()->ext_params2;
from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT);
if (from_idle)
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3aee98efc374..8f681ccfb83a 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -52,7 +52,7 @@ static void __do_machine_kdump(void *data)
purgatory = (purgatory_t)image->start;
/* store_status() saved the prefix register to lowcore */
- prefix = (unsigned long) S390_lowcore.prefixreg_save_area;
+ prefix = (unsigned long)get_lowcore()->prefixreg_save_area;
/* Now do the reset */
s390_reset_system();
@@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
- memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
+ memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
call_nodat(1, int, purgatory, int, 1);
@@ -91,7 +91,7 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
- mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
+ mcesa = __va(get_lowcore()->mcesad & MCESA_ORIGIN_MASK);
if (cpu_has_vx())
save_vx_regs((__vector128 *) mcesa->vector_save_area);
if (MACHINE_HAS_GS) {
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 230d010bac9b..fbd218b6fc8e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -117,6 +117,7 @@ static __always_inline char *u64_to_hex(char *dest, u64 val)
static notrace void s390_handle_damage(void)
{
+ struct lowcore *lc = get_lowcore();
union ctlreg0 cr0, cr0_new;
char message[100];
psw_t psw_save;
@@ -125,7 +126,7 @@ static notrace void s390_handle_damage(void)
smp_emergency_stop();
diag_amode31_ops.diag308_reset();
ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x");
- u64_to_hex(ptr, S390_lowcore.mcck_interruption_code);
+ u64_to_hex(ptr, lc->mcck_interruption_code);
/*
* Disable low address protection and make machine check new PSW a
@@ -135,17 +136,17 @@ static notrace void s390_handle_damage(void)
cr0_new = cr0;
cr0_new.lap = 0;
local_ctl_load(0, &cr0_new.reg);
- psw_save = S390_lowcore.mcck_new_psw;
- psw_bits(S390_lowcore.mcck_new_psw).io = 0;
- psw_bits(S390_lowcore.mcck_new_psw).ext = 0;
- psw_bits(S390_lowcore.mcck_new_psw).wait = 1;
+ psw_save = lc->mcck_new_psw;
+ psw_bits(lc->mcck_new_psw).io = 0;
+ psw_bits(lc->mcck_new_psw).ext = 0;
+ psw_bits(lc->mcck_new_psw).wait = 1;
sclp_emergency_printk(message);
/*
* Restore machine check new PSW and control register 0 to original
* values. This makes possible system dump analysis easier.
*/
- S390_lowcore.mcck_new_psw = psw_save;
+ lc->mcck_new_psw = psw_save;
local_ctl_load(0, &cr0.reg);
disabled_wait();
while (1);
@@ -226,7 +227,7 @@ static bool notrace nmi_registers_valid(union mci mci)
/*
* Set the clock comparator register to the next expected value.
*/
- set_clock_comparator(S390_lowcore.clock_comparator);
+ set_clock_comparator(get_lowcore()->clock_comparator);
if (!mci.gr || !mci.fp || !mci.fc)
return false;
/*
@@ -252,7 +253,7 @@ static bool notrace nmi_registers_valid(union mci mci)
* check handling must take care of this. The host values are saved by
* KVM and are not affected.
*/
- cr2.reg = S390_lowcore.cregs_save_area[2];
+ cr2.reg = get_lowcore()->cregs_save_area[2];
if (cr2.gse && !mci.gs && !test_cpu_flag(CIF_MCCK_GUEST))
return false;
if (!mci.ms || !mci.pm || !mci.ia)
@@ -278,11 +279,10 @@ static void notrace s390_backup_mcck_info(struct pt_regs *regs)
sie_page = container_of(sie_block, struct sie_page, sie_block);
mcck_backup = &sie_page->mcck_info;
- mcck_backup->mcic = S390_lowcore.mcck_interruption_code &
+ mcck_backup->mcic = get_lowcore()->mcck_interruption_code &
~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE);
- mcck_backup->ext_damage_code = S390_lowcore.external_damage_code;
- mcck_backup->failing_storage_address
- = S390_lowcore.failing_storage_address;
+ mcck_backup->ext_damage_code = get_lowcore()->external_damage_code;
+ mcck_backup->failing_storage_address = get_lowcore()->failing_storage_address;
}
NOKPROBE_SYMBOL(s390_backup_mcck_info);
@@ -302,6 +302,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
static unsigned long long last_ipd;
+ struct lowcore *lc = get_lowcore();
struct mcck_struct *mcck;
unsigned long long tmp;
irqentry_state_t irq_state;
@@ -314,7 +315,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
if (user_mode(regs))
update_timer_mcck();
inc_irq_stat(NMI_NMI);
- mci.val = S390_lowcore.mcck_interruption_code;
+ mci.val = lc->mcck_interruption_code;
mcck = this_cpu_ptr(&cpu_mcck);
/*
@@ -382,9 +383,9 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
}
if (mci.ed && mci.ec) {
/* External damage */
- if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC))
+ if (lc->external_damage_code & (1U << ED_STP_SYNC))
mcck->stp_queue |= stp_sync_check();
- if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
+ if (lc->external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
mcck_pending = 1;
}
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9b8c24ebb008..e11ec15960a1 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
+int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
+
static int __init nobp_setup_early(char *str)
{
bool enabled;
@@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str)
* The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
- __set_facility(82, alt_stfle_fac_list);
+ nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
return 0;
}
@@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
- __clear_facility(82, alt_stfle_fac_list);
+ nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index 52d4353188ad..a95188818637 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, alt_stfle_fac_list))
+ if (nobp_enabled())
return sprintf(buf, "Mitigation: limited branch prediction\n");
return sprintf(buf, "Vulnerable\n");
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 1434642e9cba..6968be98af11 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
+ int i;
- auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
- do {
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
+ /* Counter set not authorized */
+ if (!(auth & cpumf_ctr_ctl[i]))
+ continue;
+ /* Counter set size zero was not saved */
+ if (!cpum_cf_read_setsize(i))
+ continue;
+
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
- auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
- } while (ctrstart->def && auth);
+ }
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 06efad5b4f93..736c1d9632dd 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1022,7 +1022,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
}
/* Load current program parameter */
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
"interval %#lx tear %#lx dear %#lx\n", __func__,
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index 4ad472d130a3..2f5a20e300f6 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -36,8 +36,8 @@ struct paicrypt_map {
struct pai_userdata *save; /* Page to store no-zero counters */
unsigned int active_events; /* # of PAI crypto users */
refcount_t refcnt; /* Reference count mapped buffers */
- enum paievt_mode mode; /* Type of event */
struct perf_event *event; /* Perf event for sampling */
+ struct list_head syswide_list; /* List system-wide sampling events */
};
struct paicrypt_mapptr {
@@ -84,20 +84,16 @@ static DEFINE_MUTEX(pai_reserve_mutex);
/* Adjust usage counters and remove allocated memory when all users are
* gone.
*/
-static void paicrypt_event_destroy(struct perf_event *event)
+static void paicrypt_event_destroy_cpu(struct perf_event *event, int cpu)
{
- struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr,
- event->cpu);
+ struct paicrypt_mapptr *mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
struct paicrypt_map *cpump = mp->mapptr;
- static_branch_dec(&pai_key);
mutex_lock(&pai_reserve_mutex);
- debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d"
- " mode %d refcnt %u\n", __func__,
- event->attr.config, event->cpu,
- cpump->active_events, cpump->mode,
+ debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d "
+ "refcnt %u\n", __func__, event->attr.config,
+ event->cpu, cpump->active_events,
refcount_read(&cpump->refcnt));
- free_page(PAI_SAVE_AREA(event));
if (refcount_dec_and_test(&cpump->refcnt)) {
debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n",
__func__, (unsigned long)cpump->page,
@@ -111,6 +107,23 @@ static void paicrypt_event_destroy(struct perf_event *event)
mutex_unlock(&pai_reserve_mutex);
}
+static void paicrypt_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ static_branch_dec(&pai_key);
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+
+ for_each_cpu(cpu, mask)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paicrypt_event_destroy_cpu(event, event->cpu);
+ }
+}
+
static u64 paicrypt_getctr(unsigned long *page, int nr, bool kernel)
{
if (kernel)
@@ -156,23 +169,15 @@ static u64 paicrypt_getall(struct perf_event *event)
return sum;
}
-/* Used to avoid races in checking concurrent access of counting and
- * sampling for crypto events
- *
- * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is
- * allowed and when this event is running, no counting event is allowed.
- * Several counting events are allowed in parallel, but no sampling event
- * is allowed while one (or more) counting events are running.
- *
+/* Check concurrent access of counting and sampling for crypto events.
* This function is called in process context and it is save to block.
* When the event initialization functions fails, no other call back will
* be invoked.
*
* Allocate the memory for the event.
*/
-static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
+static struct paicrypt_map *paicrypt_busy(struct perf_event *event, int cpu)
{
- struct perf_event_attr *a = &event->attr;
struct paicrypt_map *cpump = NULL;
struct paicrypt_mapptr *mp;
int rc;
@@ -185,7 +190,7 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
goto unlock;
/* Allocate node for this event */
- mp = per_cpu_ptr(paicrypt_root.mapptr, event->cpu);
+ mp = per_cpu_ptr(paicrypt_root.mapptr, cpu);
cpump = mp->mapptr;
if (!cpump) { /* Paicrypt_map allocated? */
cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
@@ -193,25 +198,9 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
rc = -ENOMEM;
goto free_root;
}
+ INIT_LIST_HEAD(&cpump->syswide_list);
}
- if (a->sample_period) { /* Sampling requested */
- if (cpump->mode != PAI_MODE_NONE)
- rc = -EBUSY; /* ... sampling/counting active */
- } else { /* Counting requested */
- if (cpump->mode == PAI_MODE_SAMPLING)
- rc = -EBUSY; /* ... and sampling active */
- }
- /*
- * This error case triggers when there is a conflict:
- * Either sampling requested and counting already active, or visa
- * versa. Therefore the struct paicrypto_map for this CPU is
- * needed or the error could not have occurred. Only adjust root
- * node refcount.
- */
- if (rc)
- goto free_root;
-
/* Allocate memory for counter page and counter extraction.
* Only the first counting event has to allocate a page.
*/
@@ -235,26 +224,58 @@ static struct paicrypt_map *paicrypt_busy(struct perf_event *event)
/* Set mode and reference count */
rc = 0;
refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING : PAI_MODE_COUNTING;
mp->mapptr = cpump;
- debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d"
- " mode %d refcnt %u page %#lx save %p rc %d\n",
- __func__, a->sample_period, cpump->active_events,
- cpump->mode, refcount_read(&cpump->refcnt),
+ debug_sprintf_event(cfm_dbg, 5, "%s users %d refcnt %u page %#lx "
+ "save %p rc %d\n", __func__, cpump->active_events,
+ refcount_read(&cpump->refcnt),
(unsigned long)cpump->page, cpump->save, rc);
goto unlock;
free_paicrypt_map:
+ /* Undo memory allocation */
kfree(cpump);
mp->mapptr = NULL;
free_root:
paicrypt_root_free();
-
unlock:
mutex_unlock(&pai_reserve_mutex);
return rc ? ERR_PTR(rc) : cpump;
}
+static int paicrypt_event_init_all(struct perf_event *event)
+{
+ struct paicrypt_map *cpump;
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ cpump = paicrypt_busy(event, cpu);
+ if (IS_ERR(cpump)) {
+ for_each_cpu(cpu, maskptr)
+ paicrypt_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ rc = PTR_ERR(cpump);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error all cpumask are freed and all events have been destroyed.
+ * Save of which CPUs data structures have been allocated for.
+ * Release them in paicrypt_event_destroy call back function
+ * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
/* Might be called on different CPU than the one the event is intended for. */
static int paicrypt_event_init(struct perf_event *event)
{
@@ -269,10 +290,7 @@ static int paicrypt_event_init(struct perf_event *event)
if (a->config < PAI_CRYPTO_BASE ||
a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
- /* Allow only CRYPTO_ALL for sampling. */
+ /* Allow only CRYPTO_ALL for sampling */
if (a->sample_period && a->config != PAI_CRYPTO_BASE)
return -EINVAL;
/* Get a page to store last counter values for sampling */
@@ -284,13 +302,17 @@ static int paicrypt_event_init(struct perf_event *event)
}
}
- cpump = paicrypt_busy(event);
- if (IS_ERR(cpump)) {
+ if (event->cpu >= 0) {
+ cpump = paicrypt_busy(event, event->cpu);
+ if (IS_ERR(cpump))
+ rc = PTR_ERR(cpump);
+ } else {
+ rc = paicrypt_event_init_all(event);
+ }
+ if (rc) {
free_page(PAI_SAVE_AREA(event));
- rc = PTR_ERR(cpump);
goto out;
}
-
event->destroy = paicrypt_event_destroy;
if (a->sample_period) {
@@ -331,8 +353,14 @@ static void paicrypt_start(struct perf_event *event, int flags)
sum = paicrypt_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->page, PAGE_SIZE);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
}
}
@@ -344,7 +372,7 @@ static int paicrypt_add(struct perf_event *event, int flags)
if (++cpump->active_events == 1) {
ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET;
- WRITE_ONCE(S390_lowcore.ccd, ccd);
+ WRITE_ONCE(get_lowcore()->ccd, ccd);
local_ctl_set_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
}
if (flags & PERF_EF_START)
@@ -353,6 +381,7 @@ static int paicrypt_add(struct perf_event *event, int flags)
return 0;
}
+static void paicrypt_have_sample(struct perf_event *, struct paicrypt_map *);
static void paicrypt_stop(struct perf_event *event, int flags)
{
struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
@@ -361,8 +390,13 @@ static void paicrypt_stop(struct perf_event *event, int flags)
if (!event->attr.sample_period) { /* Counting */
paicrypt_read(event);
} else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ perf_sched_cb_dec(event->pmu);
+ list_del(PAI_SWLIST(event));
+ } else {
+ paicrypt_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
}
event->hw.state = PERF_HES_STOPPED;
}
@@ -375,7 +409,7 @@ static void paicrypt_del(struct perf_event *event, int flags)
paicrypt_stop(event, PERF_EF_UPDATE);
if (--cpump->active_events == 0) {
local_ctl_clear_bit(0, CR0_CRYPTOGRAPHY_COUNTER_BIT);
- WRITE_ONCE(S390_lowcore.ccd, 0);
+ WRITE_ONCE(get_lowcore()->ccd, 0);
}
}
@@ -455,23 +489,30 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump,
}
/* Check if there is data to be saved on schedule out of a task. */
-static int paicrypt_have_sample(void)
+static void paicrypt_have_sample(struct perf_event *event,
+ struct paicrypt_map *cpump)
{
- struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
- struct paicrypt_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
size_t rawsize;
- int rc = 0;
if (!event) /* No event active */
- return 0;
+ return;
rawsize = paicrypt_copy(cpump->save, cpump->page,
(unsigned long *)PAI_SAVE_AREA(event),
- cpump->event->attr.exclude_user,
- cpump->event->attr.exclude_kernel);
+ event->attr.exclude_user,
+ event->attr.exclude_kernel);
if (rawsize) /* No incremented counters */
- rc = paicrypt_push_sample(rawsize, cpump, event);
- return rc;
+ paicrypt_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paicrypt_have_samples(void)
+{
+ struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr);
+ struct paicrypt_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paicrypt_have_sample(event, cpump);
}
/* Called on schedule-in and schedule-out. No access to event structure,
@@ -480,10 +521,10 @@ static int paicrypt_have_sample(void)
static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
+ * results on schedule_out and if page was dirty, save old values.
*/
if (!sched_in)
- paicrypt_have_sample();
+ paicrypt_have_samples();
}
/* Attribute definitions for paicrypt interface. As with other CPU
@@ -527,7 +568,7 @@ static const struct attribute_group *paicrypt_attr_groups[] = {
/* Performance monitoring unit for mapped counters */
static struct pmu paicrypt = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = paicrypt_event_init,
.add = paicrypt_add,
.del = paicrypt_del,
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
index a6da7e0cc7a6..6295531b39a2 100644
--- a/arch/s390/kernel/perf_pai_ext.c
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -47,11 +47,11 @@ struct paiext_cb { /* PAI extension 1 control block */
struct paiext_map {
unsigned long *area; /* Area for CPU to store counters */
struct pai_userdata *save; /* Area to store non-zero counters */
- enum paievt_mode mode; /* Type of event */
unsigned int active_events; /* # of PAI Extension users */
refcount_t refcnt;
struct perf_event *event; /* Perf event for sampling */
struct paiext_cb *paiext_cb; /* PAI extension control block area */
+ struct list_head syswide_list; /* List system-wide sampling events */
};
struct paiext_mapptr {
@@ -70,6 +70,8 @@ static void paiext_root_free(void)
free_percpu(paiext_root.mapptr);
paiext_root.mapptr = NULL;
}
+ debug_sprintf_event(paiext_dbg, 5, "%s root.refcount %d\n", __func__,
+ refcount_read(&paiext_root.refcnt));
}
/* On initialization of first event also allocate per CPU data dynamically.
@@ -115,20 +117,34 @@ static void paiext_free(struct paiext_mapptr *mp)
}
/* Release the PMU if event is the last perf event */
-static void paiext_event_destroy(struct perf_event *event)
+static void paiext_event_destroy_cpu(struct perf_event *event, int cpu)
{
- struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, cpu);
struct paiext_map *cpump = mp->mapptr;
- free_page(PAI_SAVE_AREA(event));
mutex_lock(&paiext_reserve_mutex);
if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */
paiext_free(mp);
paiext_root_free();
mutex_unlock(&paiext_reserve_mutex);
- debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
- event->cpu, mp->mapptr);
+}
+
+static void paiext_event_destroy(struct perf_event *event)
+{
+ int cpu;
+
+ free_page(PAI_SAVE_AREA(event));
+ if (event->cpu == -1) {
+ struct cpumask *mask = PAI_CPU_MASK(event);
+ for_each_cpu(cpu, mask)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(mask);
+ } else {
+ paiext_event_destroy_cpu(event, event->cpu);
+ }
+ debug_sprintf_event(paiext_dbg, 4, "%s cpu %d\n", __func__,
+ event->cpu);
}
/* Used to avoid races in checking concurrent access of counting and
@@ -145,19 +161,18 @@ static void paiext_event_destroy(struct perf_event *event)
*
* Allocate the memory for the event.
*/
-static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+static int paiext_alloc_cpu(struct perf_event *event, int cpu)
{
struct paiext_mapptr *mp;
struct paiext_map *cpump;
int rc;
mutex_lock(&paiext_reserve_mutex);
-
rc = paiext_root_alloc();
if (rc)
goto unlock;
- mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+ mp = per_cpu_ptr(paiext_root.mapptr, cpu);
cpump = mp->mapptr;
if (!cpump) { /* Paiext_map allocated? */
rc = -ENOMEM;
@@ -185,24 +200,13 @@ static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
paiext_free(mp);
goto undo;
}
+ INIT_LIST_HEAD(&cpump->syswide_list);
refcount_set(&cpump->refcnt, 1);
- cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
- : PAI_MODE_COUNTING;
+ rc = 0;
} else {
- /* Multiple invocation, check what is active.
- * Supported are multiple counter events or only one sampling
- * event concurrently at any one time.
- */
- if (cpump->mode == PAI_MODE_SAMPLING ||
- (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) {
- rc = -EBUSY;
- goto undo;
- }
refcount_inc(&cpump->refcnt);
}
- rc = 0;
-
undo:
if (rc) {
/* Error in allocation of event, decrement anchor. Since
@@ -217,6 +221,38 @@ unlock:
return rc;
}
+static int paiext_alloc(struct perf_event *event)
+{
+ struct cpumask *maskptr;
+ int cpu, rc = -ENOMEM;
+
+ maskptr = kzalloc(sizeof(*maskptr), GFP_KERNEL);
+ if (!maskptr)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ rc = paiext_alloc_cpu(event, cpu);
+ if (rc) {
+ for_each_cpu(cpu, maskptr)
+ paiext_event_destroy_cpu(event, cpu);
+ kfree(maskptr);
+ goto out;
+ }
+ cpumask_set_cpu(cpu, maskptr);
+ }
+
+ /*
+ * On error all cpumask are freed and all events have been destroyed.
+ * Save of which CPUs data structures have been allocated for.
+ * Release them in paicrypt_event_destroy call back function
+ * for this event.
+ */
+ PAI_CPU_MASK(event) = maskptr;
+ rc = 0;
+out:
+ return rc;
+}
+
/* The PAI extension 1 control block supports up to 128 entries. Return
* the index within PAIE1_CB given the event number. Also validate event
* number.
@@ -246,9 +282,6 @@ static int paiext_event_init(struct perf_event *event)
rc = paiext_event_valid(event);
if (rc)
return rc;
- /* Allow only CPU wide operation, no process context for now. */
- if ((event->attach_state & PERF_ATTACH_TASK) || event->cpu == -1)
- return -ENOENT;
/* Allow only event NNPA_ALL for sampling. */
if (a->sample_period && a->config != PAI_NNPA_BASE)
return -EINVAL;
@@ -262,7 +295,10 @@ static int paiext_event_init(struct perf_event *event)
return -ENOMEM;
}
- rc = paiext_alloc(a, event);
+ if (event->cpu >= 0)
+ rc = paiext_alloc_cpu(event, event->cpu);
+ else
+ rc = paiext_alloc(event);
if (rc) {
free_page(PAI_SAVE_AREA(event));
return rc;
@@ -334,8 +370,15 @@ static void paiext_start(struct perf_event *event, int flags)
sum = paiext_getall(event); /* Get current value */
local64_set(&event->hw.prev_count, sum);
} else { /* Sampling */
- cpump->event = event;
- perf_sched_cb_inc(event->pmu);
+ memcpy((void *)PAI_SAVE_AREA(event), cpump->area,
+ PAIE1_CTRBLOCK_SZ);
+ /* Enable context switch callback for system-wide sampling */
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_add_tail(PAI_SWLIST(event), &cpump->syswide_list);
+ perf_sched_cb_inc(event->pmu);
+ } else {
+ cpump->event = event;
+ }
}
}
@@ -346,12 +389,10 @@ static int paiext_add(struct perf_event *event, int flags)
struct paiext_cb *pcb = cpump->paiext_cb;
if (++cpump->active_events == 1) {
- S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+ get_lowcore()->aicd = virt_to_phys(cpump->paiext_cb);
pcb->acc = virt_to_phys(cpump->area) | 0x1;
/* Enable CPU instruction lookup for PAIE1 control block */
local_ctl_set_bit(0, CR0_PAI_EXTENSION_BIT);
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
}
if (flags & PERF_EF_START)
paiext_start(event, PERF_EF_RELOAD);
@@ -359,6 +400,7 @@ static int paiext_add(struct perf_event *event, int flags)
return 0;
}
+static void paiext_have_sample(struct perf_event *, struct paiext_map *);
static void paiext_stop(struct perf_event *event, int flags)
{
struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
@@ -367,8 +409,13 @@ static void paiext_stop(struct perf_event *event, int flags)
if (!event->attr.sample_period) { /* Counting */
paiext_read(event);
} else { /* Sampling */
- perf_sched_cb_dec(event->pmu);
- cpump->event = NULL;
+ if (!(event->attach_state & PERF_ATTACH_TASK)) {
+ list_del(PAI_SWLIST(event));
+ perf_sched_cb_dec(event->pmu);
+ } else {
+ paiext_have_sample(event, cpump);
+ cpump->event = NULL;
+ }
}
event->hw.state = PERF_HES_STOPPED;
}
@@ -384,9 +431,7 @@ static void paiext_del(struct perf_event *event, int flags)
/* Disable CPU instruction lookup for PAIE1 control block */
local_ctl_clear_bit(0, CR0_PAI_EXTENSION_BIT);
pcb->acc = 0;
- S390_lowcore.aicd = 0;
- debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
- __func__, S390_lowcore.aicd, pcb->acc);
+ get_lowcore()->aicd = 0;
}
}
@@ -470,21 +515,28 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump,
}
/* Check if there is data to be saved on schedule out of a task. */
-static int paiext_have_sample(void)
+static void paiext_have_sample(struct perf_event *event,
+ struct paiext_map *cpump)
{
- struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
- struct paiext_map *cpump = mp->mapptr;
- struct perf_event *event = cpump->event;
size_t rawsize;
- int rc = 0;
if (!event)
- return 0;
+ return;
rawsize = paiext_copy(cpump->save, cpump->area,
(unsigned long *)PAI_SAVE_AREA(event));
if (rawsize) /* Incremented counters */
- rc = paiext_push_sample(rawsize, cpump, event);
- return rc;
+ paiext_push_sample(rawsize, cpump, event);
+}
+
+/* Check if there is data to be saved on schedule out of a task. */
+static void paiext_have_samples(void)
+{
+ struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+ struct paiext_map *cpump = mp->mapptr;
+ struct perf_event *event;
+
+ list_for_each_entry(event, &cpump->syswide_list, hw.tp_list)
+ paiext_have_sample(event, cpump);
}
/* Called on schedule-in and schedule-out. No access to event structure,
@@ -493,10 +545,10 @@ static int paiext_have_sample(void)
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
- * results on schedule_out and if page was dirty, clear values.
+ * results on schedule_out and if page was dirty, save old values.
*/
if (!sched_in)
- paiext_have_sample();
+ paiext_have_samples();
}
/* Attribute definitions for pai extension1 interface. As with other CPU
@@ -542,7 +594,7 @@ static const struct attribute_group *paiext_attr_groups[] = {
/* Performance monitoring unit for mapped counters */
static struct pmu paiext = {
- .task_ctx_nr = perf_invalid_context,
+ .task_ctx_nr = perf_hw_context,
.event_init = paiext_event_init,
.add = paiext_add,
.del = paiext_del,
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d8740631df4b..9637aee43c40 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -71,10 +71,10 @@ void flush_thread(void)
void arch_setup_new_exec(void)
{
- if (S390_lowcore.current_pid != current->pid) {
- S390_lowcore.current_pid = current->pid;
+ if (get_lowcore()->current_pid != current->pid) {
+ get_lowcore()->current_pid = current->pid;
if (test_facility(40))
- lpp(&S390_lowcore.lpp);
+ lpp(&get_lowcore()->lpp);
}
}
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 65c1464eea4f..5ce9a795a0fe 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -17,7 +17,8 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
-
+#include <linux/smp.h>
+#include <asm/text-patching.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
+
+void text_poke_sync(void)
+{
+ on_each_cpu(do_sync_core, NULL, 1);
+}
+
+void text_poke_sync_lock(void)
+{
+ cpus_read_lock();
+ text_poke_sync();
+ cpus_read_unlock();
+}
+
/*
* cpu_init - initializes state that is per-CPU.
*/
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 88087a32ebc6..69fcaf54d5ca 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@@ -20,20 +21,15 @@
# r3 = Parameter for function
#
SYM_CODE_START(store_status)
- /* Save register one and load save area base */
- stg %r1,__LC_SAVE_AREA_RESTART
+ STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
- lghi %r1,__LC_GPREGS_SAVE_AREA
- stmg %r0,%r15,0(%r1)
- mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
+ GET_LC %r13
/* Control registers */
- lghi %r1,__LC_CREGS_SAVE_AREA
- stctg %c0,%c15,0(%r1)
+ stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
- lghi %r1,__LC_AREGS_SAVE_AREA
- stam %a0,%a15,0(%r1)
+ stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
- lghi %r1,__LC_FPREGS_SAVE_AREA
+ lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@@ -51,21 +47,21 @@ SYM_CODE_START(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
- lghi %r1,__LC_FP_CREG_SAVE_AREA
+ lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
- lghi %r1,__LC_CPU_TIMER_SAVE_AREA
+ lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
- lghi %r1,__LC_PREFIX_SAVE_AREA
+ lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
- lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
larl %r4,clkcmp
stckc 0(%r4)
+ lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
- lghi %r1,__LC_PSW_SAVE_AREA
+ lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 90c2c786bb35..a3fea683b227 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);
struct vm_layout __bootdata_preserved(vm_layout);
-EXPORT_SYMBOL_GPL(vm_layout);
+EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
-u64 alt_stfle_fac_list[16];
struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
@@ -406,6 +405,7 @@ static void __init setup_lowcore(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
+ lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
lc->external_new_psw.mask = PSW_KERNEL_BITS;
@@ -421,16 +421,16 @@ static void __init setup_lowcore(void)
lc->clock_comparator = clock_comparator_max;
lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
- lc->machine_flags = S390_lowcore.machine_flags;
- lc->preempt_count = S390_lowcore.preempt_count;
+ lc->machine_flags = get_lowcore()->machine_flags;
+ lc->preempt_count = get_lowcore()->preempt_count;
nmi_alloc_mcesa_early(&lc->mcesad);
- lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
- lc->exit_timer = S390_lowcore.exit_timer;
- lc->user_timer = S390_lowcore.user_timer;
- lc->system_timer = S390_lowcore.system_timer;
- lc->steal_timer = S390_lowcore.steal_timer;
- lc->last_update_timer = S390_lowcore.last_update_timer;
- lc->last_update_clock = S390_lowcore.last_update_clock;
+ lc->sys_enter_timer = get_lowcore()->sys_enter_timer;
+ lc->exit_timer = get_lowcore()->exit_timer;
+ lc->user_timer = get_lowcore()->user_timer;
+ lc->system_timer = get_lowcore()->system_timer;
+ lc->steal_timer = get_lowcore()->steal_timer;
+ lc->last_update_timer = get_lowcore()->last_update_timer;
+ lc->last_update_clock = get_lowcore()->last_update_clock;
/*
* Allocate the global restart stack which is the same for
* all CPUs in case *one* of them does a PSW restart.
@@ -439,7 +439,7 @@ static void __init setup_lowcore(void)
lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET;
lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET;
- lc->kernel_stack = S390_lowcore.kernel_stack;
+ lc->kernel_stack = get_lowcore()->kernel_stack;
/*
* Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
* restart data to the absolute zero lowcore. This is necessary if
@@ -455,8 +455,8 @@ static void __init setup_lowcore(void)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
- lc->kernel_asce = S390_lowcore.kernel_asce;
- lc->user_asce = S390_lowcore.user_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
+ lc->user_asce = get_lowcore()->user_asce;
system_ctlreg_init_save_area(lc);
abs_lc = get_abs_lowcore();
@@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void)
}
/*
- * Reserve memory used for lowcore/command line/kernel image.
+ * Reserve memory used for lowcore.
+ */
+static void __init reserve_lowcore(void)
+{
+ void *lowcore_start = get_lowcore();
+ void *lowcore_end = lowcore_start + sizeof(struct lowcore);
+ void *start, *end;
+
+ if ((void *)__identity_base < lowcore_end) {
+ start = max(lowcore_start, (void *)__identity_base);
+ end = min(lowcore_end, (void *)(__identity_base + ident_map_size));
+ memblock_reserve(__pa(start), __pa(end));
+ }
+}
+
+/*
+ * Reserve memory used for absolute lowcore/command line/kernel image.
*/
static void __init reserve_kernel(void)
{
@@ -889,6 +905,9 @@ void __init setup_arch(char **cmdline_p)
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ if (have_relocated_lowcore())
+ pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
+
log_component_list();
/* Have one command line that is parsed and saved in /proc/cmdline */
@@ -915,6 +934,7 @@ void __init setup_arch(char **cmdline_p)
/* Do some memory reservations *before* memory is added to memblock */
reserve_pgtables();
+ reserve_lowcore();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 0324649aae0a..fbba37ec53cf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -74,18 +74,15 @@ enum {
CPU_STATE_CONFIGURED,
};
-static DEFINE_PER_CPU(struct cpu *, cpu_device);
-
-struct pcpu {
- unsigned long ec_mask; /* bit mask for ec_xxx functions */
- unsigned long ec_clk; /* sigp timestamp for ec_xxx */
- signed char state; /* physical cpu state */
- signed char polarization; /* physical polarization */
- u16 address; /* physical cpu address */
-};
-
static u8 boot_core_type;
-static struct pcpu pcpu_devices[NR_CPUS];
+DEFINE_PER_CPU(struct pcpu, pcpu_devices);
+/*
+ * Pointer to the pcpu area of the boot CPU. This is required when a restart
+ * interrupt is triggered on an offline CPU. For that case accessing percpu
+ * data with the common primitives does not work, since the percpu offset is
+ * stored in a non existent lowcore.
+ */
+static struct pcpu *ipl_pcpu;
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -176,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
- if (pcpu_devices[cpu].address == address)
- return pcpu_devices + cpu;
+ if (per_cpu(pcpu_devices, cpu).address == address)
+ return &per_cpu(pcpu_devices, cpu);
return NULL;
}
@@ -203,7 +200,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
mcck_stack = stack_alloc();
if (!lc || !nodat_stack || !async_stack || !mcck_stack)
goto out;
- memcpy(lc, &S390_lowcore, 512);
+ memcpy(lc, get_lowcore(), 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + STACK_INIT_OFFSET;
lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
@@ -232,13 +229,11 @@ out:
return -ENOMEM;
}
-static void pcpu_free_lowcore(struct pcpu *pcpu)
+static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
@@ -261,13 +256,14 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
+ lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
- lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->kernel_asce = get_lowcore()->kernel_asce;
lc->user_asce = s390_invalid_asce;
- lc->machine_flags = S390_lowcore.machine_flags;
+ lc->machine_flags = get_lowcore()->machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
abs_lc = get_abs_lowcore();
@@ -279,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
arch_spin_lock_setup(cpu);
}
-static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
+static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
lc->current_task = (unsigned long)tsk;
@@ -298,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->steal_timer = 0;
}
-static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
+static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
- int cpu;
- cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
- pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
+ pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@@ -322,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
-static void pcpu_delegate(struct pcpu *pcpu,
+static void pcpu_delegate(struct pcpu *pcpu, int cpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
- lc = lowcore_ptr[pcpu - pcpu_devices];
+ lc = lowcore_ptr[cpu];
source_cpu = stap();
if (pcpu->address == source_cpu) {
@@ -379,38 +371,22 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
- pcpu_devices[0].address = stap();
+ per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
- * Call function on an online CPU.
- */
-void smp_call_online_cpu(void (*func)(void *), void *data)
-{
- struct pcpu *pcpu;
-
- /* Use the current cpu if it is online. */
- pcpu = pcpu_find_address(cpu_online_mask, stap());
- if (!pcpu)
- /* Use the first online cpu. */
- pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
- pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
-}
-
-/*
* Call function on the ipl CPU.
*/
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
- if (pcpu_devices[0].address == stap())
- lc = &S390_lowcore;
+ if (ipl_pcpu->address == stap())
+ lc = get_lowcore();
- pcpu_delegate(&pcpu_devices[0], func, data,
- lc->nodat_stack);
+ pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@@ -418,21 +394,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
- if (pcpu_devices[cpu].address == address)
+ if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
- if (pcpu_running(pcpu_devices + cpu))
+ if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@@ -444,7 +420,7 @@ void notrace smp_yield_cpu(int cpu)
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
+ : : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@@ -465,7 +441,7 @@ void notrace smp_emergency_stop(void)
end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
@@ -474,7 +450,7 @@ void notrace smp_emergency_stop(void)
}
while (get_tod_clock() < end) {
for_each_cpu(cpu, &cpumask)
- if (pcpu_stopped(pcpu_devices + cpu))
+ if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@@ -489,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
+ struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
@@ -504,8 +481,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
- pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
- while (!pcpu_stopped(pcpu_devices + cpu))
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
+ pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
+ while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@@ -519,7 +497,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
- bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
+ bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@@ -544,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@@ -559,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu)
*/
void arch_smp_send_reschedule(int cpu)
{
- pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
+ pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
- pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+ pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
@@ -577,7 +555,7 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
@@ -685,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
- pcpu_devices[cpu].polarization = val;
+ per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
- return pcpu_devices[cpu].polarization;
+ return per_cpu(pcpu_devices, cpu).polarization;
}
int smp_cpu_get_cpu_address(int cpu)
{
- return pcpu_devices[cpu].address;
+ return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@@ -719,8 +697,6 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
}
}
-static int smp_add_present_cpu(int cpu);
-
static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
bool configured, bool early)
{
@@ -736,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
@@ -744,7 +720,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
pcpu->state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
set_cpu_present(cpu, true);
- if (!early && smp_add_present_cpu(cpu) != 0)
+ if (!early && arch_register_cpu(cpu))
set_cpu_present(cpu, false);
else
nr++;
@@ -771,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
- core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@@ -831,9 +807,6 @@ void __init smp_detect_cpus(void)
s_cpus += smp_cpu_mtid + 1;
}
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
-
- /* Add CPUs present at boot */
- __smp_rescan_cpus(info, true);
memblock_free(info, sizeof(*info));
}
@@ -842,15 +815,16 @@ void __init smp_detect_cpus(void)
*/
static void smp_start_secondary(void *cpuvoid)
{
+ struct lowcore *lc = get_lowcore();
int cpu = raw_smp_processor_id();
- S390_lowcore.last_update_clock = get_tod_clock();
- S390_lowcore.restart_stack = (unsigned long)restart_stack;
- S390_lowcore.restart_fn = (unsigned long)do_restart;
- S390_lowcore.restart_data = 0;
- S390_lowcore.restart_source = -1U;
- S390_lowcore.restart_flags = 0;
- restore_access_regs(S390_lowcore.access_regs_save_area);
+ lc->last_update_clock = get_tod_clock();
+ lc->restart_stack = (unsigned long)restart_stack;
+ lc->restart_fn = (unsigned long)do_restart;
+ lc->restart_data = 0;
+ lc->restart_source = -1U;
+ lc->restart_flags = 0;
+ restore_access_regs(lc->access_regs_save_area);
cpu_init();
rcutree_report_cpu_starting(cpu);
init_cpu_timer();
@@ -873,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- struct pcpu *pcpu = pcpu_devices + cpu;
+ struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@@ -891,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
*/
system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
- pcpu_attach_task(pcpu, tidle);
- pcpu_start_fn(pcpu, smp_start_secondary, NULL);
+ pcpu_attach_task(cpu, tidle);
+ pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
@@ -937,18 +911,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
- pcpu_free_lowcore(pcpu);
+ pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
+ pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
- pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
+ pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@@ -973,24 +948,29 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
system_ctl_set_bit(0, 13);
+ smp_rescan_cpus(true);
}
void __init smp_prepare_boot_cpu(void)
{
- struct pcpu *pcpu = pcpu_devices;
+ struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
- pcpu->state = CPU_STATE_CONFIGURED;
- S390_lowcore.percpu_offset = __per_cpu_offset[0];
+ lc->percpu_offset = __per_cpu_offset[0];
+ ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
+ ipl_pcpu->state = CPU_STATE_CONFIGURED;
+ lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}
void __init smp_setup_processor_id(void)
{
- pcpu_devices[0].address = stap();
- S390_lowcore.cpu_nr = 0;
- S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
- S390_lowcore.spinlock_index = 0;
+ struct lowcore *lc = get_lowcore();
+
+ lc->cpu_nr = 0;
+ per_cpu(pcpu_devices, 0).address = stap();
+ lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
}
/*
@@ -1010,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
- count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
+ count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@@ -1036,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
- pcpu = pcpu_devices + cpu;
+ pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@@ -1048,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_STANDBY;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1063,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
- pcpu[i].state = CPU_STATE_CONFIGURED;
+ per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@@ -1082,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
+ return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@@ -1108,35 +1088,34 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+ return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
- struct device *s = &per_cpu(cpu_device, cpu)->dev;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
- sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
}
-static int smp_add_present_cpu(int cpu)
+bool arch_cpu_is_hotpluggable(int cpu)
+{
+ return !!cpu;
+}
+
+int arch_register_cpu(int cpu)
{
- struct device *s;
- struct cpu *c;
+ struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
- c = kzalloc(sizeof(*c), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
- per_cpu(cpu_device, cpu) = c;
- s = &c->dev;
- c->hotpluggable = !!cpu;
+ c->hotpluggable = arch_cpu_is_hotpluggable(cpu);
rc = register_cpu(c, cpu);
if (rc)
goto out;
- rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+ rc = sysfs_create_group(&c->dev.kobj, &cpu_common_attr_group);
if (rc)
goto out_cpu;
rc = topology_cpu_init(c);
@@ -1145,14 +1124,14 @@ static int smp_add_present_cpu(int cpu)
return 0;
out_topology:
- sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+ sysfs_remove_group(&c->dev.kobj, &cpu_common_attr_group);
out_cpu:
unregister_cpu(c);
out:
return rc;
}
-int __ref smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(bool early)
{
struct sclp_core_info *info;
int nr;
@@ -1161,7 +1140,7 @@ int __ref smp_rescan_cpus(void)
if (!info)
return -ENOMEM;
smp_get_core_info(info, 0);
- nr = __smp_rescan_cpus(info, false);
+ nr = __smp_rescan_cpus(info, early);
kfree(info);
if (nr)
topology_schedule_update();
@@ -1178,7 +1157,7 @@ static ssize_t __ref rescan_store(struct device *dev,
rc = lock_device_hotplug_sysfs();
if (rc)
return rc;
- rc = smp_rescan_cpus();
+ rc = smp_rescan_cpus(false);
unlock_device_hotplug();
return rc ? rc : count;
}
@@ -1187,7 +1166,7 @@ static DEVICE_ATTR_WO(rescan);
static int __init s390_smp_init(void)
{
struct device *dev_root;
- int cpu, rc = 0;
+ int rc;
dev_root = bus_get_dev_root(&cpu_subsys);
if (dev_root) {
@@ -1196,17 +1175,9 @@ static int __init s390_smp_init(void)
if (rc)
return rc;
}
-
- for_each_present_cpu(cpu) {
- rc = smp_add_present_cpu(cpu);
- if (rc)
- goto out;
- }
-
rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online",
smp_cpu_online, smp_cpu_pre_down);
rc = rc <= 0 ? rc : 0;
-out:
return rc;
}
subsys_initcall(s390_smp_init);
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 30bb20461db4..1cf2ad04f8e9 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -300,33 +300,56 @@ static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
return (struct diag204_x_part_block *)&block->cpus[i];
}
-static void fill_diag(struct sthyi_sctns *sctns)
+static void *diag204_get_data(bool diag204_allow_busy)
{
- int i, r, pages;
- bool this_lpar;
+ unsigned long subcode;
void *diag204_buf;
- void *diag224_buf = NULL;
- struct diag204_x_info_blk_hdr *ti_hdr;
- struct diag204_x_part_block *part_block;
- struct diag204_x_phys_block *phys_block;
- struct lpar_cpu_inf lpar_inf = {};
-
- /* Errors are handled through the validity bits in the response. */
- pages = diag204((unsigned long)DIAG204_SUBC_RSI |
- (unsigned long)DIAG204_INFO_EXT, 0, NULL);
- if (pages <= 0)
- return;
-
+ int pages, rc;
+
+ subcode = DIAG204_SUBC_RSI;
+ subcode |= DIAG204_INFO_EXT;
+ pages = diag204(subcode, 0, NULL);
+ if (pages < 0)
+ return ERR_PTR(pages);
+ if (pages == 0)
+ return ERR_PTR(-ENODATA);
diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (!diag204_buf)
- return;
+ return ERR_PTR(-ENOMEM);
+ subcode = DIAG204_SUBC_STIB7;
+ subcode |= DIAG204_INFO_EXT;
+ if (diag204_has_bif() && diag204_allow_busy)
+ subcode |= DIAG204_BIF_BIT;
+ rc = diag204(subcode, pages, diag204_buf);
+ if (rc < 0) {
+ vfree(diag204_buf);
+ return ERR_PTR(rc);
+ }
+ return diag204_buf;
+}
- r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
- (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
- if (r < 0)
- goto out;
+static bool is_diag204_cached(struct sthyi_sctns *sctns)
+{
+ /*
+ * Check if validity bits are set when diag204 data
+ * is gathered.
+ */
+ if (sctns->par.infpval1)
+ return true;
+ return false;
+}
+
+static void fill_diag(struct sthyi_sctns *sctns, void *diag204_buf)
+{
+ int i;
+ bool this_lpar;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
if (!diag224_buf || diag224(diag224_buf))
@@ -392,7 +415,6 @@ static void fill_diag(struct sthyi_sctns *sctns)
out:
free_page((unsigned long)diag224_buf);
- vfree(diag204_buf);
}
static int sthyi(u64 vaddr, u64 *rc)
@@ -414,19 +436,31 @@ static int sthyi(u64 vaddr, u64 *rc)
static int fill_dst(void *dst, u64 *rc)
{
+ void *diag204_buf;
+
struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst;
/*
* If the facility is on, we don't want to emulate the instruction.
* We ask the hypervisor to provide the data.
*/
- if (test_facility(74))
+ if (test_facility(74)) {
+ memset(dst, 0, PAGE_SIZE);
return sthyi((u64)dst, rc);
-
+ }
+ /*
+ * When emulating, if diag204 returns BUSY don't reset dst buffer
+ * and use cached data.
+ */
+ *rc = 0;
+ diag204_buf = diag204_get_data(is_diag204_cached(sctns));
+ if (IS_ERR(diag204_buf))
+ return PTR_ERR(diag204_buf);
+ memset(dst, 0, PAGE_SIZE);
fill_hdr(sctns);
fill_stsi(sctns);
- fill_diag(sctns);
- *rc = 0;
+ fill_diag(sctns, diag204_buf);
+ vfree(diag204_buf);
return 0;
}
@@ -445,11 +479,14 @@ static int sthyi_update_cache(u64 *rc)
{
int r;
- memset(sthyi_cache.info, 0, PAGE_SIZE);
r = fill_dst(sthyi_cache.info, rc);
- if (r)
- return r;
- sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ if (r == 0) {
+ sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES;
+ } else if (r == -EBUSY) {
+ /* mark as expired and return 0 to keep using cached data */
+ sthyi_cache.end = jiffies - 1;
+ r = 0;
+ }
return r;
}
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index dc2355c623d6..5ec28028315b 100644
--- a/arch/s390/kernel/syscall.c
+++ b/arch/s390/kernel/syscall.c
@@ -38,33 +38,6 @@
#include "entry.h"
-/*
- * Perform the mmap() system call. Linux for S/390 isn't able to handle more
- * than 5 system call parameters, so this system call uses a memory block
- * for parameter passing.
- */
-
-struct s390_mmap_arg_struct {
- unsigned long addr;
- unsigned long len;
- unsigned long prot;
- unsigned long flags;
- unsigned long fd;
- unsigned long offset;
-};
-
-SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg)
-{
- struct s390_mmap_arg_struct a;
- int error = -EFAULT;
-
- if (copy_from_user(&a, arg, sizeof(a)))
- goto out;
- error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset);
-out:
- return error;
-}
-
#ifdef CONFIG_SYSVIPC
/*
* sys_ipc() is the de-multiplexer for the SysV IPC calls.
@@ -151,8 +124,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
add_random_kstack_offset();
enter_from_user_mode(regs);
- regs->psw = S390_lowcore.svc_old_psw;
- regs->int_code = S390_lowcore.svc_int_code;
+ regs->psw = get_lowcore()->svc_old_psw;
+ regs->int_code = get_lowcore()->svc_int_code;
update_timer_sys();
if (static_branch_likely(&cpu_has_bear))
current->thread.last_break = regs->last_break;
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fb9f31f36628..b713effe0579 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -131,7 +131,7 @@ void clock_comparator_work(void)
{
struct clock_event_device *cd;
- S390_lowcore.clock_comparator = clock_comparator_max;
+ get_lowcore()->clock_comparator = clock_comparator_max;
cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -139,8 +139,8 @@ void clock_comparator_work(void)
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
- S390_lowcore.clock_comparator = get_tod_clock() + delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = get_tod_clock() + delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
return 0;
}
@@ -153,8 +153,8 @@ void init_cpu_timer(void)
struct clock_event_device *cd;
int cpu;
- S390_lowcore.clock_comparator = clock_comparator_max;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ get_lowcore()->clock_comparator = clock_comparator_max;
+ set_clock_comparator(get_lowcore()->clock_comparator);
cpu = smp_processor_id();
cd = &per_cpu(comparators, cpu);
@@ -184,8 +184,8 @@ static void clock_comparator_interrupt(struct ext_code ext_code,
unsigned long param64)
{
inc_irq_stat(IRQEXT_CLK);
- if (S390_lowcore.clock_comparator == clock_comparator_max)
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator == clock_comparator_max)
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
static void stp_timing_alert(struct stp_irq_parm *);
@@ -408,12 +408,12 @@ static void clock_sync_global(long delta)
static void clock_sync_local(long delta)
{
/* Add the delta to the clock comparator. */
- if (S390_lowcore.clock_comparator != clock_comparator_max) {
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
+ if (get_lowcore()->clock_comparator != clock_comparator_max) {
+ get_lowcore()->clock_comparator += delta;
+ set_clock_comparator(get_lowcore()->clock_comparator);
}
/* Adjust the last_update_clock time-stamp. */
- S390_lowcore.last_update_clock += delta;
+ get_lowcore()->last_update_clock += delta;
}
/* Single threaded workqueue used for stp sync events */
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 89e91b8ce842..22029ecae1c5 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -320,16 +320,10 @@ static int __arch_update_cpu_topology(void)
int arch_update_cpu_topology(void)
{
- struct device *dev;
- int cpu, rc;
+ int rc;
rc = __arch_update_cpu_topology();
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
- for_each_online_cpu(cpu) {
- dev = get_cpu_device(cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- }
return rc;
}
@@ -600,7 +594,7 @@ static int __init topology_setup(char *str)
}
early_param("topology", topology_setup);
-static int topology_ctl_handler(struct ctl_table *ctl, int write,
+static int topology_ctl_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 52578b5cecbd..160b2acba8db 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -27,6 +27,7 @@
#include <linux/uaccess.h>
#include <linux/cpu.h>
#include <linux/entry-common.h>
+#include <linux/kmsan.h>
#include <asm/asm-extable.h>
#include <asm/vtime.h>
#include <asm/fpu.h>
@@ -262,6 +263,11 @@ static void monitor_event_exception(struct pt_regs *regs)
void kernel_stack_overflow(struct pt_regs *regs)
{
+ /*
+ * Normally regs are unpoisoned by the generic entry code, but
+ * kernel_stack_overflow() is a rare case that is called bypassing it.
+ */
+ kmsan_unpoison_entry_regs(regs);
bust_spinlocks(1);
printk("Kernel stack overflow.\n");
show_regs(regs);
@@ -288,15 +294,16 @@ static void __init test_monitor_call(void)
void __init trap_init(void)
{
+ struct lowcore *lc = get_lowcore();
unsigned long flags;
struct ctlreg cr0;
local_irq_save(flags);
cr0 = local_ctl_clear_bit(0, CR0_LOW_ADDRESS_PROTECTION_BIT);
- psw_bits(S390_lowcore.external_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.program_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.svc_new_psw).mcheck = 1;
- psw_bits(S390_lowcore.io_new_psw).mcheck = 1;
+ psw_bits(lc->external_new_psw).mcheck = 1;
+ psw_bits(lc->program_new_psw).mcheck = 1;
+ psw_bits(lc->svc_new_psw).mcheck = 1;
+ psw_bits(lc->io_new_psw).mcheck = 1;
local_ctl_load(0, &cr0);
local_irq_restore(flags);
local_mcck_enable();
@@ -307,11 +314,12 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
void noinstr __do_pgm_check(struct pt_regs *regs)
{
- unsigned int trapnr;
+ struct lowcore *lc = get_lowcore();
irqentry_state_t state;
+ unsigned int trapnr;
- regs->int_code = S390_lowcore.pgm_int_code;
- regs->int_parm_long = S390_lowcore.trans_exc_code;
+ regs->int_code = lc->pgm_int_code;
+ regs->int_parm_long = lc->trans_exc_code;
state = irqentry_enter(regs);
@@ -324,19 +332,19 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
current->thread.last_break = regs->last_break;
}
- if (S390_lowcore.pgm_code & 0x0200) {
+ if (lc->pgm_code & 0x0200) {
/* transaction abort */
- current->thread.trap_tdb = S390_lowcore.pgm_tdb;
+ current->thread.trap_tdb = lc->pgm_tdb;
}
- if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) {
+ if (lc->pgm_code & PGM_INT_CODE_PER) {
if (user_mode(regs)) {
struct per_event *ev = &current->thread.per_event;
set_thread_flag(TIF_PER_TRAP);
- ev->address = S390_lowcore.per_address;
- ev->cause = S390_lowcore.per_code_combined;
- ev->paid = S390_lowcore.per_access_id;
+ ev->address = lc->per_address;
+ ev->cause = lc->per_code_combined;
+ ev->paid = lc->per_access_id;
} else {
/* PER event in kernel is kprobes */
__arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER);
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 0ece156fdd7c..cd44be2b6ce8 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -49,6 +49,8 @@ static inline bool is_final_pt_regs(struct unwind_state *state,
READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
}
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -118,6 +120,8 @@ out_stop:
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
+/* Avoid KMSAN false positives from touching uninitialized frames. */
+__no_kmsan_checks
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long first_frame)
{
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 265fea37e030..36db065c7cf7 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -18,11 +18,22 @@
#include <asm/sections.h>
#include <asm/uv.h>
+#if !IS_ENABLED(CONFIG_KVM)
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
+{
+ return 0;
+}
+
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
+{
+ return 0;
+}
+#endif
+
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
-#endif
/*
* uv_info contains both host and guest information but it's currently only
@@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
EXPORT_SYMBOL(uv_info);
-#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
@@ -110,7 +120,7 @@ EXPORT_SYMBOL_GPL(uv_pin_shared);
*
* @paddr: Absolute host address of page to be destroyed
*/
-static int uv_destroy_page(unsigned long paddr)
+static int uv_destroy(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_DESTR_SEC_STOR,
@@ -131,28 +141,40 @@ static int uv_destroy_page(unsigned long paddr)
}
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio
*/
-int uv_destroy_owned_page(unsigned long paddr)
+int uv_destroy_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_destroy_page(paddr);
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_destroy(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_destroy_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_destroy_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/*
* Requests the Ultravisor to encrypt a guest page and make it
* accessible to the host for paging (export).
*
* @paddr: Absolute host address of page to be exported
*/
-int uv_convert_from_secure(unsigned long paddr)
+static int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -166,22 +188,34 @@ int uv_convert_from_secure(unsigned long paddr)
}
/*
- * The caller must already hold a reference to the page
+ * The caller must already hold a reference to the folio.
*/
-int uv_convert_owned_from_secure(unsigned long paddr)
+static int uv_convert_from_secure_folio(struct folio *folio)
{
- struct page *page = phys_to_page(paddr);
int rc;
- get_page(page);
- rc = uv_convert_from_secure(paddr);
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
+ return 0;
+
+ folio_get(folio);
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc)
- clear_bit(PG_arch_1, &page->flags);
- put_page(page);
+ clear_bit(PG_arch_1, &folio->flags);
+ folio_put(folio);
return rc;
}
/*
+ * The present PTE still indirectly holds a folio reference through the mapping.
+ */
+int uv_convert_from_secure_pte(pte_t pte)
+{
+ VM_WARN_ON(!pte_present(pte));
+ return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
+}
+
+/*
* Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
* the kernel, but with some slight modifications. We know that a secure
@@ -267,6 +301,36 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
}
/*
+ * Drain LRU caches: the local one on first invocation and the ones of all
+ * CPUs on successive invocations. Returns "true" on the first invocation.
+ */
+static bool drain_lru(bool *drain_lru_called)
+{
+ /*
+ * If we have tried a local drain and the folio refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (*drain_lru_called) {
+ lru_add_drain_all();
+ /* We give up here, don't retry immediately. */
+ return false;
+ }
+ /*
+ * We are here if the folio refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ *drain_lru_called = true;
+ /* The caller should try again immediately */
+ return true;
+}
+
+/*
* Requests the Ultravisor to make a page accessible to a guest.
* If it's brought in the first time, it will be cleared. If
* it has been exported before, it will be decrypted and integrity
@@ -275,7 +339,7 @@ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_str
int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
{
struct vm_area_struct *vma;
- bool local_drain = false;
+ bool drain_lru_called = false;
spinlock_t *ptelock;
unsigned long uaddr;
struct folio *folio;
@@ -308,52 +372,63 @@ again:
goto out;
if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
folio = page_folio(pte_page(*ptep));
- rc = -EINVAL;
- if (folio_test_large(folio))
- goto unlock;
rc = -EAGAIN;
- if (folio_trylock(folio)) {
+ if (folio_test_large(folio)) {
+ rc = -E2BIG;
+ } else if (folio_trylock(folio)) {
if (should_export_before_import(uvcb, gmap->mm))
uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
rc = make_folio_secure(folio, uvcb);
folio_unlock(folio);
}
+
+ /*
+ * Once we drop the PTL, the folio may get unmapped and
+ * freed immediately. We need a temporary reference.
+ */
+ if (rc == -EAGAIN || rc == -E2BIG)
+ folio_get(folio);
}
-unlock:
pte_unmap_unlock(ptep, ptelock);
out:
mmap_read_unlock(gmap->mm);
- if (rc == -EAGAIN) {
+ switch (rc) {
+ case -E2BIG:
+ folio_lock(folio);
+ rc = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+
+ switch (rc) {
+ case 0:
+ /* Splitting succeeded, try again immediately. */
+ goto again;
+ case -EAGAIN:
+ /* Additional folio references. */
+ if (drain_lru(&drain_lru_called))
+ goto again;
+ return -EAGAIN;
+ case -EBUSY:
+ /* Unexpected race. */
+ return -EAGAIN;
+ }
+ WARN_ON_ONCE(1);
+ return -ENXIO;
+ case -EAGAIN:
/*
* If we are here because the UVC returned busy or partial
* completion, this is just a useless check, but it is safe.
*/
folio_wait_writeback(folio);
- } else if (rc == -EBUSY) {
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (local_drain) {
- lru_add_drain_all();
- /* We give up here, and let the caller try again */
- return -EAGAIN;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- local_drain = true;
- /* And now we try again immediately after draining */
- goto again;
- } else if (rc == -ENXIO) {
+ folio_put(folio);
+ return -EAGAIN;
+ case -EBUSY:
+ /* Additional folio references. */
+ if (drain_lru(&drain_lru_called))
+ goto again;
+ return -EAGAIN;
+ case -ENXIO:
if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
return -EFAULT;
return -EAGAIN;
@@ -388,6 +463,7 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
{
struct vm_area_struct *vma;
unsigned long uaddr;
+ struct folio *folio;
struct page *page;
int rc;
@@ -411,7 +487,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET);
if (IS_ERR_OR_NULL(page))
goto out;
- rc = uv_destroy_owned_page(page_to_phys(page));
+ folio = page_folio(page);
+ rc = uv_destroy_folio(folio);
/*
* Fault handlers can race; it is possible that two CPUs will fault
* on the same secure page. One CPU can destroy the page, reboot,
@@ -422,8 +499,8 @@ int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
* we instead try to export the page.
*/
if (rc)
- rc = uv_convert_owned_from_secure(page_to_phys(page));
- put_page(page);
+ rc = uv_convert_from_secure_folio(folio);
+ folio_put(folio);
out:
mmap_read_unlock(gmap->mm);
return rc;
@@ -431,50 +508,51 @@ out:
EXPORT_SYMBOL_GPL(gmap_destroy_page);
/*
- * To be called with the page locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the page concurrently. Having 2
- * parallel make_page_accessible is fine, as the UV calls will become a
- * no-op if the page is already exported.
+ * To be called with the folio locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the folio concurrently. Having 2
+ * parallel arch_make_folio_accessible is fine, as the UV calls will become a
+ * no-op if the folio is already exported.
*/
-int arch_make_page_accessible(struct page *page)
+int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* Hugepage cannot be protected, so nothing to do */
- if (PageHuge(page))
+ /* See gmap_make_secure(): large folios cannot be secure */
+ if (unlikely(folio_test_large(folio)))
return 0;
/*
- * PG_arch_1 is used in 3 places:
- * 1. for kernel page tables during early boot
- * 2. for storage keys of huge pages and KVM
- * 3. As an indication that this page might be secure. This can
+ * PG_arch_1 is used in 2 places:
+ * 1. for storage keys of hugetlb folios and KVM
+ * 2. As an indication that this small folio might be secure. This can
* overindicate, e.g. we set the bit before calling
* convert_to_secure.
- * As secure pages are never huge, all 3 variants can co-exists.
+ * As secure pages are never large folios, both variants can co-exists.
*/
- if (!test_bit(PG_arch_1, &page->flags))
+ if (!test_bit(PG_arch_1, &folio->flags))
return 0;
- rc = uv_pin_shared(page_to_phys(page));
+ rc = uv_pin_shared(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
- rc = uv_convert_from_secure(page_to_phys(page));
+ rc = uv_convert_from_secure(folio_to_phys(folio));
if (!rc) {
- clear_bit(PG_arch_1, &page->flags);
+ clear_bit(PG_arch_1, &folio->flags);
return 0;
}
return rc;
}
-EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+EXPORT_SYMBOL_GPL(arch_make_folio_accessible);
-#endif
-
-#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+int arch_make_page_accessible(struct page *page)
+{
+ return arch_make_folio_accessible(page_folio(page));
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -650,24 +728,13 @@ static struct attribute_group uv_query_attr_group = {
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
- val = prot_virt_guest;
-#endif
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int val = 0;
-
-#if IS_ENABLED(CONFIG_KVM)
- val = prot_virt_host;
-#endif
-
- return sysfs_emit(buf, "%d\n", val);
+ return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@@ -719,4 +786,3 @@ out_kobj:
return rc;
}
device_initcall(uv_info_init);
-#endif
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index a1ce3925ec71..ae5d0a9d6911 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -39,7 +39,7 @@ PHDRS {
SECTIONS
{
- . = __START_KERNEL;
+ . = TEXT_OFFSET;
.text : {
_stext = .; /* Start of text section */
_text = .; /* Text and read-only data */
@@ -59,14 +59,6 @@ SECTIONS
} :text = 0x0700
RO_DATA(PAGE_SIZE)
- .data.rel.ro : {
- *(.data.rel.ro .data.rel.ro.*)
- }
- .got : {
- __got_start = .;
- *(.got)
- __got_end = .;
- }
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -80,6 +72,15 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
+ .data.rel.ro : {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+ .got : {
+ __got_start = .;
+ *(.got)
+ __got_end = .;
+ }
+
RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
.data.rel : {
*(.data.rel*)
@@ -190,6 +191,9 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
@@ -219,6 +223,8 @@ SECTIONS
QUAD(init_mm)
QUAD(swapper_pg_dir)
QUAD(invalid_pg_dir)
+ QUAD(__alt_instructions)
+ QUAD(__alt_instructions_end)
#ifdef CONFIG_KASAN
QUAD(kasan_early_shadow_page)
QUAD(kasan_early_shadow_pte)
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index ffc1db0cbf9c..234a0ba30510 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -35,14 +35,15 @@ static DEFINE_PER_CPU(u64, mt_scaling_jiffies);
static inline void set_vtimer(u64 expires)
{
+ struct lowcore *lc = get_lowcore();
u64 timer;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" spt %1" /* Set new value imm. afterwards */
: "=Q" (timer) : "Q" (expires));
- S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
- S390_lowcore.last_update_timer = expires;
+ lc->system_timer += lc->last_update_timer - timer;
+ lc->last_update_timer = expires;
}
static inline int virt_timer_forward(u64 elapsed)
@@ -117,22 +118,23 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime,
static int do_account_vtime(struct task_struct *tsk)
{
u64 timer, clock, user, guest, system, hardirq, softirq;
+ struct lowcore *lc = get_lowcore();
- timer = S390_lowcore.last_update_timer;
- clock = S390_lowcore.last_update_clock;
+ timer = lc->last_update_timer;
+ clock = lc->last_update_clock;
asm volatile(
" stpt %0\n" /* Store current cpu timer value */
" stckf %1" /* Store current tod clock value */
- : "=Q" (S390_lowcore.last_update_timer),
- "=Q" (S390_lowcore.last_update_clock)
+ : "=Q" (lc->last_update_timer),
+ "=Q" (lc->last_update_clock)
: : "cc");
- clock = S390_lowcore.last_update_clock - clock;
- timer -= S390_lowcore.last_update_timer;
+ clock = lc->last_update_clock - clock;
+ timer -= lc->last_update_timer;
if (hardirq_count())
- S390_lowcore.hardirq_timer += timer;
+ lc->hardirq_timer += timer;
else
- S390_lowcore.system_timer += timer;
+ lc->system_timer += timer;
/* Update MT utilization calculation */
if (smp_cpu_mtid &&
@@ -141,16 +143,16 @@ static int do_account_vtime(struct task_struct *tsk)
/* Calculate cputime delta */
user = update_tsk_timer(&tsk->thread.user_timer,
- READ_ONCE(S390_lowcore.user_timer));
+ READ_ONCE(lc->user_timer));
guest = update_tsk_timer(&tsk->thread.guest_timer,
- READ_ONCE(S390_lowcore.guest_timer));
+ READ_ONCE(lc->guest_timer));
system = update_tsk_timer(&tsk->thread.system_timer,
- READ_ONCE(S390_lowcore.system_timer));
+ READ_ONCE(lc->system_timer));
hardirq = update_tsk_timer(&tsk->thread.hardirq_timer,
- READ_ONCE(S390_lowcore.hardirq_timer));
+ READ_ONCE(lc->hardirq_timer));
softirq = update_tsk_timer(&tsk->thread.softirq_timer,
- READ_ONCE(S390_lowcore.softirq_timer));
- S390_lowcore.steal_timer +=
+ READ_ONCE(lc->softirq_timer));
+ lc->steal_timer +=
clock - user - guest - system - hardirq - softirq;
/* Push account value */
@@ -176,17 +178,19 @@ static int do_account_vtime(struct task_struct *tsk)
void vtime_task_switch(struct task_struct *prev)
{
+ struct lowcore *lc = get_lowcore();
+
do_account_vtime(prev);
- prev->thread.user_timer = S390_lowcore.user_timer;
- prev->thread.guest_timer = S390_lowcore.guest_timer;
- prev->thread.system_timer = S390_lowcore.system_timer;
- prev->thread.hardirq_timer = S390_lowcore.hardirq_timer;
- prev->thread.softirq_timer = S390_lowcore.softirq_timer;
- S390_lowcore.user_timer = current->thread.user_timer;
- S390_lowcore.guest_timer = current->thread.guest_timer;
- S390_lowcore.system_timer = current->thread.system_timer;
- S390_lowcore.hardirq_timer = current->thread.hardirq_timer;
- S390_lowcore.softirq_timer = current->thread.softirq_timer;
+ prev->thread.user_timer = lc->user_timer;
+ prev->thread.guest_timer = lc->guest_timer;
+ prev->thread.system_timer = lc->system_timer;
+ prev->thread.hardirq_timer = lc->hardirq_timer;
+ prev->thread.softirq_timer = lc->softirq_timer;
+ lc->user_timer = current->thread.user_timer;
+ lc->guest_timer = current->thread.guest_timer;
+ lc->system_timer = current->thread.system_timer;
+ lc->hardirq_timer = current->thread.hardirq_timer;
+ lc->softirq_timer = current->thread.softirq_timer;
}
/*
@@ -196,28 +200,29 @@ void vtime_task_switch(struct task_struct *prev)
*/
void vtime_flush(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 steal, avg_steal;
if (do_account_vtime(tsk))
virt_timer_expire();
- steal = S390_lowcore.steal_timer;
- avg_steal = S390_lowcore.avg_steal_timer;
+ steal = lc->steal_timer;
+ avg_steal = lc->avg_steal_timer;
if ((s64) steal > 0) {
- S390_lowcore.steal_timer = 0;
+ lc->steal_timer = 0;
account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
- S390_lowcore.avg_steal_timer = avg_steal / 2;
+ lc->avg_steal_timer = avg_steal / 2;
}
static u64 vtime_delta(void)
{
- u64 timer = S390_lowcore.last_update_timer;
-
- S390_lowcore.last_update_timer = get_cpu_timer();
+ struct lowcore *lc = get_lowcore();
+ u64 timer = lc->last_update_timer;
- return timer - S390_lowcore.last_update_timer;
+ lc->last_update_timer = get_cpu_timer();
+ return timer - lc->last_update_timer;
}
/*
@@ -226,12 +231,13 @@ static u64 vtime_delta(void)
*/
void vtime_account_kernel(struct task_struct *tsk)
{
+ struct lowcore *lc = get_lowcore();
u64 delta = vtime_delta();
if (tsk->flags & PF_VCPU)
- S390_lowcore.guest_timer += delta;
+ lc->guest_timer += delta;
else
- S390_lowcore.system_timer += delta;
+ lc->system_timer += delta;
virt_timer_forward(delta);
}
@@ -241,7 +247,7 @@ void vtime_account_softirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.softirq_timer += delta;
+ get_lowcore()->softirq_timer += delta;
virt_timer_forward(delta);
}
@@ -250,7 +256,7 @@ void vtime_account_hardirq(struct task_struct *tsk)
{
u64 delta = vtime_delta();
- S390_lowcore.hardirq_timer += delta;
+ get_lowcore()->hardirq_timer += delta;
virt_timer_forward(delta);
}
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 5bf3d94e9dda..e65f597e3044 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -14,167 +14,10 @@
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
+#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"
-union asce {
- unsigned long val;
- struct {
- unsigned long origin : 52; /* Region- or Segment-Table Origin */
- unsigned long : 2;
- unsigned long g : 1; /* Subspace Group Control */
- unsigned long p : 1; /* Private Space Control */
- unsigned long s : 1; /* Storage-Alteration-Event Control */
- unsigned long x : 1; /* Space-Switch-Event Control */
- unsigned long r : 1; /* Real-Space Control */
- unsigned long : 1;
- unsigned long dt : 2; /* Designation-Type Control */
- unsigned long tl : 2; /* Region- or Segment-Table Length */
- };
-};
-
-enum {
- ASCE_TYPE_SEGMENT = 0,
- ASCE_TYPE_REGION3 = 1,
- ASCE_TYPE_REGION2 = 2,
- ASCE_TYPE_REGION1 = 3
-};
-
-union region1_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Second-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Second-Table Length */
- };
-};
-
-union region2_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Third-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Third-Table Length */
- };
-};
-
-struct region3_table_entry_fc0 {
- unsigned long sto: 52;/* Segment-Table Origin */
- unsigned long : 1;
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Segment-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Segment-Table Length */
-};
-
-struct region3_table_entry_fc1 {
- unsigned long rfaa : 33; /* Region-Frame Absolute Address */
- unsigned long : 14;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union region3_table_entry {
- unsigned long val;
- struct region3_table_entry_fc0 fc0;
- struct region3_table_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-struct segment_entry_fc0 {
- unsigned long pto: 53;/* Page-Table Origin */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 3;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-struct segment_entry_fc1 {
- unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
- unsigned long : 3;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union segment_table_entry {
- unsigned long val;
- struct segment_entry_fc0 fc0;
- struct segment_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-enum {
- TABLE_TYPE_SEGMENT = 0,
- TABLE_TYPE_REGION3 = 1,
- TABLE_TYPE_REGION2 = 2,
- TABLE_TYPE_REGION1 = 3
-};
-
-union page_table_entry {
- unsigned long val;
- struct {
- unsigned long pfra : 52; /* Page-Frame Real Address */
- unsigned long z : 1; /* Zero Bit */
- unsigned long i : 1; /* Page-Invalid Bit */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 8;
- };
-};
-
/*
* vaddress union in order to easily decode a virtual address into its
* region first index, region second index etc. parts.
@@ -632,7 +475,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -1379,7 +1222,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82e9631cd9ef..0fd96860fc45 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,6 +132,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, instruction_io_other),
STATS_DESC_COUNTER(VCPU, instruction_lpsw),
STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+ STATS_DESC_COUNTER(VCPU, instruction_lpswey),
STATS_DESC_COUNTER(VCPU, instruction_pfmf),
STATS_DESC_COUNTER(VCPU, instruction_ptff),
STATS_DESC_COUNTER(VCPU, instruction_sck),
@@ -2996,14 +2997,9 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
break;
}
case KVM_CREATE_IRQCHIP: {
- struct kvm_irq_routing_entry routing;
-
r = -EINVAL;
- if (kvm->arch.use_irqchip) {
- /* Set up dummy routing. */
- memset(&routing, 0, sizeof(routing));
- r = kvm_set_irq_routing(kvm, &routing, 0, 0);
- }
+ if (kvm->arch.use_irqchip)
+ r = 0;
break;
}
case KVM_SET_DEVICE_ATTR: {
@@ -4079,7 +4075,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
/* do not poll with more than halt_poll_max_steal percent of steal time */
- if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
+ if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
READ_ONCE(halt_poll_max_steal)) {
vcpu->stat.halt_no_poll_steal++;
return true;
@@ -4829,7 +4825,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
sizeof(sie_page->pv_grregs));
}
exit_reason = sie64a(vcpu->arch.sie_block,
- vcpu->run->s.regs.gprs);
+ vcpu->run->s.regs.gprs,
+ gmap_get_enabled()->asce);
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs,
@@ -5031,7 +5028,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.pv.dumping)
return -EINVAL;
- if (kvm_run->immediate_exit)
+ if (!vcpu->wants_to_run)
return -EINTR;
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
@@ -5748,6 +5745,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
{
gpa_t size;
+ if (kvm_is_ucontrol(kvm))
+ return -EINVAL;
+
/* When we are protected, we should not change the memory slots */
if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 111eb5c74784..e680c6bf0c9d 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -138,6 +138,21 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar)
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
+static inline u64 kvm_s390_get_base_disp_siy(struct kvm_vcpu *vcpu, u8 *ar)
+{
+ u32 base1 = vcpu->arch.sie_block->ipb >> 28;
+ s64 disp1;
+
+ /* The displacement is a 20bit _SIGNED_ value */
+ disp1 = sign_extend64(((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4), 19);
+
+ if (ar)
+ *ar = base1;
+
+ return (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
+}
+
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
u64 *address1, u64 *address2,
u8 *ar_b1, u8 *ar_b2)
@@ -252,7 +267,12 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
{
- u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
+ u32 gd;
+
+ if (!kvm->arch.gisa_int.origin)
+ return 0;
+
+ gd = virt_to_phys(kvm->arch.gisa_int.origin);
if (gd && sclp.has_gisaf)
gd |= GISA_FORMAT1;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 1be19cc9d73c..1a49b89706f8 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -797,6 +797,36 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
return 0;
}
+static int handle_lpswey(struct kvm_vcpu *vcpu)
+{
+ psw_t new_psw;
+ u64 addr;
+ int rc;
+ u8 ar;
+
+ vcpu->stat.instruction_lpswey++;
+
+ if (!test_kvm_facility(vcpu->kvm, 193))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ addr = kvm_s390_get_base_disp_siy(vcpu, &ar);
+ if (addr & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+
+ vcpu->arch.sie_block->gpsw = new_psw;
+ if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ return 0;
+}
+
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->kvm->arch.model.cpuid;
@@ -1462,6 +1492,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
case 0x61:
case 0x62:
return handle_ri(vcpu);
+ case 0x71:
+ return handle_lpswey(vcpu);
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c9ecae830634..89cafea4c41f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1150,7 +1150,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
vcpu->arch.sie_block->prog0c |= PROG_IN_SIE;
barrier();
if (!kvm_s390_vcpu_sie_inhibited(vcpu))
- rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
+ rc = sie64a(scb_s, vcpu->run->s.regs.gprs, gmap_get_enabled()->asce);
barrier();
vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
@@ -1304,10 +1304,24 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
if (rc == -EAGAIN)
rc = 0;
- if (rc || scb_s->icptcode || signal_pending(current) ||
+
+ /*
+ * Exit the loop if the guest needs to process the intercept
+ */
+ if (rc || scb_s->icptcode)
+ break;
+
+ /*
+ * Exit the loop if the host needs to process an intercept,
+ * but rewind the PSW to re-enter SIE once that's completed
+ * instead of passing a "no action" intercept to the guest.
+ */
+ if (signal_pending(current) ||
kvm_s390_vcpu_has_irq(vcpu, 0) ||
- kvm_s390_vcpu_sie_inhibited(vcpu))
+ kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ kvm_s390_rewind_psw(vcpu, 4);
break;
+ }
cond_resched();
}
@@ -1426,8 +1440,10 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
- kvm_s390_vcpu_sie_inhibited(vcpu))
+ kvm_s390_vcpu_sie_inhibited(vcpu)) {
+ kvm_s390_rewind_psw(vcpu, 4);
return 0;
+ }
vsie_page = get_vsie_page(vcpu->kvm, scb_addr);
if (IS_ERR(vsie_page))
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 81c53440b3e6..9f86ad8fa8b4 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm_inline volatile(
- ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
+ ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)
@@ -119,7 +119,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
struct spin_wait *node, *next;
int lockval, ix, node_id, tail_id, old, new, owner, count;
- ix = S390_lowcore.spinlock_index++;
+ ix = get_lowcore()->spinlock_index++;
barrier();
lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
node = this_cpu_ptr(&spin_wait[ix]);
@@ -205,7 +205,7 @@ static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
}
out:
- S390_lowcore.spinlock_index--;
+ get_lowcore()->spinlock_index--;
}
static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
diff --git a/arch/s390/lib/test_kprobes.c b/arch/s390/lib/test_kprobes.c
index 9e62d62812e5..9021298c3e8a 100644
--- a/arch/s390/lib/test_kprobes.c
+++ b/arch/s390/lib/test_kprobes.c
@@ -72,4 +72,5 @@ static struct kunit_suite kprobes_test_suite = {
kunit_test_suites(&kprobes_test_suite);
+MODULE_DESCRIPTION("KUnit tests for kprobes");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c
index 9894009fc1f2..f96b6a3737e7 100644
--- a/arch/s390/lib/test_modules.c
+++ b/arch/s390/lib/test_modules.c
@@ -29,4 +29,5 @@ static struct kunit_suite modules_test_suite = {
kunit_test_suites(&modules_test_suite);
+MODULE_DESCRIPTION("KUnit test that modules with many relocations are loaded properly");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index 2848e3fb2ff5..8b7f981e6f34 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -356,7 +356,7 @@ static noinline int unwindme_func2(struct unwindme *u)
if (u->flags & UWM_SWITCH_STACK) {
local_irq_save(flags);
local_mcck_save(mflags);
- rc = call_on_stack(1, S390_lowcore.nodat_stack,
+ rc = call_on_stack(1, get_lowcore()->nodat_stack,
int, unwindme_func3, struct unwindme *, u);
local_mcck_restore(mflags);
local_irq_restore(flags);
@@ -519,4 +519,5 @@ static struct kunit_suite test_unwind_suite = {
kunit_test_suites(&test_unwind_suite);
+MODULE_DESCRIPTION("KUnit test for unwind_for_each_frame");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 61d8dcd95bbc..c7c269d5c491 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -21,13 +21,13 @@ void debug_user_asce(int exit)
local_ctl_store(1, &cr1);
local_ctl_store(7, &cr7);
- if (cr1.val == S390_lowcore.kernel_asce.val && cr7.val == S390_lowcore.user_asce.val)
+ if (cr1.val == get_lowcore()->kernel_asce.val && cr7.val == get_lowcore()->user_asce.val)
return;
panic("incorrect ASCE on kernel %s\n"
"cr1: %016lx cr7: %016lx\n"
"kernel: %016lx user: %016lx\n",
exit ? "exit" : "entry", cr1.val, cr7.val,
- S390_lowcore.kernel_asce.val, S390_lowcore.user_asce.val);
+ get_lowcore()->kernel_asce.val, get_lowcore()->user_asce.val);
}
#endif /*CONFIG_DEBUG_ENTRY */
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index f8b13f247646..75d15bf41d97 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -243,7 +243,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
return str != cp;
}
-static int cmm_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
long nr = cmm_get_pages();
@@ -262,7 +262,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+static int cmm_timed_pages_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp,
loff_t *ppos)
{
@@ -282,7 +282,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
return 0;
}
-static int cmm_timeout_handler(struct ctl_table *ctl, int write,
+static int cmm_timeout_handler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
@@ -427,4 +427,5 @@ static void __exit cmm_exit(void)
}
module_exit(cmm_exit);
+MODULE_DESCRIPTION("Cooperative memory management interface");
MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ffd07ed7b4af..0a67fcee4414 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -3,6 +3,7 @@
#include <linux/ptdump.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/sort.h>
#include <linux/mm.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
@@ -15,13 +16,15 @@
static unsigned long max_addr;
struct addr_marker {
+ int is_start;
unsigned long start_address;
const char *name;
};
enum address_markers_idx {
- IDENTITY_BEFORE_NR = 0,
- IDENTITY_BEFORE_END_NR,
+ KVA_NR = 0,
+ LOWCORE_START_NR,
+ LOWCORE_END_NR,
AMODE31_START_NR,
AMODE31_END_NR,
KERNEL_START_NR,
@@ -30,12 +33,22 @@ enum address_markers_idx {
KFENCE_START_NR,
KFENCE_END_NR,
#endif
- IDENTITY_AFTER_NR,
- IDENTITY_AFTER_END_NR,
+ IDENTITY_START_NR,
+ IDENTITY_END_NR,
VMEMMAP_NR,
VMEMMAP_END_NR,
VMALLOC_NR,
VMALLOC_END_NR,
+#ifdef CONFIG_KMSAN
+ KMSAN_VMALLOC_SHADOW_START_NR,
+ KMSAN_VMALLOC_SHADOW_END_NR,
+ KMSAN_VMALLOC_ORIGIN_START_NR,
+ KMSAN_VMALLOC_ORIGIN_END_NR,
+ KMSAN_MODULES_SHADOW_START_NR,
+ KMSAN_MODULES_SHADOW_END_NR,
+ KMSAN_MODULES_ORIGIN_START_NR,
+ KMSAN_MODULES_ORIGIN_END_NR,
+#endif
MODULES_NR,
MODULES_END_NR,
ABS_LOWCORE_NR,
@@ -49,33 +62,44 @@ enum address_markers_idx {
};
static struct addr_marker address_markers[] = {
- [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
- [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
- [AMODE31_START_NR] = {0, "Amode31 Area Start"},
- [AMODE31_END_NR] = {0, "Amode31 Area End"},
- [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
- [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+ [KVA_NR] = {0, 0, "Kernel Virtual Address Space"},
+ [LOWCORE_START_NR] = {1, 0, "Lowcore Start"},
+ [LOWCORE_END_NR] = {0, 0, "Lowcore End"},
+ [IDENTITY_START_NR] = {1, 0, "Identity Mapping Start"},
+ [IDENTITY_END_NR] = {0, 0, "Identity Mapping End"},
+ [AMODE31_START_NR] = {1, 0, "Amode31 Area Start"},
+ [AMODE31_END_NR] = {0, 0, "Amode31 Area End"},
+ [KERNEL_START_NR] = {1, (unsigned long)_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {0, (unsigned long)_end, "Kernel Image End"},
#ifdef CONFIG_KFENCE
- [KFENCE_START_NR] = {0, "KFence Pool Start"},
- [KFENCE_END_NR] = {0, "KFence Pool End"},
+ [KFENCE_START_NR] = {1, 0, "KFence Pool Start"},
+ [KFENCE_END_NR] = {0, 0, "KFence Pool End"},
+#endif
+ [VMEMMAP_NR] = {1, 0, "vmemmap Area Start"},
+ [VMEMMAP_END_NR] = {0, 0, "vmemmap Area End"},
+ [VMALLOC_NR] = {1, 0, "vmalloc Area Start"},
+ [VMALLOC_END_NR] = {0, 0, "vmalloc Area End"},
+#ifdef CONFIG_KMSAN
+ [KMSAN_VMALLOC_SHADOW_START_NR] = {1, 0, "Kmsan vmalloc Shadow Start"},
+ [KMSAN_VMALLOC_SHADOW_END_NR] = {0, 0, "Kmsan vmalloc Shadow End"},
+ [KMSAN_VMALLOC_ORIGIN_START_NR] = {1, 0, "Kmsan vmalloc Origins Start"},
+ [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, 0, "Kmsan vmalloc Origins End"},
+ [KMSAN_MODULES_SHADOW_START_NR] = {1, 0, "Kmsan Modules Shadow Start"},
+ [KMSAN_MODULES_SHADOW_END_NR] = {0, 0, "Kmsan Modules Shadow End"},
+ [KMSAN_MODULES_ORIGIN_START_NR] = {1, 0, "Kmsan Modules Origins Start"},
+ [KMSAN_MODULES_ORIGIN_END_NR] = {0, 0, "Kmsan Modules Origins End"},
#endif
- [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
- [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
- [VMEMMAP_NR] = {0, "vmemmap Area Start"},
- [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
- [VMALLOC_NR] = {0, "vmalloc Area Start"},
- [VMALLOC_END_NR] = {0, "vmalloc Area End"},
- [MODULES_NR] = {0, "Modules Area Start"},
- [MODULES_END_NR] = {0, "Modules Area End"},
- [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
- [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"},
- [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"},
- [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"},
+ [MODULES_NR] = {1, 0, "Modules Area Start"},
+ [MODULES_END_NR] = {0, 0, "Modules Area End"},
+ [ABS_LOWCORE_NR] = {1, 0, "Lowcore Area Start"},
+ [ABS_LOWCORE_END_NR] = {0, 0, "Lowcore Area End"},
+ [MEMCPY_REAL_NR] = {1, 0, "Real Memory Copy Area Start"},
+ [MEMCPY_REAL_END_NR] = {0, 0, "Real Memory Copy Area End"},
#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
- [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+ [KASAN_SHADOW_START_NR] = {1, KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {0, KASAN_SHADOW_END, "Kasan Shadow End"},
#endif
- { -1, NULL }
+ {1, -1UL, NULL}
};
struct pg_state {
@@ -143,6 +167,19 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
}
+static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level)
+{
+ struct seq_file *m = st->seq;
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ }
+ st->start_address = addr;
+ st->current_prot = prot;
+ st->level = level;
+}
+
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
{
int width = sizeof(unsigned long) * 2;
@@ -166,9 +203,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
addr = max_addr;
if (st->level == -1) {
pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
- st->start_address = addr;
- st->current_prot = prot;
- st->level = level;
+ note_page_update_state(st, addr, prot, level);
} else if (prot != st->current_prot || level != st->level ||
addr >= st->marker[1].start_address) {
note_prot_wx(st, addr);
@@ -182,13 +217,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- while (addr >= st->marker[1].start_address) {
- st->marker++;
- pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
- }
- st->start_address = addr;
- st->current_prot = prot;
- st->level = level;
+ note_page_update_state(st, addr, prot, level);
}
}
@@ -260,22 +289,25 @@ static int ptdump_show(struct seq_file *m, void *v)
DEFINE_SHOW_ATTRIBUTE(ptdump);
#endif /* CONFIG_PTDUMP_DEBUGFS */
-/*
- * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
- * insertion sort to preserve the original order of markers with the same
- * start address.
- */
-static void sort_address_markers(void)
+static int ptdump_cmp(const void *a, const void *b)
{
- struct addr_marker tmp;
- int i, j;
+ const struct addr_marker *ama = a;
+ const struct addr_marker *amb = b;
- for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
- tmp = address_markers[i];
- for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
- address_markers[j + 1] = address_markers[j];
- address_markers[j + 1] = tmp;
- }
+ if (ama->start_address > amb->start_address)
+ return 1;
+ if (ama->start_address < amb->start_address)
+ return -1;
+ /*
+ * If the start addresses of two markers are identical consider the
+ * marker which defines the start of an area higher than the one which
+ * defines the end of an area. This keeps pairs of markers sorted.
+ */
+ if (ama->is_start)
+ return 1;
+ if (amb->is_start)
+ return -1;
+ return 0;
}
static int pt_dump_init(void)
@@ -283,14 +315,19 @@ static int pt_dump_init(void)
#ifdef CONFIG_KFENCE
unsigned long kfence_start = (unsigned long)__kfence_pool;
#endif
+ unsigned long lowcore = (unsigned long)get_lowcore();
+
/*
* Figure out the maximum virtual address being accessible with the
* kernel ASCE. We need this to keep the page table walker functions
* from accessing non-existent entries.
*/
- max_addr = (S390_lowcore.kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
+ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
- address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+ address_markers[LOWCORE_START_NR].start_address = lowcore;
+ address_markers[LOWCORE_END_NR].start_address = lowcore + sizeof(struct lowcore);
+ address_markers[IDENTITY_START_NR].start_address = __identity_base;
+ address_markers[IDENTITY_END_NR].start_address = __identity_base + ident_map_size;
address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
@@ -307,7 +344,18 @@ static int pt_dump_init(void)
address_markers[KFENCE_START_NR].start_address = kfence_start;
address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
#endif
- sort_address_markers();
+#ifdef CONFIG_KMSAN
+ address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START;
+ address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END;
+ address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START;
+ address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END;
+ address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START;
+ address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END;
+ address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START;
+ address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END;
+#endif
+ sort(address_markers, ARRAY_SIZE(address_markers) - 1,
+ sizeof(address_markers[0]), ptdump_cmp, NULL);
#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
#endif /* CONFIG_PTDUMP_DEBUGFS */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 65747f15dbec..8e149ef5e89b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -74,7 +74,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
return USER_FAULT;
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
- gmap = (struct gmap *)S390_lowcore.gmap;
+ gmap = (struct gmap *)get_lowcore()->gmap;
if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
@@ -182,15 +182,15 @@ static void dump_fault_info(struct pt_regs *regs)
pr_cont("mode while using ");
switch (get_fault_type(regs)) {
case USER_FAULT:
- asce = S390_lowcore.user_asce.val;
+ asce = get_lowcore()->user_asce.val;
pr_cont("user ");
break;
case GMAP_FAULT:
- asce = ((struct gmap *)S390_lowcore.gmap)->asce;
+ asce = ((struct gmap *)get_lowcore()->gmap)->asce;
pr_cont("gmap ");
break;
case KERNEL_FAULT:
- asce = S390_lowcore.kernel_asce.val;
+ asce = get_lowcore()->kernel_asce.val;
pr_cont("kernel ");
break;
default:
@@ -351,7 +351,7 @@ lock_mmap:
mmap_read_lock(mm);
gmap = NULL;
if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
- gmap = (struct gmap *)S390_lowcore.gmap;
+ gmap = (struct gmap *)get_lowcore()->gmap;
current->thread.gmap_addr = address;
current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
current->thread.gmap_int_code = regs->int_code & 0xffff;
@@ -433,12 +433,13 @@ error:
handle_fault_error_nolock(regs, 0);
else
do_sigsegv(regs, SEGV_MAPERR);
- } else if (fault & VM_FAULT_SIGBUS) {
+ } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)) {
if (!user_mode(regs))
handle_fault_error_nolock(regs, 0);
else
do_sigbus(regs);
} else {
+ pr_emerg("Unexpected fault flags: %08x\n", fault);
BUG();
}
}
@@ -492,6 +493,7 @@ void do_secure_storage_access(struct pt_regs *regs)
unsigned long addr = get_fault_address(regs);
struct vm_area_struct *vma;
struct mm_struct *mm;
+ struct folio *folio;
struct page *page;
struct gmap *gmap;
int rc;
@@ -521,7 +523,7 @@ void do_secure_storage_access(struct pt_regs *regs)
switch (get_fault_type(regs)) {
case GMAP_FAULT:
mm = current->mm;
- gmap = (struct gmap *)S390_lowcore.gmap;
+ gmap = (struct gmap *)get_lowcore()->gmap;
mmap_read_lock(mm);
addr = __gmap_translate(gmap, addr);
mmap_read_unlock(mm);
@@ -539,17 +541,18 @@ void do_secure_storage_access(struct pt_regs *regs)
mmap_read_unlock(mm);
break;
}
- if (arch_make_page_accessible(page))
+ folio = page_folio(page);
+ if (arch_make_folio_accessible(folio))
send_sig(SIGSEGV, current, 0);
- put_page(page);
+ folio_put(folio);
mmap_read_unlock(mm);
break;
case KERNEL_FAULT:
- page = phys_to_page(addr);
- if (unlikely(!try_get_page(page)))
+ folio = phys_to_folio(addr);
+ if (unlikely(!folio_try_get(folio)))
break;
- rc = arch_make_page_accessible(page);
- put_page(page);
+ rc = arch_make_folio_accessible(folio);
+ folio_put(folio);
if (rc)
BUG();
break;
@@ -561,7 +564,7 @@ NOKPROBE_SYMBOL(do_secure_storage_access);
void do_non_secure_storage_access(struct pt_regs *regs)
{
- struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
unsigned long gaddr = get_fault_address(regs);
if (WARN_ON_ONCE(get_fault_type(regs) != GMAP_FAULT))
@@ -573,7 +576,7 @@ NOKPROBE_SYMBOL(do_non_secure_storage_access);
void do_secure_storage_violation(struct pt_regs *regs)
{
- struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ struct gmap *gmap = (struct gmap *)get_lowcore()->gmap;
unsigned long gaddr = get_fault_address(regs);
/*
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 474a25ca5c48..eb0b51a36be0 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(gmap_remove);
*/
void gmap_enable(struct gmap *gmap)
{
- S390_lowcore.gmap = (unsigned long) gmap;
+ get_lowcore()->gmap = (unsigned long)gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);
@@ -297,7 +297,7 @@ EXPORT_SYMBOL_GPL(gmap_enable);
*/
void gmap_disable(struct gmap *gmap)
{
- S390_lowcore.gmap = 0UL;
+ get_lowcore()->gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);
@@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(gmap_disable);
*/
struct gmap *gmap_get_enabled(void)
{
- return (struct gmap *) S390_lowcore.gmap;
+ return (struct gmap *)get_lowcore()->gmap;
}
EXPORT_SYMBOL_GPL(gmap_get_enabled);
@@ -2733,7 +2733,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
{
pmd_t *pmd = (pmd_t *)pte;
unsigned long start, end;
- struct page *page = pmd_page(*pmd);
+ struct folio *folio = page_folio(pmd_page(*pmd));
/*
* The write check makes sure we do not set a key on shared
@@ -2748,7 +2748,7 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
start = pmd_val(*pmd) & HPAGE_MASK;
end = start + HPAGE_SIZE;
__storage_key_init_range(start, end);
- set_bit(PG_arch_1, &page->flags);
+ set_bit(PG_arch_1, &folio->flags);
cond_resched();
return 0;
}
@@ -2841,13 +2841,15 @@ static const struct mm_walk_ops gather_pages_ops = {
*/
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns)
{
+ struct folio *folio;
unsigned long i;
for (i = 0; i < count; i++) {
+ folio = pfn_folio(pfns[i]);
/* we always have an extra reference */
- uv_destroy_owned_page(pfn_to_phys(pfns[i]));
+ uv_destroy_folio(folio);
/* get rid of the extra reference */
- put_page(pfn_to_page(pfns[i]));
+ folio_put(folio);
cond_resched();
}
}
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 2675aab4acc7..ded0eff58a19 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -121,7 +121,7 @@ static inline pte_t __rste_to_pte(unsigned long rste)
static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
{
- struct page *page;
+ struct folio *folio;
unsigned long size, paddr;
if (!mm_uses_skeys(mm) ||
@@ -129,16 +129,16 @@ static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
return;
if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
- page = pud_page(__pud(rste));
+ folio = page_folio(pud_page(__pud(rste)));
size = PUD_SIZE;
paddr = rste & PUD_MASK;
} else {
- page = pmd_page(__pmd(rste));
+ folio = page_folio(pmd_page(__pmd(rste)));
size = PMD_SIZE;
paddr = rste & PMD_MASK;
}
- if (!test_and_set_bit(PG_arch_1, &page->flags))
+ if (!test_and_set_bit(PG_arch_1, &folio->flags))
__storage_key_init_range(paddr, paddr + size);
}
@@ -169,7 +169,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
__set_huge_pte_at(mm, addr, ptep, pte);
}
-pte_t huge_ptep_get(pte_t *ptep)
+pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
return __rste_to_pte(pte_val(*ptep));
}
@@ -177,7 +177,7 @@ pte_t huge_ptep_get(pte_t *ptep)
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- pte_t pte = huge_ptep_get(ptep);
+ pte_t pte = huge_ptep_get(mm, addr, ptep);
pmd_t *pmdp = (pmd_t *) ptep;
pud_t *pudp = (pud_t *) ptep;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index e769d2726f4e..e3d258f9e726 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -62,6 +62,7 @@ EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
+ unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size());
unsigned int order;
struct page *page;
int i;
@@ -70,7 +71,7 @@ static void __init setup_zero_pages(void)
order = 7;
/* Limit number of empty zero pages for small memory sizes */
- while (order > 2 && (totalram_pages() >> 10) < (1UL << order))
+ while (order > 2 && (total_pages >> 10) < (1UL << order))
order--;
empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
@@ -107,6 +108,8 @@ void mark_rodata_ro(void)
{
unsigned long size = __end_ro_after_init - __start_ro_after_init;
+ if (MACHINE_HAS_NX)
+ system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
}
@@ -169,13 +172,6 @@ void __init mem_init(void)
setup_zero_pages(); /* Setup zeroed pages. */
}
-void free_initmem(void)
-{
- set_memory_rwnx((unsigned long)_sinittext,
- (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
- free_initmem_default(POISON_FREE_INITMEM);
-}
-
unsigned long memory_block_size_bytes(void)
{
/*
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 632c3a55feed..28a18c42ba99 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
}
/*
- * s390_kernel_write - write to kernel memory bypassing DAT
+ * __s390_kernel_write - write to kernel memory bypassing DAT
* @dst: destination address
* @src: source address
* @size: number of bytes to copy
@@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
-notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
+notrace void *__s390_kernel_write(void *dst, const void *src, size_t size)
{
void *tmp = dst;
unsigned long flags;
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 01bc8fad64d6..5f805ad42d4c 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -75,7 +75,7 @@ static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
break;
}
table = (unsigned long *)((unsigned long)old & mask);
- crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce.val);
+ crdte(*old, new, table, dtt, addr, get_lowcore()->kernel_asce.val);
} else if (MACHINE_HAS_IDTE) {
cspg(old, *old, new);
} else {
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index abb629d7e131..f691e0fb66a2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -55,6 +55,8 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
@@ -64,8 +66,8 @@ static void __crst_table_upgrade(void *arg)
/* change all active ASCEs to avoid the creation of new TLBs */
if (current->active_mm == mm) {
- S390_lowcore.user_asce.val = mm->context.asce;
- local_ctl_load(7, &S390_lowcore.user_asce);
+ get_lowcore()->user_asce.val = mm->context.asce;
+ local_ctl_load(7, &get_lowcore()->user_asce);
}
__tlb_flush_local();
}
@@ -262,6 +264,8 @@ static unsigned long *base_crst_alloc(unsigned long val)
static void base_crst_free(unsigned long *table)
{
+ if (!table)
+ return;
pagetable_free(virt_to_ptdesc(table));
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 41c714e21292..665b8228afeb 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -661,7 +661,6 @@ void __init vmem_map_init(void)
{
__set_memory_rox(_stext, _etext);
__set_memory_ro(_etext, __end_rodata);
- __set_memory_rox(_sinittext, _einittext);
__set_memory_rox(__stext_amode31, __etext_amode31);
/*
* If the BEAR-enhancement facility is not installed the first
@@ -670,16 +669,8 @@ void __init vmem_map_init(void)
*/
if (!static_key_enabled(&cpu_has_bear))
set_memory_x(0, 1);
- if (debug_pagealloc_enabled()) {
- /*
- * Use RELOC_HIDE() as long as __va(0) translates to NULL,
- * since performing pointer arithmetic on a NULL pointer
- * has undefined behavior and generates compiler warnings.
- */
- __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
- }
- if (MACHINE_HAS_NX)
- system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT);
+ if (debug_pagealloc_enabled())
+ __set_memory_4k(__va(0), __va(0) + ident_map_size);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 4be8f5cadd02..9d440a0b729e 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -31,11 +31,12 @@
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include <asm/text-patching.h>
+#include <asm/unwind.h>
#include "bpf_jit.h"
struct bpf_jit {
u32 seen; /* Flags to remember seen eBPF instructions */
- u32 seen_reg[16]; /* Array to remember which registers are used */
+ u16 seen_regs; /* Mask to remember which registers are used */
u32 *addrs; /* Array with relative instruction addresses */
u8 *prg_buf; /* Start of program */
int size; /* Size of program and literal pool */
@@ -53,6 +54,8 @@ struct bpf_jit {
int excnt; /* Number of exception table entries */
int prologue_plt_ret; /* Return address for prologue hotpatch PLT */
int prologue_plt; /* Start of prologue hotpatch PLT */
+ int kern_arena; /* Pool offset of kernel arena address */
+ u64 user_arena; /* User arena address */
};
#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
@@ -60,6 +63,8 @@ struct bpf_jit {
#define SEEN_FUNC BIT(2) /* calls C functions */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
+#define NVREGS 0xffc0 /* %r6-%r15 */
+
/*
* s390 registers
*/
@@ -118,8 +123,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
u32 r1 = reg2hex[b1];
- if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
- jit->seen_reg[r1] = 1;
+ if (r1 >= 6 && r1 <= 15)
+ jit->seen_regs |= (1 << r1);
}
#define REG_SET_SEEN(b1) \
@@ -127,8 +132,6 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
reg_set_seen(jit, b1); \
})
-#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]
-
/*
* EMIT macros for code generation
*/
@@ -436,12 +439,12 @@ static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
/*
* Return first seen register (from start)
*/
-static int get_start(struct bpf_jit *jit, int start)
+static int get_start(u16 seen_regs, int start)
{
int i;
for (i = start; i <= 15; i++) {
- if (jit->seen_reg[i])
+ if (seen_regs & (1 << i))
return i;
}
return 0;
@@ -450,15 +453,15 @@ static int get_start(struct bpf_jit *jit, int start)
/*
* Return last seen register (from start) (gap >= 2)
*/
-static int get_end(struct bpf_jit *jit, int start)
+static int get_end(u16 seen_regs, int start)
{
int i;
for (i = start; i < 15; i++) {
- if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
+ if (!(seen_regs & (3 << i)))
return i - 1;
}
- return jit->seen_reg[15] ? 15 : 14;
+ return (seen_regs & (1 << 15)) ? 15 : 14;
}
#define REGS_SAVE 1
@@ -467,8 +470,10 @@ static int get_end(struct bpf_jit *jit, int start)
* Save and restore clobbered registers (6-15) on stack.
* We save/restore registers in chunks with gap >= 2 registers.
*/
-static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
+static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth,
+ u16 extra_regs)
{
+ u16 seen_regs = jit->seen_regs | extra_regs;
const int last = 15, save_restore_size = 6;
int re = 6, rs;
@@ -482,10 +487,10 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
}
do {
- rs = get_start(jit, re);
+ rs = get_start(seen_regs, re);
if (!rs)
break;
- re = get_end(jit, rs + 1);
+ re = get_end(seen_regs, rs + 1);
if (op == REGS_SAVE)
save_regs(jit, rs, re);
else
@@ -570,8 +575,21 @@ static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
- /* Save registers */
- save_restore_regs(jit, REGS_SAVE, stack_depth);
+ if (fp->aux->exception_cb) {
+ /*
+ * Switch stack, the new address is in the 2nd parameter.
+ *
+ * Arrange the restoration of %r6-%r15 in the epilogue.
+ * Do not restore them now, the prog does not need them.
+ */
+ /* lgr %r15,%r3 */
+ EMIT4(0xb9040000, REG_15, REG_3);
+ jit->seen_regs |= NVREGS;
+ } else {
+ /* Save registers */
+ save_restore_regs(jit, REGS_SAVE, stack_depth,
+ fp->aux->exception_boundary ? NVREGS : 0);
+ }
/* Setup literal pool */
if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
if (!is_first_pass(jit) &&
@@ -647,7 +665,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
- save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
if (nospec_uses_trampoline()) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
@@ -667,50 +685,111 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
jit->prg += sizeof(struct bpf_plt);
}
-static int get_probe_mem_regno(const u8 *insn)
-{
- /*
- * insn must point to llgc, llgh, llgf, lg, lgb, lgh or lgf, which have
- * destination register at the same position.
- */
- if (insn[0] != 0xe3) /* common prefix */
- return -1;
- if (insn[5] != 0x90 && /* llgc */
- insn[5] != 0x91 && /* llgh */
- insn[5] != 0x16 && /* llgf */
- insn[5] != 0x04 && /* lg */
- insn[5] != 0x77 && /* lgb */
- insn[5] != 0x15 && /* lgh */
- insn[5] != 0x14) /* lgf */
- return -1;
- return insn[1] >> 4;
-}
-
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
regs->psw.addr = extable_fixup(x);
- regs->gprs[x->data] = 0;
+ if (x->data != -1)
+ regs->gprs[x->data] = 0;
return true;
}
-static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
- int probe_prg, int nop_prg)
+/*
+ * A single BPF probe instruction
+ */
+struct bpf_jit_probe {
+ int prg; /* JITed instruction offset */
+ int nop_prg; /* JITed nop offset */
+ int reg; /* Register to clear on exception */
+ int arena_reg; /* Register to use for arena addressing */
+};
+
+static void bpf_jit_probe_init(struct bpf_jit_probe *probe)
+{
+ probe->prg = -1;
+ probe->nop_prg = -1;
+ probe->reg = -1;
+ probe->arena_reg = REG_0;
+}
+
+/*
+ * Handlers of certain exceptions leave psw.addr pointing to the instruction
+ * directly after the failing one. Therefore, create two exception table
+ * entries and also add a nop in case two probing instructions come directly
+ * after each other.
+ */
+static void bpf_jit_probe_emit_nop(struct bpf_jit *jit,
+ struct bpf_jit_probe *probe)
+{
+ if (probe->prg == -1 || probe->nop_prg != -1)
+ /* The probe is not armed or nop is already emitted. */
+ return;
+
+ probe->nop_prg = jit->prg;
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+}
+
+static void bpf_jit_probe_load_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+ return;
+
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ probe->arena_reg = REG_W1;
+ }
+ probe->prg = jit->prg;
+ probe->reg = reg2hex[insn->dst_reg];
+}
+
+static void bpf_jit_probe_store_pre(struct bpf_jit *jit, struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_MEM32)
+ return;
+
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ probe->arena_reg = REG_W1;
+ probe->prg = jit->prg;
+}
+
+static void bpf_jit_probe_atomic_pre(struct bpf_jit *jit,
+ struct bpf_insn *insn,
+ struct bpf_jit_probe *probe)
+{
+ if (BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
+ return;
+
+ /* lgrl %r1,kern_arena */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, jit->kern_arena);
+ /* agr %r1,%dst */
+ EMIT4(0xb9080000, REG_W1, insn->dst_reg);
+ probe->arena_reg = REG_W1;
+ probe->prg = jit->prg;
+}
+
+static int bpf_jit_probe_post(struct bpf_jit *jit, struct bpf_prog *fp,
+ struct bpf_jit_probe *probe)
{
struct exception_table_entry *ex;
- int reg, prg;
+ int i, prg;
s64 delta;
u8 *insn;
- int i;
+ if (probe->prg == -1)
+ /* The probe is not armed. */
+ return 0;
+ bpf_jit_probe_emit_nop(jit, probe);
if (!fp->aux->extable)
/* Do nothing during early JIT passes. */
return 0;
- insn = jit->prg_buf + probe_prg;
- reg = get_probe_mem_regno(insn);
- if (WARN_ON_ONCE(reg < 0))
- /* JIT bug - unexpected probe instruction. */
- return -1;
- if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+ insn = jit->prg_buf + probe->prg;
+ if (WARN_ON_ONCE(probe->prg + insn_length(*insn) != probe->nop_prg))
/* JIT bug - gap between probe and nop instructions. */
return -1;
for (i = 0; i < 2; i++) {
@@ -719,23 +798,24 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
return -1;
ex = &fp->aux->extable[jit->excnt];
/* Add extable entries for probe and nop instructions. */
- prg = i == 0 ? probe_prg : nop_prg;
+ prg = i == 0 ? probe->prg : probe->nop_prg;
delta = jit->prg_buf + prg - (u8 *)&ex->insn;
if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
/* JIT bug - code and extable must be close. */
return -1;
ex->insn = delta;
/*
- * Always land on the nop. Note that extable infrastructure
- * ignores fixup field, it is handled by ex_handler_bpf().
+ * Land on the current instruction. Note that the extable
+ * infrastructure ignores the fixup field; it is handled by
+ * ex_handler_bpf().
*/
- delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+ delta = jit->prg_buf + jit->prg - (u8 *)&ex->fixup;
if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
/* JIT bug - landing pad and extable must be close. */
return -1;
ex->fixup = delta;
ex->type = EX_TYPE_BPF;
- ex->data = reg;
+ ex->data = probe->reg;
jit->excnt++;
}
return 0;
@@ -782,19 +862,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
s32 branch_oc_off = insn->off;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ struct bpf_jit_probe probe;
int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
- int probe_prg = -1;
unsigned int mask;
- int nop_prg;
int err;
- if (BPF_CLASS(insn->code) == BPF_LDX &&
- (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
- BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
- probe_prg = jit->prg;
+ bpf_jit_probe_init(&probe);
switch (insn->code) {
/*
@@ -823,6 +899,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
}
break;
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (insn_is_cast_user(insn)) {
+ int patch_brc;
+
+ /* ltgr %dst,%src */
+ EMIT4(0xb9020000, dst_reg, src_reg);
+ /* brc 8,0f */
+ patch_brc = jit->prg;
+ EMIT4_PCREL_RIC(0xa7040000, 8, 0);
+ /* iihf %dst,user_arena>>32 */
+ EMIT6_IMM(0xc0080000, dst_reg, jit->user_arena >> 32);
+ /* 0: */
+ if (jit->prg_buf)
+ *(u16 *)(jit->prg_buf + patch_brc + 2) =
+ (jit->prg - patch_brc) >> 1;
+ break;
+ }
switch (insn->off) {
case 0: /* DST = SRC */
/* lgr %dst,%src */
@@ -1366,51 +1458,99 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
* BPF_ST(X)
*/
case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
- /* stcy %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stcy %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
- /* sthy %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sthy %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
- /* sty %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sty %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
- /* stg %src,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stg %src,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
- /* stcy %w0,off(dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stcy %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
/* lhi %w0,imm */
EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
- /* sthy %w0,off(dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sthy %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
/* llilf %w0,imm */
EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
- /* sty %w0,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* sty %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
/* lgfi %w0,imm */
EMIT6_IMM(0xc0010000, REG_W0, imm);
- /* stg %w0,off(%dst) */
- EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
+ bpf_jit_probe_store_pre(jit, insn, &probe);
+ /* stg %w0,off(%dst,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
/*
@@ -1418,15 +1558,30 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_STX | BPF_ATOMIC | BPF_DW:
case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
{
bool is32 = BPF_SIZE(insn->code) == BPF_W;
+ /*
+ * Unlike loads and stores, atomics have only a base register,
+ * but no index register. For the non-arena case, simply use
+ * %dst as a base. For the arena case, use the work register
+ * %r1: first, load the arena base into it, and then add %dst
+ * to it.
+ */
+ probe.arena_reg = dst_reg;
+
switch (insn->imm) {
-/* {op32|op64} {%w0|%src},%src,off(%dst) */
#define EMIT_ATOMIC(op32, op64) do { \
+ bpf_jit_probe_atomic_pre(jit, insn, &probe); \
+ /* {op32|op64} {%w0|%src},%src,off(%arena) */ \
EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64), \
(insn->imm & BPF_FETCH) ? src_reg : REG_W0, \
- src_reg, dst_reg, off); \
+ src_reg, probe.arena_reg, off); \
+ err = bpf_jit_probe_post(jit, fp, &probe); \
+ if (err < 0) \
+ return err; \
if (insn->imm & BPF_FETCH) { \
/* bcr 14,0 - see atomic_fetch_{add,and,or,xor}() */ \
_EMIT2(0x07e0); \
@@ -1455,25 +1610,50 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ATOMIC(0x00f7, 0x00e7);
break;
#undef EMIT_ATOMIC
- case BPF_XCHG:
- /* {ly|lg} %w0,off(%dst) */
+ case BPF_XCHG: {
+ struct bpf_jit_probe load_probe = probe;
+ int loop_start;
+
+ bpf_jit_probe_atomic_pre(jit, insn, &load_probe);
+ /* {ly|lg} %w0,off(%arena) */
EMIT6_DISP_LH(0xe3000000,
is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
- dst_reg, off);
- /* 0: {csy|csg} %w0,%src,off(%dst) */
+ load_probe.arena_reg, off);
+ bpf_jit_probe_emit_nop(jit, &load_probe);
+ /* Reuse {ly|lg}'s arena_reg for {csy|csg}. */
+ if (load_probe.prg != -1) {
+ probe.prg = jit->prg;
+ probe.arena_reg = load_probe.arena_reg;
+ }
+ loop_start = jit->prg;
+ /* 0: {csy|csg} %w0,%src,off(%arena) */
EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
- REG_W0, src_reg, dst_reg, off);
+ REG_W0, src_reg, probe.arena_reg, off);
+ bpf_jit_probe_emit_nop(jit, &probe);
/* brc 4,0b */
- EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
+ EMIT4_PCREL_RIC(0xa7040000, 4, loop_start);
/* {llgfr|lgr} %src,%w0 */
EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
+ /* Both probes should land here on exception. */
+ err = bpf_jit_probe_post(jit, fp, &load_probe);
+ if (err < 0)
+ return err;
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
if (is32 && insn_is_zext(&insn[1]))
insn_count = 2;
break;
+ }
case BPF_CMPXCHG:
- /* 0: {csy|csg} %b0,%src,off(%dst) */
+ bpf_jit_probe_atomic_pre(jit, insn, &probe);
+ /* 0: {csy|csg} %b0,%src,off(%arena) */
EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
- BPF_REG_0, src_reg, dst_reg, off);
+ BPF_REG_0, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
default:
pr_err("Unknown atomic operation %02x\n", insn->imm);
@@ -1488,51 +1668,87 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
- /* llgc %dst,0(off,%src) */
- EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* llgc %dst,off(%src,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_B: /* dst = *(s8 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
- /* lgb %dst,0(off,%src) */
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lgb %dst,off(%src) */
EMIT6_DISP_LH(0xe3000000, 0x0077, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
- /* llgh %dst,0(off,%src) */
- EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* llgh %dst,off(%src,%arena) */
+ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_H: /* dst = *(s16 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
- /* lgh %dst,0(off,%src) */
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lgh %dst,off(%src) */
EMIT6_DISP_LH(0xe3000000, 0x0015, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
jit->seen |= SEEN_MEM;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
- EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
+ EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
if (insn_is_zext(&insn[1]))
insn_count = 2;
break;
case BPF_LDX | BPF_MEMSX | BPF_W: /* dst = *(s32 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
/* lgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0014, dst_reg, src_reg, REG_0, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
- /* lg %dst,0(off,%src) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ bpf_jit_probe_load_pre(jit, insn, &probe);
+ /* lg %dst,off(%src,%arena) */
jit->seen |= SEEN_MEM;
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
+ EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg,
+ probe.arena_reg, off);
+ err = bpf_jit_probe_post(jit, fp, &probe);
+ if (err < 0)
+ return err;
break;
/*
* BPF_JMP / CALL
@@ -1647,7 +1863,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth, 0);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1897,22 +2113,6 @@ branch_oc:
return -1;
}
- if (probe_prg != -1) {
- /*
- * Handlers of certain exceptions leave psw.addr pointing to
- * the instruction directly after the failing one. Therefore,
- * create two exception table entries and also add a nop in
- * case two probing instructions come directly after each
- * other.
- */
- nop_prg = jit->prg;
- /* bcr 0,%0 */
- _EMIT2(0x0700);
- err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
- if (err < 0)
- return err;
- }
-
return insn_count;
}
@@ -1958,12 +2158,18 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass, u32 stack_depth)
{
int i, insn_count, lit32_size, lit64_size;
+ u64 kern_arena;
jit->lit32 = jit->lit32_start;
jit->lit64 = jit->lit64_start;
jit->prg = 0;
jit->excnt = 0;
+ kern_arena = bpf_arena_get_kern_vm_start(fp->aux->arena);
+ if (kern_arena)
+ jit->kern_arena = _EMIT_CONST_U64(kern_arena);
+ jit->user_arena = bpf_arena_get_user_vm_start(fp->aux->arena);
+
bpf_jit_prologue(jit, fp, stack_depth);
if (bpf_set_addr(jit, 0) < 0)
return -1;
@@ -2011,9 +2217,25 @@ static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
struct bpf_prog *fp)
{
struct bpf_binary_header *header;
+ struct bpf_insn *insn;
u32 extable_size;
u32 code_size;
+ int i;
+
+ for (i = 0; i < fp->len; i++) {
+ insn = &fp->insnsi[i];
+ if (BPF_CLASS(insn->code) == BPF_STX &&
+ BPF_MODE(insn->code) == BPF_PROBE_ATOMIC &&
+ (BPF_SIZE(insn->code) == BPF_DW ||
+ BPF_SIZE(insn->code) == BPF_W) &&
+ insn->imm == BPF_XCHG)
+ /*
+ * bpf_jit_insn() emits a load and a compare-and-swap,
+ * both of which need to be probed.
+ */
+ fp->aux->num_exentries += 1;
+ }
/* We need two entries per insn. */
fp->aux->num_exentries *= 2;
@@ -2689,3 +2911,52 @@ bool bpf_jit_supports_subprog_tailcalls(void)
{
return true;
}
+
+bool bpf_jit_supports_arena(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+ /*
+ * Currently the verifier uses this function only to check which
+ * atomic stores to arena are supported, and they all are.
+ */
+ return true;
+}
+
+bool bpf_jit_supports_exceptions(void)
+{
+ /*
+ * Exceptions require unwinding support, which is always available,
+ * because the kernel is always built with backchain.
+ */
+ return true;
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *, u64, u64, u64),
+ void *cookie)
+{
+ unsigned long addr, prev_addr = 0;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, NULL, NULL, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ break;
+ /*
+ * addr is a return address and state.sp is the value of %r15
+ * at this address. exception_cb needs %r15 at entry to the
+ * function containing addr, so take the next state.sp.
+ *
+ * There is no bp, and the exception_cb prog does not need one
+ * to perform a quasi-longjmp. The common code requires a
+ * non-zero bp, so pass sp there as well.
+ */
+ if (prev_addr && !consume_fn(cookie, prev_addr, state.sp,
+ state.sp))
+ break;
+ prev_addr = addr;
+ }
+}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 0de0f6e405b5..cff4838fad21 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -1064,7 +1064,7 @@ char * __init pcibios_setup(char *str)
return NULL;
}
if (!strcmp(str, "nomio")) {
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_PCI_MIO;
+ get_lowcore()->machine_flags &= ~MACHINE_FLAG_PCI_MIO;
return NULL;
}
if (!strcmp(str, "force_floating")) {
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index ff8f24854c64..84482a921332 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
}
}
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
+ unsigned long *bit)
{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs, cpu;
- unsigned long bit;
- struct msi_desc *msi;
- struct msi_msg msg;
- int cpu_addr;
- int rc, irq;
-
- zdev->aisb = -1UL;
- zdev->msi_first_bit = -1U;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
if (irq_delivery == DIRECTED) {
/* Allocate cpu vector bits */
- bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
- if (bit == -1UL)
+ *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (*bit == -1UL)
return -EIO;
} else {
/* Allocate adapter summary indicator bit */
- bit = airq_iv_alloc_bit(zpci_sbv);
- if (bit == -1UL)
+ *bit = airq_iv_alloc_bit(zpci_sbv);
+ if (*bit == -1UL)
return -EIO;
- zdev->aisb = bit;
+ zdev->aisb = *bit;
/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
@@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOMEM;
/* Wire up shortcut pointer */
- zpci_ibv[bit] = zdev->aibv;
+ zpci_ibv[*bit] = zdev->aibv;
/* Each function has its own interrupt vector */
- bit = 0;
+ *bit = 0;
}
+ return 0;
+}
- /* Request MSI interrupts */
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ unsigned long bit;
+ int cpu_addr;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+ if (msi_vecs < nvec) {
+ pr_info("%s requested %d irqs, allocate system limit of %d",
+ pci_name(pdev), nvec, zdev->max_msi);
+ }
+
+ rc = __alloc_airq(zdev, msi_vecs, &bit);
+ if (rc < 0)
+ return rc;
+
+ /*
+ * Request MSI interrupts:
+ * When using MSI, nvec_used interrupt sources and their irq
+ * descriptors are controlled through one msi descriptor.
+ * Thus the outer loop over msi descriptors shall run only once,
+ * while two inner loops iterate over the interrupt vectors.
+ * When using MSI-X, each interrupt vector/irq descriptor
+ * is bound to exactly one msi descriptor (nvec_used is one).
+ * So the inner loops are executed once, while the outer iterates
+ * over the MSI-X descriptors.
+ */
hwirq = bit;
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
- rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
- (irq_delivery == DIRECTED) ?
- msi->affinity : NULL);
+ irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
+ irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_percpu_irq);
+
+ for (i = 0; i < irqs_per_msi; i++) {
+ rc = irq_set_msi_desc_off(irq, i, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
+ handle_percpu_irq);
+ }
+
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
if (msi->affinity)
@@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_lo |= (cpu_addr << 8);
for_each_possible_cpu(cpu) {
- airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zpci_ibv[cpu],
+ hwirq + i, irq + i);
}
} else {
msg.address_lo = zdev->msi_addr & 0xffffffff;
- airq_iv_set_data(zdev->aibv, hwirq, irq);
+ for (i = 0; i < irqs_per_msi; i++)
+ airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
}
msg.address_hi = zdev->msi_addr >> 32;
pci_write_msi_msg(irq, &msg);
- hwirq++;
+ hwirq += irqs_per_msi;
}
zdev->msi_first_bit = bit;
- zdev->msi_nr_irqs = msi_vecs;
+ zdev->msi_nr_irqs = hwirq - bit;
rc = zpci_set_irq(zdev);
if (rc)
return rc;
- return (msi_vecs == nvec) ? 0 : msi_vecs;
+ return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
+ unsigned int i;
int rc;
/* Disable interrupts */
@@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
+ for (i = 0; i < msi->nvec_used; i++) {
+ irq_set_msi_desc(msi->irq + i, NULL);
+ irq_free_desc(msi->irq + i);
+ }
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
@@ -410,7 +442,7 @@ static void __init cpu_enable_directed_irq(void *unused)
union zpci_sic_iib iib = {{0}};
union zpci_sic_iib ziib = {{0}};
- iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+ iib.cdiib.dibv_addr = virt_to_phys(zpci_ibv[smp_processor_id()]->vector);
zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC, &ziib);
diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c
index a74dbd5c9896..30a732c808f3 100644
--- a/arch/s390/tools/relocs.c
+++ b/arch/s390/tools/relocs.c
@@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel)
case R_390_GOTOFF64:
break;
case R_390_64:
- add_reloc(&relocs64, offset - ehdr.e_entry);
+ add_reloc(&relocs64, offset);
break;
default:
die("Unsupported relocation type: %d\n", r_type);