diff options
Diffstat (limited to 'arch/s390')
32 files changed, 387 insertions, 663 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 16c673096a22..c80235206c01 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -220,23 +220,8 @@ config AUDIT_ARCH bool default y -config S390_SWITCH_AMODE - bool "Switch kernel/user addressing modes" - help - This option allows to switch the addressing modes of kernel and user - space. The kernel parameter switch_amode=on will enable this feature, - default is disabled. Enabling this (via kernel parameter) on machines - earlier than IBM System z9-109 EC/BC will reduce system performance. - - Note that this option will also be selected by selecting the execute - protection option below. Enabling the execute protection via the - noexec kernel parameter will also switch the addressing modes, - independent of the switch_amode kernel parameter. - - config S390_EXEC_PROTECT bool "Data execute protection" - select S390_SWITCH_AMODE help This option allows to enable a buffer overflow protection for user space programs and it also selects the addressing mode option above. diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index b49c00ce65e9..a3209906739e 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -6,7 +6,6 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/smp_lock.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -49,7 +48,6 @@ static unsigned char parm_block[32] = { static int prng_open(struct inode *inode, struct file *file) { - cycle_kernel_lock(); return nonseekable_open(inode, file); } diff --git a/arch/s390/defconfig b/arch/s390/defconfig index ab4464486b7a..f4e53c6708dc 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -185,7 +185,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_AUDIT_ARCH=y -CONFIG_S390_SWITCH_AMODE=y CONFIG_S390_EXEC_PROTECT=y # diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index ae7c8f9f94a5..2a113d6a7dfd 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -21,7 +21,7 @@ #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CS_LOOP(ptr, op_val, op_string) ({ \ - typeof(ptr->counter) old_val, new_val; \ + int old_val, new_val; \ asm volatile( \ " l %0,%2\n" \ "0: lr %1,%0\n" \ @@ -38,7 +38,7 @@ #else /* __GNUC__ */ #define __CS_LOOP(ptr, op_val, op_string) ({ \ - typeof(ptr->counter) old_val, new_val; \ + int old_val, new_val; \ asm volatile( \ " l %0,0(%3)\n" \ "0: lr %1,%0\n" \ @@ -143,7 +143,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CSG_LOOP(ptr, op_val, op_string) ({ \ - typeof(ptr->counter) old_val, new_val; \ + long long old_val, new_val; \ asm volatile( \ " lg %0,%2\n" \ "0: lgr %1,%0\n" \ @@ -160,7 +160,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #else /* __GNUC__ */ #define __CSG_LOOP(ptr, op_val, op_string) ({ \ - typeof(ptr->counter) old_val, new_val; \ + long long old_val, new_val; \ asm volatile( \ " lg %0,0(%3)\n" \ "0: lgr %1,%0\n" \ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 2a5419551176..f4bd346a52d3 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -142,6 +142,8 @@ struct ccw1; extern int ccw_device_set_options_mask(struct ccw_device *, unsigned long); extern int ccw_device_set_options(struct ccw_device *, unsigned long); extern void ccw_device_clear_options(struct ccw_device *, unsigned long); +int ccw_device_is_pathgroup(struct ccw_device *cdev); +int ccw_device_is_multipath(struct ccw_device *cdev); /* Allow for i/o completion notification after primary interrupt status. */ #define CCWDEV_EARLY_NOTIFICATION 0x0001 @@ -151,6 +153,8 @@ extern void ccw_device_clear_options(struct ccw_device *, unsigned long); #define CCWDEV_DO_PATHGROUP 0x0004 /* Allow forced onlining of boxed devices. */ #define CCWDEV_ALLOW_FORCE 0x0008 +/* Try to use multipath mode. */ +#define CCWDEV_DO_MULTIPATH 0x0010 extern int ccw_device_start(struct ccw_device *, struct ccw1 *, unsigned long, __u8, unsigned long); diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f23961ada7fb..258ba88b7b50 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -183,6 +183,7 @@ struct s390_idle_data { unsigned long long idle_count; unsigned long long idle_enter; unsigned long long idle_time; + int nohz_delay; }; DECLARE_PER_CPU(struct s390_idle_data, s390_idle); @@ -198,4 +199,11 @@ static inline void s390_idle_check(void) vtime_start_cpu(); } +static inline int s390_nohz_delay(int cpu) +{ + return per_cpu(s390_idle, cpu).nohz_delay != 0; +} + +#define arch_needs_cpu(cpu) s390_nohz_delay(cpu) + #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fc7edd6f41b6..976e273988c2 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -36,7 +36,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.has_pgste = 1; mm->context.alloc_pgste = 1; } else { - mm->context.noexec = s390_noexec; + mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE); mm->context.has_pgste = 0; mm->context.alloc_pgste = 0; } @@ -58,7 +58,7 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) pgd_t *pgd = mm->pgd; S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd); - if (switch_amode) { + if (user_mode != HOME_SPACE_MODE) { /* Load primary space page table origin. */ pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd; S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd); diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index ddad5903341c..68940d0bad91 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -143,7 +143,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock_init(&mm->context.list_lock); INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); - return (pgd_t *) crst_table_alloc(mm, s390_noexec); + return (pgd_t *) + crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE); } #define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 60a7b1a1702f..e2fa79cf0614 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -169,12 +169,13 @@ extern unsigned long VMALLOC_START; * STL Segment-Table-Length: Segment-table length (STL+1*16 entries -> up to 2048) * * A 64 bit pagetable entry of S390 has following format: - * | PFRA |0IP0| OS | + * | PFRA |0IPC| OS | * 0000000000111111111122222222223333333333444444444455555555556666 * 0123456789012345678901234567890123456789012345678901234567890123 * * I Page-Invalid Bit: Page is not available for address-translation * P Page-Protection Bit: Store access not possible for page + * C Change-bit override: HW is not required to set change bit * * A 64 bit segmenttable entry of S390 has following format: * | P-table origin | TT @@ -218,6 +219,7 @@ extern unsigned long VMALLOC_START; */ /* Hardware bits in the page table entry */ +#define _PAGE_CO 0x100 /* HW Change-bit override */ #define _PAGE_RO 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e37478e87286..52a779c337e8 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -49,17 +49,12 @@ extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); -#ifdef CONFIG_S390_SWITCH_AMODE -extern unsigned int switch_amode; -#else -#define switch_amode (0) -#endif - -#ifdef CONFIG_S390_EXEC_PROTECT -extern unsigned int s390_noexec; -#else -#define s390_noexec (0) -#endif +#define PRIMARY_SPACE_MODE 0 +#define ACCESS_REGISTER_MODE 1 +#define SECONDARY_SPACE_MODE 2 +#define HOME_SPACE_MODE 3 + +extern unsigned int user_mode; /* * Machine features detected in head.S diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index a868b272c257..2ab1141eeb50 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -1,57 +1,22 @@ /* - * include/asm-s390/smp.h - * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), - * Martin Schwidefsky (schwidefsky@de.ibm.com) - * Heiko Carstens (heiko.carstens@de.ibm.com) + * Copyright IBM Corp. 1999,2009 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, */ #ifndef __ASM_SMP_H #define __ASM_SMP_H -#include <linux/threads.h> -#include <linux/cpumask.h> -#include <linux/bitops.h> +#ifdef CONFIG_SMP -#if defined(__KERNEL__) && defined(CONFIG_SMP) && !defined(__ASSEMBLY__) - -#include <asm/lowcore.h> -#include <asm/sigp.h> -#include <asm/ptrace.h> #include <asm/system.h> - -/* - s390 specific smp.c headers - */ -typedef struct -{ - int intresting; - sigp_ccode ccode; - __u32 status; - __u16 cpu; -} sigp_info; +#include <asm/sigp.h> extern void machine_restart_smp(char *); extern void machine_halt_smp(void); extern void machine_power_off_smp(void); -#define NO_PROC_ID 0xFF /* No processor magic marker */ - -/* - * This magic constant controls our willingness to transfer - * a process across CPUs. Such a transfer incurs misses on the L1 - * cache, and on a P6 or P5 with multiple L2 caches L2 hits. My - * gut feeling is this will vary by board in value. For a board - * with separate L2 cache it probably depends also on the RSS, and - * for a board with shared L2 cache it ought to decay fast as other - * processes are run. - */ - -#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ - #define raw_smp_processor_id() (S390_lowcore.cpu_nr) -#define cpu_logical_map(cpu) (cpu) extern int __cpu_disable (void); extern void __cpu_die (unsigned int cpu); @@ -64,7 +29,9 @@ extern int smp_cpu_polarization[]; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -#endif +extern union save_area *zfcpdump_save_areas[NR_CPUS + 1]; + +#endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); @@ -72,5 +39,4 @@ extern int smp_rescan_cpus(void); static inline int smp_rescan_cpus(void) { return 0; } #endif -extern union save_area *zfcpdump_save_areas[NR_CPUS + 1]; -#endif +#endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/sockios.h b/arch/s390/include/asm/sockios.h index f4fc16c7da59..6f60eee73242 100644 --- a/arch/s390/include/asm/sockios.h +++ b/arch/s390/include/asm/sockios.h @@ -1,21 +1,6 @@ -/* - * include/asm-s390/sockios.h - * - * S390 version - * - * Derived from "include/asm-i386/sockios.h" - */ +#ifndef _ASM_S390_SOCKIOS_H +#define _ASM_S390_SOCKIOS_H -#ifndef __ARCH_S390_SOCKIOS__ -#define __ARCH_S390_SOCKIOS__ - -/* Socket-level I/O control calls. */ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#include <asm-generic/sockios.h> #endif diff --git a/arch/s390/include/asm/termbits.h b/arch/s390/include/asm/termbits.h index 58731853d529..71bf6ac6a2b9 100644 --- a/arch/s390/include/asm/termbits.h +++ b/arch/s390/include/asm/termbits.h @@ -1,206 +1,6 @@ -/* - * include/asm-s390/termbits.h - * - * S390 version - * - * Derived from "include/asm-i386/termbits.h" - */ +#ifndef _ASM_S390_TERMBITS_H +#define _ASM_S390_TERMBITS_H -#ifndef __ARCH_S390_TERMBITS_H__ -#define __ARCH_S390_TERMBITS_H__ - -#include <linux/posix_types.h> - -typedef unsigned char cc_t; -typedef unsigned int speed_t; -typedef unsigned int tcflag_t; - -#define NCCS 19 -struct termios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ -}; - -struct termios2 { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -struct ktermios { - tcflag_t c_iflag; /* input mode flags */ - tcflag_t c_oflag; /* output mode flags */ - tcflag_t c_cflag; /* control mode flags */ - tcflag_t c_lflag; /* local mode flags */ - cc_t c_line; /* line discipline */ - cc_t c_cc[NCCS]; /* control characters */ - speed_t c_ispeed; /* input speed */ - speed_t c_ospeed; /* output speed */ -}; - -/* c_cc characters */ -#define VINTR 0 -#define VQUIT 1 -#define VERASE 2 -#define VKILL 3 -#define VEOF 4 -#define VTIME 5 -#define VMIN 6 -#define VSWTC 7 -#define VSTART 8 -#define VSTOP 9 -#define VSUSP 10 -#define VEOL 11 -#define VREPRINT 12 -#define VDISCARD 13 -#define VWERASE 14 -#define VLNEXT 15 -#define VEOL2 16 - -/* c_iflag bits */ -#define IGNBRK 0000001 -#define BRKINT 0000002 -#define IGNPAR 0000004 -#define PARMRK 0000010 -#define INPCK 0000020 -#define ISTRIP 0000040 -#define INLCR 0000100 -#define IGNCR 0000200 -#define ICRNL 0000400 -#define IUCLC 0001000 -#define IXON 0002000 -#define IXANY 0004000 -#define IXOFF 0010000 -#define IMAXBEL 0020000 -#define IUTF8 0040000 - -/* c_oflag bits */ -#define OPOST 0000001 -#define OLCUC 0000002 -#define ONLCR 0000004 -#define OCRNL 0000010 -#define ONOCR 0000020 -#define ONLRET 0000040 -#define OFILL 0000100 -#define OFDEL 0000200 -#define NLDLY 0000400 -#define NL0 0000000 -#define NL1 0000400 -#define CRDLY 0003000 -#define CR0 0000000 -#define CR1 0001000 -#define CR2 0002000 -#define CR3 0003000 -#define TABDLY 0014000 -#define TAB0 0000000 -#define TAB1 0004000 -#define TAB2 0010000 -#define TAB3 0014000 -#define XTABS 0014000 -#define BSDLY 0020000 -#define BS0 0000000 -#define BS1 0020000 -#define VTDLY 0040000 -#define VT0 0000000 -#define VT1 0040000 -#define FFDLY 0100000 -#define FF0 0000000 -#define FF1 0100000 - -/* c_cflag bit meaning */ -#define CBAUD 0010017 -#define B0 0000000 /* hang up */ -#define B50 0000001 -#define B75 0000002 -#define B110 0000003 -#define B134 0000004 -#define B150 0000005 -#define B200 0000006 -#define B300 0000007 -#define B600 0000010 -#define B1200 0000011 -#define B1800 0000012 -#define B2400 0000013 -#define B4800 0000014 -#define B9600 0000015 -#define B19200 0000016 -#define B38400 0000017 -#define EXTA B19200 -#define EXTB B38400 -#define CSIZE 0000060 -#define CS5 0000000 -#define CS6 0000020 -#define CS7 0000040 -#define CS8 0000060 -#define CSTOPB 0000100 -#define CREAD 0000200 -#define PARENB 0000400 -#define PARODD 0001000 -#define HUPCL 0002000 -#define CLOCAL 0004000 -#define CBAUDEX 0010000 -#define BOTHER 0010000 -#define B57600 0010001 -#define B115200 0010002 -#define B230400 0010003 -#define B460800 0010004 -#define B500000 0010005 -#define B576000 0010006 -#define B921600 0010007 -#define B1000000 0010010 -#define B1152000 0010011 -#define B1500000 0010012 -#define B2000000 0010013 -#define B2500000 0010014 -#define B3000000 0010015 -#define B3500000 0010016 -#define B4000000 0010017 -#define CIBAUD 002003600000 /* input baud rate */ -#define CMSPAR 010000000000 /* mark or space (stick) parity */ -#define CRTSCTS 020000000000 /* flow control */ - -#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ - -/* c_lflag bits */ -#define ISIG 0000001 -#define ICANON 0000002 -#define XCASE 0000004 -#define ECHO 0000010 -#define ECHOE 0000020 -#define ECHOK 0000040 -#define ECHONL 0000100 -#define NOFLSH 0000200 -#define TOSTOP 0000400 -#define ECHOCTL 0001000 -#define ECHOPRT 0002000 -#define ECHOKE 0004000 -#define FLUSHO 0010000 -#define PENDIN 0040000 -#define IEXTEN 0100000 - -/* tcflow() and TCXONC use these */ -#define TCOOFF 0 -#define TCOON 1 -#define TCIOFF 2 -#define TCION 3 - -/* tcflush() and TCFLSH use these */ -#define TCIFLUSH 0 -#define TCOFLUSH 1 -#define TCIOFLUSH 2 - -/* tcsetattr uses these */ -#define TCSANOW 0 -#define TCSADRAIN 1 -#define TCSAFLUSH 2 +#include <asm-generic/termbits.h> #endif diff --git a/arch/s390/include/asm/todclk.h b/arch/s390/include/asm/todclk.h deleted file mode 100644 index c7f62055488a..000000000000 --- a/arch/s390/include/asm/todclk.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * File...........: linux/include/asm/todclk.h - * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> - * Bugreports.to..: <Linux390@de.ibm.com> - * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 - * - * History of changes (starts July 2000) - */ - -#ifndef __ASM_TODCLK_H -#define __ASM_TODCLK_H - -#ifdef __KERNEL__ - -#define TOD_uSEC (0x1000ULL) -#define TOD_mSEC (1000 * TOD_uSEC) -#define TOD_SEC (1000 * TOD_mSEC) -#define TOD_MIN (60 * TOD_SEC) -#define TOD_HOUR (60 * TOD_MIN) - -#endif - -#endif diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 8377e91533d2..cbf0a8745bf4 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -93,6 +93,8 @@ extern struct uaccess_ops uaccess_mvcos; extern struct uaccess_ops uaccess_mvcos_switch; extern struct uaccess_ops uaccess_pt; +extern int __handle_fault(unsigned long, unsigned long, int); + static inline int __put_user_fn(size_t size, void __user *ptr, void *x) { size = uaccess.copy_to_user_small(size, ptr, x); diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index c7be8e10b87e..683f6381cc59 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index fda1a8123f9b..25c31d681402 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -31,14 +31,8 @@ #include <linux/shm.h> #include <linux/slab.h> #include <linux/uio.h> -#include <linux/nfs_fs.h> #include <linux/quota.h> #include <linux/module.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/cache.h> -#include <linux/nfsd/xdr.h> -#include <linux/nfsd/syscall.h> #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 45e9092b3aad..cb97afc85c94 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -4,10 +4,6 @@ #include <linux/compat.h> #include <linux/socket.h> #include <linux/syscalls.h> -#include <linux/nfs_fs.h> -#include <linux/sunrpc/svc.h> -#include <linux/nfsd/nfsd.h> -#include <linux/nfsd/export.h> /* Macro that masks the high order bit of an 32 bit pointer and converts it*/ /* to a 64 bit pointer */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 6a250808092b..d984a2a380c3 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -83,6 +83,8 @@ startup_continue: slr %r0,%r0 # set cpuid to zero sigp %r1,%r0,0x12 # switch to esame mode sam64 # switch to 64 bit mode + llgfr %r13,%r13 # clear high-order half of base reg + lmh %r0,%r15,.Lzero64-.LPG1(%r13) # clear high-order half lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore @@ -127,6 +129,7 @@ startup_continue: .L4malign:.quad 0xffffffffffc00000 .Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8 .Lnop: .long 0x07000700 +.Lzero64:.fill 16,4,0x0 #ifdef CONFIG_ZFCPDUMP .Lcurrent_cpu: .long 0x0 diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 0de305b598ce..59618bcd99b7 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -126,6 +126,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) /* Serve timer interrupts first. */ clock_comparator_work(); kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++; + if (code != 0x1004) + __get_cpu_var(s390_idle).nohz_delay = 1; index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 061479ff029f..0663287fa1b3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,9 +305,8 @@ static int __init early_parse_mem(char *p) } early_param("mem", early_parse_mem); -#ifdef CONFIG_S390_SWITCH_AMODE -unsigned int switch_amode = 0; -EXPORT_SYMBOL_GPL(switch_amode); +unsigned int user_mode = HOME_SPACE_MODE; +EXPORT_SYMBOL_GPL(user_mode); static int set_amode_and_uaccess(unsigned long user_amode, unsigned long user32_amode) @@ -340,23 +339,29 @@ static int set_amode_and_uaccess(unsigned long user_amode, */ static int __init early_parse_switch_amode(char *p) { - switch_amode = 1; + if (user_mode != SECONDARY_SPACE_MODE) + user_mode = PRIMARY_SPACE_MODE; return 0; } early_param("switch_amode", early_parse_switch_amode); -#else /* CONFIG_S390_SWITCH_AMODE */ -static inline int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int __init early_parse_user_mode(char *p) { + if (p && strcmp(p, "primary") == 0) + user_mode = PRIMARY_SPACE_MODE; +#ifdef CONFIG_S390_EXEC_PROTECT + else if (p && strcmp(p, "secondary") == 0) + user_mode = SECONDARY_SPACE_MODE; +#endif + else if (!p || strcmp(p, "home") == 0) + user_mode = HOME_SPACE_MODE; + else + return 1; return 0; } -#endif /* CONFIG_S390_SWITCH_AMODE */ +early_param("user_mode", early_parse_user_mode); #ifdef CONFIG_S390_EXEC_PROTECT -unsigned int s390_noexec = 0; -EXPORT_SYMBOL_GPL(s390_noexec); - /* * Enable execute protection? */ @@ -364,8 +369,7 @@ static int __init early_parse_noexec(char *p) { if (!strncmp(p, "off", 3)) return 0; - switch_amode = 1; - s390_noexec = 1; + user_mode = SECONDARY_SPACE_MODE; return 0; } early_param("noexec", early_parse_noexec); @@ -373,7 +377,7 @@ early_param("noexec", early_parse_noexec); static void setup_addressing_mode(void) { - if (s390_noexec) { + if (user_mode == SECONDARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_SECONDARY, PSW32_ASC_SECONDARY)) pr_info("Execute protection active, " @@ -381,7 +385,7 @@ static void setup_addressing_mode(void) else pr_info("Execute protection active, " "mvcos not available\n"); - } else if (switch_amode) { + } else if (user_mode == PRIMARY_SPACE_MODE) { if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) pr_info("Address spaces switched, " "mvcos available\n"); @@ -411,7 +415,7 @@ setup_lowcore(void) lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; - if (switch_amode) + if (user_mode != HOME_SPACE_MODE) lc->restart_psw.mask |= PSW_ASC_HOME; lc->external_new_psw.mask = psw_kernel_bits; lc->external_new_psw.addr = diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 34162a0b2caa..65065ac48ed3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void) return &clocksource_tod; } -void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) +void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, + u32 mult) { if (clock != &clocksource_tod) return; @@ -334,7 +335,7 @@ int get_sync_clock(unsigned long long *clock) sw0 = atomic_read(sw_ptr); *clock = get_clock(); sw1 = atomic_read(sw_ptr); - put_cpu_var(clock_sync_sync); + put_cpu_var(clock_sync_word); if (sw0 == sw1 && (sw0 & 0x80000000U)) /* Success: time is in sync. */ return 0; @@ -384,7 +385,7 @@ static inline int check_sync_clock(void) sw_ptr = &get_cpu_var(clock_sync_word); rc = (atomic_read(sw_ptr) & 0x80000000U) != 0; - put_cpu_var(clock_sync_sync); + put_cpu_var(clock_sync_word); return rc; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index adfb32aa6d59..5f99e66c51c3 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -86,7 +86,8 @@ static void vdso_init_data(struct vdso_data *vd) unsigned int facility_list; facility_list = stfl(); - vd->ectg_available = switch_amode && (facility_list & 1); + vd->ectg_available = + user_mode != HOME_SPACE_MODE && (facility_list & 1); } #ifdef CONFIG_64BIT @@ -114,7 +115,7 @@ int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) lowcore->vdso_per_cpu_data = __LC_PASTE; - if (!switch_amode || !vdso_enabled) + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); @@ -160,7 +161,7 @@ void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) unsigned long segment_table, page_table, page_frame; u32 *psal, *aste; - if (!switch_amode || !vdso_enabled) + if (user_mode == HOME_SPACE_MODE || !vdso_enabled) return; psal = (u32 *)(addr_t) lowcore->paste[4]; @@ -184,7 +185,7 @@ static void __vdso_init_cr5(void *dummy) static void vdso_init_cr5(void) { - if (switch_amode && vdso_enabled) + if (user_mode != HOME_SPACE_MODE && vdso_enabled) on_each_cpu(__vdso_init_cr5, NULL, 1); } #endif /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c41bb0d416e1..b59a812a010e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -167,6 +167,8 @@ void vtime_stop_cpu(void) /* Wait for external, I/O or machine check interrupt. */ psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + idle->nohz_delay = 0; + /* Check if the CPU timer needs to be reprogrammed. */ if (vq->do_spt) { __u64 vmax = VTIMER_MAX_SLICE; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index bf164fc21864..6ee55ae84ce2 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -20,7 +20,6 @@ config KVM depends on HAVE_KVM && EXPERIMENTAL select PREEMPT_NOTIFIERS select ANON_INODES - select S390_SWITCH_AMODE ---help--- Support hosting paravirtualized guest machines using the SIE virtualization capability on the mainframe. This should work diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 58da3f461214..60455f104ea3 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -162,7 +162,6 @@ static size_t clear_user_mvcos(size_t size, void __user *to) return size; } -#ifdef CONFIG_S390_SWITCH_AMODE static size_t strnlen_user_mvcos(size_t count, const char __user *src) { char buf[256]; @@ -200,7 +199,6 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, } while ((len_str == len) && (done < count)); return done; } -#endif /* CONFIG_S390_SWITCH_AMODE */ struct uaccess_ops uaccess_mvcos = { .copy_from_user = copy_from_user_mvcos_check, @@ -215,7 +213,6 @@ struct uaccess_ops uaccess_mvcos = { .futex_atomic_cmpxchg = futex_atomic_cmpxchg_std, }; -#ifdef CONFIG_S390_SWITCH_AMODE struct uaccess_ops uaccess_mvcos_switch = { .copy_from_user = copy_from_user_mvcos, .copy_from_user_small = copy_from_user_mvcos, @@ -228,4 +225,3 @@ struct uaccess_ops uaccess_mvcos_switch = { .futex_atomic_op = futex_atomic_op_pt, .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, }; -#endif diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index cb5d59eab0ee..404f2de296dc 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -23,86 +23,21 @@ static inline pte_t *follow_table(struct mm_struct *mm, unsigned long addr) pgd = pgd_offset(mm, addr); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - return NULL; + return (pte_t *) 0x3a; pud = pud_offset(pgd, addr); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - return NULL; + return (pte_t *) 0x3b; pmd = pmd_offset(pud, addr); if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - return NULL; + return (pte_t *) 0x10; return pte_offset_map(pmd, addr); } -static int __handle_fault(struct mm_struct *mm, unsigned long address, - int write_access) -{ - struct vm_area_struct *vma; - int ret = -EFAULT; - int fault; - - if (in_atomic()) - return ret; - down_read(&mm->mmap_sem); - vma = find_vma(mm, address); - if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > address)) { - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto out; - if (expand_stack(vma, address)) - goto out; - } - - if (!write_access) { - /* page not present, check vm flags */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto out; - } else { - if (!(vma->vm_flags & VM_WRITE)) - goto out; - } - -survive: - fault = handle_mm_fault(mm, vma, address, write_access ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto out_sigbus; - BUG(); - } - if (fault & VM_FAULT_MAJOR) - current->maj_flt++; - else - current->min_flt++; - ret = 0; -out: - up_read(&mm->mmap_sem); - return ret; - -out_of_memory: - up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); - return ret; - -out_sigbus: - up_read(&mm->mmap_sem); - current->thread.prot_addr = address; - current->thread.trap_no = 0x11; - force_sig(SIGBUS, current); - return ret; -} - -static size_t __user_copy_pt(unsigned long uaddr, void *kptr, - size_t n, int write_user) +static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, + size_t n, int write_user) { struct mm_struct *mm = current->mm; unsigned long offset, pfn, done, size; @@ -114,12 +49,17 @@ retry: spin_lock(&mm->page_table_lock); do { pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte) || - (write_user && !pte_write(*pte))) + if ((unsigned long) pte < 0x1000) goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } else if (write_user && !pte_write(*pte)) { + pte = (pte_t *) 0x04; + goto fault; + } pfn = pte_pfn(*pte); - offset = uaddr & (PAGE_SIZE - 1); size = min(n - done, PAGE_SIZE - offset); if (write_user) { @@ -137,7 +77,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, write_user)) + if (__handle_fault(uaddr, (unsigned long) pte, write_user)) return n - done; goto retry; } @@ -146,30 +86,31 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static unsigned long __dat_user_addr(unsigned long uaddr) +static __always_inline unsigned long __dat_user_addr(unsigned long uaddr) { struct mm_struct *mm = current->mm; - unsigned long pfn, ret; + unsigned long pfn; pte_t *pte; int rc; - ret = 0; retry: pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte)) + if ((unsigned long) pte < 0x1000) goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; + goto fault; + } pfn = pte_pfn(*pte); - ret = (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); -out: - return ret; + return (pfn << PAGE_SHIFT) + (uaddr & (PAGE_SIZE - 1)); fault: spin_unlock(&mm->page_table_lock); - rc = __handle_fault(mm, uaddr, 0); + rc = __handle_fault(uaddr, (unsigned long) pte, 0); spin_lock(&mm->page_table_lock); - if (rc) - goto out; - goto retry; + if (!rc) + goto retry; + return 0; } size_t copy_from_user_pt(size_t n, const void __user *from, void *to) @@ -234,8 +175,12 @@ retry: spin_lock(&mm->page_table_lock); do { pte = follow_table(mm, uaddr); - if (!pte || !pte_present(*pte)) + if ((unsigned long) pte < 0x1000) + goto fault; + if (!pte_present(*pte)) { + pte = (pte_t *) 0x11; goto fault; + } pfn = pte_pfn(*pte); offset = uaddr & (PAGE_SIZE-1); @@ -249,9 +194,8 @@ retry: return done + 1; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, 0)) { + if (__handle_fault(uaddr, (unsigned long) pte, 0)) return 0; - } goto retry; } @@ -284,7 +228,7 @@ static size_t copy_in_user_pt(size_t n, void __user *to, { struct mm_struct *mm = current->mm; unsigned long offset_from, offset_to, offset_max, pfn_from, pfn_to, - uaddr, done, size; + uaddr, done, size, error_code; unsigned long uaddr_from = (unsigned long) from; unsigned long uaddr_to = (unsigned long) to; pte_t *pte_from, *pte_to; @@ -298,17 +242,28 @@ static size_t copy_in_user_pt(size_t n, void __user *to, retry: spin_lock(&mm->page_table_lock); do { + write_user = 0; + uaddr = uaddr_from; pte_from = follow_table(mm, uaddr_from); - if (!pte_from || !pte_present(*pte_from)) { - uaddr = uaddr_from; - write_user = 0; + error_code = (unsigned long) pte_from; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_from)) { + error_code = 0x11; goto fault; } + write_user = 1; + uaddr = uaddr_to; pte_to = follow_table(mm, uaddr_to); - if (!pte_to || !pte_present(*pte_to) || !pte_write(*pte_to)) { - uaddr = uaddr_to; - write_user = 1; + error_code = (unsigned long) pte_to; + if (error_code < 0x1000) + goto fault; + if (!pte_present(*pte_to)) { + error_code = 0x11; + goto fault; + } else if (!pte_write(*pte_to)) { + error_code = 0x04; goto fault; } @@ -329,7 +284,7 @@ retry: return n - done; fault: spin_unlock(&mm->page_table_lock); - if (__handle_fault(mm, uaddr, write_user)) + if (__handle_fault(uaddr, error_code, write_user)) return n - done; goto retry; } diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c index 3ee78ccb617d..cd4e9c168dd7 100644 --- a/arch/s390/math-emu/math.c +++ b/arch/s390/math-emu/math.c @@ -2088,7 +2088,7 @@ int math_emu_ldr(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " ld 0,0(%1)\n" @@ -2118,7 +2118,7 @@ int math_emu_ler(__u8 *opcode) { __u16 opc = *((__u16 *) opcode); if ((opc & 0x90) == 0) { /* test if rx in {0,2,4,6} */ - /* we got an exception therfore ry can't be in {0,2,4,6} */ + /* we got an exception therefore ry can't be in {0,2,4,6} */ asm volatile( /* load rx from fp_regs.fprs[ry] */ " bras 1,0f\n" " le 0,0(%1)\n" diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ff58779bf7e9..76a3637b88e0 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -18,6 +18,7 @@ #include <linux/swap.h> #include <linux/kthread.h> #include <linux/oom.h> +#include <linux/suspend.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -44,6 +45,7 @@ static volatile long cmm_pages_target; static volatile long cmm_timed_pages_target; static long cmm_timeout_pages; static long cmm_timeout_seconds; +static int cmm_suspended; static struct cmm_page_array *cmm_page_list; static struct cmm_page_array *cmm_timed_page_list; @@ -147,9 +149,9 @@ cmm_thread(void *dummy) while (1) { rc = wait_event_interruptible(cmm_thread_wait, - (cmm_pages != cmm_pages_target || - cmm_timed_pages != cmm_timed_pages_target || - kthread_should_stop())); + (!cmm_suspended && (cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target)) || + kthread_should_stop()); if (kthread_should_stop() || rc == -ERESTARTSYS) { cmm_pages_target = cmm_pages; cmm_timed_pages_target = cmm_timed_pages; @@ -410,6 +412,38 @@ cmm_smsg_target(char *from, char *msg) static struct ctl_table_header *cmm_sysctl_header; +static int cmm_suspend(void) +{ + cmm_suspended = 1; + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); + return 0; +} + +static int cmm_resume(void) +{ + cmm_suspended = 0; + cmm_kick_thread(); + return 0; +} + +static int cmm_power_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + switch (event) { + case PM_POST_HIBERNATION: + return cmm_resume(); + case PM_HIBERNATION_PREPARE: + return cmm_suspend(); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block cmm_power_notifier = { + .notifier_call = cmm_power_event, +}; + static int cmm_init (void) { @@ -418,7 +452,7 @@ cmm_init (void) #ifdef CONFIG_CMM_PROC cmm_sysctl_header = register_sysctl_table(cmm_dir_table); if (!cmm_sysctl_header) - goto out; + goto out_sysctl; #endif #ifdef CONFIG_CMM_IUCV rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); @@ -428,17 +462,21 @@ cmm_init (void) rc = register_oom_notifier(&cmm_oom_nb); if (rc < 0) goto out_oom_notify; + rc = register_pm_notifier(&cmm_power_notifier); + if (rc) + goto out_pm; init_waitqueue_head(&cmm_thread_wait); init_timer(&cmm_timer); cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); rc = IS_ERR(cmm_thread_ptr) ? PTR_ERR(cmm_thread_ptr) : 0; - if (!rc) - goto out; - /* - * kthread_create failed. undo all the stuff from above again. - */ - unregister_oom_notifier(&cmm_oom_nb); + if (rc) + goto out_kthread; + return 0; +out_kthread: + unregister_pm_notifier(&cmm_power_notifier); +out_pm: + unregister_oom_notifier(&cmm_oom_nb); out_oom_notify: #ifdef CONFIG_CMM_IUCV smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); @@ -446,8 +484,8 @@ out_smsg: #endif #ifdef CONFIG_CMM_PROC unregister_sysctl_table(cmm_sysctl_header); +out_sysctl: #endif -out: return rc; } @@ -455,6 +493,7 @@ static void cmm_exit(void) { kthread_stop(cmm_thread_ptr); + unregister_pm_notifier(&cmm_power_notifier); unregister_oom_notifier(&cmm_oom_nb); cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6d507462967a..fc102e70d9c2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -34,16 +34,15 @@ #include <asm/pgtable.h> #include <asm/s390_ext.h> #include <asm/mmu_context.h> +#include <asm/compat.h> #include "../kernel/entry.h" #ifndef CONFIG_64BIT #define __FAIL_ADDR_MASK 0x7ffff000 -#define __FIXUP_MASK 0x7fffffff #define __SUBCODE_MASK 0x0200 #define __PF_RES_FIELD 0ULL #else /* CONFIG_64BIT */ #define __FAIL_ADDR_MASK -4096L -#define __FIXUP_MASK ~0L #define __SUBCODE_MASK 0x0600 #define __PF_RES_FIELD 0x8000000000000000ULL #endif /* CONFIG_64BIT */ @@ -52,11 +51,15 @@ extern int sysctl_userprocess_debug; #endif -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs, long err) +#define VM_FAULT_BADCONTEXT 0x010000 +#define VM_FAULT_BADMAP 0x020000 +#define VM_FAULT_BADACCESS 0x040000 + +static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; +#ifdef CONFIG_KPROBES /* kprobe_running() needs smp_processor_id() */ if (!user_mode(regs)) { preempt_disable(); @@ -64,15 +67,9 @@ static inline int notify_page_fault(struct pt_regs *regs, long err) ret = 1; preempt_enable(); } - +#endif return ret; } -#else -static inline int notify_page_fault(struct pt_regs *regs, long err) -{ - return 0; -} -#endif /* @@ -100,57 +97,50 @@ void bust_spinlocks(int yes) /* * Returns the address space associated with the fault. - * Returns 0 for kernel space, 1 for user space and - * 2 for code execution in user space with noexec=on. + * Returns 0 for kernel space and 1 for user space. */ -static inline int check_space(struct task_struct *tsk) +static inline int user_space_fault(unsigned long trans_exc_code) { /* - * The lowest two bits of S390_lowcore.trans_exc_code - * indicate which paging table was used. + * The lowest two bits of the translation exception + * identification indicate which paging table was used. */ - int desc = S390_lowcore.trans_exc_code & 3; - - if (desc == 3) /* Home Segment Table Descriptor */ - return switch_amode == 0; - if (desc == 2) /* Secondary Segment Table Descriptor */ - return tsk->thread.mm_segment.ar4; -#ifdef CONFIG_S390_SWITCH_AMODE - if (unlikely(desc == 1)) { /* STD determined via access register */ - /* %a0 always indicates primary space. */ - if (S390_lowcore.exc_access_id != 0) { - save_access_regs(tsk->thread.acrs); - /* - * An alet of 0 indicates primary space. - * An alet of 1 indicates secondary space. - * Any other alet values generate an - * alen-translation exception. - */ - if (tsk->thread.acrs[S390_lowcore.exc_access_id]) - return tsk->thread.mm_segment.ar4; - } - } -#endif - /* Primary Segment Table Descriptor */ - return switch_amode << s390_noexec; + trans_exc_code &= 3; + if (trans_exc_code == 2) + /* Access via secondary space, set_fs setting decides */ + return current->thread.mm_segment.ar4; + if (user_mode == HOME_SPACE_MODE) + /* User space if the access has been done via home space. */ + return trans_exc_code == 3; + /* + * If the user space is not the home space the kernel runs in home + * space. Access via secondary space has already been covered, + * access via primary space or access register is from user space + * and access via home space is from the kernel. + */ + return trans_exc_code != 3; } /* * Send SIGSEGV to task. This is an external routine * to keep the stack usage of do_page_fault small. */ -static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, - int si_code, unsigned long address) +static noinline void do_sigsegv(struct pt_regs *regs, long int_code, + int si_code, unsigned long trans_exc_code) { struct siginfo si; + unsigned long address; + address = trans_exc_code & __FAIL_ADDR_MASK; + current->thread.prot_addr = address; + current->thread.trap_no = int_code; #if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) #if defined(CONFIG_SYSCTL) if (sysctl_userprocess_debug) #endif { printk("User process fault: interruption code 0x%lX\n", - error_code); + int_code); printk("failing address: %lX\n", address); show_regs(regs); } @@ -161,13 +151,14 @@ static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, force_sig_info(SIGSEGV, &si, current); } -static void do_no_context(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static noinline void do_no_context(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { const struct exception_table_entry *fixup; + unsigned long address; /* Are we prepared to handle this kernel fault? */ - fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); + fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); if (fixup) { regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; return; @@ -177,129 +168,149 @@ static void do_no_context(struct pt_regs *regs, unsigned long error_code, * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - if (check_space(current) == 0) + address = trans_exc_code & __FAIL_ADDR_MASK; + if (!user_space_fault(trans_exc_code)) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " at virtual kernel address %p\n", (void *)address); else printk(KERN_ALERT "Unable to handle kernel paging request" " at virtual user address %p\n", (void *)address); - die("Oops", regs, error_code); + die("Oops", regs, int_code); do_exit(SIGKILL); } -static void do_low_address(struct pt_regs *regs, unsigned long error_code) +static noinline void do_low_address(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { /* Low-address protection hit in kernel mode means NULL pointer write access in kernel mode. */ if (regs->psw.mask & PSW_MASK_PSTATE) { /* Low-address protection hit in user mode 'cannot happen'. */ - die ("Low-address protection", regs, error_code); + die ("Low-address protection", regs, int_code); do_exit(SIGKILL); } - do_no_context(regs, error_code, 0); + do_no_context(regs, int_code, trans_exc_code); } -static void do_sigbus(struct pt_regs *regs, unsigned long error_code, - unsigned long address) +static noinline void do_sigbus(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->mm; - up_read(&mm->mmap_sem); /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - tsk->thread.prot_addr = address; - tsk->thread.trap_no = error_code; + tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; + tsk->thread.trap_no = int_code; force_sig(SIGBUS, tsk); - - /* Kernel mode? Handle exceptions or die */ - if (!(regs->psw.mask & PSW_MASK_PSTATE)) - do_no_context(regs, error_code, address); } #ifdef CONFIG_S390_EXEC_PROTECT -static int signal_return(struct mm_struct *mm, struct pt_regs *regs, - unsigned long address, unsigned long error_code) +static noinline int signal_return(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code) { u16 instruction; int rc; -#ifdef CONFIG_COMPAT - int compat; -#endif - pagefault_disable(); rc = __get_user(instruction, (u16 __user *) regs->psw.addr); - pagefault_enable(); - if (rc) - return -EFAULT; - up_read(&mm->mmap_sem); - clear_tsk_thread_flag(current, TIF_SINGLE_STEP); -#ifdef CONFIG_COMPAT - compat = is_compat_task(); - if (compat && instruction == 0x0a77) - sys32_sigreturn(); - else if (compat && instruction == 0x0aad) - sys32_rt_sigreturn(); - else -#endif - if (instruction == 0x0a77) - sys_sigreturn(); - else if (instruction == 0x0aad) - sys_rt_sigreturn(); - else { - current->thread.prot_addr = address; - current->thread.trap_no = error_code; - do_sigsegv(regs, error_code, SEGV_MAPERR, address); - } + if (!rc && instruction == 0x0a77) { + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + if (is_compat_task()) + sys32_sigreturn(); + else + sys_sigreturn(); + } else if (!rc && instruction == 0x0aad) { + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + if (is_compat_task()) + sys32_rt_sigreturn(); + else + sys_rt_sigreturn(); + } else + do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); return 0; } #endif /* CONFIG_S390_EXEC_PROTECT */ +static noinline void do_fault_error(struct pt_regs *regs, long int_code, + unsigned long trans_exc_code, int fault) +{ + int si_code; + + switch (fault) { + case VM_FAULT_BADACCESS: +#ifdef CONFIG_S390_EXEC_PROTECT + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && + (trans_exc_code & 3) == 0) { + signal_return(regs, int_code, trans_exc_code); + break; + } +#endif /* CONFIG_S390_EXEC_PROTECT */ + case VM_FAULT_BADMAP: + /* Bad memory access. Check if it is kernel or user space. */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + /* User mode accesses just cause a SIGSEGV */ + si_code = (fault == VM_FAULT_BADMAP) ? + SEGV_MAPERR : SEGV_ACCERR; + do_sigsegv(regs, int_code, si_code, trans_exc_code); + return; + } + case VM_FAULT_BADCONTEXT: + do_no_context(regs, int_code, trans_exc_code); + break; + default: /* fault & VM_FAULT_ERROR */ + if (fault & VM_FAULT_OOM) + pagefault_out_of_memory(); + else if (fault & VM_FAULT_SIGBUS) { + do_sigbus(regs, int_code, trans_exc_code); + /* Kernel mode? Handle exceptions or die */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + do_no_context(regs, int_code, trans_exc_code); + } else + BUG(); + break; + } +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. * - * error_code: + * interruption code (int_code): * 04 Protection -> Write-Protection (suprression) * 10 Segment translation -> Not present (nullification) * 11 Page translation -> Not present (nullification) * 3b Region third trans. -> Not present (nullification) */ -static inline void -do_exception(struct pt_regs *regs, unsigned long error_code, int write) +static inline int do_exception(struct pt_regs *regs, int access, + unsigned long trans_exc_code) { struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int space; - int si_code; int fault; - if (notify_page_fault(regs, error_code)) - return; + if (notify_page_fault(regs)) + return 0; tsk = current; mm = tsk->mm; - /* get the failing address and the affected space */ - address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; - space = check_space(tsk); - /* * Verify that the fault happened in user space, that * we are not in an interrupt and that there is a * user context. */ - if (unlikely(space == 0 || in_atomic() || !mm)) - goto no_context; + fault = VM_FAULT_BADCONTEXT; + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) + goto out; + address = trans_exc_code & __FAIL_ADDR_MASK; /* * When we get here, the fault happened in the current * task's user address space, so we can switch on the @@ -309,42 +320,26 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); - si_code = SEGV_MAPERR; + fault = VM_FAULT_BADMAP; vma = find_vma(mm, address); if (!vma) - goto bad_area; - -#ifdef CONFIG_S390_EXEC_PROTECT - if (unlikely((space == 2) && !(vma->vm_flags & VM_EXEC))) - if (!signal_return(mm, regs, address, error_code)) - /* - * signal_return() has done an up_read(&mm->mmap_sem) - * if it returns 0. - */ - return; -#endif + goto out_up; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ -good_area: - si_code = SEGV_ACCERR; - if (!write) { - /* page not present, check vm flags */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; - } else { - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; + if (unlikely(vma->vm_start > address)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_up; + if (expand_stack(vma, address)) + goto out_up; } + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ + fault = VM_FAULT_BADACCESS; + if (unlikely(!(vma->vm_flags & access))) + goto out_up; + if (is_vm_hugetlb_page(vma)) address &= HPAGE_MASK; /* @@ -352,18 +347,11 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) { - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - } else if (fault & VM_FAULT_SIGBUS) { - do_sigbus(regs, error_code, address); - return; - } - BUG(); - } + fault = handle_mm_fault(mm, vma, address, + (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); + if (unlikely(fault & VM_FAULT_ERROR)) + goto out_up; + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, @@ -373,74 +361,69 @@ good_area: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } - up_read(&mm->mmap_sem); /* * The instruction that caused the program check will * be repeated. Don't signal single step via SIGTRAP. */ clear_tsk_thread_flag(tsk, TIF_SINGLE_STEP); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: + fault = 0; +out_up: up_read(&mm->mmap_sem); - - /* User mode accesses just cause a SIGSEGV */ - if (regs->psw.mask & PSW_MASK_PSTATE) { - tsk->thread.prot_addr = address; - tsk->thread.trap_no = error_code; - do_sigsegv(regs, error_code, si_code, address); - return; - } - -no_context: - do_no_context(regs, error_code, address); +out: + return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, - long error_code) +void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) { + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + int fault; + /* Protection exception is supressing, decrement psw address. */ - regs->psw.addr -= (error_code >> 16); + regs->psw.addr -= (int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ - if (unlikely(!(S390_lowcore.trans_exc_code & 4))) { - do_low_address(regs, error_code); + if (unlikely(!(trans_exc_code & 4))) { + do_low_address(regs, int_code, trans_exc_code); return; } - do_exception(regs, 4, 1); + fault = do_exception(regs, VM_WRITE, trans_exc_code); + if (unlikely(fault)) + do_fault_error(regs, 4, trans_exc_code, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long error_code) +void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) { - do_exception(regs, error_code & 0xff, 0); + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + int access, fault; + + access = VM_READ | VM_EXEC | VM_WRITE; +#ifdef CONFIG_S390_EXEC_PROTECT + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY && + (trans_exc_code & 3) == 0) + access = VM_EXEC; +#endif + fault = do_exception(regs, access, trans_exc_code); + if (unlikely(fault)) + do_fault_error(regs, int_code & 255, trans_exc_code, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) +void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) { - struct mm_struct *mm; + unsigned long trans_exc_code = S390_lowcore.trans_exc_code; + struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long address; - int space; - - mm = current->mm; - address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; - space = check_space(current); - if (unlikely(space == 0 || in_atomic() || !mm)) + if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; local_irq_enable(); down_read(&mm->mmap_sem); - vma = find_vma(mm, address); + vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); up_read(&mm->mmap_sem); if (vma) { @@ -450,17 +433,38 @@ void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code) /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - current->thread.prot_addr = address; - current->thread.trap_no = error_code; - do_sigsegv(regs, error_code, SEGV_MAPERR, address); + do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); return; } no_context: - do_no_context(regs, error_code, address); + do_no_context(regs, int_code, trans_exc_code); } #endif +int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) +{ + struct pt_regs regs; + int access, fault; + + regs.psw.mask = psw_kernel_bits; + if (!irqs_disabled()) + regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.addr = (unsigned long) __builtin_return_address(0); + regs.psw.addr |= PSW_ADDR_AMODE; + uaddr &= PAGE_MASK; + access = write_user ? VM_WRITE : VM_READ; + fault = do_exception(®s, access, uaddr | 2); + if (unlikely(fault)) { + if (fault & VM_FAULT_OOM) { + pagefault_out_of_memory(); + fault = 0; + } else if (fault & VM_FAULT_SIGBUS) + do_sigbus(®s, int_code, uaddr); + } + return fault ? -EFAULT : 0; +} + #ifdef CONFIG_PFAULT /* * 'pfault' pseudo page faults routines. @@ -522,7 +526,7 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } -static void pfault_interrupt(__u16 error_code) +static void pfault_interrupt(__u16 int_code) { struct task_struct *tsk; __u16 subcode; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2757c5616a07..ad621e06ada3 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -269,7 +269,7 @@ int s390_enable_sie(void) struct mm_struct *mm, *old_mm; /* Do we have switched amode? If no, we cannot do sie */ - if (!switch_amode) + if (user_mode == HOME_SPACE_MODE) return -EINVAL; /* Do we have pgstes? if yes, we are done */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 5f91a38d7592..300ab012b0fd 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -70,8 +70,12 @@ static pte_t __ref *vmem_pte_alloc(void) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, - PTRS_PER_PTE * sizeof(pte_t)); + if (MACHINE_HAS_HPAGE) + clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY | _PAGE_CO, + PTRS_PER_PTE * sizeof(pte_t)); + else + clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -112,7 +116,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && (address + HPAGE_SIZE <= start + size) && (address >= HPAGE_SIZE)) { - pte_val(pte) |= _SEGMENT_ENTRY_LARGE; + pte_val(pte) |= _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_CO; pmd_val(*pm_dir) = pte_val(pte); address += HPAGE_SIZE - PAGE_SIZE; continue; |