From 104ea556ee7f40039c9c635d0c267b1fde084a81 Mon Sep 17 00:00:00 2001 From: "frank.blaschka@de.ibm.com" Date: Mon, 8 Aug 2011 01:33:55 +0000 Subject: qdio: support asynchronous delivery of storage blocks This patch introduces support for asynchronous delivery of storage blocks for Hipersockets. Upper layers may exploit this functionality to reuse SBALs for which the delivery status is still pending. Signed-off-by: Einar Lueck Signed-off-by: Jan Glauber Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- arch/s390/include/asm/qdio.h | 68 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 15c97625df8d..3881e9499e17 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -122,6 +122,40 @@ struct slibe { u64 parms; }; +/** + * struct qaob - queue asynchronous operation block + * @res0: reserved parameters + * @res1: reserved parameter + * @res2: reserved parameter + * @res3: reserved parameter + * @aorc: asynchronous operation return code + * @flags: internal flags + * @cbtbs: control block type + * @sb_count: number of storage blocks + * @sba: storage block element addresses + * @dcount: size of storage block elements + * @user0: user defineable value + * @res4: reserved paramater + * @user1: user defineable value + * @user2: user defineable value + */ +struct qaob { + u64 res0[6]; + u8 res1; + u8 res2; + u8 res3; + u8 aorc; + u8 flags; + u16 cbtbs; + u8 sb_count; + u64 sba[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u16 dcount[QDIO_MAX_ELEMENTS_PER_BUFFER]; + u64 user0; + u64 res4[2]; + u64 user1; + u64 user2; +} __attribute__ ((packed, aligned(256))); + /** * struct slib - storage list information block (SLIB) * @nsliba: next SLIB address (if any) @@ -225,6 +259,31 @@ struct slsb { #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 +/** + * struct qdio_outbuf_state - SBAL related asynchronous operation information + * (for communication with upper layer programs) + * (only required for use with completion queues) + * @flags: flags indicating state of buffer + * @aob: pointer to QAOB used for the particular SBAL + * @user: pointer to upper layer program's state information related to SBAL + * (stored in user1 data of QAOB) + */ +struct qdio_outbuf_state { + u8 flags; + struct qaob *aob; + void *user; +}; + +#define QDIO_OUTBUF_STATE_FLAG_NONE 0x00 +#define QDIO_OUTBUF_STATE_FLAG_PENDING 0x01 + +#define CHSC_AC1_INITIATE_INPUTQ 0x80 + +#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 +#define CHSC_AC2_DATA_DIV_ENABLED 0x0002 + +#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000 + struct qdio_ssqd_desc { u8 flags; u8:8; @@ -243,8 +302,7 @@ struct qdio_ssqd_desc { u64 sch_token; u8 mro; u8 mri; - u8:8; - u8 sbalic; + u16 qdioac3; u16:16; u8:8; u8 mmwc; @@ -280,9 +338,11 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, * @no_output_qs: number of output queues * @input_handler: handler to be called for input queues * @output_handler: handler to be called for output queues + * @queue_start_poll: polling handlers (one per input queue or NULL) * @int_parm: interruption parameter * @input_sbal_addr_array: address of no_input_qs * 128 pointers * @output_sbal_addr_array: address of no_output_qs * 128 pointers + * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL) */ struct qdio_initialize { struct ccw_device *cdev; @@ -297,11 +357,12 @@ struct qdio_initialize { unsigned int no_output_qs; qdio_handler_t *input_handler; qdio_handler_t *output_handler; - void (*queue_start_poll) (struct ccw_device *, int, unsigned long); + void (**queue_start_poll) (struct ccw_device *, int, unsigned long); int scan_threshold; unsigned long int_parm; void **input_sbal_addr_array; void **output_sbal_addr_array; + struct qdio_outbuf_state *output_sbal_state_array; }; #define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */ @@ -316,6 +377,7 @@ struct qdio_initialize { extern int qdio_allocate(struct qdio_initialize *); extern int qdio_establish(struct qdio_initialize *); extern int qdio_activate(struct ccw_device *); +extern void qdio_release_aob(struct qaob *); extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int, unsigned int); extern int qdio_start_irq(struct ccw_device *, int); -- cgit v1.2.3 From 9cb7284f3058d272758ebaaa8f6f924cb99792bc Mon Sep 17 00:00:00 2001 From: "frank.blaschka@de.ibm.com" Date: Mon, 8 Aug 2011 01:33:56 +0000 Subject: qdio: support forced signal adapter indications This patch ensures that signal adapter commands are issued if they are indicated to be required. Signed-off-by: Einar Lueck Signed-off-by: Jan Glauber Signed-off-by: Frank Blaschka Signed-off-by: David S. Miller --- arch/s390/include/asm/qdio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 3881e9499e17..21993623da9a 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -279,6 +279,16 @@ struct qdio_outbuf_state { #define CHSC_AC1_INITIATE_INPUTQ 0x80 + +/* qdio adapter-characteristics-1 flag */ +#define AC1_SIGA_INPUT_NEEDED 0x40 /* process input queues */ +#define AC1_SIGA_OUTPUT_NEEDED 0x20 /* process output queues */ +#define AC1_SIGA_SYNC_NEEDED 0x10 /* ask hypervisor to sync */ +#define AC1_AUTOMATIC_SYNC_ON_THININT 0x08 /* set by hypervisor */ +#define AC1_AUTOMATIC_SYNC_ON_OUT_PCI 0x04 /* set by hypervisor */ +#define AC1_SC_QEBSM_AVAILABLE 0x02 /* available for subchannel */ +#define AC1_SC_QEBSM_ENABLED 0x01 /* enabled for subchannel */ + #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 -- cgit v1.2.3 From dfe5bb506172307e43287b8962348fb85801c0f4 Mon Sep 17 00:00:00 2001 From: Swen Schillig Date: Mon, 15 Aug 2011 14:40:31 +0200 Subject: [SCSI] qdio: base support for hardware data router with zfcp FICON Express8S supports hardware data router, which requires an adapted qdio request format. This part 1/2 provides the qdio base required for exploitation in zfcp. Signed-off-by: Swen Schillig Signed-off-by: Steffen Maier Signed-off-by: James Bottomley --- arch/s390/include/asm/qdio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 15c97625df8d..cbbf7d831a33 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -46,6 +46,8 @@ struct qdesfmt0 { u32 : 16; } __attribute__ ((packed)); +#define QDR_AC_MULTI_BUFFER_ENABLE 0x01 + /** * struct qdr - queue description record (QDR) * @qfmt: queue format @@ -222,6 +224,8 @@ struct slsb { u8 val[QDIO_MAX_BUFFERS_PER_Q]; } __attribute__ ((packed, aligned(256))); +#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080 +#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040 #define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010 #define CHSC_AC2_DATA_DIV_ENABLED 0x0002 @@ -287,6 +291,7 @@ typedef void qdio_handler_t(struct ccw_device *, unsigned int, int, struct qdio_initialize { struct ccw_device *cdev; unsigned char q_format; + unsigned char qdr_ac; unsigned char adapter_name[8]; unsigned int qib_param_field_format; unsigned char *qib_param_field; -- cgit v1.2.3 From 4f37a68cdaf6dea833cfdded2a3e0c47c0f006da Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Aug 2011 15:29:44 +0200 Subject: s390: Use direct ktime path for s390 clockevent device The clock comparator on s390 uses the same format as the TOD clock. If the value in the clock comparator is smaller than the current TOD value an interrupt is pending. Use the CLOCK_EVT_FEAT_KTIME feature to get the unmodified ktime of the next clockevent expiration and use it to program the clock comparator without querying the TOD clock. Signed-off-by: Martin Schwidefsky Cc: john stultz Link: http://lkml.kernel.org/r/20110823133143.153017933@de.ibm.com Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index dff933065ab6..c53716487e77 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -109,10 +109,14 @@ static void fixup_clock_comparator(unsigned long long delta) set_clock_comparator(S390_lowcore.clock_comparator); } -static int s390_next_event(unsigned long delta, +static int s390_next_ktime(ktime_t expires, struct clock_event_device *evt) { - S390_lowcore.clock_comparator = get_clock() + delta; + s64 nsecs; + + nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset())); + do_div(nsecs, 125); + S390_lowcore.clock_comparator = TOD_UNIX_EPOCH + (nsecs << 9); set_clock_comparator(S390_lowcore.clock_comparator); return 0; } @@ -137,14 +141,15 @@ void init_cpu_timer(void) cpu = smp_processor_id(); cd = &per_cpu(comparators, cpu); cd->name = "comparator"; - cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME; cd->mult = 16777; cd->shift = 12; cd->min_delta_ns = 1; cd->max_delta_ns = LONG_MAX; cd->rating = 400; cd->cpumask = cpumask_of(cpu); - cd->set_next_event = s390_next_event; + cd->set_next_ktime = s390_next_ktime; cd->set_mode = s390_set_mode; clockevents_register_device(cd); -- cgit v1.2.3 From e35f95b36e43f67a6f806172555a152c11ea0a78 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 22 Sep 2011 09:19:17 +0200 Subject: time, s390: Get rid of compile warning "s390: Use direct ktime path for s390 clockevent device" in linux-next introduces this compile warning: arch/s390/kernel/time.c: In function 's390_next_ktime': arch/s390/kernel/time.c:118:2: warning: comparison of distinct pointer types lacks a cast [enabled by default] Just use a u64 instead of an s64 variable. This is not a problem since it will always contain a positive value. Signed-off-by: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/1316675957-5538-1-git-send-email-heiko.carstens@de.ibm.com Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index c53716487e77..8d65bd0383fc 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -112,7 +112,7 @@ static void fixup_clock_comparator(unsigned long long delta) static int s390_next_ktime(ktime_t expires, struct clock_event_device *evt) { - s64 nsecs; + u64 nsecs; nsecs = ktime_to_ns(ktime_sub(expires, ktime_get_monotonic_offset())); do_div(nsecs, 125); -- cgit v1.2.3 From 85055dd805f0822f13f736bee2a521e222c38293 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 17 Aug 2011 20:42:24 +0200 Subject: PM / Hibernate: Include storage keys in hibernation image on s390 For s390 there is one additional byte associated with each page, the storage key. This byte contains the referenced and changed bits and needs to be included into the hibernation image. If the storage keys are not restored to their previous state all original pages would appear to be dirty. This can cause inconsistencies e.g. with read-only filesystems. Signed-off-by: Martin Schwidefsky Signed-off-by: Rafael J. Wysocki --- arch/s390/Kconfig | 1 + arch/s390/kernel/suspend.c | 118 ++++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/swsusp_asm64.S | 3 + 3 files changed, 122 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ed5cb5af5281..6b99fc3f9b63 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -91,6 +91,7 @@ config S390 select HAVE_ARCH_MUTEX_CPU_RELAX select HAVE_ARCH_JUMP_LABEL if !MARCH_G5 select HAVE_RCU_TABLE_FREE if SMP + select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index cf9e5c6d5527..b6f9afed74ec 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include /* @@ -14,6 +15,123 @@ */ extern const void __nosave_begin, __nosave_end; +/* + * The restore of the saved pages in an hibernation image will set + * the change and referenced bits in the storage key for each page. + * Overindication of the referenced bits after an hibernation cycle + * does not cause any harm but the overindication of the change bits + * would cause trouble. + * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each + * page to the most significant byte of the associated page frame + * number in the hibernation image. + */ + +/* + * Key storage is allocated as a linked list of pages. + * The size of the keys array is (PAGE_SIZE - sizeof(long)) + */ +struct page_key_data { + struct page_key_data *next; + unsigned char data[]; +}; + +#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *)) + +static struct page_key_data *page_key_data; +static struct page_key_data *page_key_rp, *page_key_wp; +static unsigned long page_key_rx, page_key_wx; + +/* + * For each page in the hibernation image one additional byte is + * stored in the most significant byte of the page frame number. + * On suspend no additional memory is required but on resume the + * keys need to be memorized until the page data has been restored. + * Only then can the storage keys be set to their old state. + */ +unsigned long page_key_additional_pages(unsigned long pages) +{ + return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); +} + +/* + * Free page_key_data list of arrays. + */ +void page_key_free(void) +{ + struct page_key_data *pkd; + + while (page_key_data) { + pkd = page_key_data; + page_key_data = pkd->next; + free_page((unsigned long) pkd); + } +} + +/* + * Allocate page_key_data list of arrays with enough room to store + * one byte for each page in the hibernation image. + */ +int page_key_alloc(unsigned long pages) +{ + struct page_key_data *pk; + unsigned long size; + + size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE); + while (size--) { + pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL); + if (!pk) { + page_key_free(); + return -ENOMEM; + } + pk->next = page_key_data; + page_key_data = pk; + } + page_key_rp = page_key_wp = page_key_data; + page_key_rx = page_key_wx = 0; + return 0; +} + +/* + * Save the storage key into the upper 8 bits of the page frame number. + */ +void page_key_read(unsigned long *pfn) +{ + unsigned long addr; + + addr = (unsigned long) page_address(pfn_to_page(*pfn)); + *(unsigned char *) pfn = (unsigned char) page_get_storage_key(addr); +} + +/* + * Extract the storage key from the upper 8 bits of the page frame number + * and store it in the page_key_data list of arrays. + */ +void page_key_memorize(unsigned long *pfn) +{ + page_key_wp->data[page_key_wx] = *(unsigned char *) pfn; + *(unsigned char *) pfn = 0; + if (++page_key_wx < PAGE_KEY_DATA_SIZE) + return; + page_key_wp = page_key_wp->next; + page_key_wx = 0; +} + +/* + * Get the next key from the page_key_data list of arrays and set the + * storage key of the page referred by @address. If @address refers to + * a "safe" page the swsusp_arch_resume code will transfer the storage + * key from the buffer page to the original page. + */ +void page_key_write(void *address) +{ + page_set_storage_key((unsigned long) address, + page_key_rp->data[page_key_rx], 0); + if (++page_key_rx >= PAGE_KEY_DATA_SIZE) + return; + page_key_rp = page_key_rp->next; + page_key_rx = 0; +} + int pfn_is_nosave(unsigned long pfn) { unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin)); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 51bcdb50a230..acb78cdee896 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -136,11 +136,14 @@ ENTRY(swsusp_arch_resume) 0: lg %r2,8(%r1) lg %r4,0(%r1) + iske %r0,%r4 lghi %r3,PAGE_SIZE lghi %r5,PAGE_SIZE 1: mvcle %r2,%r4,0 jo 1b + lg %r2,8(%r1) + sske %r0,%r2 lg %r1,16(%r1) ltgr %r1,%r1 jnz 0b -- cgit v1.2.3 From 61f42183fd15d5e666236f474d73b3555c7b83d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 29 Sep 2011 10:58:46 -0700 Subject: s390/jump-label: add arch_jump_label_transform_static() This allows jump-label entries to be cheaply updated on code which is not yet live. Signed-off-by: Jeremy Fitzhardinge Acked-by: Jason Baron Acked-by: Peter Zijlstra Cc: Jan Glauber --- arch/s390/kernel/jump_label.c | 51 ++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 20 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 44cc06bedf77..b987ab2c1541 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -18,26 +18,15 @@ struct insn { } __packed; struct insn_args { - unsigned long *target; - struct insn *insn; - ssize_t size; + struct jump_entry *entry; + enum jump_label_type type; }; -static int __arch_jump_label_transform(void *data) +static void __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) { - struct insn_args *args = data; - int rc; - - rc = probe_kernel_write(args->target, args->insn, args->size); - WARN_ON_ONCE(rc < 0); - return 0; -} - -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) -{ - struct insn_args args; struct insn insn; + int rc; if (type == JUMP_LABEL_ENABLE) { /* brcl 15,offset */ @@ -49,11 +38,33 @@ void arch_jump_label_transform(struct jump_entry *entry, insn.offset = 0; } - args.target = (void *) entry->code; - args.insn = &insn; - args.size = JUMP_LABEL_NOP_SIZE; + rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); + WARN_ON_ONCE(rc < 0); +} - stop_machine(__arch_jump_label_transform, &args, NULL); +static int __sm_arch_jump_label_transform(void *data) +{ + struct insn_args *args = data; + + __jump_label_transform(args->entry, args->type); + return 0; +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + struct insn_args args; + + args.entry = entry; + args.type = type; + + stop_machine(__sm_arch_jump_label_transform, &args, NULL); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __jump_label_transform(entry, type); } #endif -- cgit v1.2.3 From 1448c721e4fa2faf742029a0403b4b787fccb7fa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 17 Oct 2011 13:40:02 -0700 Subject: compat: sync compat_stats with statfs. This was found by inspection while tracking a similar bug in compat_statfs64, that has been fixed in mainline since decemeber. - This fixes a bug where not all of the f_spare fields were cleared on mips and s390. - Add the f_flags field to struct compat_statfs - Copy f_flags to userspace in case someone cares. - Use __clear_user to copy the f_spare field to userspace to ensure that all of the elements of f_spare are cleared. On some architectures f_spare is has 5 ints and on some architectures f_spare only has 4 ints. Which makes the previous technique of clearing each int individually broken. I don't expect anyone actually uses the old statfs system call anymore but if they do let them benefit from having the compat and the native version working the same. Signed-off-by: Eric W. Biederman Signed-off-by: Christoph Hellwig --- arch/s390/include/asm/compat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index da359ca6fe55..cdb9b78f6c08 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -131,7 +131,8 @@ struct compat_statfs { compat_fsid_t f_fsid; s32 f_namelen; s32 f_frsize; - s32 f_spare[6]; + s32 f_flags; + s32 f_spare[5]; }; #define COMPAT_RLIM_OLD_INFINITY 0x7fffffff -- cgit v1.2.3 From 4d47555a80495657161a7e71ec3014ff2021e450 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Tue, 18 Oct 2011 12:27:12 +0200 Subject: KVM: s390: check cpu_id prior to using it We use the cpu id provided by userspace as array index here. Thus we clearly need to check it first. Ooops. CC: Signed-off-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/kvm-s390.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27bc..0cba935d1282 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -312,11 +312,17 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { - struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); - int rc = -ENOMEM; + struct kvm_vcpu *vcpu; + int rc = -EINVAL; + + if (id >= KVM_MAX_VCPUS) + goto out; + + rc = -ENOMEM; + vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); if (!vcpu) - goto out_nomem; + goto out; vcpu->arch.sie_block = (struct kvm_s390_sie_block *) get_zeroed_page(GFP_KERNEL); @@ -352,7 +358,7 @@ out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: kfree(vcpu); -out_nomem: +out: return ERR_PTR(rc); } -- cgit v1.2.3 From b290411a1321dd937dce4aaa812e5d8fae8a14a5 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Tue, 18 Oct 2011 12:27:13 +0200 Subject: KVM: s390: fix return value of kvm_arch_init_vm This patch fixes the return value of kvm_arch_init_vm in case a memory allocation goes wrong. Signed-off-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0cba935d1282..397f0cbc8b76 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -175,6 +175,8 @@ int kvm_arch_init_vm(struct kvm *kvm) if (rc) goto out_err; + rc = -ENOMEM; + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; -- cgit v1.2.3 From 7eef87dc99e419b1cc051e4417c37e4744d7b661 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Tue, 18 Oct 2011 12:27:14 +0200 Subject: KVM: s390: fix register setting KVM common code does vcpu_load prior to calling our arch ioctls and vcpu_put after we're done here. Via the kvm_arch_vcpu_load/put callbacks we do load the fpu and access register state into the processor, which saves us moving the state on every SIE exit the kernel handles. However this breaks register setting from userspace, because of the following sequence: 1a. vcpu load stores userspace register content 1b. vcpu load loads guest register content 2. kvm_arch_vcpu_ioctl_set_fpu/sregs updates saved guest register content 3a. vcpu put stores the guest registers and overwrites the new content 3b. vcpu put loads the userspace register set again This patch loads the new guest register state into the cpu, so that the correct (new) set of guest registers will be stored in step 3a. Signed-off-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/kvm-s390.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 397f0cbc8b76..29635678b5ec 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -394,6 +394,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + restore_access_regs(vcpu->arch.guest_acrs); return 0; } @@ -409,6 +410,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); vcpu->arch.guest_fpregs.fpc = fpu->fpc; + restore_fp_regs(&vcpu->arch.guest_fpregs); return 0; } -- cgit v1.2.3 From 7697e71f72b45a1bd0abe70918c383100fcc8514 Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Tue, 18 Oct 2011 12:27:15 +0200 Subject: KVM: s390: implement sigp external call Implement sigp external call, which might be required for guests that issue an external call instead of an emergency signal for IPI. This fixes an issue with "KVM: unknown SIGP: 0x02" when booting such an SMP guest. Signed-off-by: Christian Ehrhardt Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti --- arch/s390/include/asm/kvm_host.h | 7 +++++++ arch/s390/kvm/interrupt.c | 30 +++++++++++++++++++++++++++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/sigp.c | 45 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 83 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24c..1ca5de07ac36 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -119,6 +119,7 @@ struct kvm_vcpu_stat { u32 instruction_lctlg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; u32 deliver_virtio_interrupt; @@ -138,6 +139,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; u32 instruction_sigp_arch; @@ -174,6 +176,10 @@ struct kvm_s390_prefix_info { __u32 address; }; +struct kvm_s390_extcall_info { + __u16 code; +}; + struct kvm_s390_emerg_info { __u16 code; }; @@ -186,6 +192,7 @@ struct kvm_s390_interrupt_info { struct kvm_s390_ext_info ext; struct kvm_s390_pgm_info pgm; struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; struct kvm_s390_prefix_info prefix; }; }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index c9aeb4b4d0b8..87c16705b381 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -38,6 +38,11 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) + return 1; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; @@ -98,6 +103,7 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: case KVM_S390_INT_SERVICE: case KVM_S390_INT_VIRTIO: @@ -143,6 +149,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, exception = 1; break; + case KVM_S390_INT_EXTERNAL_CALL: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call"); + vcpu->stat.deliver_external_call++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1202); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, inti->extcall.code); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + case KVM_S390_INT_SERVICE: VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", inti->ext.ext_params); @@ -522,6 +550,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, break; case KVM_S390_PROGRAM_INT: case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: default: kfree(inti); @@ -581,6 +610,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, break; case KVM_S390_SIGP_STOP: case KVM_S390_RESTART: + case KVM_S390_INT_EXTERNAL_CALL: case KVM_S390_INT_EMERGENCY: VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); inti->type = s390int->type; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 29635678b5ec..9610ba41b974 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -46,6 +46,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_external_call", VCPU_STAT(deliver_external_call) }, { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, @@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index d6a50c1fb2e6..f815118835f3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -87,6 +87,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) return -ENOMEM; inti->type = KVM_S390_INT_EMERGENCY; + inti->emerg.code = vcpu->vcpu_id; spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; @@ -103,9 +104,47 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) wake_up_interruptible(&li->wq); spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); +unlock: + spin_unlock(&fi->lock); + return rc; +} + +static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_local_interrupt *li; + struct kvm_s390_interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EXTERNAL_CALL; + inti->extcall.code = vcpu->vcpu_id; + + spin_lock(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ + VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); unlock: spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); return rc; } @@ -267,6 +306,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense(vcpu, cpu_addr, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_EXTERNAL_CALL: + vcpu->stat.instruction_sigp_external_call++; + rc = __sigp_external_call(vcpu, cpu_addr); + break; case SIGP_EMERGENCY: vcpu->stat.instruction_sigp_emergency++; rc = __sigp_emergency(vcpu, cpu_addr); -- cgit v1.2.3 From de400d6b78d15a73023485f050bc6b1709dc7a79 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Sun, 30 Oct 2011 15:16:04 +0100 Subject: [S390] fix mismatch in summation of I/O IRQ statistics Current IRQ statistics support does not show detail counts for I/O interrupts which are processed internally only. The result is a summation count which is way off such as this one: CPU0 CPU1 CPU2 I/O: 1331 710 442 [...] QAI: 15 16 16 [I/O] QDIO Adapter Interrupt QDI: 1 0 0 [I/O] QDIO Interrupt DAS: 706 645 381 [I/O] DASD C15: 26 10 0 [I/O] 3215 C70: 0 0 0 [I/O] 3270 TAP: 0 0 0 [I/O] Tape VMR: 0 0 0 [I/O] Unit Record Devices LCS: 0 0 0 [I/O] LCS CLW: 0 0 0 [I/O] CLAW CTC: 0 0 0 [I/O] CTC APB: 0 0 0 [I/O] AP Bus Fix this by moving I/O interrupt accounting into the common I/O layer. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwdev.h | 3 +++ arch/s390/include/asm/irq.h | 2 +- arch/s390/kernel/irq.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 623f2fb71774..9381c92cc779 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -11,6 +11,7 @@ #include #include #include +#include /* structs from asm/cio.h */ struct irb; @@ -127,6 +128,7 @@ enum uc_todo { * @restore: callback for restoring after hibernation * @uc_handler: callback for unit check handler * @driver: embedded device driver structure + * @int_class: interruption class to use for accounting interrupts */ struct ccw_driver { struct ccw_device_id *ids; @@ -144,6 +146,7 @@ struct ccw_driver { int (*restore)(struct ccw_device *); enum uc_todo (*uc_handler) (struct ccw_device *, struct irb *); struct device_driver driver; + enum interruption_class int_class; }; extern struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index ba7b01c726a3..1f686059f293 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -17,8 +17,8 @@ enum interruption_class { EXTINT_SCP, EXTINT_IUC, EXTINT_CPM, + IOINT_CIO, IOINT_QAI, - IOINT_QDI, IOINT_DAS, IOINT_C15, IOINT_C70, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1f4050d45f78..d382f9db3df5 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -42,8 +42,8 @@ static const struct irq_class intrclass_names[] = { {.name = "SCP", .desc = "[EXT] Service Call" }, {.name = "IUC", .desc = "[EXT] IUCV" }, {.name = "CPM", .desc = "[EXT] CPU Measurement" }, + {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" }, {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" }, - {.name = "QDI", .desc = "[I/O] QDIO Interrupt" }, {.name = "DAS", .desc = "[I/O] DASD" }, {.name = "C15", .desc = "[I/O] 3215" }, {.name = "C70", .desc = "[I/O] 3270" }, -- cgit v1.2.3 From dd4a5a31fc06a2cda4f6a1d9de74b9009f8e31de Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:05 +0100 Subject: [S390] avoid warning in show_cpuinfo The .start function and indirectly the .next function of the show_cpuinfo sequential operation uses NR_CPUS as limit instead of nr_cpu_ids. This can cause warnings like this: WARNING: at /usr/src/linux/include/linux/cpumask.h:107 Process lscpu (pid: 575, task: 000000007deb4338, ksp: 000000007794f588) Krnl PSW : 0704000180000000 0000000000106db4 (show_cpuinfo+0x108/0x234) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0 EA:3 Krnl GPRS: 0000000000000003 0000000000791988 000000000071b478 0000000000000004 0000000000000001 0000000000000000 000000007d139500 0000000000000400 0000000000000000 000000000070e24c 000000007d48d600 0000000000000005 000000007d48d600 00000000004dfa10 0000000000106cf8 000000007794fcc0 Krnl Code: 0000000000106da8: 95001000 cli 0(%r1),0 0000000000106dac: a774ffac brc 7,106d04 0000000000106db0: a7f40001 brc 15,106db2 >0000000000106db4: 92011000 mvi 0(%r1),1 0000000000106db8: a7f4ffa6 brc 15,106d04 0000000000106dbc: c0e5000065b4 brasl %r14,113924 0000000000106dc2: c09000303a45 larl %r9,70e24c 0000000000106dc8: c020001eefd4 larl %r2,4e4d70 Replacing NR_CPUS with nr_cpu_ids fixes it. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 311e9d712888..6e0073e43f54 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -74,7 +74,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? (void *)((unsigned long) *pos + 1) : NULL; + return *pos < nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) -- cgit v1.2.3 From caa04f69df9a5d4c10867b475006cf6f2f1e8500 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:06 +0100 Subject: [S390] topology: fix alloc_masks annotation Fix this warning: WARNING: vmlinux.o(.text+0x199b6): Section mismatch in reference from the function alloc_masks() to the function .init.text:__alloc_bootmem() Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 0cd340b72632..77b8942b9a15 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -299,8 +299,8 @@ out: } __initcall(init_topology_update); -static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, - int offset) +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) { int i, nr_masks; -- cgit v1.2.3 From a45aff5285871bf7be1781d9462d3fdbb6c913f9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:07 +0100 Subject: [S390] user per registers vs. ptrace single stepping git commit 5e9a2692 "[S390] ptrace cleanup" introduced a regression for the case when both a user PER set (e.g. a storage alteration trace) and PTRACE_SINGLESTEP are active. The new code will overrule the user PER set with a instruction-fetch PER set over the whole address space for ptrace single stepping. The inferior process will be stopped after each instruction with an instruction fetch event. Any other events that may have occurred concurrently are not reported (e.g. storage alteration event) because the control bits for them are not set. The solution is to merge the PER control bits of the user PER set with the PER_EVENT_IFETCH control bit for PTRACE_SINGLESTEP. Cc: stable@kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ef86ad243986..ae0e14b8880c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -47,29 +47,31 @@ enum s390_regset { void update_per_regs(struct task_struct *task) { - static const struct per_regs per_single_step = { - .control = PER_EVENT_IFETCH, - .start = 0, - .end = PSW_ADDR_INSN, - }; struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; - const struct per_regs *new; - struct per_regs old; - - /* TIF_SINGLE_STEP overrides the user specified PER registers. */ - new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ? - &per_single_step : &thread->per_user; + struct per_regs old, new; + + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = PSW_ADDR_INSN; + } /* Take care of the PER enablement bit in the PSW. */ - if (!(new->control & PER_EVENT_MASK)) { + if (!(new.control & PER_EVENT_MASK)) { regs->psw.mask &= ~PSW_MASK_PER; return; } regs->psw.mask |= PSW_MASK_PER; __ctl_store(old, 9, 11); - if (memcmp(new, &old, sizeof(struct per_regs)) != 0) - __ctl_load(*new, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); } void user_enable_single_step(struct task_struct *task) -- cgit v1.2.3 From e73b7fffe487c315fd1a4fa22282e3362b440a06 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:08 +0100 Subject: [S390] memory leak with RCU_TABLE_FREE The rcu page table free code uses a couple of bits in the page table pointer passed to tlb_remove_table to discern the different page table types. __tlb_remove_table extracts the type with an incorrect mask which leads to memory leaks. The correct mask is ((FRAG_MASK << 4) | FRAG_MASK). Cc: stable@kernel.org Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5d56c2b95b14..529a08838376 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -662,8 +662,9 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) void __tlb_remove_table(void *_table) { - void *table = (void *)((unsigned long) _table & PAGE_MASK); - unsigned type = (unsigned long) _table & ~PAGE_MASK; + const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK; + void *table = (void *)((unsigned long) _table & ~mask); + unsigned type = (unsigned long) _table & mask; if (type) __page_table_free_rcu(table, type); -- cgit v1.2.3 From 017ec18360c9894d11f1a2ba5d69f7786732e07a Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:16:09 +0100 Subject: [S390] use ENTRY macro for sys_setns_wrapper Use the ENTRY macro for the system call wrapper sys_setns_wrapper similarly to the other wrappers. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_wrapper.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 7526db6bf501..5006a1d9f5d0 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1623,8 +1623,7 @@ ENTRY(sys_syncfs_wrapper) lgfr %r2,%r2 # int jg sys_syncfs - .globl sys_setns_wrapper -sys_setns_wrapper: +ENTRY(sys_setns_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int jg sys_setns -- cgit v1.2.3 From 80853a8ab4fcfff55d88e86ed1f50695dba0d8ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 30 Oct 2011 15:16:35 +0100 Subject: [S390] fix _TIF_SINGLE_STEP definition _TIF_SINGLE_STEP is incorrectly defined as 1< Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 1a5dbb6f1495..f9a9a10979c9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -117,7 +117,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIE (1< Date: Sun, 30 Oct 2011 15:16:38 +0100 Subject: [S390] Force PSW restart on online CPU PSW restart can be triggered on offline CPUs. If this happens, currently the PSW restart code fails, because functions like smp_processor_id() do not work on offline CPUs. This patch fixes this as follows: If PSW restart is triggered on an offline CPU, the PSW restart (sigp restart) is done a second time on another CPU that is online and the old CPU is stopped afterwards. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/smp.h | 5 +++++ arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/smp.c | 23 +++++++++++++++++++++++ 3 files changed, 29 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 045e009fc164..ab47a69fdf07 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -33,6 +33,7 @@ extern struct save_area *zfcpdump_save_areas[NR_CPUS + 1]; extern void smp_switch_to_ipl_cpu(void (*func)(void *), void *); extern void smp_switch_to_cpu(void (*)(void *), void *, unsigned long sp, int from, int to); +extern void smp_restart_with_online_cpu(void); extern void smp_restart_cpu(void); /* @@ -64,6 +65,10 @@ static inline void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) func(data); } +static inline void smp_restart_with_online_cpu(void) +{ +} + #define smp_vcpu_scheduled (1) #endif /* CONFIG_SMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 48c710206366..90769b4bc7f6 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1738,6 +1738,7 @@ static struct kobj_attribute on_restart_attr = void do_restart(void) { + smp_restart_with_online_cpu(); smp_send_stop(); on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6ab16ac64d29..e4572601e91e 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -97,6 +97,29 @@ static inline int cpu_stopped(int cpu) return raw_cpu_stopped(cpu_logical_map(cpu)); } +/* + * Ensure that PSW restart is done on an online CPU + */ +void smp_restart_with_online_cpu(void) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (stap() == __cpu_logical_map[cpu]) { + /* We are online: Enable DAT again and return */ + __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + return; + } + } + /* We are not online: Do PSW restart on an online CPU */ + while (sigp(cpu, sigp_restart) == sigp_busy) + cpu_relax(); + /* And stop ourself */ + while (raw_sigp(stap(), sigp_stop) == sigp_busy) + cpu_relax(); + for (;;); +} + void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) { struct _lowcore *lc, *current_lc; -- cgit v1.2.3 From 7f0bf656c66e4292e965c95fd9de55c72b6578bb Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:39 +0100 Subject: [S390] Add real memory access functions Add access function for real memory needed by s390 kdump backend. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/system.h | 2 ++ arch/s390/mm/maccess.c | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 6582f69f2389..afb849e4bf4b 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -114,6 +114,8 @@ extern void pfault_fini(void); extern void cmma_init(void); extern int memcpy_real(void *, void *, size_t); extern void copy_to_absolute_zero(void *dest, void *src, size_t count); +extern int copy_to_user_real(void __user *dest, void *src, size_t count); +extern int copy_from_user_real(void *dest, void __user *src, size_t count); #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 5dbbaa6e594c..1cb8427bedfb 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -60,6 +61,9 @@ long probe_kernel_write(void *dst, const void *src, size_t size) return copied < 0 ? -EFAULT : 0; } +/* + * Copy memory in real mode (kernel to kernel) + */ int memcpy_real(void *dest, void *src, size_t count) { register unsigned long _dest asm("2") = (unsigned long) dest; @@ -101,3 +105,55 @@ void copy_to_absolute_zero(void *dest, void *src, size_t count) __ctl_load(cr0, 0, 0); preempt_enable(); } + +/* + * Copy memory from kernel (real) to user (virtual) + */ +int copy_to_user_real(void __user *dest, void *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (memcpy_real(buf, src + offs, size)) + goto out; + if (copy_to_user(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} + +/* + * Copy memory from user (virtual) to kernel (real) + */ +int copy_from_user_real(void *dest, void __user *src, size_t count) +{ + int offs = 0, size, rc; + char *buf; + + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + return -ENOMEM; + rc = -EFAULT; + while (offs < count) { + size = min(PAGE_SIZE, count - offs); + if (copy_from_user(buf, src + offs, size)) + goto out; + if (memcpy_real(dest + offs, buf, size)) + goto out; + offs += size; + } + rc = 0; +out: + free_page((unsigned long) buf); + return rc; +} -- cgit v1.2.3 From 60a0c68df2632feaa4a986af084650d1165d89c5 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:40 +0100 Subject: [S390] kdump backend code This patch provides the architecture specific part of the s390 kdump support. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 10 + arch/s390/include/asm/ipl.h | 1 + arch/s390/include/asm/kexec.h | 3 + arch/s390/include/asm/reset.h | 2 +- arch/s390/include/asm/setup.h | 13 ++ arch/s390/kernel/Makefile | 1 + arch/s390/kernel/base.S | 6 + arch/s390/kernel/crash_dump.c | 427 +++++++++++++++++++++++++++++++++++++++ arch/s390/kernel/head.S | 22 ++ arch/s390/kernel/head_kdump.S | 119 +++++++++++ arch/s390/kernel/ipl.c | 12 +- arch/s390/kernel/machine_kexec.c | 161 ++++++++++++++- arch/s390/kernel/mem_detect.c | 69 +++++++ arch/s390/kernel/reipl.S | 6 + arch/s390/kernel/reipl64.S | 5 +- arch/s390/kernel/setup.c | 208 +++++++++++++++++++ arch/s390/kernel/smp.c | 19 +- arch/s390/mm/vmem.c | 6 + 18 files changed, 1078 insertions(+), 12 deletions(-) create mode 100644 arch/s390/kernel/crash_dump.c create mode 100644 arch/s390/kernel/head_kdump.S (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 6b99fc3f9b63..a9fbd43395f7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -569,6 +569,16 @@ config KEXEC current kernel, and to start another kernel. It is like a reboot but is independent of hardware/microcode support. +config CRASH_DUMP + bool "kernel crash dumps" + depends on 64BIT + help + Generate crash dump after being started by kexec. + Crash dump kernels are loaded in the main kernel with kexec-tools + into a specially reserved region and then later executed after + a crash by kdump/kexec. + For more details see Documentation/kdump/kdump.txt + config ZFCPDUMP def_bool n prompt "zfcpdump support" diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 97cc4403fabf..6940abfbe1d9 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -168,5 +168,6 @@ enum diag308_rc { extern int diag308(unsigned long subcode, void *addr); extern void diag308_reset(void); +extern void store_status(void); #endif /* _ASM_S390_IPL_H */ diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index bb729b84a21e..fb1c96fa348c 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -30,6 +30,9 @@ /* Not more than 2GB */ #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31) +/* Maximum address we can use for the crash control pages */ +#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL) + /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index f584f4a52581..3d6ad4ad2a3f 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -17,5 +17,5 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void); +extern void s390_reset_system(void (*func)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index d5e2ef10537d..5a099714df04 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -26,15 +26,21 @@ #define IPL_DEVICE (*(unsigned long *) (0x10404)) #define INITRD_START (*(unsigned long *) (0x1040C)) #define INITRD_SIZE (*(unsigned long *) (0x10414)) +#define OLDMEM_BASE (*(unsigned long *) (0x1041C)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10424)) #else /* __s390x__ */ #define IPL_DEVICE (*(unsigned long *) (0x10400)) #define INITRD_START (*(unsigned long *) (0x10408)) #define INITRD_SIZE (*(unsigned long *) (0x10410)) +#define OLDMEM_BASE (*(unsigned long *) (0x10418)) +#define OLDMEM_SIZE (*(unsigned long *) (0x10420)) #endif /* __s390x__ */ #define COMMAND_LINE ((char *) (0x10480)) #define CHUNK_READ_WRITE 0 #define CHUNK_READ_ONLY 1 +#define CHUNK_OLDMEM 4 +#define CHUNK_CRASHK 5 struct mem_chunk { unsigned long addr; @@ -48,6 +54,8 @@ extern int memory_end_set; extern unsigned long memory_end; void detect_memory_layout(struct mem_chunk chunk[]); +void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr, + unsigned long size, int type); #define PRIMARY_SPACE_MODE 0 #define ACCESS_REGISTER_MODE 1 @@ -106,6 +114,7 @@ extern unsigned int user_mode; #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) +#define ZFCPDUMP_HSA_SIZE_MAX (64UL<<20) /* * Console mode. Override with conmode= @@ -134,10 +143,14 @@ extern char kernel_nss_name[]; #define IPL_DEVICE 0x10404 #define INITRD_START 0x1040C #define INITRD_SIZE 0x10414 +#define OLDMEM_BASE 0x1041C +#define OLDMEM_SIZE 0x10424 #else /* __s390x__ */ #define IPL_DEVICE 0x10400 #define INITRD_START 0x10408 #define INITRD_SIZE 0x10410 +#define OLDMEM_BASE 0x10418 +#define OLDMEM_SIZE 0x10420 #endif /* __s390x__ */ #define COMMAND_LINE 0x10480 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index df3732249baa..dd4f07640919 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o # Kexec part S390_KEXEC_OBJS := machine_kexec.o crash.o diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 255435663bf8..f8828d38fa6e 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -86,6 +86,8 @@ s390_base_pgm_handler_fn: ENTRY(diag308_reset) larl %r4,.Lctlregs # Save control registers stctg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Floating point control register + stfpc 0(%r4) larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0 lghi %r3,0 lg %r4,0(%r4) # Save PSW @@ -99,6 +101,8 @@ ENTRY(diag308_reset) sam64 # Switch to 64 bit addressing mode larl %r4,.Lctlregs # Restore control registers lctlg %c0,%c15,0(%r4) + larl %r4,.Lfpctl # Restore floating point ctl register + lfpc 0(%r4) br %r14 .align 16 .Lrestart_psw: @@ -110,6 +114,8 @@ ENTRY(diag308_reset) .rept 16 .quad 0 .endr +.Lfpctl: + .long 0 .previous #else /* CONFIG_64BIT */ diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 000000000000..2a9a3f405574 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,427 @@ +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +/* + * Copy one page from "oldmem" + * + * For the kdump reserved memory this functions performs a swap operation: + * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. + * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + unsigned long src; + int rc; + + if (!csize) + return 0; + + src = (pfn << PAGE_SHIFT) + offset; + if (src < OLDMEM_SIZE) + src += OLDMEM_BASE; + else if (src > OLDMEM_BASE && + src < OLDMEM_BASE + OLDMEM_SIZE) + src -= OLDMEM_BASE; + if (userbuf) + rc = copy_to_user_real((void __user *) buf, (void *) src, + csize); + else + rc = memcpy_real(buf, (void *) src, csize); + return rc < 0 ? rc : csize; +} + +/* + * Copy memory from old kernel + */ +static int copy_from_oldmem(void *dest, void *src, size_t count) +{ + unsigned long copied = 0; + int rc; + + if ((unsigned long) src < OLDMEM_SIZE) { + copied = min(count, OLDMEM_SIZE - (unsigned long) src); + rc = memcpy_real(dest, src + OLDMEM_BASE, copied); + if (rc) + return rc; + } + return memcpy_real(dest + copied, src + copied, count - copied); +} + +/* + * Alloc memory and panic in case of ENOMEM + */ +static void *kzalloc_panic(int len) +{ + void *rc; + + rc = kzalloc(len, GFP_KERNEL); + if (!rc) + panic("s390 kdump kzalloc (%d) failed", len); + return rc; +} + +/* + * Get memory layout and create hole for oldmem + */ +static struct mem_chunk *get_memory_layout(void) +{ + struct mem_chunk *chunk_array; + + chunk_array = kzalloc_panic(MEMORY_CHUNKS * sizeof(struct mem_chunk)); + detect_memory_layout(chunk_array); + create_mem_hole(chunk_array, OLDMEM_BASE, OLDMEM_SIZE, CHUNK_CRASHK); + return chunk_array; +} + +/* + * Initialize ELF note + */ +static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +/* + * Initialize prstatus note + */ +static void *nt_prstatus(void *ptr, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + static int cpu_nr = 1; + + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); + nt_prstatus.pr_pid = cpu_nr; + cpu_nr++; + + return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), + "CORE"); +} + +/* + * Initialize fpregset (floating point) note + */ +static void *nt_fpregset(void *ptr, struct save_area *sa) +{ + elf_fpregset_t nt_fpregset; + + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); + memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); + + return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), + "CORE"); +} + +/* + * Initialize timer note + */ +static void *nt_s390_timer(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD clock comparator note + */ +static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, + sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize TOD programmable register note + */ +static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, + sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize control register note + */ +static void *nt_s390_ctrs(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, + sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize prefix register note + */ +static void *nt_s390_prefix(void *ptr, struct save_area *sa) +{ + return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, + sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa) +{ + ptr = nt_prstatus(ptr, sa); + ptr = nt_fpregset(ptr, sa); + ptr = nt_s390_timer(ptr, sa); + ptr = nt_s390_tod_cmp(ptr, sa); + ptr = nt_s390_tod_preg(ptr, sa); + ptr = nt_s390_ctrs(ptr, sa); + ptr = nt_s390_prefix(ptr, sa); + return ptr; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), + KEXEC_CORE_NOTE_NAME); +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + char nt_name[11], *vmcoreinfo; + Elf64_Nhdr note; + void *addr; + + if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) + return ptr; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_from_oldmem(¬e, addr, sizeof(note))) + return ptr; + if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) + return ptr; + if (strcmp(nt_name, "VMCOREINFO") != 0) + return ptr; + vmcoreinfo = kzalloc_panic(note.n_descsz + 1); + if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) + return ptr; + vmcoreinfo[note.n_descsz + 1] = 0; + return nt_init(ptr, 0, vmcoreinfo, note.n_descsz, "VMCOREINFO"); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + int i, cpus = 0; + + for (i = 0; zfcpdump_save_areas[i]; i++) { + if (zfcpdump_save_areas[i]->pref_reg == 0) + continue; + cpus++; + } + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i, cnt = 0; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + if (mem_chunk->size == 0) + continue; + cnt++; + } + kfree(chunk_array); + return cnt; +} + +/* + * Relocate pointer in order to allow vmcore code access the data + */ +static inline unsigned long relocate(unsigned long addr) +{ + return OLDMEM_BASE + addr; +} + +/* + * Initialize ELF loads (new kernel) + */ +static int loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + struct mem_chunk *chunk_array, *mem_chunk; + int i; + + chunk_array = get_memory_layout(); + for (i = 0; i < MEMORY_CHUNKS; i++) { + mem_chunk = &chunk_array[i]; + if (mem_chunk->size == 0) + break; + if (chunk_array[i].type != CHUNK_READ_WRITE && + chunk_array[i].type != CHUNK_READ_ONLY) + continue; + else + phdr->p_filesz = mem_chunk->size; + phdr->p_type = PT_LOAD; + phdr->p_offset = mem_chunk->addr; + phdr->p_vaddr = mem_chunk->addr; + phdr->p_paddr = mem_chunk->addr; + phdr->p_memsz = mem_chunk->size; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } + kfree(chunk_array); + return i; +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int i; + + ptr = nt_prpsinfo(ptr); + + for (i = 0; zfcpdump_save_areas[i]; i++) { + sa = zfcpdump_save_areas[i]; + if (sa->pref_reg == 0) + continue; + ptr = fill_cpu_elf_notes(ptr, sa); + } + ptr = nt_vmcoreinfo(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = relocate(notes_offset); + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +/* + * Create ELF core header (new kernel) + */ +static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + int mem_chunk_cnt; + void *ptr, *hdr; + u32 alloc_size; + u64 hdr_off; + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = 0x1000 + get_cpu_cnt() * 0x300 + + mem_chunk_cnt * sizeof(Elf64_Phdr); + hdr = kzalloc_panic(alloc_size); + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off); + *elfcorebuf_sz = hdr_off; + *elfcorebuf = (void *) relocate((unsigned long) hdr); + BUG_ON(*elfcorebuf_sz > alloc_size); +} + +/* + * Create kdump ELF core header in new kernel, if it has not been passed via + * the "elfcorehdr" kernel parameter + */ +static int setup_kdump_elfcorehdr(void) +{ + size_t elfcorebuf_sz; + char *elfcorebuf; + + if (!OLDMEM_BASE || is_kdump_kernel()) + return -EINVAL; + s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz); + elfcorehdr_addr = (unsigned long long) elfcorebuf; + elfcorehdr_size = elfcorebuf_sz; + return 0; +} + +subsys_initcall(setup_kdump_elfcorehdr); diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 2d781bab37bb..900068d2bf92 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -449,10 +449,28 @@ ENTRY(start) # .org 0x10000 ENTRY(startup) + j .Lep_startup_normal + .org 0x10008 +# +# This is a list of s390 kernel entry points. At address 0x1000f the number of +# valid entry points is stored. +# +# IMPORTANT: Do not change this table, it is s390 kernel ABI! +# + .ascii "S390EP" + .byte 0x00,0x01 +# +# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode +# + .org 0x10010 +ENTRY(startup_kdump) + j .Lep_startup_kdump +.Lep_startup_normal: basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 + xc 0xe00(256),0xe00 stck __LC_LAST_UPDATE_CLOCK spt 5f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13) @@ -534,6 +552,8 @@ ENTRY(startup) .align 8 5: .long 0x7fffffff,0xffffffff +#include "head_kdump.S" + # # params at 10400 (setup.h) # @@ -541,6 +561,8 @@ ENTRY(startup) .long 0,0 # IPL_DEVICE .long 0,0 # INITRD_START .long 0,0 # INITRD_SIZE + .long 0,0 # OLDMEM_BASE + .long 0,0 # OLDMEM_SIZE .org COMMAND_LINE .byte "root=/dev/ram0 ro" diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S new file mode 100644 index 000000000000..e1ac3893e972 --- /dev/null +++ b/arch/s390/kernel/head_kdump.S @@ -0,0 +1,119 @@ +/* + * S390 kdump lowlevel functions (new kernel) + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu + */ + +#define DATAMOVER_ADDR 0x4000 +#define COPY_PAGE_ADDR 0x6000 + +#ifdef CONFIG_CRASH_DUMP + +# +# kdump entry (new kernel - not yet relocated) +# +# Note: This code has to be position independent +# + +.align 2 +.Lep_startup_kdump: + lhi %r1,2 # mode 2 = esame (dump) + sigp %r1,%r0,0x12 # Switch to esame mode + sam64 # Switch to 64 bit addressing + basr %r13,0 +.Lbase: + larl %r2,.Lbase_addr # Check, if we have been + lg %r2,0(%r2) # already relocated: + clgr %r2,%r13 # + jne .Lrelocate # No : Start data mover + lghi %r2,0 # Yes: Start kdump kernel + brasl %r14,startup_kdump_relocated + +.Lrelocate: + larl %r4,startup + lg %r2,0x418(%r4) # Get kdump base + lg %r3,0x420(%r4) # Get kdump size + + larl %r10,.Lcopy_start # Source of data mover + lghi %r8,DATAMOVER_ADDR # Target of data mover + mvc 0(256,%r8),0(%r10) # Copy data mover code + + agr %r8,%r2 # Copy data mover to + mvc 0(256,%r8),0(%r10) # reserved mem + + lghi %r14,DATAMOVER_ADDR # Jump to copied data mover + basr %r14,%r14 +.Lbase_addr: + .quad .Lbase + +# +# kdump data mover code (runs at address DATAMOVER_ADDR) +# +# r2: kdump base address +# r3: kdump size +# +.Lcopy_start: + basr %r13,0 # Base +0: + lgr %r11,%r2 # Save kdump base address + lgr %r12,%r2 + agr %r12,%r3 # Compute kdump end address + + lghi %r5,0 + lghi %r10,COPY_PAGE_ADDR # Load copy page address +1: + mvc 0(256,%r10),0(%r5) # Copy old kernel to tmp + mvc 0(256,%r5),0(%r11) # Copy new kernel to old + mvc 0(256,%r11),0(%r10) # Copy tmp to new + aghi %r11,256 + aghi %r5,256 + clgr %r11,%r12 + jl 1b + + lg %r14,.Lstartup_kdump-0b(%r13) + basr %r14,%r14 # Start relocated kernel +.Lstartup_kdump: + .long 0x00000000,0x00000000 + startup_kdump_relocated +.Lcopy_end: + +# +# Startup of kdump (relocated new kernel) +# +.align 2 +startup_kdump_relocated: + basr %r13,0 +0: + mvc 0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW + mvc 464(16,%r0),.Lpgm_psw-0b(%r13) # Setup pgm check PSW + lhi %r1,1 # Start new kernel + diag %r1,%r1,0x308 # with diag 308 + +.Lno_diag308: # No diag 308 + sam31 # Switch to 31 bit addr mode + sr %r1,%r1 # Erase register r1 + sr %r2,%r2 # Erase register r2 + sigp %r1,%r2,0x12 # Switch to 31 bit arch mode + lpsw 0 # Start new kernel... +.align 8 +.Lrestart_psw: + .long 0x00080000,0x80000000 + startup +.Lpgm_psw: + .quad 0x0000000180000000,0x0000000000000000 + .Lno_diag308 +#else +.align 2 +.Lep_startup_kdump: +#ifdef CONFIG_64BIT + larl %r13,startup_kdump_crash + lpswe 0(%r13) +.align 8 +startup_kdump_crash: + .quad 0x0002000080000000,0x0000000000000000 + startup_kdump_crash +#else + basr %r13,0 +0: lpsw startup_kdump_crash-0b(%r13) +.align 8 +startup_kdump_crash: + .long 0x000a0000,0x00000000 + startup_kdump_crash +#endif /* CONFIG_64BIT */ +#endif /* CONFIG_CRASH_DUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 90769b4bc7f6..ca0520c52547 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -1740,6 +1741,9 @@ void do_restart(void) { smp_restart_with_online_cpu(); smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif on_restart_trigger.action->fn(&on_restart_trigger); stop_run(&on_restart_trigger); } @@ -2010,7 +2014,7 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void) +void s390_reset_system(void (*func)(void *), void *data) { struct _lowcore *lc; @@ -2038,6 +2042,10 @@ void s390_reset_system(void) S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; + /* Store status at absolute zero */ + store_status(); + do_reset_calls(); + if (func) + func(data); } - diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index b09b9c62573e..0ceac06a0299 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -1,10 +1,11 @@ /* * arch/s390/kernel/machine_kexec.c * - * Copyright IBM Corp. 2005,2006 + * Copyright IBM Corp. 2005,2011 * * Author(s): Rolf Adelsberger, * Heiko Carstens + * Michael Holzheu */ #include @@ -21,12 +22,131 @@ #include #include #include +#include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); extern const unsigned char relocate_kernel[]; extern const unsigned long long relocate_kernel_len; +#ifdef CONFIG_CRASH_DUMP + +void *fill_cpu_elf_notes(void *ptr, struct save_area *sa); + +/* + * Create ELF notes for one CPU + */ +static void add_elf_notes(int cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + void *ptr; + + memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa)); + ptr = (u64 *) per_cpu_ptr(crash_notes, cpu); + ptr = fill_cpu_elf_notes(ptr, sa); + memset(ptr, 0, sizeof(struct elf_note)); +} + +/* + * Store status of next available physical CPU + */ +static int store_status_next(int start_cpu, int this_cpu) +{ + struct save_area *sa = (void *) 4608 + store_prefix(); + int cpu, rc; + + for (cpu = start_cpu; cpu < 65536; cpu++) { + if (cpu == this_cpu) + continue; + do { + rc = raw_sigp(cpu, sigp_stop_and_store_status); + } while (rc == sigp_busy); + if (rc != sigp_order_code_accepted) + continue; + if (sa->pref_reg) + return cpu; + } + return -1; +} + +/* + * Initialize CPU ELF notes + */ +void setup_regs(void) +{ + unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE; + int cpu, this_cpu, phys_cpu = 0, first = 1; + + this_cpu = stap(); + + if (!S390_lowcore.prefixreg_save_area) + first = 0; + for_each_online_cpu(cpu) { + if (first) { + add_elf_notes(cpu); + first = 0; + continue; + } + phys_cpu = store_status_next(phys_cpu, this_cpu); + if (phys_cpu == -1) + break; + add_elf_notes(cpu); + phys_cpu++; + } + /* Copy dump CPU store status info to absolute zero */ + memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area)); +} + +#endif + +/* + * Start kdump: We expect here that a store status has been done on our CPU + */ +static void __do_machine_kdump(void *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; + + __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + setup_regs(); + start_kdump(1); +#endif +} + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static int kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + int (*start_kdump)(int) = (void *)image->start; + int rc; + + __arch_local_irq_stnsm(0xfb); /* disable DAT */ + rc = start_kdump(0); + __arch_local_irq_stosm(0x04); /* enable DAT */ + return rc ? 0 : -EINVAL; +#else + return -EINVAL; +#endif +} + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + int machine_kexec_prepare(struct kimage *image) { void *reboot_code_buffer; @@ -35,6 +155,9 @@ int machine_kexec_prepare(struct kimage *image) if (ipl_flags & IPL_NSS_VALID) return -ENOSYS; + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + /* We don't support anything but the default image type for now. */ if (image->type != KEXEC_TYPE_DEFAULT) return -EINVAL; @@ -51,27 +174,53 @@ void machine_kexec_cleanup(struct kimage *image) { } +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); +} + void machine_shutdown(void) { } -static void __machine_kexec(void *data) +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) { relocate_kernel_t data_mover; struct kimage *image = data; - pfault_fini(); - s390_reset_system(); - data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page); /* Call the moving routine */ (*data_mover)(&image->head, image->start); - for (;;); } +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + struct kimage *image = data; + + pfault_fini(); + if (image->type == KEXEC_TYPE_CRASH) + s390_reset_system(__do_machine_kdump, data); + else + s390_reset_system(__do_machine_kexec, data); + disabled_wait((unsigned long) __builtin_return_address(0)); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ void machine_kexec(struct kimage *image) { + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; tracer_disable(); smp_send_stop(); smp_switch_to_ipl_cpu(__machine_kexec, image); diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c index 0fbe4e32f7ba..19b4568f4cee 100644 --- a/arch/s390/kernel/mem_detect.c +++ b/arch/s390/kernel/mem_detect.c @@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[]) arch_local_irq_restore(flags); } EXPORT_SYMBOL(detect_memory_layout); + +/* + * Create memory hole with given address, size, and type + */ +void create_mem_hole(struct mem_chunk chunks[], unsigned long addr, + unsigned long size, int type) +{ + unsigned long start, end, new_size; + int i; + + for (i = 0; i < MEMORY_CHUNKS; i++) { + if (chunks[i].size == 0) + continue; + if (addr + size < chunks[i].addr) + continue; + if (addr >= chunks[i].addr + chunks[i].size) + continue; + start = max(addr, chunks[i].addr); + end = min(addr + size, chunks[i].addr + chunks[i].size); + new_size = end - start; + if (new_size == 0) + continue; + if (start == chunks[i].addr && + end == chunks[i].addr + chunks[i].size) { + /* Remove chunk */ + chunks[i].type = type; + } else if (start == chunks[i].addr) { + /* Make chunk smaller at start */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = chunks[i].addr + new_size; + chunks[i + 1].size = chunks[i].size - new_size; + chunks[i].size = new_size; + chunks[i].type = type; + i += 1; + } else if (end == chunks[i].addr + chunks[i].size) { + /* Make chunk smaller at end */ + if (i >= MEMORY_CHUNKS - 1) + panic("Unable to create memory hole"); + memmove(&chunks[i + 1], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 1))); + chunks[i + 1].addr = start; + chunks[i + 1].size = new_size; + chunks[i + 1].type = type; + chunks[i].size -= new_size; + i += 1; + } else { + /* Create memory hole */ + if (i >= MEMORY_CHUNKS - 2) + panic("Unable to create memory hole"); + memmove(&chunks[i + 2], &chunks[i], + sizeof(struct mem_chunk) * + (MEMORY_CHUNKS - (i + 2))); + chunks[i + 1].addr = addr; + chunks[i + 1].size = size; + chunks[i + 1].type = type; + chunks[i + 2].addr = addr + size; + chunks[i + 2].size = + chunks[i].addr + chunks[i].size - (addr + size); + chunks[i + 2].type = chunks[i].type; + chunks[i].size = addr - chunks[i].addr; + i += 2; + } + } +} diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 303d961c3bb5..ad67c214be04 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,12 @@ #include #include +# +# store_status: Empty implementation until kdump is supported on 31 bit +# +ENTRY(store_status) + br %r14 + # # do_reipl_asm # Parameter: r2 = schid of reipl device diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index e690975403f4..a0f5b686a3cd 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -62,8 +62,11 @@ ENTRY(store_status) larl %r2,store_status stg %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1) br %r14 -.align 8 + + .section .bss + .align 8 .Lclkcmp: .quad 0x0000000000000000 + .previous # # do_reipl_asm diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 7b371c37061d..b5a30412b2e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -57,6 +60,7 @@ #include #include #include +#include long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -435,6 +439,9 @@ static void __init setup_resources(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_OLDMEM || + memory_chunk[i].type == CHUNK_CRASHK) + continue; res = alloc_bootmem_low(sizeof(*res)); res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { @@ -479,6 +486,7 @@ static void __init setup_memory_end(void) unsigned long max_mem; int i; + #ifdef CONFIG_ZFCPDUMP if (ipl_info.type == IPL_TYPE_FCP_DUMP) { memory_end = ZFCPDUMP_HSA_SIZE; @@ -550,6 +558,187 @@ static void __init setup_restart_psw(void) copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +#ifdef CONFIG_CRASH_DUMP + +/* + * Find suitable location for crashkernel memory + */ +static unsigned long __init find_crash_base(unsigned long crash_size, + char **msg) +{ + unsigned long crash_base; + struct mem_chunk *chunk; + int i; + + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return 0; + } + if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE)) + return OLDMEM_BASE; + + for (i = MEMORY_CHUNKS - 1; i >= 0; i--) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (chunk->type != CHUNK_READ_WRITE) + continue; + if (chunk->size < crash_size) + continue; + crash_base = (chunk->addr + chunk->size) - crash_size; + if (crash_base < crash_size) + continue; + if (crash_base < ZFCPDUMP_HSA_SIZE_MAX) + continue; + if (crash_base < (unsigned long) INITRD_START + INITRD_SIZE) + continue; + return crash_base; + } + *msg = "no suitable area found"; + return 0; +} + +/* + * Check if crash_base and crash_size is valid + */ +static int __init verify_crash_base(unsigned long crash_base, + unsigned long crash_size, + char **msg) +{ + struct mem_chunk *chunk; + int i; + + /* + * Because we do the swap to zero, we must have at least 'crash_size' + * bytes free space before crash_base + */ + if (crash_size > crash_base) { + *msg = "crashkernel offset must be greater than size"; + return -EINVAL; + } + + /* First memory chunk must be at least crash_size */ + if (memory_chunk[0].size < crash_size) { + *msg = "first memory chunk must be at least crashkernel size"; + return -EINVAL; + } + /* Check if we fit into the respective memory chunk */ + for (i = 0; i < MEMORY_CHUNKS; i++) { + chunk = &memory_chunk[i]; + if (chunk->size == 0) + continue; + if (crash_base < chunk->addr) + continue; + if (crash_base >= chunk->addr + chunk->size) + continue; + /* we have found the memory chunk */ + if (crash_base + crash_size > chunk->addr + chunk->size) { + *msg = "selected memory chunk is too small for " + "crashkernel memory"; + return -EINVAL; + } + return 0; + } + *msg = "invalid memory range specified"; + return -EINVAL; +} + +/* + * Reserve kdump memory by creating a memory hole in the mem_chunk array + */ +static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size, + int type) +{ + + create_mem_hole(memory_chunk, addr, size, type); +} + +/* + * When kdump is enabled, we have to ensure that no memory from + * the area [0 - crashkernel memory size] and + * [crashk_res.start - crashk_res.end] is set offline. + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + if (arg->start_pfn > PFN_DOWN(crashk_res.end)) + return NOTIFY_OK; + if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start)) + return NOTIFY_OK; + return NOTIFY_BAD; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Make sure that oldmem, where the dump is stored, is protected + */ +static void reserve_oldmem(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (!OLDMEM_BASE) + return; + + reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM); + reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE, + CHUNK_OLDMEM); + if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size) + saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1; + else + saved_max_pfn = PFN_DOWN(real_memory_size) - 1; +#endif +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + char *msg; + int rc; + + rc = parse_crashkernel(boot_command_line, memory_end, &crash_size, + &crash_base); + if (rc || crash_size == 0) + return; + crash_base = PAGE_ALIGN(crash_base); + crash_size = PAGE_ALIGN(crash_size); + if (register_memory_notifier(&kdump_mem_nb)) + return; + if (!crash_base) + crash_base = find_crash_base(crash_size, &msg); + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (verify_crash_base(crash_base, crash_size, &msg)) { + pr_info("crashkernel reservation failed: %s\n", msg); + unregister_memory_notifier(&kdump_mem_nb); + return; + } + if (!OLDMEM_BASE && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + insert_resource(&iomem_resource, &crashk_res); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_READ_WRITE); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, memory_end >> 20); +#endif +} + static void __init setup_memory(void) { @@ -580,6 +769,14 @@ setup_memory(void) if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) { start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE; +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE) { + /* Move initrd behind kdump oldmem */ + if (start + INITRD_SIZE > OLDMEM_BASE && + start < OLDMEM_BASE + OLDMEM_SIZE) + start = OLDMEM_BASE + OLDMEM_SIZE; + } +#endif if (start + INITRD_SIZE > memory_end) { pr_err("initrd extends beyond end of " "memory (0x%08lx > 0x%08lx) " @@ -644,6 +841,15 @@ setup_memory(void) reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size, BOOTMEM_DEFAULT); +#ifdef CONFIG_CRASH_DUMP + if (crashk_res.start) + reserve_bootmem(crashk_res.start, + crashk_res.end - crashk_res.start + 1, + BOOTMEM_DEFAULT); + if (is_kdump_kernel()) + reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE, + PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT); +#endif #ifdef CONFIG_BLK_DEV_INITRD if (INITRD_START && INITRD_SIZE) { if (INITRD_START + INITRD_SIZE <= memory_end) { @@ -812,6 +1018,8 @@ setup_arch(char **cmdline_p) setup_ipl(); setup_memory_end(); setup_addressing_mode(); + reserve_oldmem(); + reserve_crashkernel(); setup_memory(); setup_resources(); setup_restart_psw(); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e4572601e91e..e3f51dfa5cad 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -304,11 +305,13 @@ void smp_ctl_clear_bit(int cr, int bit) } EXPORT_SYMBOL(smp_ctl_clear_bit); -#ifdef CONFIG_ZFCPDUMP +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { - if (ipl_info.type != IPL_TYPE_FCP_DUMP) + if (ipl_info.type != IPL_TYPE_FCP_DUMP && !OLDMEM_BASE) + return; + if (is_kdump_kernel()) return; if (cpu >= NR_CPUS) { pr_warning("CPU %i exceeds the maximum %i and is excluded from " @@ -426,6 +429,18 @@ static void __init smp_detect_cpus(void) info = kmalloc(sizeof(*info), GFP_KERNEL); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); +#ifdef CONFIG_CRASH_DUMP + if (OLDMEM_BASE && !is_kdump_kernel()) { + struct save_area *save_area; + + save_area = kmalloc(sizeof(*save_area), GFP_KERNEL); + if (!save_area) + panic("could not allocate memory for save area\n"); + copy_oldmem_page(1, (void *) save_area, sizeof(*save_area), + 0x200, 0); + zfcpdump_save_areas[0] = save_area; + } +#endif /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 781ff5169560..4799383e2df9 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -335,6 +335,9 @@ void __init vmem_map_init(void) ro_start = ((unsigned long)&_stext) & PAGE_MASK; ro_end = PFN_ALIGN((unsigned long)&_eshared); for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; start = memory_chunk[i].addr; end = memory_chunk[i].addr + memory_chunk[i].size; if (start >= ro_end || end <= ro_start) @@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void) for (i = 0; i < MEMORY_CHUNKS; i++) { if (!memory_chunk[i].size) continue; + if (memory_chunk[i].type == CHUNK_CRASHK || + memory_chunk[i].type == CHUNK_OLDMEM) + continue; seg = kzalloc(sizeof(*seg), GFP_KERNEL); if (!seg) panic("Out of memory...\n"); -- cgit v1.2.3 From d38593f9387055566b782d00d38d9a347a96e7d9 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:42 +0100 Subject: [S390] Export vmcoreinfo note This patch defines for s390 an ABI defined pointer to the vmcoreinfo note at a well known address. With this patch tools are able to find this information in dumps created by stand-alone or hypervisor dump tools. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 10 ++++++---- arch/s390/kernel/setup.c | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index e85c911aabf0..4f990a5e5b59 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -151,10 +151,11 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ + __u32 vmcore_info; /* 0x0e08 */ /* 64 bit save area */ - __u64 save_area_64; /* 0x0e08 */ - __u8 pad_0x0e10[0x0f00-0x0e10]; /* 0x0e10 */ + __u64 save_area_64; /* 0x0e0c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -290,10 +291,11 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ + __u64 vmcore_info; /* 0x0e0c */ /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + __u64 save_area_64; /* 0x0e14 */ + __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b5a30412b2e5..63c81de665ec 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -558,6 +558,15 @@ static void __init setup_restart_psw(void) copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } +static void __init setup_vmcoreinfo(void) +{ +#ifdef CONFIG_KEXEC + unsigned long ptr = paddr_vmcoreinfo_note(); + + copy_to_absolute_zero(&S390_lowcore.vmcore_info, &ptr, sizeof(ptr)); +#endif +} + #ifdef CONFIG_CRASH_DUMP /* @@ -1022,6 +1031,7 @@ setup_arch(char **cmdline_p) reserve_crashkernel(); setup_memory(); setup_resources(); + setup_vmcoreinfo(); setup_restart_psw(); setup_lowcore(); -- cgit v1.2.3 From dab7a7b1538fec48790a321a58180adae79a3f3c Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:16:44 +0100 Subject: [S390] Add architecture code for unmapping crashkernel memory This patch implements the crash_map_pages() function for s390. KEXEC_CRASH_MEM_ALIGN is set to HPAGE_SIZE, in order to support kernel mappings that use large pages. We also use HPAGE_SIZE alignment for CONFIG_HUGETLB_PAGE=n in order to have the same 1 MiB alignment on all s390 systems. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kexec.h | 3 +++ arch/s390/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++++ arch/s390/kernel/setup.c | 10 ++++++---- 3 files changed, 40 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index fb1c96fa348c..cf4e47b0948c 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -36,6 +36,9 @@ /* Allocate one page for the pdp and the second for the code */ #define KEXEC_CONTROL_PAGE_SIZE 4096 +/* Alignment of crashkernel memory */ +#define KEXEC_CRASH_MEM_ALIGN HPAGE_SIZE + /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_S390 diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 0ceac06a0299..13a0b528c70b 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -132,6 +132,37 @@ static int kdump_csum_valid(struct kimage *image) #endif } +/* + * Map or unmap crashkernel memory + */ +static void crash_map_pages(int enable) +{ + unsigned long size = resource_size(&crashk_res); + + BUG_ON(crashk_res.start % KEXEC_CRASH_MEM_ALIGN || + size % KEXEC_CRASH_MEM_ALIGN); + if (enable) + vmem_add_mapping(crashk_res.start, size); + else + vmem_remove_mapping(crashk_res.start, size); +} + +/* + * Map crashkernel memory + */ +void crash_map_reserved_pages(void) +{ + crash_map_pages(1); +} + +/* + * Unmap crashkernel memory + */ +void crash_unmap_reserved_pages(void) +{ + crash_map_pages(0); +} + /* * Give back memory to hypervisor before new kdump is loaded */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 63c81de665ec..6f8e3777a0c8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -446,6 +446,7 @@ static void __init setup_resources(void) res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; switch (memory_chunk[i].type) { case CHUNK_READ_WRITE: + case CHUNK_CRASHK: res->name = "System RAM"; break; case CHUNK_READ_ONLY: @@ -720,8 +721,8 @@ static void __init reserve_crashkernel(void) &crash_base); if (rc || crash_size == 0) return; - crash_base = PAGE_ALIGN(crash_base); - crash_size = PAGE_ALIGN(crash_size); + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); if (register_memory_notifier(&kdump_mem_nb)) return; if (!crash_base) @@ -741,7 +742,7 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); - reserve_kdump_bootmem(crash_base, crash_size, CHUNK_READ_WRITE); + reserve_kdump_bootmem(crash_base, crash_size, CHUNK_CRASHK); pr_info("Reserving %lluMB of memory at %lluMB " "for crashkernel (System RAM: %luMB)\n", crash_size >> 20, crash_base >> 20, memory_end >> 20); @@ -816,7 +817,8 @@ setup_memory(void) for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { unsigned long start_chunk, end_chunk, pfn; - if (memory_chunk[i].type != CHUNK_READ_WRITE) + if (memory_chunk[i].type != CHUNK_READ_WRITE && + memory_chunk[i].type != CHUNK_CRASHK) continue; start_chunk = PFN_DOWN(memory_chunk[i].addr); end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size); -- cgit v1.2.3 From 0edc8faa769095e98a5a5adc28db982f99f0d663 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:45 +0100 Subject: [S390] lowcore cleanup Remove the save_area_64 field from the 0xe00 - 0xf00 area in the lowcore. Use a free slot in the save_area array instead. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 10 ++-------- arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/entry.S | 4 ++-- arch/s390/kernel/entry64.S | 4 ++-- arch/s390/kernel/reipl64.S | 4 ++-- 5 files changed, 8 insertions(+), 15 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 4f990a5e5b59..9e13c7d56cc1 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -152,10 +152,7 @@ struct _lowcore { __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ __u32 vmcore_info; /* 0x0e08 */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e0c */ - __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ @@ -292,10 +289,7 @@ struct _lowcore { __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ __u64 vmcore_info; /* 0x0e0c */ - - /* 64 bit save area */ - __u64 save_area_64; /* 0x0e14 */ - __u8 pad_0x0e1c[0x0f00-0x0e1c]; /* 0x0e1c */ + __u8 pad_0x0e14[0x0f00-0x0e14]; /* 0x0e14 */ /* Extended facility list */ __u64 stfle_fac_list[32]; /* 0x0f00 */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 2b45591e1582..1140035b1cd8 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -141,7 +141,6 @@ int main(void) DEFINE(__LC_FPREGS_SAVE_AREA, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); - DEFINE(__LC_SAVE_AREA_64, offsetof(struct _lowcore, save_area_64)); #ifdef CONFIG_32BIT DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 02ec8fe7d03f..195387ab7c98 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -853,13 +853,13 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - st %r15,__LC_SAVE_AREA_64(%r0) # save r15 + st %r15,__LC_SAVE_AREA+48(%r0) # save r15 basr %r15,0 0: l %r15,.Lrestart_stack-0b(%r15) # load restart stack l %r15,0(%r15) ahi %r15,-SP_SIZE # make room for pt_regs stm %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(4,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(4,%r15),__LC_SAVE_AREA+48(%r0)# store saved %r15 to stack mvc SP_PSW(8,%r15),__LC_RST_OLD_PSW(%r0) # store restart old psw xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 basr %r14,0 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 713da0760538..3257f7f55551 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -869,12 +869,12 @@ restart_go: # PSW restart interrupt handler # ENTRY(psw_restart_int_handler) - stg %r15,__LC_SAVE_AREA_64(%r0) # save r15 + stg %r15,__LC_SAVE_AREA+120(%r0) # save r15 larl %r15,restart_stack # load restart stack lg %r15,0(%r15) aghi %r15,-SP_SIZE # make room for pt_regs stmg %r0,%r14,SP_R0(%r15) # store gprs %r0-%r14 to stack - mvc SP_R15(8,%r15),__LC_SAVE_AREA_64(%r0)# store saved %r15 to stack + mvc SP_R15(8,%r15),__LC_SAVE_AREA+120(%r0)# store saved %r15 to stack mvc SP_PSW(16,%r15),__LC_RST_OLD_PSW(%r0)# store restart old psw xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # set backchain to 0 brasl %r14,do_restart diff --git a/arch/s390/kernel/reipl64.S b/arch/s390/kernel/reipl64.S index a0f5b686a3cd..732a793ec53a 100644 --- a/arch/s390/kernel/reipl64.S +++ b/arch/s390/kernel/reipl64.S @@ -17,11 +17,11 @@ # ENTRY(store_status) /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_64(%r0) + stg %r1,__LC_SAVE_AREA+120(%r0) lghi %r1,SAVE_AREA_BASE /* General purpose registers */ stmg %r0,%r15,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) - lg %r2,__LC_SAVE_AREA_64(%r0) + lg %r2,__LC_SAVE_AREA+120(%r0) stg %r2,__LC_GPREGS_SAVE_AREA-SAVE_AREA_BASE+8(%r1) /* Control registers */ stctg %c0,%c15,__LC_CREGS_SAVE_AREA-SAVE_AREA_BASE(%r1) -- cgit v1.2.3 From 3ee49c8f123257c72b74f398ef99ac3348c493cc Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Sun, 30 Oct 2011 15:16:46 +0100 Subject: [S390] defconfig: switch on CONFIG_DEVTMPFS Switching on the DEVTMPFS kernel option helpes to maintain a /dev file system early in the boot process, especially, in limited environments. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 29c82c640a88..6cf8e26b3137 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -68,7 +68,7 @@ CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FIRMWARE_IN_KERNEL is not set +CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y -- cgit v1.2.3 From 20b40a794baf3b4b0320c0a77ce944d5d1a01f25 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:47 +0100 Subject: [S390] signal race with restarting system calls For a ERESTARTNOHAND/ERESTARTSYS/ERESTARTNOINTR restarting system call do_signal will prepare the restart of the system call with a rewind of the PSW before calling get_signal_to_deliver (where the debugger might take control). For A ERESTART_RESTARTBLOCK restarting system call do_signal will set -EINTR as return code. There are two issues with this approach: 1) strace never sees ERESTARTNOHAND, ERESTARTSYS, ERESTARTNOINTR or ERESTART_RESTARTBLOCK as the rewinding already took place or the return code has been changed to -EINTR 2) if get_signal_to_deliver does not return with a signal to deliver the restart via the repeat of the svc instruction is left in place. This opens a race if another signal is made pending before the system call instruction can be reexecuted. The original system call will be restarted even if the second signal would have ended the system call with -EINTR. These two issues can be solved by dropping the early rewind of the system call before get_signal_to_deliver has been called and by using the TIF_RESTART_SVC magic to do the restart if no signal has to be delivered. The only situation where the system call restart via the repeat of the svc instruction is appropriate is when a SA_RESTART signal is delivered to user space. Unfortunately this breaks inferior calls by the debugger again. The system call number and the length of the system call instruction is lost over the inferior call and user space will see ERESTARTNOHAND/ ERESTARTSYS/ERESTARTNOINTR/ERESTART_RESTARTBLOCK. To correct this a new ptrace interface is added to save/restore the system call number and system call instruction length. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 5 +- arch/s390/include/asm/syscall.h | 5 +- arch/s390/include/asm/thread_info.h | 1 + arch/s390/kernel/asm-offsets.c | 3 +- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/entry.S | 28 +++++----- arch/s390/kernel/entry64.S | 30 +++++----- arch/s390/kernel/ptrace.c | 51 ++++++++++++++++- arch/s390/kernel/signal.c | 107 ++++++++++++++++++------------------ 9 files changed, 141 insertions(+), 91 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 62fd80c9e98c..93c907b4776f 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -328,8 +328,7 @@ struct pt_regs psw_t psw; unsigned long gprs[NUM_GPRS]; unsigned long orig_gpr2; - unsigned short ilc; - unsigned short svcnr; + unsigned int svc_code; }; /* @@ -487,6 +486,8 @@ typedef struct #define PTRACE_POKETEXT_AREA 0x5004 #define PTRACE_POKEDATA_AREA 0x5005 #define PTRACE_GET_LAST_BREAK 0x5006 +#define PTRACE_PEEK_SYSTEM_CALL 0x5007 +#define PTRACE_POKE_SYSTEM_CALL 0x5008 /* * PT_PROT definition is loosely based on hppa bsd definition in diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 5c0246b955d8..614267f60713 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -13,6 +13,7 @@ #define _ASM_SYSCALL_H 1 #include +#include #include /* @@ -25,7 +26,7 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svcnr ? regs->svcnr : -1; + return regs->svc_code ? (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, @@ -37,7 +38,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return (regs->gprs[2] >= -4096UL) ? -regs->gprs[2] : 0; + return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index f9a9a10979c9..0c4788eb5a65 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + unsigned int system_call; __u64 user_timer; __u64 system_timer; unsigned long last_break; /* last breaking-event-address. */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 1140035b1cd8..751318765e2e 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -45,8 +45,7 @@ int main(void) DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs)); DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2)); - DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc)); - DEFINE(__PT_SVCNR, offsetof(struct pt_regs, svcnr)); + DEFINE(__PT_SVC_CODE, offsetof(struct pt_regs, svc_code)); DEFINE(__PT_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a9a285b8c4ad..d7c8e54c32e7 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -342,7 +342,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) return err; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 195387ab7c98..afe3685d30e7 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 52 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 56 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 60 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ @@ -229,7 +228,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -239,12 +238,12 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER sysc_do_svc: xr %r7,%r7 - icm %r7,3,SP_SVCNR(%r15) # load svc number and test for svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load svc number and test for svc 0 bnz BASED(sysc_nr_ok) # svc number > 0 # svc 0: system call number in %r1 cl %r1,BASED(.Lnr_syscalls) bnl BASED(sysc_nr_ok) - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: sll %r7,2 # svc number *4 @@ -335,10 +334,11 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - l %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number b BASED(sysc_nr_ok) # restart svc # @@ -346,7 +346,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -361,7 +361,7 @@ sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 xr %r0,%r0 - icm %r0,3,SP_SVCNR(%r15) + icm %r0,3,SP_SVC_CODE(%r15) st %r0,SP_R2(%r15) basr %r14,%r1 cl %r2,BASED(.Lnr_syscalls) @@ -454,7 +454,7 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -531,7 +531,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC l %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -550,7 +550,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -966,7 +966,7 @@ cleanup_system_call: st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 0(4,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 3257f7f55551..7ff07d3a29c1 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -43,8 +43,7 @@ SP_R13 = STACK_FRAME_OVERHEAD + __PT_GPRS + 104 SP_R14 = STACK_FRAME_OVERHEAD + __PT_GPRS + 112 SP_R15 = STACK_FRAME_OVERHEAD + __PT_GPRS + 120 SP_ORIG_R2 = STACK_FRAME_OVERHEAD + __PT_ORIG_GPR2 -SP_ILC = STACK_FRAME_OVERHEAD + __PT_ILC -SP_SVCNR = STACK_FRAME_OVERHEAD + __PT_SVCNR +SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER @@ -250,7 +249,7 @@ sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -260,14 +259,14 @@ sysc_update: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK sysc_do_svc: - llgh %r7,SP_SVCNR(%r15) + llgh %r7,SP_SVC_CODE+2(%r15) slag %r7,%r7,2 # shift and test for svc 0 jnz sysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 cghi %r1,NR_syscalls jnl sysc_nr_ok - sth %r1,SP_SVCNR(%r15) + sth %r1,SP_SVC_CODE+2(%r15) slag %r7,%r1,2 # shift and test for svc 0 sysc_nr_ok: larl %r10,sys_call_table @@ -358,11 +357,12 @@ sysc_notify_resume: # sysc_restart: ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lg %r7,SP_R2(%r15) # load new svc number - mvc SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument lmg %r2,%r6,SP_R2(%r15) # load svc arguments - sth %r7,SP_SVCNR(%r15) - slag %r7,%r7,2 + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 j sysc_nr_ok # restart svc # @@ -370,7 +370,7 @@ sysc_restart: # sysc_singlestep: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -382,7 +382,7 @@ sysc_singlestep: sysc_tracesys: la %r2,SP_PTREGS(%r15) # load pt_regs la %r3,0 - llgh %r0,SP_SVCNR(%r15) + llgh %r0,SP_SVC_CODE+2(%r15) stg %r0,SP_R2(%r15) brasl %r14,do_syscall_trace_enter lghi %r0,NR_syscalls @@ -470,7 +470,7 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_ILC(4,%r15),SP_ILC(%r15) + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -551,7 +551,7 @@ pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER @@ -571,7 +571,7 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVCNR(2,%r15),SP_SVCNR(%r15) # clear svc number + xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -973,7 +973,7 @@ cleanup_system_call: stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW - mvc SP_ILC(4,%r15),__LC_SVC_ILC + mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ae0e14b8880c..bae1cc49fe96 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -42,6 +42,7 @@ enum s390_regset { REGSET_GENERAL, REGSET_FP, REGSET_LAST_BREAK, + REGSET_SYSTEM_CALL, REGSET_GENERAL_EXTENDED, }; @@ -303,6 +304,13 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) high order bit but older gdb's rely on it */ data |= PSW_ADDR_AMODE; #endif + if (addr == (addr_t) &dummy->regs.psw.addr) + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; + *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { @@ -610,6 +618,11 @@ static int __poke_user_compat(struct task_struct *child, /* Build a 64 bit psw address from 31 bit address. */ task_pt_regs(child)->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* + * The debugger changed the instruction address, + * reset system call restart, see signal.c:do_signal + */ + task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ *(__u32*)((addr_t) &task_pt_regs(child)->psw @@ -737,7 +750,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svcnr = 0; + regs->svc_code = 0; ret = -1; } @@ -899,6 +912,26 @@ static int s390_last_break_get(struct task_struct *target, #endif +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &task_thread_info(target)->system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + static const struct user_regset s390_regsets[] = { [REGSET_GENERAL] = { .core_note_type = NT_PRSTATUS, @@ -925,6 +958,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_last_break_get, }, #endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1104,6 +1145,14 @@ static const struct user_regset s390_compat_regsets[] = { .align = sizeof(long), .get = s390_compat_last_break_get, }, + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .get = s390_system_call_get, + .set = s390_system_call_set, + }, [REGSET_GENERAL_EXTENDED] = { .core_note_type = NT_S390_HIGH_GPRS, .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9a40e1cc5ec3..e751cab80e04 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "entry.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -156,7 +157,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svcnr = 0; /* disable syscall checks */ + regs->svc_code = 0; /* disable syscall checks */ return 0; } @@ -401,7 +402,6 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka, */ void do_signal(struct pt_regs *regs) { - unsigned long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; struct k_sigaction ka; @@ -421,54 +421,43 @@ void do_signal(struct pt_regs *regs) else oldset = ¤t->blocked; - /* Are we from a system call? */ - if (regs->svcnr) { - continue_addr = regs->psw.addr; - restart_addr = continue_addr - regs->ilc; - retval = regs->gprs[2]; - - /* Prepare for system call restart. We do this here so that a - debugger will see the already changed PSW. */ - switch (retval) { - case -ERESTARTNOHAND: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = restart_addr; - break; - case -ERESTART_RESTARTBLOCK: - regs->gprs[2] = -EINTR; - } - regs->svcnr = 0; /* Don't deal with this again. */ - } - - /* Get signal to deliver. When running under ptrace, at this point - the debugger may change all our registers ... */ + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current_thread_info()->system_call = regs->svc_code; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - - /* Depending on the signal settings we may need to revert the - decision to restart the system call. */ - if (signr > 0 && regs->psw.addr == restart_addr) { - if (retval == -ERESTARTNOHAND - || (retval == -ERESTARTSYS - && !(current->sighand->action[signr-1].sa.sa_flags - & SA_RESTART))) { - regs->gprs[2] = -EINTR; - regs->psw.addr = continue_addr; - } - } + regs->svc_code = current_thread_info()->system_call; if (signr > 0) { /* Whee! Actually deliver the signal. */ - int ret; -#ifdef CONFIG_COMPAT - if (is_compat_task()) { - ret = handle_signal32(signr, &ka, &info, oldset, regs); - } - else -#endif - ret = handle_signal(signr, &ka, &info, oldset, regs); - if (!ret) { + if (regs->svc_code > 0) { + /* Check for system call restarting. */ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = regs->psw.addr - + (regs->svc_code >> 16); + break; + } + /* No longer in a system call */ + regs->svc_code = 0; + } + + if ((is_compat_task() ? + handle_signal32(signr, &ka, &info, oldset, regs) : + handle_signal(signr, &ka, &info, oldset, regs)) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -482,11 +471,28 @@ void do_signal(struct pt_regs *regs) * Let tracing know that we've done the handler setup. */ tracehook_signal_handler(signr, &info, &ka, regs, - test_thread_flag(TIF_SINGLE_STEP)); + test_thread_flag(TIF_SINGLE_STEP)); } return; } + /* No handlers present - check for system call restart */ + if (regs->svc_code > 0) { + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->svc_code = __NR_restart_syscall; + /* fallthrough */ + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* Restart system call with magic TIF bit. */ + regs->gprs[2] = regs->orig_gpr2; + set_thread_flag(TIF_RESTART_SVC); + break; + } + } + /* * If there's no signal to deliver, we just put the saved sigmask back. */ @@ -494,13 +500,6 @@ void do_signal(struct pt_regs *regs) clear_thread_flag(TIF_RESTORE_SIGMASK); sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); } - - /* Restart a different system call. */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->psw.addr == continue_addr) { - regs->gprs[2] = __NR_restart_syscall; - set_thread_flag(TIF_RESTART_SVC); - } } void do_notify_resume(struct pt_regs *regs) -- cgit v1.2.3 From ccf45cafb0805978e6f13a672caca0e536e87cad Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:48 +0100 Subject: [S390] addressing mode limits and psw address wrapping An instruction with an address right below the adress limit for the current addressing mode will wrap. The instruction restart logic in the protection fault handler and the signal code need to follow the wrapping rules to find the correct instruction address. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 22 +++++++++++++++++++++- arch/s390/include/asm/ptrace.h | 4 ++++ arch/s390/kernel/signal.c | 5 +++-- arch/s390/mm/fault.c | 2 +- 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a4b6229e5d4b..306c93c1b184 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -187,7 +187,6 @@ static inline void __load_psw(psw_t psw) * Set PSW mask to specified value, while leaving the * PSW addr pointing to the next instruction. */ - static inline void __load_psw_mask (unsigned long mask) { unsigned long addr; @@ -212,6 +211,27 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); #endif /* __s390x__ */ } + +/* + * Rewind PSW instruction address by specified number of bytes. + */ +static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) +{ +#ifndef __s390x__ + if (psw.addr & PSW_ADDR_AMODE) + /* 31 bit mode */ + return (psw.addr - ilc) | PSW_ADDR_AMODE; + /* 24 bit mode */ + return (psw.addr - ilc) & ((1UL << 24) - 1); +#else + unsigned long mask; + + mask = (psw.mask & PSW_MASK_EA) ? -1UL : + (psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 : + (1UL << 24) - 1; + return (psw.addr - ilc) & mask; +#endif +} /* * Function to stop a processor until an interruption occurred diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 93c907b4776f..2904cc66967a 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -236,6 +236,8 @@ typedef struct #define PSW_MASK_ASC 0x0000C000UL #define PSW_MASK_CC 0x00003000UL #define PSW_MASK_PM 0x00000F00UL +#define PSW_MASK_EA 0x00000000UL +#define PSW_MASK_BA 0x00000000UL #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL @@ -261,6 +263,8 @@ typedef struct #define PSW_MASK_ASC 0x0000C00000000000UL #define PSW_MASK_CC 0x0000300000000000UL #define PSW_MASK_PM 0x00000F0000000000UL +#define PSW_MASK_EA 0x0000000100000000UL +#define PSW_MASK_BA 0x0000000080000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index e751cab80e04..058e372bada1 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -447,8 +447,9 @@ void do_signal(struct pt_regs *regs) /* fallthrough */ case -ERESTARTNOINTR: regs->gprs[2] = regs->orig_gpr2; - regs->psw.addr = regs->psw.addr - - (regs->svc_code >> 16); + regs->psw.addr = + __rewind_psw(regs->psw, + regs->svc_code >> 16); break; } /* No longer in a system call */ diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 9564fc779b27..a90fd91a9c72 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -393,7 +393,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, int fault; /* Protection exception is suppressing, decrement psw address. */ - regs->psw.addr -= (pgm_int_code >> 16); + regs->psw.addr = __rewind_psw(regs->psw, pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code -- cgit v1.2.3 From b6ef5bb3d93efb95ba855a628740375c2280a59e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:49 +0100 Subject: [S390] add TIF_SYSCALL thread flag Add an explicit TIF_SYSCALL bit that indicates if a task is inside a system call. The svc_code in the pt_regs structure is now only valid if TIF_SYSCALL is set. With this definition TIF_RESTART_SVC can be replaced with TIF_SYSCALL. Overall do_signal is a bit more readable and it saves a few lines of code. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 3 +- arch/s390/include/asm/thread_info.h | 4 +-- arch/s390/kernel/compat_signal.c | 2 +- arch/s390/kernel/entry.S | 66 ++++++++++++++---------------------- arch/s390/kernel/entry64.S | 67 ++++++++++++++----------------------- arch/s390/kernel/ptrace.c | 2 +- arch/s390/kernel/signal.c | 19 +++++++---- 7 files changed, 69 insertions(+), 94 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 614267f60713..b239ff53b189 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -26,7 +26,8 @@ extern const unsigned int sys_call_table[]; static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - return regs->svc_code ? (regs->svc_code & 0xffff) : -1; + return test_tsk_thread_flag(task, TIF_SYSCALL) ? + (regs->svc_code & 0xffff) : -1; } static inline void syscall_rollback(struct task_struct *task, diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 0c4788eb5a65..a23183423b14 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -85,10 +85,10 @@ static inline struct thread_info *current_thread_info(void) /* * thread information flags bit numbers */ +#define TIF_SYSCALL 0 /* inside a system call */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ -#define TIF_RESTART_SVC 4 /* restart svc with new svc number */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -104,11 +104,11 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SINGLE_STEP 20 /* This task is single stepped */ #define TIF_FREEZE 21 /* thread is freezing for suspend */ +#define _TIF_SYSCALL (1<thread.fp_regs); - regs->svc_code = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index afe3685d30e7..b13157057e02 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,11 +47,11 @@ SP_SVC_CODE = STACK_FRAME_OVERHEAD + __PT_SVC_CODE SP_SIZE = STACK_FRAME_OVERHEAD + __PT_SIZE _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT @@ -227,9 +227,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+3(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -248,7 +249,7 @@ sysc_do_svc: sysc_nr_ok: sll %r7,2 # svc number *4 l %r10,BASED(.Lsysc_table) - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r10) # get system call addr. bnz BASED(sysc_tracesys) @@ -258,23 +259,19 @@ sysc_nr_ok: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + bno BASED(sysc_restore) tm __TI_flags+3(%r12),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) + ni __TI_flags+3(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -# -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - bno BASED(sysc_restore) - # # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+3(%r12),_TIF_MCCK_PENDING bo BASED(sysc_mcck_pending) tm __TI_flags+3(%r12),_TIF_NEED_RESCHED @@ -283,8 +280,6 @@ sysc_work_tif: bo BASED(sysc_sigpending) tm __TI_flags+3(%r12),_TIF_NOTIFY_RESUME bo BASED(sysc_notify_resume) - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) tm __TI_flags+3(%r12),_TIF_PER_TRAP bo BASED(sysc_singlestep) b BASED(sysc_return) # beware of critical section cleanup @@ -313,11 +308,14 @@ sysc_sigpending: la %r2,SP_PTREGS(%r15) # load pt_regs l %r1,BASED(.Ldo_signal) basr %r14,%r1 # call do_signal - tm __TI_flags+3(%r12),_TIF_RESTART_SVC - bo BASED(sysc_restart) - tm __TI_flags+3(%r12),_TIF_PER_TRAP - bo BASED(sysc_singlestep) - b BASED(sysc_return) + tm __TI_flags+3(%r12),_TIF_SYSCALL + bno BASED(sysc_return) + lm %r2,%r6,SP_R2(%r15) # load svc arguments + xr %r7,%r7 # svc 0 returns -ENOSYS + clc SP_SVC_CODE+2(2,%r15),BASED(.Lnr_syscalls+2) + bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 + icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number + b BASED(sysc_nr_ok) # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -328,25 +326,11 @@ sysc_notify_resume: la %r14,BASED(sysc_return) br %r1 # call do_notify_resume - -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+3(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lm %r2,%r6,SP_R2(%r15) # load svc arguments - xr %r7,%r7 # svc 0 returns -ENOSYS - clc SP_SVC_CODE+2(%r15),BASED(.Lnr_syscalls+2) - bnl BASED(sysc_nr_ok) # invalid svc number -> do svc 0 - icm %r7,3,SP_SVC_CODE+2(%r15)# load new svc number - b BASED(sysc_nr_ok) # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code + ni __TI_flags+3(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler la %r14,BASED(sysc_return) # load adr. of system return @@ -376,7 +360,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+2(%r12),_TIF_SYSCALL + tm __TI_flags+2(%r12),_TIF_TRACE >> 8 bz BASED(sysc_return) l %r1,BASED(.Ltrace_exit) la %r2,SP_PTREGS(%r15) # load pt_regs @@ -454,7 +438,6 @@ ENTRY(pgm_check_handler) bnz BASED(pgm_per) # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(8,%r15),__LC_PGM_OLD_PSW l %r12,__LC_THREAD_INFO # load pointer to thread_info struct tm SP_PSW+1(%r15),0x01 # interrupting from user ? @@ -530,9 +513,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + l %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - l %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+3(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -540,7 +524,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) @@ -550,7 +533,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) la %r2,SP_PTREGS(%r15) # address of register-save area l %r1,BASED(.Lhandle_per) # load adr. of per handler basr %r14,%r1 # branch to do_single_step @@ -965,9 +947,11 @@ cleanup_system_call: s %r15,BASED(.Lc_spsize) # make room for registers & psw st %r15,12(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA + mvc 0(4,%r12),__LC_THREAD_INFO + l %r12,__LC_THREAD_INFO mvc SP_PSW(8,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - mvc 0(4,%r12),__LC_THREAD_INFO + oi __TI_flags+3(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+4(4),BASED(cleanup_system_call_insn+12) bhe BASED(cleanup_stime) diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 7ff07d3a29c1..e34907560c23 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -50,11 +50,11 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER STACK_SIZE = 1 << STACK_SHIFT _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP ) + _TIF_MCCK_PENDING | _TIF_PER_TRAP ) _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_MCCK_PENDING) -_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ - _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8) +_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ + _TIF_SYSCALL_TRACEPOINT) _TIF_EXIT_SIE = (_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING) #define BASED(name) name-system_call(%r13) @@ -248,9 +248,10 @@ ENTRY(system_call) sysc_saveall: SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+7(%r12),_TIF_SYSCALL sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER sysc_stime: @@ -276,7 +277,7 @@ sysc_nr_ok: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys @@ -286,23 +287,19 @@ sysc_noemu: sysc_return: LOCKDEP_SYS_EXIT sysc_tif: + tm SP_PSW+1(%r15),0x01 # returning to user ? + jno sysc_restore tm __TI_flags+7(%r12),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) + ni __TI_flags+7(%r12),255-_TIF_SYSCALL sysc_restore: RESTORE_ALL __LC_RETURN_PSW,1 sysc_done: -# -# There is work to do, but first we need to check if we return to userspace. -# -sysc_work: - tm SP_PSW+1(%r15),0x01 # returning to user ? - jno sysc_restore - # # One of the work bits is on. Find out which one. # -sysc_work_tif: +sysc_work: tm __TI_flags+7(%r12),_TIF_MCCK_PENDING jo sysc_mcck_pending tm __TI_flags+7(%r12),_TIF_NEED_RESCHED @@ -311,8 +308,6 @@ sysc_work_tif: jo sysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo sysc_notify_resume - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart tm __TI_flags+7(%r12),_TIF_PER_TRAP jo sysc_singlestep j sysc_return # beware of critical section cleanup @@ -338,11 +333,15 @@ sysc_sigpending: ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP la %r2,SP_PTREGS(%r15) # load pt_regs brasl %r14,do_signal # call do_signal - tm __TI_flags+7(%r12),_TIF_RESTART_SVC - jo sysc_restart - tm __TI_flags+7(%r12),_TIF_PER_TRAP - jo sysc_singlestep - j sysc_return + tm __TI_flags+7(%r12),_TIF_SYSCALL + jno sysc_return + lmg %r2,%r6,SP_R2(%r15) # load svc arguments + lghi %r7,0 # svc 0 returns -ENOSYS + lh %r1,SP_SVC_CODE+2(%r15) # load new svc number + cghi %r1,NR_syscalls + jnl sysc_nr_ok # invalid svc number -> do svc 0 + slag %r7,%r1,2 + j sysc_nr_ok # restart svc # # _TIF_NOTIFY_RESUME is set, call do_notify_resume @@ -352,25 +351,11 @@ sysc_notify_resume: larl %r14,sysc_return jg do_notify_resume # call do_notify_resume -# -# _TIF_RESTART_SVC is set, set up registers and restart svc -# -sysc_restart: - ni __TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC - lmg %r2,%r6,SP_R2(%r15) # load svc arguments - lghi %r7,0 # svc 0 returns -ENOSYS - lh %r1,SP_SVC_CODE+2(%r15) # load new svc number - cghi %r1,NR_syscalls - jnl sysc_nr_ok # invalid svc number -> do svc 0 - slag %r7,%r1,2 - j sysc_nr_ok # restart svc - # # _TIF_PER_TRAP is set, call do_per_trap # sysc_singlestep: - ni __TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc code + ni __TI_flags+7(%r12),255-(_TIF_SYSCALL | _TIF_PER_TRAP) la %r2,SP_PTREGS(%r15) # address of register-save area larl %r14,sysc_return # load adr. of system return jg do_per_trap @@ -397,7 +382,7 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+6(%r12),_TIF_SYSCALL + tm __TI_flags+6(%r12),_TIF_TRACE >> 8 jz sysc_return la %r2,SP_PTREGS(%r15) # load pt_regs larl %r14,sysc_return # return point is sysc_return @@ -470,7 +455,6 @@ ENTRY(pgm_check_handler) jnz pgm_per # got per exception -> special case SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) mvc SP_PSW(16,%r15),__LC_PGM_OLD_PSW lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct HANDLE_SIE_INTERCEPT @@ -550,9 +534,10 @@ pgm_exit2: pgm_svcper: SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA CREATE_STACK_FRAME __LC_SAVE_AREA + lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct + oi __TI_flags+7(%r12),(_TIF_SYSCALL | _TIF_PER_TRAP) UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER @@ -561,7 +546,6 @@ pgm_svcper: mvc __THREAD_per_cause(2,%r8),__LC_PER_CAUSE mvc __THREAD_per_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per_paid(1,%r8),__LC_PER_PAID - oi __TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc @@ -571,7 +555,6 @@ pgm_svcper: # kernel_per: REENABLE_IRQS - xc SP_SVC_CODE(4,%r15),SP_SVC_CODE(%r15) # clear svc number la %r2,SP_PTREGS(%r15) # address of register-save area brasl %r14,do_per_trap j pgm_exit @@ -972,9 +955,11 @@ cleanup_system_call: stg %r15,32(%r12) stg %r11,0(%r12) CREATE_STACK_FRAME __LC_SAVE_AREA + mvc 8(8,%r12),__LC_THREAD_INFO + lg %r12,__LC_THREAD_INFO mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_SVC_CODE(4,%r15),__LC_SVC_ILC - mvc 8(8,%r12),__LC_THREAD_INFO + oi __TI_flags+7(%r12),_TIF_SYSCALL cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) jhe cleanup_stime diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index bae1cc49fe96..3519c99ae0c0 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -750,7 +750,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - regs->svc_code = 0; + clear_thread_flag(TIF_SYSCALL); ret = -1; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 058e372bada1..0e905cb7604a 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -157,7 +157,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) current->thread.fp_regs.fpc &= FPC_VALID_MASK; restore_fp_regs(¤t->thread.fp_regs); - regs->svc_code = 0; /* disable syscall checks */ + clear_thread_flag(TIF_SYSCALL); /* No longer in a system call */ return 0; } @@ -426,13 +426,14 @@ void do_signal(struct pt_regs *regs) * the debugger may change all our registers, including the system * call information. */ - current_thread_info()->system_call = regs->svc_code; + current_thread_info()->system_call = + test_thread_flag(TIF_SYSCALL) ? regs->svc_code : 0; signr = get_signal_to_deliver(&info, &ka, regs, NULL); - regs->svc_code = current_thread_info()->system_call; if (signr > 0) { /* Whee! Actually deliver the signal. */ - if (regs->svc_code > 0) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; /* Check for system call restarting. */ switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: @@ -453,7 +454,7 @@ void do_signal(struct pt_regs *regs) break; } /* No longer in a system call */ - regs->svc_code = 0; + clear_thread_flag(TIF_SYSCALL); } if ((is_compat_task() ? @@ -478,7 +479,8 @@ void do_signal(struct pt_regs *regs) } /* No handlers present - check for system call restart */ - if (regs->svc_code > 0) { + if (current_thread_info()->system_call) { + regs->svc_code = current_thread_info()->system_call; switch (regs->gprs[2]) { case -ERESTART_RESTARTBLOCK: /* Restart with sys_restart_syscall */ @@ -489,7 +491,10 @@ void do_signal(struct pt_regs *regs) case -ERESTARTNOINTR: /* Restart system call with magic TIF bit. */ regs->gprs[2] = regs->orig_gpr2; - set_thread_flag(TIF_RESTART_SVC); + set_thread_flag(TIF_SYSCALL); + break; + default: + clear_thread_flag(TIF_SYSCALL); break; } } -- cgit v1.2.3 From b50511e41aa51a89b4176784a670582424bc7db6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:50 +0100 Subject: [S390] cleanup psw related bits and pieces Split out addressing mode bits from PSW_BASE_BITS, rename PSW_BASE_BITS to PSW_MASK_BASE, get rid of psw_user32_bits, remove unused function enabled_wait(), introduce PSW_MASK_USER, and drop PSW_MASK_MERGE macros. Change psw_kernel_bits / psw_user_bits to contain only the bits that are always set in the respective mode. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/compat.h | 13 +++++----- arch/s390/include/asm/processor.h | 30 ++++++++-------------- arch/s390/include/asm/ptrace.h | 20 +++++---------- arch/s390/include/asm/system.h | 6 +++-- arch/s390/kernel/compat_linux.c | 9 +++---- arch/s390/kernel/compat_signal.c | 10 ++++---- arch/s390/kernel/early.c | 2 +- arch/s390/kernel/ipl.c | 4 +-- arch/s390/kernel/machine_kexec.c | 2 +- arch/s390/kernel/process.c | 3 ++- arch/s390/kernel/ptrace.c | 36 +++++++++++++------------- arch/s390/kernel/setup.c | 53 +++++++++++++++++---------------------- arch/s390/kernel/signal.c | 7 +++--- arch/s390/kernel/smp.c | 16 +++++++----- arch/s390/kernel/traps.c | 2 +- arch/s390/kernel/vtime.c | 9 ++++--- arch/s390/lib/delay.c | 3 ++- arch/s390/mm/fault.c | 2 +- 18 files changed, 105 insertions(+), 122 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index cdb9b78f6c08..2e49748b27da 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -12,6 +12,7 @@ #define PSW32_MASK_IO 0x02000000UL #define PSW32_MASK_EXT 0x01000000UL #define PSW32_MASK_KEY 0x00F00000UL +#define PSW32_MASK_BASE 0x00080000UL /* Always one */ #define PSW32_MASK_MCHECK 0x00040000UL #define PSW32_MASK_WAIT 0x00020000UL #define PSW32_MASK_PSTATE 0x00010000UL @@ -19,21 +20,19 @@ #define PSW32_MASK_CC 0x00003000UL #define PSW32_MASK_PM 0x00000f00UL -#define PSW32_ADDR_AMODE31 0x80000000UL +#define PSW32_MASK_USER 0x00003F00UL + +#define PSW32_ADDR_AMODE 0x80000000UL #define PSW32_ADDR_INSN 0x7FFFFFFFUL -#define PSW32_BASE_BITS 0x00080000UL +#define PSW32_DEFAULT_KEY (((u32) PAGE_DEFAULT_ACC) << 20) #define PSW32_ASC_PRIMARY 0x00000000UL #define PSW32_ASC_ACCREG 0x00004000UL #define PSW32_ASC_SECONDARY 0x00008000UL #define PSW32_ASC_HOME 0x0000C000UL -#define PSW32_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW32_MASK_CC|PSW32_MASK_PM)) | \ - ((NEW) & (PSW32_MASK_CC|PSW32_MASK_PM))) - -extern long psw32_user_bits; +extern u32 psw32_user_bits; #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "s390\0\0\0\0" diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 306c93c1b184..20ffb12d4ae9 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -118,17 +118,17 @@ struct stack_frame { /* * Do necessary setup to start up a new thread. */ -#define start_thread(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ +#define start_thread(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ } while (0) -#define start_thread31(regs, new_psw, new_stackp) do { \ - regs->psw.mask = psw_user32_bits; \ - regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ - regs->gprs[15] = new_stackp; \ - crst_table_downgrade(current->mm, 1UL << 31); \ +#define start_thread31(regs, new_psw, new_stackp) do { \ + regs->psw.mask = psw_user_bits | PSW_MASK_BA; \ + regs->psw.addr = new_psw | PSW_ADDR_AMODE; \ + regs->gprs[15] = new_stackp; \ + crst_table_downgrade(current->mm, 1UL << 31); \ } while (0) /* Forward declaration, a strange C thing */ @@ -233,25 +233,15 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc) #endif } -/* - * Function to stop a processor until an interruption occurred - */ -static inline void enabled_wait(void) -{ - __load_psw_mask(PSW_BASE_BITS | PSW_MASK_IO | PSW_MASK_EXT | - PSW_MASK_MCHECK | PSW_MASK_WAIT | PSW_DEFAULT_KEY); -} - /* * Function to drop a processor into disabled wait state */ - static inline void ATTRIB_NORET disabled_wait(unsigned long code) { unsigned long ctl_buf; psw_t dw_psw; - dw_psw.mask = PSW_BASE_BITS | PSW_MASK_WAIT; + dw_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA; dw_psw.addr = code; /* * Store status and then load disabled wait psw, diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 2904cc66967a..6fc00d268143 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -230,6 +230,7 @@ typedef struct #define PSW_MASK_IO 0x02000000UL #define PSW_MASK_EXT 0x01000000UL #define PSW_MASK_KEY 0x00F00000UL +#define PSW_MASK_BASE 0x00080000UL /* always one */ #define PSW_MASK_MCHECK 0x00040000UL #define PSW_MASK_WAIT 0x00020000UL #define PSW_MASK_PSTATE 0x00010000UL @@ -239,10 +240,11 @@ typedef struct #define PSW_MASK_EA 0x00000000UL #define PSW_MASK_BA 0x00000000UL +#define PSW_MASK_USER 0x00003F00UL + #define PSW_ADDR_AMODE 0x80000000UL #define PSW_ADDR_INSN 0x7FFFFFFFUL -#define PSW_BASE_BITS 0x00080000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20) #define PSW_ASC_PRIMARY 0x00000000UL @@ -256,6 +258,7 @@ typedef struct #define PSW_MASK_DAT 0x0400000000000000UL #define PSW_MASK_IO 0x0200000000000000UL #define PSW_MASK_EXT 0x0100000000000000UL +#define PSW_MASK_BASE 0x0000000000000000UL #define PSW_MASK_KEY 0x00F0000000000000UL #define PSW_MASK_MCHECK 0x0004000000000000UL #define PSW_MASK_WAIT 0x0002000000000000UL @@ -266,11 +269,11 @@ typedef struct #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL +#define PSW_MASK_USER 0x00003F0000000000UL + #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL -#define PSW_BASE_BITS 0x0000000180000000UL -#define PSW_BASE32_BITS 0x0000000080000000UL #define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52) #define PSW_ASC_PRIMARY 0x0000000000000000UL @@ -283,18 +286,7 @@ typedef struct #ifdef __KERNEL__ extern long psw_kernel_bits; extern long psw_user_bits; -#ifdef CONFIG_64BIT -extern long psw_user32_bits; #endif -#endif - -/* This macro merges a NEW PSW mask specified by the user into - the currently active PSW mask CURRENT, modifying only those - bits in CURRENT that the user may be allowed to change: this - is the condition code and the program mask bits. */ -#define PSW_MASK_MERGE(CURRENT,NEW) \ - (((CURRENT) & ~(PSW_MASK_CC|PSW_MASK_PM)) | \ - ((NEW) & (PSW_MASK_CC|PSW_MASK_PM))) /* * The s390_regs structure is used to define the elf_gregset_t. diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index afb849e4bf4b..d73cc6b60000 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -212,8 +212,10 @@ __set_psw_mask(unsigned long mask) __load_psw_mask(mask | (arch_local_save_flags() & ~(-1UL >> 8))); } -#define local_mcck_enable() __set_psw_mask(psw_kernel_bits) -#define local_mcck_disable() __set_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK) +#define local_mcck_enable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK) +#define local_mcck_disable() \ + __set_psw_mask(psw_kernel_bits | PSW_MASK_DAT) #ifdef CONFIG_SMP diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 53acaa86dd94..24b218737b9b 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -60,12 +60,9 @@ #include "compat_linux.h" -long psw_user32_bits = (PSW_BASE32_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); -long psw32_user_bits = (PSW32_BASE_BITS | PSW32_MASK_DAT | PSW32_ASC_HOME | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE); +u32 psw32_user_bits = PSW32_MASK_DAT | PSW32_MASK_IO | PSW32_MASK_EXT | + PSW32_DEFAULT_KEY | PSW32_MASK_BASE | PSW32_MASK_MCHECK | + PSW32_MASK_PSTATE | PSW32_ASC_HOME; /* For this source file, we want overflow handling. */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 72a1c8d8d212..fd83b69207f5 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -300,9 +300,9 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) _s390_regs_common32 regs32; int err, i; - regs32.psw.mask = PSW32_MASK_MERGE(psw32_user_bits, - (__u32)(regs->psw.mask >> 32)); - regs32.psw.addr = PSW32_ADDR_AMODE31 | (__u32) regs->psw.addr; + regs32.psw.mask = psw32_user_bits | + ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); + regs32.psw.addr = PSW32_ADDR_AMODE | (__u32) regs->psw.addr; for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -327,8 +327,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) err = __copy_from_user(®s32, &sregs->regs, sizeof(regs32)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - (__u64)regs32.psw.mask << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32; regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f297456dba7a..37394b3413e2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -252,7 +252,7 @@ static noinline __init void setup_lowcore_early(void) { psw_t psw; - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ca0520c52547..296458360a32 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2033,12 +2033,12 @@ void s390_reset_system(void (*func)(void *), void *data) __ctl_clear_bit(0,28); /* Set new machine check handler */ - S390_lowcore.mcck_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.mcck_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_mcck_handler; /* Set new program check handler */ - S390_lowcore.program_new_psw.mask = psw_kernel_bits & ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask = psw_kernel_bits | PSW_MASK_DAT; S390_lowcore.program_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 13a0b528c70b..3cd0f25ab015 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -108,7 +108,7 @@ static void __do_machine_kdump(void *image) #ifdef CONFIG_CRASH_DUMP int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); setup_regs(); start_kdump(1); #endif diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 541a7509faeb..5e64a9a29ea4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -117,7 +117,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) struct pt_regs regs; memset(®s, 0, sizeof(regs)); - regs.psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + regs.psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE; regs.gprs[9] = (unsigned long) fn; regs.gprs[10] = (unsigned long) arg; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 3519c99ae0c0..41e20763886e 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -169,8 +169,9 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) */ tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) - /* Remove per bit from user psw. */ - tmp &= ~PSW_MASK_PER; + /* Return a clean psw mask. */ + tmp = psw_user_bits | (tmp & PSW_MASK_USER) | + PSW_MASK_EA | PSW_MASK_BA; } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -285,17 +286,18 @@ static inline void __poke_user_per(struct task_struct *child, static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset; + addr_t offset, tmp; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ + tmp = (data & ~PSW_MASK_USER) ^ psw_user_bits; if (addr == (addr_t) &dummy->regs.psw.mask && #ifdef CONFIG_COMPAT - data != PSW_MASK_MERGE(psw_user32_bits, data) && + tmp != PSW_MASK_BA && #endif - data != PSW_MASK_MERGE(psw_user_bits, data)) + tmp != (PSW_MASK_EA | PSW_MASK_BA)) /* Invalid psw mask. */ return -EINVAL; #ifndef CONFIG_64BIT @@ -505,21 +507,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) __u32 tmp; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Fake a 31 bit psw mask. */ - tmp = (__u32)(task_pt_regs(child)->psw.mask >> 32); - tmp = PSW32_MASK_MERGE(psw32_user_bits, tmp); + tmp = (__u32)(regs->psw.mask >> 32); + tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) task_pt_regs(child)->psw.addr | - PSW32_ADDR_AMODE31; + tmp = (__u32) regs->psw.addr | PSW32_ADDR_AMODE; } else { /* gpr 0-15 */ - tmp = *(__u32 *)((addr_t) &task_pt_regs(child)->psw + - addr*2 + 4); + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* @@ -604,20 +605,20 @@ static int __poke_user_compat(struct task_struct *child, addr_t offset; if (addr < (addr_t) &dummy32->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw, gprs, acrs and orig_gpr2 are stored on the stack */ if (addr == (addr_t) &dummy32->regs.psw.mask) { /* Build a 64 bit psw mask from 31 bit mask. */ - if (tmp != PSW32_MASK_MERGE(psw32_user_bits, tmp)) + if ((tmp & ~PSW32_MASK_USER) != psw32_user_bits) /* Invalid psw mask. */ return -EINVAL; - task_pt_regs(child)->psw.mask = - PSW_MASK_MERGE(psw_user32_bits, (__u64) tmp << 32); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ - task_pt_regs(child)->psw.addr = - (__u64) tmp & PSW32_ADDR_INSN; + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; /* * The debugger changed the instruction address, * reset system call restart, see signal.c:do_signal @@ -625,8 +626,7 @@ static int __poke_user_compat(struct task_struct *child, task_thread_info(child)->system_call = 0; } else { /* gpr 0-15 */ - *(__u32*)((addr_t) &task_pt_regs(child)->psw - + addr*2 + 4) = tmp; + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; } } else if (addr < (addr_t) (&dummy32->regs.orig_gpr2)) { /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6f8e3777a0c8..8ac6bfa2786c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -62,11 +62,11 @@ #include #include -long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY); -long psw_user_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY); +long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY | + PSW_MASK_EA | PSW_MASK_BA; +long psw_user_bits = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | + PSW_MASK_PSTATE | PSW_ASC_HOME; /* * User copy operations. @@ -278,22 +278,14 @@ early_param("mem", early_parse_mem); unsigned int user_mode = HOME_SPACE_MODE; EXPORT_SYMBOL_GPL(user_mode); -static int set_amode_and_uaccess(unsigned long user_amode, - unsigned long user32_amode) +static int set_amode_primary(void) { - psw_user_bits = PSW_BASE_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; + psw_kernel_bits = (psw_kernel_bits & ~PSW_MASK_ASC) | PSW_ASC_HOME; + psw_user_bits = (psw_user_bits & ~PSW_MASK_ASC) | PSW_ASC_PRIMARY; #ifdef CONFIG_COMPAT - psw_user32_bits = PSW_BASE32_BITS | PSW_MASK_DAT | user_amode | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK | - PSW_MASK_PSTATE | PSW_DEFAULT_KEY; - psw32_user_bits = PSW32_BASE_BITS | PSW32_MASK_DAT | user32_amode | - PSW32_MASK_IO | PSW32_MASK_EXT | PSW32_MASK_MCHECK | - PSW32_MASK_PSTATE; + psw32_user_bits = + (psw32_user_bits & ~PSW32_MASK_ASC) | PSW32_ASC_PRIMARY; #endif - psw_kernel_bits = PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_HOME | - PSW_MASK_MCHECK | PSW_DEFAULT_KEY; if (MACHINE_HAS_MVCOS) { memcpy(&uaccess, &uaccess_mvcos_switch, sizeof(uaccess)); @@ -329,7 +321,7 @@ early_param("user_mode", early_parse_user_mode); static void setup_addressing_mode(void) { if (user_mode == PRIMARY_SPACE_MODE) { - if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY)) + if (set_amode_primary()) pr_info("Address spaces switched, " "mvcos available\n"); else @@ -348,24 +340,25 @@ setup_lowcore(void) */ BUILD_BUG_ON(sizeof(struct _lowcore) != LC_PAGES * 4096); lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0); - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = psw_kernel_bits; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; - if (user_mode != HOME_SPACE_MODE) - lc->restart_psw.mask |= PSW_ASC_HOME; - lc->external_new_psw.mask = psw_kernel_bits; + lc->external_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->external_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = psw_kernel_bits | PSW_MASK_IO | PSW_MASK_EXT; + lc->svc_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; lc->svc_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) system_call; - lc->program_new_psw.mask = psw_kernel_bits; + lc->program_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->program_new_psw.addr = - PSW_ADDR_AMODE | (unsigned long)pgm_check_handler; - lc->mcck_new_psw.mask = - psw_kernel_bits & ~PSW_MASK_MCHECK & ~PSW_MASK_DAT; + PSW_ADDR_AMODE | (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = psw_kernel_bits; lc->mcck_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = psw_kernel_bits; + lc->io_new_psw.mask = psw_kernel_bits | + PSW_MASK_DAT | PSW_MASK_MCHECK; lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler; lc->clock_comparator = -1ULL; lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE; @@ -554,7 +547,7 @@ static void __init setup_restart_psw(void) * Setup restart PSW for absolute zero lowcore. This is necesary * if PSW restart is done on an offline CPU that has lowcore zero */ - psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + psw.mask = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw)); } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 0e905cb7604a..c19755815e53 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -117,7 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = PSW_MASK_MERGE(psw_user_bits, regs->psw.mask); + user_sregs.regs.psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -144,8 +145,8 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; - regs->psw.mask = PSW_MASK_MERGE(regs->psw.mask, - user_sregs.regs.psw.mask); + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (user_sregs.regs.psw.mask & PSW_MASK_USER); regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index e3f51dfa5cad..6c8a977af595 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -108,7 +108,7 @@ void smp_restart_with_online_cpu(void) for_each_online_cpu(cpu) { if (stap() == __cpu_logical_map[cpu]) { /* We are online: Enable DAT again and return */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); return; } } @@ -130,14 +130,16 @@ void smp_switch_to_ipl_cpu(void (*func)(void *), void *data) if (smp_processor_id() == 0) func(data); - __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY); + __load_psw_mask(PSW_DEFAULT_KEY | PSW_MASK_BASE | + PSW_MASK_EA | PSW_MASK_BA); /* Disable lowcore protection */ __ctl_clear_bit(0, 28); current_lc = lowcore_ptr[smp_processor_id()]; lc = lowcore_ptr[0]; if (!lc) lc = current_lc; - lc->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lc->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lc->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) smp_restart_cpu; if (!cpu_online(0)) smp_switch_to_cpu(func, data, 0, stap(), __cpu_logical_map[0]); @@ -159,7 +161,7 @@ void smp_send_stop(void) int cpu, rc; /* Disable all interrupts/machine checks */ - __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); + __load_psw_mask(psw_kernel_bits | PSW_MASK_DAT); trace_hardirqs_off(); /* stop all processors */ @@ -501,7 +503,8 @@ int __cpuinit start_secondary(void *cpuvoid) set_cpu_online(smp_processor_id(), true); ipi_call_unlock(); __ctl_clear_bit(0, 28); /* Disable lowcore protection */ - S390_lowcore.restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + S390_lowcore.restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; S390_lowcore.restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) psw_restart_int_handler; __ctl_set_bit(0, 28); /* Enable lowcore protection */ @@ -549,7 +552,8 @@ static int __cpuinit smp_alloc_lowcore(int cpu) memset((char *)lowcore + 512, 0, sizeof(*lowcore) - 512); lowcore->async_stack = async_stack + ASYNC_SIZE; lowcore->panic_stack = panic_stack + PAGE_SIZE; - lowcore->restart_psw.mask = PSW_BASE_BITS | PSW_DEFAULT_KEY; + lowcore->restart_psw.mask = + PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_EA | PSW_MASK_BA; lowcore->restart_psw.addr = PSW_ADDR_AMODE | (unsigned long) restart_int_handler; if (user_mode != HOME_SPACE_MODE) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index ffabcd9d3363..79eee3f27afb 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -200,7 +200,7 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); #ifdef CONFIG_64BIT - printk(" EA:%x", mask_bits(regs, PSW_BASE_BITS)); + printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); #endif printk("\n%s GPRS: " FOURLONG, mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 2d6228f60cd6..4f3de980b0e5 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -170,7 +170,8 @@ void __kprobes vtime_stop_cpu(void) psw_t psw; /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | + PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; idle->nohz_delay = 0; @@ -183,7 +184,8 @@ void __kprobes vtime_stop_cpu(void) * set_cpu_timer(VTIMER_MAX_SLICE); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. @@ -216,7 +218,8 @@ void __kprobes vtime_stop_cpu(void) * vq->idle = get_cpu_timer(); * idle->idle_enter = get_clock(); * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); + * PSW_MASK_DAT | PSW_MASK_IO | + * PSW_MASK_EXT | PSW_MASK_MCHECK); * The difference is that the inline assembly makes sure that * the last three instruction are stpt, stck and lpsw in that * order. This is done to increase the precision. diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index a65229d91c92..db92f044024c 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -32,7 +32,8 @@ static void __udelay_disabled(unsigned long long usecs) u64 clock_saved; u64 end; - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT | + PSW_MASK_EXT | PSW_MASK_MCHECK; end = get_clock() + (usecs << 12); clock_saved = local_tick_disable(); __ctl_store(cr0_saved, 0, 0); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a90fd91a9c72..de3af0c053c0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -454,7 +454,7 @@ int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) struct pt_regs regs; int access, fault; - regs.psw.mask = psw_kernel_bits; + regs.psw.mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_MCHECK; if (!irqs_disabled()) regs.psw.mask |= PSW_MASK_IO | PSW_MASK_EXT; regs.psw.addr = (unsigned long) __builtin_return_address(0); -- cgit v1.2.3 From d4e81b35b882d96f059afdb0f98e5b6025973b09 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:51 +0100 Subject: [S390] allow all addressing modes The user space program can change its addressing mode between the 24-bit, 31-bit and the 64-bit mode if the kernel is 64 bit. Currently the kernel always forces the standard amode on signal delivery and signal return and on ptrace: 64-bit for a 64-bit process, 31-bit for a compat process and 31-bit kernels. Change the signal and ptrace code to allow the full range of addressing modes. Signal handlers are run in the standard addressing mode for the process. One caveat is that even an 31-bit compat process can switch to the 64-bit mode. The next signal will switch back into the 31-bit mode and there is no room in the 31-bit compat signal frame to store the information that the program came from the 64-bit mode. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ptrace.h | 2 +- arch/s390/kernel/compat_signal.c | 12 ++++++++---- arch/s390/kernel/ptrace.c | 25 ++++++++++--------------- arch/s390/kernel/signal.c | 14 ++++++++++---- 4 files changed, 29 insertions(+), 24 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 6fc00d268143..a65846340d51 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -269,7 +269,7 @@ typedef struct #define PSW_MASK_EA 0x0000000100000000UL #define PSW_MASK_BA 0x0000000080000000UL -#define PSW_MASK_USER 0x00003F0000000000UL +#define PSW_MASK_USER 0x00003F0180000000UL #define PSW_ADDR_AMODE 0x0000000000000000UL #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index fd83b69207f5..a1dc6457016c 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -302,7 +302,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) regs32.psw.mask = psw32_user_bits | ((__u32)(regs->psw.mask >> 32) & PSW32_MASK_USER); - regs32.psw.addr = PSW32_ADDR_AMODE | (__u32) regs->psw.addr; + regs32.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); for (i = 0; i < NUM_GPRS; i++) regs32.gprs[i] = (__u32) regs->gprs[i]; save_access_regs(current->thread.acrs); @@ -328,7 +329,8 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) if (err) return err; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32; + (__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(regs32.psw.addr & PSW32_ADDR_AMODE); regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN); for (i = 0; i < NUM_GPRS; i++) regs->gprs[i] = (__u64) regs32.gprs[i]; @@ -496,9 +498,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up to return from userspace. If provided, use a stub already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + regs->gprs[14] = (__u64) ka->sa.sa_restorer | PSW32_ADDR_AMODE; } else { - regs->gprs[14] = (__u64) frame->retcode; + regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, (u16 __user *)(frame->retcode))) goto give_sigsegv; @@ -510,6 +512,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); @@ -573,6 +576,7 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; + regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 41e20763886e..450931a45b68 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -170,8 +170,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); if (addr == (addr_t) &dummy->regs.psw.mask) /* Return a clean psw mask. */ - tmp = psw_user_bits | (tmp & PSW_MASK_USER) | - PSW_MASK_EA | PSW_MASK_BA; + tmp = psw_user_bits | (tmp & PSW_MASK_USER); } else if (addr < (addr_t) &dummy->regs.orig_gpr2) { /* @@ -286,26 +285,17 @@ static inline void __poke_user_per(struct task_struct *child, static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) { struct user *dummy = NULL; - addr_t offset, tmp; + addr_t offset; if (addr < (addr_t) &dummy->regs.acrs) { /* * psw and gprs are stored on the stack */ - tmp = (data & ~PSW_MASK_USER) ^ psw_user_bits; if (addr == (addr_t) &dummy->regs.psw.mask && -#ifdef CONFIG_COMPAT - tmp != PSW_MASK_BA && -#endif - tmp != (PSW_MASK_EA | PSW_MASK_BA)) + ((data & ~PSW_MASK_USER) != psw_user_bits || + ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)))) /* Invalid psw mask. */ return -EINVAL; -#ifndef CONFIG_64BIT - if (addr == (addr_t) &dummy->regs.psw.addr) - /* I'd like to reject addresses without the - high order bit but older gdb's rely on it */ - data |= PSW_ADDR_AMODE; -#endif if (addr == (addr_t) &dummy->regs.psw.addr) /* * The debugger changed the instruction address, @@ -517,7 +507,8 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) tmp = psw32_user_bits | (tmp & PSW32_MASK_USER); } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Fake a 31 bit psw address. */ - tmp = (__u32) regs->psw.addr | PSW32_ADDR_AMODE; + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); } else { /* gpr 0-15 */ tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); @@ -615,10 +606,14 @@ static int __poke_user_compat(struct task_struct *child, /* Invalid psw mask. */ return -EINVAL; regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | (__u64)(tmp & PSW32_MASK_USER) << 32; } else if (addr == (addr_t) &dummy32->regs.psw.addr) { /* Build a 64 bit psw address from 31 bit address. */ regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. */ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); /* * The debugger changed the instruction address, * reset system call restart, see signal.c:do_signal diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index c19755815e53..05a85bc14c98 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -117,8 +117,8 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) /* Copy a 'clean' PSW mask to the user to avoid leaking information about whether PER is currently on. */ - user_sregs.regs.psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA | - (regs->psw.mask & PSW_MASK_USER); + user_sregs.regs.psw.mask = psw_user_bits | + (regs->psw.mask & PSW_MASK_USER); user_sregs.regs.psw.addr = regs->psw.addr; memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, @@ -145,9 +145,13 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) err = __copy_from_user(&user_sregs, sregs, sizeof(_sigregs)); if (err) return err; + /* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */ regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | - (user_sregs.regs.psw.mask & PSW_MASK_USER); - regs->psw.addr = PSW_ADDR_AMODE | user_sregs.regs.psw.addr; + (user_sregs.regs.psw.mask & PSW_MASK_USER); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(sregs->regs.acrs)); @@ -290,6 +294,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); @@ -358,6 +363,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; + regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */ regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); -- cgit v1.2.3 From 65b4e403ac926f5196c2f28c4ec783d32dc322f0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Sun, 30 Oct 2011 15:16:54 +0100 Subject: [S390] chsc_sch: add support for irq statistics Add support for CHSC I/O interrupt statistics in /proc/interrupts. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 1 + arch/s390/kernel/irq.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 1f686059f293..eadfa9fc03ba 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -28,6 +28,7 @@ enum interruption_class { IOINT_CLW, IOINT_CTC, IOINT_APB, + IOINT_CSC, NMI_NMI, NR_IRQS, }; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index d382f9db3df5..30faaef34457 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -53,6 +53,7 @@ static const struct irq_class intrclass_names[] = { {.name = "CLW", .desc = "[I/O] CLAW" }, {.name = "CTC", .desc = "[I/O] CTC" }, {.name = "APB", .desc = "[I/O] AP Bus" }, + {.name = "CSC", .desc = "[I/O] CHSC Subchannel" }, {.name = "NMI", .desc = "[NMI] Machine Check" }, }; -- cgit v1.2.3 From d98e19ccef10c5aadccc4ffe2925e4099ff9606a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:16:58 +0100 Subject: [S390] smp: external call vs. emergency signal Use a sigp sense running to decide which signal processor order to use for an ipi. If the target cpu is running use external call, if the target cpu is not running use emergency signal. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/head31.S | 2 +- arch/s390/kernel/head64.S | 2 +- arch/s390/kernel/smp.c | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index f21954b44dc1..d3f1ab7d90ad 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -92,7 +92,7 @@ ENTRY(_stext) .LPG3: # check control registers stctl %c0,%c15,0(%r15) - oi 2(%r15),0x40 # enable sigp emergency signal + oi 2(%r15),0x60 # enable sigp emergency & external call oi 0(%r15),0x10 # switch on low address protection lctl %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index ae5d492b069e..99348c0eaa41 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -90,7 +90,7 @@ ENTRY(_stext) .LPG3: # check control registers stctg %c0,%c15,0(%r15) - oi 6(%r15),0x40 # enable sigp emergency signal + oi 6(%r15),0x60 # enable sigp emergency & external call oi 4(%r15),0x10 # switch on low address proctection lctlg %c0,%c15,0(%r15) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6c8a977af595..3bde5688ceb5 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -209,12 +209,19 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, */ static void smp_ext_bitcall(int cpu, int sig) { + int order; + /* * Set signaling bit in lowcore of target cpu and kick it */ set_bit(sig, (unsigned long *) &lowcore_ptr[cpu]->ext_call_fast); - while (sigp(cpu, sigp_emergency_signal) == sigp_busy) + while (1) { + order = smp_vcpu_scheduled(cpu) ? + sigp_external_call : sigp_emergency_signal; + if (sigp(cpu, order) != sigp_busy) + break; udelay(10); + } } void arch_send_call_function_ipi_mask(const struct cpumask *mask) @@ -754,6 +761,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_interrupt(0x1202, do_ext_call_interrupt) != 0) + panic("Couldn't request external interrupt 0x1202"); /* Reallocate current lowcore, but keep its contents. */ lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); -- cgit v1.2.3 From 69ba97436647f3b793f8e0784d1cde63adf241ea Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:16:59 +0100 Subject: [S390] load user asce on sie_fault On sie_fault we need to switch back to user ASCE. Otherwise we get interresting effects when exiting to "userspace" while the guest space is still active. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e34907560c23..83a93747e2fd 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -1081,6 +1081,7 @@ sie_exit: lghi %r2,0 br %r14 sie_fault: + lctlg %c1,%c1,__LC_USER_ASCE # load primary asce lg %r14,__LC_THREAD_INFO # pointer thread_info struct ni __TI_flags+6(%r14),255-(_TIF_SIE>>8) lg %r14,__SF_EMPTY+8(%r15) # load guest register save area -- cgit v1.2.3 From a9162f238a84ee05b09ea4b0ebd97fb20448c28c Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:00 +0100 Subject: [S390] fix possible deadlock in gmap_map_segment Fix possible deadlock reported by lockdep: qemu-system-s39/2963 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: gmap_alloc_table+0x9c/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: gmap_map_segment+0xa6/0x27c Actually gmap_alloc_table is the only called in gmap_map_segment with mmap_sem held, thus it's safe to simply remove the inner lock. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 529a08838376..e4a4cefb92b3 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -256,6 +256,9 @@ void gmap_disable(struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_disable); +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, unsigned long init) { @@ -267,14 +270,12 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - down_read(&gmap->mm->mmap_sem); if (*table & _REGION_ENTRY_INV) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); } else __free_pages(page, ALLOC_ORDER); - up_read(&gmap->mm->mmap_sem); return 0; } -- cgit v1.2.3 From cc772456ac9b460693492b3a3d89e8c81eda5874 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:01 +0100 Subject: [S390] fix list corruption in gmap reverse mapping This introduces locking via mm->page_table_lock to protect the rmap list for guest mappings from being corrupted by concurrent operations. Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e4a4cefb92b3..96e85ac89269 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -222,6 +222,7 @@ void gmap_free(struct gmap *gmap) /* Free all segment & region tables. */ down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { table = (unsigned long *) page_to_phys(page); if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) @@ -230,6 +231,7 @@ void gmap_free(struct gmap *gmap) gmap_unlink_segment(gmap, table); __free_pages(page, ALLOC_ORDER); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); list_del(&gmap->list); kfree(gmap); @@ -300,6 +302,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -321,6 +324,7 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) *table = _SEGMENT_ENTRY_INV; } out: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); @@ -351,6 +355,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush = 0; down_read(&gmap->mm->mmap_sem); + spin_lock(&gmap->mm->page_table_lock); for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); @@ -374,12 +379,14 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, flush |= gmap_unlink_segment(gmap, table); *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); } + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; out_unmap: + spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); gmap_unmap_segment(gmap, to, len); return -ENOMEM; @@ -446,7 +453,9 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) page = pmd_page(*pmd); mp = (struct gmap_pgtable *) page->index; rmap->entry = table; + spin_lock(&mm->page_table_lock); list_add(&rmap->list, &mp->mapper); + spin_unlock(&mm->page_table_lock); /* Set gmap segment table entry to page table. */ *table = pmd_val(*pmd) & PAGE_MASK; return vmaddr | (address & ~PMD_MASK); -- cgit v1.2.3 From 499069e1a421e2a85e76846c3237f00f1a5cb435 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Sun, 30 Oct 2011 15:17:02 +0100 Subject: [S390] take mmap_sem when walking guest page table gmap_fault needs to walk the guest page table. However, parts of that may change if some other thread does munmap. In that case gmap_unmap_notifier will also unmap the corresponding parts from the guest page table. We need to take mmap_sem in order to serialize these operations. do_exception now calls __gmap_fault with mmap_sem held which does not get exported to modules. The exported function, which is called from KVM, now takes mmap_sem. Reported-by: Heiko Carstens Signed-off-by: Carsten Otte Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 1 + arch/s390/mm/fault.c | 2 +- arch/s390/mm/pgtable.c | 15 ++++++++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c0cb794bb365..bc5f520f6f87 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -696,6 +696,7 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long length); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); /* diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index de3af0c053c0..1766def5bc3f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access, #ifdef CONFIG_PGSTE if (test_tsk_thread_flag(current, TIF_SIE) && S390_lowcore.gmap) { - address = gmap_fault(address, + address = __gmap_fault(address, (struct gmap *) S390_lowcore.gmap); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 96e85ac89269..441d34445d0e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -393,7 +393,10 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +/* + * this function is assumed to be called with mmap_sem held + */ +unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) { unsigned long *table, vmaddr, segment; struct mm_struct *mm; @@ -461,7 +464,17 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) return vmaddr | (address & ~PMD_MASK); } return -EFAULT; +} + +unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_fault(address, gmap); + up_read(&gmap->mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(gmap_fault); -- cgit v1.2.3 From 388186bc920d9200202e4d25de66fa95b1b8fc68 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Sun, 30 Oct 2011 15:17:03 +0100 Subject: [S390] kvm: Handle diagnose 0x10 (release pages) Linux on System z uses a ballooner based on diagnose 0x10. (aka as collaborative memory management). This patch implements diagnose 0x10 on the guest address space. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/s390/kvm/diag.c | 32 +++++++++++++++++++++++++- arch/s390/kvm/kvm-s390.c | 1 + arch/s390/mm/pgtable.c | 49 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 82 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 00ff00dfb24c..46c0bdaf4792 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -143,6 +143,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; + u32 diagnose_10; u32 diagnose_44; }; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index bc5f520f6f87..34ede0ea85a9 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -698,6 +698,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9e4c84187cf5..87cedd61be04 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -1,7 +1,7 @@ /* * diag.c - handling diagnose instructions * - * Copyright IBM Corp. 2008 + * Copyright IBM Corp. 2008,2011 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -15,6 +15,34 @@ #include #include "kvm-s390.h" +static int diag_release_pages(struct kvm_vcpu *vcpu) +{ + unsigned long start, end; + unsigned long prefix = vcpu->arch.sie_block->prefix; + + start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; + end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; + + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + || start < 2 * PAGE_SIZE) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); + vcpu->stat.diagnose_10++; + + /* we checked for start > end above */ + if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + gmap_discard(start, end, vcpu->arch.gmap); + } else { + if (start < prefix) + gmap_discard(start, prefix, vcpu->arch.gmap); + if (end >= prefix) + gmap_discard(prefix + 2 * PAGE_SIZE, + end, vcpu->arch.gmap); + } + return 0; +} + static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); @@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; switch (code) { + case 0x10: + return diag_release_pages(vcpu); case 0x44: return __diag_time_slice_end(vcpu); case 0x308: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dc2b580e27bc..189d6bdcac08 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -69,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_10", VCPU_STAT(diagnose_10) }, { "diagnose_44", VCPU_STAT(diagnose_44) }, { NULL } }; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 441d34445d0e..301c84d3b542 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,5 +1,5 @@ /* - * Copyright IBM Corp. 2007,2009 + * Copyright IBM Corp. 2007,2011 * Author(s): Martin Schwidefsky */ @@ -478,6 +478,53 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_fault); +void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +{ + + unsigned long *table, address, size; + struct vm_area_struct *vma; + struct gmap_pgtable *mp; + struct page *page; + + down_read(&gmap->mm->mmap_sem); + address = from; + while (address < to) { + /* Walk the gmap address space page table */ + table = gmap->table + ((address >> 53) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 42) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 31) & 0x7ff); + if (unlikely(*table & _REGION_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((address >> 20) & 0x7ff); + if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + address = (address + PMD_SIZE) & PMD_MASK; + continue; + } + page = pfn_to_page(*table >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + vma = find_vma(gmap->mm, mp->vmaddr); + size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size, NULL); + address = (address + PMD_SIZE) & PMD_MASK; + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table) { struct gmap_rmap *rmap, *next; -- cgit v1.2.3 From 80376f347d70ce5fcfb98105d83624518e0911d6 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:04 +0100 Subject: [S390] Introduce get_clock_fast() Add get_clock_fast() which uses the slightly faster stckf if available. If stckf is not available fall back to stck, which has the same width. Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 88829a40af6f..d610bef9c5e9 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -86,6 +86,17 @@ static inline void get_clock_ext(char *clk) asm volatile("stcke %0" : "=Q" (*clk) : : "cc"); } +static inline unsigned long long get_clock_fast(void) +{ + unsigned long long clk; + + if (test_facility(25)) + asm volatile(".insn s,0xb27c0000,%0" : "=Q" (clk) : : "cc"); + else + clk = get_clock(); + return clk; +} + static inline unsigned long long get_clock_xt(void) { unsigned char clk[16]; -- cgit v1.2.3 From 399c1d8dbfdcf46977fd2e2a833b02e18a284810 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:10 +0100 Subject: [S390] sparse: fix access past end of array warnings Remove unnecessary code to avoid false positives from sparse, e.g. arch/s390/kernel/compat_signal.c:221:61: warning: invalid access past the end of 'set32' (8 8) Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_linux.c | 23 +++++------------------ arch/s390/kernel/compat_signal.c | 28 ++++------------------------ 2 files changed, 9 insertions(+), 42 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 24b218737b9b..84a982898448 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -362,12 +362,7 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, if (set) { if (copy_from_user (&s32, set, sizeof(compat_sigset_t))) return -EFAULT; - switch (_NSIG_WORDS) { - case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } + s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); } set_fs (KERNEL_DS); ret = sys_rt_sigprocmask(how, @@ -377,12 +372,8 @@ asmlinkage long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set, set_fs (old_fs); if (ret) return ret; if (oset) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (oset, &s32, sizeof(compat_sigset_t))) return -EFAULT; } @@ -401,12 +392,8 @@ asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, ret = sys_rt_sigpending((sigset_t __force __user *) &s, sigsetsize); set_fs (old_fs); if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } + s32.sig[1] = (s.sig[0] >> 32); + s32.sig[0] = s.sig[0]; if (copy_to_user (set, &s32, sizeof(compat_sigset_t))) return -EFAULT; } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a1dc6457016c..c68ea9c18046 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -213,16 +213,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = get_user(sa_handler, &act->sa_handler); ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } + new_ka.sa.sa_mask.sig[0] = + set32.sig[0] | (((long)set32.sig[1]) << 32); ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); if (ret) @@ -233,20 +225,8 @@ sys32_rt_sigaction(int sig, const struct sigaction32 __user *act, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); if (!ret && oact) { - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } + set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); + set32.sig[0] = old_ka.sa.sa_mask.sig[0]; ret = put_user((unsigned long)old_ka.sa.sa_handler, &oact->sa_handler); ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); -- cgit v1.2.3 From c4736d968254d71eba6814b2234a4e63f40aca15 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:11 +0100 Subject: [S390] sparse: fix sparse static warnings Make functions and data static to avoid sparse warnings. Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/misc.c | 2 +- arch/s390/kernel/ipl.c | 8 ++++---- arch/s390/kernel/kprobes.c | 2 +- arch/s390/kernel/vtime.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 028f23ea81d1..465eca756feb 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -61,7 +61,7 @@ static unsigned long free_mem_end_ptr; extern _sclp_print_early(const char *); -int puts(const char *s) +static int puts(const char *s) { _sclp_print_early(s); return 0; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 296458360a32..6296809c8e02 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -276,8 +276,8 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -size_t reipl_get_ascii_vmparm(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { int i; size_t len; @@ -339,8 +339,8 @@ static size_t scpdata_length(const char* buf, size_t count) return count; } -size_t reipl_append_ascii_scpdata(char *dest, size_t size, - const struct ipl_parameter_block *ipb) +static size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) { size_t count; size_t i; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1d05d669107c..64b761aef004 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -635,7 +635,7 @@ void __kprobes jprobe_return(void) asm volatile(".word 0x0002"); } -void __kprobes jprobe_return_end(void) +static void __used __kprobes jprobe_return_end(void) { asm volatile("bcr 0,0"); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4f3de980b0e5..bb48977f5469 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -461,7 +461,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) +static int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; -- cgit v1.2.3 From e54aafa0c3bef84bfd39b4c713942ae7cdcfc58a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:12 +0100 Subject: [S390] sparse: fix sparse ANSI-C warnings Fix prototype of some functions in arch/s390/oprofile to avoid non-ANSI warnings from sparse. Signed-off-by: Martin Schwidefsky --- arch/s390/oprofile/hwsampler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c index 4552ce40c81a..f43c0e4282af 100644 --- a/arch/s390/oprofile/hwsampler.c +++ b/arch/s390/oprofile/hwsampler.c @@ -994,7 +994,7 @@ allocate_error: * * Returns 0 on success, !0 on failure. */ -int hwsampler_deallocate() +int hwsampler_deallocate(void) { int rc; @@ -1035,7 +1035,7 @@ unsigned long hwsampler_get_sample_overflow_count(unsigned int cpu) return cb->sample_overflow; } -int hwsampler_setup() +int hwsampler_setup(void) { int rc; int cpu; @@ -1102,7 +1102,7 @@ setup_exit: return rc; } -int hwsampler_shutdown() +int hwsampler_shutdown(void) { int rc; @@ -1203,7 +1203,7 @@ start_all_exit: * * Returns 0 on success, !0 on failure. */ -int hwsampler_stop_all() +int hwsampler_stop_all(void) { int tmp_rc, rc, cpu; struct hws_cpu_buffer *cb; -- cgit v1.2.3 From 638ad34a8811119b32247b7722288ef8b90907d1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:13 +0100 Subject: [S390] sparse: fix sparse warnings about missing prototypes Add prototypes and includes for functions used in different modules. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 1 + arch/s390/include/asm/processor.h | 2 ++ arch/s390/include/asm/spinlock.h | 2 ++ arch/s390/include/asm/system.h | 2 ++ arch/s390/kernel/entry.h | 13 +++++++++++-- arch/s390/kernel/ipl.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/suspend.c | 1 + arch/s390/kernel/time.c | 1 + arch/s390/mm/mmap.c | 1 + arch/s390/mm/pageattr.c | 1 + 11 files changed, 24 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index accb372ddc7e..f7ec548c2b9d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,6 +177,7 @@ static inline int page_test_and_clear_young(unsigned long pfn) struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); +void arch_set_page_states(int make_stable); static inline int devmem_is_allowed(unsigned long pfn) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 20ffb12d4ae9..5f33d37d032c 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,6 +33,8 @@ static inline void get_cpu_id(struct cpuid *ptr) extern void s390_adjust_jiffies(void); extern int get_cpu_capability(unsigned int *); +extern const struct seq_operations cpuinfo_op; +extern int sysctl_ieee_emulation_warnings; /* * User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit. diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 56612fc8186e..fd94dfec8d08 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -13,6 +13,8 @@ #include +extern int spin_retry; + static inline int _raw_compare_and_swap(volatile unsigned int *lock, unsigned int old, unsigned int new) diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index d73cc6b60000..ef573c1d71a7 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -20,6 +20,8 @@ struct task_struct; +extern int sysctl_userprocess_debug; + extern struct task_struct *__switch_to(void *, void *); extern void update_per_regs(struct task_struct *task); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 66729eb7bbc5..ef8fb1d6e8d7 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,24 +5,33 @@ #include #include + +extern void (*pgm_check_table[128])(struct pt_regs *, long, unsigned long); +extern void *restart_stack; + +asmlinkage long do_syscall_trace_enter(struct pt_regs *regs); +asmlinkage void do_syscall_trace_exit(struct pt_regs *regs); + void do_protection_exception(struct pt_regs *, long, unsigned long); void do_dat_exception(struct pt_regs *, long, unsigned long); void do_asce_exception(struct pt_regs *, long, unsigned long); -extern int sysctl_userprocess_debug; - void do_per_trap(struct pt_regs *regs); void syscall_trace(struct pt_regs *regs, int entryexit); void kernel_stack_overflow(struct pt_regs * regs); void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); +void do_notify_resume(struct pt_regs *regs); void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); +void do_restart(void); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); +void __init time_init(void); + struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 6296809c8e02..affa8e68124a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -27,6 +27,7 @@ #include #include #include +#include "entry.h" #define IPL_PARM_BLOCK_VERSION 0 diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 5e64a9a29ea4..9451b210a1b4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index b6f9afed74ec..47df775c844d 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8d65bd0383fc..ebbfab3c6e5a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -48,6 +48,7 @@ #include #include #include +#include "entry.h" /* change this if you have some constant time drift */ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index c9a9f7f18188..f09c74881b7e 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index d013ed39743b..b36537a5f43e 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -5,6 +5,7 @@ #include #include #include +#include #include static void change_page_attr(unsigned long addr, int numpages, -- cgit v1.2.3 From 5b479a79bff752c6719463f093bdd191f4b837db Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:14 +0100 Subject: [S390] sparse: fix sparse warnings in math-emu Fix three sparse warnings in math-emu / sysinfo: arch/s390/kernel/sysinfo.c:448:17: error: return expression in void function arch/s390/kernel/sysinfo.c:445:25: warning: shift too big (32) for type unsigned int arch/s390/kernel/sysinfo.c:445:25: warning: shift too big (32) for type unsigned int Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sfp-util.h | 2 +- arch/s390/kernel/sysinfo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/sfp-util.h b/arch/s390/include/asm/sfp-util.h index 0addc6466d95..ca3f8814e361 100644 --- a/arch/s390/include/asm/sfp-util.h +++ b/arch/s390/include/asm/sfp-util.h @@ -72,6 +72,6 @@ extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int, #define UDIV_NEEDS_NORMALIZATION 0 -#define abort() return 0 +#define abort() BUG() #define __BYTE_ORDER __BIG_ENDIAN diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 5c9e439bf3f6..2a94b774695c 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -442,7 +442,7 @@ void s390_adjust_jiffies(void) */ FP_UNPACK_SP(SA, &fmil); if ((info->capability >> 23) == 0) - FP_FROM_INT_S(SB, info->capability, 32, int); + FP_FROM_INT_S(SB, (long) info->capability, 64, long); else FP_UNPACK_SP(SB, &info->capability); FP_DIV_S(SR, SA, SB); -- cgit v1.2.3 From 3c52e49d7c81434e3d2ccb520b3a654c2cc7d03d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 30 Oct 2011 15:17:15 +0100 Subject: [S390] sparse: fix sparse warnings with __user pointers Use __force to quiet sparse warnings about user address space. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/compat_signal.c | 23 ++++++++++++----------- arch/s390/kernel/traps.c | 3 ++- arch/s390/lib/uaccess_pt.c | 6 ++++-- 3 files changed, 18 insertions(+), 14 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index c68ea9c18046..4f68c81d3ffa 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -141,7 +141,8 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) break; case __SI_FAULT >> 16: err |= __get_user(tmp, &from->si_addr); - to->si_addr = (void __user *)(u64) (tmp & PSW32_ADDR_INSN); + to->si_addr = (void __force __user *) + (u64) (tmp & PSW32_ADDR_INSN); break; case __SI_POLL >> 16: err |= __get_user(to->si_band, &from->si_band); @@ -482,7 +483,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, } else { regs->gprs[14] = (__u64) frame->retcode | PSW32_ADDR_AMODE; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, - (u16 __user *)(frame->retcode))) + (u16 __force __user *)(frame->retcode))) goto give_sigsegv; } @@ -491,12 +492,12 @@ static int setup_frame32(int sig, struct k_sigaction *ka, goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->psw.addr = (__force __u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->sc; + regs->gprs[3] = (__force __u64) &frame->sc; /* We forgot to include these in the sigcontext. To avoid breaking binary compatibility, they are passed as args. */ @@ -504,7 +505,7 @@ static int setup_frame32(int sig, struct k_sigaction *ka, regs->gprs[5] = current->thread.prot_addr; /* Place signal number on stack to allow backtrace from handler. */ - if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) goto give_sigsegv; return 0; @@ -547,21 +548,21 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, } else { regs->gprs[14] = (__u64) frame->retcode; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, - (u16 __user *)(frame->retcode)); + (u16 __force __user *)(frame->retcode)); } /* Set up backchain. */ - if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) goto give_sigsegv; /* Set up registers for signal handler */ - regs->gprs[15] = (__u64) frame; + regs->gprs[15] = (__force __u64) frame; regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */ regs->psw.addr = (__u64) ka->sa.sa_handler; regs->gprs[2] = map_signal(sig); - regs->gprs[3] = (__u64) &frame->info; - regs->gprs[4] = (__u64) &frame->uc; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; return 0; give_sigsegv: diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 79eee3f27afb..a9807dd86276 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -334,7 +334,8 @@ void __kprobes do_per_trap(struct pt_regs *regs) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; - info.si_addr = (void *) current->thread.per_event.address; + info.si_addr = + (void __force __user *) current->thread.per_event.address; force_sig_info(SIGTRAP, &info, current); } diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 74833831417f..342ae35a5ba9 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -342,7 +342,8 @@ int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_op_pt(op, uaddr, oparg, old); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; @@ -378,7 +379,8 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, if (segment_eq(get_fs(), KERNEL_DS)) return __futex_atomic_cmpxchg_pt(uval, uaddr, oldval, newval); spin_lock(¤t->mm->page_table_lock); - uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr); + uaddr = (u32 __force __user *) + __dat_user_addr((__force unsigned long) uaddr); if (!uaddr) { spin_unlock(¤t->mm->page_table_lock); return -EFAULT; -- cgit v1.2.3 From e1c4d0142d32f97706b752d94c9e20c3f21901f8 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Sun, 30 Oct 2011 15:17:18 +0100 Subject: [S390] add missing __tlb_flush_global() for !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this compiler error for !CONFIG_SMP: CC arch/s390/mm/pgtable.o arch/s390/mm/pgtable.c: In function ‘gmap_flush_tlb’: arch/s390/mm/pgtable.c:202:3: error: implicit declaration of function ‘__tlb_flush_global’ [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Signed-off-by: Jan Glauber Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlbflush.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 304445382382..1d8648cf2fea 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -59,6 +59,7 @@ static inline void __tlb_flush_full(struct mm_struct *mm) } #else #define __tlb_flush_full(mm) __tlb_flush_local() +#define __tlb_flush_global() __tlb_flush_local() #endif /* -- cgit v1.2.3 From 2a3a2d66aa4e5abaf8f9222d21735321f02a00dc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 30 Oct 2011 15:17:19 +0100 Subject: [S390] irqstats: split IPI interrupt accounting We use both the external call and emergency call IPIs to signal remote cpus. Therefore it makes sense to account them differently withing /proc/irqstats so we actually know what happened. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 3 ++- arch/s390/kernel/irq.c | 3 ++- arch/s390/kernel/smp.c | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index eadfa9fc03ba..ba6d85f88d50 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -8,7 +8,8 @@ enum interruption_class { EXTERNAL_INTERRUPT, IO_INTERRUPT, EXTINT_CLK, - EXTINT_IPI, + EXTINT_EXC, + EXTINT_EMS, EXTINT_TMR, EXTINT_TLA, EXTINT_PFL, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 30faaef34457..b9a7fdd9c814 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -33,7 +33,8 @@ static const struct irq_class intrclass_names[] = { {.name = "EXT" }, {.name = "I/O" }, {.name = "CLK", .desc = "[EXT] Clock Comparator" }, - {.name = "IPI", .desc = "[EXT] Signal Processor" }, + {.name = "EXC", .desc = "[EXT] External Call" }, + {.name = "EMS", .desc = "[EXT] Emergency Signal" }, {.name = "TMR", .desc = "[EXT] CPU Timer" }, {.name = "TAL", .desc = "[EXT] Timing Alert" }, {.name = "PFL", .desc = "[EXT] Pseudo Page Fault" }, diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3bde5688ceb5..3ea872890da2 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -187,7 +187,10 @@ static void do_ext_call_interrupt(unsigned int ext_int_code, { unsigned long bits; - kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++; + if (ext_int_code == 0x1202) + kstat_cpu(smp_processor_id()).irqs[EXTINT_EXC]++; + else + kstat_cpu(smp_processor_id()).irqs[EXTINT_EMS]++; /* * handle bit signal external calls */ -- cgit v1.2.3 From 07ea815b22b9f70ec8de6ddf8db63a1dd1585caf Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Sun, 30 Oct 2011 15:17:21 +0100 Subject: [S390] Remove error checking from copy_oldmem_page() Currently it can happen that the pre-allocated ELF header contains a wrong memory map which would result in errors when copying /proc/vmcore. In order to still get a valid vmcore, we (temporarily) disable the error checking in copy_oldmem_page(). This will then produce zero pages for those memory regions. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/crash_dump.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 2a9a3f405574..39f8fd4438fc 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -31,7 +31,6 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { unsigned long src; - int rc; if (!csize) return 0; @@ -43,11 +42,11 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, src < OLDMEM_BASE + OLDMEM_SIZE) src -= OLDMEM_BASE; if (userbuf) - rc = copy_to_user_real((void __user *) buf, (void *) src, - csize); + copy_to_user_real((void __force __user *) buf, (void *) src, + csize); else - rc = memcpy_real(buf, (void *) src, csize); - return rc < 0 ? rc : csize; + memcpy_real(buf, (void *) src, csize); + return csize; } /* -- cgit v1.2.3 From 3a4c5d5964ed43a5524f6d289fb4cd37d39f3f1a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jul 2011 09:25:15 +0200 Subject: s390: add missing module.h/export.h includes Fix several compile errors on s390 caused by splitting module.h. Some include additions [e.g. qdio_setup.c, zfcp_qdio.c] are in anticipation of pending changes queued for s390 that increase the modular use footprint. [PG: added additional obvious changes since Heiko's original patch] Signed-off-by: Heiko Carstens Signed-off-by: Paul Gortmaker --- arch/s390/crypto/sha_common.c | 1 + arch/s390/mm/init.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index 48884f89ab92..bd37d09b9d3c 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -14,6 +14,7 @@ */ #include +#include #include "sha.h" #include "crypt_s390.h" diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 59b663109d90..d4b9fb4d0042 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From dc78610228fdcf84a45ff030ca6a7f0fc4913319 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2011 14:13:24 +0200 Subject: hypfs: remove unnecessary nlink setting alloc_inode() initializes i_nlink to 1. Remove unnecessary re-initialization. Signed-off-by: Miklos Szeredi CC: Martin Schwidefsky Signed-off-by: Christoph Hellwig --- arch/s390/hypfs/inode.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6fe874fc5f8e..7ae0d0d985a8 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -109,8 +109,6 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) ret->i_nlink = 2; - else - ret->i_nlink = 1; } return ret; } -- cgit v1.2.3 From 6d6b77f163c7eabedbba00ed2abb7d4a570bff76 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2011 14:13:28 +0200 Subject: filesystems: add missing nlink wrappers Replace direct i_nlink updates with the respective updater function (inc_nlink, drop_nlink, clear_nlink, inode_dec_link_count). Signed-off-by: Miklos Szeredi --- arch/s390/hypfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 7ae0d0d985a8..baa82f8dd076 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, } else if (mode & S_IFDIR) { inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - parent->d_inode->i_nlink++; + inc_nlink(parent->d_inode); } else BUG(); inode->i_private = data; -- cgit v1.2.3 From bfe8684869601dacfcb2cd69ef8cfd9045f62170 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 28 Oct 2011 14:13:29 +0200 Subject: filesystems: add set_nlink() Replace remaining direct i_nlink updates with a new set_nlink() updater function. Signed-off-by: Miklos Szeredi Tested-by: Toshiyuki Okajima Signed-off-by: Christoph Hellwig --- arch/s390/hypfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index baa82f8dd076..481f4f76f664 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -108,7 +108,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, int mode) ret->i_gid = hypfs_info->gid; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; if (mode & S_IFDIR) - ret->i_nlink = 2; + set_nlink(ret, 2); } return ret; } -- cgit v1.2.3 From 220a2eb228d032acde60e9fd044ca802706ff583 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:25 -0700 Subject: s390: gup_huge_pmd() support THP tail recounting Up to this point the code assumed old refcounting for hugepages (pre-thp). This updates the code directly to the thp mapcount tail page refcounting. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/gup.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 45b405ca2567..668dda964f20 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -48,11 +48,22 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 1; } +static inline void get_huge_page_tail(struct page *page) +{ + /* + * __split_huge_page_refcount() cannot run + * from under us. + */ + VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON(atomic_read(&page->_count) != 0); + atomic_inc(&page->_mapcount); +} + static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { unsigned long mask, result; - struct page *head, *page; + struct page *head, *page, *tail; int refs; result = write ? 0 : _SEGMENT_ENTRY_RO; @@ -64,6 +75,7 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs = 0; head = pmd_page(pmd); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; do { VM_BUG_ON(compound_head(page) != head); pages[*nr] = page; @@ -81,6 +93,16 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); + } else { + /* + * Any tail page need their mapcount reference taken + * before we return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } } return 1; -- cgit v1.2.3 From 0693bc9ce2cc4f6a1b9c3c05790fc149a74c0b87 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:28 -0700 Subject: s390: gup_huge_pmd() return 0 if pte changes s390 didn't return 0 in that case, if it's rolling back the *nr pointer it should also return zero to avoid adding pages to the array at the wrong offset. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/gup.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 668dda964f20..da33a0281d9d 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -93,16 +93,17 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, *nr -= refs; while (refs--) put_page(head); - } else { - /* - * Any tail page need their mapcount reference taken - * before we return. - */ - while (refs--) { - if (PageTail(tail)) - get_huge_page_tail(tail); - tail++; - } + return 0; + } + + /* + * Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; } return 1; -- cgit v1.2.3 From b35a35b556f5e6b7993ad0baf20173e75c09ce8c Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 2 Nov 2011 13:37:36 -0700 Subject: thp: share get_huge_page_tail() This avoids duplicating the function in every arch gup_fast. Signed-off-by: Andrea Arcangeli Cc: Peter Zijlstra Cc: Hugh Dickins Cc: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Benjamin Herrenschmidt Cc: David Gibson Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/gup.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index da33a0281d9d..65cb06e2af4e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -48,17 +48,6 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, return 1; } -static inline void get_huge_page_tail(struct page *page) -{ - /* - * __split_huge_page_refcount() cannot run - * from under us. - */ - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); - atomic_inc(&page->_mapcount); -} - static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { -- cgit v1.2.3