diff options
Diffstat (limited to 'arch/powerpc')
253 files changed, 5349 insertions, 3433 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index ec4047e170a0..65fba4c34cd7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -12,11 +12,6 @@ config 64BIT bool default y if PPC64 -config WORD_SIZE - int - default 64 if PPC64 - default 32 if !PPC64 - config ARCH_PHYS_ADDR_T_64BIT def_bool PPC64 || PHYS_64BIT @@ -101,7 +96,7 @@ config PPC select VIRT_TO_BUS if !PPC64 select HAVE_IDE select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_KPROBES select HAVE_ARCH_KGDB select HAVE_KRETPROBES @@ -113,7 +108,6 @@ config PPC select HAVE_DEBUG_KMEMLEAK select ARCH_HAS_SG_CHAIN select GENERIC_ATOMIC64 if PPC32 - select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -166,6 +160,8 @@ config PPC select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS select GENERIC_CPU_AUTOPROBE select HAVE_VIRT_CPU_ACCOUNTING + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_KERNEL_GZIP config GENERIC_CSUM def_bool CPU_LITTLE_ENDIAN @@ -636,7 +632,7 @@ config FORCE_MAX_ZONEORDER int "Maximum zone order" range 8 9 if PPC64 && PPC_64K_PAGES default "9" if PPC64 && PPC_64K_PAGES - range 9 13 if PPC64 && !PPC_64K_PAGES + range 13 13 if PPC64 && !PPC_64K_PAGES default "13" if PPC64 && !PPC_64K_PAGES range 9 64 if PPC32 && PPC_16K_PAGES default "9" if PPC32 && PPC_16K_PAGES diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ca254546cd05..50d020ac0f48 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -43,57 +43,49 @@ NM := $(NM) --synthetic endif endif -ifeq ($(CONFIG_PPC64),y) -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -OLDARCH := ppc64le -else -OLDARCH := ppc64 -endif -else -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -OLDARCH := ppcle -else -OLDARCH := ppc -endif -endif +# BITS is used as extension for files which are available in a 32 bit +# and a 64 bit version to simplify shared Makefiles. +# e.g.: obj-y += foo_$(BITS).o +export BITS -# It seems there are times we use this Makefile without -# including the config file, but this replicates the old behaviour -ifeq ($(CONFIG_WORD_SIZE),) -CONFIG_WORD_SIZE := 32 +ifdef CONFIG_PPC64 + BITS := 64 +else + BITS := 32 endif -UTS_MACHINE := $(OLDARCH) +machine-y = ppc +machine-$(CONFIG_PPC64) += 64 +machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le +UTS_MACHINE := $(subst $(space),,$(machine-y)) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override CC += -mlittle-endian -ifneq ($(cc-name),clang) -override CC += -mno-strict-align -endif -override AS += -mlittle-endian override LD += -EL -override CROSS32CC += -mlittle-endian -override CROSS32AS += -mlittle-endian LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else -ifeq ($(call cc-option-yn,-mbig-endian),y) -override CC += -mbig-endian -override AS += -mbig-endian -endif override LD += -EB LDEMULATION := ppc GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +ifneq ($(cc-name),clang) + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align +endif + +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian + ifeq ($(HAS_BIARCH),y) -override AS += -a$(CONFIG_WORD_SIZE) -override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION) -override CC += -m$(CONFIG_WORD_SIZE) -override AR := GNUTARGET=elf$(CONFIG_WORD_SIZE)-$(GNUTARGET) $(AR) +override AS += -a$(BITS) +override LD += -m elf$(BITS)$(LDEMULATION) +override CC += -m$(BITS) +override AR := GNUTARGET=elf$(BITS)-$(GNUTARGET) $(AR) endif LDFLAGS_vmlinux-y := -Bstatic @@ -180,7 +172,7 @@ KBUILD_CFLAGS += $(call cc-option,-msoft-float) KBUILD_CFLAGS += -pipe -Iarch/$(ARCH) $(CFLAGS-y) CPP = $(CC) -E $(KBUILD_CFLAGS) -CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__ +CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN CHECKFLAGS += -D__BIG_ENDIAN__ else @@ -232,7 +224,10 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200 KBUILD_AFLAGS += $(cpu-as-y) KBUILD_CFLAGS += $(cpu-as-y) -head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o +KBUILD_AFLAGS += $(aflags-y) +KBUILD_CFLAGS += $(cflags-y) + +head-y := arch/powerpc/kernel/head_$(BITS).o head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 1a2a6e8dc40d..eae2dc8bc218 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -19,10 +19,15 @@ all: $(obj)/zImage +compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP +compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ + BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -Os -msoft-float -pipe \ -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -isystem $(shell $(CROSS32CC) -print-file-name=include) + -isystem $(shell $(CROSS32CC) -print-file-name=include) \ + -D$(compress-y) + ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 endif @@ -59,13 +64,30 @@ $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405 $(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 +# The pre-boot decompressors pull in a lot of kernel headers and other source +# files. This creates a bit of a dependency headache since we need to copy +# these files into the build dir, fix up any includes and ensure that dependent +# files are copied in the right order. + +# these need to be seperate variables because they are copied out of different +# directories in the kernel tree. Sure you COULd merge them, but it's a +# cure-is-worse-than-disease situation. +zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c +zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c +zlibheader-$(CONFIG_KERNEL_GZIP) := inffast.h inffixed.h inflate.h inftrees.h infutil.h +zliblinuxheader-$(CONFIG_KERNEL_GZIP) := zlib.h zconf.h zutil.h -zlib := inffast.c inflate.c inftrees.c -zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h -zliblinuxheader := zlib.h zconf.h zutil.h +$(addprefix $(obj)/, decompress.o): \ + $(addprefix $(obj)/,$(zlib-decomp-y)) -$(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o): \ - $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader)) +$(addprefix $(obj)/, $(zlib-decomp-y)): \ + $(addprefix $(obj)/,$(zliblinuxheader-y)) \ + $(addprefix $(obj)/,$(zlibheader-y)) \ + $(addprefix $(obj)/,$(zlib-y)) + +$(addprefix $(obj)/,$(zlib-y)): \ + $(addprefix $(obj)/,$(zliblinuxheader-y)) \ + $(addprefix $(obj)/,$(zlibheader-y)) libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h @@ -73,10 +95,10 @@ libfdtheader := fdt.h libfdt.h libfdt_internal.h $(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ $(addprefix $(obj)/,$(libfdtheader)) -src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \ +src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \ $(libfdt) libfdt-wrapper.c \ ns16550.c serial.c simple_alloc.c div64.S util.S \ - gunzip_util.c elf_util.c $(zlib) devtree.c stdlib.c \ + elf_util.c $(zlib-y) devtree.c stdlib.c \ oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \ uartlite.c mpc52xx-psc.c opal.c opal-calls.S src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c @@ -125,23 +147,20 @@ obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib)))) obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat)))) obj-plat: $(libfdt) -quiet_cmd_copy_zlib = COPY $@ - cmd_copy_zlib = sed "s@__used@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@ +quiet_cmd_copy_kern_src = COPY $@ + cmd_copy_kern_src = sed -f $(srctree)/arch/powerpc/boot/fixup-headers.sed $< > $@ -quiet_cmd_copy_zlibheader = COPY $@ - cmd_copy_zlibheader = sed "s@<linux/\([^>]*\).*@\"\1\"@" $< > $@ -# stddef.h for NULL -quiet_cmd_copy_zliblinuxheader = COPY $@ - cmd_copy_zliblinuxheader = sed "s@<linux/string.h>@\"string.h\"@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@ +$(addprefix $(obj)/,$(zlib-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_kern_src) -$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/% - $(call cmd,copy_zlib) +$(addprefix $(obj)/,$(zlibheader-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/% + $(call cmd,copy_kern_src) -$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/% - $(call cmd,copy_zlibheader) +$(addprefix $(obj)/,$(zliblinuxheader-y)): $(obj)/%: $(srctree)/include/linux/% + $(call cmd,copy_kern_src) -$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/% - $(call cmd,copy_zliblinuxheader) +$(addprefix $(obj)/,$(zlib-decomp-y)): $(obj)/%: $(srctree)/lib/% + $(call cmd,copy_kern_src) quiet_cmd_copy_libfdt = COPY $@ cmd_copy_libfdt = cp $< $@ @@ -150,17 +169,17 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc $(call cmd,copy_libfdt) $(obj)/empty.c: - @touch $@ + $(Q)touch $@ $(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \ -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $< $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S - @cp $< $@ + $(Q)cp $< $@ -clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \ - $(libfdt) $(libfdtheader) \ +clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ + $(zlib-decomp-) $(libfdt) $(libfdtheader) \ empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ @@ -207,10 +226,14 @@ CROSSWRAP := -C "$(CROSS_COMPILE)" endif endif +compressor-$(CONFIG_KERNEL_GZIP) := gz +compressor-$(CONFIG_KERNEL_XZ) := xz + # args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd quiet_cmd_wrap = WRAP $@ - cmd_wrap =$(CONFIG_SHELL) $(wrapper) -c -o $@ -p $2 $(CROSSWRAP) \ - $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) vmlinux + cmd_wrap =$(CONFIG_SHELL) $(wrapper) -Z $(compressor-y) -c -o $@ -p $2 \ + $(CROSSWRAP) $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) \ + vmlinux image-$(CONFIG_PPC_PSERIES) += zImage.pseries image-$(CONFIG_PPC_POWERNV) += zImage.pseries @@ -391,9 +414,9 @@ image-y := vmlinux.strip endif $(obj)/zImage: $(addprefix $(obj)/, $(image-y)) - @rm -f $@; ln $< $@ + $(Q)rm -f $@; ln $< $@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) - @rm -f $@; ln $< $@ + $(Q)rm -f $@; ln $< $@ # Only install the vmlinux install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y)) @@ -410,8 +433,9 @@ clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage.maple simpleImage.* otheros.bld *.dtb # clean up files cached by wrapper -clean-kernel := vmlinux.strip vmlinux.bin -clean-kernel += $(addsuffix .gz,$(clean-kernel)) +clean-kernel-base := vmlinux.strip vmlinux.bin +clean-kernel := $(addsuffix .gz,$(clean-kernel-base)) +clean-kernel += $(addsuffix .xz,$(clean-kernel-base)) # If not absolute clean-files are relative to $(obj). clean-files += $(addprefix $(objtree)/, $(clean-kernel)) diff --git a/arch/powerpc/boot/cuboot-c2k.c b/arch/powerpc/boot/cuboot-c2k.c index e43594950ba3..9309c51f1d65 100644 --- a/arch/powerpc/boot/cuboot-c2k.c +++ b/arch/powerpc/boot/cuboot-c2k.c @@ -18,7 +18,6 @@ #include "io.h" #include "ops.h" #include "elf.h" -#include "gunzip_util.h" #include "mv64x60.h" #include "cuboot.h" #include "ppcboot.h" diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c new file mode 100644 index 000000000000..3aff4423ad01 --- /dev/null +++ b/arch/powerpc/boot/decompress.c @@ -0,0 +1,148 @@ +/* + * Wrapper around the kernel's pre-boot decompression library. + * + * Copyright (C) IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "elf.h" +#include "page.h" +#include "string.h" +#include "stdio.h" +#include "ops.h" +#include "reg.h" +#include "types.h" + +/* + * The decompressor_*.c files play #ifdef games so they can be used in both + * pre-boot and regular kernel code. We need these definitions to make the + * includes work. + */ + +#define STATIC static +#define INIT +#define __always_inline inline + +/* + * The build process will copy the required zlib source files and headers + * out of lib/ and "fix" the includes so they do not pull in other kernel + * headers. + */ + +#ifdef CONFIG_KERNEL_GZIP +# include "decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +# include "xz_config.h" +# include "../../../lib/decompress_unxz.c" +#endif + +/* globals for tracking the state of the decompression */ +static unsigned long decompressed_bytes; +static unsigned long limit; +static unsigned long skip; +static char *output_buffer; + +/* + * flush() is called by __decompress() when the decompressor's scratch buffer is + * full. + */ +static long flush(void *v, unsigned long buffer_size) +{ + unsigned long end = decompressed_bytes + buffer_size; + unsigned long size = buffer_size; + unsigned long offset = 0; + char *in = v; + char *out; + + /* + * if we hit our decompression limit, we need to fake an error to abort + * the in-progress decompression. + */ + if (decompressed_bytes >= limit) + return -1; + + /* skip this entire block */ + if (end <= skip) { + decompressed_bytes += buffer_size; + return buffer_size; + } + + /* skip some data at the start, but keep the rest of the block */ + if (decompressed_bytes < skip && end > skip) { + offset = skip - decompressed_bytes; + + in += offset; + size -= offset; + decompressed_bytes += offset; + } + + out = &output_buffer[decompressed_bytes - skip]; + size = min(decompressed_bytes + size, limit) - decompressed_bytes; + + memcpy(out, in, size); + decompressed_bytes += size; + + return buffer_size; +} + +static void print_err(char *s) +{ + /* suppress the "error" when we terminate the decompressor */ + if (decompressed_bytes >= limit) + return; + + printf("Decompression error: '%s'\n\r", s); +} + +/** + * partial_decompress - decompresses part or all of a compressed buffer + * @inbuf: input buffer + * @input_size: length of the input buffer + * @outbuf: input buffer + * @output_size: length of the input buffer + * @skip number of output bytes to ignore + * + * This function takes compressed data from inbuf, decompresses and write it to + * outbuf. Once output_size bytes are written to the output buffer, or the + * stream is exhausted the function will return the number of bytes that were + * decompressed. Otherwise it will return whatever error code the decompressor + * reported (NB: This is specific to each decompressor type). + * + * The skip functionality is mainly there so the program and discover + * the size of the compressed image so that it can ask firmware (if present) + * for an appropriately sized buffer. + */ +long partial_decompress(void *inbuf, unsigned long input_size, + void *outbuf, unsigned long output_size, unsigned long _skip) +{ + int ret; + + /* + * The skipped bytes needs to be included in the size of data we want + * to decompress. + */ + output_size += _skip; + + decompressed_bytes = 0; + output_buffer = outbuf; + limit = output_size; + skip = _skip; + + ret = __decompress(inbuf, input_size, NULL, flush, outbuf, + output_size, NULL, print_err); + + /* + * If decompression was aborted due to an actual error rather than + * a fake error that we used to abort, then we should report it. + */ + if (decompressed_bytes < limit) + return ret; + + return decompressed_bytes - skip; +} diff --git a/arch/powerpc/boot/fixup-headers.sed b/arch/powerpc/boot/fixup-headers.sed new file mode 100644 index 000000000000..96362428eb37 --- /dev/null +++ b/arch/powerpc/boot/fixup-headers.sed @@ -0,0 +1,12 @@ +# Copyright 2016 IBM Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 or later as +# published by the Free Software Foundation. + +s@#include <linux/decompress/mm\.h>@@; +s@\"zlib_inflate/\([^\"]*\).*@"\1"@; +s@<linux/kernel.h>@<stddef.h>@; + +s@__used@@; +s@<linux/\([^>]*\).*@"\1"@; diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c deleted file mode 100644 index 9dc52501de83..000000000000 --- a/arch/powerpc/boot/gunzip_util.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright 2007 David Gibson, IBM Corporation. - * Based on earlier work, Copyright (C) Paul Mackerras 1997. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <stddef.h> -#include "string.h" -#include "stdio.h" -#include "ops.h" -#include "gunzip_util.h" - -#define HEAD_CRC 2 -#define EXTRA_FIELD 4 -#define ORIG_NAME 8 -#define COMMENT 0x10 -#define RESERVED 0xe0 - -/** - * gunzip_start - prepare to decompress gzip data - * @state: decompressor state structure to be initialized - * @src: buffer containing gzip compressed or uncompressed data - * @srclen: size in bytes of the buffer at src - * - * If the buffer at @src contains a gzip header, this function - * initializes zlib to decompress the data, storing the decompression - * state in @state. The other functions in this file can then be used - * to decompress data from the gzipped stream. - * - * If the buffer at @src does not contain a gzip header, it is assumed - * to contain uncompressed data. The buffer information is recorded - * in @state and the other functions in this file will simply copy - * data from the uncompressed data stream at @src. - * - * Any errors, such as bad compressed data, cause an error to be - * printed an the platform's exit() function to be called. - */ -void gunzip_start(struct gunzip_state *state, void *src, int srclen) -{ - char *hdr = src; - int hdrlen = 0; - - memset(state, 0, sizeof(*state)); - - /* Check for gzip magic number */ - if ((hdr[0] == 0x1f) && (hdr[1] == 0x8b)) { - /* gzip data, initialize zlib parameters */ - int r, flags; - - state->s.workspace = state->scratch; - if (zlib_inflate_workspacesize() > sizeof(state->scratch)) - fatal("insufficient scratch space for gunzip\n\r"); - - /* skip header */ - hdrlen = 10; - flags = hdr[3]; - if (hdr[2] != Z_DEFLATED || (flags & RESERVED) != 0) - fatal("bad gzipped data\n\r"); - if ((flags & EXTRA_FIELD) != 0) - hdrlen = 12 + hdr[10] + (hdr[11] << 8); - if ((flags & ORIG_NAME) != 0) - while (hdr[hdrlen++] != 0) - ; - if ((flags & COMMENT) != 0) - while (hdr[hdrlen++] != 0) - ; - if ((flags & HEAD_CRC) != 0) - hdrlen += 2; - if (hdrlen >= srclen) - fatal("gunzip_start: ran out of data in header\n\r"); - - r = zlib_inflateInit2(&state->s, -MAX_WBITS); - if (r != Z_OK) - fatal("inflateInit2 returned %d\n\r", r); - } - - state->s.total_in = hdrlen; - state->s.next_in = src + hdrlen; - state->s.avail_in = srclen - hdrlen; -} - -/** - * gunzip_partial - extract bytes from a gzip data stream - * @state: gzip state structure previously initialized by gunzip_start() - * @dst: buffer to store extracted data - * @dstlen: maximum number of bytes to extract - * - * This function extracts at most @dstlen bytes from the data stream - * previously associated with @state by gunzip_start(), decompressing - * if necessary. Exactly @dstlen bytes are extracted unless the data - * stream doesn't contain enough bytes, in which case the entire - * remainder of the stream is decompressed. - * - * Returns the actual number of bytes extracted. If any errors occur, - * such as a corrupted compressed stream, an error is printed an the - * platform's exit() function is called. - */ -int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen) -{ - int len; - - if (state->s.workspace) { - /* gunzipping */ - int r; - - state->s.next_out = dst; - state->s.avail_out = dstlen; - r = zlib_inflate(&state->s, Z_FULL_FLUSH); - if (r != Z_OK && r != Z_STREAM_END) - fatal("inflate returned %d msg: %s\n\r", r, state->s.msg); - len = state->s.next_out - (Byte *)dst; - } else { - /* uncompressed image */ - len = min(state->s.avail_in, (uLong)dstlen); - memcpy(dst, state->s.next_in, len); - state->s.next_in += len; - state->s.avail_in -= len; - } - return len; -} - -/** - * gunzip_exactly - extract a fixed number of bytes from a gzip data stream - * @state: gzip state structure previously initialized by gunzip_start() - * @dst: buffer to store extracted data - * @dstlen: number of bytes to extract - * - * This function extracts exactly @dstlen bytes from the data stream - * previously associated with @state by gunzip_start(), decompressing - * if necessary. - * - * If there are less @dstlen bytes available in the data stream, or if - * any other errors occur, such as a corrupted compressed stream, an - * error is printed an the platform's exit() function is called. - */ -void gunzip_exactly(struct gunzip_state *state, void *dst, int dstlen) -{ - int len; - - len = gunzip_partial(state, dst, dstlen); - if (len < dstlen) - fatal("\n\rgunzip_exactly: ran out of data!" - " Wanted %d, got %d.\n\r", dstlen, len); -} - -/** - * gunzip_discard - discard bytes from a gzip data stream - * @state: gzip state structure previously initialized by gunzip_start() - * @len: number of bytes to discard - * - * This function extracts, then discards exactly @len bytes from the - * data stream previously associated with @state by gunzip_start(). - * Subsequent gunzip_partial(), gunzip_exactly() or gunzip_finish() - * calls will extract the data following the discarded bytes in the - * data stream. - * - * If there are less @len bytes available in the data stream, or if - * any other errors occur, such as a corrupted compressed stream, an - * error is printed an the platform's exit() function is called. - */ -void gunzip_discard(struct gunzip_state *state, int len) -{ - static char discard_buf[128]; - - while (len > sizeof(discard_buf)) { - gunzip_exactly(state, discard_buf, sizeof(discard_buf)); - len -= sizeof(discard_buf); - } - - if (len > 0) - gunzip_exactly(state, discard_buf, len); -} - -/** - * gunzip_finish - extract all remaining bytes from a gzip data stream - * @state: gzip state structure previously initialized by gunzip_start() - * @dst: buffer to store extracted data - * @dstlen: maximum number of bytes to extract - * - * This function extracts all remaining data, or at most @dstlen - * bytes, from the stream previously associated with @state by - * gunzip_start(). zlib is then shut down, so it is an error to use - * any of the functions in this file on @state until it is - * re-initialized with another call to gunzip_start(). - * - * If any errors occur, such as a corrupted compressed stream, an - * error is printed an the platform's exit() function is called. - */ -int gunzip_finish(struct gunzip_state *state, void *dst, int dstlen) -{ - int len; - - len = gunzip_partial(state, dst, dstlen); - - if (state->s.workspace) { - zlib_inflateEnd(&state->s); - } - - return len; -} diff --git a/arch/powerpc/boot/gunzip_util.h b/arch/powerpc/boot/gunzip_util.h deleted file mode 100644 index b3dfa6e87b3a..000000000000 --- a/arch/powerpc/boot/gunzip_util.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Decompression convenience functions - * - * Copyright 2007 David Gibson, IBM Corporation. - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ -#ifndef _PPC_BOOT_GUNZIP_UTIL_H_ -#define _PPC_BOOT_GUNZIP_UTIL_H_ - -#include "zlib.h" - -/* - * These functions are designed to make life easy for decompressing - * kernel images, initrd images or any other gzip compressed image, - * particularly if its useful to decompress part of the image (e.g. to - * examine headers) before decompressing the remainder. - * - * To use: - * - declare a gunzip_state structure - * - use gunzip_start() to initialize the state, associating it - * with a stream of compressed data - * - use gunzip_partial(), gunzip_exactly() and gunzip_discard() - * in any combination to extract pieces of data from the stream - * - Finally use gunzip_finish() to extract the tail of the - * compressed stream and wind up zlib - */ - -/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */ -#define GUNZIP_SCRATCH_SIZE 46912 - -struct gunzip_state { - z_stream s; - char scratch[46912]; -}; - -void gunzip_start(struct gunzip_state *state, void *src, int srclen); -int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen); -void gunzip_exactly(struct gunzip_state *state, void *dst, int len); -void gunzip_discard(struct gunzip_state *state, int len); -int gunzip_finish(struct gunzip_state *state, void *dst, int len); - -#endif /* _PPC_BOOT_GUNZIP_UTIL_H_ */ diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index d80161b633f4..f7a184b6c35b 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -15,11 +15,8 @@ #include "string.h" #include "stdio.h" #include "ops.h" -#include "gunzip_util.h" #include "reg.h" -static struct gunzip_state gzstate; - struct addr_range { void *addr; unsigned long size; @@ -30,15 +27,14 @@ struct addr_range { static struct addr_range prep_kernel(void) { char elfheader[256]; - void *vmlinuz_addr = _vmlinux_start; + unsigned char *vmlinuz_addr = (unsigned char *)_vmlinux_start; unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start; void *addr = 0; struct elf_info ei; - int len; + long len; - /* gunzip the ELF header of the kernel */ - gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size); - gunzip_exactly(&gzstate, elfheader, sizeof(elfheader)); + partial_decompress(vmlinuz_addr, vmlinuz_size, + elfheader, sizeof(elfheader), 0); if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei)) fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); @@ -51,7 +47,7 @@ static struct addr_range prep_kernel(void) * the kernel bss must be claimed (it will be zero'd by the * kernel itself) */ - printf("Allocating 0x%lx bytes for kernel ...\n\r", ei.memsize); + printf("Allocating 0x%lx bytes for kernel...\n\r", ei.memsize); if (platform_ops.vmlinux_alloc) { addr = platform_ops.vmlinux_alloc(ei.memsize); @@ -71,16 +67,21 @@ static struct addr_range prep_kernel(void) "device tree\n\r"); } - /* Finally, gunzip the kernel */ - printf("gunzipping (0x%p <- 0x%p:0x%p)...", addr, + /* Finally, decompress the kernel */ + printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr, vmlinuz_addr, vmlinuz_addr+vmlinuz_size); - /* discard up to the actual load data */ - gunzip_discard(&gzstate, ei.elfoffset - sizeof(elfheader)); - len = gunzip_finish(&gzstate, addr, ei.loadsize); + + len = partial_decompress(vmlinuz_addr, vmlinuz_size, + addr, ei.loadsize, ei.elfoffset); + + if (len < 0) + fatal("Decompression failed with error code %ld\n\r", len); + if (len != ei.loadsize) - fatal("ran out of data! only got 0x%x of 0x%lx bytes.\n\r", - len, ei.loadsize); - printf("done 0x%x bytes\n\r", len); + fatal("Decompression error: got 0x%lx bytes, expected 0x%lx.\n\r", + len, ei.loadsize); + + printf("Done! Decompressed 0x%lx bytes\n\r", len); flush_cache(addr, ei.loadsize); diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h index e19b64ef977a..309d1b127e96 100644 --- a/arch/powerpc/boot/ops.h +++ b/arch/powerpc/boot/ops.h @@ -260,4 +260,7 @@ int __ilog2_u32(u32 n) return 31 - bit; } +long partial_decompress(void *inbuf, unsigned long input_size, void *outbuf, + unsigned long output_size, unsigned long skip); + #endif /* _PPC_BOOT_OPS_H_ */ diff --git a/arch/powerpc/boot/stdbool.h b/arch/powerpc/boot/stdbool.h new file mode 100644 index 000000000000..f818efb08891 --- /dev/null +++ b/arch/powerpc/boot/stdbool.h @@ -0,0 +1,14 @@ +/* + * Copyright (C) IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file is only necessary because some of the pre-boot decompressors + * expect stdbool.h to be available. + * + */ + +#include "types.h" diff --git a/arch/powerpc/boot/stdint.h b/arch/powerpc/boot/stdint.h new file mode 100644 index 000000000000..c1c853be7490 --- /dev/null +++ b/arch/powerpc/boot/stdint.h @@ -0,0 +1,13 @@ +/* + * Copyright (C) IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file is only necessary because some of the pre-boot decompressors + * expect stdint.h to be available. + */ + +#include "types.h" diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h index 85565a89bcc2..af6b66b842c4 100644 --- a/arch/powerpc/boot/types.h +++ b/arch/powerpc/boot/types.h @@ -1,6 +1,8 @@ #ifndef _TYPES_H_ #define _TYPES_H_ +#include <stdbool.h> + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) typedef unsigned char u8; @@ -34,4 +36,16 @@ typedef s64 int64_t; (void) (&_x == &_y); \ _x > _y ? _x : _y; }) +#define min_t(type, a, b) min(((type) a), ((type) b)) +#define max_t(type, a, b) max(((type) a), ((type) b)) + +typedef int bool; + +#ifndef true +#define true 1 +#endif + +#ifndef false +#define false 0 +#endif #endif /* _TYPES_H_ */ diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 6681ec3625c9..404b3aabdb4d 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -20,6 +20,8 @@ # -D dir specify directory containing data files used by script # (default ./arch/powerpc/boot) # -W dir specify working directory for temporary files (default .) +# -z use gzip (legacy) +# -Z zsuffix compression to use (gz, xz or none) # Stop execution if any command fails set -e @@ -38,7 +40,7 @@ dtb= dts= cacheit= binary= -gzip=.gz +compression=.gz pie= format= @@ -59,7 +61,8 @@ tmpdir=. usage() { echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2 echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2 - echo ' [-D datadir] [-W workingdir] [--no-gzip] [vmlinux]' >&2 + echo ' [-D datadir] [-W workingdir] [-Z (gz|xz|none)]' >&2 + echo ' [--no-compression] [vmlinux]' >&2 exit 1 } @@ -126,8 +129,24 @@ while [ "$#" -gt 0 ]; do [ "$#" -gt 0 ] || usage tmpdir="$1" ;; + -z) + compression=.gz + ;; + -Z) + shift + [ "$#" -gt 0 ] || usage + [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage + + compression=".$1" + + if [ $compression = ".none" ]; then + compression= + fi + ;; --no-gzip) - gzip= + # a "feature" of the the wrapper script is that it can be used outside + # the kernel tree. So keeping this around for backwards compatibility. + compression= ;; -?) usage @@ -140,6 +159,7 @@ while [ "$#" -gt 0 ]; do shift done + if [ -n "$dts" ]; then if [ ! -r "$dts" -a -r "$object/dts/$dts" ]; then dts="$object/dts/$dts" @@ -212,7 +232,7 @@ miboot|uboot*) ;; cuboot*) binary=y - gzip= + compression= case "$platform" in *-mpc866ads|*-mpc885ads|*-adder875*|*-ep88xc) platformo=$object/cuboot-8xx.o @@ -243,7 +263,7 @@ cuboot*) ps3) platformo="$object/ps3-head.o $object/ps3-hvcall.o $object/ps3.o" lds=$object/zImage.ps3.lds - gzip= + compression= ext=bin objflags="-O binary --set-section-flags=.bss=contents,alloc,load,data" ksection=.kernel:vmlinux.bin @@ -310,27 +330,37 @@ mvme7100) esac vmz="$tmpdir/`basename \"$kernel\"`.$ext" -if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then - ${CROSS}objcopy $objflags "$kernel" "$vmz.$$" - strip_size=$(stat -c %s $vmz.$$) +# Calculate the vmlinux.strip size +${CROSS}objcopy $objflags "$kernel" "$vmz.$$" +strip_size=$(stat -c %s $vmz.$$) - if [ -n "$gzip" ]; then +if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel" ]; then + # recompress the image if we need to + case $compression in + .xz) + xz --check=crc32 -f -6 "$vmz.$$" + ;; + .gz) gzip -n -f -9 "$vmz.$$" - fi + ;; + *) + # drop the compression suffix so the stripped vmlinux is used + compression= + ;; + esac if [ -n "$cacheit" ]; then - mv -f "$vmz.$$$gzip" "$vmz$gzip" + mv -f "$vmz.$$$compression" "$vmz$compression" else vmz="$vmz.$$" fi else - # Calculate the vmlinux.strip size - ${CROSS}objcopy $objflags "$kernel" "$vmz.$$" - strip_size=$(stat -c %s $vmz.$$) rm -f $vmz.$$ fi +vmz="$vmz$compression" + if [ "$make_space" = "y" ]; then # Round the size to next higher MB limit round_size=$(((strip_size + 0xfffff) & 0xfff00000)) @@ -346,8 +376,6 @@ if [ "$make_space" = "y" ]; then fi fi -vmz="$vmz$gzip" - # Extract kernel version information, some platforms want to include # it in the image header version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \ @@ -417,6 +445,7 @@ if [ "$platform" != "miboot" ]; then if [ -n "$link_address" ] ; then text_start="-Ttext $link_address" fi +#link everything ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \ $platformo $tmp $object/wrapper.a rm $tmp diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h new file mode 100644 index 000000000000..5c6afdbca642 --- /dev/null +++ b/arch/powerpc/boot/xz_config.h @@ -0,0 +1,39 @@ +#ifndef __XZ_CONFIG_H__ +#define __XZ_CONFIG_H__ + +/* + * most of this is copied from lib/xz/xz_private.h, we can't use their defines + * since the boot wrapper is not built in the same environment as the rest of + * the kernel. + */ + +#include "types.h" +#include "swab.h" + +static inline uint32_t swab32p(void *p) +{ + uint32_t *q = p; + + return swab32(*q); +} + +#ifdef __LITTLE_ENDIAN__ +#define get_le32(p) (*((uint32_t *) (p))) +#else +#define get_le32(p) swab32p(p) +#endif + +#define memeq(a, b, size) (memcmp(a, b, size) == 0) +#define memzero(buf, size) memset(buf, 0, size) + +/* prevent the inclusion of the xz-preboot MM headers */ +#define DECOMPR_MM_H +#define memmove memmove +#define XZ_EXTERN static + +/* xz.h needs to be included directly since we need enum xz_mode */ +#include "../../../include/linux/xz.h" + +#undef XZ_EXTERN + +#endif diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index dce352e9153b..d98b6eb3254f 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -15,6 +15,8 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_CGROUPS=y CONFIG_MEMCG=y @@ -95,7 +97,7 @@ CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y @@ -107,7 +109,7 @@ CONFIG_SCSI_CXGB4_ISCSI=m CONFIG_SCSI_BNX2_ISCSI=m CONFIG_BE2ISCSI=m CONFIG_SCSI_MPT2SAS=m -CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_IPR=y CONFIG_SCSI_QLA_FC=m @@ -149,10 +151,10 @@ CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m -CONFIG_VORTEX=y +CONFIG_VORTEX=m CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_PCNET32=y +CONFIG_PCNET32=m CONFIG_TIGON3=y CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m @@ -163,6 +165,7 @@ CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IXGB=m CONFIG_IXGBE=m +CONFIG_I40E=m CONFIG_MLX4_EN=m CONFIG_MYRI10GE=m CONFIG_QLGE=m @@ -238,7 +241,7 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS=m CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y @@ -253,10 +256,10 @@ CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_OVERLAY_FS=m -CONFIG_ISO9660_FS=y +CONFIG_ISO9660_FS=m CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y +CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -310,6 +313,8 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_DEV_NX=y +CONFIG_CRYPTO_DEV_VMX=y +CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 0a8d250cb97e..58a98d40086f 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -10,6 +10,8 @@ CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_BLK_DEV_INITRD=y @@ -90,7 +92,7 @@ CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_IDE_PMAC=y CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y @@ -103,7 +105,7 @@ CONFIG_BE2ISCSI=m CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_IBMVSCSI=y CONFIG_SCSI_IBMVFC=m -CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_IPR=y CONFIG_SCSI_QLA_FC=m @@ -149,10 +151,10 @@ CONFIG_NETCONSOLE=y CONFIG_TUN=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m -CONFIG_VORTEX=y +CONFIG_VORTEX=m CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_PCNET32=y +CONFIG_PCNET32=m CONFIG_TIGON3=y CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m @@ -165,6 +167,7 @@ CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IXGB=m CONFIG_IXGBE=m +CONFIG_I40E=m CONFIG_MLX4_EN=m CONFIG_MYRI10GE=m CONFIG_PASEMI_MAC=y @@ -269,7 +272,7 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS=m CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y @@ -284,10 +287,10 @@ CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_OVERLAY_FS=m -CONFIG_ISO9660_FS=y +CONFIG_ISO9660_FS=m CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y +CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -347,6 +350,8 @@ CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m +CONFIG_CRYPTO_DEV_VMX=y +CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 654aeffc57ef..8a3bc016b732 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -15,6 +15,8 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=18 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=13 CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y @@ -95,7 +97,7 @@ CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_AMD74XX=y CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y +CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y @@ -108,7 +110,7 @@ CONFIG_BE2ISCSI=m CONFIG_SCSI_MPT2SAS=m CONFIG_SCSI_IBMVSCSI=y CONFIG_SCSI_IBMVFC=m -CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_IPR=y CONFIG_SCSI_QLA_FC=m @@ -150,10 +152,10 @@ CONFIG_TUN=m CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m -CONFIG_VORTEX=y +CONFIG_VORTEX=m CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_PCNET32=y +CONFIG_PCNET32=m CONFIG_TIGON3=y CONFIG_BNX2X=m CONFIG_CHELSIO_T1=m @@ -166,6 +168,7 @@ CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IXGB=m CONFIG_IXGBE=m +CONFIG_I40E=m CONFIG_MLX4_EN=m CONFIG_MYRI10GE=m CONFIG_QLGE=m @@ -241,7 +244,7 @@ CONFIG_EXT2_FS_SECURITY=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS=m CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y @@ -256,10 +259,10 @@ CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_OVERLAY_FS=m -CONFIG_ISO9660_FS=y +CONFIG_ISO9660_FS=m CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y +CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y @@ -314,6 +317,8 @@ CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m +CONFIG_CRYPTO_DEV_VMX=y +CONFIG_CRYPTO_DEV_VMX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index bfe3d37a24ef..9fa046d56eba 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -4,6 +4,7 @@ #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/cpufeature.h> #include <asm/switch_to.h> #define CHKSUM_BLOCK_SIZE 1 @@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void) crypto_unregister_shash(&alg); } -module_init(crc32c_vpmsum_mod_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init); module_exit(crc32c_vpmsum_mod_fini); MODULE_AUTHOR("Anton Blanchard <anton@samba.org>"); diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index e71b9097594c..d1492736d852 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -15,6 +15,8 @@ #include <linux/threads.h> #include <linux/kprobes.h> +#include <uapi/asm/ucontext.h> + /* SMP */ extern struct thread_info *current_set[NR_CPUS]; extern struct thread_info *secondary_ti; @@ -52,8 +54,8 @@ void SMIException(struct pt_regs *regs); void handle_hmi_exception(struct pt_regs *regs); void instruction_breakpoint_exception(struct pt_regs *regs); void RunModeException(struct pt_regs *regs); -void __kprobes single_step_exception(struct pt_regs *regs); -void __kprobes program_check_exception(struct pt_regs *regs); +void single_step_exception(struct pt_regs *regs); +void program_check_exception(struct pt_regs *regs); void alignment_exception(struct pt_regs *regs); void StackOverflow(struct pt_regs *regs); void nonrecoverable_exception(struct pt_regs *regs); @@ -70,6 +72,41 @@ void unrecoverable_exception(struct pt_regs *regs); void kernel_bad_stack(struct pt_regs *regs); void system_reset_exception(struct pt_regs *regs); void machine_check_exception(struct pt_regs *regs); -void __kprobes emulation_assist_interrupt(struct pt_regs *regs); +void emulation_assist_interrupt(struct pt_regs *regs); + +/* signals, syscalls and interrupts */ +#ifdef CONFIG_PPC64 +int sys_swapcontext(struct ucontext __user *old_ctx, + struct ucontext __user *new_ctx, + long ctx_size, long r6, long r7, long r8, struct pt_regs *regs); +#else +long sys_swapcontext(struct ucontext __user *old_ctx, + struct ucontext __user *new_ctx, + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs); +#endif +long sys_switch_endian(void); +notrace unsigned int __check_irq_replay(void); +void notrace restore_interrupts(void); + +/* ptrace */ +long do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_leave(struct pt_regs *regs); + +/* process */ +void restore_math(struct pt_regs *regs); +void restore_tm_state(struct pt_regs *regs); + +/* prom_init (OpenFirmware) */ +unsigned long __init prom_init(unsigned long r3, unsigned long r4, + unsigned long pp, + unsigned long r6, unsigned long r7, + unsigned long kbase); + +/* setup */ +void __init early_setup(unsigned long dt_ptr); +void early_setup_secondary(void); + +/* time */ +void accumulate_stolen_time(void); #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index f08d567e0ca4..2b90335194a7 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -233,7 +233,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %0,0,%1 # __atomic_add_unless\n\ cmpw 0,%0,%3 \n\ - beq- 2f \n\ + beq 2f \n\ add %0,%2,%0 \n" PPC405_ERR77(0,%2) " stwcx. %0,0,%1 \n\ @@ -539,7 +539,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) PPC_ATOMIC_ENTRY_BARRIER "1: ldarx %0,0,%1 # __atomic_add_unless\n\ cmpd 0,%0,%3 \n\ - beq- 2f \n\ + beq 2f \n\ add %0,%2,%0 \n" " stdcx. %0,0,%1 \n\ bne- 1b \n" diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 38b33dcfcc9d..6b8b2d57fdc8 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -223,7 +223,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, } -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void __ptep_set_access_flags(struct mm_struct *mm, + pte_t *ptep, pte_t entry) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 287a656ceb57..e407af2b7333 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -245,6 +245,43 @@ static inline int segment_shift(int ssize) } /* + * This array is indexed by the LP field of the HPTE second dword. + * Since this field may contain some RPN bits, some entries are + * replicated so that we get the same value irrespective of RPN. + * The top 4 bits are the page size index (MMU_PAGE_*) for the + * actual page size, the bottom 4 bits are the base page size. + */ +extern u8 hpte_page_sizes[1 << LP_BITS]; + +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) +{ + unsigned int i, lp; + + if (!(h & HPTE_V_LARGE)) + return 1ul << 12; + + /* Look at the 8 bit LP value */ + lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); + i = hpte_page_sizes[lp]; + if (!i) + return 0; + if (!is_base_size) + i >>= 4; + return 1ul << mmu_psize_defs[i & 0xf].shift; +} + +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + +/* * The current system page and segment sizes */ extern int mmu_kernel_ssize; diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 263bf39ced40..9fd77f8794a0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -6,6 +6,8 @@ */ #define _PAGE_BIT_SWAP_TYPE 0 +#define _PAGE_RO 0 + #define _PAGE_EXEC 0x00001 /* execute permission */ #define _PAGE_WRITE 0x00002 /* write access allowed */ #define _PAGE_READ 0x00004 /* read access allowed */ @@ -565,10 +567,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev) * Generic functions with hash/radix callbacks */ -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void __ptep_set_access_flags(struct mm_struct *mm, + pte_t *ptep, pte_t entry) { if (radix_enabled()) - return radix__ptep_set_access_flags(ptep, entry); + return radix__ptep_set_access_flags(mm, ptep, entry); return hash__ptep_set_access_flags(ptep, entry); } diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index df294224e280..2a46dea8e1b1 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -11,6 +11,11 @@ #include <asm/book3s/64/radix-4k.h> #endif +#ifndef __ASSEMBLY__ +#include <asm/book3s/64/tlbflush-radix.h> +#include <asm/cpu_has_feature.h> +#endif + /* An empty PTE can still have a R or C writeback */ #define RADIX_PTE_NONE_MASK (_PAGE_DIRTY | _PAGE_ACCESSED) @@ -105,11 +110,8 @@ #define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE) #define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE) -static inline unsigned long radix__pte_update(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, unsigned long clr, - unsigned long set, - int huge) +static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr, + unsigned long set) { pte_t pte; unsigned long old_pte, new_pte; @@ -121,9 +123,39 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); - /* We already do a sync in cmpxchg, is ptesync needed ?*/ + return old_pte; +} + + +static inline unsigned long radix__pte_update(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, unsigned long clr, + unsigned long set, + int huge) +{ + unsigned long old_pte; + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + + unsigned long new_pte; + + old_pte = __radix_pte_update(ptep, ~0, 0); + asm volatile("ptesync" : : : "memory"); + /* + * new value of pte + */ + new_pte = (old_pte | set) & ~clr; + + /* + * For now let's do heavy pid flush + * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); + */ + radix__flush_tlb_mm(mm); + + __radix_pte_update(ptep, 0, new_pte); + } else + old_pte = __radix_pte_update(ptep, clr, set); asm volatile("ptesync" : : : "memory"); - /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); @@ -134,20 +166,33 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm, * Set the dirty and/or accessed bits atomically in a linux PTE, this * function doesn't need to invalidate tlb. */ -static inline void radix__ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void radix__ptep_set_access_flags(struct mm_struct *mm, + pte_t *ptep, pte_t entry) { - pte_t pte; - unsigned long old_pte, new_pte; + unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - do { - pte = READ_ONCE(*ptep); - old_pte = pte_val(pte); + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + + unsigned long old_pte, new_pte; + + old_pte = __radix_pte_update(ptep, ~0, 0); + asm volatile("ptesync" : : : "memory"); + /* + * new value of pte + */ new_pte = old_pte | set; - } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); + /* + * For now let's do heavy pid flush + * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize); + */ + radix__flush_tlb_mm(mm); - /* We already do a sync in cmpxchg, is ptesync needed ?*/ + __radix_pte_update(ptep, 0, new_pte); + } else + __radix_pte_update(ptep, 0, set); asm volatile("ptesync" : : : "memory"); } @@ -233,14 +278,19 @@ static inline unsigned long radix__get_tree_size(void) { unsigned long rts_field; /* - * we support 52 bits, hence 52-31 = 21, 0b10101 + * We support 52 bits, hence: + * DD1 52-28 = 24, 0b11000 + * Others 52-31 = 21, 0b10101 * RTS encoding details * bits 0 - 3 of rts -> bits 6 - 8 unsigned long * bits 4 - 5 of rts -> bits 62 - 63 of unsigned long */ - rts_field = (0x5UL << 5); /* 6 - 8 bits */ - rts_field |= (0x2UL << 61); - + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + rts_field = (0x3UL << 61); + else { + rts_field = (0x5UL << 5); /* 6 - 8 bits */ + rts_field |= (0x2UL << 61); + } return rts_field; } #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index 65037762b120..a9e19cb2f7c5 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -41,4 +41,5 @@ extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmad extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); +extern void radix__flush_tlb_all(void); #endif diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 2ef55f8968a2..b312b152461b 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature) #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS #include <linux/jump_label.h> -#define NUM_CPU_FTR_KEYS 64 +#define NUM_CPU_FTR_KEYS BITS_PER_LONG extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 3d7fc06532a1..01b8a13f0224 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state; #endif +/* Idle state entry routines */ +#ifdef CONFIG_PPC_P7_NAP +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . +#endif /* CONFIG_PPC_P7_NAP */ + #endif diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 82026b419341..f752e6f7cfbe 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -212,6 +212,7 @@ enum { #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) #define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000) +#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) #ifndef __ASSEMBLY__ @@ -472,6 +473,7 @@ enum { CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) +#define CPU_FTRS_POWER9_DD1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -490,7 +492,7 @@ enum { (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9) + CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1) #endif #else enum { diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 666bef4ebfae..9377bdf42eb8 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -3,6 +3,7 @@ #ifndef __ASSEMBLY__ #include <linux/cpumask.h> +#include <asm/cpu_has_feature.h> /* * Mapping of threads to cores diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index bed66e5743b3..2e4e7d878c8e 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -34,6 +34,7 @@ * exception handlers (including pSeries LPAR) and iSeries LPAR * implementations as possible. */ +#include <asm/head-64.h> #define EX_R9 0 #define EX_R10 8 @@ -52,7 +53,6 @@ #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ - ld r12,PACAKBASE(r13); /* get high part of &label */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label); \ mtctr r12; \ @@ -84,13 +84,14 @@ /* * We're short on space and time in the exception prolog, so we can't - * use the normal SET_REG_IMMEDIATE macro. Normally we just need the - * low halfword of the address, but for Kdump we need the whole low - * word. + * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. + * Instead we get the base of the kernel from paca->kernelbase and or in the low + * part of label. This requires that the label be within 64KB of kernelbase, and + * that kernelbase be 64K aligned. */ #define LOAD_HANDLER(reg, label) \ - /* Handlers must be within 64K of kbase, which must be 64k aligned */ \ - ori reg,reg,(label)-_stext; /* virt addr of handler ... */ + ld reg,PACAKBASE(r13); /* get high part of &label */ \ + ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; /* Exception register prefixes */ #define EXC_HV H @@ -175,7 +176,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) __EXCEPTION_PROLOG_1(area, extra, vec) #define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ - ld r12,PACAKBASE(r13); /* get high part of &label */ \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ LOAD_HANDLER(r12,label) \ @@ -192,10 +192,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); -#define __KVMTEST(n) \ - lbz r10,HSTATE_IN_GUEST(r13); \ +#define __KVMTEST(h, n) \ + lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ - bne do_kvm_##n + bne do_kvm_##h##n #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -208,8 +208,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif -#define __KVM_HANDLER(area, h, n) \ -do_kvm_##n: \ +#define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ std r10,HSTATE_CFAR(r13); \ @@ -222,21 +221,23 @@ do_kvm_##n: \ stw r9,HSTATE_SCRATCH1(r13); \ ld r9,area+EX_R9(r13); \ std r12,HSTATE_SCRATCH0(r13); \ + +#define __KVM_HANDLER(area, h, n) \ + __KVM_HANDLER_PROLOG(area, n) \ li r12,n; \ b kvmppc_interrupt #define __KVM_HANDLER_SKIP(area, h, n) \ -do_kvm_##n: \ cmpwi r10,KVM_GUEST_MODE_SKIP; \ ld r10,area+EX_R10(r13); \ beq 89f; \ - stw r9,HSTATE_SCRATCH1(r13); \ + stw r9,HSTATE_SCRATCH1(r13); \ BEGIN_FTR_SECTION_NESTED(948) \ ld r9,area+EX_PPR(r13); \ std r9,HSTATE_PPR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ ld r9,area+EX_R9(r13); \ - std r12,HSTATE_SCRATCH0(r13); \ + std r12,HSTATE_SCRATCH0(r13); \ li r12,n; \ b kvmppc_interrupt; \ 89: mtocrf 0x80,r9; \ @@ -244,12 +245,12 @@ do_kvm_##n: \ b kvmppc_skip_##h##interrupt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define KVMTEST(n) __KVMTEST(n) +#define KVMTEST(h, n) __KVMTEST(h, n) #define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n) #define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) #else -#define KVMTEST(n) +#define KVMTEST(h, n) #define KVM_HANDLER(area, h, n) #define KVM_HANDLER_SKIP(area, h, n) #endif @@ -333,94 +334,79 @@ do_kvm_##n: \ /* * Exception vectors. */ -#define STD_EXCEPTION_PSERIES(vec, label) \ - . = vec; \ - .globl label##_pSeries; \ -label##_pSeries: \ +#define STD_EXCEPTION_PSERIES(vec, label) \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, KVMTEST, vec) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ + EXC_STD, KVMTEST_PR, vec); \ /* Version of above for when we have to branch out-of-line */ +#define __OOL_EXCEPTION(vec, label, hdlr) \ + SET_SCRATCH0(r13) \ + EXCEPTION_PROLOG_0(PACA_EXGEN) \ + b hdlr; + #define STD_EXCEPTION_PSERIES_OOL(vec, label) \ - .globl label##_pSeries; \ -label##_pSeries: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD) - -#define STD_EXCEPTION_HV(loc, vec, label) \ - . = loc; \ - .globl label##_hv; \ -label##_hv: \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) + +#define STD_EXCEPTION_HV(loc, vec, label) \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_HV, KVMTEST, vec) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label, \ + EXC_HV, KVMTEST_HV, vec); -/* Version of above for when we have to branch out-of-line */ -#define STD_EXCEPTION_HV_OOL(vec, label) \ - .globl label##_hv; \ -label##_hv: \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV) +#define STD_EXCEPTION_HV_OOL(vec, label) \ + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) #define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \ - . = loc; \ - .globl label##_relon_pSeries; \ -label##_relon_pSeries: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_STD, NOTEST, vec) + EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_STD, NOTEST, vec); #define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \ - .globl label##_relon_pSeries; \ -label##_relon_pSeries: \ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD) + EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD) #define STD_RELON_EXCEPTION_HV(loc, vec, label) \ - . = loc; \ - .globl label##_relon_hv; \ -label##_relon_hv: \ /* No guest interrupts come through here */ \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \ - EXC_HV, NOTEST, vec) + EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec); #define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ - .globl label##_relon_hv; \ -label##_relon_hv: \ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV) + EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* This associate vector numbers with bits in paca->irq_happened */ #define SOFTEN_VALUE_0x500 PACA_IRQ_EE -#define SOFTEN_VALUE_0x502 PACA_IRQ_EE #define SOFTEN_VALUE_0x900 PACA_IRQ_DEC -#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC +#define SOFTEN_VALUE_0x980 PACA_IRQ_DEC #define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL -#define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI -#define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE -#define SOFTEN_VALUE_0xea2 PACA_IRQ_EE #define __SOFTEN_TEST(h, vec) \ lbz r10,PACASOFTIRQEN(r13); \ cmpwi r10,0; \ li r10,SOFTEN_VALUE_##vec; \ beq masked_##h##interrupt + #define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) #define SOFTEN_TEST_PR(vec) \ - KVMTEST(vec); \ + KVMTEST(EXC_STD, vec); \ _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_TEST_HV(vec) \ - KVMTEST(vec); \ + KVMTEST(EXC_HV, vec); \ _SOFTEN_TEST(EXC_HV, vec) +#define KVMTEST_PR(vec) \ + KVMTEST(EXC_STD, vec) + +#define KVMTEST_HV(vec) \ + KVMTEST(EXC_HV, vec) + #define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) #define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) @@ -428,58 +414,47 @@ label##_relon_hv: \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label##_common, h); + EXCEPTION_PROLOG_PSERIES_1(label, h); #define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) #define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ - . = loc; \ - .globl label##_pSeries; \ -label##_pSeries: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_STD, SOFTEN_TEST_PR) +#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \ + EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) + #define MASKABLE_EXCEPTION_HV(loc, vec, label) \ - . = loc; \ - .globl label##_hv; \ -label##_hv: \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_TEST_HV) #define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ - .globl label##_hv; \ -label##_hv: \ EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); + EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ - EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h); -#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ + __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) + +#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) #define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \ - . = loc; \ - .globl label##_relon_pSeries; \ -label##_relon_pSeries: \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ EXC_STD, SOFTEN_NOTEST_PR) #define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ - . = loc; \ - .globl label##_relon_hv; \ -label##_relon_hv: \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ EXC_HV, SOFTEN_NOTEST_HV) #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - .globl label##_relon_hv; \ -label##_relon_hv: \ EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \ - EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV); + EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) /* * Our exception common code can be passed various "additions" @@ -505,9 +480,6 @@ BEGIN_FTR_SECTION \ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) #define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \ - .align 7; \ - .globl label##_common; \ -label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ /* Volatile regs are potentially clobbered here */ \ additions; \ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index b4407d0add27..0031806475f0 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -45,10 +45,6 @@ #define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt) -#ifndef ELF_CORE_EFLAGS -#define ELF_CORE_EFLAGS 0 -#endif - /* Firmware provided dump sections */ #define FADUMP_CPU_STATE_DATA 0x0001 #define FADUMP_HPTE_REGION 0x0002 diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 57fec8ac7b92..ddf54f5bbdd1 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -186,6 +186,7 @@ label##3: \ #ifndef __ASSEMBLY__ void apply_feature_fixups(void); +void setup_feature_keys(void); #endif #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h new file mode 100644 index 000000000000..ab90c2fa1ea6 --- /dev/null +++ b/arch/powerpc/include/asm/head-64.h @@ -0,0 +1,393 @@ +#ifndef _ASM_POWERPC_HEAD_64_H +#define _ASM_POWERPC_HEAD_64_H + +#include <asm/cache.h> + +/* + * We can't do CPP stringification and concatination directly into the section + * name for some reason, so these macros can do it for us. + */ +.macro define_ftsec name + .section ".head.text.\name\()","ax",@progbits +.endm +.macro define_data_ftsec name + .section ".head.data.\name\()","a",@progbits +.endm +.macro use_ftsec name + .section ".head.text.\name\()" +.endm + +/* + * Fixed (location) sections are used by opening fixed sections and emitting + * fixed section entries into them before closing them. Multiple fixed sections + * can be open at any time. + * + * Each fixed section created in a .S file must have corresponding linkage + * directives including location, added to arch/powerpc/kernel/vmlinux.lds.S + * + * For each fixed section, code is generated into it in the order which it + * appears in the source. Fixed section entries can be placed at a fixed + * location within the section using _LOCATION postifx variants. These must + * be ordered according to their relative placements within the section. + * + * OPEN_FIXED_SECTION(section_name, start_address, end_address) + * FIXED_SECTION_ENTRY_BEGIN(section_name, label1) + * + * USE_FIXED_SECTION(section_name) + * label3: + * li r10,128 + * mv r11,r10 + + * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address) + * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, end_address) + * CLOSE_FIXED_SECTION(section_name) + * + * ZERO_FIXED_SECTION can be used to emit zeroed data. + * + * Troubleshooting: + * - If the build dies with "Error: attempt to move .org backwards" at + * CLOSE_FIXED_SECTION() or elsewhere, there may be something + * unexpected being added there. Remove the '. = x_len' line, rebuild, and + * check what is pushing the section down. + * - If the build dies in linking, check arch/powerpc/kernel/vmlinux.lds.S + * for instructions. + * - If the kernel crashes or hangs in very early boot, it could be linker + * stubs at the start of the main text. + */ + +#define OPEN_FIXED_SECTION(sname, start, end) \ + sname##_start = (start); \ + sname##_end = (end); \ + sname##_len = (end) - (start); \ + define_ftsec sname; \ + . = 0x0; \ +start_##sname: + +#define OPEN_TEXT_SECTION(start) \ + text_start = (start); \ + .section ".text","ax",@progbits; \ + . = 0x0; \ +start_text: + +#define ZERO_FIXED_SECTION(sname, start, end) \ + sname##_start = (start); \ + sname##_end = (end); \ + sname##_len = (end) - (start); \ + define_data_ftsec sname; \ + . = 0x0; \ + . = sname##_len; + +#define USE_FIXED_SECTION(sname) \ + fs_label = start_##sname; \ + fs_start = sname##_start; \ + use_ftsec sname; + +#define USE_TEXT_SECTION() \ + fs_label = start_text; \ + fs_start = text_start; \ + .text + +#define CLOSE_FIXED_SECTION(sname) \ + USE_FIXED_SECTION(sname); \ + . = sname##_len; \ +end_##sname: + + +#define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \ + USE_FIXED_SECTION(sname); \ + .align __align; \ + .global name; \ +name: + +#define FIXED_SECTION_ENTRY_BEGIN(sname, name) \ + __FIXED_SECTION_ENTRY_BEGIN(sname, name, 0) + +#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start) \ + USE_FIXED_SECTION(sname); \ + name##_start = (start); \ + .if (start) < sname##_start; \ + .error "Fixed section underflow"; \ + .abort; \ + .endif; \ + . = (start) - sname##_start; \ + .global name; \ +name: + +#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, end) \ + .if (end) > sname##_end; \ + .error "Fixed section overflow"; \ + .abort; \ + .endif; \ + .if (. - name > end - name##_start); \ + .error "Fixed entry overflow"; \ + .abort; \ + .endif; \ + . = ((end) - sname##_start); \ + + +/* + * These macros are used to change symbols in other fixed sections to be + * absolute or related to our current fixed section. + * + * - DEFINE_FIXED_SYMBOL / FIXED_SYMBOL_ABS_ADDR is used to find the + * absolute address of a symbol within a fixed section, from any section. + * + * - ABS_ADDR is used to find the absolute address of any symbol, from within + * a fixed section. + */ +#define DEFINE_FIXED_SYMBOL(label) \ + label##_absolute = (label - fs_label + fs_start) + +#define FIXED_SYMBOL_ABS_ADDR(label) \ + (label##_absolute) + +#define ABS_ADDR(label) (label - fs_label + fs_start) + +/* + * Following are the BOOK3S exception handler helper macros. + * Handlers come in a number of types, and each type has a number of varieties. + * + * EXC_REAL_* - real, unrelocated exception vectors + * EXC_VIRT_* - virt (AIL), unrelocated exception vectors + * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) + * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) + * TRAMP_KVM - KVM handlers that get put into real, unrelocated + * EXC_COMMON_* - virt, relocated common handlers + * + * The EXC handlers are given a name, and branch to name_common, or the + * appropriate KVM or masking function. Vector handler verieties are as + * follows: + * + * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception + * + * EXC_{REAL|VIRT} - standard exception + * + * EXC_{REAL|VIRT}_suffix + * where _suffix is: + * - _MASKABLE - maskable exception + * - _OOL - out of line with trampoline to common handler + * - _HV - HV exception + * + * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV + * + * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is + * an OOL vector that branches to a specified handler rather than the usual + * trampoline that goes to common. It, and other underscore macros, should + * be used with care. + * + * KVM handlers come in the following verieties: + * TRAMP_KVM + * TRAMP_KVM_SKIP + * TRAMP_KVM_HV + * TRAMP_KVM_HV_SKIP + * + * COMMON handlers come in the following verieties: + * EXC_COMMON_BEGIN/END - used to open-code the handler + * EXC_COMMON + * EXC_COMMON_ASYNC + * EXC_COMMON_HV + * + * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM + * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. + */ + +#define EXC_REAL_BEGIN(name, start, end) \ + FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start) + +#define EXC_REAL_END(name, start, end) \ + FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, end) + +#define EXC_VIRT_BEGIN(name, start, end) \ + FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start) + +#define EXC_VIRT_END(name, start, end) \ + FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, end) + +#define EXC_COMMON_BEGIN(name) \ + USE_TEXT_SECTION(); \ + .align 7; \ + .global name; \ + DEFINE_FIXED_SYMBOL(name); \ +name: + +#define TRAMP_REAL_BEGIN(name) \ + FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name) + +#define TRAMP_VIRT_BEGIN(name) \ + FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name) + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#define TRAMP_KVM_BEGIN(name) \ + TRAMP_REAL_BEGIN(name) +#else +#define TRAMP_KVM_BEGIN(name) +#endif + +#define EXC_REAL_NONE(start, end) \ + FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start); \ + FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, end) + +#define EXC_VIRT_NONE(start, end) \ + FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start); \ + FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, end); + + +#define EXC_REAL(name, start, end) \ + EXC_REAL_BEGIN(name, start, end); \ + STD_EXCEPTION_PSERIES(start, name##_common); \ + EXC_REAL_END(name, start, end); + +#define EXC_VIRT(name, start, end, realvec) \ + EXC_VIRT_BEGIN(name, start, end); \ + STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + EXC_VIRT_END(name, start, end); + +#define EXC_REAL_MASKABLE(name, start, end) \ + EXC_REAL_BEGIN(name, start, end); \ + MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \ + EXC_REAL_END(name, start, end); + +#define EXC_VIRT_MASKABLE(name, start, end, realvec) \ + EXC_VIRT_BEGIN(name, start, end); \ + MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + EXC_VIRT_END(name, start, end); + +#define EXC_REAL_HV(name, start, end) \ + EXC_REAL_BEGIN(name, start, end); \ + STD_EXCEPTION_HV(start, start, name##_common); \ + EXC_REAL_END(name, start, end); + +#define EXC_VIRT_HV(name, start, end, realvec) \ + EXC_VIRT_BEGIN(name, start, end); \ + STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \ + EXC_VIRT_END(name, start, end); + +#define __EXC_REAL_OOL(name, start, end) \ + EXC_REAL_BEGIN(name, start, end); \ + __OOL_EXCEPTION(start, label, tramp_real_##name); \ + EXC_REAL_END(name, start, end); + +#define __TRAMP_REAL_REAL_OOL(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + STD_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + +#define EXC_REAL_OOL(name, start, end) \ + __EXC_REAL_OOL(name, start, end); \ + __TRAMP_REAL_REAL_OOL(name, start); + +#define __EXC_REAL_OOL_MASKABLE(name, start, end) \ + __EXC_REAL_OOL(name, start, end); + +#define __TRAMP_REAL_REAL_OOL_MASKABLE(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + +#define EXC_REAL_OOL_MASKABLE(name, start, end) \ + __EXC_REAL_OOL_MASKABLE(name, start, end); \ + __TRAMP_REAL_REAL_OOL_MASKABLE(name, start); + +#define __EXC_REAL_OOL_HV_DIRECT(name, start, end, handler) \ + EXC_REAL_BEGIN(name, start, end); \ + __OOL_EXCEPTION(start, label, handler); \ + EXC_REAL_END(name, start, end); + +#define __EXC_REAL_OOL_HV(name, start, end) \ + __EXC_REAL_OOL(name, start, end); + +#define __TRAMP_REAL_REAL_OOL_HV(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + STD_EXCEPTION_HV_OOL(vec, name##_common); \ + +#define EXC_REAL_OOL_HV(name, start, end) \ + __EXC_REAL_OOL_HV(name, start, end); \ + __TRAMP_REAL_REAL_OOL_HV(name, start); + +#define __EXC_REAL_OOL_MASKABLE_HV(name, start, end) \ + __EXC_REAL_OOL(name, start, end); + +#define __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \ + +#define EXC_REAL_OOL_MASKABLE_HV(name, start, end) \ + __EXC_REAL_OOL_MASKABLE_HV(name, start, end); \ + __TRAMP_REAL_REAL_OOL_MASKABLE_HV(name, start); + +#define __EXC_VIRT_OOL(name, start, end) \ + EXC_VIRT_BEGIN(name, start, end); \ + __OOL_EXCEPTION(start, label, tramp_virt_##name); \ + EXC_VIRT_END(name, start, end); + +#define __TRAMP_REAL_VIRT_OOL(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + STD_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + +#define EXC_VIRT_OOL(name, start, end, realvec) \ + __EXC_VIRT_OOL(name, start, end); \ + __TRAMP_REAL_VIRT_OOL(name, realvec); + +#define __EXC_VIRT_OOL_MASKABLE(name, start, end) \ + __EXC_VIRT_OOL(name, start, end); + +#define __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + +#define EXC_VIRT_OOL_MASKABLE(name, start, end, realvec) \ + __EXC_VIRT_OOL_MASKABLE(name, start, end); \ + __TRAMP_REAL_VIRT_OOL_MASKABLE(name, realvec); + +#define __EXC_VIRT_OOL_HV(name, start, end) \ + __EXC_VIRT_OOL(name, start, end); + +#define __TRAMP_REAL_VIRT_OOL_HV(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ + +#define EXC_VIRT_OOL_HV(name, start, end, realvec) \ + __EXC_VIRT_OOL_HV(name, start, end); \ + __TRAMP_REAL_VIRT_OOL_HV(name, realvec); + +#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, end) \ + __EXC_VIRT_OOL(name, start, end); + +#define __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ + +#define EXC_VIRT_OOL_MASKABLE_HV(name, start, end, realvec) \ + __EXC_VIRT_OOL_MASKABLE_HV(name, start, end); \ + __TRAMP_REAL_VIRT_OOL_MASKABLE_HV(name, realvec); + +#define TRAMP_KVM(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_##n); \ + KVM_HANDLER(area, EXC_STD, n); \ + +#define TRAMP_KVM_SKIP(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_##n); \ + KVM_HANDLER_SKIP(area, EXC_STD, n); \ + +/* + * HV variant exceptions get the 0x2 bit added to their trap number. + */ +#define TRAMP_KVM_HV(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_H##n); \ + KVM_HANDLER(area, EXC_HV, n + 0x2); \ + +#define TRAMP_KVM_HV_SKIP(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_H##n); \ + KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \ + +#define EXC_COMMON(name, realvec, hdlr) \ + EXC_COMMON_BEGIN(name); \ + STD_EXCEPTION_COMMON(realvec, name, hdlr); \ + +#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ + EXC_COMMON_BEGIN(name); \ + STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \ + +#define EXC_COMMON_HV(name, realvec, hdlr) \ + EXC_COMMON_BEGIN(name); \ + STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \ + +#endif /* _ASM_POWERPC_HEAD_64_H */ diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 88b4901ac4ee..85b7a1a21e22 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -21,7 +21,7 @@ #ifndef __ASM_PPC64_HMI_H__ #define __ASM_PPC64_HMI_H__ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #define CORE_TB_RESYNC_REQ_BIT 63 #define MAX_SUBCORE_PER_CORE 4 diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 2fd1690b79d2..f6fda8482f60 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -241,6 +241,35 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val) #endif #endif /* __powerpc64__ */ + +/* + * Simple Cache inhibited accessors + * Unlike the DEF_MMIO_* macros, these don't include any h/w memory + * barriers, callers need to manage memory barriers on their own. + * These can only be used in hypervisor real mode. + */ + +static inline u32 _lwzcix(unsigned long addr) +{ + u32 ret; + + __asm__ __volatile__("lwzcix %0,0, %1" + : "=r" (ret) : "r" (addr) : "memory"); + return ret; +} + +static inline void _stbcix(u64 addr, u8 val) +{ + __asm__ __volatile__("stbcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void _stwcix(u64 addr, u32 val) +{ + __asm__ __volatile__("stwcix %0,0,%1" + : : "r" (val), "r" (addr) : "memory"); +} + /* * Low level IO stream instructions are defined out of line for now */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 5bca220bbb60..05cabed3d1bd 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -105,6 +105,15 @@ #define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60 #define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80 +/* book3s_hv */ + +/* + * Special trap used to indicate to host that this is a + * passthrough interrupt that could not be handled + * completely in the guest. + */ +#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555 + #define BOOK3S_IRQPRIO_SYSTEM_RESET 0 #define BOOK3S_IRQPRIO_DATA_SEGMENT 1 #define BOOK3S_IRQPRIO_INST_SEGMENT 2 @@ -136,6 +145,7 @@ #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ #define RESUME_FLAG_ARCH1 (1<<2) +#define RESUME_FLAG_ARCH2 (1<<3) #define RESUME_GUEST 0 #define RESUME_GUEST_NV RESUME_FLAG_NV diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8f39796c9da8..5cf306ae0ac3 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -69,6 +69,43 @@ struct hpte_cache { int pagesize; }; +/* + * Struct for a virtual core. + * Note: entry_exit_map combines a bitmap of threads that have entered + * in the bottom 8 bits and a bitmap of threads that have exited in the + * next 8 bits. This is so that we can atomically set the entry bit + * iff the exit map is 0 without taking a lock. + */ +struct kvmppc_vcore { + int n_runnable; + int num_threads; + int entry_exit_map; + int napping_threads; + int first_vcpuid; + u16 pcpu; + u16 last_cpu; + u8 vcore_state; + u8 in_guest; + struct kvmppc_vcore *master_vcore; + struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; + struct list_head preempt_list; + spinlock_t lock; + struct swait_queue_head wq; + spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ + u64 stolen_tb; + u64 preempt_tb; + struct kvm_vcpu *runner; + struct kvm *kvm; + u64 tb_offset; /* guest timebase - host timebase */ + ulong lpcr; + u32 arch_compat; + ulong pcr; + ulong dpdes; /* doorbell state (POWER8) */ + ulong vtb; /* virtual timebase */ + ulong conferring_threads; + unsigned int halt_poll_ns; +}; + struct kvmppc_vcpu_book3s { struct kvmppc_sid_map sid_map[SID_MAP_NUM]; struct { @@ -83,6 +120,7 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; + u64 vtb; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; @@ -191,6 +229,7 @@ extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, struct kvmppc_book3s_shadow_vcpu *svcpu); +extern int kvm_irq_bypass; static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 88d17b4ea9c8..848292176908 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -20,6 +20,8 @@ #ifndef __ASM_KVM_BOOK3S_64_H__ #define __ASM_KVM_BOOK3S_64_H__ +#include <asm/book3s/64/mmu-hash.h> + #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu) { @@ -97,56 +99,20 @@ static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v) hpte[0] = cpu_to_be64(hpte_v); } -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; + int i, b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); if (v & HPTE_V_LARGE) { - for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[b_psize].shift) - continue; - - a_psize = __hpte_actual_psize(lp, b_psize); - if (a_psize != -1) - break; - } + i = hpte_page_sizes[lp]; + b_psize = i & 0xf; + a_psize = i >> 4; } + /* * Ignore the top 14 bits of va * v have top two bits covering segment size, hence move @@ -159,7 +125,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ - rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ /* * AVA in v had cleared lower 23 bits. We need to derive * that from pteg index @@ -211,49 +176,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, break; } } - rb |= (v >> 54) & 0x300; /* B field */ + rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ return rb; } -static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, - bool is_base_size) -{ - - int size, a_psize; - /* Look at the 8 bit LP value */ - unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); - - /* only handle 4k, 64k and 16M pages for now */ - if (!(h & HPTE_V_LARGE)) - return 1ul << 12; - else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) { - if (is_base_size) - return 1ul << mmu_psize_defs[size].shift; - return 1ul << mmu_psize_defs[a_psize].shift; - } - } - - } - return 0; -} - -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 0); -} - -static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) -{ - return __hpte_page_size(h, l, 1); -} - static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ec35af34a3fb..28350a294b1e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -43,6 +43,8 @@ #include <asm/cputhreads.h> #define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES) +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif @@ -95,42 +97,49 @@ struct kvmppc_vcpu_book3s; struct kvmppc_book3s_shadow_vcpu; struct kvm_vm_stat { - u32 remote_tlb_flush; + ulong remote_tlb_flush; }; struct kvm_vcpu_stat { - u32 sum_exits; - u32 mmio_exits; - u32 signal_exits; - u32 light_exits; + u64 sum_exits; + u64 mmio_exits; + u64 signal_exits; + u64 light_exits; /* Account for special types of light exits: */ - u32 itlb_real_miss_exits; - u32 itlb_virt_miss_exits; - u32 dtlb_real_miss_exits; - u32 dtlb_virt_miss_exits; - u32 syscall_exits; - u32 isi_exits; - u32 dsi_exits; - u32 emulated_inst_exits; - u32 dec_exits; - u32 ext_intr_exits; - u32 halt_successful_poll; - u32 halt_attempted_poll; - u32 halt_poll_invalid; - u32 halt_wakeup; - u32 dbell_exits; - u32 gdbell_exits; - u32 ld; - u32 st; + u64 itlb_real_miss_exits; + u64 itlb_virt_miss_exits; + u64 dtlb_real_miss_exits; + u64 dtlb_virt_miss_exits; + u64 syscall_exits; + u64 isi_exits; + u64 dsi_exits; + u64 emulated_inst_exits; + u64 dec_exits; + u64 ext_intr_exits; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; + u64 halt_wait_ns; + u64 halt_successful_poll; + u64 halt_attempted_poll; + u64 halt_successful_wait; + u64 halt_poll_invalid; + u64 halt_wakeup; + u64 dbell_exits; + u64 gdbell_exits; + u64 ld; + u64 st; #ifdef CONFIG_PPC_BOOK3S - u32 pf_storage; - u32 pf_instruc; - u32 sp_storage; - u32 sp_instruc; - u32 queue_intr; - u32 ld_slow; - u32 st_slow; + u64 pf_storage; + u64 pf_instruc; + u64 sp_storage; + u64 sp_instruc; + u64 queue_intr; + u64 ld_slow; + u64 st_slow; #endif + u64 pthru_all; + u64 pthru_host; + u64 pthru_bad_aff; }; enum kvm_exit_types { @@ -197,6 +206,8 @@ struct kvmppc_spapr_tce_table { struct kvmppc_xics; struct kvmppc_icp; +struct kvmppc_passthru_irqmap; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -267,6 +278,7 @@ struct kvm_arch { #endif #ifdef CONFIG_KVM_XICS struct kvmppc_xics *xics; + struct kvmppc_passthru_irqmap *pimap; #endif struct kvmppc_ops *kvm_ops; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -275,41 +287,6 @@ struct kvm_arch { #endif }; -/* - * Struct for a virtual core. - * Note: entry_exit_map combines a bitmap of threads that have entered - * in the bottom 8 bits and a bitmap of threads that have exited in the - * next 8 bits. This is so that we can atomically set the entry bit - * iff the exit map is 0 without taking a lock. - */ -struct kvmppc_vcore { - int n_runnable; - int num_threads; - int entry_exit_map; - int napping_threads; - int first_vcpuid; - u16 pcpu; - u16 last_cpu; - u8 vcore_state; - u8 in_guest; - struct kvmppc_vcore *master_vcore; - struct list_head runnable_threads; - struct list_head preempt_list; - spinlock_t lock; - struct swait_queue_head wq; - spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ - u64 stolen_tb; - u64 preempt_tb; - struct kvm_vcpu *runner; - struct kvm *kvm; - u64 tb_offset; /* guest timebase - host timebase */ - ulong lpcr; - u32 arch_compat; - ulong pcr; - ulong dpdes; /* doorbell state (POWER8) */ - ulong conferring_threads; -}; - #define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff) #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) @@ -329,6 +306,7 @@ struct kvmppc_vcore { #define VCORE_SLEEPING 3 #define VCORE_RUNNING 4 #define VCORE_EXITING 5 +#define VCORE_POLLING 6 /* * Struct used to manage memory for a virtual processor area @@ -397,6 +375,20 @@ struct kvmhv_tb_accumulator { u64 tb_max; /* max time */ }; +#ifdef CONFIG_PPC_BOOK3S_64 +struct kvmppc_irq_map { + u32 r_hwirq; + u32 v_hwirq; + struct irq_desc *desc; +}; + +#define KVMPPC_PIRQ_MAPPED 1024 +struct kvmppc_passthru_irqmap { + int n_mapped; + struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED]; +}; +#endif + # ifdef CONFIG_PPC_FSL_BOOK3E #define KVMPPC_BOOKE_IAC_NUM 2 #define KVMPPC_BOOKE_DAC_NUM 2 @@ -483,7 +475,6 @@ struct kvm_vcpu_arch { ulong purr; ulong spurr; ulong ic; - ulong vtb; ulong dscr; ulong amr; ulong uamor; @@ -668,7 +659,6 @@ struct kvm_vcpu_arch { long pgfault_index; unsigned long pgfault_hpte[2]; - struct list_head run_list; struct task_struct *run_task; struct kvm_run *kvm_run; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2544edabe7f3..f6e49640dbe1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -287,6 +287,10 @@ struct kvmppc_ops { long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); int (*hcall_implemented)(unsigned long hcall); + int (*irq_bypass_add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*irq_bypass_del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); }; extern struct kvmppc_ops *kvmppc_hv_ops; @@ -453,8 +457,19 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return vcpu->arch.irq_type == KVMPPC_IRQ_XICS; } + +static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( + struct kvm *kvm) +{ + if (kvm && kvm_irq_bypass) + return kvm->arch.pimap; + return NULL; +} + extern void kvmppc_alloc_host_rm_ops(void); extern void kvmppc_free_host_rm_ops(void); +extern void kvmppc_free_pimap(struct kvm *kvm); +extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall); extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); @@ -464,10 +479,23 @@ extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, u32 cpu); extern void kvmppc_xics_ipi_action(void); +extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, + unsigned long host_irq); +extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, + unsigned long host_irq); +extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap); extern int h_ipi_redirect; #else +static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( + struct kvm *kvm) + { return NULL; } static inline void kvmppc_alloc_host_rm_ops(void) {}; static inline void kvmppc_free_host_rm_ops(void) {}; +static inline void kvmppc_free_pimap(struct kvm *kvm) {}; +static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) + { return 0; } static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { } diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 0420b388dd83..e02cbc6a6c70 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -61,7 +61,7 @@ struct machdep_calls { void (*init_IRQ)(void); - /* Return an irq, or NO_IRQ to indicate there are none pending. */ + /* Return an irq, or 0 to indicate there are none pending. */ unsigned int (*get_irq)(void); /* PCI stuff */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index cd4f04a74802..b62a8d43a06c 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -313,6 +313,9 @@ extern int book3e_htw_mode; * return 1, indicating that the tlb requires preloading. */ #define HUGETLB_NEED_PRELOAD + +#define mmu_cleanup_all NULL + #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e2fb408f8398..e88368354e49 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -204,6 +204,10 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; * make it match the size our of bolted TLB area */ extern u64 ppc64_rma_size; + +/* Cleanup function used by kexec */ +extern void mmu_cleanup_all(void); +extern void radix__mmu_cleanup_all(void); #endif /* CONFIG_PPC64 */ struct mm_struct; @@ -271,6 +275,7 @@ static inline bool early_radix_enabled(void) #define MMU_PAGE_16G 13 #define MMU_PAGE_64G 14 +/* N.B. we need to change the type of hpte_page_sizes if this gets to be > 16 */ #define MMU_PAGE_COUNT 15 #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 9d2cd0c36ec2..5c451140660a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -18,6 +18,7 @@ extern void destroy_context(struct mm_struct *mm); #ifdef CONFIG_SPAPR_TCE_IOMMU struct mm_iommu_table_group_mem_t; +extern int isolate_lru_page(struct page *page); /* from internal.h */ extern bool mm_iommu_preregistered(void); extern long mm_iommu_get(unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); @@ -71,7 +72,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { /* Mark this context has been used on the new CPU */ - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) + cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); /* 32-bit keeps track of the current PGDIR in the thread struct */ #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 7b589178be46..4d52ccfc2366 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -41,6 +41,9 @@ u64 memory_hotplug_max(void); #else #define memory_hotplug_max() memblock_end_of_DRAM() #endif /* CONFIG_NEED_MULTIPLE_NODES */ +#ifdef CONFIG_FA_DUMP +#define __HAVE_ARCH_RESERVED_KERNEL_PAGES +#endif #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h index d4f471fb1031..088420d8aa59 100644 --- a/arch/powerpc/include/asm/mpic_msgr.h +++ b/arch/powerpc/include/asm/mpic_msgr.h @@ -122,9 +122,9 @@ static inline void mpic_msgr_set_destination(struct mpic_msgr *msgr, * @msgr: the message register whose IRQ is to be returned * * Returns the IRQ number associated with the given message register. - * NO_IRQ is returned if this message register is not capable of - * receiving interrupts. What message register can and cannot receive - * interrupts is specified in the device tree for the system. + * 0 is returned if this message register is not capable of receiving + * interrupts. What message register can and cannot receive interrupts is + * specified in the device tree for the system. */ static inline int mpic_msgr_get_irq(struct mpic_msgr *msgr) { diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 780847597514..c219ef7be53b 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -267,7 +267,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, } -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void __ptep_set_access_flags(struct mm_struct *mm, + pte_t *ptep, pte_t entry) { unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index d4d808cf905e..653a1838469d 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -300,7 +300,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, /* Set the dirty and/or accessed bits atomically in a linux PTE, this * function doesn't need to flush the hash entry */ -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) +static inline void __ptep_set_access_flags(struct mm_struct *mm, + pte_t *ptep, pte_t entry) { unsigned long bits = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ee05bd203630..e958b7096f19 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -67,6 +67,7 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func, int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, uint64_t offset, uint32_t data); int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); +int64_t opal_rm_set_xive(uint32_t isn, uint16_t server, uint8_t priority); int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority); int64_t opal_register_exception_handler(uint64_t opal_exception, uint64_t handler_address, diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 148303e7771f..6a6792bb39fb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -183,11 +183,6 @@ struct paca_struct { */ u16 in_mce; u8 hmi_event_available; /* HMI event is available */ - /* - * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for - * more details - */ - struct sibling_subcore_state *sibling_subcore_state; #endif /* Stuff for accurate time accounting */ @@ -202,6 +197,13 @@ struct paca_struct { struct kvmppc_book3s_shadow_vcpu shadow_vcpu; #endif struct kvmppc_host_state kvm_hstate; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for + * more details + */ + struct sibling_subcore_state *sibling_subcore_state; +#endif #endif }; diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h index a452968b29ea..6595ad1d18cc 100644 --- a/arch/powerpc/include/asm/parport.h +++ b/arch/powerpc/include/asm/parport.h @@ -28,7 +28,7 @@ static int parport_pc_find_nonpci_ports (int autoirq, int autodma) io1 = prop[1]; io2 = prop[2]; virq = irq_of_parse_and_map(np, 0); - if (virq == NO_IRQ) + if (!virq) continue; if (parport_pc_probe_port(io1, io2, virq, autodma, NULL, 0) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index b5e88e4a171a..c0309c59bed8 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -301,6 +301,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose, /* Allocate & free a PCI host bridge structure */ extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev); extern void pcibios_free_controller(struct pci_controller *phb); +extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge); #ifdef CONFIG_PCI extern int pcibios_vaddr_is_ioport(void __iomem *address); diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 0cbd8134ce81..696438f09aea 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -12,10 +12,11 @@ #include <linux/pci.h> #include <linux/pci_hotplug.h> +#include <linux/irq.h> #include <misc/cxl-base.h> #include <asm/opal-api.h> -#define PCI_SLOT_ID_PREFIX 0x8000000000000000 +#define PCI_SLOT_ID_PREFIX (1UL << 63) #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) @@ -33,6 +34,8 @@ int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq); +bool is_pnv_opal_msi(struct irq_chip *chip); #ifdef CONFIG_CXL_BASE int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, @@ -60,6 +63,8 @@ struct pnv_php_slot { #define PNV_PHP_STATE_POPULATED 2 #define PNV_PHP_STATE_OFFLINE 3 int state; + int irq; + struct workqueue_struct *wq; struct device_node *dn; struct pci_dev *pdev; struct pci_bus *bus; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 127ebf5862b4..54ff8ce7fa96 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -236,6 +236,7 @@ #define PPC_INST_STWU 0x94000000 #define PPC_INST_MFLR 0x7c0802a6 #define PPC_INST_MTLR 0x7c0803a6 +#define PPC_INST_MTCTR 0x7c0903a6 #define PPC_INST_CMPWI 0x2c000000 #define PPC_INST_CMPDI 0x2c200000 #define PPC_INST_CMPW 0x7c000000 @@ -250,6 +251,7 @@ #define PPC_INST_SUB 0x7c000050 #define PPC_INST_BLR 0x4e800020 #define PPC_INST_BLRL 0x4e800021 +#define PPC_INST_BCTR 0x4e800420 #define PPC_INST_MULLD 0x7c0001d2 #define PPC_INST_MULLW 0x7c0001d6 #define PPC_INST_MULHWU 0x7c000016 diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index d5d5b5e348f2..c73750b0d9fa 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -201,14 +201,12 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #ifdef PPC64_ELF_ABI_v2 #define _GLOBAL(name) \ - .section ".text"; \ .align 2 ; \ .type name,@function; \ .globl name; \ name: #define _GLOBAL_TOC(name) \ - .section ".text"; \ .align 2 ; \ .type name,@function; \ .globl name; \ @@ -217,13 +215,6 @@ name: \ addi r2,r2,(.TOC.-0b)@l; \ .localentry name,.-name -#define _KPROBE(name) \ - .section ".kprobes.text","a"; \ - .align 2 ; \ - .type name,@function; \ - .globl name; \ -name: - #define DOTSYM(a) a #else @@ -232,35 +223,20 @@ name: #define GLUE(a,b) XGLUE(a,b) #define _GLOBAL(name) \ - .section ".text"; \ .align 2 ; \ .globl name; \ .globl GLUE(.,name); \ - .section ".opd","aw"; \ + .pushsection ".opd","aw"; \ name: \ .quad GLUE(.,name); \ .quad .TOC.@tocbase; \ .quad 0; \ - .previous; \ + .popsection; \ .type GLUE(.,name),@function; \ GLUE(.,name): #define _GLOBAL_TOC(name) _GLOBAL(name) -#define _KPROBE(name) \ - .section ".kprobes.text","a"; \ - .align 2 ; \ - .globl name; \ - .globl GLUE(.,name); \ - .section ".opd","aw"; \ -name: \ - .quad GLUE(.,name); \ - .quad .TOC.@tocbase; \ - .quad 0; \ - .previous; \ - .type GLUE(.,name),@function; \ -GLUE(.,name): - #define DOTSYM(a) GLUE(.,a) #endif @@ -272,20 +248,28 @@ GLUE(.,name): n: #define _GLOBAL(n) \ - .text; \ .stabs __stringify(n:F-1),N_FUN,0,0,n;\ .globl n; \ n: #define _GLOBAL_TOC(name) _GLOBAL(name) -#define _KPROBE(n) \ - .section ".kprobes.text","a"; \ - .globl n; \ -n: - #endif +/* + * __kprobes (the C annotation) puts the symbol into the .kprobes.text + * section, which gets emitted at the end of regular text. + * + * _ASM_NOKPROBE_SYMBOL and NOKPROBE_SYMBOL just adds the symbol to + * a blacklist. The former is for core kprobe functions/data, the + * latter is for those that incdentially must be excluded from probing + * and allows them to be linked at more optimal location within text. + */ +#define _ASM_NOKPROBE_SYMBOL(entry) \ + .pushsection "_kprobe_blacklist","aw"; \ + PPC_LONG (entry) ; \ + .popsection + #define FUNC_START(name) _GLOBAL(name) #define FUNC_END(name) @@ -527,7 +511,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) #endif #define MTMSRD(r) mtmsr r #define MTMSR_EERI(reg) mtmsr reg -#define CLR_TOP32(r) #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 68e3bf57b027..c07c31b0e89e 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -147,7 +147,7 @@ typedef struct { } mm_segment_t; #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET] -#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET] +#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET] /* FP and VSX 0-31 register set */ struct thread_fp_state { @@ -257,6 +257,7 @@ struct thread_struct { int used_spe; /* set if process has used spe */ #endif /* CONFIG_SPE */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + u8 load_tm; u64 tm_tfhar; /* Transaction fail handler addr */ u64 tm_texasr; /* Transaction exception & summary */ u64 tm_tfiar; /* Transaction fail instr address reg */ @@ -267,20 +268,17 @@ struct thread_struct { unsigned long tm_dscr; /* - * Transactional FP and VSX 0-31 register set. - * NOTE: the sense of these is the opposite of the integer ckpt_regs! + * Checkpointed FP and VSX 0-31 register set. * * When a transaction is active/signalled/scheduled etc., *regs is the * most recent set of/speculated GPRs with ckpt_regs being the older * checkpointed regs to which we roll back if transaction aborts. * - * However, fpr[] is the checkpointed 'base state' of FP regs, and - * transact_fpr[] is the new set of transactional values. - * VRs work the same way. + * These are analogous to how ckpt_regs and pt_regs work */ - struct thread_fp_state transact_fp; - struct thread_vr_state transact_vr; - unsigned long transact_vrsave; + struct thread_fp_state ckfp_state; /* Checkpointed FP state */ + struct thread_vr_state ckvr_state; /* Checkpointed VR state */ + unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f69f40f1519a..2a620789954b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -475,6 +475,9 @@ #define HID0_POWER8_1TO4LPAR __MASK(51) #define HID0_POWER8_DYNLPARDIS __MASK(48) +/* POWER9 HID0 bits */ +#define HID0_POWER9_RADIX __MASK(63 - 8) + #define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ #ifdef CONFIG_6xx #define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */ @@ -737,6 +740,7 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCR2 785 +#define SPRN_UMMCR2 769 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SDAR_DCACHE_MISS 0x40000000UL @@ -1247,7 +1251,7 @@ static inline void mtmsr_isync(unsigned long val) : "memory") #endif -extern void msr_check_and_set(unsigned long bits); +extern unsigned long msr_check_and_set(unsigned long bits); extern bool strict_msr_control; extern void __msr_check_and_clear(unsigned long bits); static inline void msr_check_and_clear(unsigned long bits) diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 9322c28aebd2..5ff77722a52d 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -5,6 +5,4 @@ #include <uapi/asm/signal.h> #include <uapi/asm/ptrace.h> -extern unsigned long get_tm_stackpointer(struct pt_regs *regs); - #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0a74ebe934e1..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void) static inline void __giveup_spe(struct task_struct *t) { } #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void flush_tmregs_to_thread(struct task_struct *); -#else -static inline void flush_tmregs_to_thread(struct task_struct *t) -{ -} -#endif - static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index c22d704b6d41..82e06ca3a49b 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -9,11 +9,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void do_load_up_transact_fpu(struct thread_struct *thread); -extern void do_load_up_transact_altivec(struct thread_struct *thread); -#endif - extern void tm_enable(void); extern void tm_reclaim(struct thread_struct *thread, unsigned long orig_msr, uint8_t cause); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index b7c20f0b8fbe..c266227fdd5b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -308,29 +308,20 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) { + check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; } + memset(to, 0, n); return n; } static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) + if (access_ok(VERIFY_WRITE, to, n)) { + check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; } return n; } @@ -372,6 +363,9 @@ static inline unsigned long __copy_from_user_inatomic(void *to, if (ret == 0) return 0; } + + check_object_size(to, n, false); + return __copy_tofrom_user((__force void __user *)to, from, n); } @@ -398,6 +392,9 @@ static inline unsigned long __copy_to_user_inatomic(void __user *to, if (ret == 0) return 0; } + + check_object_size(from, n, true); + return __copy_tofrom_user(to, (__force const void __user *)from, n); } @@ -422,10 +419,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } return size; } diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index f5f729c11578..f0b238516e9b 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void); extern void xics_kexec_teardown_cpu(int secondary); extern void xics_migrate_irqs_away(void); extern void icp_native_eoi(struct irq_data *d); +extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type); +extern int xics_retrigger(struct irq_data *data); #ifdef CONFIG_SMP extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, unsigned int strict_check); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b2027a5cf508..aded29ad2e8f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -31,8 +31,7 @@ obj-y := cputable.o ptrace.o syscalls.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ - udbg.o misc.o io.o dma.o \ - misc_$(CONFIG_WORD_SIZE).o \ + udbg.o misc.o io.o dma.o misc_$(BITS).o \ of_platform.o prom_parse.o obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ @@ -41,7 +40,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o -obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o hmi.o +obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o @@ -70,23 +69,23 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o ifeq ($(CONFIG_FSL_BOOKE),y) obj-$(CONFIG_HIBERNATION) += swsusp_booke.o else -obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o -obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_MODULES) += module.o module_$(BITS).o obj-$(CONFIG_44x) += cpu_setup_44x.o obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-y := head_$(CONFIG_WORD_SIZE).o +extra-y := head_$(BITS).o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o extra-$(CONFIG_8xx) := head_8xx.o extra-y += vmlinux.lds -obj-$(CONFIG_RELOCATABLE) += reloc_$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o obj-$(CONFIG_PPC32) += entry_32.o setup_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o @@ -104,11 +103,11 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o -obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \ +obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \ - machine_kexec_$(CONFIG_WORD_SIZE).o + machine_kexec_$(BITS).o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b89d14c0352c..caec7bf3b99a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -142,12 +142,12 @@ int main(void) DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr)); DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr)); DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs)); - DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct, - transact_vr)); - DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct, - transact_vrsave)); - DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct, - transact_fp)); + DEFINE(THREAD_CKVRSTATE, offsetof(struct thread_struct, + ckvr_state)); + DEFINE(THREAD_CKVRSAVE, offsetof(struct thread_struct, + ckvrsave)); + DEFINE(THREAD_CKFPSTATE, offsetof(struct thread_struct, + ckfp_state)); /* Local pt_regs on stack for Transactional Memory funcs. */ DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -506,7 +506,6 @@ int main(void) DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic)); - DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb)); DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); @@ -557,6 +556,7 @@ int main(void) DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes)); + DEFINE(VCORE_VTB, offsetof(struct kvmppc_vcore, vtb)); DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 74248ab18e98..6c4646ac9234 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -506,6 +506,25 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power9 DD1*/ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004e0100, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power9", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .flush_tlb = __flush_tlb_power9, + .platform = "power9", + }, { /* Power9 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004e0000, diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9bc78e9c610..f25731627d7f 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -116,6 +116,7 @@ struct eeh_ops *eeh_ops = NULL; /* Lock to avoid races due to multiple reports of an error */ DEFINE_RAW_SPINLOCK(confirm_error_lock); +EXPORT_SYMBOL_GPL(confirm_error_lock); /* Lock to protect passed flags */ static DEFINE_MUTEX(eeh_dev_mutex); @@ -168,10 +169,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); - pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", + pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); @@ -1044,7 +1045,7 @@ int eeh_init(void) if (eeh_enabled()) pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); else - pr_warn("EEH: No capable adapters found\n"); + pr_info("EEH: No capable adapters found\n"); return ret; } @@ -1502,6 +1503,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option) break; case EEH_OPT_THAW_MMIO: case EEH_OPT_THAW_DMA: + case EEH_OPT_FREEZE_PE: if (!eeh_ops || !eeh_ops->set_option) { ret = -ENOENT; break; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 5f36e8a70daa..a62be72da274 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -993,9 +993,17 @@ static void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS); - bus = eeh_pe_bus_get(phb_pe); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); + bus = eeh_pe_bus_get(phb_pe); + if (!bus) { + pr_err("%s: Cannot find PCI bus for " + "PHB#%d-PE#%x\n", + __func__, + pe->phb->global_number, + pe->addr); + break; + } pci_hp_remove_devices(bus); } pci_unlock_rescan_remove(); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index f0520da85759..de7d091c4c31 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -581,6 +581,7 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) { eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); } +EXPORT_SYMBOL_GPL(eeh_pe_state_mark); static void *__eeh_pe_dev_mode_mark(void *data, void *flag) { diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 9899032230b4..83428a283fa0 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -654,7 +654,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) #endif /* CONFIG_SMP */ tophys(r0,r4) - CLR_TOP32(r0) mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ lwz r1,KSP(r4) /* Load new stack pointer */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 6b8bc0dd09d4..51df82b61084 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -139,7 +139,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r11,PACAKMSR(r13) + li r11,MSR_RI ori r11,r11,MSR_EE mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -195,7 +195,6 @@ system_call: /* label this so stack traces look sane */ #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* * For performance reasons we clear RI the same time that we * clear EE. We only need to clear RI just before we restore r13 @@ -203,8 +202,7 @@ system_call: /* label this so stack traces look sane */ * We have to be careful to restore RI if we branch anywhere from * here (eg syscall_exit_work). */ - li r9,MSR_RI - andc r11,r10,r9 + li r11,0 mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -221,13 +219,12 @@ system_call: /* label this so stack traces look sane */ #endif 2: addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI mtmsrd r10,1 /* Restore RI */ #endif bl restore_math #ifdef CONFIG_PPC_BOOK3S - ld r10,PACAKMSR(r13) - li r9,MSR_RI - andc r11,r10,r9 /* Re-clear RI */ + li r11,0 mtmsrd r11,1 #endif ld r8,_MSR(r1) @@ -308,6 +305,7 @@ syscall_enosys: syscall_exit_work: #ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI mtmsrd r10,1 /* Restore RI */ #endif /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. @@ -354,7 +352,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) #ifdef CONFIG_PPC_BOOK3E wrteei 1 #else - ld r10,PACAKMSR(r13) + li r10,MSR_RI ori r10,r10,MSR_EE mtmsrd r10,1 #endif /* CONFIG_PPC_BOOK3E */ @@ -368,13 +366,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) tabort_syscall: /* Firstly we need to enable TM in the kernel */ mfmsr r10 - li r13, 1 - rldimi r10, r13, MSR_TM_LG, 63-MSR_TM_LG + li r9, 1 + rldimi r10, r9, MSR_TM_LG, 63-MSR_TM_LG mtmsrd r10, 0 /* tabort, this dooms the transaction, nothing else */ - li r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) - TABORT(R13) + li r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT) + TABORT(R9) /* * Return directly to userspace. We have corrupted user register state, @@ -382,8 +380,8 @@ tabort_syscall: * resume after the tbegin of the aborted transaction with the * checkpointed register state. */ - li r13, MSR_RI - andc r10, r10, r13 + li r9, MSR_RI + andc r10, r10, r9 mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 @@ -619,7 +617,7 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ @@ -751,7 +749,7 @@ resume_kernel: #ifdef CONFIG_PPC_BOOK3E wrteei 0 #else - ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */ + li r10,MSR_RI mtmsrd r10,1 /* Update machine state */ #endif /* CONFIG_PPC_BOOK3E */ #endif /* CONFIG_PREEMPT */ @@ -841,8 +839,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * userspace and we take an exception after restoring r13, * we end up corrupting the userspace r13 value. */ - ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */ - andc r4,r4,r0 /* r0 contains MSR_RI here */ + li r4,0 mtmsrd r4,1 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 41091fdf9bd8..08992f8f5036 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -16,72 +16,71 @@ #include <asm/exception-64s.h> #include <asm/ptrace.h> #include <asm/cpuidle.h> +#include <asm/head-64.h> /* + * There are a few constraints to be concerned with. + * - Real mode exceptions code/data must be located at their physical location. + * - Virtual mode exceptions must be mapped at their 0xc000... location. + * - Fixed location code must not call directly beyond the __end_interrupts + * area when built with CONFIG_RELOCATABLE. LOAD_HANDLER / bctr sequence + * must be used. + * - LOAD_HANDLER targets must be within first 64K of physical 0 / + * virtual 0xc00... + * - Conditional branch targets must be within +/-32K of caller. + * + * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and + * therefore don't have to run in physically located code or rfid to + * virtual mode kernel code. However on relocatable kernels they do have + * to branch to KERNELBASE offset because the rest of the kernel (outside + * the exception vectors) may be located elsewhere. + * + * Virtual exceptions correspond with physical, except their entry points + * are offset by 0xc000000000000000 and also tend to get an added 0x4000 + * offset applied. Virtual exceptions are enabled with the Alternate + * Interrupt Location (AIL) bit set in the LPCR. However this does not + * guarantee they will be delivered virtually. Some conditions (see the ISA) + * cause exceptions to be delivered in real mode. + * + * It's impossible to receive interrupts below 0x300 via AIL. + * + * KVM: None of the virtual exceptions are from the guest. Anything that + * escalated to HV=1 from HV=0 is delivered via real mode handlers. + * + * * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code - * 0x0100 - 0x17ff : pSeries Interrupt prologs - * 0x1800 - 0x4000 : interrupt support common interrupt prologs - * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1 - * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1 + * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors + * 0x1900 - 0x3fff : Real mode trampolines + * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors + * 0x5900 - 0x6fff : Relon mode trampolines * 0x7000 - 0x7fff : FWNMI data area - * 0x8000 - 0x8fff : Initial (CPU0) segment table - * 0x9000 - : Early init and support code + * 0x8000 - .... : Common interrupt handlers, remaining early + * setup code, rest of kernel. + * + * We could reclaim 0x4000-0x42ff for real mode trampolines if the space + * is necessary. Until then it's more consistent to explicitly put VIRT_NONE + * vectors there. */ - /* Syscall routine is used twice, in reloc-off and reloc-on paths */ -#define SYSCALL_PSERIES_1 \ -BEGIN_FTR_SECTION \ - cmpdi r0,0x1ebe ; \ - beq- 1f ; \ -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ - mr r9,r13 ; \ - GET_PACA(r13) ; \ - mfspr r11,SPRN_SRR0 ; \ -0: - -#define SYSCALL_PSERIES_2_RFID \ - mfspr r12,SPRN_SRR1 ; \ - ld r10,PACAKBASE(r13) ; \ - LOAD_HANDLER(r10, system_call_entry) ; \ - mtspr SPRN_SRR0,r10 ; \ - ld r10,PACAKMSR(r13) ; \ - mtspr SPRN_SRR1,r10 ; \ - rfid ; \ - b . ; /* prevent speculative execution */ - -#define SYSCALL_PSERIES_3 \ - /* Fast LE/BE switch system call */ \ -1: mfspr r12,SPRN_SRR1 ; \ - xori r12,r12,MSR_LE ; \ - mtspr SPRN_SRR1,r12 ; \ - rfid ; /* return to userspace */ \ - b . ; /* prevent speculative execution */ - -#if defined(CONFIG_RELOCATABLE) - /* - * We can't branch directly so we do it via the CTR which - * is volatile across system calls. - */ -#define SYSCALL_PSERIES_2_DIRECT \ - mflr r10 ; \ - ld r12,PACAKBASE(r13) ; \ - LOAD_HANDLER(r12, system_call_entry) ; \ - mtctr r12 ; \ - mfspr r12,SPRN_SRR1 ; \ - /* Re-use of r13... No spare regs to do this */ \ - li r13,MSR_RI ; \ - mtmsrd r13,1 ; \ - GET_PACA(r13) ; /* get r13 back */ \ - bctr ; +OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900) +OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x4000) +OPEN_FIXED_SECTION(virt_vectors, 0x4000, 0x5900) +OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000) +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + * pseries and powernv need to keep the whole page from + * 0x7000 to 0x8000 free for use by the firmware + */ +ZERO_FIXED_SECTION(fwnmi_page, 0x7000, 0x8000) +OPEN_TEXT_SECTION(0x8000) #else - /* We can branch directly */ -#define SYSCALL_PSERIES_2_DIRECT \ - mfspr r12,SPRN_SRR1 ; \ - li r10,MSR_RI ; \ - mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_common ; +OPEN_TEXT_SECTION(0x7000) #endif +USE_FIXED_SECTION(real_vectors) + /* * This is the start of the interrupt handlers for pSeries * This code runs with relocation off. @@ -90,12 +89,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ * Therefore any relative branches in this section must only * branch to labels in this section. */ - . = 0x100 .globl __start_interrupts __start_interrupts: - .globl system_reset_pSeries; -system_reset_pSeries: +/* No virt vectors corresponding with 0x0..0x100 */ +EXC_VIRT_NONE(0x4000, 0x4100) + +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) #ifdef CONFIG_PPC_P7_NAP BEGIN_FTR_SECTION @@ -136,290 +136,44 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, NOTEST, 0x100) +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) +EXC_COMMON(system_reset_common, 0x100, system_reset_exception) + +#ifdef CONFIG_PPC_PSERIES +/* + * Vectors for the FWNMI option. Share common code. + */ +TRAMP_REAL_BEGIN(system_reset_fwnmi) + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + NOTEST, 0x100) +#endif /* CONFIG_PPC_PSERIES */ + - . = 0x200 -machine_check_pSeries_1: +EXC_REAL_BEGIN(machine_check, 0x200, 0x300) /* This is moved out of line as it can be patched by FW, but * some code path might still want to branch into the original * vector */ SET_SCRATCH0(r13) /* save r13 */ -#ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + /* + * Running native on arch 2.06 or later, we may wakeup from winkle + * inside machine check. If yes, then last bit of HSPGR0 would be set + * to 1. Hence clear it unconditionally. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - beq 9f - - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal. let's just stay stuck here */ - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - bgt cr1,. -9: - OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ + GET_PACA(r13) + clrrdi r13,r13,1 + SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early FTR_SECTION_ELSE b machine_check_pSeries_0 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - - . = 0x300 - .globl data_access_pSeries -data_access_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD, - KVMTEST, 0x300) - - . = 0x380 - .globl data_access_slb_pSeries -data_access_slb_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_DAR - mfspr r12,SPRN_SRR1 -#ifndef CONFIG_RELOCATABLE - b slb_miss_realmode -#else - /* - * We can't just use a direct branch to slb_miss_realmode - * because the distance from here to there depends on where - * the kernel ends up being put. - */ - mfctr r11 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, slb_miss_realmode) - mtctr r10 - bctr -#endif - - STD_EXCEPTION_PSERIES(0x400, instruction_access) - - . = 0x480 - .globl instruction_access_slb_pSeries -instruction_access_slb_pSeries: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - mfspr r12,SPRN_SRR1 -#ifndef CONFIG_RELOCATABLE - b slb_miss_realmode -#else - mfctr r11 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10, slb_miss_realmode) - mtctr r10 - bctr -#endif - - /* We open code these as we can't have a ". = x" (even with - * x = "." within a feature section - */ - . = 0x500; - .globl hardware_interrupt_pSeries; - .globl hardware_interrupt_hv; -hardware_interrupt_pSeries: -hardware_interrupt_hv: - BEGIN_FTR_SECTION - _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt, - EXC_HV, SOFTEN_TEST_HV) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) - FTR_SECTION_ELSE - _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt, - EXC_STD, SOFTEN_TEST_PR) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) - - STD_EXCEPTION_PSERIES(0x600, alignment) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600) - - STD_EXCEPTION_PSERIES(0x700, program_check) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700) - - STD_EXCEPTION_PSERIES(0x800, fp_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800) - - . = 0x900 - .globl decrementer_pSeries -decrementer_pSeries: - _MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR) - - STD_EXCEPTION_HV(0x980, 0x982, hdecrementer) - - MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00) - - STD_EXCEPTION_PSERIES(0xb00, trap_0b) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00) - - . = 0xc00 - .globl system_call_pSeries -system_call_pSeries: - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST(0xc00) - GET_SCRATCH0(r13) -#else - HMT_MEDIUM; -#endif - SYSCALL_PSERIES_1 - SYSCALL_PSERIES_2_RFID - SYSCALL_PSERIES_3 - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) - - STD_EXCEPTION_PSERIES(0xd00, single_step) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00) - - /* At 0xe??? we have a bunch of hypervisor exceptions, we branch - * out of line to handle them - */ - . = 0xe00 -hv_data_storage_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_data_storage_hv - - . = 0xe20 -hv_instr_storage_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_instr_storage_hv - - . = 0xe40 -emulation_assist_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b emulation_assist_hv - - . = 0xe60 -hv_exception_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_early - - . = 0xe80 -hv_doorbell_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_doorbell_hv - - . = 0xea0 -hv_virt_irq_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_virt_irq_hv - - /* We need to deal with the Altivec unavailable exception - * here which is at 0xf20, thus in the middle of the - * prolog code of the PerformanceMonitor one. A little - * trickery is thus necessary - */ - . = 0xf00 -performance_monitor_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b performance_monitor_pSeries - - . = 0xf20 -altivec_unavailable_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b altivec_unavailable_pSeries - - . = 0xf40 -vsx_unavailable_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b vsx_unavailable_pSeries - - . = 0xf60 -facility_unavailable_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_pSeries - - . = 0xf80 -hv_facility_unavailable_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_hv - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202) -#endif /* CONFIG_CBE_RAS */ - - STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300) - - . = 0x1500 - .global denorm_exception_hv -denorm_exception_hv: - mtspr SPRN_SPRG_HSCRATCH0,r13 - EXCEPTION_PROLOG_0(PACA_EXGEN) - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) - -#ifdef CONFIG_PPC_DENORMALISATION - mfspr r10,SPRN_HSRR1 - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ - addi r11,r11,-4 /* HSRR0 is next instruction */ - bne+ denorm_assist -#endif - - KVMTEST(0x1500) - EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500) - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602) -#endif /* CONFIG_CBE_RAS */ - - STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700) - -#ifdef CONFIG_CBE_RAS - STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802) -#else - . = 0x1800 -#endif /* CONFIG_CBE_RAS */ - - -/*** Out of line interrupts support ***/ - - .align 7 - /* moved from 0x200 */ -machine_check_powernv_early: +EXC_REAL_END(machine_check, 0x200, 0x300) +EXC_VIRT_NONE(0x4200, 0x4300) +TRAMP_REAL_BEGIN(machine_check_powernv_early) BEGIN_FTR_SECTION EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) /* @@ -472,7 +226,6 @@ BEGIN_FTR_SECTION mfmsr r11 /* get MSR value */ ori r11,r11,MSR_ME /* turn on ME bit */ ori r11,r11,MSR_RI /* turn on RI bit */ - ld r12,PACAKBASE(r13) /* get high part of &label */ LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 @@ -485,7 +238,6 @@ BEGIN_FTR_SECTION */ addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */ ld r11,PACAKMSR(r13) - ld r12,PACAKBASE(r13) LOAD_HANDLER(r12, unrecover_mce) li r10,MSR_ME andc r11,r11,r10 /* Turn off MSR_ME */ @@ -493,295 +245,266 @@ BEGIN_FTR_SECTION b . /* prevent speculative execution */ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) -machine_check_pSeries: +TRAMP_REAL_BEGIN(machine_check_pSeries) .globl machine_check_fwnmi machine_check_fwnmi: SET_SCRATCH0(r13) /* save r13 */ EXCEPTION_PROLOG_0(PACA_EXMC) machine_check_pSeries_0: - EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200) - EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD) - KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300) - KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400) - KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982) - -#ifdef CONFIG_PPC_DENORMALISATION -denorm_assist: -BEGIN_FTR_SECTION -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER6 do that here for all FP regs. - */ - mfmsr r10 - ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1) - xori r10,r10,(MSR_FE0|MSR_FE1) - mtmsrd r10 - sync + EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) + /* + * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the + * difference that MSR_RI is not enabled, because PACA_EXMC is being + * used, so nested machine check corrupts it. machine_check_common + * enables MSR_RI. + */ + ld r10,PACAKMSR(r13) + xori r10,r10,MSR_RI + mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r12, machine_check_common) + mtspr SPRN_SRR0,r12 + mfspr r12,SPRN_SRR1 + mtspr SPRN_SRR1,r10 + rfid + b . /* prevent speculative execution */ -#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 -#define FMR4(n) FMR2(n) ; FMR2(n+2) -#define FMR8(n) FMR4(n) ; FMR4(n+4) -#define FMR16(n) FMR8(n) ; FMR8(n+8) -#define FMR32(n) FMR16(n) ; FMR16(n+16) - FMR32(0) +TRAMP_KVM_SKIP(PACA_EXMC, 0x200) -FTR_SECTION_ELSE -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER7 do that here for the first 32 VSX registers only. - */ - mfmsr r10 - oris r10,r10,MSR_VSX@h - mtmsrd r10 - sync +EXC_COMMON_BEGIN(machine_check_common) + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + mfspr r10,SPRN_DAR + std r10,PACA_EXMC+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXMC+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + FINISH_NAP + RECONCILE_IRQ_STATE(r10, r11) + ld r3,PACA_EXMC+EX_DAR(r13) + lwz r4,PACA_EXMC+EX_DSISR(r13) + /* Enable MSR_RI when finished with PACA_EXMC */ + li r10,MSR_RI + mtmsrd r10,1 + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + b ret_from_except -#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) -#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) -#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) -#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) -#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) - XVCPSGNDP32(0) +#define MACHINE_CHECK_HANDLER_WINDUP \ + /* Clear MSR_RI before setting SRR0 and SRR1. */\ + li r0,MSR_RI; \ + mfmsr r9; /* get MSR value */ \ + andc r9,r9,r0; \ + mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Move original SRR0 and SRR1 into the respective regs */ \ + ld r9,_MSR(r1); \ + mtspr SPRN_SRR1,r9; \ + ld r3,_NIP(r1); \ + mtspr SPRN_SRR0,r3; \ + ld r9,_CTR(r1); \ + mtctr r9; \ + ld r9,_XER(r1); \ + mtxer r9; \ + ld r9,_LINK(r1); \ + mtlr r9; \ + REST_GPR(0, r1); \ + REST_8GPRS(2, r1); \ + REST_GPR(10, r1); \ + ld r11,_CCR(r1); \ + mtcr r11; \ + /* Decrement paca->in_mce. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subi r12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ + REST_GPR(11, r1); \ + REST_2GPRS(12, r1); \ + /* restore original r1. */ \ + ld r1,GPR1(r1) -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + /* + * Handle machine check early in real mode. We come here with + * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. + */ +EXC_COMMON_BEGIN(machine_check_handle_early) + std r0,GPR0(r1) /* Save r0 */ + EXCEPTION_PROLOG_COMMON_3(0x200) + bl save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_early + std r3,RESULT(r1) /* Save result */ + ld r12,_MSR(r1) +#ifdef CONFIG_PPC_P7_NAP + /* + * Check if thread was in power saving mode. We come here when any + * of the following is true: + * a. thread wasn't in power saving mode + * b. thread was in power saving mode with no state loss, + * supervisor state loss or hypervisor state loss. + * + * Go back to nap/sleep/winkle mode again if (b) is true. + */ + rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ + beq 4f /* No, it wasn;t */ + /* Thread was in power saving mode. Go back to nap again. */ + cmpwi r11,2 + blt 3f + /* Supervisor/Hypervisor state loss */ + li r0,1 + stb r0,PACA_NAPSTATELOST(r13) +3: bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + GET_PACA(r13) + ld r1,PACAR1(r13) + /* + * Check what idle state this CPU was in and go back to same mode + * again. + */ + lbz r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi r3,PNV_THREAD_NAP + bgt 10f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +10: + cmpwi r3,PNV_THREAD_SLEEP + bgt 2f + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ -BEGIN_FTR_SECTION - b denorm_done -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) -/* - * To denormalise we need to move a copy of the register to itself. - * For POWER8 we need to do that for all 64 VSX registers - */ - XVCPSGNDP32(32) -denorm_done: - mtspr SPRN_HSRR0,r11 - mtcrf 0x80,r9 - ld r9,PACA_EXGEN+EX_R9(r13) - RESTORE_PPR_PACA(PACA_EXGEN, r10) -BEGIN_FTR_SECTION - ld r10,PACA_EXGEN+EX_CFAR(r13) - mtspr SPRN_CFAR,r10 -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - ld r10,PACA_EXGEN+EX_R10(r13) - ld r11,PACA_EXGEN+EX_R11(r13) - ld r12,PACA_EXGEN+EX_R12(r13) - ld r13,PACA_EXGEN+EX_R13(r13) - HRFID - b . +2: + /* + * Go back to winkle. Please note that this thread was woken up in + * machine check from winkle and have not restored the per-subcore + * state. Hence before going back to winkle, set last bit of HSPGR0 + * to 1. This will make sure that if this thread gets woken up + * again at reset vector 0x100 then it will get chance to restore + * the subcore state. + */ + ori r13,r13,1 + SET_PACA(r13) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + /* No return */ +4: #endif - - .align 7 - /* moved from 0xe00 */ - STD_EXCEPTION_HV_OOL(0xe02, h_data_storage) - KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02) - STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22) - STD_EXCEPTION_HV_OOL(0xe42, emulation_assist) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42) - MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) - - MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) - - MASKABLE_EXCEPTION_HV_OOL(0xea2, h_virt_irq) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xea2) - - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00) - STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20) - STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40) - STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60) - STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable) - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82) - -/* - * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. - * - If it was a doorbell we return immediately since doorbells are edge - * triggered and won't automatically refire. - * - If it was a HMI we return immediately since we handled it in realmode - * and it won't refire. - * - else we hard disable and return. - * This is called with r10 containing the value to OR to the paca field. - */ -#define MASKED_INTERRUPT(_H) \ -masked_##_H##interrupt: \ - std r11,PACA_EXGEN+EX_R11(r13); \ - lbz r11,PACAIRQHAPPENED(r13); \ - or r11,r11,r10; \ - stb r11,PACAIRQHAPPENED(r13); \ - cmpwi r10,PACA_IRQ_DEC; \ - bne 1f; \ - lis r10,0x7fff; \ - ori r10,r10,0xffff; \ - mtspr SPRN_DEC,r10; \ - b 2f; \ -1: cmpwi r10,PACA_IRQ_DBELL; \ - beq 2f; \ - cmpwi r10,PACA_IRQ_HMI; \ - beq 2f; \ - mfspr r10,SPRN_##_H##SRR1; \ - rldicl r10,r10,48,1; /* clear MSR_EE */ \ - rotldi r10,r10,16; \ - mtspr SPRN_##_H##SRR1,r10; \ -2: mtcrf 0x80,r9; \ - ld r9,PACA_EXGEN+EX_R9(r13); \ - ld r10,PACA_EXGEN+EX_R10(r13); \ - ld r11,PACA_EXGEN+EX_R11(r13); \ - GET_SCRATCH0(r13); \ - ##_H##rfid; \ - b . - - MASKED_INTERRUPT() - MASKED_INTERRUPT(H) - -/* - * Called from arch_local_irq_enable when an interrupt needs - * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate - * which kind of interrupt. MSR:EE is already off. We generate a - * stackframe like if a real interrupt had happened. - * - * Note: While MSR:EE is off, we need to make sure that _MSR - * in the generated frame has EE set to 1 or the exception - * handler will not properly re-enable them. - */ -_GLOBAL(__replay_interrupt) - /* We are going to jump to the exception common code which - * will retrieve various register values from the PACA which - * we don't give a damn about, so we don't bother storing them. + /* + * Check if we are coming from hypervisor userspace. If yes then we + * continue in host kernel in V mode to deliver the MC event. */ - mfmsr r12 - mflr r11 - mfcr r9 - ori r12,r12,MSR_EE - cmpwi r3,0x900 - beq decrementer_common - cmpwi r3,0x500 - beq hardware_interrupt_common -BEGIN_FTR_SECTION - cmpwi r3,0xe80 - beq h_doorbell_common - cmpwi r3,0xea0 - beq h_virt_irq_common - cmpwi r3,0xe60 - beq hmi_exception_common -FTR_SECTION_ELSE - cmpwi r3,0xa00 - beq doorbell_super_common -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - blr - -#ifdef CONFIG_PPC_PSERIES -/* - * Vectors for the FWNMI option. Share common code. - */ - .globl system_reset_fwnmi - .align 7 -system_reset_fwnmi: - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) - -#endif /* CONFIG_PPC_PSERIES */ + rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ + beq 5f + andi. r11,r12,MSR_PR /* See if coming from user. */ + bne 9f /* continue in V mode if we are. */ +5: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -kvmppc_skip_interrupt: /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. + * We are coming from kernel context. Check if we are coming from + * guest. if yes, then we can continue. We will fall through + * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. */ - mfspr r13, SPRN_SRR0 - addi r13, r13, 4 - mtspr SPRN_SRR0, r13 - GET_SCRATCH0(r13) + lbz r11,HSTATE_IN_GUEST(r13) + cmpwi r11,0 /* Check if coming from guest */ + bne 9f /* continue if we are. */ +#endif + /* + * At this point we are not sure about what context we come from. + * Queue up the MCE event and return from the interrupt. + * But before that, check if this is an un-recoverable exception. + * If yes, then stay on emergency stack and panic. + */ + andi. r11,r12,MSR_RI + bne 2f +1: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,unrecover_mce) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + /* + * We are going down. But there are chances that we might get hit by + * another MCE during panic path and we may run into unstable state + * with no way out. Hence, turn ME bit off while going down, so that + * when another MCE is hit during panic path, system will checkstop + * and hypervisor will get restarted cleanly by SP. + */ + li r3,MSR_ME + andc r10,r10,r3 /* Turn off MSR_ME */ + mtspr SPRN_SRR1,r10 rfid b . - -kvmppc_skip_Hinterrupt: +2: /* - * Here all GPRs are unchanged from when the interrupt happened - * except for r13, which is saved in SPRG_SCRATCH0. + * Check if we have successfully handled/recovered from error, if not + * then stay on emergency stack and panic. */ - mfspr r13, SPRN_HSRR0 - addi r13, r13, 4 - mtspr SPRN_HSRR0, r13 - GET_SCRATCH0(r13) - hrfid - b . -#endif + ld r3,RESULT(r1) /* Load result */ + cmpdi r3,0 /* see if we handled MCE successfully */ -/* - * Ensure that any handlers that get invoked from the exception prologs - * above are below the first 64KB (0x10000) of the kernel image because - * the prologs assemble the addresses of these handlers using the - * LOAD_HANDLER macro, which uses an ori instruction. - */ + beq 1b /* if !handled then panic */ + /* + * Return from MC interrupt. + * Queue up the MCE event so that we can log it later, while + * returning from kernel or opal call. + */ + bl machine_check_queue_event + MACHINE_CHECK_HANDLER_WINDUP + rfid +9: + /* Deliver the machine check to host kernel in V mode. */ + MACHINE_CHECK_HANDLER_WINDUP + b machine_check_pSeries -/*** Common interrupt handlers ***/ +EXC_COMMON_BEGIN(unrecover_mce) + /* Invoke machine_check_exception to print MCE event and panic. */ + addi r3,r1,STACK_FRAME_OVERHEAD + bl machine_check_exception + /* + * We will not reach here. Even if we did, there is no way out. Call + * unrecoverable_exception and die. + */ +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b - STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception) - STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) - STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt) - STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt) -#ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception) -#else - STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception) -#endif - STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception) - STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception) - STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception) - STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt) - STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception) -#ifdef CONFIG_PPC_DOORBELL - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception) -#else - STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception) -#endif - STD_EXCEPTION_COMMON_ASYNC(0xea0, h_virt_irq, do_IRQ) - STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception) - STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception) - STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception) -#ifdef CONFIG_ALTIVEC - STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception) -#else - STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception) -#endif +EXC_REAL(data_access, 0x300, 0x380) +EXC_VIRT(data_access, 0x4300, 0x4380, 0x300) +TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) +EXC_COMMON_BEGIN(data_access_common) /* - * Relocation-on interrupts: A subset of the interrupts can be delivered - * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering - * it. Addresses are the same as the original interrupt addresses, but - * offset by 0xc000000000004000. - * It's impossible to receive interrupts below 0x300 via this mechanism. - * KVM: None of these traps are from the guest ; anything that escalated - * to HV=1 from HV=0 is delivered via real mode handlers. + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. */ + mfspr r10,SPRN_DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + RECONCILE_IRQ_STATE(r10, r11) + ld r12,_MSR(r1) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + std r3,_DAR(r1) + std r4,_DSISR(r1) +BEGIN_MMU_FTR_SECTION + b do_hash_page /* Try to handle as hpte fault */ +MMU_FTR_SECTION_ELSE + b handle_page_fault +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - /* - * This uses the standard macro, since the original 0x300 vector - * only has extra guff for STAB-based processors -- which never - * come here. - */ - STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access) - . = 0x4380 - .globl data_access_slb_relon_pSeries -data_access_slb_relon_pSeries: + +EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR mfspr r12,SPRN_SRR1 + crset 4*cr6+eq #ifndef CONFIG_RELOCATABLE b slb_miss_realmode #else @@ -791,217 +514,221 @@ data_access_slb_relon_pSeries: * the kernel ends up being put. */ mfctr r11 - ld r10,PACAKBASE(r13) LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif +EXC_REAL_END(data_access_slb, 0x380, 0x400) - STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access) - . = 0x4480 - .globl instruction_access_slb_relon_pSeries -instruction_access_slb_relon_pSeries: +EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) - EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) + EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) std r3,PACA_EXSLB+EX_R3(r13) - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r3,SPRN_DAR mfspr r12,SPRN_SRR1 + crset 4*cr6+eq #ifndef CONFIG_RELOCATABLE b slb_miss_realmode #else + /* + * We can't just use a direct branch to slb_miss_realmode + * because the distance from here to there depends on where + * the kernel ends up being put. + */ mfctr r11 - ld r10,PACAKBASE(r13) LOAD_HANDLER(r10, slb_miss_realmode) mtctr r10 bctr #endif +EXC_VIRT_END(data_access_slb, 0x4380, 0x4400) +TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) - . = 0x4500 - .globl hardware_interrupt_relon_pSeries; - .globl hardware_interrupt_relon_hv; -hardware_interrupt_relon_pSeries: -hardware_interrupt_relon_hv: - BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV) - FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) - STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment) - STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check) - STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable) - MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer) - STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer) - MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super) - STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b) - - . = 0x4c00 - .globl system_call_relon_pSeries -system_call_relon_pSeries: - HMT_MEDIUM - SYSCALL_PSERIES_1 - SYSCALL_PSERIES_2_DIRECT - SYSCALL_PSERIES_3 - STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step) +EXC_REAL(instruction_access, 0x400, 0x480) +EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400) +TRAMP_KVM(PACA_EXGEN, 0x400) - . = 0x4e00 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ +EXC_COMMON_BEGIN(instruction_access_common) + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + RECONCILE_IRQ_STATE(r10, r11) + ld r12,_MSR(r1) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + std r3,_DAR(r1) + std r4,_DSISR(r1) +BEGIN_MMU_FTR_SECTION + b do_hash_page /* Try to handle as hpte fault */ +MMU_FTR_SECTION_ELSE + b handle_page_fault +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) - . = 0x4e20 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ - . = 0x4e40 -emulation_assist_relon_trampoline: +EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b emulation_assist_relon_hv - - . = 0x4e60 - b . /* Can't happen, see v2.07 Book III-S section 6.5 */ + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r12,SPRN_SRR1 + crclr 4*cr6+eq +#ifndef CONFIG_RELOCATABLE + b slb_miss_realmode +#else + mfctr r11 + LOAD_HANDLER(r10, slb_miss_realmode) + mtctr r10 + bctr +#endif +EXC_REAL_END(instruction_access_slb, 0x480, 0x500) - . = 0x4e80 -h_doorbell_relon_trampoline: +EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_doorbell_relon_hv + EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ + mfspr r12,SPRN_SRR1 + crclr 4*cr6+eq +#ifndef CONFIG_RELOCATABLE + b slb_miss_realmode +#else + mfctr r11 + LOAD_HANDLER(r10, slb_miss_realmode) + mtctr r10 + bctr +#endif +EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500) +TRAMP_KVM(PACA_EXSLB, 0x480) - . = 0x4ea0 -h_virt_irq_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b h_virt_irq_relon_hv - . = 0x4f00 -performance_monitor_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b performance_monitor_relon_pSeries +/* This handler is used by both 0x380 and 0x480 slb miss interrupts */ +EXC_COMMON_BEGIN(slb_miss_realmode) + /* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss + * We assume we aren't going to take any exceptions during this + * procedure. + */ + mflr r10 +#ifdef CONFIG_RELOCATABLE + mtctr r11 +#endif - . = 0x4f20 -altivec_unavailable_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b altivec_unavailable_relon_pSeries + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + std r3,PACA_EXSLB+EX_DAR(r13) - . = 0x4f40 -vsx_unavailable_relon_pseries_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b vsx_unavailable_relon_pSeries + crset 4*cr0+eq +#ifdef CONFIG_PPC_STD_MMU_64 +BEGIN_MMU_FTR_SECTION + bl slb_allocate_realmode +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) +#endif - . = 0x4f60 -facility_unavailable_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b facility_unavailable_relon_pSeries + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + mtlr r10 - . = 0x4f80 -hv_facility_unavailable_relon_trampoline: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hv_facility_unavailable_relon_hv + beq 8f /* if bad address, make full stack frame */ - STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint) -#ifdef CONFIG_PPC_DENORMALISATION - . = 0x5500 - b denorm_exception_hv -#endif - STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist) + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- 2f - .align 7 -system_call_entry: - b system_call_common + /* All done -- return from exception. */ -ppc64_runlatch_on_trampoline: - b __ppc64_runlatch_on +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop -/* - * Here r13 points to the paca, r9 contains the saved CR, - * SRR0 and SRR1 are saved in r11 and r12, - * r9 - r13 are saved in paca->exgen. - */ - .align 7 - .globl data_access_common -data_access_common: - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) - RECONCILE_IRQ_STATE(r10, r11) - ld r12,_MSR(r1) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - li r5,0x300 - std r3,_DAR(r1) - std r4,_DSISR(r1) -BEGIN_MMU_FTR_SECTION - b do_hash_page /* Try to handle as hpte fault */ -MMU_FTR_SECTION_ELSE - b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ - .align 7 - .globl h_data_storage_common -h_data_storage_common: - mfspr r10,SPRN_HDAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_HDSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) - bl save_nvgprs +2: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,unrecov_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +8: mfspr r11,SPRN_SRR0 + LOAD_HANDLER(r10,bad_addr_slb) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + rfid + b . + +EXC_COMMON_BEGIN(unrecov_slb) + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception - b ret_from_except + bl save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl unrecoverable_exception + b 1b - .align 7 - .globl instruction_access_common -instruction_access_common: - EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) +EXC_COMMON_BEGIN(bad_addr_slb) + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) RECONCILE_IRQ_STATE(r10, r11) - ld r12,_MSR(r1) - ld r3,_NIP(r1) - andis. r4,r12,0x5820 - li r5,0x400 - std r3,_DAR(r1) - std r4,_DSISR(r1) -BEGIN_MMU_FTR_SECTION - b do_hash_page /* Try to handle as hpte fault */ -MMU_FTR_SECTION_ELSE - b handle_page_fault -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + ld r3, PACA_EXSLB+EX_DAR(r13) + std r3, _DAR(r1) + beq cr6, 2f + li r10, 0x480 /* fix trap number for I-SLB miss */ + std r10, _TRAP(r1) +2: bl save_nvgprs + addi r3, r1, STACK_FRAME_OVERHEAD + bl slb_miss_bad_addr + b ret_from_except - STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception) +EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600) + .globl hardware_interrupt_hv; +hardware_interrupt_hv: + BEGIN_FTR_SECTION + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_HV, SOFTEN_TEST_HV) +do_kvm_H0x500: + KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) + FTR_SECTION_ELSE + _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_STD, SOFTEN_TEST_PR) +do_kvm_0x500: + KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +EXC_REAL_END(hardware_interrupt, 0x500, 0x600) - /* - * Machine check is different because we use a different - * save area: PACA_EXMC instead of PACA_EXGEN. - */ - .align 7 - .globl machine_check_common -machine_check_common: +EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) + .globl hardware_interrupt_relon_hv; +hardware_interrupt_relon_hv: + BEGIN_FTR_SECTION + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) + FTR_SECTION_ELSE + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) + ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) +EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) - mfspr r10,SPRN_DAR - std r10,PACA_EXGEN+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) - FINISH_NAP - RECONCILE_IRQ_STATE(r10, r11) - ld r3,PACA_EXGEN+EX_DAR(r13) - lwz r4,PACA_EXGEN+EX_DSISR(r13) - std r3,_DAR(r1) - std r4,_DSISR(r1) - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception - b ret_from_except +EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) - .align 7 - .globl alignment_common -alignment_common: + +EXC_REAL(alignment, 0x600, 0x700) +EXC_VIRT(alignment, 0x4600, 0x4700, 0x600) +TRAMP_KVM(PACA_EXGEN, 0x600) +EXC_COMMON_BEGIN(alignment_common) mfspr r10,SPRN_DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,SPRN_DSISR @@ -1017,9 +744,11 @@ alignment_common: bl alignment_exception b ret_from_except - .align 7 - .globl program_check_common -program_check_common: + +EXC_REAL(program_check, 0x700, 0x800) +EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) +TRAMP_KVM(PACA_EXGEN, 0x700) +EXC_COMMON_BEGIN(program_check_common) EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -1027,9 +756,11 @@ program_check_common: bl program_check_exception b ret_from_except - .align 7 - .globl fp_unavailable_common -fp_unavailable_common: + +EXC_REAL(fp_unavailable, 0x800, 0x900) +EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800) +TRAMP_KVM(PACA_EXGEN, 0x800) +EXC_COMMON_BEGIN(fp_unavailable_common) EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) bne 1f /* if from user, just load it up */ bl save_nvgprs @@ -1057,9 +788,250 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) bl fp_unavailable_tm b ret_from_except #endif - .align 7 - .globl altivec_unavailable_common -altivec_unavailable_common: + + +EXC_REAL_MASKABLE(decrementer, 0x900, 0x980) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900) +TRAMP_KVM(PACA_EXGEN, 0x900) +EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) + + +EXC_REAL_HV(hdecrementer, 0x980, 0xa00) +EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980) +TRAMP_KVM_HV(PACA_EXGEN, 0x980) +EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) + + +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00) +TRAMP_KVM(PACA_EXGEN, 0xa00) +#ifdef CONFIG_PPC_DOORBELL +EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) +#else +EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) +#endif + + +EXC_REAL(trap_0b, 0xb00, 0xc00) +EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) +TRAMP_KVM(PACA_EXGEN, 0xb00) +EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) + + +#define LOAD_SYSCALL_HANDLER(reg) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(system_call_common))@l; + +/* Syscall routine is used twice, in reloc-off and reloc-on paths */ +#define SYSCALL_PSERIES_1 \ +BEGIN_FTR_SECTION \ + cmpdi r0,0x1ebe ; \ + beq- 1f ; \ +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ + mr r9,r13 ; \ + GET_PACA(r13) ; \ + mfspr r11,SPRN_SRR0 ; \ +0: + +#define SYSCALL_PSERIES_2_RFID \ + mfspr r12,SPRN_SRR1 ; \ + LOAD_SYSCALL_HANDLER(r10) ; \ + mtspr SPRN_SRR0,r10 ; \ + ld r10,PACAKMSR(r13) ; \ + mtspr SPRN_SRR1,r10 ; \ + rfid ; \ + b . ; /* prevent speculative execution */ + +#define SYSCALL_PSERIES_3 \ + /* Fast LE/BE switch system call */ \ +1: mfspr r12,SPRN_SRR1 ; \ + xori r12,r12,MSR_LE ; \ + mtspr SPRN_SRR1,r12 ; \ + rfid ; /* return to userspace */ \ + b . ; /* prevent speculative execution */ + +#if defined(CONFIG_RELOCATABLE) + /* + * We can't branch directly so we do it via the CTR which + * is volatile across system calls. + */ +#define SYSCALL_PSERIES_2_DIRECT \ + LOAD_SYSCALL_HANDLER(r12) ; \ + mtctr r12 ; \ + mfspr r12,SPRN_SRR1 ; \ + li r10,MSR_RI ; \ + mtmsrd r10,1 ; \ + bctr ; +#else + /* We can branch directly */ +#define SYSCALL_PSERIES_2_DIRECT \ + mfspr r12,SPRN_SRR1 ; \ + li r10,MSR_RI ; \ + mtmsrd r10,1 ; /* Set RI (EE=0) */ \ + b system_call_common ; +#endif + +EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. + */ +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + SET_SCRATCH0(r13) + GET_PACA(r13) + std r9,PACA_EXGEN+EX_R9(r13) + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); + HMT_MEDIUM; + std r10,PACA_EXGEN+EX_R10(r13) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); + mfcr r9 + KVMTEST_PR(0xc00) + GET_SCRATCH0(r13) +#else + HMT_MEDIUM; +#endif + SYSCALL_PSERIES_1 + SYSCALL_PSERIES_2_RFID + SYSCALL_PSERIES_3 +EXC_REAL_END(system_call, 0xc00, 0xd00) + +EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) + HMT_MEDIUM + SYSCALL_PSERIES_1 + SYSCALL_PSERIES_2_DIRECT + SYSCALL_PSERIES_3 +EXC_VIRT_END(system_call, 0x4c00, 0x4d00) + +TRAMP_KVM(PACA_EXGEN, 0xc00) + + +EXC_REAL(single_step, 0xd00, 0xe00) +EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00) +TRAMP_KVM(PACA_EXGEN, 0xd00) +EXC_COMMON(single_step_common, 0xd00, single_step_exception) + +EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) +EXC_VIRT_NONE(0x4e00, 0x4e20) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) +EXC_COMMON_BEGIN(h_data_storage_common) + mfspr r10,SPRN_HDAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,SPRN_HDSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + bl save_nvgprs + RECONCILE_IRQ_STATE(r10, r11) + addi r3,r1,STACK_FRAME_OVERHEAD + bl unknown_exception + b ret_from_except + + +EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) +EXC_VIRT_NONE(0x4e20, 0x4e40) +TRAMP_KVM_HV(PACA_EXGEN, 0xe20) +EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) + + +EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60) +EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40) +TRAMP_KVM_HV(PACA_EXGEN, 0xe40) +EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) + + +/* + * hmi_exception trampoline is a special case. It jumps to hmi_exception_early + * first, and then eventaully from there to the trampoline to get into virtual + * mode. + */ +__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early) +__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +EXC_VIRT_NONE(0x4e60, 0x4e80) +TRAMP_KVM_HV(PACA_EXGEN, 0xe60) +TRAMP_REAL_BEGIN(hmi_exception_early) + EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) + mr r10,r1 /* Save r1 */ + ld r1,PACAEMERGSP(r13) /* Use emergency stack */ + subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ + std r9,_CCR(r1) /* save CR in stackframe */ + mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ + std r11,_NIP(r1) /* save HSRR0 in stackframe */ + mfspr r12,SPRN_HSRR1 /* Save SRR1 */ + std r12,_MSR(r1) /* save SRR1 in stackframe */ + std r10,0(r1) /* make stack chain pointer */ + std r0,GPR0(r1) /* save r0 in stackframe */ + std r10,GPR1(r1) /* save r1 in stackframe */ + EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) + EXCEPTION_PROLOG_COMMON_3(0xe60) + addi r3,r1,STACK_FRAME_OVERHEAD + bl hmi_exception_realmode + /* Windup the stack. */ + /* Move original HSRR0 and HSRR1 into the respective regs */ + ld r9,_MSR(r1) + mtspr SPRN_HSRR1,r9 + ld r3,_NIP(r1) + mtspr SPRN_HSRR0,r3 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + mtcr r11 + REST_GPR(11, r1) + REST_2GPRS(12, r1) + /* restore original r1. */ + ld r1,GPR1(r1) + + /* + * Go to virtual mode and pull the HMI event information from + * firmware. + */ + .globl hmi_exception_after_realmode +hmi_exception_after_realmode: + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0(PACA_EXGEN) + b tramp_real_hmi_exception + +EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) + + +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80) +TRAMP_KVM_HV(PACA_EXGEN, 0xe80) +#ifdef CONFIG_PPC_DOORBELL +EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) +#else +EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) +#endif + + +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0) +TRAMP_KVM_HV(PACA_EXGEN, 0xea0) +EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) + + +EXC_REAL_NONE(0xec0, 0xf00) +EXC_VIRT_NONE(0x4ec0, 0x4f00) + + +EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20) +EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00) +TRAMP_KVM(PACA_EXGEN, 0xf00) +EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) + + +EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40) +EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20) +TRAMP_KVM(PACA_EXGEN, 0xf20) +EXC_COMMON_BEGIN(altivec_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION @@ -1092,9 +1064,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl altivec_unavailable_exception b ret_from_except - .align 7 - .globl vsx_unavailable_common -vsx_unavailable_common: + +EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60) +EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40) +TRAMP_KVM(PACA_EXGEN, 0xf40) +EXC_COMMON_BEGIN(vsx_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) #ifdef CONFIG_VSX BEGIN_FTR_SECTION @@ -1126,323 +1100,284 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) bl vsx_unavailable_exception b ret_from_except - /* Equivalents to the above handlers for relocation-on interrupt vectors */ - STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist) - MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell) - MASKABLE_RELON_EXCEPTION_HV_OOL(0xea0, h_virt_irq) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable) - STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable) - STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable) +EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80) +EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60) +TRAMP_KVM(PACA_EXGEN, 0xf60) +EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) - /* - * The __end_interrupts marker must be past the out-of-line (OOL) - * handlers, so that they are copied to real address 0x100 when running - * a relocatable kernel. This ensures they can be reached from the short - * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch - * directly, without using LOAD_HANDLER(). - */ - .align 7 - .globl __end_interrupts -__end_interrupts: -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0) +EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80) +TRAMP_KVM_HV(PACA_EXGEN, 0xf80) +EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) + + +EXC_REAL_NONE(0xfa0, 0x1200) +EXC_VIRT_NONE(0x4fa0, 0x5200) + +#ifdef CONFIG_CBE_RAS +EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300) +EXC_VIRT_NONE(0x5200, 0x5300) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200) +EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1200, 0x1300) +EXC_VIRT_NONE(0x5200, 0x5300) +#endif + + +EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) +EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) +TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) +EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception) + +EXC_REAL_NONE(0x1400, 0x1500) +EXC_VIRT_NONE(0x5400, 0x5500) + +EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) + mtspr SPRN_SPRG_HSCRATCH0,r13 + EXCEPTION_PROLOG_0(PACA_EXGEN) + EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) + +#ifdef CONFIG_PPC_DENORMALISATION + mfspr r10,SPRN_HSRR1 + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */ + addi r11,r11,-4 /* HSRR0 is next instruction */ + bne+ denorm_assist +#endif + + KVMTEST_PR(0x1500) + EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) +EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) + +#ifdef CONFIG_PPC_DENORMALISATION +EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) + b exc_real_0x1500_denorm_exception_hv +EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) +#else +EXC_VIRT_NONE(0x5500, 0x5600) +#endif + +TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) + +#ifdef CONFIG_PPC_DENORMALISATION +TRAMP_REAL_BEGIN(denorm_assist) +BEGIN_FTR_SECTION /* - * Data area reserved for FWNMI option. - * This address (0x7000) is fixed by the RPA. + * To denormalise we need to move a copy of the register to itself. + * For POWER6 do that here for all FP regs. */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: + mfmsr r10 + ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1) + xori r10,r10,(MSR_FE0|MSR_FE1) + mtmsrd r10 + sync - /* pseries and powernv need to keep the whole page from - * 0x7000 to 0x8000 free for use by the firmware - */ - . = 0x8000 -#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ +#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 +#define FMR4(n) FMR2(n) ; FMR2(n+2) +#define FMR8(n) FMR4(n) ; FMR4(n+4) +#define FMR16(n) FMR8(n) ; FMR8(n+8) +#define FMR32(n) FMR16(n) ; FMR16(n+16) + FMR32(0) + +FTR_SECTION_ELSE +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER7 do that here for the first 32 VSX registers only. + */ + mfmsr r10 + oris r10,r10,MSR_VSX@h + mtmsrd r10 + sync + +#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) +#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) +#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) +#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) +#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) + XVCPSGNDP32(0) + +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) + +BEGIN_FTR_SECTION + b denorm_done +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) +/* + * To denormalise we need to move a copy of the register to itself. + * For POWER8 we need to do that for all 64 VSX registers + */ + XVCPSGNDP32(32) +denorm_done: + mtspr SPRN_HSRR0,r11 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + RESTORE_PPR_PACA(PACA_EXGEN, r10) +BEGIN_FTR_SECTION + ld r10,PACA_EXGEN+EX_CFAR(r13) + mtspr SPRN_CFAR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFID + b . +#endif + +EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) - STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception) - STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception) #ifdef CONFIG_CBE_RAS - STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception) - STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception) - STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception) -#endif /* CONFIG_CBE_RAS */ - - .globl hmi_exception_early -hmi_exception_early: - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62) - mr r10,r1 /* Save r1 */ - ld r1,PACAEMERGSP(r13) /* Use emergency stack */ - subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - std r9,_CCR(r1) /* save CR in stackframe */ - mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - std r11,_NIP(r1) /* save HSRR0 in stackframe */ - mfspr r12,SPRN_HSRR1 /* Save SRR1 */ - std r12,_MSR(r1) /* save SRR1 in stackframe */ - std r10,0(r1) /* make stack chain pointer */ - std r0,GPR0(r1) /* save r0 in stackframe */ - std r10,GPR1(r1) /* save r1 in stackframe */ - EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) - EXCEPTION_PROLOG_COMMON_3(0xe60) - addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode - /* Windup the stack. */ - /* Move original HSRR0 and HSRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_HSRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_HSRR0,r3 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlr r9 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - mtcr r11 - REST_GPR(11, r1) - REST_2GPRS(12, r1) - /* restore original r1. */ - ld r1,GPR1(r1) +EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) +EXC_VIRT_NONE(0x5600, 0x5700) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) +EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1600, 0x1700) +EXC_VIRT_NONE(0x5600, 0x5700) +#endif - /* - * Go to virtual mode and pull the HMI event information from - * firmware. - */ - .globl hmi_exception_after_realmode -hmi_exception_after_realmode: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b hmi_exception_hv +EXC_REAL(altivec_assist, 0x1700, 0x1800) +EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) +TRAMP_KVM(PACA_EXGEN, 0x1700) +#ifdef CONFIG_ALTIVEC +EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) +#else +EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) +#endif -#define MACHINE_CHECK_HANDLER_WINDUP \ - /* Clear MSR_RI before setting SRR0 and SRR1. */\ - li r0,MSR_RI; \ - mfmsr r9; /* get MSR value */ \ - andc r9,r9,r0; \ - mtmsrd r9,1; /* Clear MSR_RI */ \ - /* Move original SRR0 and SRR1 into the respective regs */ \ - ld r9,_MSR(r1); \ - mtspr SPRN_SRR1,r9; \ - ld r3,_NIP(r1); \ - mtspr SPRN_SRR0,r3; \ - ld r9,_CTR(r1); \ - mtctr r9; \ - ld r9,_XER(r1); \ - mtxer r9; \ - ld r9,_LINK(r1); \ - mtlr r9; \ - REST_GPR(0, r1); \ - REST_8GPRS(2, r1); \ - REST_GPR(10, r1); \ - ld r11,_CCR(r1); \ - mtcr r11; \ - /* Decrement paca->in_mce. */ \ - lhz r12,PACA_IN_MCE(r13); \ - subi r12,r12,1; \ - sth r12,PACA_IN_MCE(r13); \ - REST_GPR(11, r1); \ - REST_2GPRS(12, r1); \ - /* restore original r1. */ \ - ld r1,GPR1(r1) - /* - * Handle machine check early in real mode. We come here with - * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack. - */ - .align 7 - .globl machine_check_handle_early -machine_check_handle_early: - std r0,GPR0(r1) /* Save r0 */ - EXCEPTION_PROLOG_COMMON_3(0x200) - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_early - std r3,RESULT(r1) /* Save result */ - ld r12,_MSR(r1) -#ifdef CONFIG_PPC_P7_NAP - /* - * Check if thread was in power saving mode. We come here when any - * of the following is true: - * a. thread wasn't in power saving mode - * b. thread was in power saving mode with no state loss or - * supervisor state loss - * - * Go back to nap again if (b) is true. - */ - rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ - beq 4f /* No, it wasn;t */ - /* Thread was in power saving mode. Go back to nap again. */ - cmpwi r11,2 - bne 3f - /* Supervisor state loss */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) -3: bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - GET_PACA(r13) - ld r1,PACAR1(r13) - li r3,PNV_THREAD_NAP - b pnv_enter_arch207_idle_mode -4: +#ifdef CONFIG_CBE_RAS +EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) +EXC_VIRT_NONE(0x5800, 0x5900) +TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) +EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) +#else /* CONFIG_CBE_RAS */ +EXC_REAL_NONE(0x1800, 0x1900) +EXC_VIRT_NONE(0x5800, 0x5900) #endif - /* - * Check if we are coming from hypervisor userspace. If yes then we - * continue in host kernel in V mode to deliver the MC event. - */ - rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */ - beq 5f - andi. r11,r12,MSR_PR /* See if coming from user. */ - bne 9f /* continue in V mode if we are. */ -5: + +/* + * An interrupt came in while soft-disabled. We set paca->irq_happened, then: + * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a doorbell we return immediately since doorbells are edge + * triggered and won't automatically refire. + * - If it was a HMI we return immediately since we handled it in realmode + * and it won't refire. + * - else we hard disable and return. + * This is called with r10 containing the value to OR to the paca field. + */ +#define MASKED_INTERRUPT(_H) \ +masked_##_H##interrupt: \ + std r11,PACA_EXGEN+EX_R11(r13); \ + lbz r11,PACAIRQHAPPENED(r13); \ + or r11,r11,r10; \ + stb r11,PACAIRQHAPPENED(r13); \ + cmpwi r10,PACA_IRQ_DEC; \ + bne 1f; \ + lis r10,0x7fff; \ + ori r10,r10,0xffff; \ + mtspr SPRN_DEC,r10; \ + b 2f; \ +1: cmpwi r10,PACA_IRQ_DBELL; \ + beq 2f; \ + cmpwi r10,PACA_IRQ_HMI; \ + beq 2f; \ + mfspr r10,SPRN_##_H##SRR1; \ + rldicl r10,r10,48,1; /* clear MSR_EE */ \ + rotldi r10,r10,16; \ + mtspr SPRN_##_H##SRR1,r10; \ +2: mtcrf 0x80,r9; \ + ld r9,PACA_EXGEN+EX_R9(r13); \ + ld r10,PACA_EXGEN+EX_R10(r13); \ + ld r11,PACA_EXGEN+EX_R11(r13); \ + GET_SCRATCH0(r13); \ + ##_H##rfid; \ + b . + +/* + * Real mode exceptions actually use this too, but alternate + * instruction code patches (which end up in the common .text area) + * cannot reach these if they are put there. + */ +USE_FIXED_SECTION(virt_trampolines) + MASKED_INTERRUPT() + MASKED_INTERRUPT(H) + #ifdef CONFIG_KVM_BOOK3S_64_HANDLER +TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) /* - * We are coming from kernel context. Check if we are coming from - * guest. if yes, then we can continue. We will fall through - * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest. - */ - lbz r11,HSTATE_IN_GUEST(r13) - cmpwi r11,0 /* Check if coming from guest */ - bne 9f /* continue if we are. */ -#endif - /* - * At this point we are not sure about what context we come from. - * Queue up the MCE event and return from the interrupt. - * But before that, check if this is an un-recoverable exception. - * If yes, then stay on emergency stack and panic. - */ - andi. r11,r12,MSR_RI - bne 2f -1: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecover_mce) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - /* - * We are going down. But there are chances that we might get hit by - * another MCE during panic path and we may run into unstable state - * with no way out. Hence, turn ME bit off while going down, so that - * when another MCE is hit during panic path, system will checkstop - * and hypervisor will get restarted cleanly by SP. + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. */ - li r3,MSR_ME - andc r10,r10,r3 /* Turn off MSR_ME */ - mtspr SPRN_SRR1,r10 + mfspr r13, SPRN_SRR0 + addi r13, r13, 4 + mtspr SPRN_SRR0, r13 + GET_SCRATCH0(r13) rfid b . -2: - /* - * Check if we have successfully handled/recovered from error, if not - * then stay on emergency stack and panic. - */ - ld r3,RESULT(r1) /* Load result */ - cmpdi r3,0 /* see if we handled MCE successfully */ - beq 1b /* if !handled then panic */ +TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) /* - * Return from MC interrupt. - * Queue up the MCE event so that we can log it later, while - * returning from kernel or opal call. + * Here all GPRs are unchanged from when the interrupt happened + * except for r13, which is saved in SPRG_SCRATCH0. */ - bl machine_check_queue_event - MACHINE_CHECK_HANDLER_WINDUP - rfid -9: - /* Deliver the machine check to host kernel in V mode. */ - MACHINE_CHECK_HANDLER_WINDUP - b machine_check_pSeries + mfspr r13, SPRN_HSRR0 + addi r13, r13, 4 + mtspr SPRN_HSRR0, r13 + GET_SCRATCH0(r13) + hrfid + b . +#endif -unrecover_mce: - /* Invoke machine_check_exception to print MCE event and panic. */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception - /* - * We will not reach here. Even if we did, there is no way out. Call - * unrecoverable_exception and die. - */ -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b /* - * r13 points to the PACA, r9 contains the saved CR, - * r12 contain the saved SRR1, SRR0 is still ready for return - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. + * Ensure that any handlers that get invoked from the exception prologs + * above are below the first 64KB (0x10000) of the kernel image because + * the prologs assemble the addresses of these handlers using the + * LOAD_HANDLER macro, which uses an ori instruction. */ -slb_miss_realmode: - mflr r10 -#ifdef CONFIG_RELOCATABLE - mtctr r11 -#endif - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - -#ifdef CONFIG_PPC_STD_MMU_64 -BEGIN_MMU_FTR_SECTION - bl slb_allocate_realmode -END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) -#endif - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ - - mtlr r10 - andi. r10,r12,MSR_RI /* check for unrecoverable exception */ -BEGIN_MMU_FTR_SECTION - beq- 2f -FTR_SECTION_ELSE - b 2f -ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +/*** Common interrupt handlers ***/ -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - RESTORE_PPR_PACA(PACA_EXSLB, r9) - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ + /* + * Relocation-on interrupts: A subset of the interrupts can be delivered + * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering + * it. Addresses are the same as the original interrupt addresses, but + * offset by 0xc000000000004000. + * It's impossible to receive interrupts below 0x300 via this mechanism. + * KVM: None of these traps are from the guest ; anything that escalated + * to HV=1 from HV=0 is delivered via real mode handlers. + */ -2: mfspr r11,SPRN_SRR0 - ld r10,PACAKBASE(r13) - LOAD_HANDLER(r10,unrecov_slb) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - rfid - b . + /* + * This uses the standard macro, since the original 0x300 vector + * only has extra guff for STAB-based processors -- which never + * come here. + */ -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - RECONCILE_IRQ_STATE(r10, r11) - bl save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception - b 1b +EXC_COMMON_BEGIN(ppc64_runlatch_on_trampoline) + b __ppc64_runlatch_on +USE_FIXED_SECTION(virt_trampolines) + /* + * The __end_interrupts marker must be past the out-of-line (OOL) + * handlers, so that they are copied to real address 0x100 when running + * a relocatable kernel. This ensures they can be reached from the short + * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch + * directly, without using LOAD_HANDLER(). + */ + .align 7 + .globl __end_interrupts +__end_interrupts: +DEFINE_FIXED_SYMBOL(__end_interrupts) #ifdef CONFIG_PPC_970_NAP -power4_fixup_nap: +TRAMP_REAL_BEGIN(power4_fixup_nap) andc r9,r9,r10 std r9,TI_LOCAL_FLAGS(r11) ld r10,_LINK(r1) /* make idle task do the */ @@ -1450,6 +1385,13 @@ power4_fixup_nap: blr #endif +CLOSE_FIXED_SECTION(real_vectors); +CLOSE_FIXED_SECTION(real_trampolines); +CLOSE_FIXED_SECTION(virt_vectors); +CLOSE_FIXED_SECTION(virt_trampolines); + +USE_TEXT_SECTION() + /* * Hash table stuff */ @@ -1595,3 +1537,39 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) 1: addi r3,r1,STACK_FRAME_OVERHEAD bl kernel_bad_stack b 1b + +/* + * Called from arch_local_irq_enable when an interrupt needs + * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate + * which kind of interrupt. MSR:EE is already off. We generate a + * stackframe like if a real interrupt had happened. + * + * Note: While MSR:EE is off, we need to make sure that _MSR + * in the generated frame has EE set to 1 or the exception + * handler will not properly re-enable them. + */ +_GLOBAL(__replay_interrupt) + /* We are going to jump to the exception common code which + * will retrieve various register values from the PACA which + * we don't give a damn about, so we don't bother storing them. + */ + mfmsr r12 + mflr r11 + mfcr r9 + ori r12,r12,MSR_EE + cmpwi r3,0x900 + beq decrementer_common + cmpwi r3,0x500 + beq hardware_interrupt_common +BEGIN_FTR_SECTION + cmpwi r3,0xe80 + beq h_doorbell_common + cmpwi r3,0xea0 + beq h_virt_irq_common + cmpwi r3,0xe60 + beq hmi_exception_common +FTR_SECTION_ELSE + cmpwi r3,0xa00 + beq doorbell_super_common +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + blr diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b3a663333d36..8f0c7c5d93f2 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -333,6 +333,11 @@ int __init fadump_reserve_mem(void) return 1; } +unsigned long __init arch_reserved_kernel_pages(void) +{ + return memblock_reserved_size() / PAGE_SIZE; +} + /* Look for fadump= cmdline option. */ static int __init early_fadump_param(char *p) { @@ -778,7 +783,11 @@ static int fadump_init_elfcore_header(char *bufp) elf->e_entry = 0; elf->e_phoff = sizeof(struct elfhdr); elf->e_shoff = 0; - elf->e_flags = ELF_CORE_EFLAGS; +#if defined(_CALL_ELF) + elf->e_flags = _CALL_ELF; +#else + elf->e_flags = 0; +#endif elf->e_ehsize = sizeof(struct elfhdr); elf->e_phentsize = sizeof(struct elf_phdr); elf->e_phnum = 0; @@ -1104,7 +1113,9 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj, * Take away the '/proc/vmcore'. We are releasing the dump * memory, hence it will not be valid anymore. */ +#ifdef CONFIG_PROC_VMCORE vmcore_cleanup(); +#endif fadump_invalidate_release_mem(); } else diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 15da2b5df85e..08d14b096eb9 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -50,32 +50,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* void do_load_up_transact_fpu(struct thread_struct *thread) - * - * This is similar to load_up_fpu but for the transactional version of the FP - * register set. It doesn't mess with the task MSR or valid flags. - * Furthermore, we don't do lazy FP with TM currently. - */ -_GLOBAL(do_load_up_transact_fpu) - mfmsr r6 - ori r5,r6,MSR_FP -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r5,r5,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - SYNC - MTMSRD(r5) - - addi r7,r3,THREAD_TRANSACT_FPSTATE - lfd fr0,FPSTATE_FPSCR(r7) - MTFSF_L(fr0) - REST_32FPVSRS(0, R4, R7) - - blr -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ - /* * Load state from memory into FP registers including FPSCR. * Assumes the caller has enabled FP in the MSR. diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index cc52d9795f88..a95639b8d4ac 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) if (!ftrace_graph_entry(&trace)) goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY) + if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, + NULL) == -EBUSY) goto out; parent = return_hooker; diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index dc0488b6f6e1..a3f821eb7e9a 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -266,7 +266,6 @@ __secondary_hold_acknowledge: #define EXCEPTION_PROLOG_2 \ - CLR_TOP32(r11); \ stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ @@ -862,7 +861,6 @@ __secondary_start: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* phys address of our thread_struct */ - CLR_TOP32(r4) mtspr SPRN_SPRG_THREAD,r4 li r3,0 mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ @@ -949,7 +947,6 @@ start_here: /* ptr to phys current thread */ tophys(r4,r2) addi r4,r4,THREAD /* init task's THREAD */ - CLR_TOP32(r4) mtspr SPRN_SPRG_THREAD,r4 li r3,0 mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f765b0434731..79da0641bae2 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -28,6 +28,7 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/ppc_asm.h> +#include <asm/head-64.h> #include <asm/asm-offsets.h> #include <asm/bug.h> #include <asm/cputable.h> @@ -65,9 +66,14 @@ * 2. The kernel is entered at __start */ - .text - .globl _stext -_stext: +OPEN_FIXED_SECTION(first_256B, 0x0, 0x100) +USE_FIXED_SECTION(first_256B) + /* + * Offsets are relative from the start of fixed section, and + * first_256B starts at 0. Offsets are a bit easier to use here + * than the fixed section entry macros. + */ + . = 0x0 _GLOBAL(__start) /* NOP this out unconditionally */ BEGIN_FTR_SECTION @@ -104,6 +110,7 @@ __secondary_hold_acknowledge: . = 0x5c .globl __run_at_load __run_at_load: +DEFINE_FIXED_SYMBOL(__run_at_load) .long 0x72756e30 /* "run0" -- relocate to 0 by default */ #endif @@ -133,7 +140,7 @@ __secondary_hold: /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ /* than 0x100, so only need to grab low order offset. */ - std r24,__secondary_hold_acknowledge-_stext(0) + std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) sync li r26,0 @@ -141,7 +148,7 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ -100: ld r12,__secondary_hold_spinloop-_stext(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) cmpdi 0,r12,0 beq 100b @@ -166,12 +173,13 @@ __secondary_hold: #else BUG_OPCODE #endif +CLOSE_FIXED_SECTION(first_256B) /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" exception_marker: .tc ID_72656773_68657265[TC],0x7265677368657265 - .text + .previous /* * On server, we include the exception vectors code here as it @@ -180,8 +188,12 @@ exception_marker: */ #ifdef CONFIG_PPC_BOOK3S #include "exceptions-64s.S" +#else +OPEN_TEXT_SECTION(0x100) #endif +USE_TEXT_SECTION() + #ifdef CONFIG_PPC_BOOK3E /* * The booting_thread_hwid holds the thread id we want to boot in cpu @@ -558,7 +570,7 @@ __after_prom_start: #if defined(CONFIG_PPC_BOOK3E) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif - lwz r7,__run_at_load-_stext(r26) + lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) #if defined(CONFIG_PPC_BOOK3E) tophys(r26,r26) #endif @@ -601,7 +613,7 @@ __after_prom_start: #if defined(CONFIG_PPC_BOOK3E) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif - lwz r7,__run_at_load-_stext(r26) + lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f @@ -611,28 +623,35 @@ __after_prom_start: sub r5,r5,r11 #else /* just copy interrupts */ - LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext) + LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts)) #endif b 5f 3: #endif - lis r5,(copy_to_here - _stext)@ha - addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */ + /* # bytes of memory to copy */ + lis r5,(ABS_ADDR(copy_to_here))@ha + addi r5,r5,(ABS_ADDR(copy_to_here))@l bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ - addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */ - addi r12,r8,(4f - _stext)@l /* that we just made */ + /* Jump to the copy of this code that we just made */ + addis r8,r3,(ABS_ADDR(4f))@ha + addi r12,r8,(ABS_ADDR(4f))@l mtctr r12 bctr .balign 8 -p_end: .llong _end - _stext +p_end: .llong _end - copy_to_here -4: /* Now copy the rest of the kernel up to _end */ - addis r5,r26,(p_end - _stext)@ha - ld r5,(p_end - _stext)@l(r5) /* get _end */ +4: + /* + * Now copy the rest of the kernel up to _end, add + * _end - copy_to_here to the copy limit and run again. + */ + addis r8,r26,(ABS_ADDR(p_end))@ha + ld r8,(ABS_ADDR(p_end))@l(r8) + add r5,r5,r8 5: bl copy_and_flush /* copy the rest */ 9: b start_here_multiplatform diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 43ddaae42baf..3a185c51ce8f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -151,7 +151,6 @@ turn_on_mmu: #define EXCEPTION_PROLOG_2 \ - CLR_TOP32(r11); \ stw r10,_CCR(r11); /* save registers */ \ stw r12,GPR12(r11); \ stw r9,GPR9(r11); \ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index aec9a1b1d25b..9781c69eae57 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -206,7 +206,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_handler(struct die_args *args) +int hw_breakpoint_handler(struct die_args *args) { int rc = NOTIFY_STOP; struct perf_event *bp; @@ -290,11 +290,12 @@ out: rcu_read_unlock(); return rc; } +NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. */ -static int __kprobes single_step_dabr_instruction(struct die_args *args) +static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; struct perf_event *bp = NULL; @@ -329,11 +330,12 @@ static int __kprobes single_step_dabr_instruction(struct die_args *args) return NOTIFY_STOP; } +NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. */ -int __kprobes hw_breakpoint_exceptions_notify( +int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) { int ret = NOTIFY_DONE; @@ -349,6 +351,7 @@ int __kprobes hw_breakpoint_exceptions_notify( return ret; } +NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify); /* * Release the user breakpoints used by ptrace diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index c1ca9282f4a0..6ca9a2ffaac7 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -227,7 +227,7 @@ int ibmebus_request_irq(u32 ist, irq_handler_t handler, { unsigned int irq = irq_create_mapping(NULL, ist); - if (irq == NO_IRQ) + if (!irq) return -EINVAL; return request_irq(irq, handler, irq_flags, devname, dev_id); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index ba79d15f4ddd..bd739fed26e3 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -44,18 +44,6 @@ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ PSSCR_MTL_MASK -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . - .text /* @@ -363,8 +351,8 @@ _GLOBAL(power9_idle_stop) * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ _GLOBAL(pnv_restore_hyp_resource) - ld r2,PACATOC(r13); BEGIN_FTR_SECTION + ld r2,PACATOC(r13); /* * POWER ISA 3. Use PSSCR to determine if we * are waking up from deep idle state @@ -395,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) */ clrldi r5,r13,63 clrrdi r13,r13,1 + + /* Now that we are sure r13 is corrected, load TOC */ + ld r2,PACATOC(r13); cmpwi cr4,r5,1 mtspr SPRN_HSPRG0,r13 @@ -420,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) @@ -462,7 +453,7 @@ lwarx_loop2: * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -490,7 +481,7 @@ first_thread_in_subcore: * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) @@ -535,7 +526,7 @@ timebase_resync: * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock /* * First thread in the core to wake up and its waking up with @@ -566,7 +557,7 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 08887cf2b20e..3c05c311e35e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,6 +67,7 @@ #include <asm/smp.h> #include <asm/debug.h> #include <asm/livepatch.h> +#include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -156,6 +157,15 @@ notrace unsigned int __check_irq_replay(void) } /* + * Check if an hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first. + */ + local_paca->irq_happened &= ~PACA_IRQ_HMI; + if (happened & PACA_IRQ_HMI) + return 0xe60; + + /* * We may have missed a decrementer interrupt. We check the * decrementer itself rather than the paca irq_happened field * in case we also had a rollover while hard disabled @@ -190,11 +200,6 @@ notrace unsigned int __check_irq_replay(void) } #endif /* CONFIG_PPC_BOOK3E */ - /* Check if an hypervisor Maintenance interrupt happened */ - local_paca->irq_happened &= ~PACA_IRQ_HMI; - if (happened & PACA_IRQ_HMI) - return 0xe60; - /* There should be nothing left ! */ BUG_ON(local_paca->irq_happened != 0); @@ -514,7 +519,7 @@ void __do_irq(struct pt_regs *regs) may_hard_irq_enable(); /* And finally process it */ - if (unlikely(irq == NO_IRQ)) + if (unlikely(!irq)) __this_cpu_inc(irq_stat.spurious_irqs); else generic_handle_irq(irq); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 3ed8ec09b5c9..e785cc9e1ecd 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -29,7 +29,7 @@ #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> #include <asm/code-patching.h> diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 7b750c4ed5c7..bc525ea0dc09 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -193,10 +193,10 @@ static int __init add_legacy_soc_port(struct device_node *np, */ if (tsi && !strcmp(tsi->type, "tsi-bridge")) return add_legacy_port(np, -1, UPIO_TSI, addr, addr, - NO_IRQ, legacy_port_flags, 0); + 0, legacy_port_flags, 0); else return add_legacy_port(np, -1, UPIO_MEM, addr, addr, - NO_IRQ, legacy_port_flags, 0); + 0, legacy_port_flags, 0); } static int __init add_legacy_isa_port(struct device_node *np, @@ -242,7 +242,7 @@ static int __init add_legacy_isa_port(struct device_node *np, /* Add port, irq will be dealt with later */ return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), - taddr, NO_IRQ, legacy_port_flags, 0); + taddr, 0, legacy_port_flags, 0); } @@ -314,7 +314,7 @@ static int __init add_legacy_pci_port(struct device_node *np, /* Add port, irq will be dealt with later. We passed a translated * IO port value. It will be fixed up later along with the irq */ - return add_legacy_port(np, index, iotype, base, addr, NO_IRQ, + return add_legacy_port(np, index, iotype, base, addr, 0, legacy_port_flags, np != pci_dev); } #endif @@ -462,14 +462,14 @@ static void __init fixup_port_irq(int index, DBG("fixup_port_irq(%d)\n", index); virq = irq_of_parse_and_map(np, 0); - if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) { + if (!virq && legacy_serial_infos[index].irq_check_parent) { np = of_get_parent(np); if (np == NULL) return; virq = irq_of_parse_and_map(np, 0); of_node_put(np); } - if (virq == NO_IRQ) + if (!virq) return; port->irq = virq; @@ -543,7 +543,7 @@ static int __init serial_dev_init(void) struct plat_serial8250_port *port = &legacy_serial_ports[i]; struct device_node *np = legacy_serial_infos[i].np; - if (port->irq == NO_IRQ) + if (!port->irq) fixup_port_irq(i, np, port); if (port->iotype == UPIO_PORT) fixup_port_pio(i, np, port); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 4c780a342282..a205fa3d9bf3 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -23,6 +23,7 @@ #include <asm/current.h> #include <asm/machdep.h> #include <asm/cacheflush.h> +#include <asm/firmware.h> #include <asm/paca.h> #include <asm/mmu.h> #include <asm/sections.h> /* _end */ @@ -31,21 +32,6 @@ #include <asm/hw_breakpoint.h> #include <asm/asm-prototypes.h> -#ifdef CONFIG_PPC_BOOK3E -int default_machine_kexec_prepare(struct kimage *image) -{ - int i; - /* - * Since we use the kernel fault handlers and paging code to - * handle the virtual mode, we must make sure no destination - * overlaps kernel static data or bss. - */ - for (i = 0; i < image->nr_segments; i++) - if (image->segment[i].mem < __pa(_end)) - return -ETXTBSY; - return 0; -} -#else int default_machine_kexec_prepare(struct kimage *image) { int i; @@ -55,9 +41,6 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!mmu_hash_ops.hpte_clear_all) - return -ENOENT; - /* * Since we use the kernel fault handlers and paging code to * handle the virtual mode, we must make sure no destination @@ -67,31 +50,6 @@ int default_machine_kexec_prepare(struct kimage *image) if (image->segment[i].mem < __pa(_end)) return -ETXTBSY; - /* - * For non-LPAR, we absolutely can not overwrite the mmu hash - * table, since we are still using the bolted entries in it to - * do the copy. Check that here. - * - * It is safe if the end is below the start of the blocked - * region (end <= low), or if the beginning is after the - * end of the blocked region (begin >= high). Use the - * boolean identity !(a || b) === (!a && !b). - */ -#ifdef CONFIG_PPC_STD_MMU_64 - if (htab_address) { - low = __pa(htab_address); - high = low + htab_size_bytes; - - for (i = 0; i < image->nr_segments; i++) { - begin = image->segment[i].mem; - end = begin + image->segment[i].memsz; - - if ((begin < high) && (end > low)) - return -ETXTBSY; - } - } -#endif /* CONFIG_PPC_STD_MMU_64 */ - /* We also should not overwrite the tce tables */ for_each_node_by_type(node, "pci") { basep = of_get_property(node, "linux,tce-base", NULL); @@ -113,7 +71,6 @@ int default_machine_kexec_prepare(struct kimage *image) return 0; } -#endif /* !CONFIG_PPC_BOOK3E */ static void copy_segments(unsigned long ind) { @@ -332,11 +289,14 @@ struct paca_struct kexec_paca; /* Our assembly helper, in misc_64.S */ extern void kexec_sequence(void *newstack, unsigned long start, void *image, void *control, - void (*clear_all)(void)) __noreturn; + void (*clear_all)(void), + bool copy_with_mmu_off) __noreturn; /* too late to fail here */ void default_machine_kexec(struct kimage *image) { + bool copy_with_mmu_off; + /* prepare control code if any */ /* @@ -374,18 +334,29 @@ void default_machine_kexec(struct kimage *image) /* XXX: If anyone does 'dynamic lppacas' this will also need to be * switched to a static version! */ + /* + * On Book3S, the copy must happen with the MMU off if we are either + * using Radix page tables or we are not in an LPAR since we can + * overwrite the page tables while copying. + * + * In an LPAR, we keep the MMU on otherwise we can't access beyond + * the RMA. On BookE there is no real MMU off mode, so we have to + * keep it enabled as well (but then we have bolted TLB entries). + */ +#ifdef CONFIG_PPC_BOOK3E + copy_with_mmu_off = false; +#else + copy_with_mmu_off = radix_enabled() || + !(firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)); +#endif /* Some things are best done in assembly. Finding globals with * a toc is easier in C, so pass in what we can. */ kexec_sequence(&kexec_stack, image->start, image, - page_address(image->control_code_page), -#ifdef CONFIG_PPC_STD_MMU - mmu_hash_ops.hpte_clear_all -#else - NULL -#endif - ); + page_address(image->control_code_page), + mmu_cleanup_all, copy_with_mmu_off); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ef267fd9dd22..5e7ece0fda9f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->in_use = 1; mce->initiator = MCE_INITIATOR_CPU; - if (handled) + /* Mark it recovered if we have handled it and MSR(RI=1). */ + if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index d9c912b6e632..03756ffdcd71 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -328,7 +328,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) * * flush_icache_range(unsigned long start, unsigned long stop) */ -_KPROBE(flush_icache_range) +_GLOBAL(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr /* for 601, do nothing */ @@ -358,6 +358,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) sync /* additional sync needed on g4 */ isync blr +_ASM_NOKPROBE_SYMBOL(flush_icache_range) + /* * Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index cb195157b318..9f0bed214bcb 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -66,7 +66,7 @@ PPC64_CACHES: * flush all bytes from start through stop-1 inclusive */ -_KPROBE(flush_icache_range) +_GLOBAL(flush_icache_range) BEGIN_FTR_SECTION PURGE_PREFETCHED_INS blr @@ -109,7 +109,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) bdnz 2b isync blr - .previous .text +_ASM_NOKPROBE_SYMBOL(flush_icache_range) + /* * Like above, but only do the D-cache. * @@ -591,7 +592,8 @@ real_mode: /* assume normal blr return */ #endif /* - * kexec_sequence(newstack, start, image, control, clear_all()) + * kexec_sequence(newstack, start, image, control, clear_all(), + copy_with_mmu_off) * * does the grungy work with stack switching and real mode switches * also does simple calls to other code @@ -627,7 +629,7 @@ _GLOBAL(kexec_sequence) mr r29,r5 /* image (virt) */ mr r28,r6 /* control, unused */ mr r27,r7 /* clear_all() fn desc */ - mr r26,r8 /* spare */ + mr r26,r8 /* copy_with_mmu_off */ lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ /* disable interrupts, we are overwriting kernel data next */ @@ -639,15 +641,24 @@ _GLOBAL(kexec_sequence) mtmsrd r3,1 #endif + /* We need to turn the MMU off unless we are in hash mode + * under a hypervisor + */ + cmpdi r26,0 + beq 1f + bl real_mode +1: /* copy dest pages, flush whole dest image */ mr r3,r29 bl kexec_copy_flush /* (image) */ - /* turn off mmu */ + /* turn off mmu now if not done earlier */ + cmpdi r26,0 + bne 1f bl real_mode /* copy 0x100 bytes starting at start to 0 */ - li r3,0 +1: li r3,0 mr r4,r30 /* start, aka phys mem offset */ li r5,0x100 li r6,0 @@ -659,7 +670,9 @@ _GLOBAL(kexec_sequence) li r6,1 stw r6,kexec_flag-1b(5) -#ifndef CONFIG_PPC_BOOK3E + cmpdi r27,0 + beq 1f + /* clear out hardware hash page table and tlb */ #ifdef PPC64_ELF_ABI_v1 ld r12,0(r27) /* deref function descriptor */ @@ -668,7 +681,6 @@ _GLOBAL(kexec_sequence) #endif mtctr r12 bctrl /* mmu_hash_ops.hpte_clear_all(void); */ -#endif /* !CONFIG_PPC_BOOK3E */ /* * kexec image calling is: @@ -695,7 +707,7 @@ _GLOBAL(kexec_sequence) * are the boot cpu ????? * other device tree differences (prop sizes, va vs pa, etc)... */ - mr r3,r25 # my phys cpu +1: mr r3,r25 # my phys cpu mr r4,r30 # start, aka phys mem offset mtlr 4 li r5,0 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index d1f1b35bf0c7..30b89d5cbb03 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -27,7 +27,7 @@ #include <linux/sort.h> #include <asm/setup.h> -LIST_HEAD(module_bug_list); +static LIST_HEAD(module_bug_list); static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 64174bf95611..34d2c595de23 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -542,9 +542,9 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_nsec = 0; } *buf = kmemdup(buff + hdr_size, length, GFP_KERNEL); + kfree(buff); if (*buf == NULL) return -ENOMEM; - kfree(buff); *ecc_notice_size = 0; if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) @@ -851,7 +851,7 @@ static long dev_nvram_ioctl(struct file *file, unsigned int cmd, } } -const struct file_operations nvram_fops = { +static const struct file_operations nvram_fops = { .owner = THIS_MODULE, .llseek = dev_nvram_llseek, .read = dev_nvram_read, @@ -956,7 +956,7 @@ int __init nvram_remove_partition(const char *name, int sig, /* Make partition a free partition */ part->header.signature = NVRAM_SIG_FREE; - strncpy(part->header.name, "wwwwwwwwwwww", 12); + memset(part->header.name, 'w', 12); part->header.checksum = nvram_checksum(&part->header); rc = nvram_write_header(part); if (rc <= 0) { @@ -974,8 +974,8 @@ int __init nvram_remove_partition(const char *name, int sig, } if (prev) { prev->header.length += part->header.length; - prev->header.checksum = nvram_checksum(&part->header); - rc = nvram_write_header(part); + prev->header.checksum = nvram_checksum(&prev->header); + rc = nvram_write_header(prev); if (rc <= 0) { printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc); return rc; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a5c0153ede37..95d3769a2e26 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops); static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; + u32 prop_32; u64 prop; /* @@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn) * reading "ibm,opal-phbid", only present in OPAL environment. */ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); - if (ret) - ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop); + if (ret) { + ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); + prop = prop_32; + } if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); @@ -151,6 +154,42 @@ void pcibios_free_controller(struct pci_controller *phb) EXPORT_SYMBOL_GPL(pcibios_free_controller); /* + * This function is used to call pcibios_free_controller() + * in a deferred manner: a callback from the PCI subsystem. + * + * _*DO NOT*_ call pcibios_free_controller() explicitly if + * this is used (or it may access an invalid *phb pointer). + * + * The callback occurs when all references to the root bus + * are dropped (e.g., child buses/devices and their users). + * + * It's called as .release_fn() of 'struct pci_host_bridge' + * which is associated with the 'struct pci_controller.bus' + * (root bus) - it expects .release_data to hold a pointer + * to 'struct pci_controller'. + * + * In order to use it, register .release_fn()/release_data + * like this: + * + * pci_set_host_bridge_release(bridge, + * pcibios_free_controller_deferred + * (void *) phb); + * + * e.g. in the pcibios_root_bridge_prepare() callback from + * pci_create_root_bus(). + */ +void pcibios_free_controller_deferred(struct pci_host_bridge *bridge) +{ + struct pci_controller *phb = (struct pci_controller *) + bridge->release_data; + + pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic); + + pcibios_free_controller(phb); +} +EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred); + +/* * The function is used to return the minimal alignment * for memory or I/O windows of the associated P2P bridge. * By default, 4KiB alignment for I/O windows and 1MiB for @@ -321,7 +360,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) line, pin); virq = irq_create_mapping(NULL, line); - if (virq != NO_IRQ) + if (virq) irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW); } else { pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n", @@ -330,7 +369,8 @@ static int pci_read_irq_line(struct pci_dev *pci_dev) virq = irq_create_of_mapping(&oirq); } - if(virq == NO_IRQ) { + + if (!virq) { pr_debug(" Failed to map !\n"); return -1; } diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 526ac6750e4d..ea3d98115b88 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -178,7 +178,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->hdr_type = PCI_HEADER_TYPE_NORMAL; dev->rom_base_reg = PCI_ROM_ADDRESS; /* Maybe do a default OF mapping here */ - dev->irq = NO_IRQ; + dev->irq = 0; } of_pci_parse_addrs(node, dev); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 58ccf86415b4..9e7c10fe205f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -59,6 +59,7 @@ #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> +#include <asm/asm-prototypes.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -88,7 +89,13 @@ static void check_if_tm_restore_required(struct task_struct *tsk) set_thread_flag(TIF_RESTORE_TM); } } + +static inline bool msr_tm_active(unsigned long msr) +{ + return MSR_TM_ACTIVE(msr); +} #else +static inline bool msr_tm_active(unsigned long msr) { return false; } static inline void check_if_tm_restore_required(struct task_struct *tsk) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -104,7 +111,7 @@ static int __init enable_strict_msr_control(char *str) } early_param("ppc_strict_facility_enable", enable_strict_msr_control); -void msr_check_and_set(unsigned long bits) +unsigned long msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -118,6 +125,8 @@ void msr_check_and_set(unsigned long bits) if (oldmsr != newmsr) mtmsr_isync(newmsr); + + return newmsr; } void __msr_check_and_clear(unsigned long bits) @@ -196,19 +205,30 @@ EXPORT_SYMBOL_GPL(flush_fp_to_thread); void enable_kernel_fp(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_FP); + cpumsr = msr_check_and_set(MSR_FP); if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; __giveup_fpu(current); } } EXPORT_SYMBOL(enable_kernel_fp); static int restore_fp(struct task_struct *tsk) { - if (tsk->thread.load_fp) { + if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; return 1; @@ -248,12 +268,23 @@ EXPORT_SYMBOL(giveup_altivec); void enable_kernel_altivec(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_VEC); + cpumsr = msr_check_and_set(MSR_VEC); if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; __giveup_altivec(current); } } @@ -278,7 +309,8 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); static int restore_altivec(struct task_struct *tsk) { - if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + if (cpu_has_feature(CPU_FTR_ALTIVEC) && + (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) { load_vr_state(&tsk->thread.vr_state); tsk->thread.used_vr = 1; tsk->thread.load_vec++; @@ -321,12 +353,23 @@ static void save_vsx(struct task_struct *tsk) void enable_kernel_vsx(void) { + unsigned long cpumsr; + WARN_ON(preemptible()); - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); + cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { check_if_tm_restore_required(current); + /* + * If a thread has already been reclaimed then the + * checkpointed registers are on the CPU but have definitely + * been saved by the reclaim code. Don't need to and *cannot* + * giveup as this would save to the 'live' structure not the + * checkpointed structure. + */ + if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) + return; if (current->thread.regs->msr & MSR_FP) __giveup_fpu(current); if (current->thread.regs->msr & MSR_VEC) @@ -438,6 +481,7 @@ void giveup_all(struct task_struct *tsk) return; msr_check_and_set(msr_all_available); + check_if_tm_restore_required(tsk); #ifdef CONFIG_PPC_FPU if (usermsr & MSR_FP) @@ -464,7 +508,8 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - if (!current->thread.load_fp && !loadvec(current->thread)) + if (!msr_tm_active(regs->msr) && + !current->thread.load_fp && !loadvec(current->thread)) return; msr = regs->msr; @@ -767,29 +812,15 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + +static inline bool tm_enabled(struct task_struct *tsk) +{ + return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM); +} + static void tm_reclaim_thread(struct thread_struct *thr, struct thread_info *ti, uint8_t cause) { - unsigned long msr_diff = 0; - - /* - * If FP/VSX registers have been already saved to the - * thread_struct, move them to the transact_fp array. - * We clear the TIF_RESTORE_TM bit since after the reclaim - * the thread will no longer be transactional. - */ - if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) { - msr_diff = thr->ckpt_regs.msr & ~thr->regs->msr; - if (msr_diff & MSR_FP) - memcpy(&thr->transact_fp, &thr->fp_state, - sizeof(struct thread_fp_state)); - if (msr_diff & MSR_VEC) - memcpy(&thr->transact_vr, &thr->vr_state, - sizeof(struct thread_vr_state)); - clear_ti_thread_flag(ti, TIF_RESTORE_TM); - msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1; - } - /* * Use the current MSR TM suspended bit to track if we have * checkpointed state outstanding. @@ -808,15 +839,9 @@ static void tm_reclaim_thread(struct thread_struct *thr, if (!MSR_TM_SUSPENDED(mfmsr())) return; - tm_reclaim(thr, thr->regs->msr, cause); + giveup_all(container_of(thr, struct task_struct, thread)); - /* Having done the reclaim, we now have the checkpointed - * FP/VSX values in the registers. These might be valid - * even if we have previously called enable_kernel_fp() or - * flush_fp_to_thread(), so update thr->regs->msr to - * indicate their current validity. - */ - thr->regs->msr |= msr_diff; + tm_reclaim(thr, thr->ckpt_regs.msr, cause); } void tm_reclaim_current(uint8_t cause) @@ -832,8 +857,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk) * * In switching we need to maintain a 2nd register state as * oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the - * checkpointed (tbegin) state in ckpt_regs and saves the transactional - * (current) FPRs into oldtask->thread.transact_fpr[]. + * checkpointed (tbegin) state in ckpt_regs, ckfp_state and + * ckvr_state * * We also context switch (save) TFHAR/TEXASR/TFIAR in here. */ @@ -845,14 +870,6 @@ static inline void tm_reclaim_task(struct task_struct *tsk) if (!MSR_TM_ACTIVE(thr->regs->msr)) goto out_and_saveregs; - /* Stash the original thread MSR, as giveup_fpu et al will - * modify it. We hold onto it to see whether the task used - * FP & vector regs. If the TIF_RESTORE_TM flag is set, - * ckpt_regs.msr is already set. - */ - if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM)) - thr->ckpt_regs.msr = thr->regs->msr; - TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, @@ -881,6 +898,9 @@ void tm_recheckpoint(struct thread_struct *thread, { unsigned long flags; + if (!(thread->regs->msr & MSR_TM)) + return; + /* We really can't be interrupted here as the TEXASR registers can't * change and later in the trecheckpoint code, we have a userspace R1. * So let's hard disable over this region. @@ -910,10 +930,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) * If the task was using FP, we non-lazily reload both the original and * the speculative FP register states. This is because the kernel * doesn't see if/when a TM rollback occurs, so if we take an FP - * unavoidable later, we are unable to determine which set of FP regs + * unavailable later, we are unable to determine which set of FP regs * need to be restored. */ - if (!new->thread.regs) + if (!tm_enabled(new)) return; if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ @@ -926,35 +946,35 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) "(new->msr 0x%lx, new->origmsr 0x%lx)\n", new->pid, new->thread.regs->msr, msr); - /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&new->thread, msr); - /* This loads the speculative FP/VEC state, if used */ - if (msr & MSR_FP) { - do_load_up_transact_fpu(&new->thread); - new->thread.regs->msr |= - (MSR_FP | new->thread.fpexc_mode); - } -#ifdef CONFIG_ALTIVEC - if (msr & MSR_VEC) { - do_load_up_transact_altivec(&new->thread); - new->thread.regs->msr |= MSR_VEC; - } -#endif - /* We may as well turn on VSX too since all the state is restored now */ - if (msr & MSR_VSX) - new->thread.regs->msr |= MSR_VSX; + /* + * The checkpointed state has been restored but the live state has + * not, ensure all the math functionality is turned off to trigger + * restore_math() to reload. + */ + new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX); TM_DEBUG("*** tm_recheckpoint of pid %d complete " "(kernel msr 0x%lx)\n", new->pid, mfmsr()); } -static inline void __switch_to_tm(struct task_struct *prev) +static inline void __switch_to_tm(struct task_struct *prev, + struct task_struct *new) { if (cpu_has_feature(CPU_FTR_TM)) { - tm_enable(); - tm_reclaim_task(prev); + if (tm_enabled(prev) || tm_enabled(new)) + tm_enable(); + + if (tm_enabled(prev)) { + prev->thread.load_tm++; + tm_reclaim_task(prev); + if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0) + prev->thread.regs->msr &= ~MSR_TM; + } + + tm_recheckpoint_new_task(new); } } @@ -976,6 +996,12 @@ void restore_tm_state(struct pt_regs *regs) { unsigned long msr_diff; + /* + * This is the only moment we should clear TIF_RESTORE_TM as + * it is here that ckpt_regs.msr and pt_regs.msr become the same + * again, anything else could lead to an incorrect ckpt_msr being + * saved and therefore incorrect signal contexts. + */ clear_thread_flag(TIF_RESTORE_TM); if (!MSR_TM_ACTIVE(regs->msr)) return; @@ -983,6 +1009,13 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; + /* Ensure that restore_math() will restore */ + if (msr_diff & MSR_FP) + current->thread.load_fp = 1; +#ifdef CONFIG_ALIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) + current->thread.load_vec = 1; +#endif restore_math(regs); regs->msr |= msr_diff; @@ -990,7 +1023,7 @@ void restore_tm_state(struct pt_regs *regs) #else #define tm_recheckpoint_new_task(new) -#define __switch_to_tm(prev) +#define __switch_to_tm(prev, new) #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ static inline void save_sprs(struct thread_struct *t) @@ -1074,26 +1107,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, #endif } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * Process self tracing is not yet supported through - * ptrace interface. Ptrace generic code should have - * prevented this from happening in the first place. - * Warn once here with the message, if some how it - * is attempted. - */ - WARN_ONCE(tsk == current, - "Not expecting ptrace on self: TM regs may be incorrect\n"); - - /* - * If task is not current, it should have been flushed - * already to it's thread_struct during __switch_to(). - */ -} -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -1151,11 +1164,11 @@ struct task_struct *__switch_to(struct task_struct *prev, */ save_sprs(&prev->thread); - __switch_to_tm(prev); - /* Save FPU, Altivec, VSX and SPE state */ giveup_all(prev); + __switch_to_tm(prev, new); + /* * We can't take a PMU exception inside _switch() since there is a * window where the kernel stack SLB and the kernel stack are out @@ -1163,8 +1176,6 @@ struct task_struct *__switch_to(struct task_struct *prev, */ hard_irq_disable(); - tm_recheckpoint_new_task(new); - /* * Call restore_sprs() before calling _switch(). If we move it after * _switch() then we miss out on calling it for new tasks. The reason @@ -1399,9 +1410,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * transitions the CPU out of TM mode. Hence we need to call * tm_recheckpoint_new_task() (on the same task) to restore the * checkpointed state back and the TM mode. + * + * Can't pass dst because it isn't ready. Doesn't matter, passing + * dst is only important for __switch_to() */ - __switch_to_tm(src); - tm_recheckpoint_new_task(src); + __switch_to_tm(src, src); *dst = *src; @@ -1643,8 +1656,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.used_spe = 0; #endif /* CONFIG_SPE */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (cpu_has_feature(CPU_FTR_TM)) - regs->msr |= MSR_TM; current->thread.tm_tfhar = 0; current->thread.tm_texasr = 0; current->thread.tm_tfiar = 0; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6ee4b72cda42..88ac964f4858 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -42,6 +42,7 @@ #include <asm/sections.h> #include <asm/machdep.h> #include <asm/opal.h> +#include <asm/asm-prototypes.h> #include <linux/linux_logo.h> @@ -695,7 +696,7 @@ unsigned char ibm_architecture_vec[] = { OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */ /* option vector 5: PAPR/OF options */ - VECTOR_LENGTH(18), /* length */ + VECTOR_LENGTH(21), /* length */ 0, /* don't ignore, don't halt */ OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) | OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) | @@ -726,8 +727,11 @@ unsigned char ibm_architecture_vec[] = { 0, 0, OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | - OV5_FEAT(OV5_PFO_HW_842), - OV5_FEAT(OV5_SUB_PROCESSORS), + OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */ + 0, /* Byte 18 */ + 0, /* Byte 19 */ + 0, /* Byte 20 */ + OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */ /* option vector 6: IBM PAPR hints */ VECTOR_LENGTH(3), /* length */ @@ -2640,6 +2644,86 @@ static void __init fixup_device_tree_efika(void) #define fixup_device_tree_efika() #endif +#ifdef CONFIG_PPC_PASEMI_NEMO +/* + * CFE supplied on Nemo is broken in several ways, biggest + * problem is that it reassigns ISA interrupts to unused mpic ints. + * Add an interrupt-controller property for the io-bridge to use + * and correct the ints so we can attach them to an irq_domain + */ +static void __init fixup_device_tree_pasemi(void) +{ + u32 interrupts[2], parent, rval, val = 0; + char *name, *pci_name; + phandle iob, node; + + /* Find the root pci node */ + name = "/pxp@0,e0000000"; + iob = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(iob)) + return; + + /* check if interrupt-controller node set yet */ + if (prom_getproplen(iob, "interrupt-controller") !=PROM_ERROR) + return; + + prom_printf("adding interrupt-controller property for SB600...\n"); + + prom_setprop(iob, name, "interrupt-controller", &val, 0); + + pci_name = "/pxp@0,e0000000/pci@11"; + node = call_prom("finddevice", 1, 1, ADDR(pci_name)); + parent = ADDR(iob); + + for( ; prom_next_node(&node); ) { + /* scan each node for one with an interrupt */ + if (!PHANDLE_VALID(node)) + continue; + + rval = prom_getproplen(node, "interrupts"); + if (rval == 0 || rval == PROM_ERROR) + continue; + + prom_getprop(node, "interrupts", &interrupts, sizeof(interrupts)); + if ((interrupts[0] < 212) || (interrupts[0] > 222)) + continue; + + /* found a node, update both interrupts and interrupt-parent */ + if ((interrupts[0] >= 212) && (interrupts[0] <= 215)) + interrupts[0] -= 203; + if ((interrupts[0] >= 216) && (interrupts[0] <= 220)) + interrupts[0] -= 213; + if (interrupts[0] == 221) + interrupts[0] = 14; + if (interrupts[0] == 222) + interrupts[0] = 8; + + prom_setprop(node, pci_name, "interrupts", interrupts, + sizeof(interrupts)); + prom_setprop(node, pci_name, "interrupt-parent", &parent, + sizeof(parent)); + } + + /* + * The io-bridge has device_type set to 'io-bridge' change it to 'isa' + * so that generic isa-bridge code can add the SB600 and its on-board + * peripherals. + */ + name = "/pxp@0,e0000000/io-bridge@0"; + iob = call_prom("finddevice", 1, 1, ADDR(name)); + if (!PHANDLE_VALID(iob)) + return; + + /* device_type is already set, just change it. */ + + prom_printf("Changing device_type of SB600 node...\n"); + + prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); +} +#else /* !CONFIG_PPC_PASEMI_NEMO */ +static inline void fixup_device_tree_pasemi(void) { } +#endif + static void __init fixup_device_tree(void) { fixup_device_tree_maple(); @@ -2647,6 +2731,7 @@ static void __init fixup_device_tree(void) fixup_device_tree_chrp(); fixup_device_tree_pmac(); fixup_device_tree_efika(); + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) @@ -2940,7 +3025,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { - prom_printf("Booting Linux via __start() ...\n"); + prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%x\n", hdr); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 4f3c5756cc09..b1ec62f2cc31 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -38,6 +38,8 @@ #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> +#include <asm/tm.h> +#include <asm/asm-prototypes.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -118,6 +120,24 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_END, }; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state. + */ + + if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(TM_CAUSE_SIGNAL); + +} +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register @@ -383,13 +403,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, } /* - * When the transaction is active, 'transact_fp' holds the current running - * value of all FPR registers and 'fp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. When transaction - * is not active 'fp_state' holds the current running state of all the FPR - * registers. So this function which returns the current running values of - * all the FPR registers, needs to know whether any transaction is active - * or not. + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. * * Userspace interface buffer layout: * @@ -397,13 +413,6 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, * u64 fpr[32]; * u64 fpscr; * }; - * - * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM - * which determines the final code in this function. All the combinations of - * these two config options are possible except the one below as transactional - * memory config pulls in CONFIG_VSX automatically. - * - * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -412,50 +421,29 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX u64 buf[33]; int i; -#endif - flush_fp_to_thread(target); -#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) - /* copy to local buffer then write that out */ - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_TRANS_FPR(i); - buf[32] = target->thread.transact_fp.fpscr; - } else { - for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; - } - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -#endif + flush_fp_to_thread(target); -#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) /* copy to local buffer then write that out */ for (i = 0; i < 32 ; i++) buf[i] = target->thread.TS_FPR(i); buf[32] = target->thread.fp_state.fpscr; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); -#endif - -#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) +#else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); + flush_fp_to_thread(target); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); #endif } /* - * When the transaction is active, 'transact_fp' holds the current running - * value of all FPR registers and 'fp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. When transaction - * is not active 'fp_state' holds the current running state of all the FPR - * registers. So this function which setss the current running values of - * all the FPR registers, needs to know whether any transaction is active - * or not. + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last checkpointed + * value of all FPR registers for the current transaction. * * Userspace interface buffer layout: * @@ -464,12 +452,6 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, * u64 fpscr; * }; * - * There are two config options CONFIG_VSX and CONFIG_PPC_TRANSACTIONAL_MEM - * which determines the final code in this function. All the combinations of - * these two config options are possible except the one below as transactional - * memory config pulls in CONFIG_VSX automatically. - * - * !defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) */ static int fpr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, @@ -478,44 +460,24 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, #ifdef CONFIG_VSX u64 buf[33]; int i; -#endif + flush_fp_to_thread(target); -#if defined(CONFIG_VSX) && defined(CONFIG_PPC_TRANSACTIONAL_MEM) /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) return i; - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); - for (i = 0; i < 32 ; i++) - target->thread.TS_TRANS_FPR(i) = buf[i]; - target->thread.transact_fp.fpscr = buf[32]; - } else { - for (i = 0; i < 32 ; i++) - target->thread.TS_FPR(i) = buf[i]; - target->thread.fp_state.fpscr = buf[32]; - } - return 0; -#endif - -#if defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) - /* copy to local buffer then write that out */ - i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); - if (i) - return i; for (i = 0; i < 32 ; i++) target->thread.TS_FPR(i) = buf[i]; target->thread.fp_state.fpscr = buf[32]; return 0; -#endif - -#if !defined(CONFIG_VSX) && !defined(CONFIG_PPC_TRANSACTIONAL_MEM) +#else BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) != offsetof(struct thread_fp_state, fpr[32])); + flush_fp_to_thread(target); + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fp_state, 0, -1); #endif @@ -543,13 +505,10 @@ static int vr_active(struct task_struct *target, } /* - * When the transaction is active, 'transact_vr' holds the current running - * value of all the VMX registers and 'vr_state' holds the last checkpointed - * value of all the VMX registers for the current transaction to fall back - * on in case it aborts. When transaction is not active 'vr_state' holds - * the current running state of all the VMX registers. So this function which - * gets the current running values of all the VMX registers, needs to know - * whether any transaction is active or not. + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. * * Userspace interface buffer layout: * @@ -563,7 +522,6 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct thread_vr_state *addr; int ret; flush_altivec_to_thread(target); @@ -571,19 +529,8 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - flush_fp_to_thread(target); - flush_tmregs_to_thread(target); - addr = &target->thread.transact_vr; - } else { - addr = &target->thread.vr_state; - } -#else - addr = &target->thread.vr_state; -#endif ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - addr, 0, + &target->thread.vr_state, 0, 33 * sizeof(vector128)); if (!ret) { /* @@ -595,14 +542,7 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, } vrsave; memset(&vrsave, 0, sizeof(vrsave)); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - vrsave.word = target->thread.transact_vrsave; - else - vrsave.word = target->thread.vrsave; -#else vrsave.word = target->thread.vrsave; -#endif ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); @@ -612,13 +552,10 @@ static int vr_get(struct task_struct *target, const struct user_regset *regset, } /* - * When the transaction is active, 'transact_vr' holds the current running - * value of all the VMX registers and 'vr_state' holds the last checkpointed - * value of all the VMX registers for the current transaction to fall back - * on in case it aborts. When transaction is not active 'vr_state' holds - * the current running state of all the VMX registers. So this function which - * sets the current running values of all the VMX registers, needs to know - * whether any transaction is active or not. + * Regardless of transactions, 'vr_state' holds the current running + * value of all the VMX registers and 'ckvr_state' holds the last + * checkpointed value of all the VMX registers for the current + * transaction to fall back on in case it aborts. * * Userspace interface buffer layout: * @@ -632,7 +569,6 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct thread_vr_state *addr; int ret; flush_altivec_to_thread(target); @@ -640,19 +576,8 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) != offsetof(struct thread_vr_state, vr[32])); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - flush_fp_to_thread(target); - flush_tmregs_to_thread(target); - addr = &target->thread.transact_vr; - } else { - addr = &target->thread.vr_state; - } -#else - addr = &target->thread.vr_state; -#endif ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - addr, 0, + &target->thread.vr_state, 0, 33 * sizeof(vector128)); if (!ret && count > 0) { /* @@ -664,27 +589,12 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset, } vrsave; memset(&vrsave, 0, sizeof(vrsave)); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - vrsave.word = target->thread.transact_vrsave; - else - vrsave.word = target->thread.vrsave; -#else vrsave.word = target->thread.vrsave; -#endif + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); - if (!ret) { - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) - target->thread.transact_vrsave = vrsave.word; - else - target->thread.vrsave = vrsave.word; -#else + if (!ret) target->thread.vrsave = vrsave.word; -#endif - } } return ret; @@ -706,13 +616,10 @@ static int vsr_active(struct task_struct *target, } /* - * When the transaction is active, 'transact_fp' holds the current running - * value of all FPR registers and 'fp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. When transaction - * is not active 'fp_state' holds the current running state of all the FPR - * registers. So this function which returns the current running values of - * all the FPR registers, needs to know whether any transaction is active - * or not. + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. * * Userspace interface buffer layout: * @@ -727,27 +634,14 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret, i; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); -#endif flush_vsx_to_thread(target); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - for (i = 0; i < 32 ; i++) - buf[i] = target->thread. - transact_fp.fpr[i][TS_VSRLOWOFFSET]; - } else { - for (i = 0; i < 32 ; i++) - buf[i] = target->thread. - fp_state.fpr[i][TS_VSRLOWOFFSET]; - } -#else for (i = 0; i < 32 ; i++) buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; -#endif + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); @@ -755,12 +649,10 @@ static int vsr_get(struct task_struct *target, const struct user_regset *regset, } /* - * When the transaction is active, 'transact_fp' holds the current running - * value of all FPR registers and 'fp_state' holds the last checkpointed - * value of all FPR registers for the current transaction. When transaction - * is not active 'fp_state' holds the current running state of all the FPR - * registers. So this function which sets the current running values of all - * the FPR registers, needs to know whether any transaction is active or not. + * Regardless of transactions, 'fp_state' holds the current running + * value of all FPR registers and 'ckfp_state' holds the last + * checkpointed value of all FPR registers for the current + * transaction. * * Userspace interface buffer layout: * @@ -775,31 +667,16 @@ static int vsr_set(struct task_struct *target, const struct user_regset *regset, u64 buf[32]; int ret,i; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); -#endif flush_vsx_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(target->thread.regs->msr)) { - for (i = 0; i < 32 ; i++) - target->thread.transact_fp. - fpr[i][TS_VSRLOWOFFSET] = buf[i]; - } else { + if (!ret) for (i = 0; i < 32 ; i++) - target->thread.fp_state. - fpr[i][TS_VSRLOWOFFSET] = buf[i]; - } -#else - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; -#endif - + target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return ret; } @@ -925,9 +802,9 @@ static int tm_cgpr_get(struct task_struct *target, if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.ckpt_regs, @@ -990,9 +867,9 @@ static int tm_cgpr_set(struct task_struct *target, if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.ckpt_regs, @@ -1068,7 +945,7 @@ static int tm_cfpr_active(struct task_struct *target, * * This function gets in transaction checkpointed FPR registers. * - * When the transaction is active 'fp_state' holds the checkpointed + * When the transaction is active 'ckfp_state' holds the checkpointed * values for the current transaction to fall back on if it aborts * in between. This function gets those checkpointed FPR registers. * The userspace interface buffer layout is as follows. @@ -1092,14 +969,14 @@ static int tm_cfpr_get(struct task_struct *target, if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); /* copy to local buffer then write that out */ for (i = 0; i < 32 ; i++) - buf[i] = target->thread.TS_FPR(i); - buf[32] = target->thread.fp_state.fpscr; + buf[i] = target->thread.TS_CKFPR(i); + buf[32] = target->thread.ckfp_state.fpscr; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1); } @@ -1114,7 +991,7 @@ static int tm_cfpr_get(struct task_struct *target, * * This function sets in transaction checkpointed FPR registers. * - * When the transaction is active 'fp_state' holds the checkpointed + * When the transaction is active 'ckfp_state' holds the checkpointed * FPR register values for the current transaction to fall back on * if it aborts in between. This function sets these checkpointed * FPR registers. The userspace interface buffer layout is as follows. @@ -1138,17 +1015,17 @@ static int tm_cfpr_set(struct task_struct *target, if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); /* copy to local buffer then write that out */ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1); if (i) return i; for (i = 0; i < 32 ; i++) - target->thread.TS_FPR(i) = buf[i]; - target->thread.fp_state.fpscr = buf[32]; + target->thread.TS_CKFPR(i) = buf[i]; + target->thread.ckfp_state.fpscr = buf[32]; return 0; } @@ -1183,7 +1060,7 @@ static int tm_cvmx_active(struct task_struct *target, * * This function gets in transaction checkpointed VMX registers. * - * When the transaction is active 'vr_state' and 'vr_save' hold + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold * the checkpointed values for the current transaction to fall * back on if it aborts in between. The userspace interface buffer * layout is as follows. @@ -1210,12 +1087,12 @@ static int tm_cvmx_get(struct task_struct *target, return -ENODATA; /* Flush the state */ + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, + &target->thread.ckvr_state, 0, 33 * sizeof(vector128)); if (!ret) { /* @@ -1226,7 +1103,7 @@ static int tm_cvmx_get(struct task_struct *target, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.vrsave; + vrsave.word = target->thread.ckvrsave; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); } @@ -1245,7 +1122,7 @@ static int tm_cvmx_get(struct task_struct *target, * * This function sets in transaction checkpointed VMX registers. * - * When the transaction is active 'vr_state' and 'vr_save' hold + * When the transaction is active 'ckvr_state' and 'ckvrsave' hold * the checkpointed values for the current transaction to fall * back on if it aborts in between. The userspace interface buffer * layout is as follows. @@ -1271,12 +1148,12 @@ static int tm_cvmx_set(struct task_struct *target, if (!MSR_TM_ACTIVE(target->thread.regs->msr)) return -ENODATA; + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.vr_state, 0, + &target->thread.ckvr_state, 0, 33 * sizeof(vector128)); if (!ret && count > 0) { /* @@ -1287,11 +1164,11 @@ static int tm_cvmx_set(struct task_struct *target, u32 word; } vrsave; memset(&vrsave, 0, sizeof(vrsave)); - vrsave.word = target->thread.vrsave; + vrsave.word = target->thread.ckvrsave; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave, 33 * sizeof(vector128), -1); if (!ret) - target->thread.vrsave = vrsave.word; + target->thread.ckvrsave = vrsave.word; } return ret; @@ -1329,7 +1206,7 @@ static int tm_cvsx_active(struct task_struct *target, * * This function gets in transaction checkpointed VSX registers. * - * When the transaction is active 'fp_state' holds the checkpointed + * When the transaction is active 'ckfp_state' holds the checkpointed * values for the current transaction to fall back on if it aborts * in between. This function gets those checkpointed VSX registers. * The userspace interface buffer layout is as follows. @@ -1353,13 +1230,13 @@ static int tm_cvsx_get(struct task_struct *target, return -ENODATA; /* Flush the state */ + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); flush_vsx_to_thread(target); for (i = 0; i < 32 ; i++) - buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); @@ -1377,7 +1254,7 @@ static int tm_cvsx_get(struct task_struct *target, * * This function sets in transaction checkpointed VSX registers. * - * When the transaction is active 'fp_state' holds the checkpointed + * When the transaction is active 'ckfp_state' holds the checkpointed * VSX register values for the current transaction to fall back on * if it aborts in between. This function sets these checkpointed * FPR registers. The userspace interface buffer layout is as follows. @@ -1401,15 +1278,16 @@ static int tm_cvsx_set(struct task_struct *target, return -ENODATA; /* Flush the state */ + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); flush_vsx_to_thread(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, 32 * sizeof(double)); - for (i = 0; i < 32 ; i++) - target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + if (!ret) + for (i = 0; i < 32 ; i++) + target->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return ret; } @@ -1465,9 +1343,9 @@ static int tm_spr_get(struct task_struct *target, return -ENODEV; /* Flush the states */ + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); /* TFHAR register */ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -1521,9 +1399,9 @@ static int tm_spr_set(struct task_struct *target, return -ENODEV; /* Flush the states */ + flush_tmregs_to_thread(target); flush_fp_to_thread(target); flush_altivec_to_thread(target); - flush_tmregs_to_thread(target); /* TFHAR register */ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -2046,33 +1924,12 @@ static const struct user_regset_view user_ppc_native_view = { static int gpr32_get_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, bool tm_active) + void *kbuf, void __user *ubuf, + unsigned long *regs) { - const unsigned long *regs = &target->thread.regs->gpr[0]; - const unsigned long *ckpt_regs; compat_ulong_t *k = kbuf; compat_ulong_t __user *u = ubuf; compat_ulong_t reg; - int i; - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - ckpt_regs = &target->thread.ckpt_regs.gpr[0]; -#endif - if (tm_active) { - regs = ckpt_regs; - } else { - if (target->thread.regs == NULL) - return -EIO; - - if (!FULL_REGS(target->thread.regs)) { - /* - * We have a partial register set. - * Fill 14-31 with bogus values. - */ - for (i = 14; i < 32; i++) - target->thread.regs->gpr[i] = NV_REG_POISON; - } - } pos /= sizeof(reg); count /= sizeof(reg); @@ -2114,29 +1971,13 @@ static int gpr32_get_common(struct task_struct *target, static int gpr32_set_common(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, bool tm_active) + const void *kbuf, const void __user *ubuf, + unsigned long *regs) { - unsigned long *regs = &target->thread.regs->gpr[0]; - unsigned long *ckpt_regs; const compat_ulong_t *k = kbuf; const compat_ulong_t __user *u = ubuf; compat_ulong_t reg; -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - ckpt_regs = &target->thread.ckpt_regs.gpr[0]; -#endif - - if (tm_active) { - regs = ckpt_regs; - } else { - regs = &target->thread.regs->gpr[0]; - - if (target->thread.regs == NULL) - return -EIO; - - CHECK_FULL_REGS(target->thread.regs); - } - pos /= sizeof(reg); count /= sizeof(reg); @@ -2201,7 +2042,8 @@ static int tm_cgpr32_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 1); + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); } static int tm_cgpr32_set(struct task_struct *target, @@ -2209,7 +2051,8 @@ static int tm_cgpr32_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 1); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.ckpt_regs.gpr[0]); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -2218,7 +2061,21 @@ static int gpr32_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, 0); + int i; + + if (target->thread.regs == NULL) + return -EIO; + + if (!FULL_REGS(target->thread.regs)) { + /* + * We have a partial register set. + * Fill 14-31 with bogus values. + */ + for (i = 14; i < 32; i++) + target->thread.regs->gpr[i] = NV_REG_POISON; + } + return gpr32_get_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); } static int gpr32_set(struct task_struct *target, @@ -2226,7 +2083,12 @@ static int gpr32_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, 0); + if (target->thread.regs == NULL) + return -EIO; + + CHECK_FULL_REGS(target->thread.regs); + return gpr32_set_common(target, regset, pos, count, kbuf, ubuf, + &target->thread.regs->gpr[0]); } /* diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c3e861df4b20..24ec3ea4b3a2 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -93,15 +93,16 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses * - * Find out what kind of machine we're on and save any data we need - * from the early boot process (devtree is copied on pmac by prom_init()). - * This is called very early on the boot process, after a minimal - * MMU environment has been set up but before MMU_init is called. + * We do the initial parsing of the flat device-tree and prepares + * for the MMU to be fully initialized. */ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { + /* Configure static keys first, now that we're relocated. */ + setup_feature_keys(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eafb9a79e011..7ac8e6eaab5b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr) /* Apply all the dynamic patching */ apply_feature_fixups(); + setup_feature_keys(); /* Initialize the hash table or TLB handling */ early_init_mmu(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index cb64d6feb45a..bbe77aed198d 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -99,22 +99,24 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, } } -static void do_signal(struct pt_regs *regs) +static void do_signal(struct task_struct *tsk) { sigset_t *oldset = sigmask_to_save(); struct ksignal ksig; int ret; int is32 = is_32bit_task(); + BUG_ON(tsk != current); + get_signal(&ksig); /* Is there any syscall restart business here ? */ - check_syscall_restart(regs, &ksig.ka, ksig.sig > 0); + check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0); if (ksig.sig <= 0) { /* No signal to deliver -- put the saved sigmask back */ restore_saved_sigmask(); - regs->trap = 0; + tsk->thread.regs->trap = 0; return; /* no signals delivered */ } @@ -124,23 +126,22 @@ static void do_signal(struct pt_regs *regs) * user space. The DABR will have been cleared if it * triggered inside the kernel. */ - if (current->thread.hw_brk.address && - current->thread.hw_brk.type) - __set_breakpoint(¤t->thread.hw_brk); + if (tsk->thread.hw_brk.address && tsk->thread.hw_brk.type) + __set_breakpoint(&tsk->thread.hw_brk); #endif /* Re-enable the breakpoints for the signal stack */ - thread_change_pc(current, regs); + thread_change_pc(tsk, tsk->thread.regs); if (is32) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) - ret = handle_rt_signal32(&ksig, oldset, regs); + ret = handle_rt_signal32(&ksig, oldset, tsk); else - ret = handle_signal32(&ksig, oldset, regs); + ret = handle_signal32(&ksig, oldset, tsk); } else { - ret = handle_rt_signal64(&ksig, oldset, regs); + ret = handle_rt_signal64(&ksig, oldset, tsk); } - regs->trap = 0; + tsk->thread.regs->trap = 0; signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP)); } @@ -151,8 +152,10 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); + if (thread_info_flags & _TIF_SIGPENDING) { + BUG_ON(regs != current->thread.regs); + do_signal(current); + } if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); @@ -162,7 +165,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } -unsigned long get_tm_stackpointer(struct pt_regs *regs) +unsigned long get_tm_stackpointer(struct task_struct *tsk) { /* When in an active transaction that takes a signal, we need to be * careful with the stack. It's possible that the stack has moved back @@ -187,11 +190,13 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs) */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + BUG_ON(tsk != current); + + if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { tm_reclaim_current(TM_CAUSE_SIGNAL); - if (MSR_TM_TRANSACTIONAL(regs->msr)) - return current->thread.ckpt_regs.gpr[1]; + if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) + return tsk->thread.ckpt_regs.gpr[1]; } #endif - return regs->gpr[1]; + return tsk->thread.regs->gpr[1]; } diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index be305c858e51..7c59d88b9d86 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -16,39 +16,41 @@ extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp, size_t frame_size, int is_32); extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs); + struct task_struct *tsk); extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs); + struct task_struct *tsk); extern unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task); -extern unsigned long copy_transact_fpr_to_user(void __user *to, +extern unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task); extern unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from); -extern unsigned long copy_transact_fpr_from_user(struct task_struct *task, +extern unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from); +extern unsigned long get_tm_stackpointer(struct task_struct *tsk); + #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, struct task_struct *task); -extern unsigned long copy_transact_vsx_to_user(void __user *to, +extern unsigned long copy_ckvsx_to_user(void __user *to, struct task_struct *task); extern unsigned long copy_vsx_from_user(struct task_struct *task, void __user *from); -extern unsigned long copy_transact_vsx_from_user(struct task_struct *task, +extern unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from); #endif #ifdef CONFIG_PPC64 extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs); + struct task_struct *tsk); #else /* CONFIG_PPC64 */ static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, - struct pt_regs *regs) + struct task_struct *tsk) { return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b6aa378aff63..27aa913ac91d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -44,6 +44,7 @@ #include <asm/vdso.h> #include <asm/switch_to.h> #include <asm/tm.h> +#include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 #include "ppc32.h" #include <asm/unistd.h> @@ -315,7 +316,7 @@ unsigned long copy_vsx_from_user(struct task_struct *task, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -unsigned long copy_transact_fpr_to_user(void __user *to, +unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) { u64 buf[ELF_NFPREG]; @@ -323,12 +324,12 @@ unsigned long copy_transact_fpr_to_user(void __user *to, /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < (ELF_NFPREG - 1) ; i++) - buf[i] = task->thread.TS_TRANS_FPR(i); - buf[i] = task->thread.transact_fp.fpscr; + buf[i] = task->thread.TS_CKFPR(i); + buf[i] = task->thread.ckfp_state.fpscr; return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); } -unsigned long copy_transact_fpr_from_user(struct task_struct *task, +unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from) { u64 buf[ELF_NFPREG]; @@ -337,13 +338,13 @@ unsigned long copy_transact_fpr_from_user(struct task_struct *task, if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) return 1; for (i = 0; i < (ELF_NFPREG - 1) ; i++) - task->thread.TS_TRANS_FPR(i) = buf[i]; - task->thread.transact_fp.fpscr = buf[i]; + task->thread.TS_CKFPR(i) = buf[i]; + task->thread.ckfp_state.fpscr = buf[i]; return 0; } -unsigned long copy_transact_vsx_to_user(void __user *to, +unsigned long copy_ckvsx_to_user(void __user *to, struct task_struct *task) { u64 buf[ELF_NVSRHALFREG]; @@ -351,11 +352,11 @@ unsigned long copy_transact_vsx_to_user(void __user *to, /* save FPR copy to local buffer then write to the thread_struct */ for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET]; + buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); } -unsigned long copy_transact_vsx_from_user(struct task_struct *task, +unsigned long copy_ckvsx_from_user(struct task_struct *task, void __user *from) { u64 buf[ELF_NVSRHALFREG]; @@ -364,7 +365,7 @@ unsigned long copy_transact_vsx_from_user(struct task_struct *task, if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) return 1; for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; return 0; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -384,17 +385,17 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task, } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_transact_fpr_to_user(void __user *to, +inline unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task) { - return __copy_to_user(to, task->thread.transact_fp.fpr, + return __copy_to_user(to, task->thread.ckfp_state.fpr, ELF_NFPREG * sizeof(double)); } -inline unsigned long copy_transact_fpr_from_user(struct task_struct *task, +inline unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from) { - return __copy_from_user(task->thread.transact_fp.fpr, from, + return __copy_from_user(task->thread.ckfp_state.fpr, from, ELF_NFPREG * sizeof(double)); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -525,9 +526,6 @@ static int save_tm_user_regs(struct pt_regs *regs, */ regs->msr &= ~MSR_TS_MASK; - /* Make sure floating point registers are stored in regs */ - flush_fp_to_thread(current); - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -545,18 +543,17 @@ static int save_tm_user_regs(struct pt_regs *regs, #ifdef CONFIG_ALTIVEC /* save altivec registers */ if (current->thread.used_vr) { - flush_altivec_to_thread(current); - if (__copy_to_user(&frame->mc_vregs, ¤t->thread.vr_state, + if (__copy_to_user(&frame->mc_vregs, ¤t->thread.ckvr_state, ELF_NVRREG * sizeof(vector128))) return 1; if (msr & MSR_VEC) { if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.transact_vr, + ¤t->thread.vr_state, ELF_NVRREG * sizeof(vector128))) return 1; } else { if (__copy_to_user(&tm_frame->mc_vregs, - ¤t->thread.vr_state, + ¤t->thread.ckvr_state, ELF_NVRREG * sizeof(vector128))) return 1; } @@ -573,28 +570,28 @@ static int save_tm_user_regs(struct pt_regs *regs, * most significant bits of that same vector. --BenH */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - if (__put_user(current->thread.vrsave, + current->thread.ckvrsave = mfspr(SPRN_VRSAVE); + if (__put_user(current->thread.ckvrsave, (u32 __user *)&frame->mc_vregs[32])) return 1; if (msr & MSR_VEC) { - if (__put_user(current->thread.transact_vrsave, + if (__put_user(current->thread.vrsave, (u32 __user *)&tm_frame->mc_vregs[32])) return 1; } else { - if (__put_user(current->thread.vrsave, + if (__put_user(current->thread.ckvrsave, (u32 __user *)&tm_frame->mc_vregs[32])) return 1; } #endif /* CONFIG_ALTIVEC */ - if (copy_fpr_to_user(&frame->mc_fregs, current)) + if (copy_ckfpr_to_user(&frame->mc_fregs, current)) return 1; if (msr & MSR_FP) { - if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current)) + if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) return 1; } else { - if (copy_fpr_to_user(&tm_frame->mc_fregs, current)) + if (copy_ckfpr_to_user(&tm_frame->mc_fregs, current)) return 1; } @@ -606,15 +603,14 @@ static int save_tm_user_regs(struct pt_regs *regs, * contains valid data */ if (current->thread.used_vsr) { - flush_vsx_to_thread(current); - if (copy_vsx_to_user(&frame->mc_vsregs, current)) + if (copy_ckvsx_to_user(&frame->mc_vsregs, current)) return 1; if (msr & MSR_VSX) { - if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs, + if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) return 1; } else { - if (copy_vsx_to_user(&tm_frame->mc_vsregs, current)) + if (copy_ckvsx_to_user(&tm_frame->mc_vsregs, current)) return 1; } @@ -698,6 +694,7 @@ static long restore_user_regs(struct pt_regs *regs, if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; + current->thread.used_vr = true; } else if (current->thread.used_vr) memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); @@ -724,6 +721,7 @@ static long restore_user_regs(struct pt_regs *regs, */ if (copy_vsx_from_user(current, &sr->mc_vsregs)) return 1; + current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; @@ -743,6 +741,7 @@ static long restore_user_regs(struct pt_regs *regs, if (__copy_from_user(current->thread.evr, &sr->mc_vregs, ELF_NEVRREG * sizeof(u32))) return 1; + current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); @@ -793,33 +792,34 @@ static long restore_tm_user_regs(struct pt_regs *regs, regs->msr &= ~MSR_VEC; if (msr & MSR_VEC) { /* restore altivec registers from the stack */ - if (__copy_from_user(¤t->thread.vr_state, &sr->mc_vregs, + if (__copy_from_user(¤t->thread.ckvr_state, &sr->mc_vregs, sizeof(sr->mc_vregs)) || - __copy_from_user(¤t->thread.transact_vr, + __copy_from_user(¤t->thread.vr_state, &tm_sr->mc_vregs, sizeof(sr->mc_vregs))) return 1; + current->thread.used_vr = true; } else if (current->thread.used_vr) { memset(¤t->thread.vr_state, 0, ELF_NVRREG * sizeof(vector128)); - memset(¤t->thread.transact_vr, 0, + memset(¤t->thread.ckvr_state, 0, ELF_NVRREG * sizeof(vector128)); } /* Always get VRSAVE back */ - if (__get_user(current->thread.vrsave, + if (__get_user(current->thread.ckvrsave, (u32 __user *)&sr->mc_vregs[32]) || - __get_user(current->thread.transact_vrsave, + __get_user(current->thread.vrsave, (u32 __user *)&tm_sr->mc_vregs[32])) return 1; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, current->thread.ckvrsave); #endif /* CONFIG_ALTIVEC */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1); if (copy_fpr_from_user(current, &sr->mc_fregs) || - copy_transact_fpr_from_user(current, &tm_sr->mc_fregs)) + copy_ckfpr_from_user(current, &tm_sr->mc_fregs)) return 1; #ifdef CONFIG_VSX @@ -829,13 +829,14 @@ static long restore_tm_user_regs(struct pt_regs *regs, * Restore altivec registers from the stack to a local * buffer, then write this out to the thread_struct */ - if (copy_vsx_from_user(current, &sr->mc_vsregs) || - copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs)) + if (copy_vsx_from_user(current, &tm_sr->mc_vsregs) || + copy_ckvsx_from_user(current, &sr->mc_vsregs)) return 1; + current->thread.used_vsr = true; } else if (current->thread.used_vsr) for (i = 0; i < 32 ; i++) { current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; + current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } #endif /* CONFIG_VSX */ @@ -848,6 +849,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, if (__copy_from_user(current->thread.evr, &sr->mc_vregs, ELF_NEVRREG * sizeof(u32))) return 1; + current->thread.used_spe = true; } else if (current->thread.used_spe) memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); @@ -877,13 +879,14 @@ static long restore_tm_user_regs(struct pt_regs *regs, tm_recheckpoint(¤t->thread, msr); /* This loads the speculative FP/VEC state, if used */ + msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); + load_fp_state(¤t->thread.fp_state); regs->msr |= (MSR_FP | current->thread.fpexc_mode); } #ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + load_vr_state(¤t->thread.vr_state); regs->msr |= MSR_VEC; } #endif @@ -971,7 +974,7 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) * (one which gets siginfo). */ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, - struct pt_regs *regs) + struct task_struct *tsk) { struct rt_sigframe __user *rt_sf; struct mcontext __user *frame; @@ -980,10 +983,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, unsigned long newsp = 0; int sigret; unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; @@ -1000,9 +1006,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, /* Save user registers on the stack */ frame = &rt_sf->uc.uc_mcontext; addr = frame; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base) { + if (vdso32_rt_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; - tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp; + tramp = tsk->mm->context.vdso_base + vdso32_rt_sigtramp; } else { sigret = __NR_rt_sigreturn; tramp = (unsigned long) frame->tramp; @@ -1029,7 +1035,7 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, } regs->link = tramp; - current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); @@ -1054,7 +1060,7 @@ badframe: printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " "%p nip %08lx lr %08lx\n", - current->comm, current->pid, + tsk->comm, tsk->pid, addr, regs->nip, regs->link); return 1; @@ -1226,7 +1232,21 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf))) goto bad; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(tmp, &rt_sf->uc.uc_link)) goto bad; uc_transact = (struct ucontext __user *)(uintptr_t)tmp; @@ -1396,7 +1416,8 @@ int sys_debug_setcontext(struct ucontext __user *ctx, /* * OK, we're invoking a handler */ -int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs) +int handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct task_struct *tsk) { struct sigcontext __user *sc; struct sigframe __user *frame; @@ -1404,9 +1425,12 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs unsigned long newsp = 0; int sigret; unsigned long tramp; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); /* Set up Signal Frame */ - frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1); + frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; @@ -1425,9 +1449,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs || __put_user(ksig->sig, &sc->signal)) goto badframe; - if (vdso32_sigtramp && current->mm->context.vdso_base) { + if (vdso32_sigtramp && tsk->mm->context.vdso_base) { sigret = 0; - tramp = current->mm->context.vdso_base + vdso32_sigtramp; + tramp = tsk->mm->context.vdso_base + vdso32_sigtramp; } else { sigret = __NR_sigreturn; tramp = (unsigned long) frame->mctx.tramp; @@ -1449,7 +1473,7 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs regs->link = tramp; - current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ + tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ /* create a stack frame for the caller of the handler */ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; @@ -1469,7 +1493,7 @@ badframe: printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_signal32: " "%p nip %08lx lr %08lx\n", - current->comm, current->pid, + tsk->comm, tsk->pid, frame, regs->nip, regs->link); return 1; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 7e49984d4331..96698fdf93b4 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -35,6 +35,7 @@ #include <asm/vdso.h> #include <asm/switch_to.h> #include <asm/tm.h> +#include <asm/asm-prototypes.h> #include "signal.h" @@ -90,9 +91,9 @@ static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc) * Set up the sigcontext for the signal frame. */ -static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, - int signr, sigset_t *set, unsigned long handler, - int ctx_has_vsx_region) +static long setup_sigcontext(struct sigcontext __user *sc, + struct task_struct *tsk, int signr, sigset_t *set, + unsigned long handler, int ctx_has_vsx_region) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -106,17 +107,20 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); unsigned long vrsave; #endif + struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; long err = 0; + BUG_ON(tsk != current); + #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); /* save altivec registers */ - if (current->thread.used_vr) { - flush_altivec_to_thread(current); + if (tsk->thread.used_vr) { + flush_altivec_to_thread(tsk); /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, ¤t->thread.vr_state, + err |= __copy_to_user(v_regs, &tsk->thread.vr_state, 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg) * contains valid data. @@ -129,16 +133,16 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) { vrsave = mfspr(SPRN_VRSAVE); - current->thread.vrsave = vrsave; + tsk->thread.vrsave = vrsave; } err |= __put_user(vrsave, (u32 __user *)&v_regs[33]); #else /* CONFIG_ALTIVEC */ err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ - flush_fp_to_thread(current); + flush_fp_to_thread(tsk); /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, current); + err |= copy_fpr_to_user(&sc->fp_regs, tsk); /* * Clear the MSR VSX bit to indicate there is no valid state attached @@ -151,10 +155,10 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * then out to userspace. Update v_regs to point after the * VMX data. */ - if (current->thread.used_vsr && ctx_has_vsx_region) { - flush_vsx_to_thread(current); + if (tsk->thread.used_vsr && ctx_has_vsx_region) { + flush_vsx_to_thread(tsk); v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, current); + err |= copy_vsx_to_user(v_regs, tsk); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. */ @@ -187,7 +191,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, */ static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, - struct pt_regs *regs, + struct task_struct *tsk, int signr, sigset_t *set, unsigned long handler) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the @@ -202,9 +206,12 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc); elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif - unsigned long msr = regs->msr; + struct pt_regs *regs = tsk->thread.regs; + unsigned long msr = tsk->thread.ckpt_regs.msr; long err = 0; + BUG_ON(tsk != current); + BUG_ON(!MSR_TM_ACTIVE(regs->msr)); /* Remove TM bits from thread's MSR. The MSR in the sigcontext @@ -214,28 +221,25 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ regs->msr &= ~MSR_TS_MASK; - flush_fp_to_thread(current); - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); /* save altivec registers */ - if (current->thread.used_vr) { - flush_altivec_to_thread(current); + if (tsk->thread.used_vr) { /* Copy 33 vec registers (vr0..31 and vscr) to the stack */ - err |= __copy_to_user(v_regs, ¤t->thread.vr_state, + err |= __copy_to_user(v_regs, &tsk->thread.ckvr_state, 33 * sizeof(vector128)); /* If VEC was enabled there are transactional VRs valid too, * else they're a copy of the checkpointed VRs. */ if (msr & MSR_VEC) err |= __copy_to_user(tm_v_regs, - ¤t->thread.transact_vr, + &tsk->thread.vr_state, 33 * sizeof(vector128)); else err |= __copy_to_user(tm_v_regs, - ¤t->thread.vr_state, + &tsk->thread.ckvr_state, 33 * sizeof(vector128)); /* set MSR_VEC in the MSR value in the frame to indicate @@ -247,13 +251,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * use altivec. */ if (cpu_has_feature(CPU_FTR_ALTIVEC)) - current->thread.vrsave = mfspr(SPRN_VRSAVE); - err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + tsk->thread.ckvrsave = mfspr(SPRN_VRSAVE); + err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]); if (msr & MSR_VEC) - err |= __put_user(current->thread.transact_vrsave, + err |= __put_user(tsk->thread.vrsave, (u32 __user *)&tm_v_regs[33]); else - err |= __put_user(current->thread.vrsave, + err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&tm_v_regs[33]); #else /* CONFIG_ALTIVEC */ @@ -262,11 +266,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, #endif /* CONFIG_ALTIVEC */ /* copy fpr regs and fpscr */ - err |= copy_fpr_to_user(&sc->fp_regs, current); + err |= copy_ckfpr_to_user(&sc->fp_regs, tsk); if (msr & MSR_FP) - err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current); + err |= copy_fpr_to_user(&tm_sc->fp_regs, tsk); else - err |= copy_fpr_to_user(&tm_sc->fp_regs, current); + err |= copy_ckfpr_to_user(&tm_sc->fp_regs, tsk); #ifdef CONFIG_VSX /* @@ -274,17 +278,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * then out to userspace. Update v_regs to point after the * VMX data. */ - if (current->thread.used_vsr) { - flush_vsx_to_thread(current); + if (tsk->thread.used_vsr) { v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; - err |= copy_vsx_to_user(v_regs, current); + err |= copy_ckvsx_to_user(v_regs, tsk); if (msr & MSR_VSX) - err |= copy_transact_vsx_to_user(tm_v_regs, current); + err |= copy_vsx_to_user(tm_v_regs, tsk); else - err |= copy_vsx_to_user(tm_v_regs, current); + err |= copy_ckvsx_to_user(tm_v_regs, tsk); /* set MSR_VSX in the MSR value in the frame to * indicate that sc->vs_reg) contains valid data. @@ -298,7 +301,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->gp_regs, - ¤t->thread.ckpt_regs, GP_REGS_SIZE); + &tsk->thread.ckpt_regs, GP_REGS_SIZE); err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); err |= __put_user(signr, &sc->signal); @@ -314,7 +317,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, * Restore the sigcontext from the signal frame. */ -static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, +static long restore_sigcontext(struct task_struct *tsk, sigset_t *set, int sig, struct sigcontext __user *sc) { #ifdef CONFIG_ALTIVEC @@ -323,10 +326,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, unsigned long err = 0; unsigned long save_r13 = 0; unsigned long msr; + struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_VSX int i; #endif + BUG_ON(tsk != current); + /* If this is not a signal return, we preserve the TLS in r13 */ if (!sig) save_r13 = regs->gpr[13]; @@ -356,7 +362,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, /* * Force reload of FP/VEC. - * This has to be done before copying stuff into current->thread.fpr/vr + * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); @@ -368,21 +374,23 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128))) return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ - if (v_regs != NULL && (msr & MSR_VEC) != 0) - err |= __copy_from_user(¤t->thread.vr_state, v_regs, + if (v_regs != NULL && (msr & MSR_VEC) != 0) { + err |= __copy_from_user(&tsk->thread.vr_state, v_regs, 33 * sizeof(vector128)); - else if (current->thread.used_vr) - memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); + tsk->thread.used_vr = true; + } else if (tsk->thread.used_vr) { + memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); + } /* Always get VRSAVE back */ if (v_regs != NULL) - err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]); + err |= __get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33]); else - current->thread.vrsave = 0; + tsk->thread.vrsave = 0; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(current, &sc->fp_regs); + err |= copy_fpr_from_user(tsk, &sc->fp_regs); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -390,11 +398,13 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, * buffer for formatting, then into the taskstruct. */ v_regs += ELF_NVRREG; - if ((msr & MSR_VSX) != 0) - err |= copy_vsx_from_user(current, v_regs); - else + if ((msr & MSR_VSX) != 0) { + err |= copy_vsx_from_user(tsk, v_regs); + tsk->thread.used_vsr = true; + } else { for (i = 0; i < 32 ; i++) - current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + } #endif return err; } @@ -404,7 +414,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig, * Restore the two sigcontexts from the frame of a transactional processes. */ -static long restore_tm_sigcontexts(struct pt_regs *regs, +static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc, struct sigcontext __user *tm_sc) { @@ -413,12 +423,16 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, #endif unsigned long err = 0; unsigned long msr; + struct pt_regs *regs = tsk->thread.regs; #ifdef CONFIG_VSX int i; #endif + + BUG_ON(tsk != current); + /* copy the GPRs */ err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); - err |= __copy_from_user(¤t->thread.ckpt_regs, sc->gp_regs, + err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs, sizeof(regs->gpr)); /* @@ -430,7 +444,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, * we don't need to re-copy them here. */ err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]); - err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); + err |= __get_user(tsk->thread.tm_tfhar, &sc->gp_regs[PT_NIP]); /* get MSR separately, transfer the LE bit if doing signal return */ err |= __get_user(msr, &sc->gp_regs[PT_MSR]); @@ -449,13 +463,13 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]); err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]); err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]); - err |= __get_user(current->thread.ckpt_regs.ctr, + err |= __get_user(tsk->thread.ckpt_regs.ctr, &sc->gp_regs[PT_CTR]); - err |= __get_user(current->thread.ckpt_regs.link, + err |= __get_user(tsk->thread.ckpt_regs.link, &sc->gp_regs[PT_LNK]); - err |= __get_user(current->thread.ckpt_regs.xer, + err |= __get_user(tsk->thread.ckpt_regs.xer, &sc->gp_regs[PT_XER]); - err |= __get_user(current->thread.ckpt_regs.ccr, + err |= __get_user(tsk->thread.ckpt_regs.ccr, &sc->gp_regs[PT_CCR]); /* These regs are not checkpointed; they can go in 'regs'. */ @@ -466,7 +480,7 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, /* * Force reload of FP/VEC. - * This has to be done before copying stuff into current->thread.fpr/vr + * This has to be done before copying stuff into tsk->thread.fpr/vr * for the reasons explained in the previous comment. */ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX); @@ -483,32 +497,33 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, return -EFAULT; /* Copy 33 vec registers (vr0..31 and vscr) from the stack */ if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) { - err |= __copy_from_user(¤t->thread.vr_state, v_regs, + err |= __copy_from_user(&tsk->thread.ckvr_state, v_regs, 33 * sizeof(vector128)); - err |= __copy_from_user(¤t->thread.transact_vr, tm_v_regs, + err |= __copy_from_user(&tsk->thread.vr_state, tm_v_regs, 33 * sizeof(vector128)); + current->thread.used_vr = true; } - else if (current->thread.used_vr) { - memset(¤t->thread.vr_state, 0, 33 * sizeof(vector128)); - memset(¤t->thread.transact_vr, 0, 33 * sizeof(vector128)); + else if (tsk->thread.used_vr) { + memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128)); + memset(&tsk->thread.ckvr_state, 0, 33 * sizeof(vector128)); } /* Always get VRSAVE back */ if (v_regs != NULL && tm_v_regs != NULL) { - err |= __get_user(current->thread.vrsave, + err |= __get_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]); - err |= __get_user(current->thread.transact_vrsave, + err |= __get_user(tsk->thread.vrsave, (u32 __user *)&tm_v_regs[33]); } else { - current->thread.vrsave = 0; - current->thread.transact_vrsave = 0; + tsk->thread.vrsave = 0; + tsk->thread.ckvrsave = 0; } if (cpu_has_feature(CPU_FTR_ALTIVEC)) - mtspr(SPRN_VRSAVE, current->thread.vrsave); + mtspr(SPRN_VRSAVE, tsk->thread.vrsave); #endif /* CONFIG_ALTIVEC */ /* restore floating point */ - err |= copy_fpr_from_user(current, &sc->fp_regs); - err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs); + err |= copy_fpr_from_user(tsk, &tm_sc->fp_regs); + err |= copy_ckfpr_from_user(tsk, &sc->fp_regs); #ifdef CONFIG_VSX /* * Get additional VSX data. Update v_regs to point after the @@ -518,32 +533,31 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, if (v_regs && ((msr & MSR_VSX) != 0)) { v_regs += ELF_NVRREG; tm_v_regs += ELF_NVRREG; - err |= copy_vsx_from_user(current, v_regs); - err |= copy_transact_vsx_from_user(current, tm_v_regs); + err |= copy_vsx_from_user(tsk, tm_v_regs); + err |= copy_ckvsx_from_user(tsk, v_regs); + tsk->thread.used_vsr = true; } else { for (i = 0; i < 32 ; i++) { - current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; - current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0; + tsk->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0; } } #endif tm_enable(); /* Make sure the transaction is marked as failed */ - current->thread.tm_texasr |= TEXASR_FS; + tsk->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ - tm_recheckpoint(¤t->thread, msr); + tm_recheckpoint(&tsk->thread, msr); - /* This loads the speculative FP/VEC state, if used */ + msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + load_fp_state(&tsk->thread.fp_state); + regs->msr |= (MSR_FP | tsk->thread.fpexc_mode); } -#ifdef CONFIG_ALTIVEC if (msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + load_vr_state(&tsk->thread.vr_state); regs->msr |= MSR_VEC; } -#endif return err; } @@ -594,6 +608,8 @@ int sys_swapcontext(struct ucontext __user *old_ctx, unsigned long new_msr = 0; int ctx_has_vsx_region = 0; + BUG_ON(regs != current->thread.regs); + if (new_ctx && get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR])) return -EFAULT; @@ -616,7 +632,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx, if (old_ctx != NULL) { if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size) - || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0, + || setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL, 0, ctx_has_vsx_region) || __copy_to_user(&old_ctx->uc_sigmask, ¤t->blocked, sizeof(sigset_t))) @@ -644,7 +660,7 @@ int sys_swapcontext(struct ucontext __user *old_ctx, if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set))) do_exit(SIGSEGV); set_current_blocked(&set); - if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext)) + if (restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) do_exit(SIGSEGV); /* This returns like rt_sigreturn */ @@ -667,6 +683,8 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long msr; #endif + BUG_ON(current->thread.regs != regs); + /* Always make any pending restarted system calls return -EINTR */ current->restart_block.fn = do_no_restart_syscall; @@ -676,7 +694,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set))) goto badframe; set_current_blocked(&set); + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * If there is a transactional state then throw it away. + * The purpose of a sigreturn is to destroy all traces of the + * signal frame, this includes any transactional state created + * within in. We only check for suspended as we can never be + * active in the kernel, we are active, there is nothing better to + * do than go ahead and Bad Thing later. + * The cause is not important as there will never be a + * recheckpoint so it's not user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { @@ -684,14 +716,14 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, struct ucontext __user *uc_transact; if (__get_user(uc_transact, &uc->uc_link)) goto badframe; - if (restore_tm_sigcontexts(regs, &uc->uc_mcontext, + if (restore_tm_sigcontexts(current, &uc->uc_mcontext, &uc_transact->uc_mcontext)) goto badframe; } else /* Fall through, for non-TM restore */ #endif - if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext)) + if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext)) goto badframe; if (restore_altstack(&uc->uc_stack)) @@ -710,13 +742,17 @@ badframe: return 0; } -int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) +int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, + struct task_struct *tsk) { struct rt_sigframe __user *frame; unsigned long newsp = 0; long err = 0; + struct pt_regs *regs = tsk->thread.regs; + + BUG_ON(tsk != current); - frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0); + frame = get_sigframe(ksig, get_tm_stackpointer(tsk), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; @@ -737,14 +773,13 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs err |= __put_user(&frame->uc_transact, &frame->uc.uc_link); err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, - regs, ksig->sig, - NULL, + tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler); } else #endif { err |= __put_user(0, &frame->uc.uc_link); - err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig, + err |= setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig, NULL, (unsigned long)ksig->ka.sa.sa_handler, 1); } @@ -753,11 +788,11 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs goto badframe; /* Make sure signal handler doesn't get spurious FP exceptions */ - current->thread.fp_state.fpscr = 0; + tsk->thread.fp_state.fpscr = 0; /* Set up to return from userspace. */ - if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { - regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp; + if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) { + regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp; } else { err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]); if (err) @@ -807,7 +842,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs badframe: if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, - current->comm, current->pid, "setup_rt_frame", + tsk->comm, tsk->pid, "setup_rt_frame", (long)frame, regs->nip, regs->link); return 1; diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 25a39052bf6b..9c6f3fd58059 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -830,7 +830,7 @@ int __cpu_disable(void) /* Update sibling maps */ base = cpu_first_thread_sibling(cpu); - for (i = 0; i < threads_per_core; i++) { + for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) { cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i)); cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu)); cpumask_clear_cpu(cpu, cpu_core_mask(base + i)); diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 5fa92706444b..644cce3d8dce 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -40,6 +40,7 @@ #include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> +#include <asm/asm-prototypes.h> static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 3efbedefba6a..67859b7d1c97 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -73,6 +73,7 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/cputime.h> +#include <asm/asm-prototypes.h> /* powerpc clocksource/clockevent code */ diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 298afcf3bf2a..3a2d04134da9 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -108,6 +108,7 @@ _GLOBAL(tm_reclaim) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ std r3, STK_PARAM(R3)(r1) + std r4, STK_PARAM(R4)(r1) SAVE_NVGPRS(r1) /* We need to setup MSR for VSX register save instructions. */ @@ -126,43 +127,6 @@ _GLOBAL(tm_reclaim) mtmsrd r15 std r14, TM_FRAME_L0(r1) - /* Stash the stack pointer away for use after reclaim */ - std r1, PACAR1(r13) - - /* ******************** FPR/VR/VSRs ************ - * Before reclaiming, capture the current/transactional FPR/VR - * versions /if used/. - * - * (If VSX used, FP and VMX are implied. Or, we don't need to look - * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) - * - * We're passed the thread's MSR as parameter 2. - * - * We enabled VEC/FP/VSX in the msr above, so we can execute these - * instructions! - */ - andis. r0, r4, MSR_VEC@h - beq dont_backup_vec - - addi r7, r3, THREAD_TRANSACT_VRSTATE - SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ - mfvscr v0 - li r6, VRSTATE_VSCR - stvx v0, r7, r6 -dont_backup_vec: - mfspr r0, SPRN_VRSAVE - std r0, THREAD_TRANSACT_VRSAVE(r3) - - andi. r0, r4, MSR_FP - beq dont_backup_fp - - addi r7, r3, THREAD_TRANSACT_FPSTATE - SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ - - mffs fr0 - stfd fr0,FPSTATE_FPSCR(r7) - -dont_backup_fp: /* Do sanity check on MSR to make sure we are suspended */ li r7, (MSR_TS_S)@higher srdi r6, r14, 32 @@ -170,6 +134,9 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 + /* Stash the stack pointer away for use after reclaim */ + std r1, PACAR1(r13) + /* Clear MSR RI since we are about to change r1, EE is already off. */ li r4, 0 mtmsrd r4, 1 @@ -273,6 +240,43 @@ dont_backup_fp: * MSR. */ + + /* ******************** FPR/VR/VSRs ************ + * After reclaiming, capture the checkpointed FPRs/VRs /if used/. + * + * (If VSX used, FP and VMX are implied. Or, we don't need to look + * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) + * + * We're passed the thread's MSR as the second parameter + * + * We enabled VEC/FP/VSX in the msr above, so we can execute these + * instructions! + */ + ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */ + mr r3, r12 + andis. r0, r4, MSR_VEC@h + beq dont_backup_vec + + addi r7, r3, THREAD_CKVRSTATE + SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ + mfvscr v0 + li r6, VRSTATE_VSCR + stvx v0, r7, r6 +dont_backup_vec: + mfspr r0, SPRN_VRSAVE + std r0, THREAD_CKVRSAVE(r3) + + andi. r0, r4, MSR_FP + beq dont_backup_fp + + addi r7, r3, THREAD_CKFPSTATE + SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ + + mffs fr0 + stfd fr0,FPSTATE_FPSCR(r7) + +dont_backup_fp: + /* TM regs, incl TEXASR -- these live in thread_struct. Note they've * been updated by the treclaim, to explain to userland the failure * cause (aborted). @@ -288,6 +292,7 @@ dont_backup_fp: /* Restore original MSR/IRQ state & clear TM mode */ ld r14, TM_FRAME_L0(r1) /* Orig MSR */ + li r15, 0 rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1 mtmsrd r14 @@ -356,28 +361,29 @@ _GLOBAL(__tm_recheckpoint) mtmsr r5 #ifdef CONFIG_ALTIVEC - /* FP and VEC registers: These are recheckpointed from thread.fpr[] - * and thread.vr[] respectively. The thread.transact_fpr[] version - * is more modern, and will be loaded subsequently by any FPUnavailable - * trap. + /* + * FP and VEC registers: These are recheckpointed from + * thread.ckfp_state and thread.ckvr_state respectively. The + * thread.fp_state[] version holds the 'live' (transactional) + * and will be loaded subsequently by any FPUnavailable trap. */ andis. r0, r4, MSR_VEC@h beq dont_restore_vec - addi r8, r3, THREAD_VRSTATE + addi r8, r3, THREAD_CKVRSTATE li r5, VRSTATE_VSCR lvx v0, r8, r5 mtvscr v0 REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */ dont_restore_vec: - ld r5, THREAD_VRSAVE(r3) + ld r5, THREAD_CKVRSAVE(r3) mtspr SPRN_VRSAVE, r5 #endif andi. r0, r4, MSR_FP beq dont_restore_fp - addi r8, r3, THREAD_FPSTATE + addi r8, r3, THREAD_CKFPSTATE lfd fr0, FPSTATE_FPSCR(r8) MTFSF_L(fr0) REST_32FPRS_VSRS(0, R4, R8) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2cb589264cb7..a1f8f5641e9e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -25,7 +25,8 @@ #include <linux/user.h> #include <linux/interrupt.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/extable.h> +#include <linux/module.h> /* print_modules */ #include <linux/prctl.h> #include <linux/delay.h> #include <linux/kprobes.h> @@ -116,7 +117,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -static unsigned __kprobes long oops_begin(struct pt_regs *regs) +static unsigned long oops_begin(struct pt_regs *regs) { int cpu; unsigned long flags; @@ -143,8 +144,9 @@ static unsigned __kprobes long oops_begin(struct pt_regs *regs) pmac_backlight_unblank(); return flags; } +NOKPROBE_SYMBOL(oops_begin); -static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) { bust_spinlocks(0); @@ -195,8 +197,9 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, panic("Fatal exception"); do_exit(signr); } +NOKPROBE_SYMBOL(oops_end); -static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); #ifdef CONFIG_PREEMPT @@ -220,6 +223,7 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err) return 0; } +NOKPROBE_SYMBOL(__die); void die(const char *str, struct pt_regs *regs, long err) { @@ -801,7 +805,7 @@ void RunModeException(struct pt_regs *regs) _exception(SIGTRAP, regs, 0, 0); } -void __kprobes single_step_exception(struct pt_regs *regs) +void single_step_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); @@ -818,6 +822,7 @@ void __kprobes single_step_exception(struct pt_regs *regs) bail: exception_exit(prev_state); } +NOKPROBE_SYMBOL(single_step_exception); /* * After we have successfully emulated an instruction, we have to @@ -1139,7 +1144,7 @@ static int emulate_math(struct pt_regs *regs) static inline int emulate_math(struct pt_regs *regs) { return -1; } #endif -void __kprobes program_check_exception(struct pt_regs *regs) +void program_check_exception(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); unsigned int reason = get_reason(regs); @@ -1259,16 +1264,18 @@ sigill: bail: exception_exit(prev_state); } +NOKPROBE_SYMBOL(program_check_exception); /* * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. */ -void __kprobes emulation_assist_interrupt(struct pt_regs *regs) +void emulation_assist_interrupt(struct pt_regs *regs) { regs->msr |= REASON_ILLEGAL; program_check_exception(regs); } +NOKPROBE_SYMBOL(emulation_assist_interrupt); void alignment_exception(struct pt_regs *regs) { @@ -1309,6 +1316,18 @@ bail: exception_exit(prev_state); } +void slb_miss_bad_addr(struct pt_regs *regs) +{ + enum ctx_state prev_state = exception_enter(); + + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); + else + bad_page_fault(regs, regs->dar, SIGSEGV); + + exception_exit(prev_state); +} + void StackOverflow(struct pt_regs *regs) { printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", @@ -1371,6 +1390,22 @@ void vsx_unavailable_exception(struct pt_regs *regs) } #ifdef CONFIG_PPC64 +static void tm_unavailable(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (user_mode(regs)) { + current->thread.load_tm++; + regs->msr |= MSR_TM; + tm_enable(); + tm_restore_sprs(¤t->thread); + return; + } +#endif + pr_emerg("Unrecoverable TM Unavailable Exception " + "%lx at %lx\n", regs->trap, regs->nip); + die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); +} + void facility_unavailable_exception(struct pt_regs *regs) { static char *facility_strings[] = { @@ -1450,6 +1485,27 @@ void facility_unavailable_exception(struct pt_regs *regs) return; } + if (status == FSCR_TM_LG) { + /* + * If we're here then the hardware is TM aware because it + * generated an exception with FSRM_TM set. + * + * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware + * told us not to do TM, or the kernel is not built with TM + * support. + * + * If both of those things are true, then userspace can spam the + * console by triggering the printk() below just by continually + * doing tbegin (or any TM instruction). So in that case just + * send the process a SIGILL immediately. + */ + if (!cpu_has_feature(CPU_FTR_TM)) + goto out; + + tm_unavailable(regs); + return; + } + if ((status < ARRAY_SIZE(facility_strings)) && facility_strings[status]) facility = facility_strings[status]; @@ -1462,6 +1518,7 @@ void facility_unavailable_exception(struct pt_regs *regs) "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n", hv ? "Hypervisor " : "", facility, regs->nip, regs->msr); +out: if (user_mode(regs)) { _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); return; @@ -1503,7 +1560,8 @@ void fp_unavailable_tm(struct pt_regs *regs) /* If VMX is in use, get the transactional values back */ if (regs->msr & MSR_VEC) { - do_load_up_transact_altivec(¤t->thread); + msr_check_and_set(MSR_VEC); + load_vr_state(¤t->thread.vr_state); /* At this point all the VSX state is loaded, so enable it */ regs->msr |= MSR_VSX; } @@ -1524,7 +1582,8 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; if (regs->msr & MSR_FP) { - do_load_up_transact_fpu(¤t->thread); + msr_check_and_set(MSR_FP); + load_fp_state(¤t->thread.fp_state); regs->msr |= MSR_VSX; } } @@ -1563,10 +1622,12 @@ void vsx_unavailable_tm(struct pt_regs *regs) */ tm_recheckpoint(¤t->thread, regs->msr & ~orig_msr); + msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC)); + if (orig_msr & MSR_FP) - do_load_up_transact_fpu(¤t->thread); + load_fp_state(¤t->thread.fp_state); if (orig_msr & MSR_VEC) - do_load_up_transact_altivec(¤t->thread); + load_vr_state(¤t->thread.vr_state); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ @@ -1655,7 +1716,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) +void DebugException(struct pt_regs *regs, unsigned long debug_status) { current->thread.debug.dbsr = debug_status; @@ -1716,6 +1777,7 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status) } else handle_debug(regs, debug_status); } +NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #if !defined(CONFIG_TAU_INT) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6767605ea8da..4111d30badfa 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,6 +22,7 @@ #include <linux/security.h> #include <linux/memblock.h> +#include <asm/cpu_has_feature.h> #include <asm/pgtable.h> #include <asm/processor.h> #include <asm/mmu.h> diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index cbabd143acae..78a7449bf489 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index c710802b8fb6..31107bf5a61f 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -31,15 +31,9 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -# assembly rules for the .S files -$(obj-vdso64): %.o: %.S - $(call if_changed_dep,vdso64as) - # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ -quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index 184a6ba7f283..abf17feffe40 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -59,7 +59,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) bl V_LOCAL_FUNC(__get_datapage) mtlr r12 addi r3,r3,CFG_SYSCALL_MAP64 - cmpli cr0,r4,0 + cmpldi cr0,r4,0 crclr cr0*4+so beqlr li r0,NR_syscalls diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index a76b4af37ef2..382021324883 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -145,7 +145,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) bne cr0,99f li r3,0 - cmpli cr0,r4,0 + cmpldi cr0,r4,0 crclr cr0*4+so beqlr lis r5,CLOCK_REALTIME_RES@h diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 616a6d854638..bc85bdff4e01 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -7,31 +7,6 @@ #include <asm/page.h> #include <asm/ptrace.h> -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -/* void do_load_up_transact_altivec(struct thread_struct *thread) - * - * This is similar to load_up_altivec but for the transactional version of the - * vector regs. It doesn't mess with the task MSR or valid flags. - * Furthermore, VEC laziness is not supported with TM currently. - */ -_GLOBAL(do_load_up_transact_altivec) - mfmsr r6 - oris r5,r6,MSR_VEC@h - MTMSRD(r5) - isync - - li r4,1 - stw r4,THREAD_USED_VR(r3) - - li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR - lvx v0,r10,r3 - mtvscr v0 - addi r10,r3,THREAD_TRANSACT_VRSTATE - REST_32VRS(0,r4,r10) - - blr -#endif - /* * Load state from memory into VMX registers including VSCR. * Assumes the caller has enabled VMX in the MSR. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b5fba689fca6..8295f51c1a5f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -44,18 +44,68 @@ SECTIONS * Text, read only data and other permanent read-only sections */ - /* Text and gots */ + _text = .; + _stext = .; + + /* + * Head text. + * This needs to be in its own output section to avoid ld placing + * branch trampoline stubs randomly throughout the fixed sections, + * which it will do (even if the branch comes from another section) + * in order to optimize stub generation. + */ + .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { +#ifdef CONFIG_PPC64 + KEEP(*(.head.text.first_256B)); +#ifdef CONFIG_PPC_BOOK3E +# define END_FIXED 0x100 +#else + KEEP(*(.head.text.real_vectors)); + *(.head.text.real_trampolines); + KEEP(*(.head.text.virt_vectors)); + *(.head.text.virt_trampolines); +# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) + KEEP(*(.head.data.fwnmi_page)); +# define END_FIXED 0x8000 +# else +# define END_FIXED 0x7000 +# endif +#endif + ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error"); +#else /* !CONFIG_PPC64 */ + HEAD_TEXT +#endif + } :kernel + + /* + * If the build dies here, it's likely code in head_64.S is referencing + * labels it can't reach, and the linker inserting stubs without the + * assembler's knowledge. To debug, remove the above assert and + * rebuild. Look for branch stubs in the fixed section region. + * + * Linker stub generation could be allowed in "trampoline" + * sections if absolutely necessary, but this would require + * some rework of the fixed sections. Before resorting to this, + * consider references that have sufficient addressing range, + * (e.g., hand coded trampolines) so the linker does not have + * to add stubs. + * + * Linker stubs at the top of the main text section are currently not + * detected, and will result in a crash at boot due to offsets being + * wrong. + */ .text : AT(ADDR(.text) - LOAD_OFFSET) { ALIGN_FUNCTION(); - HEAD_TEXT - _text = .; /* careful! __ftr_alt_* sections need to be close to .text */ *(.text .fixup __ftr_alt_* .ref.text) SCHED_TEXT + CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT SOFTIRQENTRY_TEXT + MEM_KEEP(init.text) + MEM_KEEP(exit.text) #ifdef CONFIG_PPC32 *(.got1) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index c2024ac9d4e8..029be26b5a17 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -22,6 +22,9 @@ config KVM select ANON_INODES select HAVE_KVM_EVENTFD select SRCU + select KVM_VFIO + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS config KVM_BOOK3S_HANDLER bool diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1f9e5529e692..7dd89b79d038 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -7,16 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm KVM := ../../../virt/kvm -common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ - $(KVM)/eventfd.o +common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o +common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o emulate_loadstore.o +common-objs-y += powerpc.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o @@ -24,6 +24,7 @@ AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj) kvm-e500-objs := \ $(common-objs-y) \ + emulate.o \ booke.o \ booke_emulate.o \ booke_interrupts.o \ @@ -35,6 +36,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) kvm-e500mc-objs := \ $(common-objs-y) \ + emulate.o \ booke.o \ booke_emulate.o \ bookehv_interrupts.o \ @@ -61,9 +63,6 @@ kvm-pr-y := \ book3s_32_mmu.o ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE -kvm-book3s_64-module-objs := \ - $(KVM)/coalesced_mmio.o - kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ book3s_rmhandlers.o endif @@ -78,6 +77,7 @@ kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \ ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \ + book3s_hv_hmi.o \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_hv_ras.o \ @@ -88,11 +88,8 @@ endif kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \ book3s_xics.o -kvm-book3s_64-module-objs += \ - $(KVM)/kvm_main.o \ - $(KVM)/eventfd.o \ - powerpc.o \ - emulate_loadstore.o \ +kvm-book3s_64-module-objs := \ + $(common-objs-y) \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ @@ -102,6 +99,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs) kvm-book3s_32-objs := \ $(common-objs-y) \ + emulate.o \ fpu.o \ book3s_paired_singles.o \ book3s.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 47018fcbf7d6..b6952dd23152 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -52,8 +52,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "queue_intr", VCPU_STAT(queue_intr) }, + { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, + { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, + { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, + { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, @@ -64,6 +68,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ld_slow", VCPU_STAT(ld_slow) }, { "st", VCPU_STAT(st) }, { "st_slow", VCPU_STAT(st_slow) }, + { "pthru_all", VCPU_STAT(pthru_all) }, + { "pthru_host", VCPU_STAT(pthru_host) }, + { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, { NULL } }; @@ -592,9 +599,6 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: *val = get_reg_val(id, vcpu->arch.bescr); break; - case KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; @@ -666,9 +670,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 2afdb9c0937d..8359752b3efc 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -498,6 +498,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: #endif break; unprivileged: @@ -579,7 +580,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = vcpu->arch.spurr; break; case SPRN_VTB: - *spr_val = vcpu->arch.vtb; + *spr_val = to_book3s(vcpu)->vtb; break; case SPRN_IC: *spr_val = vcpu->arch.ic; @@ -640,6 +641,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_MMCR0: case SPRN_MMCR1: case SPRN_MMCR2: + case SPRN_UMMCR2: case SPRN_TIR: #endif *spr_val = 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2fd5580c8f6e..3686471be32b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,11 +53,15 @@ #include <asm/smp.h> #include <asm/dbell.h> #include <asm/hmi.h> +#include <asm/pnv-pci.h> #include <linux/gfp.h> #include <linux/vmalloc.h> #include <linux/highmem.h> #include <linux/hugetlb.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> #include <linux/module.h> +#include <linux/compiler.h> #include "book3s.h" @@ -70,6 +74,8 @@ /* Used to indicate that a guest page fault needs to be handled */ #define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) +/* Used to indicate that a guest passthrough interrupt needs to be handled */ +#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2) /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) @@ -89,14 +95,55 @@ static struct kernel_param_ops module_param_ops = { .get = param_get_int, }; +module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, + S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization"); + module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); + +/* Factor by which the vcore halt poll interval is grown, default is to double + */ +static unsigned int halt_poll_ns_grow = 2; +module_param(halt_poll_ns_grow, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); + +/* Factor by which the vcore halt poll interval is shrunk, default is to reset + */ +static unsigned int halt_poll_ns_shrink; +module_param(halt_poll_ns_shrink, int, S_IRUGO); +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, + int *ip) +{ + int i = *ip; + struct kvm_vcpu *vcpu; + + while (++i < MAX_SMT_THREADS) { + vcpu = READ_ONCE(vc->runnable_threads[i]); + if (vcpu) { + *ip = i; + return vcpu; + } + } + return NULL; +} + +/* Used to traverse the list of runnable threads for a given vcore */ +#define for_each_runnable_thread(i, vcpu, vc) \ + for (i = -1; (vcpu = next_runnable_thread(vc, &i)); ) + static bool kvmppc_ipi_thread(int cpu) { /* On POWER8 for IPIs to threads in the same core, use msgsnd */ @@ -991,6 +1038,9 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_core_queue_program(vcpu, SRR1_PROGILL); r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_HV_RM_HARD: + r = RESUME_PASSTHROUGH; + break; default: kvmppc_dump_regs(vcpu); printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", @@ -1149,6 +1199,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: *val = get_reg_val(id, vcpu->arch.vcore->dpdes); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, vcpu->arch.vcore->vtb); + break; case KVM_REG_PPC_DAWR: *val = get_reg_val(id, vcpu->arch.dawr); break; @@ -1341,6 +1394,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DPDES: vcpu->arch.vcore->dpdes = set_reg_val(id, *val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vcore->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_DAWR: vcpu->arch.dawr = set_reg_val(id, *val); break; @@ -1493,7 +1549,6 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) if (vcore == NULL) return NULL; - INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); init_swait_queue_head(&vcore->wq); @@ -1802,7 +1857,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; spin_unlock_irq(&vcpu->arch.tbacct_lock); --vc->n_runnable; - list_del(&vcpu->arch.run_list); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL); } static int kvmppc_grab_hwthread(int cpu) @@ -2048,66 +2103,6 @@ static void init_master_vcore(struct kvmppc_vcore *vc) vc->conferring_threads = 0; } -/* - * See if the existing subcores can be split into 3 (or fewer) subcores - * of at most two threads each, so we can fit in another vcore. This - * assumes there are at most two subcores and at most 6 threads in total. - */ -static bool can_split_piggybacked_subcores(struct core_info *cip) -{ - int sub, new_sub; - int large_sub = -1; - int thr; - int n_subcores = cip->n_subcores; - struct kvmppc_vcore *vc, *vcnext; - struct kvmppc_vcore *master_vc = NULL; - - for (sub = 0; sub < cip->n_subcores; ++sub) { - if (cip->subcore_threads[sub] <= 2) - continue; - if (large_sub >= 0) - return false; - large_sub = sub; - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - if (vc->num_threads > 2) - return false; - n_subcores += (cip->subcore_threads[sub] - 1) >> 1; - } - if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2)) - return false; - - /* - * Seems feasible, so go through and move vcores to new subcores. - * Note that when we have two or more vcores in one subcore, - * all those vcores must have only one thread each. - */ - new_sub = cip->n_subcores; - thr = 0; - sub = large_sub; - list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) { - if (thr >= 2) { - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]); - /* vc->num_threads must be 1 */ - if (++cip->subcore_threads[new_sub] == 1) { - cip->subcore_vm[new_sub] = vc->kvm; - init_master_vcore(vc); - master_vc = vc; - ++cip->n_subcores; - } else { - vc->master_vcore = master_vc; - ++new_sub; - } - } - thr += vc->num_threads; - } - cip->subcore_threads[large_sub] = 2; - cip->max_subcore_threads = 2; - - return true; -} - static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) { int n_threads = vc->num_threads; @@ -2118,23 +2113,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) if (n_threads < cip->max_subcore_threads) n_threads = cip->max_subcore_threads; - if (subcore_config_ok(cip->n_subcores + 1, n_threads)) { - cip->max_subcore_threads = n_threads; - } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 && - vc->num_threads <= 2) { - /* - * We may be able to fit another subcore in by - * splitting an existing subcore with 3 or 4 - * threads into two 2-thread subcores, or one - * with 5 or 6 threads into three subcores. - * We can only do this if those subcores have - * piggybacked virtual cores. - */ - if (!can_split_piggybacked_subcores(cip)) - return false; - } else { + if (!subcore_config_ok(cip->n_subcores + 1, n_threads)) return false; - } + cip->max_subcore_threads = n_threads; sub = cip->n_subcores; ++cip->n_subcores; @@ -2148,43 +2129,6 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) return true; } -static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, - struct core_info *cip, int sub) -{ - struct kvmppc_vcore *vc; - int n_thr; - - vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore, - preempt_list); - - /* require same VM and same per-core reg values */ - if (pvc->kvm != vc->kvm || - pvc->tb_offset != vc->tb_offset || - pvc->pcr != vc->pcr || - pvc->lpcr != vc->lpcr) - return false; - - /* P8 guest with > 1 thread per core would see wrong TIR value */ - if (cpu_has_feature(CPU_FTR_ARCH_207S) && - (vc->num_threads > 1 || pvc->num_threads > 1)) - return false; - - n_thr = cip->subcore_threads[sub] + pvc->num_threads; - if (n_thr > cip->max_subcore_threads) { - if (!subcore_config_ok(cip->n_subcores, n_thr)) - return false; - cip->max_subcore_threads = n_thr; - } - - cip->total_threads += pvc->num_threads; - cip->subcore_threads[sub] = n_thr; - pvc->master_vcore = vc; - list_del(&pvc->preempt_list); - list_add_tail(&pvc->preempt_list, &cip->vcs[sub]); - - return true; -} - /* * Work out whether it is possible to piggyback the execution of * vcore *pvc onto the execution of the other vcores described in *cip. @@ -2192,27 +2136,18 @@ static bool can_piggyback_subcore(struct kvmppc_vcore *pvc, static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip, int target_threads) { - int sub; - if (cip->total_threads + pvc->num_threads > target_threads) return false; - for (sub = 0; sub < cip->n_subcores; ++sub) - if (cip->subcore_threads[sub] && - can_piggyback_subcore(pvc, cip, sub)) - return true; - - if (can_dynamic_split(pvc, cip)) - return true; - return false; + return can_dynamic_split(pvc, cip); } static void prepare_threads(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + int i; + struct kvm_vcpu *vcpu; - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { if (signal_pending(vcpu->arch.run_task)) vcpu->arch.ret = -EINTR; else if (vcpu->arch.vpa.update_pending || @@ -2259,15 +2194,14 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) { - int still_running = 0; + int still_running = 0, i; u64 now; long ret; - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; spin_lock(&vc->lock); now = get_tb(); - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { /* cancel pending dec exception if dec is positive */ if (now < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) @@ -2307,8 +2241,8 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master) } if (vc->n_runnable > 0 && vc->runner == NULL) { /* make sure there's a candidate runner awake */ - vcpu = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + vcpu = next_runnable_thread(vc, &i); wake_up(&vcpu->arch.cpu_run); } } @@ -2361,7 +2295,7 @@ static inline void kvmppc_set_host_core(int cpu) */ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu, *vnext; + struct kvm_vcpu *vcpu; int i; int srcu_idx; struct core_info core_info; @@ -2397,8 +2331,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ if ((threads_per_core > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { - list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; kvmppc_remove_runnable(vc, vcpu); wake_up(&vcpu->arch.cpu_run); @@ -2477,8 +2410,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) active |= 1 << thr; list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) { pvc->pcpu = pcpu + thr; - list_for_each_entry(vcpu, &pvc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, vcpu, pvc) { kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -2604,34 +2536,92 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, finish_wait(&vcpu->arch.cpu_run, &wait); } +static void grow_halt_poll_ns(struct kvmppc_vcore *vc) +{ + /* 10us base */ + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) + vc->halt_poll_ns = 10000; + else + vc->halt_poll_ns *= halt_poll_ns_grow; + + if (vc->halt_poll_ns > halt_poll_max_ns) + vc->halt_poll_ns = halt_poll_max_ns; +} + +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) +{ + if (halt_poll_ns_shrink == 0) + vc->halt_poll_ns = 0; + else + vc->halt_poll_ns /= halt_poll_ns_shrink; +} + +/* Check to see if any of the runnable vcpus on the vcore have pending + * exceptions or are no longer ceded + */ +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) +{ + struct kvm_vcpu *vcpu; + int i; + + for_each_runnable_thread(i, vcpu, vc) { + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) + return 1; + } + + return 0; +} + /* * All the vcpus in this vcore are idle, so wait for a decrementer * or external interrupt to one of the vcpus. vc->lock is held. */ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { - struct kvm_vcpu *vcpu; + ktime_t cur, start_poll, start_wait; int do_sleep = 1; + u64 block_ns; DECLARE_SWAITQUEUE(wait); - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + /* Poll for pending exceptions and ceded state */ + cur = start_poll = ktime_get(); + if (vc->halt_poll_ns) { + ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns); + ++vc->runner->stat.halt_attempted_poll; - /* - * Check one last time for pending exceptions and ceded state after - * we put ourselves on the wait queue - */ - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { - do_sleep = 0; - break; + vc->vcore_state = VCORE_POLLING; + spin_unlock(&vc->lock); + + do { + if (kvmppc_vcore_check_block(vc)) { + do_sleep = 0; + break; + } + cur = ktime_get(); + } while (single_task_running() && ktime_before(cur, stop)); + + spin_lock(&vc->lock); + vc->vcore_state = VCORE_INACTIVE; + + if (!do_sleep) { + ++vc->runner->stat.halt_successful_poll; + goto out; } } - if (!do_sleep) { + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + + if (kvmppc_vcore_check_block(vc)) { finish_swait(&vc->wq, &wait); - return; + do_sleep = 0; + /* If we polled, count this as a successful poll */ + if (vc->halt_poll_ns) + ++vc->runner->stat.halt_successful_poll; + goto out; } + start_wait = ktime_get(); + vc->vcore_state = VCORE_SLEEPING; trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); @@ -2640,13 +2630,52 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); + ++vc->runner->stat.halt_successful_wait; + + cur = ktime_get(); + +out: + block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll); + + /* Attribute wait time */ + if (do_sleep) { + vc->runner->stat.halt_wait_ns += + ktime_to_ns(cur) - ktime_to_ns(start_wait); + /* Attribute failed poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_fail_ns += + ktime_to_ns(start_wait) - + ktime_to_ns(start_poll); + } else { + /* Attribute successful poll time */ + if (vc->halt_poll_ns) + vc->runner->stat.halt_poll_success_ns += + ktime_to_ns(cur) - + ktime_to_ns(start_poll); + } + + /* Adjust poll time */ + if (halt_poll_max_ns) { + if (block_ns <= vc->halt_poll_ns) + ; + /* We slept and blocked for longer than the max halt time */ + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + shrink_halt_poll_ns(vc); + /* We slept and our poll time is too small */ + else if (vc->halt_poll_ns < halt_poll_max_ns && + block_ns < halt_poll_max_ns) + grow_halt_poll_ns(vc); + } else + vc->halt_poll_ns = 0; + + trace_kvmppc_vcore_wakeup(do_sleep, block_ns); } static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int n_ceded; + int n_ceded, i; struct kvmppc_vcore *vc; - struct kvm_vcpu *v, *vn; + struct kvm_vcpu *v; trace_kvmppc_run_vcpu_enter(vcpu); @@ -2666,7 +2695,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; vcpu->arch.busy_preempt = TB_NIL; - list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); + WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu); ++vc->n_runnable; /* @@ -2706,8 +2735,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE); continue; } - list_for_each_entry_safe(v, vn, &vc->runnable_threads, - arch.run_list) { + for_each_runnable_thread(i, v, vc) { kvmppc_core_prepare_to_enter(v); if (signal_pending(v->arch.run_task)) { kvmppc_remove_runnable(vc, v); @@ -2720,7 +2748,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) break; n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { + for_each_runnable_thread(i, v, vc) { if (!v->arch.pending_exceptions) n_ceded += v->arch.ceded; else @@ -2759,8 +2787,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { /* Wake up some vcpu to run the core */ - v = list_first_entry(&vc->runnable_threads, - struct kvm_vcpu, arch.run_list); + i = -1; + v = next_runnable_thread(vc, &i); wake_up(&v->arch.cpu_run); } @@ -2818,7 +2846,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); - } + } else if (r == RESUME_PASSTHROUGH) + r = kvmppc_xics_rm_complete(vcpu, 0); } while (is_kvmppc_resume_guest(r)); out: @@ -3247,6 +3276,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm) kvmppc_free_vcores(kvm); kvmppc_free_hpt(kvm); + + kvmppc_free_pimap(kvm); } /* We don't need to emulate any privileged instructions or dcbz */ @@ -3282,6 +3313,184 @@ static int kvmppc_core_check_processor_compat_hv(void) return 0; } +#ifdef CONFIG_KVM_XICS + +void kvmppc_free_pimap(struct kvm *kvm) +{ + kfree(kvm->arch.pimap); +} + +static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void) +{ + return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL); +} + +static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_irq_map *irq_map; + struct kvmppc_passthru_irqmap *pimap; + struct irq_chip *chip; + int i; + + if (!kvm_irq_bypass) + return 1; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + pimap = kvm->arch.pimap; + if (pimap == NULL) { + /* First call, allocate structure to hold IRQ map */ + pimap = kvmppc_alloc_pimap(); + if (pimap == NULL) { + mutex_unlock(&kvm->lock); + return -ENOMEM; + } + kvm->arch.pimap = pimap; + } + + /* + * For now, we only support interrupts for which the EOI operation + * is an OPAL call followed by a write to XIRR, since that's + * what our real-mode EOI code does. + */ + chip = irq_data_get_irq_chip(&desc->irq_data); + if (!chip || !is_pnv_opal_msi(chip)) { + pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n", + host_irq, guest_gsi); + mutex_unlock(&kvm->lock); + return -ENOENT; + } + + /* + * See if we already have an entry for this guest IRQ number. + * If it's mapped to a hardware IRQ number, that's an error, + * otherwise re-use this entry. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) { + if (pimap->mapped[i].r_hwirq) { + mutex_unlock(&kvm->lock); + return -EINVAL; + } + break; + } + } + + if (i == KVMPPC_PIRQ_MAPPED) { + mutex_unlock(&kvm->lock); + return -EAGAIN; /* table is full */ + } + + irq_map = &pimap->mapped[i]; + + irq_map->v_hwirq = guest_gsi; + irq_map->desc = desc; + + /* + * Order the above two stores before the next to serialize with + * the KVM real mode handler. + */ + smp_wmb(); + irq_map->r_hwirq = desc->irq_data.hwirq; + + if (i == pimap->n_mapped) + pimap->n_mapped++; + + kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); + + mutex_unlock(&kvm->lock); + + return 0; +} + +static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) +{ + struct irq_desc *desc; + struct kvmppc_passthru_irqmap *pimap; + int i; + + if (!kvm_irq_bypass) + return 0; + + desc = irq_to_desc(host_irq); + if (!desc) + return -EIO; + + mutex_lock(&kvm->lock); + + if (kvm->arch.pimap == NULL) { + mutex_unlock(&kvm->lock); + return 0; + } + pimap = kvm->arch.pimap; + + for (i = 0; i < pimap->n_mapped; i++) { + if (guest_gsi == pimap->mapped[i].v_hwirq) + break; + } + + if (i == pimap->n_mapped) { + mutex_unlock(&kvm->lock); + return -ENODEV; + } + + kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); + + /* invalidate the entry */ + pimap->mapped[i].r_hwirq = 0; + + /* + * We don't free this structure even when the count goes to + * zero. The structure is freed when we destroy the VM. + */ + + mutex_unlock(&kvm->lock); + return 0; +} + +static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret = 0; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = prod; + + ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); + + return ret; +} + +static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + int ret; + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + irqfd->producer = NULL; + + /* + * When producer of consumer is unregistered, we change back to + * default external interrupt handling mode - KVM real mode + * will switch back to host. + */ + ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi); + if (ret) + pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n", + prod->irq, irqfd->gsi, ret); +} +#endif + static long kvm_arch_vm_ioctl_hv(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3400,6 +3609,10 @@ static struct kvmppc_ops kvm_ops_hv = { .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, .hcall_implemented = kvmppc_hcall_impl_hv, +#ifdef CONFIG_KVM_XICS + .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, + .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, +#endif }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 5f0380db3eab..0c84d6bc8356 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -25,6 +25,7 @@ #include <asm/xics.h> #include <asm/dbell.h> #include <asm/cputhreads.h> +#include <asm/io.h> #define KVM_CMA_CHUNK_ORDER 18 @@ -286,3 +287,158 @@ void kvmhv_commence_exit(int trap) struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv; EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv); + +#ifdef CONFIG_KVM_XICS +static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, + u32 xisr) +{ + int i; + + /* + * We access the mapped array here without a lock. That + * is safe because we never reduce the number of entries + * in the array and we never change the v_hwirq field of + * an entry once it is set. + * + * We have also carefully ordered the stores in the writer + * and the loads here in the reader, so that if we find a matching + * hwirq here, the associated GSI and irq_desc fields are valid. + */ + for (i = 0; i < pimap->n_mapped; i++) { + if (xisr == pimap->mapped[i].r_hwirq) { + /* + * Order subsequent reads in the caller to serialize + * with the writer. + */ + smp_rmb(); + return &pimap->mapped[i]; + } + } + return NULL; +} + +/* + * If we have an interrupt that's not an IPI, check if we have a + * passthrough adapter and if so, check if this external interrupt + * is for the adapter. + * We will attempt to deliver the IRQ directly to the target VCPU's + * ICP, the virtual ICP (based on affinity - the xive value in ICS). + * + * If the delivery fails or if this is not for a passthrough adapter, + * return to the host to handle this interrupt. We earlier + * saved a copy of the XIRR in the PACA, it will be picked up by + * the host ICP driver. + */ +static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + struct kvmppc_passthru_irqmap *pimap; + struct kvmppc_irq_map *irq_map; + struct kvm_vcpu *vcpu; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (!vcpu) + return 1; + pimap = kvmppc_get_passthru_irqmap(vcpu->kvm); + if (!pimap) + return 1; + irq_map = get_irqmap(pimap, xisr); + if (!irq_map) + return 1; + + /* We're handling this interrupt, generic code doesn't need to */ + local_paca->kvm_hstate.saved_xirr = 0; + + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); +} + +#else +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +{ + return 1; +} +#endif + +/* + * Determine what sort of external interrupt is pending (if any). + * Returns: + * 0 if no interrupt is pending + * 1 if an interrupt is pending that needs to be handled by the host + * 2 Passthrough that needs completion in the host + * -1 if there was a guest wakeup IPI (which has now been cleared) + * -2 if there is PCI passthrough external interrupt that was handled + */ + +long kvmppc_read_intr(void) +{ + unsigned long xics_phys; + u32 h_xirr; + __be32 xirr; + u32 xisr; + u8 host_ipi; + + /* see if a host IPI is pending */ + host_ipi = local_paca->kvm_hstate.host_ipi; + if (host_ipi) + return 1; + + /* Now read the interrupt from the ICP */ + xics_phys = local_paca->kvm_hstate.xics_phys; + if (unlikely(!xics_phys)) + return 1; + + /* + * Save XIRR for later. Since we get control in reverse endian + * on LE systems, save it byte reversed and fetch it back in + * host endian. Note that xirr is the value read from the + * XIRR register, while h_xirr is the host endian version. + */ + xirr = _lwzcix(xics_phys + XICS_XIRR); + h_xirr = be32_to_cpu(xirr); + local_paca->kvm_hstate.saved_xirr = h_xirr; + xisr = h_xirr & 0xffffff; + /* + * Ensure that the store/load complete to guarantee all side + * effects of loading from XIRR has completed + */ + smp_mb(); + + /* if nothing pending in the ICP */ + if (!xisr) + return 0; + + /* We found something in the ICP... + * + * If it is an IPI, clear the MFRR and EOI it. + */ + if (xisr == XICS_IPI) { + _stbcix(xics_phys + XICS_MFRR, 0xff); + _stwcix(xics_phys + XICS_XIRR, xirr); + /* + * Need to ensure side effects of above stores + * complete before proceeding. + */ + smp_mb(); + + /* + * We need to re-check host IPI now in case it got set in the + * meantime. If it's clear, we bounce the interrupt to the + * guest + */ + host_ipi = local_paca->kvm_hstate.host_ipi; + if (unlikely(host_ipi != 0)) { + /* We raced with the host, + * we need to resend that IPI, bummer + */ + _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + /* Let side effects complete */ + smp_mb(); + return 1; + } + + /* OK, it's an IPI for us */ + local_paca->kvm_hstate.saved_xirr = 0; + return -1; + } + + return kvmppc_check_passthru(xisr, xirr); +} diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c index e3f738eb1cac..e3f738eb1cac 100644 --- a/arch/powerpc/kernel/hmi.c +++ b/arch/powerpc/kvm/book3s_hv_hmi.c diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 980d8a6f7284..82ff5de8b1e7 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/kvm_host.h> #include <linux/err.h> +#include <linux/kernel_stat.h> #include <asm/kvm_book3s.h> #include <asm/kvm_ppc.h> @@ -18,7 +19,10 @@ #include <asm/debug.h> #include <asm/synch.h> #include <asm/cputhreads.h> +#include <asm/pgtable.h> #include <asm/ppc-opcode.h> +#include <asm/pnv-pci.h> +#include <asm/opal.h> #include "book3s_xics.h" @@ -26,9 +30,12 @@ int h_ipi_redirect = 1; EXPORT_SYMBOL(h_ipi_redirect); +int kvm_irq_bypass = 1; +EXPORT_SYMBOL(kvm_irq_bypass); static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp, u32 new_irq); +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu); /* -- ICS routines -- */ static void ics_rm_check_resend(struct kvmppc_xics *xics, @@ -708,10 +715,123 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) icp->rm_action |= XICS_RM_NOTIFY_EOI; icp->rm_eoied_irq = irq; } + + if (state->host_irq) { + ++vcpu->stat.pthru_all; + if (state->intr_cpu != -1) { + int pcpu = raw_smp_processor_id(); + + pcpu = cpu_first_thread_sibling(pcpu); + ++vcpu->stat.pthru_host; + if (state->intr_cpu != pcpu) { + ++vcpu->stat.pthru_bad_aff; + xics_opal_rm_set_server(state->host_irq, pcpu); + } + state->intr_cpu = -1; + } + } bail: return check_too_hard(xics, icp); } +unsigned long eoi_rc; + +static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +{ + unsigned long xics_phys; + int64_t rc; + + rc = pnv_opal_pci_msi_eoi(c, hwirq); + + if (rc) + eoi_rc = rc; + + iosync(); + + /* EOI it */ + xics_phys = local_paca->kvm_hstate.xics_phys; + _stwcix(xics_phys + XICS_XIRR, xirr); +} + +static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) +{ + unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2; + + return opal_rm_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY); +} + +/* + * Increment a per-CPU 32-bit unsigned integer variable. + * Safe to call in real-mode. Handles vmalloc'ed addresses + * + * ToDo: Make this work for any integral type + */ + +static inline void this_cpu_inc_rm(unsigned int __percpu *addr) +{ + unsigned long l; + unsigned int *raddr; + int cpu = smp_processor_id(); + + raddr = per_cpu_ptr(addr, cpu); + l = (unsigned long)raddr; + + if (REGION_ID(l) == VMALLOC_REGION_ID) { + l = vmalloc_to_phys(raddr); + raddr = (unsigned int *)l; + } + ++*raddr; +} + +/* + * We don't try to update the flags in the irq_desc 'istate' field in + * here as would happen in the normal IRQ handling path for several reasons: + * - state flags represent internal IRQ state and are not expected to be + * updated outside the IRQ subsystem + * - more importantly, these are useful for edge triggered interrupts, + * IRQ probing, etc., but we are only handling MSI/MSIx interrupts here + * and these states shouldn't apply to us. + * + * However, we do update irq_stats - we somewhat duplicate the code in + * kstat_incr_irqs_this_cpu() for this since this function is defined + * in irq/internal.h which we don't want to include here. + * The only difference is that desc->kstat_irqs is an allocated per CPU + * variable and could have been vmalloc'ed, so we can't directly + * call __this_cpu_inc() on it. The kstat structure is a static + * per CPU variable and it should be accessible by real-mode KVM. + * + */ +static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) +{ + this_cpu_inc_rm(desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} + +long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, + u32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap) +{ + struct kvmppc_xics *xics; + struct kvmppc_icp *icp; + u32 irq; + + irq = irq_map->v_hwirq; + xics = vcpu->kvm->arch.xics; + icp = vcpu->arch.icp; + + kvmppc_rm_handle_irq_desc(irq_map->desc); + icp_rm_deliver_irq(xics, icp, irq); + + /* EOI the interrupt */ + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + + if (check_too_hard(xics, icp) == H_TOO_HARD) + return 2; + else + return -2; +} + /* --- Non-real mode XICS-related built-in routines --- */ /** diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 975655573844..c3c1d1bcfc67 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -221,6 +221,13 @@ kvmppc_primary_no_guest: li r3, 0 /* Don't wake on privileged (OS) doorbell */ b kvm_do_nap +/* + * kvm_novcpu_wakeup + * Entered from kvm_start_guest if kvm_hstate.napping is set + * to NAPPING_NOVCPU + * r2 = kernel TOC + * r13 = paca + */ kvm_novcpu_wakeup: ld r1, HSTATE_HOST_R1(r13) ld r5, HSTATE_KVM_VCORE(r13) @@ -230,6 +237,13 @@ kvm_novcpu_wakeup: /* check the wake reason */ bl kvmppc_check_wake_reason + /* + * Restore volatile registers since we could have called + * a C routine in kvmppc_check_wake_reason. + * r5 = VCORE + */ + ld r5, HSTATE_KVM_VCORE(r13) + /* see if any other thread is already exiting */ lwz r0, VCORE_ENTRY_EXIT(r5) cmpwi r0, 0x100 @@ -322,6 +336,11 @@ kvm_start_guest: /* Check the wake reason in SRR1 to see why we got here */ bl kvmppc_check_wake_reason + /* + * kvmppc_check_wake_reason could invoke a C routine, but we + * have no volatile registers to restore when we return. + */ + cmpdi r3, 0 bge kvm_no_guest @@ -625,9 +644,11 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) 38: BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ ld r8, VCORE_DPDES(r5) + ld r7, VCORE_VTB(r5) mtspr SPRN_DPDES, r8 + mtspr SPRN_VTB, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Mark the subcore state as inside guest */ @@ -787,10 +808,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) - ld r6, VCPU_VTB(r4) - mtspr SPRN_IC, r5 - mtspr SPRN_VTB, r6 ld r8, VCPU_EBBHR(r4) + mtspr SPRN_IC, r5 mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) @@ -881,6 +900,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) cmpwi r3, 512 /* 1 microsecond */ blt hdec_soon +deliver_guest_interrupt: ld r6, VCPU_CTR(r4) ld r7, VCPU_XER(r4) @@ -895,7 +915,6 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */ mtspr SPRN_SRR0, r6 mtspr SPRN_SRR1, r7 -deliver_guest_interrupt: /* r11 = vcpu->arch.msr & ~MSR_HV */ rldicl r11, r11, 63 - MSR_HV_LG, 1 rotldi r11, r11, 1 + MSR_HV_LG @@ -1155,10 +1174,54 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * set, we know the host wants us out so let's do it now */ bl kvmppc_read_intr + + /* + * Restore the active volatile registers after returning from + * a C function. + */ + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_EXTERNAL + + /* + * kvmppc_read_intr return codes: + * + * Exit to host (r3 > 0) + * 1 An interrupt is pending that needs to be handled by the host + * Exit guest and return to host by branching to guest_exit_cont + * + * 2 Passthrough that needs completion in the host + * Exit guest and return to host by branching to guest_exit_cont + * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD + * to indicate to the host to complete handling the interrupt + * + * Before returning to guest, we check if any CPU is heading out + * to the host and if so, we head out also. If no CPUs are heading + * check return values <= 0. + * + * Return to guest (r3 <= 0) + * 0 No external interrupt is pending + * -1 A guest wakeup IPI (which has now been cleared) + * In either case, we return to guest to deliver any pending + * guest interrupts. + * + * -2 A PCI passthrough external interrupt was handled + * (interrupt was delivered directly to guest) + * Return to guest to deliver any pending guest interrupts. + */ + + cmpdi r3, 1 + ble 1f + + /* Return code = 2 */ + li r12, BOOK3S_INTERRUPT_HV_RM_HARD + stw r12, VCPU_TRAP(r9) + b guest_exit_cont + +1: /* Return code <= 1 */ cmpdi r3, 0 bgt guest_exit_cont - /* Check if any CPU is heading out to the host, if so head out too */ + /* Return code <= 0 */ 4: ld r5, HSTATE_KVM_VCORE(r13) lwz r0, VCORE_ENTRY_EXIT(r5) cmpwi r0, 0x100 @@ -1271,10 +1334,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) stw r6, VCPU_PSPB(r9) std r7, VCPU_FSCR(r9) mfspr r5, SPRN_IC - mfspr r6, SPRN_VTB mfspr r7, SPRN_TAR std r5, VCPU_IC(r9) - std r6, VCPU_VTB(r9) std r7, VCPU_TAR(r9) mfspr r8, SPRN_EBBHR std r8, VCPU_EBBHR(r9) @@ -1501,9 +1562,11 @@ kvmhv_switch_to_host: isync BEGIN_FTR_SECTION - /* DPDES is shared between threads */ + /* DPDES and VTB are shared between threads */ mfspr r7, SPRN_DPDES + mfspr r8, SPRN_VTB std r7, VCORE_DPDES(r5) + std r8, VCORE_VTB(r5) /* clear DPDES so we don't get guest doorbells in the host */ li r8, 0 mtspr SPRN_DPDES, r8 @@ -2213,10 +2276,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) ld r29, VCPU_GPR(R29)(r4) ld r30, VCPU_GPR(R30)(r4) ld r31, VCPU_GPR(R31)(r4) - + /* Check the wake reason in SRR1 to see why we got here */ bl kvmppc_check_wake_reason + /* + * Restore volatile registers since we could have called a + * C routine in kvmppc_check_wake_reason + * r4 = VCPU + * r3 tells us whether we need to return to host or not + * WARNING: it gets checked further down: + * should not modify r3 until this check is done. + */ + ld r4, HSTATE_KVM_VCPU(r13) + /* clear our bit in vcore->napping_threads */ 34: ld r5,HSTATE_KVM_VCORE(r13) lbz r7,HSTATE_PTID(r13) @@ -2230,7 +2303,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) li r0,0 stb r0,HSTATE_NAPPING(r13) - /* See if the wake reason means we need to exit */ + /* See if the wake reason saved in r3 means we need to exit */ stw r12, VCPU_TRAP(r4) mr r9, r4 cmpdi r3, 0 @@ -2297,10 +2370,14 @@ machine_check_realmode: * 0 if nothing needs to be done * 1 if something happened that needs to be handled by the host * -1 if there was a guest wakeup (IPI or msgsnd) + * -2 if we handled a PCI passthrough interrupt (returned by + * kvmppc_read_intr only) * * Also sets r12 to the interrupt vector for any interrupt that needs * to be handled now by the host (0x500 for external interrupt), or zero. - * Modifies r0, r6, r7, r8. + * Modifies all volatile registers (since it may call a C function). + * This routine calls kvmppc_read_intr, a C function, if an external + * interrupt is pending. */ kvmppc_check_wake_reason: mfspr r6, SPRN_SRR1 @@ -2310,8 +2387,7 @@ FTR_SECTION_ELSE rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) cmpwi r6, 8 /* was it an external interrupt? */ - li r12, BOOK3S_INTERRUPT_EXTERNAL - beq kvmppc_read_intr /* if so, see what it was */ + beq 7f /* if so, see what it was */ li r3, 0 li r12, 0 cmpwi r6, 6 /* was it the decrementer? */ @@ -2350,83 +2426,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) li r3, 1 blr -/* - * Determine what sort of external interrupt is pending (if any). - * Returns: - * 0 if no interrupt is pending - * 1 if an interrupt is pending that needs to be handled by the host - * -1 if there was a guest wakeup IPI (which has now been cleared) - * Modifies r0, r6, r7, r8, returns value in r3. - */ -kvmppc_read_intr: - /* see if a host IPI is pending */ - li r3, 1 - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne 1f + /* external interrupt - create a stack frame so we can call C */ +7: mflr r0 + std r0, PPC_LR_STKOFF(r1) + stdu r1, -PPC_MIN_STKFRM(r1) + bl kvmppc_read_intr + nop + li r12, BOOK3S_INTERRUPT_EXTERNAL + cmpdi r3, 1 + ble 1f - /* Now read the interrupt from the ICP */ - ld r6, HSTATE_XICS_PHYS(r13) - li r7, XICS_XIRR - cmpdi r6, 0 - beq- 1f - lwzcix r0, r6, r7 /* - * Save XIRR for later. Since we get in in reverse endian on LE - * systems, save it byte reversed and fetch it back in host endian. - */ - li r3, HSTATE_SAVED_XIRR - STWX_BE r0, r3, r13 -#ifdef __LITTLE_ENDIAN__ - lwz r3, HSTATE_SAVED_XIRR(r13) -#else - mr r3, r0 -#endif - rlwinm. r3, r3, 0, 0xffffff - sync - beq 1f /* if nothing pending in the ICP */ - - /* We found something in the ICP... - * - * If it's not an IPI, stash it in the PACA and return to - * the host, we don't (yet) handle directing real external - * interrupts directly to the guest + * Return code of 2 means PCI passthrough interrupt, but + * we need to return back to host to complete handling the + * interrupt. Trap reason is expected in r12 by guest + * exit code. */ - cmpwi r3, XICS_IPI /* if there is, is it an IPI? */ - bne 42f - - /* It's an IPI, clear the MFRR and EOI it */ - li r3, 0xff - li r8, XICS_MFRR - stbcix r3, r6, r8 /* clear the IPI */ - stwcix r0, r6, r7 /* EOI it */ - sync - - /* We need to re-check host IPI now in case it got set in the - * meantime. If it's clear, we bounce the interrupt to the - * guest - */ - lbz r0, HSTATE_HOST_IPI(r13) - cmpwi r0, 0 - bne- 43f - - /* OK, it's an IPI for us */ - li r12, 0 - li r3, -1 -1: blr - -42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in - * the PACA earlier, it will be picked up by the host ICP driver - */ - li r3, 1 - b 1b - -43: /* We raced with the host, we need to resend that IPI, bummer */ - li r0, IPI_PRIORITY - stbcix r0, r6, r8 /* set the IPI */ - sync - li r3, 1 - b 1b + li r12, BOOK3S_INTERRUPT_HV_RM_HARD +1: + ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1) + addi r1, r1, PPC_MIN_STKFRM + mtlr r0 + blr /* * Save away FP, VMX and VSX registers. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76f79a45988..826c541a12af 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -226,7 +226,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb; if (cpu_has_feature(CPU_FTR_ARCH_207S)) vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; @@ -448,6 +448,8 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER7: case PVR_POWER7p: case PVR_POWER8: + case PVR_POWER8E: + case PVR_POWER8NVL: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; break; @@ -1361,6 +1363,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; + case KVM_REG_PPC_VTB: + *val = get_reg_val(id, to_book3s(vcpu)->vtb); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: /* @@ -1397,6 +1402,9 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; + case KVM_REG_PPC_VTB: + to_book3s(vcpu)->vtb = set_reg_val(id, *val); + break; case KVM_REG_PPC_LPCR: case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a75ba38a2d81..3bdc639157c1 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -99,6 +99,10 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level) return 0; } + /* Record which CPU this arrived on for passed-through interrupts */ + if (state->host_irq) + state->intr_cpu = raw_smp_processor_id(); + /* Attempt delivery */ icp_deliver_irq(xics, NULL, irq); @@ -812,7 +816,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) return H_SUCCESS; } -static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) +int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) { struct kvmppc_xics *xics = vcpu->kvm->arch.xics; struct kvmppc_icp *icp = vcpu->arch.icp; @@ -841,6 +845,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall) return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete); int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) { @@ -892,6 +897,21 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall); /* -- Initialisation code etc. -- */ +static void xics_debugfs_irqmap(struct seq_file *m, + struct kvmppc_passthru_irqmap *pimap) +{ + int i; + + if (!pimap) + return; + seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n", + pimap->n_mapped); + for (i = 0; i < pimap->n_mapped; i++) { + seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n", + pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq); + } +} + static int xics_debug_show(struct seq_file *m, void *private) { struct kvmppc_xics *xics = m->private; @@ -913,6 +933,8 @@ static int xics_debug_show(struct seq_file *m, void *private) t_check_resend = 0; t_reject = 0; + xics_debugfs_irqmap(m, kvm->arch.pimap); + seq_printf(m, "=========\nICP state\n=========\n"); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -1252,6 +1274,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, { struct kvmppc_xics *xics = kvm->arch.xics; + if (!xics) + return -ENODEV; return ics_deliver_irq(xics, irq, level); } @@ -1329,20 +1353,16 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) xics->kvm = kvm; /* Already there ? */ - mutex_lock(&kvm->lock); if (kvm->arch.xics) ret = -EEXIST; else kvm->arch.xics = xics; - mutex_unlock(&kvm->lock); if (ret) { kfree(xics); return ret; } - xics_debugfs_init(xics); - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE if (cpu_has_feature(CPU_FTR_ARCH_206)) { /* Enable real mode support */ @@ -1354,9 +1374,17 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) return 0; } +static void kvmppc_xics_init(struct kvm_device *dev) +{ + struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private; + + xics_debugfs_init(xics); +} + struct kvm_device_ops kvm_xics_ops = { .name = "kvm-xics", .create = kvmppc_xics_create, + .init = kvmppc_xics_init, .destroy = kvmppc_xics_free, .set_attr = xics_set_attr, .get_attr = xics_get_attr, @@ -1414,3 +1442,34 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) { return pin; } + +void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = host_irq; + ics->irq_state[idx].intr_cpu = -1; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped); + +void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq, + unsigned long host_irq) +{ + struct kvmppc_xics *xics = kvm->arch.xics; + struct kvmppc_ics *ics; + u16 idx; + + ics = kvmppc_xics_find_ics(xics, irq, &idx); + if (!ics) + return; + + ics->irq_state[idx].host_irq = 0; +} +EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped); diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index a46b954055c4..2a50320b55ca 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -42,6 +42,8 @@ struct ics_irq_state { u8 lsi; /* level-sensitive interrupt */ u8 asserted; /* Only for LSI */ u8 exists; + int intr_cpu; + u32 host_irq; }; /* Atomic ICP state, updated with a single compare & swap */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 02b4672f7347..df3f2706d3e5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -2038,7 +2038,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (type == KVMPPC_DEBUG_NONE) continue; - if (type & !(KVMPPC_DEBUG_WATCH_READ | + if (type & ~(KVMPPC_DEBUG_WATCH_READ | KVMPPC_DEBUG_WATCH_WRITE | KVMPPC_DEBUG_BREAKPOINT)) return -EINVAL; diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 29911a07bcdb..ddbf8f0284c0 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -743,7 +743,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, char *virt; struct page **pages; struct tlbe_priv *privs[2] = {}; - u64 *g2h_bitmap = NULL; + u64 *g2h_bitmap; size_t array_len; u32 sets; int num_pages, ret, i; @@ -779,41 +779,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) - cfg->array / PAGE_SIZE; - pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); + pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return -ENOMEM; ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); if (ret < 0) - goto err_pages; + goto free_pages; if (ret != num_pages) { num_pages = ret; ret = -EFAULT; - goto err_put_page; + goto put_pages; } virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); if (!virt) { ret = -ENOMEM; - goto err_put_page; + goto put_pages; } - privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], - GFP_KERNEL); - privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], - GFP_KERNEL); + privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL); + if (!privs[0]) { + ret = -ENOMEM; + goto put_pages; + } - if (!privs[0] || !privs[1]) { + privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL); + if (!privs[1]) { ret = -ENOMEM; - goto err_privs; + goto free_privs_first; } - g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], - GFP_KERNEL); + g2h_bitmap = kcalloc(params.tlb_sizes[1], + sizeof(*g2h_bitmap), + GFP_KERNEL); if (!g2h_bitmap) { ret = -ENOMEM; - goto err_privs; + goto free_privs_second; } free_gtlb(vcpu_e500); @@ -845,16 +848,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; - -err_privs: - kfree(privs[0]); + free_privs_second: kfree(privs[1]); - -err_put_page: + free_privs_first: + kfree(privs[0]); + put_pages: for (i = 0; i < num_pages; i++) put_page(pages[i]); - -err_pages: + free_pages: kfree(pages); return ret; } @@ -904,11 +905,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu, int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) { struct kvm_vcpu *vcpu = &vcpu_e500->vcpu; - int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); - int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; if (e500_mmu_host_init(vcpu_e500)) - goto err; + goto free_vcpu; vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; @@ -920,37 +919,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE; vcpu_e500->gtlb_params[1].sets = 1; - vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL); + vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE + + KVM_E500_TLB1_SIZE, + sizeof(*vcpu_e500->gtlb_arch), + GFP_KERNEL); if (!vcpu_e500->gtlb_arch) return -ENOMEM; vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[0].entries, + vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries, + sizeof(struct tlbe_ref), GFP_KERNEL); if (!vcpu_e500->gtlb_priv[0]) - goto err; + goto free_vcpu; - vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) * - vcpu_e500->gtlb_params[1].entries, + vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries, + sizeof(struct tlbe_ref), GFP_KERNEL); if (!vcpu_e500->gtlb_priv[1]) - goto err; + goto free_vcpu; - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * - vcpu_e500->gtlb_params[1].entries, + vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries, + sizeof(*vcpu_e500->g2h_tlb1_map), GFP_KERNEL); if (!vcpu_e500->g2h_tlb1_map) - goto err; + goto free_vcpu; vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params); kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; - -err: + free_vcpu: free_gtlb(vcpu_e500); return -1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6ce40dd6fe51..70963c845e96 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/file.h> #include <linux/module.h> +#include <linux/irqbypass.h> +#include <linux/kvm_irqfd.h> #include <asm/cputable.h> #include <asm/uaccess.h> #include <asm/kvm_ppc.h> @@ -436,6 +438,16 @@ err_out: return -EINVAL; } +bool kvm_arch_has_vcpu_debugfs(void) +{ + return false; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + void kvm_arch_destroy_vm(struct kvm *kvm) { unsigned int i; @@ -739,6 +751,42 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } +/* + * irq_bypass_add_producer and irq_bypass_del_producer are only + * useful if the architecture supports PCI passthrough. + * irq_bypass_stop and irq_bypass_start are not needed and so + * kvm_ops are not defined for them. + */ +bool kvm_arch_has_irq_bypass(void) +{ + return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) || + (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer)); +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_add_producer) + return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod); + + return 0; +} + +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; + + if (kvm->arch.kvm_ops->irq_bypass_del_producer) + kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); +} + static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -1167,6 +1215,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ +#ifdef CONFIG_KVM_MPIC + if (kvm->arch.mpic) + return true; +#endif +#ifdef CONFIG_KVM_XICS + if (kvm->arch.xics) + return true; +#endif + return false; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 33d9daff5783..fb21990c0fb4 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked, __entry->runner_vcpu, __entry->n_runnable, __entry->tgid) ); +TRACE_EVENT(kvmppc_vcore_wakeup, + TP_PROTO(int do_sleep, __u64 ns), + + TP_ARGS(do_sleep, ns), + + TP_STRUCT__entry( + __field(__u64, ns) + __field(int, waited) + __field(pid_t, tgid) + ), + + TP_fast_assign( + __entry->ns = ns; + __entry->waited = do_sleep; + __entry->tgid = current->tgid; + ), + + TP_printk("%s time %lld ns, tgid=%d", + __entry->waited ? "wait" : "poll", + __entry->ns, __entry->tgid) +); + TRACE_EVENT(kvmppc_run_vcpu_enter, TP_PROTO(struct kvm_vcpu *vcpu), diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index ba21be15310f..ad5290005ca4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -22,7 +22,7 @@ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o ifeq ($(CONFIG_GENERIC_CSUM),) -obj-y += checksum_$(CONFIG_WORD_SIZE).o checksum_wrappers.o +obj-y += checksum_$(BITS).o checksum_wrappers.o endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index d90870a66b60..aa8214f30c92 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,17 +127,19 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - andi. r0,r4,1 /* is destination address even ? */ - cmplwi cr7,r0,0 addic r12,r6,0 addi r6,r4,-4 neg r0,r4 addi r4,r3,-4 andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + crset 4*cr7+eq beq 58f cmplw 0,r5,r0 /* is this more than total to do? */ blt 63f /* if not much to do */ + rlwinm r7,r6,3,0x8 + rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r7,0 /* is destination address even ? */ andi. r8,r0,3 /* get it word-aligned first */ mtctr r8 beq+ 61f @@ -237,7 +239,7 @@ _GLOBAL(csum_partial_copy_generic) 66: addze r3,r12 addi r1,r1,16 beqlr+ cr7 - rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */ + rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr /* read fault */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 74145f02ad41..043415f0bdb1 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -188,7 +188,10 @@ void __init apply_feature_fixups(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); #endif do_final_fixups(); +} +void __init setup_feature_keys(void) +{ /* * Initialise jump label. This causes all the cpu/mmu_has_feature() * checks to take on their correct polarity based on the current set of diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 43435c6892fb..eda7a96161ab 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -37,6 +37,7 @@ _GLOBAL(memset) clrldi r5,r5,58 mtctr r0 beq 5f + .balign 16 4: std r4,0(r6) std r4,8(r6) std r4,16(r6) @@ -90,6 +91,7 @@ _GLOBAL(backwards_memcpy) andi. r0,r6,3 mtctr r7 bne 5f + .balign 16 1: lwz r7,-4(r4) lwzu r8,-8(r4) stw r7,-4(r6) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index f2cea6d5e764..1a4e570f7894 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -7,17 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y := fault.o mem.o pgtable.o mmap.o \ - init_$(CONFIG_WORD_SIZE).o \ - pgtable_$(CONFIG_WORD_SIZE).o + init_$(BITS).o pgtable_$(BITS).o obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o -obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o +obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o -obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o +obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o ifeq ($(CONFIG_PPC_STD_MMU_64),y) obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a4db22f65021..d0b137d96df1 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -26,7 +26,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/highmem.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/perf_event.h> @@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. */ -int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, +int do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { enum ctx_state prev_state = exception_enter(); @@ -498,8 +498,8 @@ bad_area_nosemaphore: bail: exception_exit(prev_state); return rc; - } +NOKPROBE_SYMBOL(do_page_fault); /* * bad_page_fault is called when we have a bad access from the kernel. diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 0e4e9654bd2c..83ddc0e171b0 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid, } #endif -static inline int __hpte_actual_psize(unsigned int lp, int psize) -{ - int i, shift; - unsigned int mask; - - /* start from 1 ignoring MMU_PAGE_4K */ - for (i = 1; i < MMU_PAGE_COUNT; i++) { - - /* invalid penc */ - if (mmu_psize_defs[psize].penc[i] == -1) - continue; - /* - * encoding bits per actual page size - * PTE LP actual page size - * rrrr rrrz >=8KB - * rrrr rrzz >=16KB - * rrrr rzzz >=32KB - * rrrr zzzz >=64KB - * ....... - */ - shift = mmu_psize_defs[i].shift - LP_SHIFT; - if (shift > LP_BITS) - shift = LP_BITS; - mask = (1 << shift) - 1; - if ((lp & mask) == mmu_psize_defs[psize].penc[i]) - return i; - } - return -1; -} - static void hpte_decode(struct hash_pte *hpte, unsigned long slot, int *psize, int *apsize, int *ssize, unsigned long *vpn) { @@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; } else { - for (size = 0; size < MMU_PAGE_COUNT; size++) { - - /* valid entries have a shift value */ - if (!mmu_psize_defs[size].shift) - continue; - - a_size = __hpte_actual_psize(lp, size); - if (a_size != -1) - break; - } + size = hpte_page_sizes[lp] & 0xf; + a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ if (cpu_has_feature(CPU_FTR_ARCH_300)) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0821556e16f4..90480e23fd2c 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -93,6 +93,9 @@ static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); +u8 hpte_page_sizes[1 << LP_BITS]; +EXPORT_SYMBOL_GPL(hpte_page_sizes); + struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; @@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void) #endif /* CONFIG_HUGETLB_PAGE */ } +/* + * Fill in the hpte_page_sizes[] array. + * We go through the mmu_psize_defs[] array looking for all the + * supported base/actual page size combinations. Each combination + * has a unique pagesize encoding (penc) value in the low bits of + * the LP field of the HPTE. For actual page sizes less than 1MB, + * some of the upper LP bits are used for RPN bits, meaning that + * we need to fill in several entries in hpte_page_sizes[]. + * + * In diagrammatic form, with r = RPN bits and z = page size bits: + * PTE LP actual page size + * rrrr rrrz >=8KB + * rrrr rrzz >=16KB + * rrrr rzzz >=32KB + * rrrr zzzz >=64KB + * ... + * + * The zzzz bits are implementation-specific but are chosen so that + * no encoding for a larger page size uses the same value in its + * low-order N bits as the encoding for the 2^(12+N) byte page size + * (if it exists). + */ +static void init_hpte_page_sizes(void) +{ + long int ap, bp; + long int shift, penc; + + for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) { + if (!mmu_psize_defs[bp].shift) + continue; /* not a supported page size */ + for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) { + penc = mmu_psize_defs[bp].penc[ap]; + if (penc == -1) + continue; + shift = mmu_psize_defs[ap].shift - LP_SHIFT; + if (shift <= 0) + continue; /* should never happen */ + /* + * For page sizes less than 1MB, this loop + * replicates the entry for all possible values + * of the rrrr bits. + */ + while (penc < (1 << LP_BITS)) { + hpte_page_sizes[penc] = (ap << 4) | bp; + penc += 1 << shift; + } + } + } +} + static void __init htab_init_page_sizes(void) { + init_hpte_page_sizes(); + if (!debug_pagealloc_enabled()) { /* * Pick a size for the linear mapping. Currently, we only @@ -711,6 +766,29 @@ int remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ +static void update_hid_for_hash(void) +{ + unsigned long hid0; + unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ + + asm volatile("ptesync": : :"memory"); + /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory"); + asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + /* + * now switch the HID + */ + hid0 = mfspr(SPRN_HID0); + hid0 &= ~HID0_POWER9_RADIX; + mtspr(SPRN_HID0, hid0); + asm volatile("isync": : :"memory"); + + /* Wait for it to happen */ + while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) + cpu_relax(); +} + static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { @@ -737,6 +815,8 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, */ partition_tb->patb1 = 0; pr_info("Partition table %p\n", partition_tb); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_hash(); /* * update partition table control register, * 64 K size. @@ -1460,6 +1540,29 @@ out_exit: local_irq_restore(flags); } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline void tm_flush_hash_page(int local) +{ + /* + * Transactions are not aborted by tlbiel, only tlbie. Without, syncing a + * page back to a block device w/PIO could pick up transactional data + * (bad!) so we force an abort here. Before the sync the page will be + * made read-only, which will flush_hash_page. BIG ISSUE here: if the + * kernel uses a page from userspace without unmapping it first, it may + * see the speculated version. + */ + if (local && cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + MSR_TM_ACTIVE(current->thread.regs->msr)) { + tm_enable(); + tm_abort(TM_CAUSE_TLBI); + } +} +#else +static inline void tm_flush_hash_page(int local) +{ +} +#endif + /* WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ @@ -1486,21 +1589,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, ssize, local); } pte_iterate_hashed_end(); -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Transactions are not aborted by tlbiel, only tlbie. - * Without, syncing a page back to a block device w/ PIO could pick up - * transactional data (bad!) so we force an abort here. Before the - * sync the page will be made read-only, which will flush_hash_page. - * BIG ISSUE here: if the kernel uses a page from userspace without - * unmapping it first, it may see the speculated version. - */ - if (local && cpu_has_feature(CPU_FTR_TM) && - current->thread.regs && - MSR_TM_ACTIVE(current->thread.regs->msr)) { - tm_enable(); - tm_abort(TM_CAUSE_TLBI); - } -#endif + tm_flush_hash_page(local); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -1557,22 +1646,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, MMU_PAGE_16M, ssize, local); } tm_abort: -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - /* Transactions are not aborted by tlbiel, only tlbie. - * Without, syncing a page back to a block device w/ PIO could pick up - * transactional data (bad!) so we force an abort here. Before the - * sync the page will be made read-only, which will flush_hash_page. - * BIG ISSUE here: if the kernel uses a page from userspace without - * unmapping it first, it may see the speculated version. - */ - if (local && cpu_has_feature(CPU_FTR_TM) && - current->thread.regs && - MSR_TM_ACTIVE(current->thread.regs->msr)) { - tm_enable(); - tm_abort(TM_CAUSE_TLBI); - } -#endif - return; + tm_flush_hash_page(local); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7372ee13eb1e..a5d3ecdabc44 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1019,8 +1019,15 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, pte = READ_ONCE(*ptep); mask = _PAGE_PRESENT | _PAGE_READ; + + /* + * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined + * as 0 and _PAGE_RO has to be set when a page is not writable + */ if (write) mask |= _PAGE_WRITE; + else + mask |= _PAGE_RO; if ((pte_val(pte) & mask) != mask) return 0; diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 448685fbf27c..8a7c38b8d335 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -137,7 +137,7 @@ void __init MMU_init(void) if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock_enforce_memory_limit(memblock.memory.regions[0].size); - printk(KERN_WARNING "Only using first contiguous memory region"); + pr_warn("Only using first contiguous memory region\n"); #else wii_memory_fixups(); #endif diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index da6a2168ae9e..e0f1c33601dd 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -15,6 +15,9 @@ #include <linux/rculist.h> #include <linux/vmalloc.h> #include <linux/mutex.h> +#include <linux/migrate.h> +#include <linux/hugetlb.h> +#include <linux/swap.h> #include <asm/mmu_context.h> static DEFINE_MUTEX(mem_list_mutex); @@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void) } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); +/* + * Taken from alloc_migrate_target with changes to remove CMA allocations + */ +struct page *new_iommu_non_cma_page(struct page *page, unsigned long private, + int **resultp) +{ + gfp_t gfp_mask = GFP_USER; + struct page *new_page; + + if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + return NULL; + + if (PageHighMem(page)) + gfp_mask |= __GFP_HIGHMEM; + + /* + * We don't want the allocation to force an OOM if possibe + */ + new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN); + return new_page; +} + +static int mm_iommu_move_page_from_cma(struct page *page) +{ + int ret = 0; + LIST_HEAD(cma_migrate_pages); + + /* Ignore huge pages for now */ + if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + return -EBUSY; + + lru_add_drain(); + ret = isolate_lru_page(page); + if (ret) + return ret; + + list_add(&page->lru, &cma_migrate_pages); + put_page(page); /* Drop the gup reference */ + + ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page, + NULL, 0, MIGRATE_SYNC, MR_CMA); + if (ret) { + if (!list_empty(&cma_migrate_pages)) + putback_movable_pages(&cma_migrate_pages); + } + + return 0; +} + long mm_iommu_get(unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { @@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, for (i = 0; i < entries; ++i) { if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), 1/* pages */, 1/* iswrite */, &page)) { + ret = -EFAULT; for (j = 0; j < i; ++j) - put_page(pfn_to_page( - mem->hpas[j] >> PAGE_SHIFT)); + put_page(pfn_to_page(mem->hpas[j] >> + PAGE_SHIFT)); vfree(mem->hpas); kfree(mem); - ret = -EFAULT; goto unlock_exit; } - + /* + * If we get a page from the CMA zone, since we are going to + * be pinning these entries, we might as well move them out + * of the CMA zone if possible. NOTE: faulting in + migration + * can be expensive. Batching can be considered later + */ + if (get_pageblock_migratetype(page) == MIGRATE_CMA) { + if (mm_iommu_move_page_from_cma(page)) + goto populate; + if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + 1/* pages */, 1/* iswrite */, + &page)) { + ret = -EFAULT; + for (j = 0; j < i; ++j) + put_page(pfn_to_page(mem->hpas[j] >> + PAGE_SHIFT)); + vfree(mem->hpas); + kfree(mem); + goto unlock_exit; + } + } +populate: mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 7d95bc402dba..c491f2c8f2b9 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -369,44 +369,34 @@ void destroy_context(struct mm_struct *mm) } #ifdef CONFIG_SMP - -static int mmu_context_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int mmu_ctx_cpu_prepare(unsigned int cpu) { - unsigned int cpu = (unsigned int)(long)hcpu; - /* We don't touch CPU 0 map, it's allocated at aboot and kept * around forever */ if (cpu == boot_cpuid) - return NOTIFY_OK; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); - stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); - break; -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); - kfree(stale_map[cpu]); - stale_map[cpu] = NULL; - - /* We also clear the cpu_vm_mask bits of CPUs going away */ - clear_tasks_mm_cpumask(cpu); - break; -#endif /* CONFIG_HOTPLUG_CPU */ - } - return NOTIFY_OK; + return 0; + + pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu); + stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL); + return 0; } -static struct notifier_block mmu_context_cpu_nb = { - .notifier_call = mmu_context_cpu_notify, -}; +static int mmu_ctx_cpu_dead(unsigned int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (cpu == boot_cpuid) + return 0; + + pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu); + kfree(stale_map[cpu]); + stale_map[cpu] = NULL; + + /* We also clear the cpu_vm_mask bits of CPUs going away */ + clear_tasks_mm_cpumask(cpu); +#endif + return 0; +} #endif /* CONFIG_SMP */ @@ -469,7 +459,9 @@ void __init mmu_context_init(void) #else stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0); - register_cpu_notifier(&mmu_context_cpu_nb); + cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE, + "powerpc/mmu/ctx:prepare", + mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead); #endif printk(KERN_INFO diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 34079302cc17..f4f437cbabf1 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -35,7 +35,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, #endif changed = !pmd_same(*(pmdp), entry); if (changed) { - __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); + __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry)); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; @@ -116,3 +116,12 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, return; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* For use by kexec */ +void mmu_cleanup_all(void) +{ + if (radix_enabled()) + radix__mmu_cleanup_all(); + else if (mmu_hash_ops.hpte_clear_all) + mmu_hash_ops.hpte_clear_all(); +} diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index af897d91d09f..ed7bddc456b7 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -294,6 +294,32 @@ found: return; } +static void update_hid_for_radix(void) +{ + unsigned long hid0; + unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */ + + asm volatile("ptesync": : :"memory"); + /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */ + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory"); + /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */ + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory"); + asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory"); + /* + * now switch the HID + */ + hid0 = mfspr(SPRN_HID0); + hid0 |= HID0_POWER9_RADIX; + mtspr(SPRN_HID0, hid0); + asm volatile("isync": : :"memory"); + + /* Wait for it to happen */ + while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX)) + cpu_relax(); +} + void __init radix__early_init_mmu(void) { unsigned long lpcr; @@ -345,6 +371,8 @@ void __init radix__early_init_mmu(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) { radix_init_native(); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_radix(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); @@ -368,6 +396,18 @@ void radix__early_init_mmu_secondary(void) } } +void radix__mmu_cleanup_all(void) +{ + unsigned long lpcr; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); + mtspr(SPRN_PTCR, 0); + radix__flush_tlb_all(); + } +} + void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 0b6fb244d0a1..911fdfb63ec1 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, if (changed) { if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); - __ptep_set_access_flags(ptep, entry); + __ptep_set_access_flags(vma->vm_mm, ptep, entry); flush_tlb_page(vma, address); } return changed; diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index dfdb90cb4403..e2974fcd20f1 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -113,7 +113,12 @@ BEGIN_FTR_SECTION END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) b slb_finish_load_1T -0: +0: /* + * For userspace addresses, make sure this is region 0. + */ + cmpdi r9, 0 + bne 8f + /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs * array. @@ -173,11 +178,9 @@ BEGIN_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) b slb_finish_load -8: /* invalid EA */ - li r10,0 /* BAD_VSID */ - li r9,0 /* BAD_VSID */ - li r11,SLB_VSID_USER /* flags don't much matter */ - b slb_finish_load +8: /* invalid EA - return an error indication */ + crset 4*cr0+eq /* indicate failure */ + blr /* * Finish loading of an SLB entry and return diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 48df05ef5231..0e49ec541ab5 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -400,3 +400,27 @@ void radix__flush_pmd_tlb_range(struct vm_area_struct *vma, radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M); } EXPORT_SYMBOL(radix__flush_pmd_tlb_range); + +void radix__flush_tlb_all(void) +{ + unsigned long rb,prs,r,rs; + unsigned long ric = RIC_FLUSH_ALL; + + rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */ + prs = 0; /* partition scoped */ + r = 1; /* raidx format */ + rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */ + + asm volatile("ptesync": : :"memory"); + /* + * now flush guest entries by passing PRS = 1 and LPID != 0 + */ + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory"); + /* + * now flush host entires by passing PRS = 0 and LPID == 0 + */ + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d5301b6f20d0..89f70073dec8 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -40,6 +40,8 @@ #define PPC_BLR() EMIT(PPC_INST_BLR) #define PPC_BLRL() EMIT(PPC_INST_BLRL) #define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r)) +#define PPC_BCTR() EMIT(PPC_INST_BCTR) +#define PPC_MTCTR(r) EMIT(PPC_INST_MTCTR | ___PPC_RT(r)) #define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \ ___PPC_RA(a) | IMM_L(i)) #define PPC_MR(d, a) PPC_OR(d, a, a) diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 5046d6f65c02..62fa7589db2b 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,30 +16,33 @@ /* * Stack layout: + * Ensure the top half (upto local_tmp_var) stays consistent + * with our redzone usage. * * [ prev sp ] <------------- * [ nv gpr save area ] 8*8 | + * [ tail_call_cnt ] 8 | + * [ local_tmp_var ] 8 | * fp (r31) --> [ ebpf stack space ] 512 | - * [ local/tmp var space ] 16 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ -/* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 /* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */ #define BPF_PPC_STACK_SAVE (8*8) +/* for bpf JIT code internal usage */ +#define BPF_PPC_STACK_LOCALS 16 /* Ensure this is quadword aligned */ -#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS + \ - MAX_BPF_STACK + BPF_PPC_STACK_SAVE) +#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + MAX_BPF_STACK + \ + BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) #ifndef __ASSEMBLY__ /* BPF register usage */ -#define SKB_HLEN_REG (MAX_BPF_REG + 0) -#define SKB_DATA_REG (MAX_BPF_REG + 1) -#define TMP_REG_1 (MAX_BPF_REG + 2) -#define TMP_REG_2 (MAX_BPF_REG + 3) +#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0) +#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) /* BPF to ppc register mappings */ static const int b2p[] = { @@ -59,12 +62,16 @@ static const int b2p[] = { /* frame pointer aka BPF_REG_10 */ [BPF_REG_FP] = 31, /* eBPF jit internal registers */ + [BPF_REG_AX] = 2, [SKB_HLEN_REG] = 25, [SKB_DATA_REG] = 26, [TMP_REG_1] = 9, [TMP_REG_2] = 10 }; +/* PPC NVR range -- update this if we ever use NVRs below r24 */ +#define BPF_PPC_NVR_MIN 24 + /* Assembly helpers */ #define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ u64 func##_negative_offset(u64 r3, u64 r4); \ @@ -82,6 +89,7 @@ DECLARE_LOAD_FUNC(sk_load_byte); #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ #define SEEN_SKB 0x4000 /* uses sk_buff */ +#define SEEN_TAILCALL 0x8000 /* uses tail calls */ struct codegen_context { /* diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 6073b78516f6..0fe98a567125 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -17,6 +17,7 @@ #include <linux/filter.h> #include <linux/if_vlan.h> #include <asm/kprobes.h> +#include <linux/bpf.h> #include "bpf_jit64.h" @@ -58,6 +59,40 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); } +/* + * When not setting up our own stackframe, the redzone usage is: + * + * [ prev sp ] <------------- + * [ ... ] | + * sp (r1) ---> [ stack pointer ] -------------- + * [ nv gpr save area ] 8*8 + * [ tail_call_cnt ] 8 + * [ local_tmp_var ] 8 + * [ unused red zone ] 208 bytes protected + */ +static int bpf_jit_stack_local(struct codegen_context *ctx) +{ + if (bpf_has_stack_frame(ctx)) + return STACK_FRAME_MIN_SIZE + MAX_BPF_STACK; + else + return -(BPF_PPC_STACK_SAVE + 16); +} + +static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) +{ + return bpf_jit_stack_local(ctx) + 8; +} + +static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) +{ + if (reg >= BPF_PPC_NVR_MIN && reg < 32) + return (bpf_has_stack_frame(ctx) ? BPF_PPC_STACKFRAME : 0) + - (8 * (32 - reg)); + + pr_err("BPF JIT is asking about unknown registers"); + BUG(); +} + static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) { /* @@ -73,36 +108,27 @@ static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); } -static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { -#ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); + int i; + /* - * Load TOC from function descriptor at offset 8. - * We can clobber r2 since we get called through a - * function pointer (so caller will save/restore r2) - * and since we don't use a TOC ourself. + * Initialize tail_call_cnt if we do tail calls. + * Otherwise, put in NOPs so that it can be skipped when we are + * invoked through a tail call. */ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); -#endif - PPC_BLRL(); -} + if (ctx->seen & SEEN_TAILCALL) { + PPC_LI(b2p[TMP_REG_1], 0); + /* this goes in the redzone */ + PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); + } else { + PPC_NOP(); + PPC_NOP(); + } -static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) -{ - int i; - bool new_stack_frame = bpf_has_stack_frame(ctx); +#define BPF_TAILCALL_PROLOGUE_SIZE 8 - if (new_stack_frame) { + if (bpf_has_stack_frame(ctx)) { /* * We need a stack frame, but we don't necessarily need to * save/restore LR unless we call other functions @@ -122,9 +148,7 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, i)) - PPC_BPF_STL(b2p[i], 1, - (new_stack_frame ? BPF_PPC_STACKFRAME : 0) - - (8 * (32 - b2p[i]))); + PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* * Save additional non-volatile regs if we cache skb @@ -132,53 +156,142 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) */ if (ctx->seen & SEEN_SKB) { PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, - BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); + bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); PPC_BPF_STL(b2p[SKB_DATA_REG], 1, - BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); + bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); bpf_jit_emit_skb_loads(image, ctx); } /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) PPC_ADDI(b2p[BPF_REG_FP], 1, - BPF_PPC_STACKFRAME - BPF_PPC_STACK_SAVE); + STACK_FRAME_MIN_SIZE + MAX_BPF_STACK); } -static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) { int i; - bool new_stack_frame = bpf_has_stack_frame(ctx); - - /* Move result to r3 */ - PPC_MR(3, b2p[BPF_REG_0]); /* Restore NVRs */ for (i = BPF_REG_6; i <= BPF_REG_10; i++) if (bpf_is_seen_register(ctx, i)) - PPC_BPF_LL(b2p[i], 1, - (new_stack_frame ? BPF_PPC_STACKFRAME : 0) - - (8 * (32 - b2p[i]))); + PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); /* Restore non-volatile registers used for skb cache */ if (ctx->seen & SEEN_SKB) { PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, - BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_HLEN_REG]))); + bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); PPC_BPF_LL(b2p[SKB_DATA_REG], 1, - BPF_PPC_STACKFRAME - (8 * (32 - b2p[SKB_DATA_REG]))); + bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); } /* Tear down our stack frame */ - if (new_stack_frame) { + if (bpf_has_stack_frame(ctx)) { PPC_ADDI(1, 1, BPF_PPC_STACKFRAME); if (ctx->seen & SEEN_FUNC) { PPC_BPF_LL(0, 1, PPC_LR_STKOFF); PPC_MTLR(0); } } +} + +static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + bpf_jit_emit_common_epilogue(image, ctx); + + /* Move result to r3 */ + PPC_MR(3, b2p[BPF_REG_0]); PPC_BLR(); } +static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +{ +#ifdef PPC64_ELF_ABI_v1 + /* func points to the function descriptor */ + PPC_LI64(b2p[TMP_REG_2], func); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); + /* ... and move it to LR */ + PPC_MTLR(b2p[TMP_REG_1]); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2) + * and since we don't use a TOC ourself. + */ + PPC_BPF_LL(2, b2p[TMP_REG_2], 8); +#else + /* We can clobber r12 */ + PPC_FUNC_ADDR(12, func); + PPC_MTLR(12); +#endif + PPC_BLRL(); +} + +static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +{ + /* + * By now, the eBPF program has already setup parameters in r3, r4 and r5 + * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program + * r4/BPF_REG_2 - pointer to bpf_array + * r5/BPF_REG_3 - index in bpf_array + */ + int b2p_bpf_array = b2p[BPF_REG_2]; + int b2p_index = b2p[BPF_REG_3]; + + /* + * if (index >= array->map.max_entries) + * goto out; + */ + PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); + PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); + PPC_BCC(COND_GE, out); + + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; + */ + PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); + PPC_BCC(COND_GT, out); + + /* + * tail_call_cnt++; + */ + PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1); + PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + + /* prog = array->ptrs[index]; */ + PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); + PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); + PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + + /* + * if (prog == NULL) + * goto out; + */ + PPC_CMPLDI(b2p[TMP_REG_1], 0); + PPC_BCC(COND_EQ, out); + + /* goto *(prog->bpf_func + prologue_size); */ + PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); +#ifdef PPC64_ELF_ABI_v1 + /* skip past the function descriptor */ + PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], + FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE); +#else + PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE); +#endif + PPC_MTCTR(b2p[TMP_REG_1]); + + /* tear down stack, restore NVRs, ... */ + bpf_jit_emit_common_epilogue(image, ctx); + + PPC_BCTR(); + /* out: */ +} + /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, @@ -200,7 +313,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u64 imm64; u8 *func; u32 true_cond; - int stack_local_off; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -219,9 +331,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, * optimization but everything else should work without * any issues. */ - if (dst_reg >= 24 && dst_reg <= 31) + if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) bpf_set_seen_register(ctx, insn[i].dst_reg); - if (src_reg >= 24 && src_reg <= 31) + if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) bpf_set_seen_register(ctx, insn[i].src_reg); switch (code) { @@ -490,25 +602,12 @@ bpf_alu32_trunc: * Way easier and faster(?) to store the value * into stack and then use ldbrx * - * First, determine where in stack we can store - * this: - * - if we have allotted a stack frame, then we - * will utilize the area set aside by - * BPF_PPC_STACK_LOCALS - * - else, we use the area beneath the NV GPR - * save area - * * ctx->seen will be reliable in pass2, but * the instructions generated will remain the * same across all passes */ - if (bpf_has_stack_frame(ctx)) - stack_local_off = STACK_FRAME_MIN_SIZE; - else - stack_local_off = -(BPF_PPC_STACK_SAVE + 8); - - PPC_STD(dst_reg, 1, stack_local_off); - PPC_ADDI(b2p[TMP_REG_1], 1, stack_local_off); + PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); + PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); break; } @@ -668,7 +767,7 @@ emit_clear: /* Save skb pointer if we need to re-cache skb data */ if (bpf_helper_changes_skb_data(func)) - PPC_BPF_STL(3, 1, STACK_FRAME_MIN_SIZE); + PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -678,7 +777,7 @@ emit_clear: /* refresh skb cache */ if (bpf_helper_changes_skb_data(func)) { /* reload skb pointer to r3 */ - PPC_BPF_LL(3, 1, STACK_FRAME_MIN_SIZE); + PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); bpf_jit_emit_skb_loads(image, ctx); } break; @@ -837,9 +936,12 @@ common_load: break; /* - * TODO: Tail call + * Tail call */ case BPF_JMP | BPF_CALL | BPF_X: + ctx->seen |= SEEN_TAILCALL; + bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + break; default: /* @@ -872,21 +974,37 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) int pass; int flen; struct bpf_binary_header *bpf_hdr; + struct bpf_prog *org_fp = fp; + struct bpf_prog *tmp_fp; + bool bpf_blinded = false; if (!bpf_jit_enable) - return fp; + return org_fp; + + tmp_fp = bpf_jit_blind_constants(org_fp); + if (IS_ERR(tmp_fp)) + return org_fp; + + if (tmp_fp != org_fp) { + bpf_blinded = true; + fp = tmp_fp; + } flen = fp->len; addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) - return fp; + if (addrs == NULL) { + fp = org_fp; + goto out; + } + + memset(&cgctx, 0, sizeof(struct codegen_context)); - cgctx.idx = 0; - cgctx.seen = 0; /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) + if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { /* We hit something illegal or unsupported. */ + fp = org_fp; goto out; + } /* * Pretend to build prologue, given the features we've seen. This will @@ -901,8 +1019,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, bpf_jit_fill_ill_insns); - if (!bpf_hdr) + if (!bpf_hdr) { + fp = org_fp; goto out; + } code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); @@ -939,6 +1059,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) out: kfree(addrs); + + if (bpf_blinded) + bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); + return fp; } diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index b129d007e7fe..b19265de9178 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -43,7 +43,7 @@ static unsigned int profiling_interval; #define SPU_PC_MASK 0xFFFF DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); -unsigned long oprof_spu_smpl_arry_lck_flags; +static unsigned long oprof_spu_smpl_arry_lck_flags; void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) { diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index ef2142ff7dbd..83d2b4ef7f0d 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -36,7 +36,7 @@ static DEFINE_SPINLOCK(buffer_lock); static DEFINE_SPINLOCK(cache_lock); static int num_spu_nodes; -int spu_prof_num_nodes; +static int spu_prof_num_nodes; struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; struct delayed_work spu_work; @@ -88,7 +88,7 @@ static void spu_buff_add(unsigned long int value, int spu) /* This function copies the per SPU buffers to the * OProfile kernel buffer. */ -void sync_spu_buff(void) +static void sync_spu_buff(void) { int spu; unsigned long flags; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4ed377f0f7b2..72c27b8d2cf3 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2158,7 +2158,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -int power_pmu_prepare_cpu(unsigned int cpu) +static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7aa37236bb70..43fabb3cae0f 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -131,7 +131,7 @@ static const struct attribute_group *attr_groups[] = { #define HGPCI_MAX_DATA_BYTES \ (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params)) -DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); +static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); struct hv_gpci_request_buffer { struct hv_get_perf_counter_info_params params; diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index a383c23a9070..7963658dbc22 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -416,7 +416,7 @@ static struct attribute *power7_pmu_format_attr[] = { NULL, }; -struct attribute_group power7_pmu_format_group = { +static struct attribute_group power7_pmu_format_group = { .name = "format", .attrs = power7_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5fde2b192fec..ab830d106ec5 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -204,7 +204,7 @@ static struct attribute *power8_pmu_format_attr[] = { NULL, }; -struct attribute_group power8_pmu_format_group = { +static struct attribute_group power8_pmu_format_group = { .name = "format", .attrs = power8_pmu_format_attr, }; diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 788346303852..8e9a81967ff8 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -119,7 +119,7 @@ static struct attribute *power9_pmu_format_attr[] = { NULL, }; -struct attribute_group power9_pmu_format_group = { +static struct attribute_group power9_pmu_format_group = { .name = "format", .attrs = power9_pmu_format_attr, }; diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index 5ecce543103e..a886c2c22097 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -204,7 +204,7 @@ static void pika_setup_critical_temp(struct device_node *np, i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */ irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n"); return; } diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index 0035d146df73..fe4d4eac7427 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -97,7 +97,7 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, status |= (ignore | mask); if (status == 0xff) - return NO_IRQ; + return 0; cpld_irq = ffz(status) + offset; @@ -110,14 +110,14 @@ static void cpld_pic_cascade(struct irq_desc *desc) irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); - if (irq != NO_IRQ) { + if (irq) { generic_handle_irq(irq); return; } irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status, &cpld_regs->misc_mask); - if (irq != NO_IRQ) { + if (irq) { generic_handle_irq(irq); return; } @@ -177,7 +177,7 @@ mpc5121_ads_cpld_pic_init(void) goto end; cascade_irq = irq_of_parse_and_map(np, 0); - if (cascade_irq == NO_IRQ) + if (!cascade_irq) goto end; /* diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c index 8eb82b043dd8..cec3f88f153d 100644 --- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -473,7 +473,7 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev) } lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - if (lpbfifo.irq == NO_IRQ) { + if (!lpbfifo.irq) { dev_err(&pdev->dev, "mapping irq failed\n"); ret = -ENODEV; goto err0; @@ -528,7 +528,6 @@ static struct platform_driver mpc512x_lpbfifo_driver = { .remove = mpc512x_lpbfifo_remove, .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, .of_match_table = mpc512x_lpbfifo_match, }, }; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 4fe2074c88cb..fc98912f42cf 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -511,7 +511,7 @@ unsigned int mpc52xx_get_irq(void) irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET); } } else { - return NO_IRQ; + return 0; } return irq_linear_revmap(mpc52xx_irqhost, irq); diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 60e89fc9c753..8b065bdf7412 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -131,7 +131,7 @@ int __init pq2ads_pci_init_irq(void) } irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { printk(KERN_ERR "No interrupt in pci pic node.\n"); of_node_put(np); goto out; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index dbcd0303afed..63c5ab6489c9 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -222,7 +222,6 @@ static const struct of_device_id mcu_of_match_table[] = { static struct i2c_driver mcu_driver = { .driver = { .name = "mcu-mpc8349emitx", - .owner = THIS_MODULE, .of_match_table = mcu_of_match_table, }, .probe = mcu_probe, diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 2ef03e7d248c..0d6a62fc5864 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk, goto err; ret = of_irq_to_resource(np, 0, &res[1]); - if (ret == NO_IRQ) + if (!ret) goto err; pdev = platform_device_alloc("mpc83xx_spi", i); diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index fcbea4b51a78..24717d060008 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -352,7 +352,7 @@ static int pmc_probe(struct platform_device *ofdev) return -ENODEV; pmc_irq = irq_of_parse_and_map(np, 0); - if (pmc_irq != NO_IRQ) { + if (pmc_irq) { ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED, "pmc", ofdev); @@ -400,7 +400,7 @@ out_syscr: out_pmc: iounmap(pmc_regs); out: - if (pmc_irq != NO_IRQ) + if (pmc_irq) free_irq(pmc_irq, ofdev); return ret; diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 28720a4ded7b..954e5e8b14ef 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -76,7 +76,7 @@ void __init mpc85xx_cpm2_pic_init(void) return; } irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { of_node_put(np); printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n"); return; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 62f171c71c4c..86f20156178e 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -196,7 +196,7 @@ static void mpc85xx_8259_cascade_handler(struct irq_desc *desc) { unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) /* handle an interrupt from the 8259 */ generic_handle_irq(cascade_irq); @@ -247,7 +247,7 @@ static int mpc85xx_cds_8259_attach(void) } cascade_irq = irq_of_parse_and_map(cascade_node, 0); - if (cascade_irq == NO_IRQ) { + if (!cascade_irq) { printk(KERN_ERR "Failed to map cascade interrupt\n"); return -ENXIO; } diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 6bc07d837b1c..ed69c7ee1829 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -51,7 +51,7 @@ static void mpc85xx_8259_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) { + if (cascade_irq) { generic_handle_irq(cascade_irq); } chip->irq_eoi(&desc->irq_data); @@ -96,7 +96,7 @@ void __init mpc85xx_ds_pic_init(void) } cascade_irq = irq_of_parse_and_map(cascade_node, 0); - if (cascade_irq == NO_IRQ) { + if (!cascade_irq) { printk(KERN_ERR "Failed to map cascade interrupt\n"); return; } diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index b02d6a5bb035..82f8490b5aa7 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -78,7 +78,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) break; } if (i == 3) - return NO_IRQ; + return 0; raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags); cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i)); @@ -103,7 +103,7 @@ static void socrates_fpga_pic_cascade(struct irq_desc *desc) */ cascade_irq = socrates_fpga_pic_get_irq(irq); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); } @@ -292,7 +292,7 @@ void socrates_fpga_pic_init(struct device_node *pic) for (i = 0; i < 3; i++) { socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i); - if (socrates_fpga_irqs[i] == NO_IRQ) { + if (!socrates_fpga_irqs[i]) { pr_warning("FPGA PIC: can't get irq%d.\n", i); continue; } diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index 845defa1fd19..a6c695fa4da0 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -22,7 +22,7 @@ static void mpc86xx_8259_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); @@ -58,7 +58,7 @@ void __init mpc86xx_init_irq(void) } cascade_irq = irq_of_parse_and_map(cascade_node, 0); - if (cascade_irq == NO_IRQ) { + if (!cascade_irq) { printk(KERN_ERR "Failed to map cascade interrupt\n"); return; } diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index b1ab6e96cb31..f81069f79a94 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -241,6 +241,6 @@ void __init mpc8xx_pics_init(void) } irq = cpm_pic_init(); - if (irq != NO_IRQ) + if (irq) irq_set_chained_handler(irq, cpm_cascade); } diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index f32edec13fd1..ca2da30ad2ab 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -74,6 +74,7 @@ config PPC_BOOK3S_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK + select HAVE_KERNEL_XZ config PPC_BOOK3E_64 bool "Embedded processors" @@ -86,6 +87,7 @@ endchoice choice prompt "CPU selection" depends on PPC64 + default POWER8_CPU if CPU_LITTLE_ENDIAN default GENERIC_CPU help This will create a kernel which is optimised for a particular CPU. diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index aed7714495c1..8b55c5f19d4c 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -271,7 +271,7 @@ static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) for_each_pci_msi_entry(entry, dev) { virq = irq_create_direct_mapping(msic->irq_domain); - if (virq == NO_IRQ) { + if (!virq) { dev_warn(&dev->dev, "axon_msi: virq allocation failed!\n"); return -1; @@ -293,7 +293,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; irq_set_msi_desc(entry->irq, NULL); @@ -375,7 +375,7 @@ static int axon_msi_probe(struct platform_device *device) } virq = irq_of_parse_and_map(dn, 0); - if (virq == NO_IRQ) { + if (!virq) { printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n", dn->full_name); goto out_free_fifo; diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 1428d583c238..b926438d73af 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -189,7 +189,7 @@ static struct device_node *cbe_get_be_node(int cpu_id) return NULL; } -void __init cbe_fill_regs_map(struct cbe_regs_map *map) +static void __init cbe_fill_regs_map(struct cbe_regs_map *map) { if(map->be_node) { struct device_node *be, *np; diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 9f609fc8d331..a6bbbaba14a3 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -123,7 +123,7 @@ static void iic_ioexc_cascade(struct irq_desc *desc) unsigned int cirq = irq_linear_revmap(iic_host, base | cascade); - if (cirq != NO_IRQ) + if (cirq) generic_handle_irq(cirq); } /* post-ack level interrupts */ @@ -153,10 +153,10 @@ static unsigned int iic_get_irq(void) *(unsigned long *) &pending = in_be64((u64 __iomem *) &iic->regs->pending_destr); if (!(pending.flags & CBE_IIC_IRQ_VALID)) - return NO_IRQ; + return 0; virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending)); - if (virq == NO_IRQ) - return NO_IRQ; + if (!virq) + return 0; iic->eoi_stack[++iic->eoi_ptr] = pending.prio; BUG_ON(iic->eoi_ptr > 15); return virq; @@ -187,18 +187,12 @@ void iic_message_pass(int cpu, int msg) out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4); } -struct irq_domain *iic_get_irq_host(int node) -{ - return iic_host; -} -EXPORT_SYMBOL_GPL(iic_get_irq_host); - static void iic_request_ipi(int msg) { int virq; virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg)); - if (virq == NO_IRQ) { + if (!virq) { printk(KERN_ERR "iic: failed to map IPI %s\n", smp_ipi_name[msg]); return; @@ -353,7 +347,7 @@ static int __init setup_iic(void) cascade |= 1 << IIC_IRQ_CLASS_SHIFT; cascade |= IIC_UNIT_IIC; cascade = irq_create_mapping(iic_host, cascade); - if (cascade == NO_IRQ) + if (!cascade) continue; /* * irq_data is a generic pointer that gets passed back diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index f7d1a4953ea0..7ff51f96a00e 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -411,7 +411,7 @@ static void cell_iommu_enable_hardware(struct cbe_iommu *iommu) virq = irq_create_mapping(NULL, IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); - BUG_ON(virq == NO_IRQ); + BUG_ON(!virq); ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu); BUG_ON(ret); @@ -651,7 +651,7 @@ static int dma_fixed_dma_supported(struct device *dev, u64 mask) static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); -struct dma_map_ops dma_iommu_fixed_ops = { +static struct dma_map_ops dma_iommu_fixed_ops = { .alloc = dma_fixed_alloc_coherent, .free = dma_fixed_free_coherent, .map_sg = dma_fixed_map_sg, diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c index 348a27b12512..e3ad0c38f017 100644 --- a/arch/powerpc/platforms/cell/pmu.c +++ b/arch/powerpc/platforms/cell/pmu.c @@ -385,7 +385,7 @@ static int __init cbe_init_pm_irq(void) for_each_online_node(node) { irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | (node << IIC_IRQ_NODE_SHIFT)); - if (irq == NO_IRQ) { + if (!irq) { printk("ERROR: Unable to allocate irq for node %d\n", node); return -EINVAL; @@ -412,7 +412,7 @@ void cbe_sync_irq(int node) IIC_IRQ_IOEX_PMI | (node << IIC_IRQ_NODE_SHIFT)); - if (irq == NO_IRQ) { + if (!irq) { printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ "for node %d\n", irq, node); return; diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 2d4f60c0119a..460ab392f0e7 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -298,7 +298,7 @@ int cbe_sysreset_hack(void) } #endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */ -int __init cbe_ptcal_init(void) +static int __init cbe_ptcal_init(void) { int ret; ptcal_start_tok = rtas_token("ibm,cbe-start-ptcal"); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index d06dcac66fcb..ff924af00e78 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -207,11 +207,11 @@ static void spider_irq_cascade(struct irq_desc *desc) cs = in_be32(pic->regs + TIR_CS) >> 24; if (cs == SPIDER_IRQ_INVALID) - virq = NO_IRQ; + virq = 0; else virq = irq_linear_revmap(pic->host, cs); - if (virq != NO_IRQ) + if (virq) generic_handle_irq(virq); chip->irq_eoi(&desc->irq_data); @@ -245,19 +245,19 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) /* Now do the horrible hacks */ tmp = of_get_property(of_node, "#interrupt-cells", NULL); if (tmp == NULL) - return NO_IRQ; + return 0; intsize = *tmp; imap = of_get_property(of_node, "interrupt-map", &imaplen); if (imap == NULL || imaplen < (intsize + 1)) - return NO_IRQ; + return 0; iic = of_find_node_by_phandle(imap[intsize]); if (iic == NULL) - return NO_IRQ; + return 0; imap += intsize + 1; tmp = of_get_property(iic, "#interrupt-cells", NULL); if (tmp == NULL) { of_node_put(iic); - return NO_IRQ; + return 0; } intsize = *tmp; /* Assume unit is last entry of interrupt specifier */ @@ -266,7 +266,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL); if (tmp == NULL) { of_node_put(iic); - return NO_IRQ; + return 0; } /* ugly as hell but works for now */ pic->node_id = (*tmp) >> 1; @@ -281,7 +281,7 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) (pic->node_id << IIC_IRQ_NODE_SHIFT) | (2 << IIC_IRQ_CLASS_SHIFT) | unit); - if (virq == NO_IRQ) + if (!virq) printk(KERN_ERR "spider_pic: failed to map cascade !"); return virq; } @@ -318,7 +318,7 @@ static void __init spider_init_one(struct device_node *of_node, int chip, /* Hook up the cascade interrupt to the iic and nodeid */ virq = spider_find_cascade_and_node(pic); - if (virq == NO_IRQ) + if (!virq) return; irq_set_handler_data(virq, pic); irq_set_chained_handler(virq, spider_irq_cascade); diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index bb4a8e07c229..e84d8fbc2e21 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -402,7 +402,7 @@ static int spu_request_irqs(struct spu *spu) { int ret = 0; - if (spu->irqs[0] != NO_IRQ) { + if (spu->irqs[0]) { snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0", spu->number); ret = request_irq(spu->irqs[0], spu_irq_class_0, @@ -410,7 +410,7 @@ static int spu_request_irqs(struct spu *spu) if (ret) goto bail0; } - if (spu->irqs[1] != NO_IRQ) { + if (spu->irqs[1]) { snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1", spu->number); ret = request_irq(spu->irqs[1], spu_irq_class_1, @@ -418,7 +418,7 @@ static int spu_request_irqs(struct spu *spu) if (ret) goto bail1; } - if (spu->irqs[2] != NO_IRQ) { + if (spu->irqs[2]) { snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2", spu->number); ret = request_irq(spu->irqs[2], spu_irq_class_2, @@ -429,10 +429,10 @@ static int spu_request_irqs(struct spu *spu) return 0; bail2: - if (spu->irqs[1] != NO_IRQ) + if (spu->irqs[1]) free_irq(spu->irqs[1], spu); bail1: - if (spu->irqs[0] != NO_IRQ) + if (spu->irqs[0]) free_irq(spu->irqs[0], spu); bail0: return ret; @@ -440,11 +440,11 @@ bail0: static void spu_free_irqs(struct spu *spu) { - if (spu->irqs[0] != NO_IRQ) + if (spu->irqs[0]) free_irq(spu->irqs[0], spu); - if (spu->irqs[1] != NO_IRQ) + if (spu->irqs[1]) free_irq(spu->irqs[1], spu); - if (spu->irqs[2] != NO_IRQ) + if (spu->irqs[2]) free_irq(spu->irqs[2], spu); } diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 21b4bfb97200..672d310dcf14 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -105,7 +105,10 @@ static int __init spu_map_interrupts_old(struct spu *spu, spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); /* Right now, we only fail if class 2 failed */ - return spu->irqs[2] == NO_IRQ ? -EINVAL : 0; + if (!spu->irqs[2]) + return -EINVAL; + + return 0; } static void __iomem * __init spu_map_prop_old(struct spu *spu, @@ -191,7 +194,7 @@ static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) pr_debug(" irq %d no 0x%x on %s\n", i, oirq.args[0], oirq.np->full_name); spu->irqs[i] = irq_create_of_mapping(&oirq); - if (spu->irqs[i] == NO_IRQ) { + if (!spu->irqs[i]) { pr_debug("spu_new: failed to map it !\n"); goto err; } @@ -202,7 +205,7 @@ err: pr_debug("failed to map irq %x for spu %s\n", *oirq.args, spu->name); for (; i >= 0; i--) { - if (spu->irqs[i] != NO_IRQ) + if (spu->irqs[i]) irq_dispose_mapping(spu->irqs[i]); } return ret; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5be15cff758d..2975754c65ea 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) gang = alloc_spu_gang(); SPUFS_I(inode)->i_ctx = NULL; SPUFS_I(inode)->i_gang = gang; - if (!gang) + if (!gang) { + ret = -ENOMEM; goto out_iput; + } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index bfb300633dfe..0ce1b45f02a8 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -368,7 +368,7 @@ static void chrp_8259_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); @@ -514,7 +514,7 @@ static void __init chrp_find_8259(void) } if (chrp_mpic != NULL) { cascade_irq = irq_of_parse_and_map(pic, 0); - if (cascade_irq == NO_IRQ) + if (!cascade_irq) printk(KERN_ERR "i8259: failed to map cascade irq\n"); else irq_set_chained_handler(cascade_irq, diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index b7866e01483d..ade83829d5e8 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -181,7 +181,7 @@ unsigned int flipper_pic_get_irq(void) irq_status = in_be32(io_base + FLIPPER_ICR) & in_be32(io_base + FLIPPER_IMR); if (irq_status == 0) - return NO_IRQ; /* no more IRQs pending */ + return 0; /* no more IRQs pending */ irq = __ffs(irq_status); return irq_linear_revmap(flipper_irq_host, irq); diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 9b7975706bfc..89c54de88b7a 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -114,7 +114,7 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) irq_status = in_be32(io_base + HW_BROADWAY_ICR) & in_be32(io_base + HW_BROADWAY_IMR); if (irq_status == 0) - return NO_IRQ; /* no more IRQs pending */ + return 0; /* no more IRQs pending */ irq = __ffs(irq_status); return irq_linear_revmap(h, irq); @@ -131,7 +131,7 @@ static void hlwd_pic_irq_cascade(struct irq_desc *desc) raw_spin_unlock(&desc->lock); virq = __hlwd_pic_get_irq(irq_domain); - if (virq != NO_IRQ) + if (virq) generic_handle_irq(virq); else pr_err("spurious interrupt!\n"); diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index dafba1057a47..dfd310031549 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -26,7 +26,7 @@ #include <linux/tty.h> #include <linux/serial_core.h> #include <linux/of_platform.h> -#include <linux/module.h> +#include <linux/extable.h> #include <asm/time.h> #include <asm/machdep.h> diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index 80804f9916ee..f97bab8e37a2 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -23,7 +23,7 @@ #include <linux/pci.h> #include <linux/kdev_t.h> #include <linux/console.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/delay.h> #include <linux/irq.h> #include <linux/seq_file.h> diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index ed7321d6772e..8e3590941960 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -47,7 +47,7 @@ static void mvme5100_8259_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); @@ -84,7 +84,7 @@ static void __init mvme5100_pic_init(void) } cirq = irq_of_parse_and_map(cp, 0); - if (cirq == NO_IRQ) { + if (!cirq) { pr_warn("mvme5100_pic_init: no cascade interrupt?\n"); return; } diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index a2f89e6326ce..a0589aac4163 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -552,7 +552,7 @@ void maple_pci_irq_fixup(struct pci_dev *dev) pci_bus_to_host(dev->bus) == u4_pcie) { printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); dev->irq = irq_create_mapping(NULL, 1); - if (dev->irq != NO_IRQ) + if (dev->irq) irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); } @@ -562,7 +562,7 @@ void maple_pci_irq_fixup(struct pci_dev *dev) if (dev->vendor == PCI_VENDOR_ID_AMD && dev->device == PCI_DEVICE_ID_AMD_8111_IDE && (dev->class & 5) != 5) { - dev->irq = NO_IRQ; + dev->irq = 0; } DBG(" <- maple_pci_irq_fixup\n"); @@ -648,7 +648,7 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) return defirq; } irq = irq_of_parse_and_map(np, channel & 0x1); - if (irq == NO_IRQ) { + if (!irq) { printk("Failed to map onboard IDE interrupt for channel %d\n", channel); return defirq; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 3c30c7a4534d..b7f937563827 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -156,7 +156,7 @@ static void __noreturn maple_halt(void) } #ifdef CONFIG_SMP -struct smp_ops_t maple_smp_ops = { +static struct smp_ops_t maple_smp_ops = { .probe = smp_mpic_probe, .message_pass = smp_mpic_message_pass, .kick_cpu = smp_generic_kick_cpu, @@ -176,7 +176,7 @@ static void __init maple_use_rtas_reboot_and_halt_if_present(void) } } -void __init maple_setup_arch(void) +static void __init maple_setup_arch(void) { /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000; diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig index 00d4b28cbb60..c7f1dbe94de7 100644 --- a/arch/powerpc/platforms/pasemi/Kconfig +++ b/arch/powerpc/platforms/pasemi/Kconfig @@ -14,6 +14,16 @@ config PPC_PASEMI menu "PA Semi PWRficient options" depends on PPC_PASEMI +config PPC_PASEMI_NEMO + bool "Nemo motherboard Support" + depends on PPC_PASEMI + select PPC_I8259 + help + This option enables support for the 'Nemo' motherboard + used in A-Eons's Amigaone X1000. This consists of some + device tree patches and workarounds for the SB600 South + Bridge that provides SATA/USB/Audio. + config PPC_PASEMI_IOMMU bool "PA Semi IOMMU support" depends on PPC_PASEMI diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index ddf635000c6b..c23e60959aa8 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -306,7 +306,7 @@ static struct platform_driver gpio_mdio_driver = }, }; -int gpio_mdio_init(void) +static int gpio_mdio_init(void) { struct device_node *np; @@ -326,7 +326,7 @@ int gpio_mdio_init(void) } module_init(gpio_mdio_init); -void gpio_mdio_exit(void) +static void gpio_mdio_exit(void) { platform_driver_unregister(&gpio_mdio_driver); if (gpio_regs) diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 309d9ccccd50..e74adc4e7fd8 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { dev->dev.archdata.dma_ops = &dma_direct_ops; + /* + * Set the coherent DMA mask to prevent the iommu + * being used unnecessarily + */ + dev->dev.coherent_dma_mask = DMA_BIT_MASK(44); return; } #endif @@ -194,7 +199,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) set_iommu_table_base(&dev->dev, &iommu_table_iobmap); } -int __init iob_init(struct device_node *dn) +static int __init iob_init(struct device_node *dn) { unsigned long tmp; u32 regword; diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c index e0ab299763c1..8571e7bf78b6 100644 --- a/arch/powerpc/platforms/pasemi/misc.c +++ b/arch/powerpc/platforms/pasemi/misc.c @@ -76,7 +76,7 @@ static int __init pasemi_register_i2c_devices(void) } info.irq = irq_of_parse_and_map(node, 0); - if (info.irq == NO_IRQ) + if (!info.irq) info.irq = -1; if (find_i2c_driver(node, &info) < 0) diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index d9af76342d99..d9cd510c8865 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -68,7 +68,7 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); @@ -109,7 +109,7 @@ static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) } virq = irq_create_mapping(msi_mpic->irqhost, hwirq); - if (virq == NO_IRQ) { + if (!virq) { pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n", hwirq); msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index e86c1bd08f1f..3182400cf48f 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -59,7 +59,7 @@ struct mce_regs { static struct mce_regs mce_regs[MAX_MCE_REGS]; static int num_mce_regs; -static int nmi_virq = NO_IRQ; +static int nmi_virq = 0; static void __noreturn pas_restart(char *cmd) @@ -105,7 +105,7 @@ static void pas_take_timebase(void) arch_spin_unlock(&timebase_lock); } -struct smp_ops_t pas_smp_ops = { +static struct smp_ops_t pas_smp_ops = { .probe = smp_mpic_probe, .message_pass = smp_mpic_message_pass, .kick_cpu = smp_generic_kick_cpu, @@ -115,7 +115,7 @@ struct smp_ops_t pas_smp_ops = { }; #endif /* CONFIG_SMP */ -void __init pas_setup_arch(void) +static void __init pas_setup_arch(void) { #ifdef CONFIG_SMP /* Setup SMP callback */ @@ -264,7 +264,7 @@ static int pas_machine_check_handler(struct pt_regs *regs) srr0 = regs->nip; srr1 = regs->msr; - if (nmi_virq != NO_IRQ && mpic_get_mcirq() == nmi_virq) { + if (nmi_virq && mpic_get_mcirq() == nmi_virq) { printk(KERN_ERR "NMI delivered\n"); debugger(regs); mpic_end_irq(irq_get_irq_data(nmi_virq)); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 6d6f277477aa..c8c217b7dd33 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -401,7 +401,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, { struct pmac_i2c_host_kw *host = bus->hostdata; u8 mode_reg = host->speed; - int use_irq = host->irq != NO_IRQ && !bus->polled; + int use_irq = host->irq && !bus->polled; /* Setup mode & subaddress if any */ switch(bus->mode) { @@ -535,7 +535,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) break; } host->irq = irq_of_parse_and_map(np, 0); - if (host->irq == NO_IRQ) + if (!host->irq) printk(KERN_WARNING "low_i2c: Failed to map interrupt for %s\n", np->full_name); @@ -557,7 +557,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) */ if (request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND, "keywest i2c", host)) - host->irq = NO_IRQ; + host->irq = 0; printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %s\n", *addrp, host->irq, np->full_name); diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index e49d07f3d542..459138ed4571 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -26,7 +26,7 @@ static irqreturn_t macio_gpio_irq(int irq, void *data) static int macio_do_gpio_irq_enable(struct pmf_function *func) { unsigned int irq = irq_of_parse_and_map(func->node, 0); - if (irq == NO_IRQ) + if (!irq) return -EINVAL; return request_irq(irq, macio_gpio_irq, 0, func->node->name, func); } @@ -34,7 +34,7 @@ static int macio_do_gpio_irq_enable(struct pmf_function *func) static int macio_do_gpio_irq_disable(struct pmf_function *func) { unsigned int irq = irq_of_parse_and_map(func->node, 0); - if (irq == NO_IRQ) + if (!irq) return -EINVAL; free_irq(irq, func); return 0; diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index 43075081721f..695e8c4d4224 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -804,7 +804,7 @@ void pmf_unregister_driver(struct device_node *np) } EXPORT_SYMBOL_GPL(pmf_unregister_driver); -struct pmf_function *__pmf_find_function(struct device_node *target, +static struct pmf_function *__pmf_find_function(struct device_node *target, const char *name, u32 flags) { struct device_node *actor = of_node_get(target); diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 981546345033..f5f9ad7c3398 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -251,7 +251,7 @@ static unsigned int pmac_pic_get_irq(void) } raw_spin_unlock_irqrestore(&pmac_pic_lock, flags); if (unlikely(irq < 0)) - return NO_IRQ; + return 0; return irq_linear_revmap(pmac_pic_host, irq); } @@ -389,7 +389,7 @@ static void __init pmac_pic_probe_oldstyle(void) out_le32(&pmac_irq_hw[i]->enable, 0); /* Hookup cascade irq */ - if (slave && pmac_irq_cascade != NO_IRQ) + if (slave && pmac_irq_cascade) setup_irq(pmac_irq_cascade, &gatwick_cascade_action); printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs); @@ -444,7 +444,7 @@ static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic) pswitch = of_find_node_by_name(NULL, "programmer-switch"); if (pswitch) { nmi_irq = irq_of_parse_and_map(pswitch, 0); - if (nmi_irq != NO_IRQ) { + if (nmi_irq) { mpic_irq_set_priority(nmi_irq, 9); setup_irq(nmi_irq, &xmon_action); } diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 834868b9fdc9..c9eb7d6540ea 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -852,37 +852,33 @@ static void smp_core99_setup_cpu(int cpu_nr) #ifdef CONFIG_PPC64 #ifdef CONFIG_HOTPLUG_CPU -static int smp_core99_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static unsigned int smp_core99_host_open; + +static int smp_core99_cpu_prepare(unsigned int cpu) { int rc; - switch(action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - /* Open i2c bus if it was used for tb sync */ - if (pmac_tb_clock_chip_host) { - rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); - if (rc) { - pr_err("Failed to open i2c bus for time sync\n"); - return notifier_from_errno(rc); - } + /* Open i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && !smp_core99_host_open) { + rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1); + if (rc) { + pr_err("Failed to open i2c bus for time sync\n"); + return notifier_from_errno(rc); } - break; - case CPU_ONLINE: - case CPU_UP_CANCELED: - /* Close i2c bus if it was used for tb sync */ - if (pmac_tb_clock_chip_host) - pmac_i2c_close(pmac_tb_clock_chip_host); - break; - default: - break; + smp_core99_host_open = 1; } - return NOTIFY_OK; + return 0; } -static struct notifier_block smp_core99_cpu_nb = { - .notifier_call = smp_core99_cpu_notify, -}; +static int smp_core99_cpu_online(unsigned int cpu) +{ + /* Close i2c bus if it was used for tb sync */ + if (pmac_tb_clock_chip_host && smp_core99_host_open) { + pmac_i2c_close(pmac_tb_clock_chip_host); + smp_core99_host_open = 0; + } + return 0; +} #endif /* CONFIG_HOTPLUG_CPU */ static void __init smp_core99_bringup_done(void) @@ -902,7 +898,11 @@ static void __init smp_core99_bringup_done(void) g5_phy_disable_cpu1(); } #ifdef CONFIG_HOTPLUG_CPU - register_cpu_notifier(&smp_core99_cpu_nb); + cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE, + "powerpc/pmac:prepare", smp_core99_cpu_prepare, + NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online", + smp_core99_cpu_online, NULL); #endif if (ppc_md.progress) @@ -979,7 +979,7 @@ static void pmac_cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ /* Core99 Macs (dual G4s and G5s) */ -struct smp_ops_t core99_smp_ops = { +static struct smp_ops_t core99_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_core99_probe, #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 86544ea85dc3..2354ea51e871 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -763,7 +763,8 @@ int pnv_eeh_phb_reset(struct pci_controller *hose, int option) * reset followed by hot reset on root bus. So we also * need the PCI bus settlement delay. */ - rc = pnv_eeh_poll(phb->opal_id); + if (rc > 0) + rc = pnv_eeh_poll(phb->opal_id); if (option == EEH_RESET_DEACTIVATE) { if (system_state < SYSTEM_RUNNING) udelay(1000 * EEH_PE_RST_SETTLE_TIME); @@ -806,7 +807,8 @@ static int pnv_eeh_root_reset(struct pci_controller *hose, int option) goto out; /* Poll state of the PHB until the request is done */ - rc = pnv_eeh_poll(phb->opal_id); + if (rc > 0) + rc = pnv_eeh_poll(phb->opal_id); if (option == EEH_RESET_DEACTIVATE) msleep(EEH_PE_RST_SETTLE_TIME); out: @@ -1090,10 +1092,16 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } } - bus = eeh_pe_bus_get(pe); if (pe->type & EEH_PE_VF) return pnv_eeh_reset_vf_pe(pe, option); + bus = eeh_pe_bus_get(pe); + if (!bus) { + pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n", + __func__, pe->phb->global_number, pe->addr); + return -EIO; + } + if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) return pnv_eeh_root_reset(hose, option); @@ -1306,7 +1314,7 @@ static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) return; } - switch (data->type) { + switch (be16_to_cpu(data->type)) { case OPAL_P7IOC_DIAG_TYPE_RGC: pr_info("P7IOC diag-data for RGC\n\n"); pnv_eeh_dump_hub_diag_common(data); @@ -1538,7 +1546,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe) /* Try best to clear it */ opal_pci_eeh_freeze_clear(phb->opal_id, - frozen_pe_no, + be64_to_cpu(frozen_pe_no), OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); ret = EEH_NEXT_ERR_NONE; } else if ((*pe)->state & EEH_PE_ISOLATED || diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 00e1a0195c78..aec85e778028 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -115,7 +115,7 @@ static u64 dma_npu_get_required_mask(struct device *dev) return 0; } -struct dma_map_ops dma_npu_ops = { +static struct dma_map_ops dma_npu_ops = { .map_page = dma_npu_map_page, .map_sg = dma_npu_map_sg, .alloc = dma_npu_alloc, diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 2ee96431f736..4c827826c05e 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -370,6 +370,7 @@ static irqreturn_t process_dump(int irq, void *data) uint32_t dump_id, dump_size, dump_type; struct dump_obj *dump; char name[22]; + struct kobject *kobj; rc = dump_read_info(&dump_id, &dump_size, &dump_type); if (rc != OPAL_SUCCESS) @@ -381,8 +382,12 @@ static irqreturn_t process_dump(int irq, void *data) * that gracefully and not create two conflicting * entries. */ - if (kset_find_obj(dump_kset, name)) + kobj = kset_find_obj(dump_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); return 0; + } dump = create_dump_obj(dump_id, dump_size, dump_type); if (!dump) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 37f959bf392e..f2344cbd2f46 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -247,6 +247,7 @@ static irqreturn_t elog_event(int irq, void *data) uint64_t elog_type; int rc; char name[2+16+1]; + struct kobject *kobj; rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { @@ -269,8 +270,12 @@ static irqreturn_t elog_event(int irq, void *data) * that gracefully and not create two conflicting * entries. */ - if (kset_find_obj(elog_kset, name)) + kobj = kset_find_obj(elog_kset, name); + if (kobj) { + /* Drop reference added by kset_find_obj() */ + kobject_put(kobj); return IRQ_HANDLED; + } create_elog_obj(log_id, elog_size, elog_type); diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index e505223b4ec5..998316bf2dad 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -222,13 +222,14 @@ int __init opal_event_init(void) /* Get hardware and virtual IRQ */ irq = be32_to_cpup(irqs); virq = irq_create_mapping(NULL, irq); - if (virq == NO_IRQ) { + if (!virq) { pr_warn("Failed to map irq 0x%x\n", irq); continue; } /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, + "opal", NULL); if (rc) { irq_dispose_mapping(virq); pr_warn("Error %d requesting irq %d (0x%x)\n", @@ -259,7 +260,7 @@ machine_arch_initcall(powernv, opal_event_init); int opal_event_request(unsigned int opal_event_nr) { if (WARN_ON_ONCE(!opal_event_irqchip.domain)) - return NO_IRQ; + return 0; return irq_create_mapping(opal_event_irqchip.domain, opal_event_nr); } diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3d29d40eb0e9..44d2d842cee7 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -208,6 +208,7 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE); OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD); OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD); OPAL_CALL(opal_set_xive, OPAL_SET_XIVE); +OPAL_CALL_REAL(opal_rm_set_xive, OPAL_SET_XIVE); OPAL_CALL(opal_get_xive, OPAL_GET_XIVE); OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER); OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8b4fc68cebcb..6c9a65b52e63 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs, if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c index 1349a099c74c..94498a04558b 100644 --- a/arch/powerpc/platforms/powernv/pci-cxl.c +++ b/arch/powerpc/platforms/powernv/pci-cxl.c @@ -344,7 +344,7 @@ int pnv_cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return (hwirq ? hwirq : -ENOMEM); virq = irq_create_mapping(NULL, hwirq); - if (virq == NO_IRQ) { + if (!virq) { pr_warn("%s: Failed to map cxl mode MSI to linux irq\n", pci_name(pdev)); return -ENOMEM; @@ -374,7 +374,7 @@ void pnv_cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev) return; for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6b9528307f62..d4b33dd2d9e7 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -111,17 +111,44 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); -static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { - return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == - (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); +} + +static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags) +{ + unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH); + + return (resource_flags & flags) == flags; } static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) { + s64 rc; + phb->ioda.pe_array[pe_no].phb = phb; phb->ioda.pe_array[pe_no].pe_number = pe_no; + /* + * Clear the PE frozen state as it might be put into frozen state + * in the last PCI remove path. It's not harmful to do so when the + * PE is already in unfrozen state. + */ + rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Error %lld unfreezing PHB#%d-PE#%d\n", + __func__, rc, phb->hose->global_number, pe_no); + return &phb->ioda.pe_array[pe_no]; } @@ -142,7 +169,7 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no) static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) { - unsigned long pe = phb->ioda.total_pe_num - 1; + long pe; for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) { if (!test_and_set_bit(pe, phb->ioda.pe_alloc)) @@ -155,11 +182,12 @@ static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb) static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) { struct pnv_phb *phb = pe->phb; + unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); memset(pe, 0, sizeof(struct pnv_ioda_pe)); - clear_bit(pe->pe_number, phb->ioda.pe_alloc); + clear_bit(pe_num, phb->ioda.pe_alloc); } /* The default M64 BAR is shared by all PEs */ @@ -229,7 +257,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, sgsz = phb->ioda.m64_segsize; for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &pdev->resource[i]; - if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) + if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; start = _ALIGN_DOWN(r->start - base, sgsz); @@ -402,7 +430,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) struct device_node *dn = hose->dn; struct resource *res; u32 m64_range[2], i; - const u32 *r; + const __be32 *r; u64 pci_addr; if (phb->type != PNV_PHB_IODA1 && phb->type != PNV_PHB_IODA2) { @@ -1877,7 +1905,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, unsigned shift, unsigned long index, unsigned long npages) { - __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ @@ -2209,7 +2237,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2347,7 +2375,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); @@ -2703,15 +2731,21 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, } #ifdef CONFIG_PCI_MSI -static void pnv_ioda2_msi_eoi(struct irq_data *d) +int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq) { - unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); - struct irq_chip *chip = irq_data_get_irq_chip(d); struct pnv_phb *phb = container_of(chip, struct pnv_phb, ioda.irq_chip); + + return opal_pci_msi_eoi(phb->opal_id, hw_irq); +} + +static void pnv_ioda2_msi_eoi(struct irq_data *d) +{ int64_t rc; + unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d); + struct irq_chip *chip = irq_data_get_irq_chip(d); - rc = opal_pci_msi_eoi(phb->opal_id, hw_irq); + rc = pnv_opal_pci_msi_eoi(chip, hw_irq); WARN_ON_ONCE(rc); icp_native_eoi(d); @@ -2741,6 +2775,16 @@ void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) irq_set_chip(virq, &phb->ioda.irq_chip); } +/* + * Returns true iff chip is something that we could call + * pnv_opal_pci_msi_eoi for. + */ +bool is_pnv_opal_msi(struct irq_chip *chip) +{ + return chip->irq_eoi == pnv_ioda2_msi_eoi; +} +EXPORT_SYMBOL_GPL(is_pnv_opal_msi); + static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, unsigned int hwirq, unsigned int virq, unsigned int is_64, struct msi_msg *msg) @@ -2863,7 +2907,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { + if (!pnv_pci_is_m64_flags(res->flags)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -2958,7 +3002,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, index++; } } else if ((res->flags & IORESOURCE_MEM) && - !pnv_pci_is_mem_pref_64(res->flags)) { + !pnv_pci_is_m64(phb, res)) { region.start = res->start - phb->hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -3018,6 +3062,38 @@ static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe) } } +#ifdef CONFIG_DEBUG_FS +static int pnv_pci_diag_data_set(void *data, u64 val) +{ + struct pci_controller *hose; + struct pnv_phb *phb; + s64 ret; + + if (val != 1ULL) + return -EINVAL; + + hose = (struct pci_controller *)data; + if (!hose || !hose->private_data) + return -ENODEV; + + phb = hose->private_data; + + /* Retrieve the diag data from firmware */ + ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); + if (ret != OPAL_SUCCESS) + return -EIO; + + /* Print the diag data to the kernel log */ + pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, + pnv_pci_diag_data_set, "%llu\n"); + +#endif /* CONFIG_DEBUG_FS */ + static void pnv_pci_ioda_create_dbgfs(void) { #ifdef CONFIG_DEBUG_FS @@ -3033,9 +3109,14 @@ static void pnv_pci_ioda_create_dbgfs(void) sprintf(name, "PCI%04x", hose->global_number); phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root); - if (!phb->dbgfs) + if (!phb->dbgfs) { pr_warning("%s: Error on creating debugfs on PHB#%x\n", __func__, hose->global_number); + continue; + } + + debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose, + &pnv_pci_diag_data_fops); } #endif /* CONFIG_DEBUG_FS */ } @@ -3083,9 +3164,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We fail back to M32 if M64 isn't supported */ - if (phb->ioda.m64_segsize && - pnv_pci_is_mem_pref_64(type)) + /* + * We fall back to M32 if M64 isn't supported. We enforce the M64 + * alignment for any 64-bit resource, PCIe doesn't care and + * bridges only do 64-bit prefetchable anyway. + */ + if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -3125,7 +3209,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, w = NULL; if (r->flags & type & IORESOURCE_IO) w = &hose->io_resource; - else if (pnv_pci_is_mem_pref_64(r->flags) && + else if (pnv_pci_is_m64(phb, r) && (type & IORESOURCE_PREFETCH) && phb->ioda.m64_segsize) w = &hose->mem_resources[1]; @@ -3392,12 +3476,6 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; - /* Release slave PEs in compound PE */ - if (pe->flags & PNV_IODA_PE_MASTER) { - list_for_each_entry_safe(slave, tmp, &pe->slaves, list) - pnv_ioda_release_pe(slave); - } - list_del(&pe->list); switch (phb->type) { case PNV_PHB_IODA1: @@ -3412,7 +3490,26 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) pnv_ioda_release_pe_seg(pe); pnv_ioda_deconfigure_pe(pe->phb, pe); - pnv_ioda_free_pe(pe); + + /* Release slave PEs in the compound PE */ + if (pe->flags & PNV_IODA_PE_MASTER) { + list_for_each_entry_safe(slave, tmp, &pe->slaves, list) { + list_del(&slave->list); + pnv_ioda_free_pe(slave); + } + } + + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) @@ -3428,7 +3525,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev) if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + WARN_ON(--pe->device_count < 0); if (pe->device_count == 0) pnv_ioda_release_pe(pe); @@ -3722,10 +3829,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, if (rc) pr_warning(" OPAL Error %ld performing IODA table reset !\n", rc); - /* If we're running in kdump kerenl, the previous kerenl never + /* + * If we're running in kdump kernel, the previous kernel never * shutdown PCI devices correctly. We already got IODA table * cleaned out. So we have to issue PHB reset to stop all PCI - * transactions from previous kerenl. + * transactions from previous kernel. */ if (is_kdump_kernel()) { pr_info(" Issue PHB reset ...\n"); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index a21d831c1114..db7b8020f68e 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -186,7 +186,7 @@ int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOSPC; } virq = irq_create_mapping(NULL, phb->msi_base + hwirq); - if (virq == NO_IRQ) { + if (!virq) { pr_warn("%s: Failed to map MSI to linux irq\n", pci_name(pdev)); msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, 1); @@ -217,7 +217,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) return; for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); @@ -309,8 +309,8 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, be64_to_cpu(data->dma1ErrorLog1)); for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { - if ((data->pestA[i] >> 63) == 0 && - (data->pestB[i] >> 63) == 0) + if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 && + (be64_to_cpu(data->pestB[i]) >> 63) == 0) continue; pr_info("PE[%3d] A/B: %016llx %016llx\n", diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 57caaf11a83f..e48462447ff0 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -62,7 +62,7 @@ static int __init ps3_register_lpm_devices(void) &dev->lpm.rights); if (result) { - pr_debug("%s:%d: ps3_repository_read_lpm_privleges failed \n", + pr_debug("%s:%d: ps3_repository_read_lpm_privileges failed\n", __func__, __LINE__); goto fail_read_repo; } diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index b831638e6f4a..98f8c3611133 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -192,7 +192,7 @@ static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet, *virq = irq_create_mapping(NULL, outlet); - if (*virq == NO_IRQ) { + if (!*virq) { FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n", __func__, __LINE__, outlet); result = -ENOMEM; @@ -339,7 +339,7 @@ int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq) if (result) { FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n", __func__, __LINE__, ps3_result(result)); - *virq = NO_IRQ; + *virq = 0; return result; } @@ -418,7 +418,7 @@ int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev, " failed: %s\n", __func__, __LINE__, ps3_result(result)); ps3_event_receive_port_destroy(*virq); - *virq = NO_IRQ; + *virq = 0; return result; } @@ -724,12 +724,12 @@ static unsigned int ps3_get_irq(void) asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x)); plug &= 0x3f; - if (unlikely(plug == NO_IRQ)) { + if (unlikely(!plug)) { DBG("%s:%d: no plug found: thread_id %llu\n", __func__, __LINE__, pd->thread_id); dump_bmp(&per_cpu(ps3_private, 0)); dump_bmp(&per_cpu(ps3_private, 1)); - return NO_IRQ; + return 0; } #if defined(DEBUG) diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c index 3c7707af3384..60154d08debf 100644 --- a/arch/powerpc/platforms/ps3/smp.c +++ b/arch/powerpc/platforms/ps3/smp.c @@ -91,7 +91,7 @@ static void __init ps3_smp_probe(void) result = smp_request_message_ipi(virqs[i], i); if (result) - virqs[i] = NO_IRQ; + virqs[i] = 0; else ps3_register_ipi_irq(cpu, virqs[i]); } @@ -112,7 +112,7 @@ void ps3_smp_cleanup_cpu(int cpu) for (i = 0; i < MSG_COUNT; i++) { /* Can't call free_irq from interrupt context. */ ps3_event_receive_port_destroy(virqs[i]); - virqs[i] = NO_IRQ; + virqs[i] = 0; } DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu); diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index 492b2575e0d2..b54850845466 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -284,7 +284,7 @@ fail_alloc_2: fail_alloc_1: ps3_spe_irq_destroy(spu->irqs[0]); fail_alloc_0: - spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ; + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0; return result; } @@ -334,7 +334,7 @@ static int ps3_destroy_spu(struct spu *spu) ps3_spe_irq_destroy(spu->irqs[1]); ps3_spe_irq_destroy(spu->irqs[0]); - spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = NO_IRQ; + spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0; spu_unmap(spu); diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 4748124faa10..423e450efe07 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -27,7 +27,7 @@ #include <asm/uaccess.h> #include <asm/rtas.h> -struct workqueue_struct *pseries_hp_wq; +static struct workqueue_struct *pseries_hp_wq; struct pseries_hp_work { struct work_struct work; @@ -377,7 +377,7 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog) return rc; } -void pseries_hp_work_fn(struct work_struct *work) +static void pseries_hp_work_fn(struct work_struct *work) { struct pseries_hp_work *hp_work = container_of(work, struct pseries_hp_work, work); @@ -413,6 +413,7 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog, queue_work(pseries_hp_wq, (struct work_struct *)work); } else { *rc = -ENOMEM; + kfree(hp_errlog_copy); complete(hotplug_done); } } diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c index a6ddca833119..32187dc76730 100644 --- a/arch/powerpc/platforms/pseries/event_sources.c +++ b/arch/powerpc/platforms/pseries/event_sources.c @@ -34,7 +34,7 @@ void request_event_sources_irqs(struct device_node *np, if (count > 15) break; virqs[count] = irq_create_of_mapping(&oirq); - if (virqs[count] == NO_IRQ) { + if (!virqs[count]) { pr_err("event-sources: Unable to allocate " "interrupt number for %s\n", np->full_name); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 43f7beb2902d..76ec104e88be 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) return dlpar_update_device_tree_lmb(lmb); } -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) -{ - unsigned long section_nr; - struct mem_section *mem_sect; - struct memory_block *mem_block; - - section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - - mem_block = find_memory_block(mem_sect); - return mem_block; -} - #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { @@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) static int dlpar_add_lmb(struct of_drconf_cell *); +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + static int dlpar_remove_lmb(struct of_drconf_cell *lmb) { struct memory_block *mem_block; diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 543a6386f3eb..326ef0dd6038 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -119,7 +119,7 @@ static void rtas_teardown_msi_irqs(struct pci_dev *pdev) struct msi_desc *entry; for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; irq_set_msi_desc(entry->irq, NULL); @@ -471,7 +471,7 @@ again: virq = irq_create_mapping(NULL, hwirq); - if (virq == NO_IRQ) { + if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); return -ENOSPC; } @@ -490,7 +490,7 @@ again: static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev) { /* No LSI -> leave MSIs (if any) configured */ - if (pdev->irq == NO_IRQ) { + if (!pdev->irq) { dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n"); return; } diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index fe16a50700de..09eba5a9929a 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -119,6 +119,10 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge) bus = bridge->bus; + /* Rely on the pcibios_free_controller_deferred() callback. */ + pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred, + (void *) pci_bus_to_host(bus)); + dn = pcibios_get_phb_of_node(bus); if (!dn) return 0; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 906dbaa97fe2..547fd13e4f8e 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -106,8 +106,11 @@ int remove_phb_dynamic(struct pci_controller *phb) release_resource(res); } - /* Free pci_controller data structure */ - pcibios_free_controller(phb); + /* + * The pci_controller data structure is freed by + * the pcibios_free_controller_deferred() callback; + * see pseries_root_bridge_prepare(). + */ return 0; } diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index b502ab61aafa..7d28cabf1206 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -156,7 +156,7 @@ static int scanlog_release(struct inode * inode, struct file * file) return 0; } -const struct file_operations scanlog_fops = { +static const struct file_operations scanlog_fops = { .owner = THIS_MODULE, .read = scanlog_read, .write = scanlog_write, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 4ffcaa6f8670..97aa3f332f24 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -41,7 +41,6 @@ #include <linux/root_dev.h> #include <linux/of.h> #include <linux/of_pci.h> -#include <linux/kexec.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -66,6 +65,7 @@ #include <asm/eeh.h> #include <asm/reg.h> #include <asm/plpar_wrappers.h> +#include <asm/kexec.h> #include "pseries.h" @@ -114,7 +114,7 @@ static void pseries_8259_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); @@ -141,7 +141,7 @@ static void __init pseries_setup_i8259_cascade(void) } cascade = irq_of_parse_and_map(found, 0); - if (cascade == NO_IRQ) { + if (!cascade) { printk(KERN_ERR "pic: failed to map cascade interrupt"); return; } diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 9144204442eb..ada29eaed6e2 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -240,7 +240,7 @@ static int axon_ram_probe(struct platform_device *device) device_add_disk(&device->dev, bank->disk); bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0); - if (bank->irq_id == NO_IRQ) { + if (!bank->irq_id) { dev_err(&device->dev, "Cannot access ECC interrupt ID\n"); rc = -EFAULT; goto failed; @@ -250,7 +250,7 @@ static int axon_ram_probe(struct platform_device *device) AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device); if (rc != 0) { dev_err(&device->dev, "Cannot register ECC interrupt handler\n"); - bank->irq_id = NO_IRQ; + bank->irq_id = 0; rc = -EFAULT; goto failed; } @@ -268,7 +268,7 @@ static int axon_ram_probe(struct platform_device *device) failed: if (bank != NULL) { - if (bank->irq_id != NO_IRQ) + if (bank->irq_id) free_irq(bank->irq_id, device); if (bank->disk != NULL) { if (bank->disk->major > 0) diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c index 6c110994d902..3c0eb9b25535 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/sysdev/cpm1.c @@ -132,7 +132,7 @@ unsigned int cpm_pic_init(void) { struct device_node *np = NULL; struct resource res; - unsigned int sirq = NO_IRQ, hwirq, eirq; + unsigned int sirq = 0, hwirq, eirq; int ret; pr_debug("cpm_pic_init\n"); @@ -154,7 +154,7 @@ unsigned int cpm_pic_init(void) goto end; sirq = irq_of_parse_and_map(np, 0); - if (sirq == NO_IRQ) + if (!sirq) goto end; /* Initialize the CPM interrupt controller. */ @@ -168,7 +168,7 @@ unsigned int cpm_pic_init(void) cpm_pic_host = irq_domain_add_linear(np, 64, &cpm_pic_host_ops, NULL); if (cpm_pic_host == NULL) { printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n"); - sirq = NO_IRQ; + sirq = 0; goto end; } @@ -182,7 +182,7 @@ unsigned int cpm_pic_init(void) } eirq = irq_of_parse_and_map(np, 0); - if (eirq == NO_IRQ) + if (!eirq) goto end; if (setup_irq(eirq, &cpm_error_irqaction)) @@ -534,7 +534,8 @@ struct cpm1_gpio16_chip { static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); struct cpm_ioport16 __iomem *iop = mm_gc->regs; cpm1_gc->cpdata = in_be16(&iop->dat); @@ -649,7 +650,8 @@ struct cpm1_gpio32_chip { static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio32_chip *cpm1_gc = + container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); struct cpm_ioport32b __iomem *iop = mm_gc->regs; cpm1_gc->cpdata = in_be32(&iop->dat); diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 911456d17713..947f42007734 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -94,7 +94,8 @@ struct cpm2_gpio32_chip { static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) { - struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm2_gpio32_chip *cpm2_gc = + container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc); struct cpm2_ioports __iomem *iop = mm_gc->regs; cpm2_gc->cpdata = in_be32(&iop->dat); diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index bffcc7a486a1..48866e6c1efb 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -155,7 +155,7 @@ static struct irq_chip ehv_pic_direct_eoi_irq_chip = { .irq_set_type = ehv_pic_set_irq_type, }; -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ +/* Return an interrupt vector or 0 if no interrupt is pending. */ unsigned int ehv_pic_get_irq(void) { int irq; @@ -168,7 +168,7 @@ unsigned int ehv_pic_get_irq(void) ev_int_iack(0, &irq); /* legacy mode */ if (irq == 0xFFFF) /* 0xFFFF --> no irq is pending */ - return NO_IRQ; + return 0; /* * this will also setup revmap[] in the slow path for the first diff --git a/arch/powerpc/sysdev/fsl_gtm.c b/arch/powerpc/sysdev/fsl_gtm.c index 06ac3c61b3d0..a6f0b96ce2c9 100644 --- a/arch/powerpc/sysdev/fsl_gtm.c +++ b/arch/powerpc/sysdev/fsl_gtm.c @@ -406,7 +406,7 @@ static int __init fsl_gtm_init(void) unsigned int irq; irq = irq_of_parse_and_map(np, i); - if (irq == NO_IRQ) { + if (!irq) { pr_err("%s: not enough interrupts specified\n", np->full_name); goto err; diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c index b83f32562a37..488ec453038a 100644 --- a/arch/powerpc/sysdev/fsl_mpic_err.c +++ b/arch/powerpc/sysdev/fsl_mpic_err.c @@ -115,8 +115,8 @@ static irqreturn_t fsl_error_int_handler(int irq, void *data) errint = __builtin_clz(eisr); cascade_irq = irq_linear_revmap(mpic->irqhost, mpic->err_int_vecs[errint]); - WARN_ON(cascade_irq == NO_IRQ); - if (cascade_irq != NO_IRQ) { + WARN_ON(!cascade_irq); + if (cascade_irq) { generic_handle_irq(cascade_irq); } else { eimr |= 1 << (31 - errint); @@ -134,7 +134,7 @@ void mpic_err_int_init(struct mpic *mpic, irq_hw_number_t irqnum) int ret; virq = irq_create_mapping(mpic->irqhost, irqnum); - if (virq == NO_IRQ) { + if (!virq) { pr_err("Error interrupt setup failed\n"); return; } diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 3a2be3676f43..8a244828782e 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -131,7 +131,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); @@ -250,7 +250,7 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) virq = irq_create_mapping(msi_data->irqhost, hwirq); - if (virq == NO_IRQ) { + if (!virq) { dev_err(&pdev->dev, "fail mapping hwirq %i\n", hwirq); msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); rc = -ENOSPC; @@ -285,7 +285,7 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) msir_index = cascade_data->index; if (msir_index >= NR_MSI_REG_MAX) - cascade_irq = NO_IRQ; + cascade_irq = 0; switch (msi_data->feature & FSL_PIC_IP_MASK) { case FSL_PIC_IP_MPIC: @@ -315,7 +315,7 @@ static irqreturn_t fsl_msi_cascade(int irq, void *data) cascade_irq = irq_linear_revmap(msi_data->irqhost, msi_hwirq(msi_data, msir_index, intr_index + have_shift)); - if (cascade_irq != NO_IRQ) { + if (cascade_irq) { generic_handle_irq(cascade_irq); ret = IRQ_HANDLED; } @@ -337,7 +337,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev) if (msi->cascade_array[i]) { virq = msi->cascade_array[i]->virq; - BUG_ON(virq == NO_IRQ); + BUG_ON(!virq); free_irq(virq, msi->cascade_array[i]); kfree(msi->cascade_array[i]); @@ -362,7 +362,7 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev, int virt_msir, i, ret; virt_msir = irq_of_parse_and_map(dev->dev.of_node, irq_index); - if (virt_msir == NO_IRQ) { + if (!virt_msir) { dev_err(&dev->dev, "%s: Cannot translate IRQ index %d\n", __func__, irq_index); return 0; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 68e7c0dd2e45..3cc7cace194a 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -23,7 +23,7 @@ */ #include <linux/init.h> -#include <linux/module.h> +#include <linux/extable.h> #include <linux/types.h> #include <linux/dma-mapping.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c index d57b77573068..02553a8ce191 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.c +++ b/arch/powerpc/sysdev/ge/ge_pic.c @@ -102,7 +102,7 @@ static void gef_pic_cascade(struct irq_desc *desc) */ cascade_irq = gef_pic_get_irq(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); @@ -206,7 +206,7 @@ void __init gef_pic_init(struct device_node *np) /* Map controller */ gef_pic_cascade_irq = irq_of_parse_and_map(np, 0); - if (gef_pic_cascade_irq == NO_IRQ) { + if (!gef_pic_cascade_irq) { printk(KERN_ERR "SBC610: failed to map cascade interrupt"); return; } @@ -223,12 +223,12 @@ void __init gef_pic_init(struct device_node *np) /* * This is called when we receive an interrupt with apparently comes from this - * chip - check, returning the highest interrupt generated or return NO_IRQ + * chip - check, returning the highest interrupt generated or return 0. */ unsigned int gef_pic_get_irq(void) { u32 cause, mask, active; - unsigned int virq = NO_IRQ; + unsigned int virq = 0; int hwirq; cause = in_be32(gef_pic_irq_reg_base + GEF_PIC_INTR_STATUS); diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index aa2c186d3115..bafb014e1a7e 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -68,9 +68,9 @@ unsigned int i8259_irq(void) if (!pci_intack) outb(0x0B, 0x20); /* ISR register */ if(~inb(0x20) & 0x80) - irq = NO_IRQ; + irq = 0; } else if (irq == 0xff) - irq = NO_IRQ; + irq = 0; if (lock) raw_spin_unlock(&i8259_lock); diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index f76ee39cb337..f267ee0afc08 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -853,7 +853,7 @@ void ipic_clear_mcp_status(u32 mask) ipic_write(primary_ipic->regs, IPIC_SERMR, mask); } -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ +/* Return an interrupt vector or 0 if no interrupt is pending. */ unsigned int ipic_get_irq(void) { int irq; @@ -864,7 +864,7 @@ unsigned int ipic_get_irq(void) irq = ipic_read(primary_ipic->regs, IPIC_SIVCR) & IPIC_SIVCR_VECTOR_MASK; if (irq == 0) /* 0 --> no irq is pending */ - return NO_IRQ; + return 0; return irq_linear_revmap(primary_ipic->irqhost, irq); } diff --git a/arch/powerpc/sysdev/mmio_nvram.c b/arch/powerpc/sysdev/mmio_nvram.c index 69f5814ae6d4..c31f634f1973 100644 --- a/arch/powerpc/sysdev/mmio_nvram.c +++ b/arch/powerpc/sysdev/mmio_nvram.c @@ -89,7 +89,7 @@ static ssize_t mmio_nvram_write(char *buf, size_t count, loff_t *index) return count; } -void mmio_nvram_write_val(int addr, unsigned char val) +static void mmio_nvram_write_val(int addr, unsigned char val) { unsigned long flags; diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index b7cf7abff2eb..3e828b20c21e 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -79,7 +79,7 @@ unsigned int mpc8xx_get_irq(void) irq = in_be32(&siu_reg->sc_sivec) >> 26; if (irq == PIC_VEC_SPURRIOUS) - irq = NO_IRQ; + irq = 0; return irq_linear_revmap(mpc8xx_pic_host, irq); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 7de45b2df366..4d48cecfedd1 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1649,7 +1649,7 @@ void __init mpic_init(struct mpic *mpic) /* Check if this MPIC is chained from a parent interrupt controller */ if (mpic->flags & MPIC_SECONDARY) { int virq = irq_of_parse_and_map(mpic->node, 0); - if (virq != NO_IRQ) { + if (virq) { printk(KERN_INFO "%s: hooking up to IRQ %d\n", mpic->node->full_name, virq); irq_set_handler_data(virq, mpic); @@ -1778,13 +1778,13 @@ static unsigned int _mpic_get_one_irq(struct mpic *mpic, int reg) if (unlikely(src == mpic->spurious_vec)) { if (mpic->flags & MPIC_SPV_EOI) mpic_eoi(mpic); - return NO_IRQ; + return 0; } if (unlikely(mpic->protected && test_bit(src, mpic->protected))) { printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n", mpic->name, (int)src); mpic_eoi(mpic); - return NO_IRQ; + return 0; } return irq_linear_revmap(mpic->irqhost, src); @@ -1817,17 +1817,17 @@ unsigned int mpic_get_coreint_irq(void) if (unlikely(src == mpic->spurious_vec)) { if (mpic->flags & MPIC_SPV_EOI) mpic_eoi(mpic); - return NO_IRQ; + return 0; } if (unlikely(mpic->protected && test_bit(src, mpic->protected))) { printk_ratelimited(KERN_WARNING "%s: Got protected source %d !\n", mpic->name, (int)src); - return NO_IRQ; + return 0; } return irq_linear_revmap(mpic->irqhost, src); #else - return NO_IRQ; + return 0; #endif } @@ -1852,7 +1852,7 @@ void mpic_request_ipis(void) for (i = 0; i < 4; i++) { unsigned int vipi = irq_create_mapping(mpic->irqhost, mpic->ipi_vecs[0] + i); - if (vipi == NO_IRQ) { + if (!vipi) { printk(KERN_ERR "Failed to map %s\n", smp_ipi_name[i]); continue; } diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 3f165d972a0e..db2286be5d9a 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -238,7 +238,7 @@ static int mpic_msgr_probe(struct platform_device *dev) if (receive_mask & (1 << i)) { msgr->irq = irq_of_parse_and_map(np, irq_index); - if (msgr->irq == NO_IRQ) { + if (!msgr->irq) { dev_err(&dev->dev, "Missing interrupt specifier"); kfree(msgr); @@ -246,7 +246,7 @@ static int mpic_msgr_probe(struct platform_device *dev) } irq_index += 1; } else { - msgr->irq = NO_IRQ; + msgr->irq = 0; } mpic_msgrs[reg_number] = msgr; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 2cbc7e29b85f..cfc1c57d760f 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -110,7 +110,7 @@ static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); @@ -155,7 +155,7 @@ static int u3msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msg.address_hi = addr >> 32; virq = irq_create_mapping(msi_mpic->irqhost, hwirq); - if (virq == NO_IRQ) { + if (!virq) { pr_debug("u3msi: failed mapping hwirq 0x%x\n", hwirq); msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); return -ENOSPC; diff --git a/arch/powerpc/sysdev/mv64x60_pic.c b/arch/powerpc/sysdev/mv64x60_pic.c index 0f842dd16bcd..a79953deb489 100644 --- a/arch/powerpc/sysdev/mv64x60_pic.c +++ b/arch/powerpc/sysdev/mv64x60_pic.c @@ -272,7 +272,7 @@ unsigned int mv64x60_get_irq(void) u32 cause; int level1; irq_hw_number_t hwirq; - int virq = NO_IRQ; + int virq = 0; cause = in_le32(mv64x60_irq_reg_base + MV64X60_IC_CPU0_SELECT_CAUSE); if (cause & MV64X60_SELECT_CAUSE_HIGH) { diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 8a0b77a3ec0c..9ea6a221d9d5 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -158,7 +158,7 @@ static int pmi_of_probe(struct platform_device *dev) data->dev = dev; data->irq = irq_of_parse_and_map(np, 0); - if (data->irq == NO_IRQ) { + if (!data->irq) { printk(KERN_ERR "pmi: invalid interrupt.\n"); rc = -EFAULT; goto error_cleanup_iomap; diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 52a93dcae262..9926ad67af76 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -60,7 +60,7 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } hwirq = ppc4xx_hsta_msi.irq_map[irq]; - if (hwirq == NO_IRQ) { + if (!hwirq) { pr_err("%s: Failed mapping irq %d\n", __func__, irq); return -EINVAL; } @@ -110,7 +110,7 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev) int irq; for_each_pci_msi_entry(entry, dev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; irq = hsta_find_hwirq_offset(entry->irq); @@ -166,7 +166,7 @@ static int hsta_msi_probe(struct platform_device *pdev) for (irq = 0; irq < irq_count; irq++) { ppc4xx_hsta_msi.irq_map[irq] = irq_of_parse_and_map(dev->of_node, irq); - if (ppc4xx_hsta_msi.irq_map[irq] == NO_IRQ) { + if (!ppc4xx_hsta_msi.irq_map[irq]) { dev_err(dev, "Unable to map IRQ\n"); ret = -EINVAL; goto out2; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 8fb806135043..590dab4f47d6 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -102,7 +102,7 @@ static int ppc4xx_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) __func__); } virq = irq_of_parse_and_map(msi_data->msi_dev, int_no); - if (virq == NO_IRQ) { + if (!virq) { dev_err(&dev->dev, "%s: fail mapping irq\n", __func__); msi_bitmap_free_hwirqs(&msi_data->bitmap, int_no, 1); return -ENOSPC; @@ -129,7 +129,7 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { - if (entry->irq == NO_IRQ) + if (!entry->irq) continue; hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); @@ -201,7 +201,7 @@ static int ppc4xx_of_msi_remove(struct platform_device *dev) for (i = 0; i < msi_irqs; i++) { virq = msi->msi_virqs[i]; - if (virq != NO_IRQ) + if (virq) irq_dispose_mapping(virq); } diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/sysdev/ppc4xx_soc.c index 5c77c9ba33aa..d41134d2f786 100644 --- a/arch/powerpc/sysdev/ppc4xx_soc.c +++ b/arch/powerpc/sysdev/ppc4xx_soc.c @@ -109,7 +109,7 @@ static int __init ppc4xx_l2c_probe(void) /* Get and map irq number from device tree */ irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { printk(KERN_ERR "irq_of_parse_and_map failed\n"); of_node_put(np); return -ENODEV; diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 379de955aae3..57c971b7839c 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -433,7 +433,7 @@ void tsi108_irq_cascade(struct irq_desc *desc) struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = get_pci_source(); - if (cascade_irq != NO_IRQ) + if (cascade_irq) generic_handle_irq(cascade_irq); chip->irq_eoi(&desc->irq_data); diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index 6893d8f236df..a00949f3e378 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -319,7 +319,7 @@ void __init uic_init_tree(void) } } -/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */ +/* Return an interrupt vector or 0 if no interrupt is pending. */ unsigned int uic_get_irq(void) { u32 msr; diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 0031eda320c3..385e7aa9e273 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -1,6 +1,7 @@ config PPC_XICS def_bool n select PPC_SMP_MUXED_IPI + select HARDIRQS_SW_RESEND config PPC_ICP_NATIVE def_bool n diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c index c1917cf67c3d..e7fa26c4ff73 100644 --- a/arch/powerpc/sysdev/xics/icp-hv.c +++ b/arch/powerpc/sysdev/xics/icp-hv.c @@ -112,10 +112,10 @@ static unsigned int icp_hv_get_irq(void) unsigned int irq; if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; + return 0; irq = irq_find_mapping(xics_host, vec); - if (likely(irq != NO_IRQ)) { + if (likely(irq)) { xics_push_cppr(vec); return irq; } @@ -126,7 +126,7 @@ static unsigned int icp_hv_get_irq(void) /* We might learn about it later, so EOI it */ icp_hv_set_xirr(xirr); - return NO_IRQ; + return 0; } static void icp_hv_set_cpu_priority(unsigned char cppr) diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c index afdf62f2a695..8a6a043e239b 100644 --- a/arch/powerpc/sysdev/xics/icp-native.c +++ b/arch/powerpc/sysdev/xics/icp-native.c @@ -124,10 +124,10 @@ static unsigned int icp_native_get_irq(void) unsigned int irq; if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; + return 0; irq = irq_find_mapping(xics_host, vec); - if (likely(irq != NO_IRQ)) { + if (likely(irq)) { xics_push_cppr(vec); return irq; } @@ -138,7 +138,7 @@ static unsigned int icp_native_get_irq(void) /* We might learn about it later, so EOI it */ icp_native_set_xirr(xirr); - return NO_IRQ; + return 0; } #ifdef CONFIG_SMP diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index 57d72f10a97f..d38e86fd5720 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -23,10 +23,10 @@ static void icp_opal_teardown_cpu(void) { - int cpu = smp_processor_id(); + int hw_cpu = hard_smp_processor_id(); /* Clear any pending IPI */ - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); } static void icp_opal_flush_ipi(void) @@ -51,14 +51,14 @@ static unsigned int icp_opal_get_irq(void) rc = opal_int_get_xirr(&xirr, false); if (rc < 0) - return NO_IRQ; + return 0; xirr = be32_to_cpu(xirr); vec = xirr & 0x00ffffff; if (vec == XICS_IRQ_SPURIOUS) - return NO_IRQ; + return 0; irq = irq_find_mapping(xics_host, vec); - if (likely(irq != NO_IRQ)) { + if (likely(irq)) { xics_push_cppr(vec); return irq; } @@ -69,7 +69,7 @@ static unsigned int icp_opal_get_irq(void) /* We might learn about it later, so EOI it */ opal_int_eoi(xirr); - return NO_IRQ; + return 0; } static void icp_opal_set_cpu_priority(unsigned char cppr) @@ -101,14 +101,16 @@ static void icp_opal_eoi(struct irq_data *d) static void icp_opal_cause_ipi(int cpu, unsigned long data) { - opal_int_set_mfrr(cpu, IPI_PRIORITY); + int hw_cpu = get_hard_smp_processor_id(cpu); + + opal_int_set_mfrr(hw_cpu, IPI_PRIORITY); } static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id) { - int cpu = smp_processor_id(); + int hw_cpu = hard_smp_processor_id(); - opal_int_set_mfrr(cpu, 0xff); + opal_int_set_mfrr(hw_cpu, 0xff); return smp_ipi_demux(); } diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 27c936c080a6..1c6bf4b66f56 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = { .irq_mask = ics_opal_mask_irq, .irq_unmask = ics_opal_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_opal_set_affinity + .irq_set_affinity = ics_opal_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_opal_map(struct ics *ics, unsigned int virq); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 3854dd41558d..78ee5c778ef8 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = { .irq_mask = ics_rtas_mask_irq, .irq_unmask = ics_rtas_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_rtas_set_affinity + .irq_set_affinity = ics_rtas_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_rtas_map(struct ics *ics, unsigned int virq) diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index a795a5f0301c..69d858e51ac7 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -131,7 +131,7 @@ static void xics_request_ipi(void) unsigned int ipi; ipi = irq_create_mapping(xics_host, XICS_IPI); - BUG_ON(ipi == NO_IRQ); + BUG_ON(!ipi); /* * IPIs are marked IRQF_PERCPU. The handler was set in map. @@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - /* They aren't all level sensitive but we just don't really know */ - irq_set_status_flags(virq, IRQ_LEVEL); + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. The device-tree parsing will turn the LSIs + * back to level. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); /* Don't call into ICS for IPIs */ if (hw == XICS_IPI) { @@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, irq_hw_number_t *out_hwirq, unsigned int *out_flags) { - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; + /* + * If intsize is at least 2, we look for the type in the second cell, + * we assume the LSB indicates a level interrupt. + */ + if (intsize > 1) { + if (intspec[1] & 1) + *out_flags = IRQ_TYPE_LEVEL_LOW; + else + *out_flags = IRQ_TYPE_EDGE_RISING; + } else + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +int xics_set_irq_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * We only support these. This has really no effect other than setting + * the corresponding descriptor bits mind you but those will in turn + * affect the resend function when re-enabling an edge interrupt. + * + * Set set the default to edge as explained in map(). + */ + if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) + flow_type = IRQ_TYPE_EDGE_RISING; + + if (flow_type != IRQ_TYPE_EDGE_RISING && + flow_type != IRQ_TYPE_LEVEL_LOW) + return -EINVAL; + + irqd_set_trigger_type(d, flow_type); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +int xics_retrigger(struct irq_data *data) +{ + /* + * We need to push a dummy CPPR when retriggering, since the subsequent + * EOI will try to pop it. Passing 0 works, as the function hard codes + * the priority value anyway. + */ + xics_push_cppr(0); + + /* Tell the core to do a soft retrigger */ return 0; } diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S index 84ad74213c83..7d8b0e8ed6d9 100644 --- a/arch/powerpc/xmon/spr_access.S +++ b/arch/powerpc/xmon/spr_access.S @@ -2,12 +2,12 @@ /* unsigned long xmon_mfspr(sprn, default_value) */ _GLOBAL(xmon_mfspr) - ld r5, .Lmfspr_table@got(r2) + PPC_LL r5, .Lmfspr_table@got(r2) b xmon_mxspr /* void xmon_mtspr(sprn, new_value) */ _GLOBAL(xmon_mtspr) - ld r5, .Lmtspr_table@got(r2) + PPC_LL r5, .Lmtspr_table@got(r2) b xmon_mxspr /* |