diff options
1498 files changed, 26981 insertions, 14196 deletions
diff --git a/Documentation/CodeOfConflict b/Documentation/CodeOfConflict new file mode 100644 index 000000000000..1684d0b4efa6 --- /dev/null +++ b/Documentation/CodeOfConflict @@ -0,0 +1,27 @@ +Code of Conflict +---------------- + +The Linux kernel development effort is a very personal process compared +to "traditional" ways of developing software. Your code and ideas +behind it will be carefully reviewed, often resulting in critique and +criticism. The review will almost always require improvements to the +code before it can be included in the kernel. Know that this happens +because everyone involved wants to see the best possible solution for +the overall success of Linux. This development process has been proven +to create the most robust operating system kernel ever, and we do not +want to do anything to cause the quality of submission and eventual +result to ever decrease. + +If however, anyone feels personally abused, threatened, or otherwise +uncomfortable due to this process, that is not acceptable. If so, +please contact the Linux Foundation's Technical Advisory Board at +<tab@lists.linux-foundation.org>, or the individual members, and they +will work to resolve the issue to the best of their ability. For more +information on who is on the Technical Advisory Board and what their +role is, please see: + http://www.linuxfoundation.org/programs/advisory-councils/tab + +As a reviewer of code, please strive to keep things civil and focused on +the technical issues involved. We are all humans, and frustrations can +be high on both sides of the process. Try to keep in mind the immortal +words of Bill and Ted, "Be excellent to each other." diff --git a/Documentation/devicetree/bindings/arm/exynos/power_domain.txt b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt index f4445e5a2bbb..1e097037349c 100644 --- a/Documentation/devicetree/bindings/arm/exynos/power_domain.txt +++ b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt @@ -22,6 +22,8 @@ Optional Properties: - pclkN, clkN: Pairs of parent of input clock and input clock to the devices in this power domain. Maximum of 4 pairs (N = 0 to 3) are supported currently. +- power-domains: phandle pointing to the parent power domain, for more details + see Documentation/devicetree/bindings/power/power_domain.txt Node of a device using power domains must have a power-domains property defined with a phandle to respective power domain. diff --git a/Documentation/devicetree/bindings/arm/sti.txt b/Documentation/devicetree/bindings/arm/sti.txt index d70ec358736c..8d27f6b084c7 100644 --- a/Documentation/devicetree/bindings/arm/sti.txt +++ b/Documentation/devicetree/bindings/arm/sti.txt @@ -13,6 +13,10 @@ Boards with the ST STiH407 SoC shall have the following properties: Required root node property: compatible = "st,stih407"; +Boards with the ST STiH410 SoC shall have the following properties: +Required root node property: +compatible = "st,stih410"; + Boards with the ST STiH418 SoC shall have the following properties: Required root node property: compatible = "st,stih418"; diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.txt b/Documentation/devicetree/bindings/i2c/i2c-imx.txt index 52d37fd8d3e5..ce4311d726ae 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-imx.txt +++ b/Documentation/devicetree/bindings/i2c/i2c-imx.txt @@ -7,6 +7,7 @@ Required properties: - "fsl,vf610-i2c" for I2C compatible with the one integrated on Vybrid vf610 SoC - reg : Should contain I2C/HS-I2C registers location and length - interrupts : Should contain I2C/HS-I2C interrupt +- clocks : Should contain the I2C/HS-I2C clock specifier Optional properties: - clock-frequency : Constains desired I2C/HS-I2C bus clock frequency in Hz. diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt index 33df3932168e..8db32384a486 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt @@ -27,6 +27,8 @@ property is used. - amd,serdes-cdr-rate: CDR rate speed selection - amd,serdes-pq-skew: PQ (data sampling) skew - amd,serdes-tx-amp: TX amplitude boost +- amd,serdes-dfe-tap-config: DFE taps available to run +- amd,serdes-dfe-tap-enable: DFE taps to enable Example: xgbe_phy@e1240800 { @@ -41,4 +43,6 @@ Example: amd,serdes-cdr-rate = <2>, <2>, <7>; amd,serdes-pq-skew = <10>, <10>, <30>; amd,serdes-tx-amp = <15>, <15>, <10>; + amd,serdes-dfe-tap-config = <3>, <3>, <1>; + amd,serdes-dfe-tap-enable = <0>, <0>, <127>; }; diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index cfcc52705ed8..6151999c5dca 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -4,7 +4,10 @@ Ethernet nodes are defined to describe on-chip ethernet interfaces in APM X-Gene SoC. Required properties for all the ethernet interfaces: -- compatible: Should be "apm,xgene-enet" +- compatible: Should state binding information from the following list, + - "apm,xgene-enet": RGMII based 1G interface + - "apm,xgene1-sgenet": SGMII based 1G interface + - "apm,xgene1-xgenet": XFI based 10G interface - reg: Address and length of the register set for the device. It contains the information of registers in the same order as described by reg-names - reg-names: Should contain the register set names diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index e124847443f8..f0b4cd72411d 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -19,7 +19,9 @@ the parent DSA node. The maximum number of allowed child nodes is 4 (DSA_MAX_SWITCHES). Each of these switch child nodes should have the following required properties: -- reg : Describes the switch address on the MII bus +- reg : Contains two fields. The first one describes the + address on the MII bus. The second is the switch + number that must be unique in cascaded configurations - #address-cells : Must be 1 - #size-cells : Must be 0 diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 98c16672ab5f..0f8ed3710c66 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -19,6 +19,16 @@ Required properties: providing multiple PM domains (e.g. power controllers), but can be any value as specified by device tree binding documentation of particular provider. +Optional properties: + - power-domains : A phandle and PM domain specifier as defined by bindings of + the power controller specified by phandle. + Some power domains might be powered from another power domain (or have + other hardware specific dependencies). For representing such dependency + a standard PM domain consumer binding is used. When provided, all domains + created by the given provider should be subdomains of the domain + specified by this binding. More details about power domain specifier are + available in the next section. + Example: power: power-controller@12340000 { @@ -30,6 +40,25 @@ Example: The node above defines a power controller that is a PM domain provider and expects one cell as its phandle argument. +Example 2: + + parent: power-controller@12340000 { + compatible = "foo,power-controller"; + reg = <0x12340000 0x1000>; + #power-domain-cells = <1>; + }; + + child: power-controller@12340000 { + compatible = "foo,power-controller"; + reg = <0x12341000 0x1000>; + power-domains = <&parent 0>; + #power-domain-cells = <1>; + }; + +The nodes above define two power controllers: 'parent' and 'child'. +Domains created by the 'child' power controller are subdomains of '0' power +domain provided by the 'parent' power controller. + ==PM domain consumers== Required properties: diff --git a/Documentation/devicetree/bindings/serial/of-serial.txt b/Documentation/devicetree/bindings/serial/8250.txt index 91d5ab0e60fc..91d5ab0e60fc 100644 --- a/Documentation/devicetree/bindings/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/serial/8250.txt diff --git a/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt b/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt new file mode 100644 index 000000000000..ebcbb62c0a76 --- /dev/null +++ b/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt @@ -0,0 +1,19 @@ +ETRAX FS UART + +Required properties: +- compatible : "axis,etraxfs-uart" +- reg: offset and length of the register set for the device. +- interrupts: device interrupt + +Optional properties: +- {dtr,dsr,ri,cd}-gpios: specify a GPIO for DTR/DSR/RI/CD + line respectively. + +Example: + +serial@b00260000 { + compatible = "axis,etraxfs-uart"; + reg = <0xb0026000 0x1000>; + interrupts = <68>; + status = "disabled"; +}; diff --git a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt index 7f76214f728a..289c40ed7470 100644 --- a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt +++ b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt @@ -21,6 +21,18 @@ Optional properties: - reg-io-width : the size (in bytes) of the IO accesses that should be performed on the device. If this property is not present then single byte accesses are used. +- dcd-override : Override the DCD modem status signal. This signal will always + be reported as active instead of being obtained from the modem status + register. Define this if your serial port does not use this pin. +- dsr-override : Override the DTS modem status signal. This signal will always + be reported as active instead of being obtained from the modem status + register. Define this if your serial port does not use this pin. +- cts-override : Override the CTS modem status signal. This signal will always + be reported as active instead of being obtained from the modem status + register. Define this if your serial port does not use this pin. +- ri-override : Override the RI modem status signal. This signal will always be + reported as inactive instead of being obtained from the modem status register. + Define this if your serial port does not use this pin. Example: @@ -31,6 +43,10 @@ Example: interrupts = <10>; reg-shift = <2>; reg-io-width = <4>; + dcd-override; + dsr-override; + cts-override; + ri-override; }; Example with one clock: diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt index 56742bc70218..7d44eae7ab0b 100644 --- a/Documentation/devicetree/bindings/submitting-patches.txt +++ b/Documentation/devicetree/bindings/submitting-patches.txt @@ -12,6 +12,9 @@ I. For patch submitters devicetree@vger.kernel.org + and Cc: the DT maintainers. Use scripts/get_maintainer.pl to identify + all of the DT maintainers. + 3) The Documentation/ portion of the patch should come in the series before the code implementing the binding. diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt index 43404b197933..332e625f6ed0 100644 --- a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt @@ -4,7 +4,7 @@ Required properties: - compatible : "renesas,thermal-<soctype>", "renesas,rcar-thermal" as fallback. Examples with soctypes are: - - "renesas,thermal-r8a73a4" (R-Mobile AP6) + - "renesas,thermal-r8a73a4" (R-Mobile APE6) - "renesas,thermal-r8a7779" (R-Car H1) - "renesas,thermal-r8a7790" (R-Car H2) - "renesas,thermal-r8a7791" (R-Car M2-W) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 389ca1347a77..fae26d014aaf 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -20,6 +20,7 @@ amlogic Amlogic, Inc. ams AMS AG amstaos AMS-Taos Inc. apm Applied Micro Circuits Corporation (APM) +arasan Arasan Chip Systems arm ARM Ltd. armadeus ARMadeus Systems SARL asahi-kasei Asahi Kasei Corp. @@ -27,6 +28,7 @@ atmel Atmel Corporation auo AU Optronics Corporation avago Avago Technologies avic Shanghai AVIC Optoelectronics Co., Ltd. +axis Axis Communications AB bosch Bosch Sensortec GmbH brcm Broadcom Corporation buffalo Buffalo, Inc. diff --git a/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt index f90e294d7631..a4d869744f59 100644 --- a/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt @@ -26,6 +26,11 @@ Optional properties: - atmel,disable : Should be present if you want to disable the watchdog. - atmel,idle-halt : Should be present if you want to stop the watchdog when entering idle state. + CAUTION: This property should be used with care, it actually makes the + watchdog not counting when the CPU is in idle state, therefore the + watchdog reset time depends on mean CPU usage and will not reset at all + if the CPU stop working while it is in idle state, which is probably + not what you want. - atmel,dbg-halt : Should be present if you want to stop the watchdog when entering debug state. diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt index a63e5e013a8c..92ae734c00c3 100644 --- a/Documentation/input/alps.txt +++ b/Documentation/input/alps.txt @@ -114,6 +114,9 @@ ALPS Absolute Mode - Protocol Version 2 byte 4: 0 y6 y5 y4 y3 y2 y1 y0 byte 5: 0 z6 z5 z4 z3 z2 z1 z0 +Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for +the DualPoint Stick. + Dualpoint device -- interleaved packet format --------------------------------------------- @@ -127,6 +130,11 @@ Dualpoint device -- interleaved packet format byte 7: 0 y6 y5 y4 y3 y2 y1 y0 byte 8: 0 z6 z5 z4 z3 z2 z1 z0 +Devices which use the interleaving format normally send standard PS/2 mouse +packets for the DualPoint Stick + ALPS Absolute Mode packets for the +touchpad, switching to the interleaved packet format when both the stick and +the touchpad are used at the same time. + ALPS Absolute Mode - Protocol Version 3 --------------------------------------- diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index c587a966413e..96705616f582 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -294,6 +294,12 @@ accordingly. This property does not affect kernel behavior. The kernel does not provide button emulation for such devices but treats them as any other INPUT_PROP_BUTTONPAD device. +INPUT_PROP_ACCELEROMETER +------------------------- +Directional axes on this device (absolute and/or relative x, y, z) represent +accelerometer data. All other axes retain their meaning. A device must not mix +regular directional axes and accelerometer axes on the same event node. + Guidelines: ========== The guidelines below ensure proper single-touch and multi-finger functionality. diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt index 7b4f59c09ee2..b85d000faeb4 100644 --- a/Documentation/input/multi-touch-protocol.txt +++ b/Documentation/input/multi-touch-protocol.txt @@ -312,9 +312,12 @@ ABS_MT_TOOL_TYPE The type of approaching tool. A lot of kernel drivers cannot distinguish between different tool types, such as a finger or a pen. In such cases, the -event should be omitted. The protocol currently supports MT_TOOL_FINGER and -MT_TOOL_PEN [2]. For type B devices, this event is handled by input core; -drivers should instead use input_mt_report_slot_state(). +event should be omitted. The protocol currently supports MT_TOOL_FINGER, +MT_TOOL_PEN, and MT_TOOL_PALM [2]. For type B devices, this event is handled +by input core; drivers should instead use input_mt_report_slot_state(). +A contact's ABS_MT_TOOL_TYPE may change over time while still touching the +device, because the firmware may not be able to determine which tool is being +used when it first appears. ABS_MT_BLOB_ID diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bfcb1a62a7b4..01aa47d3b6ab 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1036,7 +1036,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: {"off" | "on" | "skip[mbr]"} efi= [EFI] - Format: { "old_map", "nochunk", "noruntime" } + Format: { "old_map", "nochunk", "noruntime", "debug" } old_map [X86-64]: switch to the old ioremap-based EFI runtime services mapping. 32-bit still uses this one by default. @@ -1044,6 +1044,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. boot stub, as chunking can cause problems with some firmware implementations. noruntime : disable EFI runtime services support + debug: enable misc debug output efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt index 2f9c5a5fcb25..8afb29a8604a 100644 --- a/Documentation/power/suspend-and-interrupts.txt +++ b/Documentation/power/suspend-and-interrupts.txt @@ -40,8 +40,10 @@ but also to IPIs and to some other special-purpose interrupts. The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when requesting a special-purpose interrupt. It causes suspend_device_irqs() to -leave the corresponding IRQ enabled so as to allow the interrupt to work all -the time as expected. +leave the corresponding IRQ enabled so as to allow the interrupt to work as +expected during the suspend-resume cycle, but does not guarantee that the +interrupt will wake the system from a suspended state -- for such cases it is +necessary to use enable_irq_wake(). Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one user of it. Thus, if the IRQ is shared, all of the interrupt handlers installed @@ -110,8 +112,9 @@ any special interrupt handling logic for it to work. IRQF_NO_SUSPEND and enable_irq_wake() ------------------------------------- -There are no valid reasons to use both enable_irq_wake() and the IRQF_NO_SUSPEND -flag on the same IRQ. +There are very few valid reasons to use both enable_irq_wake() and the +IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the +same device. First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND interrupts (interrupt handlers are invoked after suspend_device_irqs()) are @@ -120,4 +123,13 @@ handlers are not invoked after suspend_device_irqs()). Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not to individual interrupt handlers, so sharing an IRQ between a system wakeup -interrupt source and an IRQF_NO_SUSPEND interrupt source does not make sense. +interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally +make sense. + +In rare cases an IRQ can be shared between a wakeup device driver and an +IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver +must be able to discern spurious IRQs from genuine wakeup events (signalling +the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to +ensure that the IRQ will function as a wakeup source, and must request the IRQ +with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If +these requirements are not met, it is not valid to use IRQF_COND_SUSPEND. diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt index 596b60c08b74..8446f1ea1410 100644 --- a/Documentation/rtc.txt +++ b/Documentation/rtc.txt @@ -204,266 +204,4 @@ Some common examples: * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code. -If all else fails, check out the rtc-test.c driver! - - --------------------- 8< ---------------- 8< ----------------------------- - -/* - * Real Time Clock Driver Test/Example Program - * - * Compile with: - * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest - * - * Copyright (C) 1996, Paul Gortmaker. - * - * Released under the GNU General Public License, version 2, - * included herein by reference. - * - */ - -#include <stdio.h> -#include <linux/rtc.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> - - -/* - * This expects the new RTC class driver framework, working with - * clocks that will often not be clones of what the PC-AT had. - * Use the command line to specify another RTC if you need one. - */ -static const char default_rtc[] = "/dev/rtc0"; - - -int main(int argc, char **argv) -{ - int i, fd, retval, irqcount = 0; - unsigned long tmp, data; - struct rtc_time rtc_tm; - const char *rtc = default_rtc; - - switch (argc) { - case 2: - rtc = argv[1]; - /* FALLTHROUGH */ - case 1: - break; - default: - fprintf(stderr, "usage: rtctest [rtcdev]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - - if (fd == -1) { - perror(rtc); - exit(errno); - } - - fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); - - /* Turn on update interrupts (one per second) */ - retval = ioctl(fd, RTC_UIE_ON, 0); - if (retval == -1) { - if (errno == ENOTTY) { - fprintf(stderr, - "\n...Update IRQs not supported.\n"); - goto test_READ; - } - perror("RTC_UIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", - rtc); - fflush(stderr); - for (i=1; i<6; i++) { - /* This read will block */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); - fflush(stderr); - for (i=1; i<6; i++) { - struct timeval tv = {5, 0}; /* 5 second timeout on select */ - fd_set readfds; - - FD_ZERO(&readfds); - FD_SET(fd, &readfds); - /* The select will wait until an RTC interrupt happens. */ - retval = select(fd+1, &readfds, NULL, NULL, &tv); - if (retval == -1) { - perror("select"); - exit(errno); - } - /* This read won't block unlike the select-less case above. */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Turn off update interrupts */ - retval = ioctl(fd, RTC_UIE_OFF, 0); - if (retval == -1) { - perror("RTC_UIE_OFF ioctl"); - exit(errno); - } - -test_READ: - /* Read the RTC time/date */ - retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Set the alarm to 5 sec in the future, and check for rollover */ - rtc_tm.tm_sec += 5; - if (rtc_tm.tm_sec >= 60) { - rtc_tm.tm_sec %= 60; - rtc_tm.tm_min++; - } - if (rtc_tm.tm_min == 60) { - rtc_tm.tm_min = 0; - rtc_tm.tm_hour++; - } - if (rtc_tm.tm_hour == 24) - rtc_tm.tm_hour = 0; - - retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); - if (retval == -1) { - if (errno == ENOTTY) { - fprintf(stderr, - "\n...Alarm IRQs not supported.\n"); - goto test_PIE; - } - perror("RTC_ALM_SET ioctl"); - exit(errno); - } - - /* Read the current alarm settings */ - retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); - if (retval == -1) { - perror("RTC_ALM_READ ioctl"); - exit(errno); - } - - fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Enable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_ON, 0); - if (retval == -1) { - perror("RTC_AIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Waiting 5 seconds for alarm..."); - fflush(stderr); - /* This blocks until the alarm ring causes an interrupt */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - irqcount++; - fprintf(stderr, " okay. Alarm rang.\n"); - - /* Disable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_OFF, 0); - if (retval == -1) { - perror("RTC_AIE_OFF ioctl"); - exit(errno); - } - -test_PIE: - /* Read periodic IRQ rate */ - retval = ioctl(fd, RTC_IRQP_READ, &tmp); - if (retval == -1) { - /* not all RTCs support periodic IRQs */ - if (errno == ENOTTY) { - fprintf(stderr, "\nNo periodic IRQ support\n"); - goto done; - } - perror("RTC_IRQP_READ ioctl"); - exit(errno); - } - fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); - - fprintf(stderr, "Counting 20 interrupts at:"); - fflush(stderr); - - /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ - for (tmp=2; tmp<=64; tmp*=2) { - - retval = ioctl(fd, RTC_IRQP_SET, tmp); - if (retval == -1) { - /* not all RTCs can change their periodic IRQ rate */ - if (errno == ENOTTY) { - fprintf(stderr, - "\n...Periodic IRQ rate is fixed\n"); - goto done; - } - perror("RTC_IRQP_SET ioctl"); - exit(errno); - } - - fprintf(stderr, "\n%ldHz:\t", tmp); - fflush(stderr); - - /* Enable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_ON, 0); - if (retval == -1) { - perror("RTC_PIE_ON ioctl"); - exit(errno); - } - - for (i=1; i<21; i++) { - /* This blocks */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Disable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_OFF, 0); - if (retval == -1) { - perror("RTC_PIE_OFF ioctl"); - exit(errno); - } - } - -done: - fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); - - close(fd); - - return 0; -} +If all else fails, check out the tools/testing/selftests/timers/rtctest.c test! diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b112efc816f1..bc9f6fe44e27 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -997,7 +997,7 @@ for vm-wide capabilities. 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -1011,7 +1011,7 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86] + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86,arm/arm64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) which has not yet received an INIT signal [x86] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is @@ -1020,7 +1020,7 @@ Possible values are: is waiting for an interrupt [x86] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) [x86] - - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] + - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64] - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) [s390] @@ -1031,11 +1031,15 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error @@ -1047,6 +1051,10 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not. 4.40 KVM_SET_IDENTITY_MAP_ADDR @@ -1967,15 +1975,25 @@ registers, find a list below: MIPS | KVM_REG_MIPS_CP0_STATUS | 32 MIPS | KVM_REG_MIPS_CP0_CAUSE | 32 MIPS | KVM_REG_MIPS_CP0_EPC | 64 + MIPS | KVM_REG_MIPS_CP0_PRID | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 MIPS | KVM_REG_MIPS_COUNT_CTL | 64 MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 MIPS | KVM_REG_MIPS_COUNT_HZ | 64 + MIPS | KVM_REG_MIPS_FPR_32(0..31) | 32 + MIPS | KVM_REG_MIPS_FPR_64(0..31) | 64 + MIPS | KVM_REG_MIPS_VEC_128(0..31) | 128 + MIPS | KVM_REG_MIPS_FCR_IR | 32 + MIPS | KVM_REG_MIPS_FCR_CSR | 32 + MIPS | KVM_REG_MIPS_MSA_IR | 32 + MIPS | KVM_REG_MIPS_MSA_CSR | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -2029,6 +2047,25 @@ patterns depending on whether they're 32-bit or 64-bit registers: MIPS KVM control registers (see above) have the following id bit patterns: 0x7030 0000 0002 <reg:16> +MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following +id bit patterns depending on the size of the register being accessed. They are +always accessed according to the current guest FPU mode (Status.FR and +Config5.FRE), i.e. as the guest would see them, and they become unpredictable +if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector +registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they +overlap the FPU registers: + 0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers) + 0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers) + 0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers) + +MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the +following id bit patterns: + 0x7020 0000 0003 01 <0:3> <reg:5> + +MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the +following id bit patterns: + 0x7020 0000 0003 02 <0:3> <reg:5> + 4.69 KVM_GET_ONE_REG @@ -2234,7 +2271,7 @@ into the hash PTE second double word). 4.75 KVM_IRQFD Capability: KVM_CAP_IRQFD -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irqfd (in) Returns: 0 on success, -1 on error @@ -2260,6 +2297,10 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. +On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared +Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is +given by gsi + 32. + 4.76 KVM_PPC_ALLOCATE_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB @@ -2716,6 +2757,227 @@ The fields in each entry are defined as follows: eax, ebx, ecx, edx: the values returned by the cpuid instruction for this function/index combination +4.89 KVM_S390_MEM_OP + +Capability: KVM_CAP_S390_MEM_OP +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_mem_op (in) +Returns: = 0 on success, + < 0 on generic error (e.g. -EFAULT or -ENOMEM), + > 0 if an exception occurred while walking the page tables + +Read or write data from/to the logical (virtual) memory of a VPCU. + +Parameters are specified via the following structure: + +struct kvm_s390_mem_op { + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar; /* the access register number */ + __u8 reserved[31]; /* should be set to 0 */ +}; + +The type of operation is specified in the "op" field. It is either +KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or +KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The +KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check +whether the corresponding memory access would create an access exception +(without touching the data in the memory at the destination). In case an +access exception occurred while walking the MMU tables of the guest, the +ioctl returns a positive error number to indicate the type of exception. +This exception is also raised directly at the corresponding VCPU if the +flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field. + +The start address of the memory region has to be specified in the "gaddr" +field, and the length of the region in the "size" field. "buf" is the buffer +supplied by the userspace application where the read data should be written +to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written +is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL +when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access +register number to be used. + +The "reserved" field is meant for future extensions. It is not used by +KVM with the currently defined set of flags. + +4.90 KVM_S390_GET_SKEYS + +Capability: KVM_CAP_S390_SKEYS +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_skeys +Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage + keys, negative value on error + +This ioctl is used to get guest storage key values on the s390 +architecture. The ioctl takes parameters via the kvm_s390_skeys struct. + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +The start_gfn field is the number of the first guest frame whose storage keys +you want to get. + +The count field is the number of consecutive frames (starting from start_gfn) +whose storage keys to get. The count field must be at least 1 and the maximum +allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range +will cause the ioctl to return -EINVAL. + +The skeydata_addr field is the address to a buffer large enough to hold count +bytes. This buffer will be filled with storage key data by the ioctl. + +4.91 KVM_S390_SET_SKEYS + +Capability: KVM_CAP_S390_SKEYS +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_skeys +Returns: 0 on success, negative value on error + +This ioctl is used to set guest storage key values on the s390 +architecture. The ioctl takes parameters via the kvm_s390_skeys struct. +See section on KVM_S390_GET_SKEYS for struct definition. + +The start_gfn field is the number of the first guest frame whose storage keys +you want to set. + +The count field is the number of consecutive frames (starting from start_gfn) +whose storage keys to get. The count field must be at least 1 and the maximum +allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range +will cause the ioctl to return -EINVAL. + +The skeydata_addr field is the address to a buffer containing count bytes of +storage keys. Each byte in the buffer will be set as the storage key for a +single frame starting at start_gfn for count frames. + +Note: If any architecturally invalid key value is found in the given data then +the ioctl will return -EINVAL. + +4.92 KVM_S390_IRQ + +Capability: KVM_CAP_S390_INJECT_IRQ +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq (in) +Returns: 0 on success, -1 on error +Errors: + EINVAL: interrupt type is invalid + type is KVM_S390_SIGP_STOP and flag parameter is invalid value + type is KVM_S390_INT_EXTERNAL_CALL and code is bigger + than the maximum of VCPUs + EBUSY: type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped + type is KVM_S390_SIGP_STOP and a stop irq is already pending + type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt + is already pending + +Allows to inject an interrupt to the guest. + +Using struct kvm_s390_irq as a parameter allows +to inject additional payload which is not +possible via KVM_S390_INTERRUPT. + +Interrupt parameters are passed via kvm_s390_irq: + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +type can be one of the following: + +KVM_S390_SIGP_STOP - sigp stop; parameter in .stop +KVM_S390_PROGRAM_INT - program check; parameters in .pgm +KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix +KVM_S390_RESTART - restart; no parameters +KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters +KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters +KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg +KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall +KVM_S390_MCHK - machine check interrupt; parameters in .mchk + + +Note that the vcpu ioctl is asynchronous to vcpu execution. + +4.94 KVM_S390_GET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (out) +Returns: >= number of bytes copied into buffer, + -EINVAL if buffer size is 0, + -ENOBUFS if buffer size is too small to fit all pending interrupts, + -EFAULT if the buffer address was invalid + +This ioctl allows userspace to retrieve the complete state of all currently +pending interrupts in a single buffer. Use cases include migration +and introspection. The parameter structure contains the address of a +userspace buffer and its length: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + +Userspace passes in the above struct and for each pending interrupt a +struct kvm_s390_irq is copied to the provided buffer. + +If -ENOBUFS is returned the buffer provided was too small and userspace +may retry with a bigger buffer. + +4.95 KVM_S390_SET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (in) +Returns: 0 on success, + -EFAULT if the buffer address was invalid, + -EINVAL for an invalid buffer length (see below), + -EBUSY if there were already interrupts pending, + errors occurring when actually injecting the + interrupt. See KVM_S390_IRQ. + +This ioctl allows userspace to set the complete state of all cpu-local +interrupts currently pending for the vcpu. It is intended for restoring +interrupt state after a migration. The input parameter is a userspace buffer +containing a struct kvm_s390_irq_state: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 len; + __u32 pad; +}; + +The userspace memory referenced by buf contains a struct kvm_s390_irq +for each interrupt to be injected into the guest. +If one of the interrupts could not be injected for some reason the +ioctl aborts. + +len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 +and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), +which is the maximum number of possibly pending cpu-local interrupts. + 5. The kvm_run structure ------------------------ @@ -3189,6 +3451,31 @@ Parameters: none This capability enables the in-kernel irqchip for s390. Please refer to "4.24 KVM_CREATE_IRQCHIP" for details. +6.9 KVM_CAP_MIPS_FPU + +Architectures: mips +Target: vcpu +Parameters: args[0] is reserved for future use (should be 0). + +This capability allows the use of the host Floating Point Unit by the guest. It +allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is +done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed +(depending on the current guest FPU register mode), and the Status.FR, +Config5.FRE bits are accessible via the KVM API and also from the guest, +depending on them being supported by the FPU. + +6.10 KVM_CAP_MIPS_MSA + +Architectures: mips +Target: vcpu +Parameters: args[0] is reserved for future use (should be 0). + +This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest. +It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest. +Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be +accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from +the guest. + 7. Capabilities that can be enabled on VMs ------------------------------------------ @@ -3248,3 +3535,41 @@ All other orders will be handled completely in user space. Only privileged operation exceptions will be checked for in the kernel (or even in the hardware prior to interception). If this capability is not enabled, the old way of handling SIGP orders is used (partially in kernel and user space). + +7.3 KVM_CAP_S390_VECTOR_REGISTERS + +Architectures: s390 +Parameters: none +Returns: 0 on success, negative value on error + +Allows use of the vector registers introduced with z13 processor, and +provides for the synchronization between host and user space. Will +return -EINVAL if the machine does not support vectors. + +7.4 KVM_CAP_S390_USER_STSI + +Architectures: s390 +Parameters: none + +This capability allows post-handlers for the STSI instruction. After +initial handling in the kernel, KVM exits to user space with +KVM_EXIT_S390_STSI to allow user space to insert further data. + +Before exiting to userspace, kvm handlers should fill in s390_stsi field of +vcpu->run: +struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; +} s390_stsi; + +@addr - guest address of STSI SYSIB +@fc - function code +@sel1 - selector 1 +@sel2 - selector 2 +@ar - access register number + +KVM handlers should exit to userspace with rc = -EREMOTE. diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 4ceef53164b0..d1ad9d5cae46 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -27,6 +27,9 @@ Groups: Copies all floating interrupts into a buffer provided by userspace. When the buffer is too small it returns -ENOMEM, which is the indication for userspace to try again with a bigger buffer. + -ENOBUFS is returned when the allocation of a kernelspace buffer has + failed. + -EFAULT is returned when copying data to userspace failed. All interrupts remain pending, i.e. are not deleted from the list of currently pending interrupts. attr->addr contains the userspace address of the buffer into which all diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index a75e3adaa39d..88b85899d309 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -406,6 +406,12 @@ Protocol: 2.00+ - If 0, the protected-mode code is loaded at 0x10000. - If 1, the protected-mode code is loaded at 0x100000. + Bit 1 (kernel internal): ALSR_FLAG + - Used internally by the compressed kernel to communicate + KASLR status to kernel proper. + If 1, KASLR enabled. + If 0, KASLR disabled. + Bit 5 (write): QUIET_FLAG - If 0, print early messages. - If 1, suppress early messages. diff --git a/MAINTAINERS b/MAINTAINERS index eaf999638a65..b84686826b23 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -637,8 +637,7 @@ F: drivers/gpu/drm/radeon/radeon_kfd.h F: include/uapi/linux/kfd_ioctl.h AMD MICROCODE UPDATE SUPPORT -M: Andreas Herrmann <herrmann.der.user@googlemail.com> -L: amd64-microcode@amd64.org +M: Borislav Petkov <bp@alien8.de> S: Maintained F: arch/x86/kernel/cpu/microcode/amd* @@ -1030,6 +1029,16 @@ F: arch/arm/mach-mxs/ F: arch/arm/boot/dts/imx* F: arch/arm/configs/imx*_defconfig +ARM/FREESCALE VYBRID ARM ARCHITECTURE +M: Shawn Guo <shawn.guo@linaro.org> +M: Sascha Hauer <kernel@pengutronix.de> +R: Stefan Agner <stefan@agner.ch> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git +F: arch/arm/mach-imx/*vf610* +F: arch/arm/boot/dts/vf* + ARM/GLOMATION GESBC9312SX MACHINE SUPPORT M: Lennert Buytenhek <kernel@wantstofly.org> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -1176,7 +1185,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ -F: drivers/rtc/armada38x-rtc +F: drivers/rtc/rtc-armada38x.c ARM/Marvell Berlin SoC support M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> @@ -1188,6 +1197,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support M: Jason Cooper <jason@lakedaemon.net> M: Andrew Lunn <andrew@lunn.ch> M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> +M: Gregory Clement <gregory.clement@free-electrons.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-dove/ @@ -1351,6 +1361,7 @@ F: drivers/i2c/busses/i2c-rk3x.c F: drivers/*/*rockchip* F: drivers/*/*/*rockchip* F: sound/soc/rockchip/ +N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim <kgene@kernel.org> @@ -1664,8 +1675,8 @@ F: drivers/misc/eeprom/at24.c F: include/linux/platform_data/at24.h ATA OVER ETHERNET (AOE) DRIVER -M: "Ed L. Cashin" <ecashin@coraid.com> -W: http://support.coraid.com/support/linux +M: "Ed L. Cashin" <ed.cashin@acm.org> +W: http://www.openaoe.org/ S: Supported F: Documentation/aoe/ F: drivers/block/aoe/ @@ -1730,7 +1741,7 @@ S: Maintained F: drivers/net/ethernet/atheros/ ATM -M: Chas Williams <chas@cmf.nrl.navy.mil> +M: Chas Williams <3chas3@gmail.com> L: linux-atm-general@lists.sourceforge.net (moderated for non-subscribers) L: netdev@vger.kernel.org W: http://linux-atm.sourceforge.net @@ -2065,7 +2076,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh <j.vosburgh@gmail.com> M: Veaceslav Falico <vfalico@gmail.com> -M: Andy Gospodarek <andy@greyhouse.net> +M: Andy Gospodarek <gospo@cumulusnetworks.com> L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ S: Supported @@ -2107,7 +2118,6 @@ F: drivers/net/ethernet/broadcom/bnx2x/ BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE M: Christian Daudt <bcm@fixthebug.org> -M: Matt Porter <mporter@linaro.org> M: Florian Fainelli <f.fainelli@gmail.com> L: bcm-kernel-feedback-list@broadcom.com T: git git://github.com/broadcom/mach-bcm @@ -2369,8 +2379,9 @@ F: arch/x86/include/asm/tce.h CAN NETWORK LAYER M: Oliver Hartkopp <socketcan@hartkopp.net> +M: Marc Kleine-Budde <mkl@pengutronix.de> L: linux-can@vger.kernel.org -W: http://gitorious.org/linux-can +W: https://github.com/linux-can T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git S: Maintained @@ -2386,7 +2397,7 @@ CAN NETWORK DRIVERS M: Wolfgang Grandegger <wg@grandegger.com> M: Marc Kleine-Budde <mkl@pengutronix.de> L: linux-can@vger.kernel.org -W: http://gitorious.org/linux-can +W: https://github.com/linux-can T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git S: Maintained @@ -3241,6 +3252,13 @@ S: Maintained F: Documentation/hwmon/dme1737 F: drivers/hwmon/dme1737.c +DMI/SMBIOS SUPPORT +M: Jean Delvare <jdelvare@suse.de> +S: Maintained +F: drivers/firmware/dmi-id.c +F: drivers/firmware/dmi_scan.c +F: include/linux/dmi.h + DOCKING STATION DRIVER M: Shaohua Li <shaohua.li@intel.com> L: linux-acpi@vger.kernel.org @@ -5076,7 +5094,7 @@ S: Supported F: drivers/platform/x86/intel_menlow.c INTEL IA32 MICROCODE UPDATE SUPPORT -M: Tigran Aivazian <tigran@aivazian.fsnet.co.uk> +M: Borislav Petkov <bp@alien8.de> S: Maintained F: arch/x86/kernel/cpu/microcode/core* F: arch/x86/kernel/cpu/microcode/intel* @@ -5117,22 +5135,21 @@ M: Deepak Saxena <dsaxena@plexity.net> S: Maintained F: drivers/char/hw_random/ixp4xx-rng.c -INTEL ETHERNET DRIVERS (e100/e1000/e1000e/fm10k/igb/igbvf/ixgb/ixgbe/ixgbevf/i40e/i40evf) +INTEL ETHERNET DRIVERS M: Jeff Kirsher <jeffrey.t.kirsher@intel.com> -M: Jesse Brandeburg <jesse.brandeburg@intel.com> -M: Bruce Allan <bruce.w.allan@intel.com> -M: Carolyn Wyborny <carolyn.wyborny@intel.com> -M: Don Skidmore <donald.c.skidmore@intel.com> -M: Greg Rose <gregory.v.rose@intel.com> -M: Matthew Vick <matthew.vick@intel.com> -M: John Ronciak <john.ronciak@intel.com> -M: Mitch Williams <mitch.a.williams@intel.com> -M: Linux NICS <linux.nics@intel.com> -L: e1000-devel@lists.sourceforge.net +R: Jesse Brandeburg <jesse.brandeburg@intel.com> +R: Shannon Nelson <shannon.nelson@intel.com> +R: Carolyn Wyborny <carolyn.wyborny@intel.com> +R: Don Skidmore <donald.c.skidmore@intel.com> +R: Matthew Vick <matthew.vick@intel.com> +R: John Ronciak <john.ronciak@intel.com> +R: Mitch Williams <mitch.a.williams@intel.com> +L: intel-wired-lan@lists.osuosl.org W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git +Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git S: Supported F: Documentation/networking/e100.txt F: Documentation/networking/e1000.txt @@ -5574,6 +5591,8 @@ S: Supported F: Documentation/*/kvm*.txt F: Documentation/virtual/kvm/ F: arch/*/kvm/ +F: arch/x86/kernel/kvm.c +F: arch/x86/kernel/kvmclock.c F: arch/*/include/asm/kvm* F: include/linux/kvm* F: include/uapi/linux/kvm* @@ -8480,6 +8499,14 @@ S: Supported L: netdev@vger.kernel.org F: drivers/net/ethernet/samsung/sxgbe/ +SAMSUNG THERMAL DRIVER +M: Lukasz Majewski <l.majewski@samsung.com> +L: linux-pm@vger.kernel.org +L: linux-samsung-soc@vger.kernel.org +S: Supported +T: https://github.com/lmajewski/linux-samsung-thermal.git +F: drivers/thermal/samsung/ + SAMSUNG USB2 PHY DRIVER M: Kamil Debski <k.debski@samsung.com> L: linux-kernel@vger.kernel.org @@ -8534,6 +8561,7 @@ F: include/uapi/linux/timex.h F: kernel/time/clocksource.c F: kernel/time/time*.c F: kernel/time/ntp.c +F: tools/testing/selftests/timers/ SC1200 WDT DRIVER M: Zwane Mwaikambo <zwanem@gmail.com> @@ -10188,6 +10216,13 @@ S: Maintained F: Documentation/usb/ohci.txt F: drivers/usb/host/ohci* +USB OTG FSM (Finite State Machine) +M: Peter Chen <Peter.Chen@freescale.com> +T: git git://github.com/hzpeterchen/linux-usb.git +L: linux-usb@vger.kernel.org +S: Maintained +F: drivers/usb/common/usb-otg-fsm.c + USB OVER IP DRIVER M: Valentina Manea <valentina.manea.m@gmail.com> M: Shuah Khan <shuah.kh@samsung.com> @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 0 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = NAME = Hurr durr I'ma sheep # *DOCUMENTATION* @@ -779,6 +779,7 @@ KBUILD_ARFLAGS := $(call ar-option,D) # check for 'asm goto' ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif include $(srctree)/scripts/Makefile.kasan diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c index c8d284d8521f..f535a3fd0f60 100644 --- a/arch/alpha/kernel/rtc.c +++ b/arch/alpha/kernel/rtc.c @@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm) } static int -alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime) +alpha_rtc_set_mmss(struct device *dev, time64_t nowtime) { int retval = 0; int real_seconds, real_minutes, cmos_minutes; @@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg) static const struct rtc_class_ops alpha_rtc_ops = { .read_time = alpha_rtc_read_time, .set_time = alpha_rtc_set_time, - .set_mmss = alpha_rtc_set_mmss, + .set_mmss64 = alpha_rtc_set_mmss, .ioctl = alpha_rtc_ioctl, }; @@ -276,7 +276,7 @@ do_remote_mmss(void *data) } static int -remote_set_mmss(struct device *dev, unsigned long now) +remote_set_mmss(struct device *dev, time64_t now) { union remote_data x; if (smp_processor_id() != boot_cpuid) { @@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now) static const struct rtc_class_ops remote_rtc_ops = { .read_time = remote_read_time, .set_time = remote_set_time, - .set_mmss = remote_set_mmss, + .set_mmss64 = remote_set_mmss, .ioctl = alpha_rtc_ioctl, }; #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 4e547296831d..52312cb5dbe2 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -47,9 +47,6 @@ struct thread_struct { /* Forward declaration, a strange C thing */ struct task_struct; -/* Return saved PC of a blocked thread */ -unsigned long thread_saved_pc(struct task_struct *t); - #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1) @@ -72,18 +69,21 @@ unsigned long thread_saved_pc(struct task_struct *t); #define release_segments(mm) do { } while (0) #define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret) +#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) /* * Where abouts of Task's sp, fp, blink when it was last seen in kernel mode. * Look in process.c for details of kernel stack layout */ -#define KSTK_ESP(tsk) (tsk->thread.ksp) +#define TSK_K_ESP(tsk) (tsk->thread.ksp) -#define KSTK_REG(tsk, off) (*((unsigned int *)(KSTK_ESP(tsk) + \ +#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \ sizeof(struct callee_regs) + off))) -#define KSTK_BLINK(tsk) KSTK_REG(tsk, 4) -#define KSTK_FP(tsk) KSTK_REG(tsk, 0) +#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) +#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0) + +#define thread_saved_pc(tsk) TSK_K_BLINK(tsk) extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp); diff --git a/arch/arc/include/asm/stacktrace.h b/arch/arc/include/asm/stacktrace.h new file mode 100644 index 000000000000..b29b6064ea14 --- /dev/null +++ b/arch/arc/include/asm/stacktrace.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_STACKTRACE_H +#define __ASM_STACKTRACE_H + +#include <linux/sched.h> + +/** + * arc_unwind_core - Unwind the kernel mode stack for an execution context + * @tsk: NULL for current task, specific task otherwise + * @regs: pt_regs used to seed the unwinder {SP, FP, BLINK, PC} + * If NULL, use pt_regs of @tsk (if !NULL) otherwise + * use the current values of {SP, FP, BLINK, PC} + * @consumer_fn: Callback invoked for each frame unwound + * Returns 0 to continue unwinding, -1 to stop + * @arg: Arg to callback + * + * Returns the address of first function in stack + * + * Semantics: + * - synchronous unwinding (e.g. dump_stack): @tsk NULL, @regs NULL + * - Asynchronous unwinding of sleeping task: @tsk !NULL, @regs NULL + * - Asynchronous unwinding of intr/excp etc: @tsk !NULL, @regs !NULL + */ +notrace noinline unsigned int arc_unwind_core( + struct task_struct *tsk, struct pt_regs *regs, + int (*consumer_fn) (unsigned int, void *), + void *arg); + +#endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index fdd89715d2d3..98c00a2d4dd9 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -192,29 +192,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return 0; } -/* - * API: expected by schedular Code: If thread is sleeping where is that. - * What is this good for? it will be always the scheduler or ret_from_fork. - * So we hard code that anyways. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - struct pt_regs *regs = task_pt_regs(t); - unsigned long blink = 0; - - /* - * If the thread being queried for in not itself calling this, then it - * implies it is not executing, which in turn implies it is sleeping, - * which in turn implies it got switched OUT by the schedular. - * In that case, it's kernel mode blink can reliably retrieved as per - * the picture above (right above pt_regs). - */ - if (t != current && t->state != TASK_RUNNING) - blink = *((unsigned int *)regs - 1); - - return blink; -} - int elf_check_arch(const struct elf32_hdr *x) { unsigned int eflags; diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 114234e83caa..edda76fae83f 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int err; - err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs, + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs, sizeof(sf->uc.uc_mcontext.regs.scratch)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); @@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) if (!err) set_current_blocked(&set); - err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs), + err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); return err; @@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn) /* Don't restart from sigreturn */ syscall_wont_restart(regs); + /* + * Ensure that sigreturn always returns to user mode (in case the + * regs saved on user stack got fudged between save and sigreturn) + * Otherwise it is easy to panic the kernel with a custom + * signal handler and/or restorer which clobberes the status32/ret + * to return to a bogus location in kernel mode. + */ + regs->status32 |= STATUS_U_MASK; + return regs->r0; badframe: @@ -229,8 +238,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) /* * handler returns using sigreturn stub provided already by userpsace + * If not, nuke the process right away */ - BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER)); + if(!(ksig->ka.sa.sa_flags & SA_RESTORER)) + return 1; + regs->blink = (unsigned long)ksig->ka.sa.sa_restorer; /* User Stack for signal handler will be above the frame just carved */ @@ -296,12 +308,12 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { sigset_t *oldset = sigmask_to_save(); - int ret; + int failed; /* Set up the stack frame */ - ret = setup_rt_frame(ksig, oldset, regs); + failed = setup_rt_frame(ksig, oldset, regs); - signal_setup_done(ret, ksig, 0); + signal_setup_done(failed, ksig, 0); } void do_signal(struct pt_regs *regs) diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 9ce47cfe2303..92320d6f737c 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -43,6 +43,10 @@ static void seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, struct unwind_frame_info *frame_info) { + /* + * synchronous unwinding (e.g. dump_stack) + * - uses current values of SP and friends + */ if (tsk == NULL && regs == NULL) { unsigned long fp, sp, blink, ret; frame_info->task = current; @@ -61,12 +65,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->regs.r63 = ret; frame_info->call_frame = 0; } else if (regs == NULL) { + /* + * Asynchronous unwinding of sleeping task + * - Gets SP etc from task's pt_regs (saved bottom of kernel + * mode stack of task) + */ frame_info->task = tsk; - frame_info->regs.r27 = KSTK_FP(tsk); - frame_info->regs.r28 = KSTK_ESP(tsk); - frame_info->regs.r31 = KSTK_BLINK(tsk); + frame_info->regs.r27 = TSK_K_FP(tsk); + frame_info->regs.r28 = TSK_K_ESP(tsk); + frame_info->regs.r31 = TSK_K_BLINK(tsk); frame_info->regs.r63 = (unsigned int)__switch_to; /* In the prologue of __switch_to, first FP is saved on stack @@ -83,6 +92,10 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->call_frame = 0; } else { + /* + * Asynchronous unwinding of intr/exception + * - Just uses the pt_regs passed + */ frame_info->task = tsk; frame_info->regs.r27 = regs->fp; @@ -95,7 +108,7 @@ static void seed_unwind_frame_info(struct task_struct *tsk, #endif -static noinline unsigned int +notrace noinline unsigned int arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c index 7ff5b5c183bb..74db59b6f392 100644 --- a/arch/arc/kernel/unaligned.c +++ b/arch/arc/kernel/unaligned.c @@ -12,6 +12,7 @@ */ #include <linux/types.h> +#include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/uaccess.h> #include <asm/disasm.h> @@ -253,6 +254,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs, } } + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address); return 0; fault: diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 563cb27e37f5..6a2e006cbcce 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -14,6 +14,7 @@ #include <linux/ptrace.h> #include <linux/uaccess.h> #include <linux/kdebug.h> +#include <linux/perf_event.h> #include <asm/pgalloc.h> #include <asm/mmu.h> @@ -139,13 +140,20 @@ good_area: return; } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (likely(!(fault & VM_FAULT_ERROR))) { if (flags & FAULT_FLAG_ALLOW_RETRY) { /* To avoid updating stats twice for retry case */ - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } if (fault & VM_FAULT_RETRY) { flags &= ~FAULT_FLAG_ALLOW_RETRY; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9f1f09a2bc9b..cf4c0c99aa25 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -619,6 +619,7 @@ config ARCH_PXA select GENERIC_CLOCKEVENTS select GPIO_PXA select HAVE_IDE + select IRQ_DOMAIN select MULTI_IRQ_HANDLER select PLAT_PXA select SPARSE_IRQ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7f99cd652203..eb7bb511f853 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -150,6 +150,7 @@ machine-$(CONFIG_ARCH_BERLIN) += berlin machine-$(CONFIG_ARCH_CLPS711X) += clps711x machine-$(CONFIG_ARCH_CNS3XXX) += cns3xxx machine-$(CONFIG_ARCH_DAVINCI) += davinci +machine-$(CONFIG_ARCH_DIGICOLOR) += digicolor machine-$(CONFIG_ARCH_DOVE) += dove machine-$(CONFIG_ARCH_EBSA110) += ebsa110 machine-$(CONFIG_ARCH_EFM32) += efm32 diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index 2c6248d9a9ef..c3255e0c90aa 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -301,3 +301,11 @@ cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; cd-inverted; }; + +&aes { + status = "okay"; +}; + +&sham { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts index 83d40f7655e5..6b8493720424 100644 --- a/arch/arm/boot/dts/am335x-bone.dts +++ b/arch/arm/boot/dts/am335x-bone.dts @@ -24,11 +24,3 @@ &mmc1 { vmmc-supply = <&ldo3_reg>; }; - -&sham { - status = "okay"; -}; - -&aes { - status = "okay"; -}; diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts index 7266a00aab2e..5c5667a3624d 100644 --- a/arch/arm/boot/dts/am335x-lxm.dts +++ b/arch/arm/boot/dts/am335x-lxm.dts @@ -328,6 +328,10 @@ dual_emac_res_vlan = <3>; }; +&phy_sel { + rmii-clock-ext; +}; + &mac { pinctrl-names = "default", "sleep"; pinctrl-0 = <&cpsw_default>; diff --git a/arch/arm/boot/dts/am33xx-clocks.dtsi b/arch/arm/boot/dts/am33xx-clocks.dtsi index 712edce7d6fb..071b56aa0c7e 100644 --- a/arch/arm/boot/dts/am33xx-clocks.dtsi +++ b/arch/arm/boot/dts/am33xx-clocks.dtsi @@ -99,7 +99,7 @@ ehrpwm0_tbclk: ehrpwm0_tbclk@44e10664 { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <0>; reg = <0x0664>; }; @@ -107,7 +107,7 @@ ehrpwm1_tbclk: ehrpwm1_tbclk@44e10664 { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <1>; reg = <0x0664>; }; @@ -115,7 +115,7 @@ ehrpwm2_tbclk: ehrpwm2_tbclk@44e10664 { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <2>; reg = <0x0664>; }; diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi index c7dc9dab93a4..cfb49686ab6a 100644 --- a/arch/arm/boot/dts/am43xx-clocks.dtsi +++ b/arch/arm/boot/dts/am43xx-clocks.dtsi @@ -107,7 +107,7 @@ ehrpwm0_tbclk: ehrpwm0_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <0>; reg = <0x0664>; }; @@ -115,7 +115,7 @@ ehrpwm1_tbclk: ehrpwm1_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <1>; reg = <0x0664>; }; @@ -123,7 +123,7 @@ ehrpwm2_tbclk: ehrpwm2_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <2>; reg = <0x0664>; }; @@ -131,7 +131,7 @@ ehrpwm3_tbclk: ehrpwm3_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <4>; reg = <0x0664>; }; @@ -139,7 +139,7 @@ ehrpwm4_tbclk: ehrpwm4_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <5>; reg = <0x0664>; }; @@ -147,7 +147,7 @@ ehrpwm5_tbclk: ehrpwm5_tbclk { #clock-cells = <0>; compatible = "ti,gate-clock"; - clocks = <&dpll_per_m2_ck>; + clocks = <&l4ls_gclk>; ti,bit-shift = <6>; reg = <0x0664>; }; diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index fff0ee69aab4..e7f0a4ae271c 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -494,12 +494,12 @@ pinctrl_usart3_rts: usart3_rts-0 { atmel,pins = - <AT91_PIOB 8 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PC8 periph B */ + <AT91_PIOC 8 AT91_PERIPH_B AT91_PINCTRL_NONE>; }; pinctrl_usart3_cts: usart3_cts-0 { atmel,pins = - <AT91_PIOB 10 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PC10 periph B */ + <AT91_PIOC 10 AT91_PERIPH_B AT91_PINCTRL_NONE>; }; }; @@ -853,7 +853,7 @@ }; usb1: gadget@fffa4000 { - compatible = "atmel,at91rm9200-udc"; + compatible = "atmel,at91sam9260-udc"; reg = <0xfffa4000 0x4000>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&udc_clk>, <&udpck>; @@ -976,7 +976,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index e247b0b5fdab..d55fdf2487ef 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -124,11 +124,12 @@ }; usb1: gadget@fffa4000 { - compatible = "atmel,at91rm9200-udc"; + compatible = "atmel,at91sam9261-udc"; reg = <0xfffa4000 0x4000>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&udc_clk>, <&udpck>; - clock-names = "usb_clk", "udc_clk", "udpck"; + clocks = <&udc_clk>, <&udpck>; + clock-names = "pclk", "hclk"; + atmel,matrix = <&matrix>; status = "disabled"; }; @@ -262,7 +263,7 @@ }; matrix: matrix@ffffee00 { - compatible = "atmel,at91sam9260-bus-matrix"; + compatible = "atmel,at91sam9260-bus-matrix", "syscon"; reg = <0xffffee00 0x200>; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 1f67bb4c144e..fce301c4e9d6 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -69,7 +69,7 @@ sram1: sram@00500000 { compatible = "mmio-sram"; - reg = <0x00300000 0x4000>; + reg = <0x00500000 0x4000>; }; ahb { @@ -856,7 +856,7 @@ }; usb1: gadget@fff78000 { - compatible = "atmel,at91rm9200-udc"; + compatible = "atmel,at91sam9263-udc"; reg = <0xfff78000 0x4000>; interrupts = <24 IRQ_TYPE_LEVEL_HIGH 2>; clocks = <&udc_clk>, <&udpck>; @@ -905,7 +905,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index ee80aa9c0759..488af63d5174 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -1116,7 +1116,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; @@ -1301,7 +1300,7 @@ compatible = "atmel,at91sam9g45-ehci", "usb-ehci"; reg = <0x00800000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; + clocks = <&utmi>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ehci_clk", "hclk", "uhpck"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index c2666a7cb5b1..0c53a375ba99 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -894,7 +894,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 818dabdd8c0e..d221179d0f1a 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -1066,7 +1066,7 @@ reg = <0x00500000 0x80000 0xf803c000 0x400>; interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>; - clocks = <&usb>, <&udphs_clk>; + clocks = <&utmi>, <&udphs_clk>; clock-names = "hclk", "pclk"; status = "disabled"; @@ -1130,7 +1130,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; @@ -1186,7 +1185,7 @@ compatible = "atmel,at91sam9g45-ehci", "usb-ehci"; reg = <0x00700000 0x100000>; interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&uhpck>; + clocks = <&utmi>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ehci_clk", "uhpck"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index d3a29c1b8417..afe678f6d2e9 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -36,6 +36,20 @@ >; }; + mmc_pins: pinmux_mmc_pins { + pinctrl-single,pins = < + DM816X_IOPAD(0x0a70, MUX_MODE0) /* SD_POW */ + DM816X_IOPAD(0x0a74, MUX_MODE0) /* SD_CLK */ + DM816X_IOPAD(0x0a78, MUX_MODE0) /* SD_CMD */ + DM816X_IOPAD(0x0a7C, MUX_MODE0) /* SD_DAT0 */ + DM816X_IOPAD(0x0a80, MUX_MODE0) /* SD_DAT1 */ + DM816X_IOPAD(0x0a84, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a88, MUX_MODE0) /* SD_DAT2 */ + DM816X_IOPAD(0x0a8c, MUX_MODE2) /* GP1[7] */ + DM816X_IOPAD(0x0a90, MUX_MODE2) /* GP1[8] */ + >; + }; + usb0_pins: pinmux_usb0_pins { pinctrl-single,pins = < DM816X_IOPAD(0x0d00, MUX_MODE0) /* USB0_DRVVBUS */ @@ -137,7 +151,12 @@ }; &mmc1 { + pinctrl-names = "default"; + pinctrl-0 = <&mmc_pins>; vmmc-supply = <&vmmcsd_fixed>; + bus-width = <4>; + cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>; }; /* At least dm8168-evm rev c won't support multipoint, later may */ diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi index 3c97b5f2addc..f35715bc6992 100644 --- a/arch/arm/boot/dts/dm816x.dtsi +++ b/arch/arm/boot/dts/dm816x.dtsi @@ -150,17 +150,27 @@ }; gpio1: gpio@48032000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio1"; + ti,gpio-always-on; reg = <0x48032000 0x1000>; - interrupts = <97>; + interrupts = <96>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpio2: gpio@4804c000 { - compatible = "ti,omap3-gpio"; + compatible = "ti,omap4-gpio"; ti,hwmods = "gpio2"; + ti,gpio-always-on; reg = <0x4804c000 0x1000>; - interrupts = <99>; + interrupts = <98>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; gpmc: gpmc@50000000 { diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts index 3290a96ba586..7563d7ce01bb 100644 --- a/arch/arm/boot/dts/dra7-evm.dts +++ b/arch/arm/boot/dts/dra7-evm.dts @@ -263,17 +263,15 @@ dcan1_pins_default: dcan1_pins_default { pinctrl-single,pins = < - 0x3d0 (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */ - 0x3d4 (MUX_MODE15) /* dcan1_rx.off */ - 0x418 (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */ + 0x3d0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */ + 0x418 (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */ >; }; dcan1_pins_sleep: dcan1_pins_sleep { pinctrl-single,pins = < - 0x3d0 (MUX_MODE15) /* dcan1_tx.off */ - 0x3d4 (MUX_MODE15) /* dcan1_rx.off */ - 0x418 (MUX_MODE15) /* wakeup0.off */ + 0x3d0 (MUX_MODE15 | PULL_UP) /* dcan1_tx.off */ + 0x418 (MUX_MODE15 | PULL_UP) /* wakeup0.off */ >; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 127608d79033..c4659a979c41 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1111,7 +1111,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie1-phy"; }; pcie2_phy: pciephy@4a095000 { @@ -1130,7 +1129,6 @@ "wkupclk", "refclk", "div-clk", "phy-div"; #phy-cells = <0>; - ti,hwmods = "pcie2-phy"; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts index e0264d0bf7b9..40ed539ce474 100644 --- a/arch/arm/boot/dts/dra72-evm.dts +++ b/arch/arm/boot/dts/dra72-evm.dts @@ -119,17 +119,15 @@ dcan1_pins_default: dcan1_pins_default { pinctrl-single,pins = < - 0x3d0 (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */ - 0x3d4 (MUX_MODE15) /* dcan1_rx.off */ - 0x418 (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */ + 0x3d0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */ + 0x418 (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */ >; }; dcan1_pins_sleep: dcan1_pins_sleep { pinctrl-single,pins = < - 0x3d0 (MUX_MODE15) /* dcan1_tx.off */ - 0x3d4 (MUX_MODE15) /* dcan1_rx.off */ - 0x418 (MUX_MODE15) /* wakeup0.off */ + 0x3d0 (MUX_MODE15 | PULL_UP) /* dcan1_tx.off */ + 0x418 (MUX_MODE15 | PULL_UP) /* wakeup0.off */ >; }; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 4bdcbd61ce47..99b09a44e269 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -243,10 +243,18 @@ ti,invert-autoidle-bit; }; + dpll_core_byp_mux: dpll_core_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x012c>; + }; + dpll_core_ck: dpll_core_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-core-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_core_byp_mux>; reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>; }; @@ -309,10 +317,18 @@ clock-div = <1>; }; + dpll_dsp_byp_mux: dpll_dsp_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x0240>; + }; + dpll_dsp_ck: dpll_dsp_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_dsp_byp_mux>; reg = <0x0234>, <0x0238>, <0x0240>, <0x023c>; }; @@ -335,10 +351,18 @@ clock-div = <1>; }; + dpll_iva_byp_mux: dpll_iva_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x01ac>; + }; + dpll_iva_ck: dpll_iva_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_iva_byp_mux>; reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>; }; @@ -361,10 +385,18 @@ clock-div = <1>; }; + dpll_gpu_byp_mux: dpll_gpu_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x02e4>; + }; + dpll_gpu_ck: dpll_gpu_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_gpu_byp_mux>; reg = <0x02d8>, <0x02dc>, <0x02e4>, <0x02e0>; }; @@ -398,10 +430,18 @@ clock-div = <1>; }; + dpll_ddr_byp_mux: dpll_ddr_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x021c>; + }; + dpll_ddr_ck: dpll_ddr_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_ddr_byp_mux>; reg = <0x0210>, <0x0214>, <0x021c>, <0x0218>; }; @@ -416,10 +456,18 @@ ti,invert-autoidle-bit; }; + dpll_gmac_byp_mux: dpll_gmac_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x02b4>; + }; + dpll_gmac_ck: dpll_gmac_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_gmac_byp_mux>; reg = <0x02a8>, <0x02ac>, <0x02b4>, <0x02b0>; }; @@ -482,10 +530,18 @@ clock-div = <1>; }; + dpll_eve_byp_mux: dpll_eve_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x0290>; + }; + dpll_eve_ck: dpll_eve_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_eve_byp_mux>; reg = <0x0284>, <0x0288>, <0x0290>, <0x028c>; }; @@ -1249,10 +1305,18 @@ clock-div = <1>; }; + dpll_per_byp_mux: dpll_per_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x014c>; + }; + dpll_per_ck: dpll_per_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_per_byp_mux>; reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>; }; @@ -1275,10 +1339,18 @@ clock-div = <1>; }; + dpll_usb_byp_mux: dpll_usb_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x018c>; + }; + dpll_usb_ck: dpll_usb_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-j-type-clock"; - clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_usb_byp_mux>; reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>; }; diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index 277b48b0b6f9..ac6b0ae42caf 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -18,6 +18,7 @@ */ #include "skeleton.dtsi" +#include "exynos4-cpu-thermal.dtsi" #include <dt-bindings/clock/exynos3250.h> / { @@ -193,6 +194,7 @@ interrupts = <0 216 0>; clocks = <&cmu CLK_TMU_APBIF>; clock-names = "tmu_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" status = "disabled"; }; diff --git a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi new file mode 100644 index 000000000000..735cb2f10817 --- /dev/null +++ b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi @@ -0,0 +1,52 @@ +/* + * Device tree sources for Exynos4 thermal zone + * + * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <dt-bindings/thermal/thermal.h> + +/ { +thermal-zones { + cpu_thermal: cpu-thermal { + thermal-sensors = <&tmu 0>; + polling-delay-passive = <0>; + polling-delay = <0>; + trips { + cpu_alert0: cpu-alert-0 { + temperature = <70000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu_alert1: cpu-alert-1 { + temperature = <95000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu_alert2: cpu-alert-2 { + temperature = <110000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu_crit0: cpu-crit-0 { + temperature = <120000>; /* millicelsius */ + hysteresis = <0>; /* millicelsius */ + type = "critical"; + }; + }; + cooling-maps { + map0 { + trip = <&cpu_alert0>; + }; + map1 { + trip = <&cpu_alert1>; + }; + }; + }; +}; +}; diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index 76173cacd450..77ea547768f4 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -38,6 +38,7 @@ i2c5 = &i2c_5; i2c6 = &i2c_6; i2c7 = &i2c_7; + i2c8 = &i2c_8; csis0 = &csis_0; csis1 = &csis_1; fimc0 = &fimc_0; @@ -104,6 +105,7 @@ compatible = "samsung,exynos4210-pd"; reg = <0x10023C20 0x20>; #power-domain-cells = <0>; + power-domains = <&pd_lcd0>; }; pd_cam: cam-power-domain@10023C00 { @@ -554,6 +556,22 @@ status = "disabled"; }; + i2c_8: i2c@138E0000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "samsung,s3c2440-hdmiphy-i2c"; + reg = <0x138E0000 0x100>; + interrupts = <0 93 0>; + clocks = <&clock CLK_I2C_HDMI>; + clock-names = "i2c"; + status = "disabled"; + + hdmi_i2c_phy: hdmiphy@38 { + compatible = "exynos4210-hdmiphy"; + reg = <0x38>; + }; + }; + spi_0: spi@13920000 { compatible = "samsung,exynos4210-spi"; reg = <0x13920000 0x100>; @@ -663,6 +681,33 @@ status = "disabled"; }; + tmu: tmu@100C0000 { + #include "exynos4412-tmu-sensor-conf.dtsi" + }; + + hdmi: hdmi@12D00000 { + compatible = "samsung,exynos4210-hdmi"; + reg = <0x12D00000 0x70000>; + interrupts = <0 92 0>; + clock-names = "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy", + "mout_hdmi"; + clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>, + <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>, + <&clock CLK_MOUT_HDMI>; + phy = <&hdmi_i2c_phy>; + power-domains = <&pd_tv>; + samsung,syscon-phandle = <&pmu_system_controller>; + status = "disabled"; + }; + + mixer: mixer@12C10000 { + compatible = "samsung,exynos4210-mixer"; + interrupts = <0 91 0>; + reg = <0x12C10000 0x2100>, <0x12c00000 0x300>; + power-domains = <&pd_tv>; + status = "disabled"; + }; + ppmu_dmc0: ppmu_dmc0@106a0000 { compatible = "samsung,exynos-ppmu"; reg = <0x106a0000 0x2000>; diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts index 3d6652a4b6cb..32c5fd8f6269 100644 --- a/arch/arm/boot/dts/exynos4210-trats.dts +++ b/arch/arm/boot/dts/exynos4210-trats.dts @@ -426,6 +426,25 @@ status = "okay"; }; + tmu@100C0000 { + status = "okay"; + }; + + thermal-zones { + cpu_thermal: cpu-thermal { + cooling-maps { + map0 { + /* Corresponds to 800MHz at freq_table */ + cooling-device = <&cpu0 2 2>; + }; + map1 { + /* Corresponds to 200MHz at freq_table */ + cooling-device = <&cpu0 4 4>; + }; + }; + }; + }; + camera { pinctrl-names = "default"; pinctrl-0 = <>; diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index b57e6b82ea20..d4f2b11319dd 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -505,6 +505,63 @@ assigned-clock-rates = <0>, <160000000>; }; }; + + hdmi_en: voltage-regulator-hdmi-5v { + compatible = "regulator-fixed"; + regulator-name = "HDMI_5V"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpe0 1 0>; + enable-active-high; + }; + + hdmi_ddc: i2c-ddc { + compatible = "i2c-gpio"; + gpios = <&gpe4 2 0 &gpe4 3 0>; + i2c-gpio,delay-us = <100>; + #address-cells = <1>; + #size-cells = <0>; + + pinctrl-0 = <&i2c_ddc_bus>; + pinctrl-names = "default"; + status = "okay"; + }; + + mixer@12C10000 { + status = "okay"; + }; + + hdmi@12D00000 { + hpd-gpio = <&gpx3 7 0>; + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_hpd>; + hdmi-en-supply = <&hdmi_en>; + vdd-supply = <&ldo3_reg>; + vdd_osc-supply = <&ldo4_reg>; + vdd_pll-supply = <&ldo3_reg>; + ddc = <&hdmi_ddc>; + status = "okay"; + }; + + i2c@138E0000 { + status = "okay"; + }; +}; + +&pinctrl_1 { + hdmi_hpd: hdmi-hpd { + samsung,pins = "gpx3-7"; + samsung,pin-pud = <0>; + }; +}; + +&pinctrl_0 { + i2c_ddc_bus: i2c-ddc-bus { + samsung,pins = "gpe4-2", "gpe4-3"; + samsung,pin-function = <2>; + samsung,pin-pud = <3>; + samsung,pin-drv = <0>; + }; }; &mdma1 { diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index 67c832c9dcf1..be89f83f70e7 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -21,6 +21,7 @@ #include "exynos4.dtsi" #include "exynos4210-pinctrl.dtsi" +#include "exynos4-cpu-thermal.dtsi" / { compatible = "samsung,exynos4210", "samsung,exynos4"; @@ -35,10 +36,13 @@ #address-cells = <1>; #size-cells = <0>; - cpu@900 { + cpu0: cpu@900 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0x900>; + cooling-min-level = <4>; + cooling-max-level = <2>; + #cooling-cells = <2>; /* min followed by max */ }; cpu@901 { @@ -153,16 +157,38 @@ reg = <0x03860000 0x1000>; }; - tmu@100C0000 { + tmu: tmu@100C0000 { compatible = "samsung,exynos4210-tmu"; interrupt-parent = <&combiner>; reg = <0x100C0000 0x100>; interrupts = <2 4>; clocks = <&clock CLK_TMU_APBIF>; clock-names = "tmu_apbif"; + samsung,tmu_gain = <15>; + samsung,tmu_reference_voltage = <7>; status = "disabled"; }; + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&tmu 0>; + + trips { + cpu_alert0: cpu-alert-0 { + temperature = <85000>; /* millicelsius */ + }; + cpu_alert1: cpu-alert-1 { + temperature = <100000>; /* millicelsius */ + }; + cpu_alert2: cpu-alert-2 { + temperature = <110000>; /* millicelsius */ + }; + }; + }; + }; + g2d@12800000 { compatible = "samsung,s5pv210-g2d"; reg = <0x12800000 0x1000>; @@ -203,6 +229,14 @@ }; }; + mixer: mixer@12C10000 { + clock-names = "mixer", "hdmi", "sclk_hdmi", "vp", "mout_mixer", + "sclk_mixer"; + clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>, + <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>, + <&clock CLK_MOUT_MIXER>, <&clock CLK_SCLK_MIXER>; + }; + ppmu_lcd1: ppmu_lcd1@12240000 { compatible = "samsung,exynos-ppmu"; reg = <0x12240000 0x2000>; diff --git a/arch/arm/boot/dts/exynos4212.dtsi b/arch/arm/boot/dts/exynos4212.dtsi index dd0a43ec56da..5be03288f1ee 100644 --- a/arch/arm/boot/dts/exynos4212.dtsi +++ b/arch/arm/boot/dts/exynos4212.dtsi @@ -26,10 +26,13 @@ #address-cells = <1>; #size-cells = <0>; - cpu@A00 { + cpu0: cpu@A00 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0xA00>; + cooling-min-level = <13>; + cooling-max-level = <7>; + #cooling-cells = <2>; /* min followed by max */ }; cpu@A01 { diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index de80b5bba204..adb4f6a97a1d 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -249,6 +249,20 @@ regulator-always-on; }; + ldo8_reg: ldo@8 { + regulator-compatible = "LDO8"; + regulator-name = "VDD10_HDMI_1.0V"; + regulator-min-microvolt = <1000000>; + regulator-max-microvolt = <1000000>; + }; + + ldo10_reg: ldo@10 { + regulator-compatible = "LDO10"; + regulator-name = "VDDQ_MIPIHSI_1.8V"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + }; + ldo11_reg: LDO11 { regulator-name = "VDD18_ABB1_1.8V"; regulator-min-microvolt = <1800000>; @@ -411,6 +425,51 @@ ehci: ehci@12580000 { status = "okay"; }; + + tmu@100C0000 { + vtmu-supply = <&ldo10_reg>; + status = "okay"; + }; + + thermal-zones { + cpu_thermal: cpu-thermal { + cooling-maps { + map0 { + /* Corresponds to 800MHz at freq_table */ + cooling-device = <&cpu0 7 7>; + }; + map1 { + /* Corresponds to 200MHz at freq_table */ + cooling-device = <&cpu0 13 13>; + }; + }; + }; + }; + + mixer: mixer@12C10000 { + status = "okay"; + }; + + hdmi@12D00000 { + hpd-gpio = <&gpx3 7 0>; + pinctrl-names = "default"; + pinctrl-0 = <&hdmi_hpd>; + vdd-supply = <&ldo8_reg>; + vdd_osc-supply = <&ldo10_reg>; + vdd_pll-supply = <&ldo8_reg>; + ddc = <&hdmi_ddc>; + status = "okay"; + }; + + hdmi_ddc: i2c@13880000 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&i2c2_bus>; + }; + + i2c@138E0000 { + status = "okay"; + }; }; &pinctrl_1 { @@ -425,4 +484,9 @@ samsung,pin-pud = <0>; samsung,pin-drv = <0>; }; + + hdmi_hpd: hdmi-hpd { + samsung,pins = "gpx3-7"; + samsung,pin-pud = <1>; + }; }; diff --git a/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi new file mode 100644 index 000000000000..e3f7934d19d0 --- /dev/null +++ b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi @@ -0,0 +1,24 @@ +/* + * Device tree sources for Exynos4412 TMU sensor configuration + * + * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <dt-bindings/thermal/thermal_exynos.h> + +#thermal-sensor-cells = <0>; +samsung,tmu_gain = <8>; +samsung,tmu_reference_voltage = <16>; +samsung,tmu_noise_cancel_mode = <4>; +samsung,tmu_efuse_value = <55>; +samsung,tmu_min_efuse_value = <40>; +samsung,tmu_max_efuse_value = <100>; +samsung,tmu_first_point_trim = <25>; +samsung,tmu_second_point_trim = <85>; +samsung,tmu_default_temp_offset = <50>; +samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>; diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 21f748083586..173ffa479ad3 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -927,6 +927,21 @@ pulldown-ohm = <100000>; /* 100K */ io-channels = <&adc 2>; /* Battery temperature */ }; + + thermal-zones { + cpu_thermal: cpu-thermal { + cooling-maps { + map0 { + /* Corresponds to 800MHz at freq_table */ + cooling-device = <&cpu0 7 7>; + }; + map1 { + /* Corresponds to 200MHz at freq_table */ + cooling-device = <&cpu0 13 13>; + }; + }; + }; + }; }; &pmu_system_controller { diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi index 0f6ec93bb1d8..68ad43b391ae 100644 --- a/arch/arm/boot/dts/exynos4412.dtsi +++ b/arch/arm/boot/dts/exynos4412.dtsi @@ -26,10 +26,13 @@ #address-cells = <1>; #size-cells = <0>; - cpu@A00 { + cpu0: cpu@A00 { device_type = "cpu"; compatible = "arm,cortex-a9"; reg = <0xA00>; + cooling-min-level = <13>; + cooling-max-level = <7>; + #cooling-cells = <2>; /* min followed by max */ }; cpu@A01 { diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi index f5e0ae780d6c..6a6abe14fd9b 100644 --- a/arch/arm/boot/dts/exynos4x12.dtsi +++ b/arch/arm/boot/dts/exynos4x12.dtsi @@ -19,6 +19,7 @@ #include "exynos4.dtsi" #include "exynos4x12-pinctrl.dtsi" +#include "exynos4-cpu-thermal.dtsi" / { aliases { @@ -297,4 +298,15 @@ clock-names = "tmu_apbif"; status = "disabled"; }; + + hdmi: hdmi@12D00000 { + compatible = "samsung,exynos4212-hdmi"; + }; + + mixer: mixer@12C10000 { + compatible = "samsung,exynos4212-mixer"; + clock-names = "mixer", "hdmi", "sclk_hdmi", "vp"; + clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>, + <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>; + }; }; diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi index 9bb1b0b738f5..adbde1adad95 100644 --- a/arch/arm/boot/dts/exynos5250.dtsi +++ b/arch/arm/boot/dts/exynos5250.dtsi @@ -20,7 +20,7 @@ #include <dt-bindings/clock/exynos5250.h> #include "exynos5.dtsi" #include "exynos5250-pinctrl.dtsi" - +#include "exynos4-cpu-thermal.dtsi" #include <dt-bindings/clock/exynos-audss-clk.h> / { @@ -58,11 +58,14 @@ #address-cells = <1>; #size-cells = <0>; - cpu@0 { + cpu0: cpu@0 { device_type = "cpu"; compatible = "arm,cortex-a15"; reg = <0>; clock-frequency = <1700000000>; + cooling-min-level = <15>; + cooling-max-level = <9>; + #cooling-cells = <2>; /* min followed by max */ }; cpu@1 { device_type = "cpu"; @@ -102,6 +105,12 @@ #power-domain-cells = <0>; }; + pd_disp1: disp1-power-domain@100440A0 { + compatible = "samsung,exynos4210-pd"; + reg = <0x100440A0 0x20>; + #power-domain-cells = <0>; + }; + clock: clock-controller@10010000 { compatible = "samsung,exynos5250-clock"; reg = <0x10010000 0x30000>; @@ -235,12 +244,32 @@ status = "disabled"; }; - tmu@10060000 { + tmu: tmu@10060000 { compatible = "samsung,exynos5250-tmu"; reg = <0x10060000 0x100>; interrupts = <0 65 0>; clocks = <&clock CLK_TMU>; clock-names = "tmu_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" + }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&tmu 0>; + + cooling-maps { + map0 { + /* Corresponds to 800MHz at freq_table */ + cooling-device = <&cpu0 9 9>; + }; + map1 { + /* Corresponds to 200MHz at freq_table */ + cooling-device = <&cpu0 15 15>; + }; + }; + }; }; serial@12C00000 { @@ -719,6 +748,7 @@ hdmi: hdmi { compatible = "samsung,exynos4212-hdmi"; reg = <0x14530000 0x70000>; + power-domains = <&pd_disp1>; interrupts = <0 95 0>; clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>, <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>, @@ -731,9 +761,11 @@ mixer { compatible = "samsung,exynos5250-mixer"; reg = <0x14450000 0x10000>; + power-domains = <&pd_disp1>; interrupts = <0 94 0>; - clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>; - clock-names = "mixer", "sclk_hdmi"; + clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>, + <&clock CLK_SCLK_HDMI>; + clock-names = "mixer", "hdmi", "sclk_hdmi"; }; dp_phy: video-phy@10040720 { @@ -743,6 +775,7 @@ }; dp: dp-controller@145B0000 { + power-domains = <&pd_disp1>; clocks = <&clock CLK_DP>; clock-names = "dp"; phys = <&dp_phy>; @@ -750,6 +783,7 @@ }; fimd: fimd@14400000 { + power-domains = <&pd_disp1>; clocks = <&clock CLK_SCLK_FIMD1>, <&clock CLK_FIMD1>; clock-names = "sclk_fimd", "fimd"; }; diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi new file mode 100644 index 000000000000..5d31fc140823 --- /dev/null +++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi @@ -0,0 +1,35 @@ +/* + * Device tree sources for default Exynos5420 thermal zone definition + * + * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +polling-delay-passive = <0>; +polling-delay = <0>; +trips { + cpu-alert-0 { + temperature = <85000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu-alert-1 { + temperature = <103000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu-alert-2 { + temperature = <110000>; /* millicelsius */ + hysteresis = <10000>; /* millicelsius */ + type = "active"; + }; + cpu-crit-0 { + temperature = <1200000>; /* millicelsius */ + hysteresis = <0>; /* millicelsius */ + type = "critical"; + }; +}; diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi index 9dc2e9773b30..c0e98cf3514f 100644 --- a/arch/arm/boot/dts/exynos5420.dtsi +++ b/arch/arm/boot/dts/exynos5420.dtsi @@ -740,8 +740,9 @@ compatible = "samsung,exynos5420-mixer"; reg = <0x14450000 0x10000>; interrupts = <0 94 0>; - clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>; - clock-names = "mixer", "sclk_hdmi"; + clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>, + <&clock CLK_SCLK_HDMI>; + clock-names = "mixer", "hdmi", "sclk_hdmi"; power-domains = <&disp_pd>; }; @@ -782,6 +783,7 @@ interrupts = <0 65 0>; clocks = <&clock CLK_TMU>; clock-names = "tmu_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" }; tmu_cpu1: tmu@10064000 { @@ -790,6 +792,7 @@ interrupts = <0 183 0>; clocks = <&clock CLK_TMU>; clock-names = "tmu_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" }; tmu_cpu2: tmu@10068000 { @@ -798,6 +801,7 @@ interrupts = <0 184 0>; clocks = <&clock CLK_TMU>, <&clock CLK_TMU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" }; tmu_cpu3: tmu@1006c000 { @@ -806,6 +810,7 @@ interrupts = <0 185 0>; clocks = <&clock CLK_TMU>, <&clock CLK_TMU_GPU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" }; tmu_gpu: tmu@100a0000 { @@ -814,6 +819,30 @@ interrupts = <0 215 0>; clocks = <&clock CLK_TMU_GPU>, <&clock CLK_TMU>; clock-names = "tmu_apbif", "tmu_triminfo_apbif"; + #include "exynos4412-tmu-sensor-conf.dtsi" + }; + + thermal-zones { + cpu0_thermal: cpu0-thermal { + thermal-sensors = <&tmu_cpu0>; + #include "exynos5420-trip-points.dtsi" + }; + cpu1_thermal: cpu1-thermal { + thermal-sensors = <&tmu_cpu1>; + #include "exynos5420-trip-points.dtsi" + }; + cpu2_thermal: cpu2-thermal { + thermal-sensors = <&tmu_cpu2>; + #include "exynos5420-trip-points.dtsi" + }; + cpu3_thermal: cpu3-thermal { + thermal-sensors = <&tmu_cpu3>; + #include "exynos5420-trip-points.dtsi" + }; + gpu_thermal: gpu-thermal { + thermal-sensors = <&tmu_gpu>; + #include "exynos5420-trip-points.dtsi" + }; }; watchdog: watchdog@101D0000 { diff --git a/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi new file mode 100644 index 000000000000..7b2fba0ae92b --- /dev/null +++ b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi @@ -0,0 +1,24 @@ +/* + * Device tree sources for Exynos5440 TMU sensor configuration + * + * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <dt-bindings/thermal/thermal_exynos.h> + +#thermal-sensor-cells = <0>; +samsung,tmu_gain = <5>; +samsung,tmu_reference_voltage = <16>; +samsung,tmu_noise_cancel_mode = <4>; +samsung,tmu_efuse_value = <0x5d2d>; +samsung,tmu_min_efuse_value = <16>; +samsung,tmu_max_efuse_value = <76>; +samsung,tmu_first_point_trim = <25>; +samsung,tmu_second_point_trim = <70>; +samsung,tmu_default_temp_offset = <25>; +samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>; diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi new file mode 100644 index 000000000000..48adfa8f4300 --- /dev/null +++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi @@ -0,0 +1,25 @@ +/* + * Device tree sources for default Exynos5440 thermal zone definition + * + * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +polling-delay-passive = <0>; +polling-delay = <0>; +trips { + cpu-alert-0 { + temperature = <100000>; /* millicelsius */ + hysteresis = <0>; /* millicelsius */ + type = "active"; + }; + cpu-crit-0 { + temperature = <1050000>; /* millicelsius */ + hysteresis = <0>; /* millicelsius */ + type = "critical"; + }; +}; diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi index 8f3373cd7b87..59d9416b3b03 100644 --- a/arch/arm/boot/dts/exynos5440.dtsi +++ b/arch/arm/boot/dts/exynos5440.dtsi @@ -219,6 +219,7 @@ interrupts = <0 58 0>; clocks = <&clock CLK_B_125>; clock-names = "tmu_apbif"; + #include "exynos5440-tmu-sensor-conf.dtsi" }; tmuctrl_1: tmuctrl@16011C { @@ -227,6 +228,7 @@ interrupts = <0 58 0>; clocks = <&clock CLK_B_125>; clock-names = "tmu_apbif"; + #include "exynos5440-tmu-sensor-conf.dtsi" }; tmuctrl_2: tmuctrl@160120 { @@ -235,6 +237,22 @@ interrupts = <0 58 0>; clocks = <&clock CLK_B_125>; clock-names = "tmu_apbif"; + #include "exynos5440-tmu-sensor-conf.dtsi" + }; + + thermal-zones { + cpu0_thermal: cpu0-thermal { + thermal-sensors = <&tmuctrl_0>; + #include "exynos5440-trip-points.dtsi" + }; + cpu1_thermal: cpu1-thermal { + thermal-sensors = <&tmuctrl_1>; + #include "exynos5440-trip-points.dtsi" + }; + cpu2_thermal: cpu2-thermal { + thermal-sensors = <&tmuctrl_2>; + #include "exynos5440-trip-points.dtsi" + }; }; sata@210000 { diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi index f1cd2147421d..a626e6dd8022 100644 --- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi +++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi @@ -35,6 +35,7 @@ regulator-max-microvolt = <5000000>; gpio = <&gpio3 22 0>; enable-active-high; + vin-supply = <&swbst_reg>; }; reg_usb_h1_vbus: regulator@1 { @@ -45,6 +46,7 @@ regulator-max-microvolt = <5000000>; gpio = <&gpio1 29 0>; enable-active-high; + vin-supply = <&swbst_reg>; }; reg_audio: regulator@2 { diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts index fda4932faefd..945887d3fdb3 100644 --- a/arch/arm/boot/dts/imx6sl-evk.dts +++ b/arch/arm/boot/dts/imx6sl-evk.dts @@ -52,6 +52,7 @@ regulator-max-microvolt = <5000000>; gpio = <&gpio4 0 0>; enable-active-high; + vin-supply = <&swbst_reg>; }; reg_usb_otg2_vbus: regulator@1 { @@ -62,6 +63,7 @@ regulator-max-microvolt = <5000000>; gpio = <&gpio4 2 0>; enable-active-high; + vin-supply = <&swbst_reg>; }; reg_aud3v: regulator@2 { diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi index f4f78c40b564..3fdc84fddb70 100644 --- a/arch/arm/boot/dts/omap3.dtsi +++ b/arch/arm/boot/dts/omap3.dtsi @@ -92,6 +92,8 @@ ti,hwmods = "aes"; reg = <0x480c5000 0x50>; interrupts = <0>; + dmas = <&sdma 65 &sdma 66>; + dma-names = "tx", "rx"; }; prm: prm@48306000 { @@ -550,6 +552,8 @@ ti,hwmods = "sham"; reg = <0x480c3000 0x64>; interrupts = <49>; + dmas = <&sdma 69>; + dma-names = "rx"; }; smartreflex_core: smartreflex@480cb000 { diff --git a/arch/arm/boot/dts/omap5-core-thermal.dtsi b/arch/arm/boot/dts/omap5-core-thermal.dtsi index 19212ac6eef0..de8a3d456cf7 100644 --- a/arch/arm/boot/dts/omap5-core-thermal.dtsi +++ b/arch/arm/boot/dts/omap5-core-thermal.dtsi @@ -13,7 +13,7 @@ core_thermal: core_thermal { polling-delay-passive = <250>; /* milliseconds */ - polling-delay = <1000>; /* milliseconds */ + polling-delay = <500>; /* milliseconds */ /* sensor ID */ thermal-sensors = <&bandgap 2>; diff --git a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi index 1b87aca88b77..bc3090f2e84b 100644 --- a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi +++ b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi @@ -13,7 +13,7 @@ gpu_thermal: gpu_thermal { polling-delay-passive = <250>; /* milliseconds */ - polling-delay = <1000>; /* milliseconds */ + polling-delay = <500>; /* milliseconds */ /* sensor ID */ thermal-sensors = <&bandgap 1>; diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi index ddff674bd05e..4a485b63a141 100644 --- a/arch/arm/boot/dts/omap5.dtsi +++ b/arch/arm/boot/dts/omap5.dtsi @@ -1079,4 +1079,8 @@ }; }; +&cpu_thermal { + polling-delay = <500>; /* milliseconds */ +}; + /include/ "omap54xx-clocks.dtsi" diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi index 58c27466f012..83b425fb3ac2 100644 --- a/arch/arm/boot/dts/omap54xx-clocks.dtsi +++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi @@ -167,10 +167,18 @@ ti,index-starts-at-one; }; + dpll_core_byp_mux: dpll_core_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x012c>; + }; + dpll_core_ck: dpll_core_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-core-clock"; - clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin>, <&dpll_core_byp_mux>; reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>; }; @@ -294,10 +302,18 @@ clock-div = <1>; }; + dpll_iva_byp_mux: dpll_iva_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x01ac>; + }; + dpll_iva_ck: dpll_iva_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>; + clocks = <&sys_clkin>, <&dpll_iva_byp_mux>; reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>; }; @@ -599,10 +615,19 @@ }; }; &cm_core_clocks { + + dpll_per_byp_mux: dpll_per_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x014c>; + }; + dpll_per_ck: dpll_per_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>; + clocks = <&sys_clkin>, <&dpll_per_byp_mux>; reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>; }; @@ -714,10 +739,18 @@ ti,index-starts-at-one; }; + dpll_usb_byp_mux: dpll_usb_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x018c>; + }; + dpll_usb_ck: dpll_usb_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-j-type-clock"; - clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>; + clocks = <&sys_clkin>, <&dpll_usb_byp_mux>; reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>; }; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index d771f687a13b..eccc78d3220b 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -411,6 +411,7 @@ "mac_clk_rx", "mac_clk_tx", "clk_mac_ref", "clk_mac_refout", "aclk_mac", "pclk_mac"; + status = "disabled"; }; usb_host0_ehci: usb@ff500000 { diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 261311bdf65b..367af53c1b84 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1248,7 +1248,6 @@ atmel,watchdog-type = "hardware"; atmel,reset-type = "all"; atmel,dbg-halt; - atmel,idle-halt; status = "disabled"; }; @@ -1416,7 +1415,7 @@ compatible = "atmel,at91sam9g45-ehci", "usb-ehci"; reg = <0x00700000 0x100000>; interrupts = <32 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&uhpck>; + clocks = <&utmi>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ehci_clk", "uhpck"; status = "disabled"; }; diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index d986b41b9654..4303874889c6 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -66,6 +66,7 @@ gpio4 = &pioE; tcb0 = &tcb0; tcb1 = &tcb1; + i2c0 = &i2c0; i2c2 = &i2c2; }; cpus { @@ -259,7 +260,7 @@ compatible = "atmel,at91sam9g45-ehci", "usb-ehci"; reg = <0x00600000 0x100000>; interrupts = <46 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&usb>, <&uhphs_clk>, <&uhpck>; + clocks = <&utmi>, <&uhphs_clk>, <&uhpck>; clock-names = "usb_clk", "ehci_clk", "uhpck"; status = "disabled"; }; @@ -461,8 +462,8 @@ lcdck: lcdck { #clock-cells = <0>; - reg = <4>; - clocks = <&smd>; + reg = <3>; + clocks = <&mck>; }; smdck: smdck { @@ -770,7 +771,7 @@ reg = <50>; }; - lcd_clk: lcd_clk { + lcdc_clk: lcdc_clk { #clock-cells = <0>; reg = <51>; }; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 252c3d1bda50..d9176e606173 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -660,7 +660,7 @@ #address-cells = <1>; #size-cells = <0>; reg = <0xfff01000 0x1000>; - interrupts = <0 156 4>; + interrupts = <0 155 4>; num-cs = <4>; clocks = <&spi_m_clk>; status = "disabled"; @@ -713,6 +713,9 @@ reg-shift = <2>; reg-io-width = <4>; clocks = <&l4_sp_clk>; + dmas = <&pdma 28>, + <&pdma 29>; + dma-names = "tx", "rx"; }; uart1: serial1@ffc03000 { @@ -722,6 +725,9 @@ reg-shift = <2>; reg-io-width = <4>; clocks = <&l4_sp_clk>; + dmas = <&pdma 30>, + <&pdma 31>; + dma-names = "tx", "rx"; }; rst: rstmgr@ffd05000 { diff --git a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts index ab7891c43231..75742f8f96f3 100644 --- a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts +++ b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts @@ -56,6 +56,22 @@ model = "Olimex A10-OLinuXino-LIME"; compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10"; + cpus { + cpu0: cpu@0 { + /* + * The A10-Lime is known to be unstable + * when running at 1008 MHz + */ + operating-points = < + /* kHz uV */ + 912000 1350000 + 864000 1300000 + 624000 1250000 + >; + cooling-max-level = <2>; + }; + }; + soc@01c00000 { emac: ethernet@01c0b000 { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5c2925831f20..eebb7853e00b 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -75,7 +75,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1056000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -83,7 +82,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <4>; + cooling-max-level = <3>; }; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index f8818f1edbbe..883cb4873688 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -47,7 +47,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1104000 1500000 1008000 1400000 912000 1350000 864000 1300000 @@ -57,7 +56,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <6>; + cooling-max-level = <5>; }; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 3a8530b79f1c..fdd181792b4b 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -105,7 +105,6 @@ clock-latency = <244144>; /* 8 32k periods */ operating-points = < /* kHz uV */ - 1008000 1450000 960000 1400000 912000 1400000 864000 1300000 @@ -116,7 +115,7 @@ >; #cooling-cells = <2>; cooling-min-level = <0>; - cooling-max-level = <7>; + cooling-max-level = <6>; }; cpu@1 { diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 6eaddc47c43d..37dc0fe1093f 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id) unsigned int mpidr, this_cpu, that_cpu; unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster; struct completion inbound_alive; - struct tick_device *tdev; - enum clock_event_mode tdev_mode; long volatile *handshake_ptr; int ipi_nr, ret; @@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id) /* redirect GIC's SGIs to our counterpart */ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]); - tdev = tick_get_device(this_cpu); - if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu))) - tdev = NULL; - if (tdev) { - tdev_mode = tdev->evtdev->mode; - clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN); - } + tick_suspend_local(); ret = cpu_pm_enter(); @@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id) ret = cpu_pm_exit(); - if (tdev) { - clockevents_set_mode(tdev->evtdev, tdev_mode); - clockevents_program_event(tdev->evtdev, - tdev->evtdev->next_event, 1); - } + tick_resume_local(); trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr); local_fiq_enable(); diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig index f2670f638e97..811e72bbe642 100644 --- a/arch/arm/configs/at91_dt_defconfig +++ b/arch/arm/configs/at91_dt_defconfig @@ -70,6 +70,7 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y +CONFIG_ARM_AT91_ETHER=y CONFIG_MACB=y # CONFIG_NET_VENDOR_BROADCOM is not set CONFIG_DM9000=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index b7e6b6fba5e0..06075b6d2463 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -99,7 +99,7 @@ CONFIG_PCI_RCAR_GEN2=y CONFIG_PCI_RCAR_GEN2_PCIE=y CONFIG_PCIEPORTBUS=y CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=16 CONFIG_HIGHPTE=y CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index a097cffa1231..8e108599e1af 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -377,6 +377,7 @@ CONFIG_PWM_TWL=m CONFIG_PWM_TWL_LED=m CONFIG_OMAP_USB2=m CONFIG_TI_PIPE3=y +CONFIG_TWL4030_USB=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig index 41d856effe6c..510c747c65b4 100644 --- a/arch/arm/configs/sama5_defconfig +++ b/arch/arm/configs/sama5_defconfig @@ -3,8 +3,6 @@ CONFIG_SYSVIPC=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_EMBEDDED=y CONFIG_SLAB=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 38840a812924..8f6a5702b696 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -4,6 +4,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_PERF_EVENTS=y CONFIG_ARCH_SUNXI=y CONFIG_SMP=y +CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y CONFIG_HIGHPTE=y diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig index f489fdaa19b8..37fe607a4ede 100644 --- a/arch/arm/configs/vexpress_defconfig +++ b/arch/arm/configs/vexpress_defconfig @@ -118,8 +118,8 @@ CONFIG_HID_ZEROPLUS=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y -CONFIG_USB_ISP1760_HCD=y CONFIG_USB_STORAGE=y +CONFIG_USB_ISP1760=y CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y CONFIG_NEW_LEDS=y diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped index 71e5fc7cfb18..1d1800f71c5b 100644 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -58,14 +58,18 @@ # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -74,8 +78,6 @@ .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2095,9 +2097,11 @@ bsaes_xts_decrypt: vld1.8 {q8}, [r0] @ initial tweak adr r2, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst r9, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne r9, #0x10 @ subtract another 16 bytes +#endif subs r9, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl index be068db960ee..a4d3856e7d24 100644 --- a/arch/arm/crypto/bsaes-armv7.pl +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -701,14 +701,18 @@ $code.=<<___; # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -717,8 +721,6 @@ $code.=<<___; .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2076,9 +2078,11 @@ bsaes_xts_decrypt: vld1.8 {@XMM[8]}, [r0] @ initial tweak adr $magic, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes +#endif subs $len, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 70f9b9bfb1f9..5f337dc5c108 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_ARM_JUMP_LABEL_H #define _ASM_ARM_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/types.h> @@ -27,8 +27,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u32 jump_label_t; struct jump_entry { @@ -37,4 +35,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 816db0bf2dd8..d995821f1698 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -185,6 +185,7 @@ #define HSR_COND (0xfU << HSR_COND_SHIFT) #define FSC_FAULT (0x04) +#define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 41008cd7c53f..d71607c16601 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -27,6 +27,8 @@ #include <asm/fpstate.h> #include <kvm/arm_arch_timer.h> +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else @@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2f6cf0..d8e90c8cb5fa 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 37ca2a4c6f09..4cf48c3aca13 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) (__boundary - 1 < (end) - 1)? __boundary: (end); \ }) +#define kvm_pgd_index(addr) pgd_index(addr) + static inline bool kvm_page_empty(void *ptr) { struct page *ptr_page = virt_to_page(ptr); return page_count(ptr_page) == 1; } - #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp) #define kvm_pud_table_empty(kvm, pudp) (0) #define KVM_PREALLOC_LEVEL 0 -static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) +static inline void *kvm_get_hwpgd(struct kvm *kvm) { - return 0; + return kvm->arch.pgd; } -static inline void kvm_free_hwpgd(struct kvm *kvm) { } - -static inline void *kvm_get_hwpgd(struct kvm *kvm) +static inline unsigned int kvm_get_hwpgd_size(void) { - return kvm->arch.pgd; + return PTRS_PER_S2_PGD * sizeof(pgd_t); } struct kvm; @@ -207,7 +206,7 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn, bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached; - VM_BUG_ON(size & PAGE_MASK); + VM_BUG_ON(size & ~PAGE_MASK); if (!need_flush && !icache_is_pipt()) goto vipt_cache; diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 90c12e1e695c..0f79e4dec7f9 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -12,8 +12,7 @@ extern void timer_tick(void); -struct timespec; -typedef void (*clock_access_fn)(struct timespec *); +typedef void (*clock_access_fn)(struct timespec64 *); extern int register_persistent_clock(clock_access_fn read_boot, clock_access_fn read_persistent); diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S index 80a6501b4d50..c3c45e628e33 100644 --- a/arch/arm/include/debug/at91.S +++ b/arch/arm/include/debug/at91.S @@ -18,8 +18,11 @@ #define AT91_DBGU 0xfc00c000 /* SAMA5D4_BASE_USART3 */ #endif -/* Keep in sync with mach-at91/include/mach/hardware.h */ +#ifdef CONFIG_MMU #define AT91_IO_P2V(x) ((x) - 0x01000000) +#else +#define AT91_IO_P2V(x) (x) +#endif #define AT91_DBGU_SR (0x14) /* Status Register */ #define AT91_DBGU_THR (0x1c) /* Transmitter Holding Register */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 0db25bc32864..2499867dd0d8 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -198,6 +198,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d6087b9b1..488eaac56028 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,14 +199,11 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); -#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif return 0; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e55408e96559..1d60bebea4b8 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -246,12 +246,9 @@ static int __get_cpu_architecture(void) if (cpu_arch) cpu_arch += CPU_ARCH_ARMv3; } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) { - unsigned int mmfr0; - /* Revised CPUID format. Read the Memory Model Feature * Register 0 and check for VMSAv7 or PMSAv7 */ - asm("mrc p15, 0, %0, c0, c1, 4" - : "=r" (mmfr0)); + unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0); if ((mmfr0 & 0x0000000f) >= 0x00000003 || (mmfr0 & 0x000000f0) >= 0x00000030) cpu_arch = CPU_ARCH_ARMv7; diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 0cc7e58c47cc..a66e37e211a9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -76,7 +76,7 @@ void timer_tick(void) } #endif -static void dummy_clock_access(struct timespec *ts) +static void dummy_clock_access(struct timespec64 *ts) { ts->tv_sec = 0; ts->tv_nsec = 0; @@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts) static clock_access_fn __read_persistent_clock = dummy_clock_access; static clock_access_fn __read_boot_clock = dummy_clock_access;; -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { __read_persistent_clock(ts); } -void read_boot_clock(struct timespec *ts) +void read_boot_clock64(struct timespec64 *ts) { __read_boot_clock(ts); } diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 338ace78ed18..f1f79d104309 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -26,10 +27,12 @@ config KVM select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU - depends on ARM_VIRT_EXT && ARM_LPAE + select MMU_NOTIFIER + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD + depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support hosting virtualized guest machines. This module provides access to the hardware capabilities through a character device node named /dev/kvm. @@ -37,10 +40,7 @@ config KVM If unsure, say N. config KVM_ARM_HOST - bool "KVM host support for ARM cpus." - depends on KVM - depends on MMU - select MMU_NOTIFIER + bool ---help--- Provides host support for ARM processors. @@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool "KVM support for Virtual GIC" - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool "KVM support for Architected Timers" - depends on KVM_ARM_VGIC && ARM_ARCH_TIMER - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for the Architected Timers in virtual machines - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 443b8bea43e9..139e46c08b6e 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. @@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +obj-y += $(KVM)/arm/vgic.o +obj-y += $(KVM)/arm/vgic-v2.o +obj-y += $(KVM)/arm/vgic-v2-emul.o +obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 07e7eb1d7ab6..6f536451ab78 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); -static bool vgic_present; - static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: - r = vgic_present; - break; + case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: @@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + return kvm_timer_should_fire(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) @@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + if (vcpu->arch.pause) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.pause = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu->arch.pause = true; + break; + default: + return -EINVAL; + } + + return 0; } /** @@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + static void vcpu_pause(struct kvm_vcpu *vcpu) { wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); @@ -540,7 +560,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mode = OUTSIDE_GUEST_MODE; kvm_guest_exit(); - trace_kvm_exit(*vcpu_pc(vcpu)); + trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: - if (!vgic_present) - return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { - if (vgic_present) - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); - else - return -ENXIO; + return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { struct kvm_arm_device_addr dev_addr; @@ -1035,10 +1050,6 @@ static int init_hyp_mode(void) if (err) goto out_free_context; -#ifdef CONFIG_KVM_ARM_VGIC - vgic_present = true; -#endif - /* * Init HYP architected timer support */ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 384bab67c462..d503fbb787d3 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } -#ifndef CONFIG_KVM_ARM_TIMER - -#define NUM_TIMER_REGS 0 - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - return 0; -} - -static bool is_timer_reg(u64 index) -{ - return false; -} - -#else - #define NUM_TIMER_REGS 3 static bool is_timer_reg(u64 index) @@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return 0; } -#endif - static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(long)reg->addr; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 14d488388480..35e4a3a0c476 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm /* @@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 ) * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm #define CNTHCTL_PL1PCTEN (1 << 0) @@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 ) * Clobbers r2-r5 */ .macro save_timer_state -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: -#endif @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) @@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 ) bic r2, r2, #CNTHCTL_PL1PCEN mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 ) and r2, r2, #3 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL 1: -#endif .endm .equ vmentry, 0 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0eb3f0..974b1c606d04 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) +static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) { unsigned long rt; - int len; - bool is_write, sign_extend; + int access_size; + bool sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ @@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - len = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(len < 0)) - return len; + access_size = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(access_size < 0)) + return access_size; - is_write = kvm_vcpu_dabt_iswrite(vcpu); + *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio->is_write = is_write; - mmio->phys_addr = fault_ipa; - mmio->len = len; + *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; @@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; + bool is_write; + int len; + u8 data_buf[8]; /* - * Prepare MMIO operation. First stash it in a private - * structure that we can use for in-kernel emulation. If the - * kernel can't handle it, copy it into run->mmio and let user - * space do its magic. + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, fault_ipa, &mmio); + ret = decode_hsr(vcpu, &is_write, &len); if (ret) return ret; } else { @@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, rt = vcpu->arch.mmio_decode.rt; - if (mmio.is_write) { - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), - mmio.len); + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + mmio_write_buf(data_buf, len, data); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, - fault_ipa, data); - mmio_write_buf(mmio.data, mmio.len, data); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } else { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, fault_ipa, 0); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } - if (vgic_handle_mmio(vcpu, run, &mmio)) + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + memcpy(run->mmio.data, data_buf, len); + + if (!ret) { + /* We handled the access successfully in the kernel. */ + kvm_handle_mmio_return(vcpu, run); return 1; + } - kvm_prepare_mmio(run, &mmio); + run->exit_reason = KVM_EXIT_MMIO; return 0; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 3e6859bc3e11..15b050d46fc9 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp, phys_addr_t addr = start, end = start + size; phys_addr_t next; - pgd = pgdp + pgd_index(addr); + pgd = pgdp + kvm_pgd_index(addr); do { next = kvm_pgd_addr_end(addr, end); if (!pgd_none(*pgd)) @@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm, phys_addr_t next; pgd_t *pgd; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); do { next = kvm_pgd_addr_end(addr, end); stage2_flush_puds(kvm, pgd, addr, next); @@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } +/* Free the HW pgd, one page at a time */ +static void kvm_free_hwpgd(void *hwpgd) +{ + free_pages_exact(hwpgd, kvm_get_hwpgd_size()); +} + +/* Allocate the HW PGD, making sure that each page gets its own refcount */ +static void *kvm_alloc_hwpgd(void) +{ + unsigned int size = kvm_get_hwpgd_size(); + + return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); +} + /** * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. @@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) */ int kvm_alloc_stage2_pgd(struct kvm *kvm) { - int ret; pgd_t *pgd; + void *hwpgd; if (kvm->arch.pgd != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } + hwpgd = kvm_alloc_hwpgd(); + if (!hwpgd) + return -ENOMEM; + + /* When the kernel uses more levels of page tables than the + * guest, we allocate a fake PGD and pre-populate it to point + * to the next-level page table, which will be the real + * initial page table pointed to by the VTTBR. + * + * When KVM_PREALLOC_LEVEL==2, we allocate a single page for + * the PMD and the kernel will use folded pud. + * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD + * pages. + */ if (KVM_PREALLOC_LEVEL > 0) { + int i; + /* * Allocate fake pgd for the page table manipulation macros to * work. This is not used by the hardware and we have no @@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) */ pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t), GFP_KERNEL | __GFP_ZERO); + + if (!pgd) { + kvm_free_hwpgd(hwpgd); + return -ENOMEM; + } + + /* Plug the HW PGD into the fake one. */ + for (i = 0; i < PTRS_PER_S2_PGD; i++) { + if (KVM_PREALLOC_LEVEL == 1) + pgd_populate(NULL, pgd + i, + (pud_t *)hwpgd + i * PTRS_PER_PUD); + else if (KVM_PREALLOC_LEVEL == 2) + pud_populate(NULL, pud_offset(pgd, 0) + i, + (pmd_t *)hwpgd + i * PTRS_PER_PMD); + } } else { /* * Allocate actual first-level Stage-2 page table used by the * hardware for Stage-2 page table walks. */ - pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER); + pgd = (pgd_t *)hwpgd; } - if (!pgd) - return -ENOMEM; - - ret = kvm_prealloc_hwpgd(kvm, pgd); - if (ret) - goto out_err; - kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; -out_err: - if (KVM_PREALLOC_LEVEL > 0) - kfree(pgd); - else - free_pages((unsigned long)pgd, S2_PGD_ORDER); - return ret; } /** @@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm) return; unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE); - kvm_free_hwpgd(kvm); + kvm_free_hwpgd(kvm_get_hwpgd(kvm)); if (KVM_PREALLOC_LEVEL > 0) kfree(kvm->arch.pgd); - else - free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER); + kvm->arch.pgd = NULL; } @@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache pgd_t *pgd; pud_t *pud; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); if (WARN_ON(pgd_none(*pgd))) { if (!cache) return NULL; @@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) pgd_t *pgd; phys_addr_t next; - pgd = kvm->arch.pgd + pgd_index(addr); + pgd = kvm->arch.pgd + kvm_pgd_index(addr); do { /* * Release kvm_mmu_lock periodically if the memory region is @@ -1299,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, out_unlock: spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret; } +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pmd_t *pmd; + pte_t *pte; + pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + goto out; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + goto out; + } + + pte = pte_offset_kernel(pmd, fault_ipa); + if (pte_none(*pte)) /* Nothing there either */ + goto out; + + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1333,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Check the stage-2 fault is trans. fault or write fault */ fault_status = kvm_vcpu_trap_get_fault_type(vcpu); - if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1369,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; @@ -1377,15 +1456,16 @@ out_unlock: return ret; } -static void handle_hva_to_gpa(struct kvm *kvm, - unsigned long start, - unsigned long end, - void (*handler)(struct kvm *kvm, - gpa_t gpa, void *data), - void *data) +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, void *data), + void *data) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + int ret = 0; slots = kvm_memslots(kvm); @@ -1409,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm, for (; gfn < gfn_end; ++gfn) { gpa_t gpa = gfn << PAGE_SHIFT; - handler(kvm, gpa, data); + ret |= handler(kvm, gpa, data); } } + + return ret; } -static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); + return 0; } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -1442,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; } -static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; @@ -1454,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) * through this calling path. */ stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; } @@ -1470,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_young(*pmd)) { + *pmd = pmd_mkold(*pmd); + return 1; + } + + return 0; + } + + pte = pte_offset_kernel(pmd, gpa); + if (pte_none(*pte)) + return 0; + + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); /* Just a page... */ + return 1; + } + + return 0; +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + return pmd_young(*pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (!pte_none(*pte)) /* Just a page... */ + return pte_young(*pte); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 881874b1a036..0ec35392d208 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -25,18 +25,22 @@ TRACE_EVENT(kvm_entry, ); TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned long vcpu_pc), - TP_ARGS(vcpu_pc), + TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), + TP_ARGS(exit_reason, vcpu_pc), TP_STRUCT__entry( + __field( unsigned int, exit_reason ) __field( unsigned long, vcpu_pc ) ), TP_fast_assign( + __entry->exit_reason = exit_reason; __entry->vcpu_pc = vcpu_pc; ), - TP_printk("PC: 0x%08lx", __entry->vcpu_pc) + TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx", + __entry->exit_reason, + __entry->vcpu_pc) ); TRACE_EVENT(kvm_guest_fault, @@ -64,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault, __entry->hxfar, __entry->vcpu_pc) ); +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + TRACE_EVENT(kvm_irq_line, TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), TP_ARGS(type, vcpu_idx, irq_num, level), @@ -206,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva, TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) ); +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + TRACE_EVENT(kvm_hvc, TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), TP_ARGS(vcpu_pc, r0, imm), diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 5e34fb143309..aa4116e9452f 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -270,37 +270,35 @@ static void __init at91_pm_sram_init(void) phys_addr_t sram_pbase; unsigned long sram_base; struct device_node *node; - struct platform_device *pdev; + struct platform_device *pdev = NULL; - node = of_find_compatible_node(NULL, NULL, "mmio-sram"); - if (!node) { - pr_warn("%s: failed to find sram node!\n", __func__); - return; + for_each_compatible_node(node, NULL, "mmio-sram") { + pdev = of_find_device_by_node(node); + if (pdev) { + of_node_put(node); + break; + } } - pdev = of_find_device_by_node(node); if (!pdev) { pr_warn("%s: failed to find sram device!\n", __func__); - goto put_node; + return; } sram_pool = dev_get_gen_pool(&pdev->dev); if (!sram_pool) { pr_warn("%s: sram pool unavailable!\n", __func__); - goto put_node; + return; } sram_base = gen_pool_alloc(sram_pool, at91_slow_clock_sz); if (!sram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); - goto put_node; + return; } sram_pbase = gen_pool_virt_to_phys(sram_pool, sram_base); slow_clock = __arm_ioremap_exec(sram_pbase, at91_slow_clock_sz, false); - -put_node: - of_node_put(node); } #endif diff --git a/arch/arm/mach-at91/pm.h b/arch/arm/mach-at91/pm.h index d2c89963af2d..86c0aa819d25 100644 --- a/arch/arm/mach-at91/pm.h +++ b/arch/arm/mach-at91/pm.h @@ -44,7 +44,7 @@ static inline void at91rm9200_standby(void) " mcr p15, 0, %0, c7, c0, 4\n\t" " str %5, [%1, %2]" : - : "r" (0), "r" (AT91_BASE_SYS), "r" (AT91RM9200_SDRAMC_LPR), + : "r" (0), "r" (at91_ramc_base[0]), "r" (AT91RM9200_SDRAMC_LPR), "r" (1), "r" (AT91RM9200_SDRAMC_SRR), "r" (lpr)); } diff --git a/arch/arm/mach-at91/pm_slowclock.S b/arch/arm/mach-at91/pm_slowclock.S index 556151e85ec4..931f0e302c03 100644 --- a/arch/arm/mach-at91/pm_slowclock.S +++ b/arch/arm/mach-at91/pm_slowclock.S @@ -25,11 +25,6 @@ */ #undef SLOWDOWN_MASTER_CLOCK -#define MCKRDY_TIMEOUT 1000 -#define MOSCRDY_TIMEOUT 1000 -#define PLLALOCK_TIMEOUT 1000 -#define PLLBLOCK_TIMEOUT 1000 - pmc .req r0 sdramc .req r1 ramc1 .req r2 @@ -41,60 +36,42 @@ tmp2 .req r5 * Wait until master clock is ready (after switching master clock source) */ .macro wait_mckrdy - mov tmp2, #MCKRDY_TIMEOUT -1: sub tmp2, tmp2, #1 - cmp tmp2, #0 - beq 2f - ldr tmp1, [pmc, #AT91_PMC_SR] +1: ldr tmp1, [pmc, #AT91_PMC_SR] tst tmp1, #AT91_PMC_MCKRDY beq 1b -2: .endm /* * Wait until master oscillator has stabilized. */ .macro wait_moscrdy - mov tmp2, #MOSCRDY_TIMEOUT -1: sub tmp2, tmp2, #1 - cmp tmp2, #0 - beq 2f - ldr tmp1, [pmc, #AT91_PMC_SR] +1: ldr tmp1, [pmc, #AT91_PMC_SR] tst tmp1, #AT91_PMC_MOSCS beq 1b -2: .endm /* * Wait until PLLA has locked. */ .macro wait_pllalock - mov tmp2, #PLLALOCK_TIMEOUT -1: sub tmp2, tmp2, #1 - cmp tmp2, #0 - beq 2f - ldr tmp1, [pmc, #AT91_PMC_SR] +1: ldr tmp1, [pmc, #AT91_PMC_SR] tst tmp1, #AT91_PMC_LOCKA beq 1b -2: .endm /* * Wait until PLLB has locked. */ .macro wait_pllblock - mov tmp2, #PLLBLOCK_TIMEOUT -1: sub tmp2, tmp2, #1 - cmp tmp2, #0 - beq 2f - ldr tmp1, [pmc, #AT91_PMC_SR] +1: ldr tmp1, [pmc, #AT91_PMC_SR] tst tmp1, #AT91_PMC_LOCKB beq 1b -2: .endm .text + .arm + /* void at91_slow_clock(void __iomem *pmc, void __iomem *sdramc, * void __iomem *ramc1, int memctrl) */ @@ -134,6 +111,16 @@ ddr_sr_enable: cmp memctrl, #AT91_MEMCTRL_DDRSDR bne sdr_sr_enable + /* LPDDR1 --> force DDR2 mode during self-refresh */ + ldr tmp1, [sdramc, #AT91_DDRSDRC_MDR] + str tmp1, .saved_sam9_mdr + bic tmp1, tmp1, #~AT91_DDRSDRC_MD + cmp tmp1, #AT91_DDRSDRC_MD_LOW_POWER_DDR + ldreq tmp1, [sdramc, #AT91_DDRSDRC_MDR] + biceq tmp1, tmp1, #AT91_DDRSDRC_MD + orreq tmp1, tmp1, #AT91_DDRSDRC_MD_DDR2 + streq tmp1, [sdramc, #AT91_DDRSDRC_MDR] + /* prepare for DDRAM self-refresh mode */ ldr tmp1, [sdramc, #AT91_DDRSDRC_LPR] str tmp1, .saved_sam9_lpr @@ -142,14 +129,26 @@ ddr_sr_enable: /* figure out if we use the second ram controller */ cmp ramc1, #0 - ldrne tmp2, [ramc1, #AT91_DDRSDRC_LPR] - strne tmp2, .saved_sam9_lpr1 - bicne tmp2, #AT91_DDRSDRC_LPCB - orrne tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH + beq ddr_no_2nd_ctrl + + ldr tmp2, [ramc1, #AT91_DDRSDRC_MDR] + str tmp2, .saved_sam9_mdr1 + bic tmp2, tmp2, #~AT91_DDRSDRC_MD + cmp tmp2, #AT91_DDRSDRC_MD_LOW_POWER_DDR + ldreq tmp2, [ramc1, #AT91_DDRSDRC_MDR] + biceq tmp2, tmp2, #AT91_DDRSDRC_MD + orreq tmp2, tmp2, #AT91_DDRSDRC_MD_DDR2 + streq tmp2, [ramc1, #AT91_DDRSDRC_MDR] + + ldr tmp2, [ramc1, #AT91_DDRSDRC_LPR] + str tmp2, .saved_sam9_lpr1 + bic tmp2, #AT91_DDRSDRC_LPCB + orr tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH /* Enable DDRAM self-refresh mode */ + str tmp2, [ramc1, #AT91_DDRSDRC_LPR] +ddr_no_2nd_ctrl: str tmp1, [sdramc, #AT91_DDRSDRC_LPR] - strne tmp2, [ramc1, #AT91_DDRSDRC_LPR] b sdr_sr_done @@ -208,6 +207,7 @@ sdr_sr_done: /* Turn off the main oscillator */ ldr tmp1, [pmc, #AT91_CKGR_MOR] bic tmp1, tmp1, #AT91_PMC_MOSCEN + orr tmp1, tmp1, #AT91_PMC_KEY str tmp1, [pmc, #AT91_CKGR_MOR] /* Wait for interrupt */ @@ -216,6 +216,7 @@ sdr_sr_done: /* Turn on the main oscillator */ ldr tmp1, [pmc, #AT91_CKGR_MOR] orr tmp1, tmp1, #AT91_PMC_MOSCEN + orr tmp1, tmp1, #AT91_PMC_KEY str tmp1, [pmc, #AT91_CKGR_MOR] wait_moscrdy @@ -280,12 +281,17 @@ sdr_sr_done: */ cmp memctrl, #AT91_MEMCTRL_DDRSDR bne sdr_en_restore + /* Restore MDR in case of LPDDR1 */ + ldr tmp1, .saved_sam9_mdr + str tmp1, [sdramc, #AT91_DDRSDRC_MDR] /* Restore LPR on AT91 with DDRAM */ ldr tmp1, .saved_sam9_lpr str tmp1, [sdramc, #AT91_DDRSDRC_LPR] /* if we use the second ram controller */ cmp ramc1, #0 + ldrne tmp2, .saved_sam9_mdr1 + strne tmp2, [ramc1, #AT91_DDRSDRC_MDR] ldrne tmp2, .saved_sam9_lpr1 strne tmp2, [ramc1, #AT91_DDRSDRC_LPR] @@ -319,5 +325,11 @@ ram_restored: .saved_sam9_lpr1: .word 0 +.saved_sam9_mdr: + .word 0 + +.saved_sam9_mdr1: + .word 0 + ENTRY(at91_slow_clock_sz) .word .-at91_slow_clock diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 3f32c47a6d74..d2e9f12d12f1 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -126,8 +126,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) */ void exynos_cpu_power_down(int cpu) { - if (cpu == 0 && (of_machine_is_compatible("samsung,exynos5420") || - of_machine_is_compatible("samsung,exynos5800"))) { + if (cpu == 0 && (soc_is_exynos5420() || soc_is_exynos5800())) { /* * Bypass power down for CPU0 during suspend. Check for * the SYS_PWR_REG value to decide if we are suspending diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c index 20f267121b3e..37266a826437 100644 --- a/arch/arm/mach-exynos/pm_domains.c +++ b/arch/arm/mach-exynos/pm_domains.c @@ -161,6 +161,34 @@ no_clk: of_genpd_add_provider_simple(np, &pd->pd); } + /* Assign the child power domains to their parents */ + for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") { + struct generic_pm_domain *child_domain, *parent_domain; + struct of_phandle_args args; + + args.np = np; + args.args_count = 0; + child_domain = of_genpd_get_from_provider(&args); + if (!child_domain) + continue; + + if (of_parse_phandle_with_args(np, "power-domains", + "#power-domain-cells", 0, &args) != 0) + continue; + + parent_domain = of_genpd_get_from_provider(&args); + if (!parent_domain) + continue; + + if (pm_genpd_add_subdomain(parent_domain, child_domain)) + pr_warn("%s failed to add subdomain: %s\n", + parent_domain->name, child_domain->name); + else + pr_info("%s has as child subdomain: %s.\n", + parent_domain->name, child_domain->name); + of_node_put(np); + } + return 0; } arch_initcall(exynos4_pm_init_power_domain); diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 52e2b1a2fddb..318d127df147 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3; static u32 exynos_irqwake_intmask = 0xffffffff; static const struct exynos_wkup_irq exynos3250_wkup_irq[] = { - { 73, BIT(1) }, /* RTC alarm */ - { 74, BIT(2) }, /* RTC tick */ + { 105, BIT(1) }, /* RTC alarm */ + { 106, BIT(2) }, /* RTC tick */ { /* sentinel */ }, }; diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 4ad6e473cf83..9de3412af406 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -211,8 +211,9 @@ static void __init imx6q_1588_init(void) * set bit IOMUXC_GPR1[21]. Or the PTP clock must be from pad * (external OSC), and we need to clear the bit. */ - clksel = ptp_clk == enet_ref ? IMX6Q_GPR1_ENET_CLK_SEL_ANATOP : - IMX6Q_GPR1_ENET_CLK_SEL_PAD; + clksel = clk_is_match(ptp_clk, enet_ref) ? + IMX6Q_GPR1_ENET_CLK_SEL_ANATOP : + IMX6Q_GPR1_ENET_CLK_SEL_PAD; gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr"); if (!IS_ERR(gpr)) regmap_update_bits(gpr, IOMUXC_GPR1, diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c index 61bfe584a9d7..fc832040c6e9 100644 --- a/arch/arm/mach-msm/board-halibut.c +++ b/arch/arm/mach-msm/board-halibut.c @@ -20,6 +20,7 @@ #include <linux/input.h> #include <linux/io.h> #include <linux/delay.h> +#include <linux/smc91x.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -46,15 +47,20 @@ static struct resource smc91x_resources[] = { [1] = { .start = MSM_GPIO_TO_INT(49), .end = MSM_GPIO_TO_INT(49), - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static struct platform_device *devices[] __initdata = { diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c index 4c748616ef47..10016a3bc698 100644 --- a/arch/arm/mach-msm/board-qsd8x50.c +++ b/arch/arm/mach-msm/board-qsd8x50.c @@ -22,6 +22,7 @@ #include <linux/usb/msm_hsusb.h> #include <linux/err.h> #include <linux/clkdev.h> +#include <linux/smc91x.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -49,15 +50,20 @@ static struct resource smc91x_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static int __init msm_init_smc91x(void) diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 01e398a868bc..57d429830e09 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -14,7 +14,7 @@ #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/export.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <asm/cpuidle.h> #include <asm/proc-fns.h> @@ -84,7 +84,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, { struct idle_statedata *cx = state_ptr + index; u32 mpuss_can_lose_context = 0; - int cpu_id = smp_processor_id(); /* * CPU0 has to wait and stay ON until CPU1 is OFF state. @@ -112,7 +111,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) && (cx->mpu_logic_state == PWRDM_POWER_OFF); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id); + tick_broadcast_enter(); /* * Call idle CPU PM enter notifier chain so that @@ -169,7 +168,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (dev->cpu == 0 && mpuss_can_lose_context) cpu_cluster_pm_exit(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id); + tick_broadcast_exit(); fail: cpuidle_coupled_parallel_barrier(dev, &abort_barrier); @@ -184,8 +183,7 @@ fail: */ static void omap_setup_broadcast_timer(void *arg) { - int cpu = smp_processor_id(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu); + tick_broadcast_enable(); } static struct cpuidle_driver omap4_idle_driver = { diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2a2f4d56e4c8..25f1beea453e 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -720,6 +720,8 @@ static const char * __init omap_get_family(void) return kasprintf(GFP_KERNEL, "OMAP4"); else if (soc_is_omap54xx()) return kasprintf(GFP_KERNEL, "OMAP5"); + else if (soc_is_am33xx() || soc_is_am335x()) + return kasprintf(GFP_KERNEL, "AM33xx"); else if (soc_is_am43xx()) return kasprintf(GFP_KERNEL, "AM43xx"); else if (soc_is_dra7xx()) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 92afb723dcfc..355b08936871 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1692,16 +1692,15 @@ static int _deassert_hardreset(struct omap_hwmod *oh, const char *name) if (ret == -EBUSY) pr_warn("omap_hwmod: %s: failed to hardreset\n", oh->name); - if (!ret) { + if (oh->clkdm) { /* * Set the clockdomain to HW_AUTO, assuming that the * previous state was HW_AUTO. */ - if (oh->clkdm && hwsup) + if (hwsup) clkdm_allow_idle(oh->clkdm); - } else { - if (oh->clkdm) - clkdm_hwmod_disable(oh->clkdm, oh); + + clkdm_hwmod_disable(oh->clkdm, oh); } return ret; @@ -2698,6 +2697,7 @@ static int __init _register(struct omap_hwmod *oh) INIT_LIST_HEAD(&oh->master_ports); INIT_LIST_HEAD(&oh->slave_ports); spin_lock_init(&oh->_lock); + lockdep_set_class(&oh->_lock, &oh->hwmod_key); oh->_state = _HWMOD_STATE_REGISTERED; diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 9d4bec6ee742..9611c91d9b82 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -674,6 +674,7 @@ struct omap_hwmod { u32 _sysc_cache; void __iomem *_mpu_rt_va; spinlock_t _lock; + struct lock_class_key hwmod_key; /* unique lock class */ struct list_head node; struct omap_hwmod_ocp_if *_mpu_port; unsigned int (*xlate_irq)(unsigned int); diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index e8692e7675b8..16fe7a1b7a35 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -1466,55 +1466,18 @@ static struct omap_hwmod dra7xx_ocp2scp3_hwmod = { * */ -static struct omap_hwmod_class dra7xx_pcie_hwmod_class = { +static struct omap_hwmod_class dra7xx_pciess_hwmod_class = { .name = "pcie", }; /* pcie1 */ -static struct omap_hwmod dra7xx_pcie1_hwmod = { +static struct omap_hwmod dra7xx_pciess1_hwmod = { .name = "pcie1", - .class = &dra7xx_pcie_hwmod_class, + .class = &dra7xx_pciess_hwmod_class, .clkdm_name = "pcie_clkdm", .main_clk = "l4_root_clk_div", .prcm = { .omap4 = { - .clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, -}; - -/* pcie2 */ -static struct omap_hwmod dra7xx_pcie2_hwmod = { - .name = "pcie2", - .class = &dra7xx_pcie_hwmod_class, - .clkdm_name = "pcie_clkdm", - .main_clk = "l4_root_clk_div", - .prcm = { - .omap4 = { - .clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, -}; - -/* - * 'PCIE PHY' class - * - */ - -static struct omap_hwmod_class dra7xx_pcie_phy_hwmod_class = { - .name = "pcie-phy", -}; - -/* pcie1 phy */ -static struct omap_hwmod dra7xx_pcie1_phy_hwmod = { - .name = "pcie1-phy", - .class = &dra7xx_pcie_phy_hwmod_class, - .clkdm_name = "l3init_clkdm", - .main_clk = "l4_root_clk_div", - .prcm = { - .omap4 = { .clkctrl_offs = DRA7XX_CM_L3INIT_PCIESS1_CLKCTRL_OFFSET, .context_offs = DRA7XX_RM_L3INIT_PCIESS1_CONTEXT_OFFSET, .modulemode = MODULEMODE_SWCTRL, @@ -1522,11 +1485,11 @@ static struct omap_hwmod dra7xx_pcie1_phy_hwmod = { }, }; -/* pcie2 phy */ -static struct omap_hwmod dra7xx_pcie2_phy_hwmod = { - .name = "pcie2-phy", - .class = &dra7xx_pcie_phy_hwmod_class, - .clkdm_name = "l3init_clkdm", +/* pcie2 */ +static struct omap_hwmod dra7xx_pciess2_hwmod = { + .name = "pcie2", + .class = &dra7xx_pciess_hwmod_class, + .clkdm_name = "pcie_clkdm", .main_clk = "l4_root_clk_div", .prcm = { .omap4 = { @@ -2877,50 +2840,34 @@ static struct omap_hwmod_ocp_if dra7xx_l4_cfg__ocp2scp3 = { .user = OCP_USER_MPU | OCP_USER_SDMA, }; -/* l3_main_1 -> pcie1 */ -static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie1 = { +/* l3_main_1 -> pciess1 */ +static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess1 = { .master = &dra7xx_l3_main_1_hwmod, - .slave = &dra7xx_pcie1_hwmod, + .slave = &dra7xx_pciess1_hwmod, .clk = "l3_iclk_div", .user = OCP_USER_MPU | OCP_USER_SDMA, }; -/* l4_cfg -> pcie1 */ -static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1 = { +/* l4_cfg -> pciess1 */ +static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess1 = { .master = &dra7xx_l4_cfg_hwmod, - .slave = &dra7xx_pcie1_hwmod, + .slave = &dra7xx_pciess1_hwmod, .clk = "l4_root_clk_div", .user = OCP_USER_MPU | OCP_USER_SDMA, }; -/* l3_main_1 -> pcie2 */ -static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie2 = { +/* l3_main_1 -> pciess2 */ +static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess2 = { .master = &dra7xx_l3_main_1_hwmod, - .slave = &dra7xx_pcie2_hwmod, + .slave = &dra7xx_pciess2_hwmod, .clk = "l3_iclk_div", .user = OCP_USER_MPU | OCP_USER_SDMA, }; -/* l4_cfg -> pcie2 */ -static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2 = { - .master = &dra7xx_l4_cfg_hwmod, - .slave = &dra7xx_pcie2_hwmod, - .clk = "l4_root_clk_div", - .user = OCP_USER_MPU | OCP_USER_SDMA, -}; - -/* l4_cfg -> pcie1 phy */ -static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1_phy = { - .master = &dra7xx_l4_cfg_hwmod, - .slave = &dra7xx_pcie1_phy_hwmod, - .clk = "l4_root_clk_div", - .user = OCP_USER_MPU | OCP_USER_SDMA, -}; - -/* l4_cfg -> pcie2 phy */ -static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2_phy = { +/* l4_cfg -> pciess2 */ +static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess2 = { .master = &dra7xx_l4_cfg_hwmod, - .slave = &dra7xx_pcie2_phy_hwmod, + .slave = &dra7xx_pciess2_hwmod, .clk = "l4_root_clk_div", .user = OCP_USER_MPU | OCP_USER_SDMA, }; @@ -3327,12 +3274,10 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = { &dra7xx_l4_cfg__mpu, &dra7xx_l4_cfg__ocp2scp1, &dra7xx_l4_cfg__ocp2scp3, - &dra7xx_l3_main_1__pcie1, - &dra7xx_l4_cfg__pcie1, - &dra7xx_l3_main_1__pcie2, - &dra7xx_l4_cfg__pcie2, - &dra7xx_l4_cfg__pcie1_phy, - &dra7xx_l4_cfg__pcie2_phy, + &dra7xx_l3_main_1__pciess1, + &dra7xx_l4_cfg__pciess1, + &dra7xx_l3_main_1__pciess2, + &dra7xx_l4_cfg__pciess2, &dra7xx_l3_main_1__qspi, &dra7xx_l4_per3__rtcss, &dra7xx_l4_cfg__sata, diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 190fa43e7479..e642b079e9f3 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -173,6 +173,7 @@ static void __init omap3_igep0030_rev_g_legacy_init(void) static void __init omap3_evm_legacy_init(void) { + hsmmc2_internal_input_clk(); legacy_init_wl12xx(WL12XX_REFCLOCK_38, 0, 149); } diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index a08a617a6c11..d6d6bc39e05c 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -252,10 +252,10 @@ static void omap44xx_prm_save_and_clear_irqen(u32 *saved_mask) { saved_mask[0] = omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST, - OMAP4_PRM_IRQSTATUS_MPU_OFFSET); + OMAP4_PRM_IRQENABLE_MPU_OFFSET); saved_mask[1] = omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST, - OMAP4_PRM_IRQSTATUS_MPU_2_OFFSET); + OMAP4_PRM_IRQENABLE_MPU_2_OFFSET); omap4_prm_write_inst_reg(0, OMAP4430_PRM_OCP_SOCKET_INST, OMAP4_PRM_IRQENABLE_MPU_OFFSET); diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index 343c4e3a7c5d..f6d02e4cbcda 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -36,6 +36,7 @@ #include <linux/platform_data/video-pxafb.h> #include <mach/bitfield.h> #include <linux/platform_data/mmc-pxamci.h> +#include <linux/smc91x.h> #include "generic.h" #include "devices.h" @@ -81,11 +82,16 @@ static struct resource smc91x_resources[] = { } }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static void idp_backlight_power(int on) diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 0eecd83c624e..89a7c06570d3 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -11,6 +11,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/bitops.h> #include <linux/init.h> #include <linux/module.h> #include <linux/interrupt.h> @@ -40,7 +41,6 @@ #define ICHP_VAL_IRQ (1 << 31) #define ICHP_IRQ(i) (((i) >> 16) & 0x7fff) #define IPR_VALID (1 << 31) -#define IRQ_BIT(n) (((n) - PXA_IRQ(0)) & 0x1f) #define MAX_INTERNAL_IRQS 128 @@ -51,6 +51,7 @@ static void __iomem *pxa_irq_base; static int pxa_internal_irq_nr; static bool cpu_has_ipr; +static struct irq_domain *pxa_irq_domain; static inline void __iomem *irq_base(int i) { @@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i) void pxa_mask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr &= ~(1 << IRQ_BIT(d->irq)); + icmr &= ~BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } void pxa_unmask_irq(struct irq_data *d) { void __iomem *base = irq_data_get_irq_chip_data(d); + irq_hw_number_t irq = irqd_to_hwirq(d); uint32_t icmr = __raw_readl(base + ICMR); - icmr |= 1 << IRQ_BIT(d->irq); + icmr |= BIT(irq & 0x1f); __raw_writel(icmr, base + ICMR); } @@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs) } while (1); } -void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +static int pxa_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw) { - int irq, i, n; + void __iomem *base = irq_base(hw / 32); - BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + /* initialize interrupt priority */ + if (cpu_has_ipr) + __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); + + irq_set_chip_and_handler(virq, &pxa_internal_irq_chip, + handle_level_irq); + irq_set_chip_data(virq, base); + set_irq_flags(virq, IRQF_VALID); + + return 0; +} + +static struct irq_domain_ops pxa_irq_ops = { + .map = pxa_irq_map, + .xlate = irq_domain_xlate_onecell, +}; + +static __init void +pxa_init_irq_common(struct device_node *node, int irq_nr, + int (*fn)(struct irq_data *, unsigned int)) +{ + int n; pxa_internal_irq_nr = irq_nr; - cpu_has_ipr = !cpu_is_pxa25x(); - pxa_irq_base = io_p2v(0x40d00000); + pxa_irq_domain = irq_domain_add_legacy(node, irq_nr, + PXA_IRQ(0), 0, + &pxa_irq_ops, NULL); + if (!pxa_irq_domain) + panic("Unable to add PXA IRQ domain\n"); + irq_set_default_host(pxa_irq_domain); for (n = 0; n < irq_nr; n += 32) { void __iomem *base = irq_base(n >> 5); __raw_writel(0, base + ICMR); /* disable all IRQs */ __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - for (i = n; (i < (n + 32)) && (i < irq_nr); i++) { - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i)); - - irq = PXA_IRQ(i); - irq_set_chip_and_handler(irq, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(irq, base); - set_irq_flags(irq, IRQF_VALID); - } } - /* only unmasked interrupts kick us out of idle */ __raw_writel(1, irq_base(0) + ICCR); pxa_internal_irq_chip.irq_set_wake = fn; } +void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int)) +{ + BUG_ON(irq_nr > MAX_INTERNAL_IRQS); + + pxa_irq_base = io_p2v(0x40d00000); + cpu_has_ipr = !cpu_is_pxa25x(); + pxa_init_irq_common(NULL, irq_nr, fn); +} + #ifdef CONFIG_PM static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32]; static unsigned long saved_ipr[MAX_INTERNAL_IRQS]; @@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = { }; #ifdef CONFIG_OF -static struct irq_domain *pxa_irq_domain; - -static int pxa_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw) -{ - void __iomem *base = irq_base(hw / 32); - - /* initialize interrupt priority */ - if (cpu_has_ipr) - __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw)); - - irq_set_chip_and_handler(hw, &pxa_internal_irq_chip, - handle_level_irq); - irq_set_chip_data(hw, base); - set_irq_flags(hw, IRQF_VALID); - - return 0; -} - -static struct irq_domain_ops pxa_irq_ops = { - .map = pxa_irq_map, - .xlate = irq_domain_xlate_onecell, -}; - static const struct of_device_id intc_ids[] __initconst = { { .compatible = "marvell,pxa-intc", }, {} @@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) { struct device_node *node; struct resource res; - int n, ret; + int ret; node = of_find_matching_node(NULL, intc_ids); if (!node) { @@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int)) return; } - pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0, - &pxa_irq_ops, NULL); - if (!pxa_irq_domain) - panic("Unable to add PXA IRQ domain\n"); - - irq_set_default_host(pxa_irq_domain); - - for (n = 0; n < pxa_internal_irq_nr; n += 32) { - void __iomem *base = irq_base(n >> 5); - - __raw_writel(0, base + ICMR); /* disable all IRQs */ - __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */ - } - - /* only unmasked interrupts kick us out of idle */ - __raw_writel(1, irq_base(0) + ICCR); - - pxa_internal_irq_chip.irq_set_wake = fn; + pxa_init_irq_common(node, pxa_internal_irq_nr, fn); } #endif /* CONFIG_OF */ diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index ad777b353bd5..eaee2c20b189 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -24,6 +24,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/pwm_backlight.h> +#include <linux/smc91x.h> #include <asm/types.h> #include <asm/setup.h> @@ -189,15 +190,20 @@ static struct resource smc91x_resources[] = { [1] = { .start = LPD270_ETHERNET_IRQ, .end = LPD270_ETHERNET_IRQ, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE, }, }; +struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static struct resource lpd270_flash_resources[] = { diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index 205f9bf3821e..ac2ae5c71ab4 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = { }; static struct platform_device can_regulator_device = { - .name = "reg-fixed-volage", + .name = "reg-fixed-voltage", .id = 0, .dev = { .platform_data = &can_regulator_pdata, diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 850e506926df..c309593abdb2 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -28,6 +28,7 @@ #include <linux/platform_data/video-clcd-versatile.h> #include <linux/io.h> #include <linux/smsc911x.h> +#include <linux/smc91x.h> #include <linux/ata_platform.h> #include <linux/amba/mmci.h> #include <linux/gfp.h> @@ -94,6 +95,10 @@ static struct smsc911x_platform_config smsc911x_config = { .phy_interface = PHY_INTERFACE_MODE_MII, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, +}; + static struct platform_device realview_eth_device = { .name = "smsc911x", .id = 0, @@ -107,6 +112,8 @@ int realview_eth_register(const char *name, struct resource *res) realview_eth_device.resource = res; if (strcmp(realview_eth_device.name, "smsc911x") == 0) realview_eth_device.dev.platform_data = &smsc911x_config; + else + realview_eth_device.dev.platform_data = &smc91x_platdata; return platform_device_register(&realview_eth_device); } diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 64c88d657f9e..b3869cbbcc68 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -234,7 +234,7 @@ static struct resource realview_eb_eth_resources[] = { [1] = { .start = IRQ_EB_ETH, .end = IRQ_EB_ETH, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE, }, }; diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 169262e3040d..af868d258e66 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -12,6 +12,7 @@ #include <linux/pm.h> #include <linux/serial_core.h> #include <linux/slab.h> +#include <linux/smc91x.h> #include <asm/mach-types.h> #include <asm/mach/map.h> @@ -258,12 +259,17 @@ static int neponset_probe(struct platform_device *dev) 0x02000000, "smc91x-attrib"), { .flags = IORESOURCE_IRQ }, }; + struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_8BIT | SMC91X_IO_SHIFT_2 | SMC91X_NOWAIT, + }; struct platform_device_info smc91x_devinfo = { .parent = &dev->dev, .name = "smc91x", .id = 0, .res = smc91x_resources, .num_res = ARRAY_SIZE(smc91x_resources), + .data = &smc91x_platdata, + .size_data = sizeof(smc91x_platdata), }; int ret, irq; diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 091261878eff..1525d7b5f1b7 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -11,6 +11,7 @@ #include <linux/irq.h> #include <linux/io.h> #include <linux/mtd/partitions.h> +#include <linux/smc91x.h> #include <mach/hardware.h> #include <asm/setup.h> @@ -43,12 +44,18 @@ static struct resource smc91x_resources[] = { #endif }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev = { + .platform_data = &smc91x_platdata, + }, }; static struct platform_device *devices[] __initdata = { diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index 483cb467bf65..a0f3b1cd497c 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -45,6 +45,6 @@ extern char secondary_trampoline, secondary_trampoline_end; extern unsigned long socfpga_cpu1start_addr; -#define SOCFPGA_SCU_VIRT_BASE 0xfffec000 +#define SOCFPGA_SCU_VIRT_BASE 0xfee00000 #endif diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c index 383d61e138af..f5e597c207b9 100644 --- a/arch/arm/mach-socfpga/socfpga.c +++ b/arch/arm/mach-socfpga/socfpga.c @@ -23,6 +23,7 @@ #include <asm/hardware/cache-l2x0.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/cacheflush.h> #include "core.h" @@ -73,6 +74,10 @@ void __init socfpga_sysmgr_init(void) (u32 *) &socfpga_cpu1start_addr)) pr_err("SMP: Need cpu1-start-addr in device tree.\n"); + /* Ensure that socfpga_cpu1start_addr is visible to other CPUs */ + smp_wmb(); + sync_cache_w(&socfpga_cpu1start_addr); + sys_manager_base_addr = of_iomap(np, 0); np = of_find_compatible_node(NULL, NULL, "altr,rst-mgr"); diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c index b067390cef4e..b373acade338 100644 --- a/arch/arm/mach-sti/board-dt.c +++ b/arch/arm/mach-sti/board-dt.c @@ -18,6 +18,7 @@ static const char *stih41x_dt_match[] __initdata = { "st,stih415", "st,stih416", "st,stih407", + "st,stih410", "st,stih418", NULL }; diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index a77604fbaf25..81502b90dd91 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -1,10 +1,12 @@ menuconfig ARCH_SUNXI bool "Allwinner SoCs" if ARCH_MULTI_V7 select ARCH_REQUIRE_GPIOLIB + select ARCH_HAS_RESET_CONTROLLER select CLKSRC_MMIO select GENERIC_IRQ_CHIP select PINCTRL select SUN4I_TIMER + select RESET_CONTROLLER if ARCH_SUNXI @@ -20,10 +22,8 @@ config MACH_SUN5I config MACH_SUN6I bool "Allwinner A31 (sun6i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER select SUN5I_HSTIMER config MACH_SUN7I @@ -37,16 +37,12 @@ config MACH_SUN7I config MACH_SUN8I bool "Allwinner A23 (sun8i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC select MFD_SUN6I_PRCM - select RESET_CONTROLLER config MACH_SUN9I bool "Allwinner (sun9i) SoCs support" default ARCH_SUNXI - select ARCH_HAS_RESET_CONTROLLER select ARM_GIC - select RESET_CONTROLLER endif diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c index f2b586d7b15d..155807fa6fdd 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra114.c +++ b/arch/arm/mach-tegra/cpuidle-tegra114.c @@ -15,7 +15,7 @@ */ #include <asm/firmware.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> @@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, tegra_set_cpu_in_lp2(); cpu_pm_enter(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); call_firmware_op(prepare_idle); @@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev, if (call_firmware_op(do_idle, 0) == -ENOSYS) cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); cpu_pm_exit(); tegra_clear_cpu_in_lp2(); diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c index 4f25a7c7ca0f..48844ae6c3a1 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra20.c +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c @@ -20,7 +20,7 @@ */ #include <linux/clk/tegra.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> @@ -136,11 +136,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev, if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready()) return false; - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); if (cpu_online(1)) tegra20_wake_cpu1_from_reset(); @@ -153,13 +153,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); cpu_suspend(0, tegra20_sleep_cpu_secondary_finish); tegra20_cpu_clear_resettable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c index f8815ed65d9d..84d809a3cba3 100644 --- a/arch/arm/mach-tegra/cpuidle-tegra30.c +++ b/arch/arm/mach-tegra/cpuidle-tegra30.c @@ -20,7 +20,7 @@ */ #include <linux/clk/tegra.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> #include <linux/kernel.h> @@ -76,11 +76,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev, return false; } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); tegra_idle_lp2_last(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } @@ -90,13 +90,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + tick_broadcast_enter(); smp_wmb(); cpu_suspend(0, tegra30_sleep_cpu_secondary_finish); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); return true; } diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c6c7696b8db9..8f15f70622a6 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1131,23 +1131,22 @@ static void __init l2c310_of_parse(const struct device_node *np, } ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); - if (ret) - return; - - switch (assoc) { - case 16: - *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; - *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; - *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; - break; - case 8: - *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; - *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; - break; - default: - pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", - assoc); - break; + if (!ret) { + switch (assoc) { + case 16: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + case 8: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + default: + pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", + assoc); + break; + } } prefetch = l2x0_saved_regs.prefetch_ctrl; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 170a116d1b29..c27447653903 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -171,7 +171,7 @@ static int __dma_supported(struct device *dev, u64 mask, bool warn) */ if (sizeof(mask) != sizeof(dma_addr_t) && mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) < max_pfn) { + dma_to_pfn(dev, ~0) < max_pfn - 1) { if (warn) { dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", mask); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a982dc3190df..6333d9c17875 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -552,6 +552,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); + show_pte(current->mm, addr); info.si_signo = inf->sig; info.si_errno = 0; diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 004e35cdcfff..cf30daff8932 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -49,7 +49,10 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (!is_module_address(start) || !is_module_address(end - 1)) + if (start < MODULES_VADDR || start >= MODULES_END) + return -EINVAL; + + if (end < MODULES_VADDR || start >= MODULES_END) return -EINVAL; data.set_mask = set_mask; diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 61b4d705c267..2438b96004c1 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void) } /** - * omap_read_persistent_clock - Return time from a persistent clock. + * omap_read_persistent_clock64 - Return time from a persistent clock. * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into - * nsecs and adds to a monotonically increasing timespec. + * nsecs and adds to a monotonically increasing timespec64. */ -static struct timespec persistent_ts; +static struct timespec64 persistent_ts; static cycles_t cycles; static unsigned int persistent_mult, persistent_shift; -static DEFINE_SPINLOCK(read_persistent_clock_lock); -static void omap_read_persistent_clock(struct timespec *ts) +static void omap_read_persistent_clock64(struct timespec64 *ts) { unsigned long long nsecs; cycles_t last_cycles; - unsigned long flags; - - spin_lock_irqsave(&read_persistent_clock_lock, flags); last_cycles = cycles; cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0; @@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts) nsecs = clocksource_cyc2ns(cycles - last_cycles, persistent_mult, persistent_shift); - timespec_add_ns(&persistent_ts, nsecs); + timespec64_add_ns(&persistent_ts, nsecs); *ts = persistent_ts; - - spin_unlock_irqrestore(&read_persistent_clock_lock, flags); } /** @@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) /* * 120000 rough estimate from the calculations in - * __clocksource_updatefreq_scale. + * __clocksource_update_freq_scale. */ clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 32768, NSEC_PER_SEC, 120000); @@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase) } sched_clock_register(omap_32k_read_sched_clock, 32, 32768); - register_persistent_clock(NULL, omap_read_persistent_clock); + register_persistent_clock(NULL, omap_read_persistent_clock64); pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n"); return 0; diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index db10169a08de..8ca94d379bc3 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct of_device_id *match; const struct dmtimer_platform_data *pdata; + int ret; match = of_match_device(of_match_ptr(omap_timer_match), dev); pdata = match ? match->data : dev->platform_data; @@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev) } if (!timer->reserved) { - pm_runtime_get_sync(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "%s: pm_runtime_get_sync failed!\n", + __func__); + goto err_get_sync; + } __omap_dm_timer_init_regs(timer); pm_runtime_put(dev); } @@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev) dev_dbg(dev, "Device Probed.\n"); return 0; + +err_get_sync: + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); + return ret; } /** @@ -899,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev) } spin_unlock_irqrestore(&dm_timer_lock, flags); + pm_runtime_disable(&pdev->dev); + return ret; } diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1ad9c2ab2e9..a857794432d6 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -622,7 +622,7 @@ }; sgenet0: ethernet@1f210000 { - compatible = "apm,xgene-enet"; + compatible = "apm,xgene1-sgenet"; status = "disabled"; reg = <0x0 0x1f210000 0x0 0xd100>, <0x0 0x1f200000 0x0 0Xc300>, @@ -636,7 +636,7 @@ }; xgenet: ethernet@1f610000 { - compatible = "apm,xgene-enet"; + compatible = "apm,xgene1-xgenet"; status = "disabled"; reg = <0x0 0x1f610000 0x0 0xd100>, <0x0 0x1f600000 0x0 0Xc300>, diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index ea2b5666a16f..c9b89efe0f56 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,7 +8,7 @@ */ /* SoC fixed clocks */ - soc_uartclk: refclk72738khz { + soc_uartclk: refclk7273800hz { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <7273800>; diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index cb9593079f29..d8c25b7b18fb 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) -#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n) - -#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ - cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \ - o1, o2, n1, n2) +#define _protect_cmpxchg_local(pcp, o, n) \ +({ \ + typeof(*raw_cpu_ptr(&(pcp))) __ret; \ + preempt_disable(); \ + __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \ + preempt_enable(); \ + __ret; \ +}) + +#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n) + +#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \ +({ \ + int __ret; \ + preempt_disable(); \ + __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \ + raw_cpu_ptr(&(ptr2)), \ + o1, o2, n1, n2); \ + preempt_enable(); \ + __ret; \ +}) #define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) #define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 92bbae381598..70522450ca23 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -90,6 +90,7 @@ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) #define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) #define ESR_ELx_CV (UL(1) << 24) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 076a1c714049..c0e5165c2f76 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,11 +18,12 @@ */ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + #include <linux/types.h> #include <asm/insn.h> -#ifdef __KERNEL__ - #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE static __always_inline bool arch_static_branch(struct static_key *key) @@ -39,8 +40,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u64 jump_label_t; struct jump_entry { @@ -49,4 +48,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 94674eb7e7bb..ac6fafb95fe7 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -129,6 +129,9 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * + * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time + * (see hyp-init.S). + * * Note that when using 4K pages, we concatenate two first level page tables * together. * @@ -138,7 +141,6 @@ #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: - * 40bits output (PS = 2) * 40bits input (T0SZ = 24) * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) @@ -150,7 +152,6 @@ #else /* * Stage2 translation configuration: - * 40bits output (PS = 2) * 40bits input (T0SZ = 24) * 4kB pages (TG0 = 0) * 3 level page tables (SL = 1) @@ -187,6 +188,7 @@ /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT +#define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8ac3c70fe3c6..f0f58c9beec0 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -28,6 +28,8 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else @@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 9f52beb7cb13..889c908ee631 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -31,28 +31,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6458b5373142..bbfb600fa822 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) #define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) +#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) + /* * If we are concatenating first level stage-2 page tables, we would have less * than or equal to 16 pointers in the fake PGD, because that's what the @@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define KVM_PREALLOC_LEVEL (0) #endif -/** - * kvm_prealloc_hwpgd - allocate inital table for VTTBR - * @kvm: The KVM struct pointer for the VM. - * @pgd: The kernel pseudo pgd - * - * When the kernel uses more levels of page tables than the guest, we allocate - * a fake PGD and pre-populate it to point to the next-level page table, which - * will be the real initial page table pointed to by the VTTBR. - * - * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and - * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we - * allocate 2 consecutive PUD pages. - */ -static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd) -{ - unsigned int i; - unsigned long hwpgd; - - if (KVM_PREALLOC_LEVEL == 0) - return 0; - - hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT); - if (!hwpgd) - return -ENOMEM; - - for (i = 0; i < PTRS_PER_S2_PGD; i++) { - if (KVM_PREALLOC_LEVEL == 1) - pgd_populate(NULL, pgd + i, - (pud_t *)hwpgd + i * PTRS_PER_PUD); - else if (KVM_PREALLOC_LEVEL == 2) - pud_populate(NULL, pud_offset(pgd, 0) + i, - (pmd_t *)hwpgd + i * PTRS_PER_PMD); - } - - return 0; -} - static inline void *kvm_get_hwpgd(struct kvm *kvm) { pgd_t *pgd = kvm->arch.pgd; @@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm) return pmd_offset(pud, 0); } -static inline void kvm_free_hwpgd(struct kvm *kvm) +static inline unsigned int kvm_get_hwpgd_size(void) { - if (KVM_PREALLOC_LEVEL > 0) { - unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm); - free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT); - } + if (KVM_PREALLOC_LEVEL > 0) + return PTRS_PER_S2_PGD * PAGE_SIZE; + return PTRS_PER_S2_PGD * sizeof(pgd_t); } static inline bool kvm_page_empty(void *ptr) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a9eee33dfa62..101a42bde728 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + /* + * init_mm.pgd does not contain any user mappings and it is always + * active for kernel addresses in TTBR1. Just set the reserved TTBR0. + */ + if (next == &init_mm) { + cpu_set_reserved_ttbr0(); + return; + } + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 09da25bc596f..4fde8c1df97f 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, return ret; } +#define _percpu_read(pcp) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ + sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + +#define _percpu_write(pcp, val) \ +do { \ + preempt_disable(); \ + __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ + sizeof(pcp)); \ + preempt_enable(); \ +} while(0) \ + +#define _pcp_protect(operation, pcp, val) \ +({ \ + typeof(pcp) __retval; \ + preempt_disable(); \ + __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \ + (val), sizeof(pcp)); \ + preempt_enable(); \ + __retval; \ +}) + #define _percpu_add(pcp, val) \ - __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_add, pcp, val) -#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val)) +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val) #define _percpu_and(pcp, val) \ - __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) + _pcp_protect(__percpu_and, pcp, val) #define _percpu_or(pcp, val) \ - __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp)) - -#define _percpu_read(pcp) (typeof(pcp)) \ - (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp))) - -#define _percpu_write(pcp, val) \ - __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)) + _pcp_protect(__percpu_or, pcp, val) #define _percpu_xchg(pcp, val) (typeof(pcp)) \ - (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))) + _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)) #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val) #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 9a8fd84f8fb2..941c375616e2 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -39,7 +39,11 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include <asm/memory.h> -#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) +#define cpu_switch_mm(pgd,mm) \ +do { \ + BUG_ON(pgd == swapper_pg_dir); \ + cpu_do_switch_mm(virt_to_phys(pgd),mm); \ +} while (0) #define cpu_get_pgd() \ ({ \ diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c028fe37456f..53d9c354219f 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -48,6 +48,7 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { + __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); tlb_remove_entry(tlb, pte); } @@ -56,6 +57,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { + __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_entry(tlb, virt_to_page(pmdp)); } #endif @@ -64,6 +66,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { + __flush_tlb_pgtable(tlb->mm, addr); tlb_remove_entry(tlb, virt_to_page(pudp)); } #endif diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 4abe9b945f77..c3bb05b98616 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -144,6 +144,19 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end } /* + * Used to invalidate the TLB (walk caches) corresponding to intermediate page + * table levels (pgd/pud/pmd). + */ +static inline void __flush_tlb_pgtable(struct mm_struct *mm, + unsigned long uaddr) +{ + unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); + + dsb(ishst); + asm("tlbi vae1is, %0" : : "r" (addr)); + dsb(ish); +} +/* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ static inline void update_mmu_cache(struct vm_area_struct *vma, diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3ef77a466018..c154c0b7eb60 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -191,6 +191,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index b42c7b480e1e..ab21e0d58278 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -337,7 +337,11 @@ core_initcall(arm64_dmi_init); static void efi_set_pgd(struct mm_struct *mm) { - cpu_switch_mm(mm->pgd, mm); + if (mm == &init_mm) + cpu_set_reserved_ttbr0(); + else + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); if (icache_is_aivivt()) __flush_icache_all(); @@ -354,3 +358,12 @@ void efi_virtmap_unload(void) efi_set_pgd(current->active_mm); preempt_enable(); } + +/* + * UpdateCapsule() depends on the system being shutdown via + * ResetSystem(). + */ +bool efi_poweroff_required(void) +{ + return efi_enabled(EFI_RUNTIME_SERVICES); +} diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8ce88e08c030..07f930540f4a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -585,8 +585,8 @@ ENDPROC(set_cpu_boot_mode_flag) * zeroing of .bss would clobber it. */ .pushsection .data..cacheline_aligned -ENTRY(__boot_cpu_mode) .align L1_CACHE_SHIFT +ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 .long 0 .popsection diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index fde9923af859..c6b1f3b96f45 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -21,6 +21,7 @@ #include <stdarg.h> #include <linux/compat.h> +#include <linux/efi.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -150,6 +151,13 @@ void machine_restart(char *cmd) local_irq_disable(); smp_send_stop(); + /* + * UpdateCapsule() depends on the system being reset via + * ResetSystem(). + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + efi_reboot(reboot_mode, NULL); + /* Now call the architecture specific reboot code. */ if (arm_pm_restart) arm_pm_restart(reboot_mode, cmd); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 32aeea083d93..ec37ab3f524f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -200,7 +200,7 @@ up_fail: void update_vsyscall(struct timekeeper *tk) { struct timespec xtime_coarse; - u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); + u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter"); ++vdso_data->tb_seq_count; smp_wmb(); @@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { - vdso_data->cs_cycle_last = tk->tkr.cycle_last; + vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; - vdso_data->cs_mult = tk->tkr.mult; - vdso_data->cs_shift = tk->tkr.shift; + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; + vdso_data->cs_mult = tk->tkr_mono.mult; + vdso_data->cs_shift = tk->tkr_mono.shift; } smp_wmb(); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index f5590c81d95f..5105e297ed5f 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on OF select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES @@ -25,10 +26,10 @@ config KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST - select KVM_ARM_VGIC - select KVM_ARM_TIMER select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD ---help--- Support hosting virtualized guest machines. @@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool - depends on KVM_ARM_VGIC - ---help--- - Adds support for the Architected Timers in virtual machines. - endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 4e6e09ee4033..d5904f876cdb 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +ccflags-y += -Iarch/arm64/kvm CFLAGS_arm.o := -I. CFLAGS_mmu.o := -I. @@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o @@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o -kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 58e0c2bdde04..ef7d112f5ce0 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -51,7 +51,7 @@ static int __init early_coherent_pool(char *p) } early_param("coherent_pool", early_coherent_pool); -static void *__alloc_from_pool(size_t size, struct page **ret_page) +static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags) { unsigned long val; void *ptr = NULL; @@ -67,6 +67,8 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page) *ret_page = phys_to_page(phys); ptr = (void *)val; + if (flags & __GFP_ZERO) + memset(ptr, 0, size); } return ptr; @@ -101,6 +103,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA; if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { struct page *page; + void *addr; size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, @@ -109,7 +112,10 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, return NULL; *dma_handle = phys_to_dma(dev, page_to_phys(page)); - return page_address(page); + addr = page_address(page); + if (flags & __GFP_ZERO) + memset(addr, 0, size); + return addr; } else { return swiotlb_alloc_coherent(dev, size, dma_handle, flags); } @@ -146,7 +152,7 @@ static void *__dma_alloc(struct device *dev, size_t size, if (!coherent && !(flags & __GFP_WAIT)) { struct page *page = NULL; - void *addr = __alloc_from_pool(size, &page); + void *addr = __alloc_from_pool(size, &page, flags); if (addr) *dma_handle = phys_to_dma(dev, page_to_phys(page)); diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index bb0ea94c4ba1..1d3ec3ddd84b 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -51,7 +51,10 @@ static int change_memory_common(unsigned long addr, int numpages, WARN_ON_ONCE(1); } - if (!is_module_address(start) || !is_module_address(end - 1)) + if (start < MODULES_VADDR || start >= MODULES_END) + return -EINVAL; + + if (end < MODULES_VADDR || end >= MODULES_END) return -EINVAL; data.set_mask = set_mask; diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index 78d4483ba40c..ec4db6df5e0d 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -67,6 +67,11 @@ extern unsigned long empty_zero_page; */ #define pgtable_cache_init() do { } while (0) +/* + * c6x is !MMU, so define the simpliest implementation + */ +#define pgprot_writecombine pgprot_noncached + #include <asm-generic/pgtable.h> #endif /* _ASM_C6X_PGTABLE_H */ diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h index 9359e5048442..d5779b0ec573 100644 --- a/arch/metag/include/asm/io.h +++ b/arch/metag/include/asm/io.h @@ -2,6 +2,7 @@ #define _ASM_METAG_IO_H #include <linux/types.h> +#include <asm/pgtable-bits.h> #define IO_SPACE_LIMIT 0 diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h new file mode 100644 index 000000000000..25ba6729f496 --- /dev/null +++ b/arch/metag/include/asm/pgtable-bits.h @@ -0,0 +1,104 @@ +/* + * Meta page table definitions. + */ + +#ifndef _METAG_PGTABLE_BITS_H +#define _METAG_PGTABLE_BITS_H + +#include <asm/metag_mem.h> + +/* + * Definitions for MMU descriptors + * + * These are the hardware bits in the MMCU pte entries. + * Derived from the Meta toolkit headers. + */ +#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT +#define _PAGE_WRITE MMCU_ENTRY_WR_BIT +#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT +/* Write combine bit - this can cause writes to occur out of order */ +#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT +/* Sys coherent bit - this bit is never used by Linux */ +#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT +#define _PAGE_ALWAYS_ZERO_1 0x020 +#define _PAGE_CACHE_CTRL0 0x040 +#define _PAGE_CACHE_CTRL1 0x080 +#define _PAGE_ALWAYS_ZERO_2 0x100 +#define _PAGE_ALWAYS_ZERO_3 0x200 +#define _PAGE_ALWAYS_ZERO_4 0x400 +#define _PAGE_ALWAYS_ZERO_5 0x800 + +/* These are software bits that we stuff into the gaps in the hardware + * pte entries that are not used. Note, these DO get stored in the actual + * hardware, but the hardware just does not use them. + */ +#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 +#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 + +/* Pages owned, and protected by, the kernel. */ +#define _PAGE_KERNEL _PAGE_PRIV + +/* No cacheing of this page */ +#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) +/* burst cacheing - good for data streaming */ +#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) +/* One cache way per thread */ +#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) +/* Full on cacheing */ +#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) + +#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) + +/* which bits are used for cache control ... */ +#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ + _PAGE_WR_COMBINE) + +/* This is a mask of the bits that pte_modify is allowed to change. */ +#define _PAGE_CHG_MASK (PAGE_MASK) + +#define _PAGE_SZ_SHIFT 1 +#define _PAGE_SZ_4K (0x0) +#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) +#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) + +#if defined(CONFIG_PAGE_SIZE_4K) +#define _PAGE_SZ (_PAGE_SZ_4K) +#elif defined(CONFIG_PAGE_SIZE_8K) +#define _PAGE_SZ (_PAGE_SZ_8K) +#elif defined(CONFIG_PAGE_SIZE_16K) +#define _PAGE_SZ (_PAGE_SZ_16K) +#endif +#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) + +#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) +# define _PAGE_SZHUGE (_PAGE_SZ_8K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) +# define _PAGE_SZHUGE (_PAGE_SZ_16K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) +# define _PAGE_SZHUGE (_PAGE_SZ_32K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) +# define _PAGE_SZHUGE (_PAGE_SZ_64K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) +# define _PAGE_SZHUGE (_PAGE_SZ_128K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) +# define _PAGE_SZHUGE (_PAGE_SZ_256K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) +# define _PAGE_SZHUGE (_PAGE_SZ_512K) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) +# define _PAGE_SZHUGE (_PAGE_SZ_1M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) +# define _PAGE_SZHUGE (_PAGE_SZ_2M) +#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) +# define _PAGE_SZHUGE (_PAGE_SZ_4M) +#endif + +#endif /* _METAG_PGTABLE_BITS_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index d0604c0a8702..ffa3a3a2ecad 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -5,6 +5,7 @@ #ifndef _METAG_PGTABLE_H #define _METAG_PGTABLE_H +#include <asm/pgtable-bits.h> #include <asm-generic/pgtable-nopmd.h> /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ @@ -21,100 +22,6 @@ #endif /* - * Definitions for MMU descriptors - * - * These are the hardware bits in the MMCU pte entries. - * Derived from the Meta toolkit headers. - */ -#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT -#define _PAGE_WRITE MMCU_ENTRY_WR_BIT -#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT -/* Write combine bit - this can cause writes to occur out of order */ -#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT -/* Sys coherent bit - this bit is never used by Linux */ -#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT -#define _PAGE_ALWAYS_ZERO_1 0x020 -#define _PAGE_CACHE_CTRL0 0x040 -#define _PAGE_CACHE_CTRL1 0x080 -#define _PAGE_ALWAYS_ZERO_2 0x100 -#define _PAGE_ALWAYS_ZERO_3 0x200 -#define _PAGE_ALWAYS_ZERO_4 0x400 -#define _PAGE_ALWAYS_ZERO_5 0x800 - -/* These are software bits that we stuff into the gaps in the hardware - * pte entries that are not used. Note, these DO get stored in the actual - * hardware, but the hardware just does not use them. - */ -#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1 -#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2 - -/* Pages owned, and protected by, the kernel. */ -#define _PAGE_KERNEL _PAGE_PRIV - -/* No cacheing of this page */ -#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S) -/* burst cacheing - good for data streaming */ -#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S) -/* One cache way per thread */ -#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S) -/* Full on cacheing */ -#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S) - -#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE) - -/* which bits are used for cache control ... */ -#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \ - _PAGE_WR_COMBINE) - -/* This is a mask of the bits that pte_modify is allowed to change. */ -#define _PAGE_CHG_MASK (PAGE_MASK) - -#define _PAGE_SZ_SHIFT 1 -#define _PAGE_SZ_4K (0x0) -#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT) -#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT) - -#if defined(CONFIG_PAGE_SIZE_4K) -#define _PAGE_SZ (_PAGE_SZ_4K) -#elif defined(CONFIG_PAGE_SIZE_8K) -#define _PAGE_SZ (_PAGE_SZ_8K) -#elif defined(CONFIG_PAGE_SIZE_16K) -#define _PAGE_SZ (_PAGE_SZ_16K) -#endif -#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT) - -#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K) -# define _PAGE_SZHUGE (_PAGE_SZ_8K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K) -# define _PAGE_SZHUGE (_PAGE_SZ_16K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K) -# define _PAGE_SZHUGE (_PAGE_SZ_32K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K) -# define _PAGE_SZHUGE (_PAGE_SZ_64K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K) -# define _PAGE_SZHUGE (_PAGE_SZ_128K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K) -# define _PAGE_SZHUGE (_PAGE_SZ_256K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K) -# define _PAGE_SZHUGE (_PAGE_SZ_512K) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M) -# define _PAGE_SZHUGE (_PAGE_SZ_1M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M) -# define _PAGE_SZHUGE (_PAGE_SZ_2M) -#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M) -# define _PAGE_SZHUGE (_PAGE_SZ_4M) -#endif - -/* * The Linux memory management assumes a three-level page table setup. On * Meta, we use that, but "fold" the mid level into the top-level page * table. diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S index 0536bc021cc6..ef548510b951 100644 --- a/arch/microblaze/kernel/entry.S +++ b/arch/microblaze/kernel/entry.S @@ -348,8 +348,9 @@ C_ENTRY(_user_exception): * The LP register should point to the location where the called function * should return. [note that MAKE_SYS_CALL uses label 1] */ /* See if the system call number is valid */ + blti r12, 5f addi r11, r12, -__NR_syscalls; - bgei r11,5f; + bgei r11, 5f; /* Figure out which function to use for this system call. */ /* Note Microblaze barrel shift is optional, so don't rely on it */ add r12, r12, r12; /* convert num -> ptr */ @@ -375,7 +376,7 @@ C_ENTRY(_user_exception): /* The syscall number is invalid, return an error. */ 5: - rtsd r15, 8; /* looks like a normal subroutine return */ + braid ret_from_trap addi r3, r0, -ENOSYS; /* Entry point used to return from a syscall/trap */ @@ -411,7 +412,7 @@ C_ENTRY(ret_from_trap): bri 1b /* Maybe handle a signal */ -5: +5: andi r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME; beqi r11, 4f; /* Signals to handle, handle them */ diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h index cdac7b3eeaf7..80386470d3a4 100644 --- a/arch/mips/include/asm/asmmacro-32.h +++ b/arch/mips/include/asm/asmmacro-32.h @@ -16,38 +16,38 @@ .set push SET_HARDFLOAT cfc1 \tmp, fcr31 - swc1 $f0, THREAD_FPR0_LS64(\thread) - swc1 $f1, THREAD_FPR1_LS64(\thread) - swc1 $f2, THREAD_FPR2_LS64(\thread) - swc1 $f3, THREAD_FPR3_LS64(\thread) - swc1 $f4, THREAD_FPR4_LS64(\thread) - swc1 $f5, THREAD_FPR5_LS64(\thread) - swc1 $f6, THREAD_FPR6_LS64(\thread) - swc1 $f7, THREAD_FPR7_LS64(\thread) - swc1 $f8, THREAD_FPR8_LS64(\thread) - swc1 $f9, THREAD_FPR9_LS64(\thread) - swc1 $f10, THREAD_FPR10_LS64(\thread) - swc1 $f11, THREAD_FPR11_LS64(\thread) - swc1 $f12, THREAD_FPR12_LS64(\thread) - swc1 $f13, THREAD_FPR13_LS64(\thread) - swc1 $f14, THREAD_FPR14_LS64(\thread) - swc1 $f15, THREAD_FPR15_LS64(\thread) - swc1 $f16, THREAD_FPR16_LS64(\thread) - swc1 $f17, THREAD_FPR17_LS64(\thread) - swc1 $f18, THREAD_FPR18_LS64(\thread) - swc1 $f19, THREAD_FPR19_LS64(\thread) - swc1 $f20, THREAD_FPR20_LS64(\thread) - swc1 $f21, THREAD_FPR21_LS64(\thread) - swc1 $f22, THREAD_FPR22_LS64(\thread) - swc1 $f23, THREAD_FPR23_LS64(\thread) - swc1 $f24, THREAD_FPR24_LS64(\thread) - swc1 $f25, THREAD_FPR25_LS64(\thread) - swc1 $f26, THREAD_FPR26_LS64(\thread) - swc1 $f27, THREAD_FPR27_LS64(\thread) - swc1 $f28, THREAD_FPR28_LS64(\thread) - swc1 $f29, THREAD_FPR29_LS64(\thread) - swc1 $f30, THREAD_FPR30_LS64(\thread) - swc1 $f31, THREAD_FPR31_LS64(\thread) + swc1 $f0, THREAD_FPR0(\thread) + swc1 $f1, THREAD_FPR1(\thread) + swc1 $f2, THREAD_FPR2(\thread) + swc1 $f3, THREAD_FPR3(\thread) + swc1 $f4, THREAD_FPR4(\thread) + swc1 $f5, THREAD_FPR5(\thread) + swc1 $f6, THREAD_FPR6(\thread) + swc1 $f7, THREAD_FPR7(\thread) + swc1 $f8, THREAD_FPR8(\thread) + swc1 $f9, THREAD_FPR9(\thread) + swc1 $f10, THREAD_FPR10(\thread) + swc1 $f11, THREAD_FPR11(\thread) + swc1 $f12, THREAD_FPR12(\thread) + swc1 $f13, THREAD_FPR13(\thread) + swc1 $f14, THREAD_FPR14(\thread) + swc1 $f15, THREAD_FPR15(\thread) + swc1 $f16, THREAD_FPR16(\thread) + swc1 $f17, THREAD_FPR17(\thread) + swc1 $f18, THREAD_FPR18(\thread) + swc1 $f19, THREAD_FPR19(\thread) + swc1 $f20, THREAD_FPR20(\thread) + swc1 $f21, THREAD_FPR21(\thread) + swc1 $f22, THREAD_FPR22(\thread) + swc1 $f23, THREAD_FPR23(\thread) + swc1 $f24, THREAD_FPR24(\thread) + swc1 $f25, THREAD_FPR25(\thread) + swc1 $f26, THREAD_FPR26(\thread) + swc1 $f27, THREAD_FPR27(\thread) + swc1 $f28, THREAD_FPR28(\thread) + swc1 $f29, THREAD_FPR29(\thread) + swc1 $f30, THREAD_FPR30(\thread) + swc1 $f31, THREAD_FPR31(\thread) sw \tmp, THREAD_FCR31(\thread) .set pop .endm @@ -56,38 +56,38 @@ .set push SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) - lwc1 $f0, THREAD_FPR0_LS64(\thread) - lwc1 $f1, THREAD_FPR1_LS64(\thread) - lwc1 $f2, THREAD_FPR2_LS64(\thread) - lwc1 $f3, THREAD_FPR3_LS64(\thread) - lwc1 $f4, THREAD_FPR4_LS64(\thread) - lwc1 $f5, THREAD_FPR5_LS64(\thread) - lwc1 $f6, THREAD_FPR6_LS64(\thread) - lwc1 $f7, THREAD_FPR7_LS64(\thread) - lwc1 $f8, THREAD_FPR8_LS64(\thread) - lwc1 $f9, THREAD_FPR9_LS64(\thread) - lwc1 $f10, THREAD_FPR10_LS64(\thread) - lwc1 $f11, THREAD_FPR11_LS64(\thread) - lwc1 $f12, THREAD_FPR12_LS64(\thread) - lwc1 $f13, THREAD_FPR13_LS64(\thread) - lwc1 $f14, THREAD_FPR14_LS64(\thread) - lwc1 $f15, THREAD_FPR15_LS64(\thread) - lwc1 $f16, THREAD_FPR16_LS64(\thread) - lwc1 $f17, THREAD_FPR17_LS64(\thread) - lwc1 $f18, THREAD_FPR18_LS64(\thread) - lwc1 $f19, THREAD_FPR19_LS64(\thread) - lwc1 $f20, THREAD_FPR20_LS64(\thread) - lwc1 $f21, THREAD_FPR21_LS64(\thread) - lwc1 $f22, THREAD_FPR22_LS64(\thread) - lwc1 $f23, THREAD_FPR23_LS64(\thread) - lwc1 $f24, THREAD_FPR24_LS64(\thread) - lwc1 $f25, THREAD_FPR25_LS64(\thread) - lwc1 $f26, THREAD_FPR26_LS64(\thread) - lwc1 $f27, THREAD_FPR27_LS64(\thread) - lwc1 $f28, THREAD_FPR28_LS64(\thread) - lwc1 $f29, THREAD_FPR29_LS64(\thread) - lwc1 $f30, THREAD_FPR30_LS64(\thread) - lwc1 $f31, THREAD_FPR31_LS64(\thread) + lwc1 $f0, THREAD_FPR0(\thread) + lwc1 $f1, THREAD_FPR1(\thread) + lwc1 $f2, THREAD_FPR2(\thread) + lwc1 $f3, THREAD_FPR3(\thread) + lwc1 $f4, THREAD_FPR4(\thread) + lwc1 $f5, THREAD_FPR5(\thread) + lwc1 $f6, THREAD_FPR6(\thread) + lwc1 $f7, THREAD_FPR7(\thread) + lwc1 $f8, THREAD_FPR8(\thread) + lwc1 $f9, THREAD_FPR9(\thread) + lwc1 $f10, THREAD_FPR10(\thread) + lwc1 $f11, THREAD_FPR11(\thread) + lwc1 $f12, THREAD_FPR12(\thread) + lwc1 $f13, THREAD_FPR13(\thread) + lwc1 $f14, THREAD_FPR14(\thread) + lwc1 $f15, THREAD_FPR15(\thread) + lwc1 $f16, THREAD_FPR16(\thread) + lwc1 $f17, THREAD_FPR17(\thread) + lwc1 $f18, THREAD_FPR18(\thread) + lwc1 $f19, THREAD_FPR19(\thread) + lwc1 $f20, THREAD_FPR20(\thread) + lwc1 $f21, THREAD_FPR21(\thread) + lwc1 $f22, THREAD_FPR22(\thread) + lwc1 $f23, THREAD_FPR23(\thread) + lwc1 $f24, THREAD_FPR24(\thread) + lwc1 $f25, THREAD_FPR25(\thread) + lwc1 $f26, THREAD_FPR26(\thread) + lwc1 $f27, THREAD_FPR27(\thread) + lwc1 $f28, THREAD_FPR28(\thread) + lwc1 $f29, THREAD_FPR29(\thread) + lwc1 $f30, THREAD_FPR30(\thread) + lwc1 $f31, THREAD_FPR31(\thread) ctc1 \tmp, fcr31 .set pop .endm diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 0cae4595e985..6156ac8c4cfb 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -60,22 +60,22 @@ .set push SET_HARDFLOAT cfc1 \tmp, fcr31 - sdc1 $f0, THREAD_FPR0_LS64(\thread) - sdc1 $f2, THREAD_FPR2_LS64(\thread) - sdc1 $f4, THREAD_FPR4_LS64(\thread) - sdc1 $f6, THREAD_FPR6_LS64(\thread) - sdc1 $f8, THREAD_FPR8_LS64(\thread) - sdc1 $f10, THREAD_FPR10_LS64(\thread) - sdc1 $f12, THREAD_FPR12_LS64(\thread) - sdc1 $f14, THREAD_FPR14_LS64(\thread) - sdc1 $f16, THREAD_FPR16_LS64(\thread) - sdc1 $f18, THREAD_FPR18_LS64(\thread) - sdc1 $f20, THREAD_FPR20_LS64(\thread) - sdc1 $f22, THREAD_FPR22_LS64(\thread) - sdc1 $f24, THREAD_FPR24_LS64(\thread) - sdc1 $f26, THREAD_FPR26_LS64(\thread) - sdc1 $f28, THREAD_FPR28_LS64(\thread) - sdc1 $f30, THREAD_FPR30_LS64(\thread) + sdc1 $f0, THREAD_FPR0(\thread) + sdc1 $f2, THREAD_FPR2(\thread) + sdc1 $f4, THREAD_FPR4(\thread) + sdc1 $f6, THREAD_FPR6(\thread) + sdc1 $f8, THREAD_FPR8(\thread) + sdc1 $f10, THREAD_FPR10(\thread) + sdc1 $f12, THREAD_FPR12(\thread) + sdc1 $f14, THREAD_FPR14(\thread) + sdc1 $f16, THREAD_FPR16(\thread) + sdc1 $f18, THREAD_FPR18(\thread) + sdc1 $f20, THREAD_FPR20(\thread) + sdc1 $f22, THREAD_FPR22(\thread) + sdc1 $f24, THREAD_FPR24(\thread) + sdc1 $f26, THREAD_FPR26(\thread) + sdc1 $f28, THREAD_FPR28(\thread) + sdc1 $f30, THREAD_FPR30(\thread) sw \tmp, THREAD_FCR31(\thread) .set pop .endm @@ -84,22 +84,22 @@ .set push .set mips64r2 SET_HARDFLOAT - sdc1 $f1, THREAD_FPR1_LS64(\thread) - sdc1 $f3, THREAD_FPR3_LS64(\thread) - sdc1 $f5, THREAD_FPR5_LS64(\thread) - sdc1 $f7, THREAD_FPR7_LS64(\thread) - sdc1 $f9, THREAD_FPR9_LS64(\thread) - sdc1 $f11, THREAD_FPR11_LS64(\thread) - sdc1 $f13, THREAD_FPR13_LS64(\thread) - sdc1 $f15, THREAD_FPR15_LS64(\thread) - sdc1 $f17, THREAD_FPR17_LS64(\thread) - sdc1 $f19, THREAD_FPR19_LS64(\thread) - sdc1 $f21, THREAD_FPR21_LS64(\thread) - sdc1 $f23, THREAD_FPR23_LS64(\thread) - sdc1 $f25, THREAD_FPR25_LS64(\thread) - sdc1 $f27, THREAD_FPR27_LS64(\thread) - sdc1 $f29, THREAD_FPR29_LS64(\thread) - sdc1 $f31, THREAD_FPR31_LS64(\thread) + sdc1 $f1, THREAD_FPR1(\thread) + sdc1 $f3, THREAD_FPR3(\thread) + sdc1 $f5, THREAD_FPR5(\thread) + sdc1 $f7, THREAD_FPR7(\thread) + sdc1 $f9, THREAD_FPR9(\thread) + sdc1 $f11, THREAD_FPR11(\thread) + sdc1 $f13, THREAD_FPR13(\thread) + sdc1 $f15, THREAD_FPR15(\thread) + sdc1 $f17, THREAD_FPR17(\thread) + sdc1 $f19, THREAD_FPR19(\thread) + sdc1 $f21, THREAD_FPR21(\thread) + sdc1 $f23, THREAD_FPR23(\thread) + sdc1 $f25, THREAD_FPR25(\thread) + sdc1 $f27, THREAD_FPR27(\thread) + sdc1 $f29, THREAD_FPR29(\thread) + sdc1 $f31, THREAD_FPR31(\thread) .set pop .endm @@ -118,22 +118,22 @@ .set push SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) - ldc1 $f0, THREAD_FPR0_LS64(\thread) - ldc1 $f2, THREAD_FPR2_LS64(\thread) - ldc1 $f4, THREAD_FPR4_LS64(\thread) - ldc1 $f6, THREAD_FPR6_LS64(\thread) - ldc1 $f8, THREAD_FPR8_LS64(\thread) - ldc1 $f10, THREAD_FPR10_LS64(\thread) - ldc1 $f12, THREAD_FPR12_LS64(\thread) - ldc1 $f14, THREAD_FPR14_LS64(\thread) - ldc1 $f16, THREAD_FPR16_LS64(\thread) - ldc1 $f18, THREAD_FPR18_LS64(\thread) - ldc1 $f20, THREAD_FPR20_LS64(\thread) - ldc1 $f22, THREAD_FPR22_LS64(\thread) - ldc1 $f24, THREAD_FPR24_LS64(\thread) - ldc1 $f26, THREAD_FPR26_LS64(\thread) - ldc1 $f28, THREAD_FPR28_LS64(\thread) - ldc1 $f30, THREAD_FPR30_LS64(\thread) + ldc1 $f0, THREAD_FPR0(\thread) + ldc1 $f2, THREAD_FPR2(\thread) + ldc1 $f4, THREAD_FPR4(\thread) + ldc1 $f6, THREAD_FPR6(\thread) + ldc1 $f8, THREAD_FPR8(\thread) + ldc1 $f10, THREAD_FPR10(\thread) + ldc1 $f12, THREAD_FPR12(\thread) + ldc1 $f14, THREAD_FPR14(\thread) + ldc1 $f16, THREAD_FPR16(\thread) + ldc1 $f18, THREAD_FPR18(\thread) + ldc1 $f20, THREAD_FPR20(\thread) + ldc1 $f22, THREAD_FPR22(\thread) + ldc1 $f24, THREAD_FPR24(\thread) + ldc1 $f26, THREAD_FPR26(\thread) + ldc1 $f28, THREAD_FPR28(\thread) + ldc1 $f30, THREAD_FPR30(\thread) ctc1 \tmp, fcr31 .endm @@ -141,22 +141,22 @@ .set push .set mips64r2 SET_HARDFLOAT - ldc1 $f1, THREAD_FPR1_LS64(\thread) - ldc1 $f3, THREAD_FPR3_LS64(\thread) - ldc1 $f5, THREAD_FPR5_LS64(\thread) - ldc1 $f7, THREAD_FPR7_LS64(\thread) - ldc1 $f9, THREAD_FPR9_LS64(\thread) - ldc1 $f11, THREAD_FPR11_LS64(\thread) - ldc1 $f13, THREAD_FPR13_LS64(\thread) - ldc1 $f15, THREAD_FPR15_LS64(\thread) - ldc1 $f17, THREAD_FPR17_LS64(\thread) - ldc1 $f19, THREAD_FPR19_LS64(\thread) - ldc1 $f21, THREAD_FPR21_LS64(\thread) - ldc1 $f23, THREAD_FPR23_LS64(\thread) - ldc1 $f25, THREAD_FPR25_LS64(\thread) - ldc1 $f27, THREAD_FPR27_LS64(\thread) - ldc1 $f29, THREAD_FPR29_LS64(\thread) - ldc1 $f31, THREAD_FPR31_LS64(\thread) + ldc1 $f1, THREAD_FPR1(\thread) + ldc1 $f3, THREAD_FPR3(\thread) + ldc1 $f5, THREAD_FPR5(\thread) + ldc1 $f7, THREAD_FPR7(\thread) + ldc1 $f9, THREAD_FPR9(\thread) + ldc1 $f11, THREAD_FPR11(\thread) + ldc1 $f13, THREAD_FPR13(\thread) + ldc1 $f15, THREAD_FPR15(\thread) + ldc1 $f17, THREAD_FPR17(\thread) + ldc1 $f19, THREAD_FPR19(\thread) + ldc1 $f21, THREAD_FPR21(\thread) + ldc1 $f23, THREAD_FPR23(\thread) + ldc1 $f25, THREAD_FPR25(\thread) + ldc1 $f27, THREAD_FPR27(\thread) + ldc1 $f29, THREAD_FPR29(\thread) + ldc1 $f31, THREAD_FPR31(\thread) .set pop .endm @@ -211,6 +211,22 @@ .endm #ifdef TOOLCHAIN_SUPPORTS_MSA + .macro _cfcmsa rd, cs + .set push + .set mips32r2 + .set msa + cfcmsa \rd, $\cs + .set pop + .endm + + .macro _ctcmsa cd, rs + .set push + .set mips32r2 + .set msa + ctcmsa $\cd, \rs + .set pop + .endm + .macro ld_d wd, off, base .set push .set mips32r2 @@ -227,35 +243,35 @@ .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set mips32r2 .set msa - copy_u.w \rd, $w\ws[\n] + copy_u.w $1, $w\ws[\n] .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set mips64r2 .set msa - copy_u.d \rd, $w\ws[\n] + copy_u.d $1, $w\ws[\n] .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set mips32r2 .set msa - insert.w $w\wd[\n], \rs + insert.w $w\wd[\n], $1 .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set mips64r2 .set msa - insert.d $w\wd[\n], \rs + insert.d $w\wd[\n], $1 .set pop .endm #else @@ -283,7 +299,7 @@ /* * Temporary until all toolchains in use include MSA support. */ - .macro cfcmsa rd, cs + .macro _cfcmsa rd, cs .set push .set noat SET_HARDFLOAT @@ -293,7 +309,7 @@ .set pop .endm - .macro ctcmsa cd, rs + .macro _ctcmsa cd, rs .set push .set noat SET_HARDFLOAT @@ -320,44 +336,36 @@ .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set noat SET_HARDFLOAT .insn .word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set noat SET_HARDFLOAT .insn .word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set noat SET_HARDFLOAT - /* move triggers an assembler bug... */ - or $1, \rs, zero .word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set noat SET_HARDFLOAT - /* move triggers an assembler bug... */ - or $1, \rs, zero .word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm @@ -399,7 +407,7 @@ .set push .set noat SET_HARDFLOAT - cfcmsa $1, MSA_CSR + _cfcmsa $1, MSA_CSR sw $1, THREAD_MSA_CSR(\thread) .set pop .endm @@ -409,7 +417,7 @@ .set noat SET_HARDFLOAT lw $1, THREAD_MSA_CSR(\thread) - ctcmsa MSA_CSR, $1 + _ctcmsa MSA_CSR, $1 .set pop ld_d 0, THREAD_FPR0, \thread ld_d 1, THREAD_FPR1, \thread @@ -452,9 +460,6 @@ insert_w \wd, 2 insert_w \wd, 3 #endif - .if 31-\wd - msa_init_upper (\wd+1) - .endif .endm .macro msa_init_all_upper @@ -463,6 +468,37 @@ SET_HARDFLOAT not $1, zero msa_init_upper 0 + msa_init_upper 1 + msa_init_upper 2 + msa_init_upper 3 + msa_init_upper 4 + msa_init_upper 5 + msa_init_upper 6 + msa_init_upper 7 + msa_init_upper 8 + msa_init_upper 9 + msa_init_upper 10 + msa_init_upper 11 + msa_init_upper 12 + msa_init_upper 13 + msa_init_upper 14 + msa_init_upper 15 + msa_init_upper 16 + msa_init_upper 17 + msa_init_upper 18 + msa_init_upper 19 + msa_init_upper 20 + msa_init_upper 21 + msa_init_upper 22 + msa_init_upper 23 + msa_init_upper 24 + msa_init_upper 25 + msa_init_upper 26 + msa_init_upper 27 + msa_init_upper 28 + msa_init_upper 29 + msa_init_upper 30 + msa_init_upper 31 .set pop .endm diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index dd083e999b08..b104ad9d655f 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -48,6 +48,12 @@ enum fpu_mode { #define FPU_FR_MASK 0x1 }; +#define __disable_fpu() \ +do { \ + clear_c0_status(ST0_CU1); \ + disable_fpu_hazard(); \ +} while (0) + static inline int __enable_fpu(enum fpu_mode mode) { int fr; @@ -86,7 +92,12 @@ fr_common: enable_fpu_hazard(); /* check FR has the desired value */ - return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE; + if (!!(read_c0_status() & ST0_FR) == !!fr) + return 0; + + /* unsupported FR value */ + __disable_fpu(); + return SIGFPE; default: BUG(); @@ -95,12 +106,6 @@ fr_common: return SIGFPE; } -#define __disable_fpu() \ -do { \ - clear_c0_status(ST0_CU1); \ - disable_fpu_hazard(); \ -} while (0) - #define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU) static inline int __is_fpu_owner(void) @@ -170,6 +175,7 @@ static inline void lose_fpu(int save) } disable_msa(); clear_thread_flag(TIF_USEDMSA); + __disable_fpu(); } else if (is_fpu_owner()) { if (save) _save_fp(current); diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index fdbff44e5482..608aa57799c8 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -8,9 +8,9 @@ #ifndef _ASM_MIPS_JUMP_LABEL_H #define _ASM_MIPS_JUMP_LABEL_H -#include <linux/types.h> +#ifndef __ASSEMBLY__ -#ifdef __KERNEL__ +#include <linux/types.h> #define JUMP_LABEL_NOP_SIZE 4 @@ -39,8 +39,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - #ifdef CONFIG_64BIT typedef u64 jump_label_t; #else @@ -53,4 +51,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif /* _ASM_MIPS_JUMP_LABEL_H */ diff --git a/arch/mips/include/asm/kdebug.h b/arch/mips/include/asm/kdebug.h index 6a9af5fcb5d7..cba22ab7ad4d 100644 --- a/arch/mips/include/asm/kdebug.h +++ b/arch/mips/include/asm/kdebug.h @@ -10,7 +10,8 @@ enum die_val { DIE_RI, DIE_PAGE_FAULT, DIE_BREAK, - DIE_SSTEPBP + DIE_SSTEPBP, + DIE_MSAFP }; #endif /* _ASM_MIPS_KDEBUG_H */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index ac4fc716062b..4c25823563fe 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -21,10 +21,10 @@ /* MIPS KVM register ids */ #define MIPS_CP0_32(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S))) + (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S))) #define MIPS_CP0_64(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S))) + (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S))) #define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0) #define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) @@ -42,11 +42,14 @@ #define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) #define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) #define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) +#define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0) #define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1) #define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0) #define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1) #define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2) #define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3) +#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) +#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) @@ -119,6 +122,10 @@ struct kvm_vcpu_stat { u32 syscall_exits; u32 resvd_inst_exits; u32 break_inst_exits; + u32 trap_inst_exits; + u32 msa_fpe_exits; + u32 fpe_exits; + u32 msa_disabled_exits; u32 flush_dcache_exits; u32 halt_successful_poll; u32 halt_wakeup; @@ -138,6 +145,10 @@ enum kvm_mips_exit_types { SYSCALL_EXITS, RESVD_INST_EXITS, BREAK_INST_EXITS, + TRAP_INST_EXITS, + MSA_FPE_EXITS, + FPE_EXITS, + MSA_DISABLED_EXITS, FLUSH_DCACHE_EXITS, MAX_KVM_MIPS_EXIT_TYPES }; @@ -206,6 +217,8 @@ struct mips_coproc { #define MIPS_CP0_CONFIG1_SEL 1 #define MIPS_CP0_CONFIG2_SEL 2 #define MIPS_CP0_CONFIG3_SEL 3 +#define MIPS_CP0_CONFIG4_SEL 4 +#define MIPS_CP0_CONFIG5_SEL 5 /* Config0 register bits */ #define CP0C0_M 31 @@ -262,31 +275,6 @@ struct mips_coproc { #define CP0C3_SM 1 #define CP0C3_TL 0 -/* Have config1, Cacheable, noncoherent, write-back, write allocate*/ -#define MIPS_CONFIG0 \ - ((1 << CP0C0_M) | (0x3 << CP0C0_K0)) - -/* Have config2, no coprocessor2 attached, no MDMX support attached, - no performance counters, watch registers present, - no code compression, EJTAG present, no FPU, no watch registers */ -#define MIPS_CONFIG1 \ -((1 << CP0C1_M) | \ - (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \ - (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \ - (0 << CP0C1_FP)) - -/* Have config3, no tertiary/secondary caches implemented */ -#define MIPS_CONFIG2 \ -((1 << CP0C2_M)) - -/* No config4, no DSP ASE, no large physaddr (PABITS), - no external interrupt controller, no vectored interrupts, - no 1kb pages, no SmartMIPS ASE, no trace logic */ -#define MIPS_CONFIG3 \ -((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \ - (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \ - (0 << CP0C3_SM) | (0 << CP0C3_TL)) - /* MMU types, the first four entries have the same layout as the CP0C0_MT field. */ enum mips_mmu_types { @@ -321,7 +309,9 @@ enum mips_mmu_types { */ #define T_TRAP 13 /* Trap instruction */ #define T_VCEI 14 /* Virtual coherency exception */ +#define T_MSAFPE 14 /* MSA floating point exception */ #define T_FPE 15 /* Floating point exception */ +#define T_MSADIS 21 /* MSA disabled exception */ #define T_WATCH 23 /* Watch address reference */ #define T_VCED 31 /* Virtual coherency data */ @@ -374,6 +364,9 @@ struct kvm_mips_tlb { long tlb_lo1; }; +#define KVM_MIPS_FPU_FPU 0x1 +#define KVM_MIPS_FPU_MSA 0x2 + #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; @@ -395,6 +388,8 @@ struct kvm_vcpu_arch { /* FPU State */ struct mips_fpu_struct fpu; + /* Which FPU state is loaded (KVM_MIPS_FPU_*) */ + unsigned int fpu_inuse; /* COP0 State */ struct mips_coproc *cop0; @@ -441,6 +436,9 @@ struct kvm_vcpu_arch { /* WAIT executed */ int wait; + + u8 fpu_enabled; + u8 msa_enabled; }; @@ -482,11 +480,15 @@ struct kvm_vcpu_arch { #define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) #define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) #define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) +#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4]) +#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5]) #define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) #define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) #define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) #define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) #define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) +#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val)) +#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val)) #define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) #define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) @@ -567,6 +569,31 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ } +/* Helpers */ + +static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) +{ + return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) && + vcpu->fpu_enabled; +} + +static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu) +{ + return kvm_mips_guest_can_have_fpu(vcpu) && + kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP; +} + +static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu) +{ + return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) && + vcpu->msa_enabled; +} + +static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu) +{ + return kvm_mips_guest_can_have_msa(vcpu) && + kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA; +} struct kvm_mips_callbacks { int (*handle_cop_unusable)(struct kvm_vcpu *vcpu); @@ -578,6 +605,10 @@ struct kvm_mips_callbacks { int (*handle_syscall)(struct kvm_vcpu *vcpu); int (*handle_res_inst)(struct kvm_vcpu *vcpu); int (*handle_break)(struct kvm_vcpu *vcpu); + int (*handle_trap)(struct kvm_vcpu *vcpu); + int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); + int (*handle_fpe)(struct kvm_vcpu *vcpu); + int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); int (*vcpu_init)(struct kvm_vcpu *vcpu); int (*vcpu_setup)(struct kvm_vcpu *vcpu); @@ -596,6 +627,8 @@ struct kvm_mips_callbacks { const struct kvm_one_reg *reg, s64 *v); int (*set_one_reg)(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 v); + int (*vcpu_get_regs)(struct kvm_vcpu *vcpu); + int (*vcpu_set_regs)(struct kvm_vcpu *vcpu); }; extern struct kvm_mips_callbacks *kvm_mips_callbacks; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); @@ -606,6 +639,19 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); /* Trampoline ASM routine to start running in "Guest" context */ extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); +/* FPU/MSA context management */ +void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu); +void __kvm_save_msa(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu); +void kvm_own_fpu(struct kvm_vcpu *vcpu); +void kvm_own_msa(struct kvm_vcpu *vcpu); +void kvm_drop_fpu(struct kvm_vcpu *vcpu); +void kvm_lose_fpu(struct kvm_vcpu *vcpu); + /* TLB handling */ uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu); @@ -711,6 +757,26 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, struct kvm_run *run, struct kvm_vcpu *vcpu); +extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + +extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + +extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + +extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); @@ -749,6 +815,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, struct kvm_run *run, struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); + /* Dynamic binary translation */ extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu); diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b5dcbee01fd7..9b3b48e21c22 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -105,7 +105,7 @@ union fpureg { #ifdef CONFIG_CPU_LITTLE_ENDIAN # define FPR_IDX(width, idx) (idx) #else -# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx)) +# define FPR_IDX(width, idx) ((idx) ^ ((64 / (width)) - 1)) #endif #define BUILD_FPR_ACCESS(width) \ diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 2c04b6d9ff85..6985eb59b085 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -36,77 +36,85 @@ struct kvm_regs { /* * for KVM_GET_FPU and KVM_SET_FPU - * - * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs - * are zero filled. */ struct kvm_fpu { - __u64 fpr[32]; - __u32 fir; - __u32 fccr; - __u32 fexr; - __u32 fenr; - __u32 fcsr; - __u32 pad; }; /* - * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0 + * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various * registers. The id field is broken down as follows: * - * bits[2..0] - Register 'sel' index. - * bits[7..3] - Register 'rd' index. - * bits[15..8] - Must be zero. - * bits[31..16] - 1 -> CP0 registers. - * bits[51..32] - Must be zero. * bits[63..52] - As per linux/kvm.h + * bits[51..32] - Must be zero. + * bits[31..16] - Register set. + * + * Register set = 0: GP registers from kvm_regs (see definitions below). + * + * Register set = 1: CP0 registers. + * bits[15..8] - Must be zero. + * bits[7..3] - Register 'rd' index. + * bits[2..0] - Register 'sel' index. + * + * Register set = 2: KVM specific registers (see definitions below). + * + * Register set = 3: FPU / MSA registers (see definitions below). * * Other sets registers may be added in the future. Each set would * have its own identifier in bits[31..16]. - * - * The registers defined in struct kvm_regs are also accessible, the - * id values for these are below. */ -#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0) -#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1) -#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2) -#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3) -#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4) -#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5) -#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6) -#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7) -#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8) -#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9) -#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10) -#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11) -#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12) -#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13) -#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14) -#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15) -#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16) -#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17) -#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18) -#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19) -#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20) -#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21) -#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22) -#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23) -#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24) -#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25) -#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26) -#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27) -#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28) -#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29) -#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30) -#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31) - -#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32) -#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33) -#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34) - -/* KVM specific control registers */ +#define KVM_REG_MIPS_GP (KVM_REG_MIPS | 0x0000000000000000ULL) +#define KVM_REG_MIPS_CP0 (KVM_REG_MIPS | 0x0000000000010000ULL) +#define KVM_REG_MIPS_KVM (KVM_REG_MIPS | 0x0000000000020000ULL) +#define KVM_REG_MIPS_FPU (KVM_REG_MIPS | 0x0000000000030000ULL) + + +/* + * KVM_REG_MIPS_GP - General purpose registers from kvm_regs. + */ + +#define KVM_REG_MIPS_R0 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 0) +#define KVM_REG_MIPS_R1 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 1) +#define KVM_REG_MIPS_R2 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 2) +#define KVM_REG_MIPS_R3 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 3) +#define KVM_REG_MIPS_R4 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 4) +#define KVM_REG_MIPS_R5 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 5) +#define KVM_REG_MIPS_R6 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 6) +#define KVM_REG_MIPS_R7 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 7) +#define KVM_REG_MIPS_R8 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 8) +#define KVM_REG_MIPS_R9 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 9) +#define KVM_REG_MIPS_R10 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10) +#define KVM_REG_MIPS_R11 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11) +#define KVM_REG_MIPS_R12 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12) +#define KVM_REG_MIPS_R13 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13) +#define KVM_REG_MIPS_R14 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14) +#define KVM_REG_MIPS_R15 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15) +#define KVM_REG_MIPS_R16 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16) +#define KVM_REG_MIPS_R17 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17) +#define KVM_REG_MIPS_R18 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18) +#define KVM_REG_MIPS_R19 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19) +#define KVM_REG_MIPS_R20 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20) +#define KVM_REG_MIPS_R21 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21) +#define KVM_REG_MIPS_R22 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22) +#define KVM_REG_MIPS_R23 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23) +#define KVM_REG_MIPS_R24 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24) +#define KVM_REG_MIPS_R25 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25) +#define KVM_REG_MIPS_R26 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26) +#define KVM_REG_MIPS_R27 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27) +#define KVM_REG_MIPS_R28 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28) +#define KVM_REG_MIPS_R29 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29) +#define KVM_REG_MIPS_R30 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30) +#define KVM_REG_MIPS_R31 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31) + +#define KVM_REG_MIPS_HI (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32) +#define KVM_REG_MIPS_LO (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33) +#define KVM_REG_MIPS_PC (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34) + + +/* + * KVM_REG_MIPS_KVM - KVM specific control registers. + */ /* * CP0_Count control @@ -118,8 +126,7 @@ struct kvm_fpu { * safely without losing time or guest timer interrupts. * Other: Reserved, do not change. */ -#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 0) +#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0) #define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001 /* @@ -131,15 +138,46 @@ struct kvm_fpu { * emulated. * Modifications to times in the future are rejected. */ -#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 1) +#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1) /* * CP0_Count rate in Hz * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without * discontinuities in CP0_Count. */ -#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 2) +#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2) + + +/* + * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers. + * + * bits[15..8] - Register subset (see definitions below). + * bits[7..5] - Must be zero. + * bits[4..0] - Register number within register subset. + */ + +#define KVM_REG_MIPS_FPR (KVM_REG_MIPS_FPU | 0x0000000000000000ULL) +#define KVM_REG_MIPS_FCR (KVM_REG_MIPS_FPU | 0x0000000000000100ULL) +#define KVM_REG_MIPS_MSACR (KVM_REG_MIPS_FPU | 0x0000000000000200ULL) + +/* + * KVM_REG_MIPS_FPR - Floating point / Vector registers. + */ +#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32 | (n)) +#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64 | (n)) +#define KVM_REG_MIPS_VEC_128(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n)) + +/* + * KVM_REG_MIPS_FCR - Floating point control registers. + */ +#define KVM_REG_MIPS_FCR_IR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 0) +#define KVM_REG_MIPS_FCR_CSR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31) + +/* + * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers. + */ +#define KVM_REG_MIPS_MSA_IR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 0) +#define KVM_REG_MIPS_MSA_CSR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 1) + /* * KVM MIPS specific structures and definitions diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 750d67ac41e9..e59fd7cfac9e 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -167,72 +167,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]); OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]); - /* the least significant 64 bits of each FP register */ - OFFSET(THREAD_FPR0_LS64, task_struct, - thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR1_LS64, task_struct, - thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR2_LS64, task_struct, - thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR3_LS64, task_struct, - thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR4_LS64, task_struct, - thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR5_LS64, task_struct, - thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR6_LS64, task_struct, - thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR7_LS64, task_struct, - thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR8_LS64, task_struct, - thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR9_LS64, task_struct, - thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR10_LS64, task_struct, - thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR11_LS64, task_struct, - thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR12_LS64, task_struct, - thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR13_LS64, task_struct, - thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR14_LS64, task_struct, - thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR15_LS64, task_struct, - thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR16_LS64, task_struct, - thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR17_LS64, task_struct, - thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR18_LS64, task_struct, - thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR19_LS64, task_struct, - thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR20_LS64, task_struct, - thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR21_LS64, task_struct, - thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR22_LS64, task_struct, - thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR23_LS64, task_struct, - thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR24_LS64, task_struct, - thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR25_LS64, task_struct, - thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR26_LS64, task_struct, - thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR27_LS64, task_struct, - thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR28_LS64, task_struct, - thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR29_LS64, task_struct, - thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR30_LS64, task_struct, - thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR31_LS64, task_struct, - thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31); OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr); BLANK(); @@ -470,6 +404,45 @@ void output_kvm_defines(void) OFFSET(VCPU_LO, kvm_vcpu_arch, lo); OFFSET(VCPU_HI, kvm_vcpu_arch, hi); OFFSET(VCPU_PC, kvm_vcpu_arch, pc); + BLANK(); + + OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]); + OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]); + OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]); + OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]); + OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]); + OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]); + OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]); + OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]); + OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]); + OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]); + OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]); + OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]); + OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]); + OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]); + OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]); + OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]); + OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]); + OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]); + OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]); + OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]); + OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]); + OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]); + OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]); + OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]); + OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]); + OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]); + OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]); + OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]); + OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]); + OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]); + OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]); + OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]); + + OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31); + OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr); + BLANK(); + OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0); OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid); OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid); diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2ebaabe3af15..af42e7003f12 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -360,12 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp) .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 - li a2, ~(0x3f << 12) - and a2, a1 - ctc1 a2, fcr31 .set pop - TRACE_IRQS_ON - STI + CLI + TRACE_IRQS_OFF + .endm + + .macro __build_clear_msa_fpe + _cfcmsa a1, MSA_CSR + CLI + TRACE_IRQS_OFF .endm .macro __build_clear_ade @@ -426,7 +429,7 @@ NESTED(nmi_handler, PT_SIZE, sp) BUILD_HANDLER cpu cpu sti silent /* #11 */ BUILD_HANDLER ov ov sti silent /* #12 */ BUILD_HANDLER tr tr sti silent /* #13 */ - BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */ + BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent /* #14 */ BUILD_HANDLER fpe fpe fpe silent /* #15 */ BUILD_HANDLER ftlb ftlb none silent /* #16 */ BUILD_HANDLER msa msa sti silent /* #21 */ diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 510452812594..7da6e324dd35 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -46,6 +46,26 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +static void init_fp_ctx(struct task_struct *target) +{ + /* If FP has been used then the target already has context */ + if (tsk_used_math(target)) + return; + + /* Begin with data registers set to all 1s... */ + memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr)); + + /* ...and FCSR zeroed */ + target->thread.fpu.fcr31 = 0; + + /* + * Record that the target has "used" math, such that the context + * just initialised, and any modifications made by the caller, + * aren't discarded. + */ + set_stopped_child_used_math(target); +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) if (!access_ok(VERIFY_READ, data, 33 * 8)) return -EIO; + init_fp_ctx(child); fregs = get_fpu_regs(child); for (i = 0; i < 32; i++) { @@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target, /* XXX fcr31 */ + init_fp_ctx(target); + if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fpu, @@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request, case FPR_BASE ... FPR_BASE + 31: { union fpureg *fregs = get_fpu_regs(child); - if (!tsk_used_math(child)) { - /* FP not yet used */ - memset(&child->thread.fpu, ~0, - sizeof(child->thread.fpu)); - child->thread.fpu.fcr31 = 0; - } + init_fp_ctx(child); #ifdef CONFIG_32BIT if (test_thread_flag(TIF_32BIT_FPREGS)) { /* diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 676c5030a953..1d88af26ba82 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -34,7 +34,6 @@ .endm .set noreorder - .set MIPS_ISA_ARCH_LEVEL_RAW LEAF(_save_fp_context) .set push @@ -103,6 +102,7 @@ LEAF(_save_fp_context) /* Save 32-bit process floating point context */ LEAF(_save_fp_context32) .set push + .set MIPS_ISA_ARCH_LEVEL_RAW SET_HARDFLOAT cfc1 t1, fcr31 diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 33984c04b60b..5b4d711f878d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -701,6 +701,13 @@ asmlinkage void do_ov(struct pt_regs *regs) int process_fpemu_return(int sig, void __user *fault_addr) { + /* + * We can't allow the emulated instruction to leave any of the cause + * bits set in FCSR. If they were then the kernel would take an FP + * exception when restoring FP context. + */ + current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + if (sig == SIGSEGV || sig == SIGBUS) { struct siginfo si = {0}; si.si_addr = fault_addr; @@ -781,6 +788,11 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP) goto out; + + /* Clear FCSR.Cause before enabling interrupts */ + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + local_irq_enable(); + die_if_kernel("FP exception in kernel code", regs); if (fcr31 & FPU_CSR_UNI_X) { @@ -804,18 +816,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - /* - * We can't allow the emulated instruction to leave any of - * the cause bit set in $fcr31. - */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + /* If something went wrong, signal */ + process_fpemu_return(sig, fault_addr); /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ - /* If something went wrong, signal */ - process_fpemu_return(sig, fault_addr); - goto out; } else if (fcr31 & FPU_CSR_INV_X) info.si_code = FPE_FLTINV; @@ -1392,13 +1398,22 @@ out: exception_exit(prev_state); } -asmlinkage void do_msa_fpe(struct pt_regs *regs) +asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr) { enum ctx_state prev_state; prev_state = exception_enter(); + if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0, + regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP) + goto out; + + /* Clear MSACSR.Cause before enabling interrupts */ + write_msa_csr(msacsr & ~MSA_CSR_CAUSEF); + local_irq_enable(); + die_if_kernel("do_msa_fpe invoked from kernel context!", regs); force_sig(SIGFPE, current); +out: exception_exit(prev_state); } diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 401fe027c261..637ebbebd549 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -1,13 +1,15 @@ # Makefile for KVM support for MIPS # -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm -kvm-objs := $(common-objs) mips.o emulate.o locore.o \ +common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o + +kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \ interrupt.o stats.o commpage.o \ - dyntrans.o trap_emul.o + dyntrans.o trap_emul.o fpu.o obj-$(CONFIG_KVM) += kvm.o obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index fb3e8dfd1ff6..6230f376a44e 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -884,6 +884,84 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +/** + * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config1 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = 0; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; +} + +/** + * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config3 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config4 is optional */ + unsigned int mask = MIPS_CONF_M; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; +} + +/** + * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config4 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config5 is optional */ + return MIPS_CONF_M; +} + +/** + * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config5 CP0 + * register, by the guest itself. + */ +unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = 0; + + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE; + /* We don't support UFR or UFE */ + } + + return mask; +} + enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, struct kvm_run *run, struct kvm_vcpu *vcpu) @@ -1021,18 +1099,114 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_mips_write_compare(vcpu, vcpu->arch.gprs[rt]); } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { - kvm_write_c0_guest_status(cop0, - vcpu->arch.gprs[rt]); + unsigned int old_val, val, change; + + old_val = kvm_read_c0_guest_status(cop0); + val = vcpu->arch.gprs[rt]; + change = val ^ old_val; + + /* Make sure that the NMI bit is never set */ + val &= ~ST0_NMI; + + /* + * Don't allow CU1 or FR to be set unless FPU + * capability enabled and exists in guest + * configuration. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't + * support it. + */ + if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + + /* Handle changes in FPU mode */ + preempt_disable(); + + /* + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. + */ + if (change & ST0_FR) + kvm_drop_fpu(vcpu); + + /* + * If MSA state is already live, it is undefined + * how it interacts with FR=0 FPU state, and we + * don't want to hit reserved instruction + * exceptions trying to save the MSA state later + * when CU=1 && FR=1, so play it safe and save + * it first. + */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + kvm_lose_fpu(vcpu); + /* - * Make sure that CU1 and NMI bits are - * never set + * Propagate CU1 (FPU enable) changes + * immediately if the FPU context is already + * loaded. When disabling we leave the context + * loaded so it can be quickly enabled again in + * the near future. */ - kvm_clear_c0_guest_status(cop0, - (ST0_CU1 | ST0_NMI)); + if (change & ST0_CU1 && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + change_c0_status(ST0_CU1, val); + + preempt_enable(); + + kvm_write_c0_guest_status(cop0, val); #ifdef CONFIG_KVM_MIPS_DYN_TRANS - kvm_mips_trans_mtc0(inst, opc, vcpu); + /* + * If FPU present, we need CU1/FR bits to take + * effect fairly soon. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + kvm_mips_trans_mtc0(inst, opc, vcpu); #endif + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + unsigned int old_val, val, change, wrmask; + + old_val = kvm_read_c0_guest_config5(cop0); + val = vcpu->arch.gprs[rt]; + + /* Only a few bits are writable in Config5 */ + wrmask = kvm_mips_config5_wrmask(vcpu); + change = (val ^ old_val) & wrmask; + val = old_val ^ change; + + + /* Handle changes in FPU/MSA modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + /* + * Propagate MSAEn changes immediately if the + * MSA context is already loaded. When disabling + * we leave the context loaded so it can be + * quickly enabled again in the near future. + */ + if (change & MIPS_CONF5_MSAEN && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + change_c0_config5(MIPS_CONF5_MSAEN, + val); + + preempt_enable(); + + kvm_write_c0_guest_config5(cop0, val); } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { uint32_t old_cause, new_cause; @@ -1970,6 +2144,146 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, return er; } +enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_TRAP << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver TRAP when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_MSAFPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_FPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver FPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + +enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_MSADIS << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver MSADIS when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + /* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 @@ -2176,6 +2490,10 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, case T_SYSCALL: case T_BREAK: case T_RES_INST: + case T_TRAP: + case T_MSAFPE: + case T_FPE: + case T_MSADIS: break; case T_COP_UNUSABLE: diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S new file mode 100644 index 000000000000..531fbf5131c0 --- /dev/null +++ b/arch/mips/kvm/fpu.S @@ -0,0 +1,122 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * FPU context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/fpregdef.h> +#include <asm/mipsregs.h> +#include <asm/regdef.h> + + .set noreorder + .set noat + +LEAF(__kvm_save_fpu) + .set push + .set mips64r2 + SET_HARDFLOAT + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + sdc1 $f1, VCPU_FPR1(a0) + sdc1 $f3, VCPU_FPR3(a0) + sdc1 $f5, VCPU_FPR5(a0) + sdc1 $f7, VCPU_FPR7(a0) + sdc1 $f9, VCPU_FPR9(a0) + sdc1 $f11, VCPU_FPR11(a0) + sdc1 $f13, VCPU_FPR13(a0) + sdc1 $f15, VCPU_FPR15(a0) + sdc1 $f17, VCPU_FPR17(a0) + sdc1 $f19, VCPU_FPR19(a0) + sdc1 $f21, VCPU_FPR21(a0) + sdc1 $f23, VCPU_FPR23(a0) + sdc1 $f25, VCPU_FPR25(a0) + sdc1 $f27, VCPU_FPR27(a0) + sdc1 $f29, VCPU_FPR29(a0) + sdc1 $f31, VCPU_FPR31(a0) +1: sdc1 $f0, VCPU_FPR0(a0) + sdc1 $f2, VCPU_FPR2(a0) + sdc1 $f4, VCPU_FPR4(a0) + sdc1 $f6, VCPU_FPR6(a0) + sdc1 $f8, VCPU_FPR8(a0) + sdc1 $f10, VCPU_FPR10(a0) + sdc1 $f12, VCPU_FPR12(a0) + sdc1 $f14, VCPU_FPR14(a0) + sdc1 $f16, VCPU_FPR16(a0) + sdc1 $f18, VCPU_FPR18(a0) + sdc1 $f20, VCPU_FPR20(a0) + sdc1 $f22, VCPU_FPR22(a0) + sdc1 $f24, VCPU_FPR24(a0) + sdc1 $f26, VCPU_FPR26(a0) + sdc1 $f28, VCPU_FPR28(a0) + jr ra + sdc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_save_fpu) + +LEAF(__kvm_restore_fpu) + .set push + .set mips64r2 + SET_HARDFLOAT + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + ldc1 $f1, VCPU_FPR1(a0) + ldc1 $f3, VCPU_FPR3(a0) + ldc1 $f5, VCPU_FPR5(a0) + ldc1 $f7, VCPU_FPR7(a0) + ldc1 $f9, VCPU_FPR9(a0) + ldc1 $f11, VCPU_FPR11(a0) + ldc1 $f13, VCPU_FPR13(a0) + ldc1 $f15, VCPU_FPR15(a0) + ldc1 $f17, VCPU_FPR17(a0) + ldc1 $f19, VCPU_FPR19(a0) + ldc1 $f21, VCPU_FPR21(a0) + ldc1 $f23, VCPU_FPR23(a0) + ldc1 $f25, VCPU_FPR25(a0) + ldc1 $f27, VCPU_FPR27(a0) + ldc1 $f29, VCPU_FPR29(a0) + ldc1 $f31, VCPU_FPR31(a0) +1: ldc1 $f0, VCPU_FPR0(a0) + ldc1 $f2, VCPU_FPR2(a0) + ldc1 $f4, VCPU_FPR4(a0) + ldc1 $f6, VCPU_FPR6(a0) + ldc1 $f8, VCPU_FPR8(a0) + ldc1 $f10, VCPU_FPR10(a0) + ldc1 $f12, VCPU_FPR12(a0) + ldc1 $f14, VCPU_FPR14(a0) + ldc1 $f16, VCPU_FPR16(a0) + ldc1 $f18, VCPU_FPR18(a0) + ldc1 $f20, VCPU_FPR20(a0) + ldc1 $f22, VCPU_FPR22(a0) + ldc1 $f24, VCPU_FPR24(a0) + ldc1 $f26, VCPU_FPR26(a0) + ldc1 $f28, VCPU_FPR28(a0) + jr ra + ldc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_restore_fpu) + +LEAF(__kvm_restore_fcsr) + .set push + SET_HARDFLOAT + lw t0, VCPU_FCR31(a0) + /* + * The ctc1 must stay at this offset in __kvm_restore_fcsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + ctc1 t0, fcr31 + jr ra + nop + .set pop + END(__kvm_restore_fcsr) diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 4a68b176d6e4..c567240386a0 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -36,6 +36,8 @@ #define PT_HOST_USERLOCAL PT_EPC #define CP0_DDATA_LO $28,3 +#define CP0_CONFIG3 $16,3 +#define CP0_CONFIG5 $16,5 #define CP0_EBASE $15,1 #define CP0_INTCTL $12,1 @@ -353,6 +355,42 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_L k0, VCPU_HOST_EBASE(k1) mtc0 k0,CP0_EBASE + /* + * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't + * trigger FPE for pending exceptions. + */ + .set at + and v1, v0, ST0_CU1 + beqz v1, 1f + nop + .set push + SET_HARDFLOAT + cfc1 t0, fcr31 + sw t0, VCPU_FCR31(k1) + ctc1 zero,fcr31 + .set pop + .set noat +1: + +#ifdef CONFIG_CPU_HAS_MSA + /* + * If MSA is enabled, save MSACSR and clear it so that later + * instructions don't trigger MSAFPE for pending exceptions. + */ + mfc0 t0, CP0_CONFIG3 + ext t0, t0, 28, 1 /* MIPS_CONF3_MSAP */ + beqz t0, 1f + nop + mfc0 t0, CP0_CONFIG5 + ext t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */ + beqz t0, 1f + nop + _cfcmsa t0, MSA_CSR + sw t0, VCPU_MSA_CSR(k1) + _ctcmsa MSA_CSR, zero +1: +#endif + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index c9eccf5df912..bb68e8d520e8 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -11,6 +11,7 @@ #include <linux/errno.h> #include <linux/err.h> +#include <linux/kdebug.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> @@ -48,6 +49,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, + { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU }, + { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, + { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, @@ -504,10 +509,13 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_CP0_STATUS, KVM_REG_MIPS_CP0_CAUSE, KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, KVM_REG_MIPS_CP0_CONFIG, KVM_REG_MIPS_CP0_CONFIG1, KVM_REG_MIPS_CP0_CONFIG2, KVM_REG_MIPS_CP0_CONFIG3, + KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, KVM_REG_MIPS_CP0_CONFIG7, KVM_REG_MIPS_CP0_ERROREPC, @@ -520,10 +528,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; int ret; s64 v; + s64 vs[2]; + unsigned int idx; switch (reg->id) { + /* General purpose registers */ case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31: v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0]; break; @@ -537,6 +549,67 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, v = (long)vcpu->arch.pc; break; + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + v = get_fpr32(&fpu->fpr[idx], 0); + else + v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + v = get_fpr64(&fpu->fpr[idx], 0); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.fpu_id; + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = fpu->fcr31; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Can't access MSA registers in FR=0 mode */ + if (!(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 0); + vs[1] = get_fpr64(&fpu->fpr[idx], 1); +#else + /* most significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 1); + vs[1] = get_fpr64(&fpu->fpr[idx], 0); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.msa_id; + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = fpu->msacsr; + break; + + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: v = (long)kvm_read_c0_guest_index(cop0); break; @@ -573,8 +646,8 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_EPC: v = (long)kvm_read_c0_guest_epc(cop0); break; - case KVM_REG_MIPS_CP0_ERROREPC: - v = (long)kvm_read_c0_guest_errorepc(cop0); + case KVM_REG_MIPS_CP0_PRID: + v = (long)kvm_read_c0_guest_prid(cop0); break; case KVM_REG_MIPS_CP0_CONFIG: v = (long)kvm_read_c0_guest_config(cop0); @@ -588,9 +661,18 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONFIG3: v = (long)kvm_read_c0_guest_config3(cop0); break; + case KVM_REG_MIPS_CP0_CONFIG4: + v = (long)kvm_read_c0_guest_config4(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + v = (long)kvm_read_c0_guest_config5(cop0); + break; case KVM_REG_MIPS_CP0_CONFIG7: v = (long)kvm_read_c0_guest_config7(cop0); break; + case KVM_REG_MIPS_CP0_ERROREPC: + v = (long)kvm_read_c0_guest_errorepc(cop0); + break; /* registers to be handled specially */ case KVM_REG_MIPS_CP0_COUNT: case KVM_REG_MIPS_COUNT_CTL: @@ -612,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, u32 v32 = (u32)v; return put_user(v32, uaddr32); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_to_user(uaddr, vs, 16); } else { return -EINVAL; } @@ -621,7 +707,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct mips_coproc *cop0 = vcpu->arch.cop0; - u64 v; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + s64 v; + s64 vs[2]; + unsigned int idx; if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; @@ -635,11 +724,16 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, if (get_user(v32, uaddr32) != 0) return -EFAULT; v = (s64)v32; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_from_user(vs, uaddr, 16); } else { return -EINVAL; } switch (reg->id) { + /* General purpose registers */ case KVM_REG_MIPS_R0: /* Silently ignore requests to set $0 */ break; @@ -656,6 +750,64 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, vcpu->arch.pc = v; break; + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + set_fpr32(&fpu->fpr[idx], 0, v); + else + set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + set_fpr64(&fpu->fpr[idx], 0, v); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + fpu->fcr31 = v; + break; + + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + set_fpr64(&fpu->fpr[idx], 0, vs[0]); + set_fpr64(&fpu->fpr[idx], 1, vs[1]); +#else + /* most significant byte first */ + set_fpr64(&fpu->fpr[idx], 1, vs[0]); + set_fpr64(&fpu->fpr[idx], 0, vs[1]); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + fpu->msacsr = v; + break; + + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: kvm_write_c0_guest_index(cop0, v); break; @@ -686,6 +838,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_EPC: kvm_write_c0_guest_epc(cop0, v); break; + case KVM_REG_MIPS_CP0_PRID: + kvm_write_c0_guest_prid(cop0, v); + break; case KVM_REG_MIPS_CP0_ERROREPC: kvm_write_c0_guest_errorepc(cop0, v); break; @@ -693,6 +848,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_COUNT: case KVM_REG_MIPS_CP0_COMPARE: case KVM_REG_MIPS_CP0_CAUSE: + case KVM_REG_MIPS_CP0_CONFIG: + case KVM_REG_MIPS_CP0_CONFIG1: + case KVM_REG_MIPS_CP0_CONFIG2: + case KVM_REG_MIPS_CP0_CONFIG3: + case KVM_REG_MIPS_CP0_CONFIG4: + case KVM_REG_MIPS_CP0_CONFIG5: case KVM_REG_MIPS_COUNT_CTL: case KVM_REG_MIPS_COUNT_RESUME: case KVM_REG_MIPS_COUNT_HZ: @@ -703,6 +864,33 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r = 0; + + if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap)) + return -EINVAL; + if (cap->flags) + return -EINVAL; + if (cap->args[0]) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_MIPS_FPU: + vcpu->arch.fpu_enabled = true; + break; + case KVM_CAP_MIPS_MSA: + vcpu->arch.msa_enabled = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -760,6 +948,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -ENOIOCTLCMD; } @@ -868,11 +1065,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: r = 1; break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_MIPS_FPU: + r = !!cpu_has_fpu; + break; + case KVM_CAP_MIPS_MSA: + /* + * We don't support MSA vector partitioning yet: + * 1) It would require explicit support which can't be tested + * yet due to lack of support in current hardware. + * 2) It extends the state that would need to be saved/restored + * by e.g. QEMU for migration. + * + * When vector partitioning hardware becomes available, support + * could be added by requiring a flag when enabling + * KVM_CAP_MIPS_MSA capability to indicate that userland knows + * to save/restore the appropriate extra state. + */ + r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); + break; default: r = 0; break; @@ -1119,6 +1335,30 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_break(vcpu); break; + case T_TRAP: + ++vcpu->stat.trap_inst_exits; + trace_kvm_exit(vcpu, TRAP_INST_EXITS); + ret = kvm_mips_callbacks->handle_trap(vcpu); + break; + + case T_MSAFPE: + ++vcpu->stat.msa_fpe_exits; + trace_kvm_exit(vcpu, MSA_FPE_EXITS); + ret = kvm_mips_callbacks->handle_msa_fpe(vcpu); + break; + + case T_FPE: + ++vcpu->stat.fpe_exits; + trace_kvm_exit(vcpu, FPE_EXITS); + ret = kvm_mips_callbacks->handle_fpe(vcpu); + break; + + case T_MSADIS: + ++vcpu->stat.msa_disabled_exits; + trace_kvm_exit(vcpu, MSA_DISABLED_EXITS); + ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); + break; + default: kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, @@ -1146,12 +1386,233 @@ skip_emul: } } + if (ret == RESUME_GUEST) { + /* + * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context + * is live), restore FCR31 / MSACSR. + * + * This should be before returning to the guest exception + * vector, as it may well cause an [MSA] FP exception if there + * are pending exception bits unmasked. (see + * kvm_mips_csr_die_notifier() for how that is handled). + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch) && + read_c0_status() & ST0_CU1) + __kvm_restore_fcsr(&vcpu->arch); + + if (kvm_mips_guest_has_msa(&vcpu->arch) && + read_c0_config5() & MIPS_CONF5_MSAEN) + __kvm_restore_msacsr(&vcpu->arch); + } + /* Disable HTW before returning to guest or host */ htw_stop(); return ret; } +/* Enable FPU for guest and restore context */ +void kvm_own_fpu(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + sr = kvm_read_c0_guest_status(cop0); + + /* + * If MSA state is already live, it is undefined how it interacts with + * FR=0 FPU state, and we don't want to hit reserved instruction + * exceptions trying to save the MSA state later when CU=1 && FR=1, so + * play it safe and save it first. + * + * In theory we shouldn't ever hit this case since kvm_lose_fpu() should + * get called when guest CU1 is set, however we can't trust the guest + * not to clobber the status register directly via the commpage. + */ + if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + kvm_lose_fpu(vcpu); + + /* + * Enable FPU for guest + * We set FR and FRE according to guest context + */ + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + enable_fpu_hazard(); + + /* If guest FPU state not active, restore it now */ + if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) { + __kvm_restore_fpu(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + } + + preempt_enable(); +} + +#ifdef CONFIG_CPU_HAS_MSA +/* Enable MSA for guest and restore context */ +void kvm_own_msa(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + /* + * Enable FPU if enabled in guest, since we're restoring FPU context + * anyway. We set FR and FRE according to guest context. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + sr = kvm_read_c0_guest_status(cop0); + + /* + * If FR=0 FPU state is already live, it is undefined how it + * interacts with MSA state, so play it safe and save it first. + */ + if (!(sr & ST0_FR) && + (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | + KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU) + kvm_lose_fpu(vcpu); + + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (sr & ST0_CU1 && cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + } + + /* Enable MSA for guest */ + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) { + case KVM_MIPS_FPU_FPU: + /* + * Guest FPU state already loaded, only restore upper MSA state + */ + __kvm_restore_msa_upper(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + break; + case 0: + /* Neither FPU or MSA already active, restore full MSA state */ + __kvm_restore_msa(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + if (kvm_mips_guest_has_fpu(&vcpu->arch)) + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + break; + default: + break; + } + + preempt_enable(); +} +#endif + +/* Drop FPU & MSA without saving it */ +void kvm_drop_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + disable_msa(); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA; + } + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + } + preempt_enable(); +} + +/* Save and disable FPU & MSA */ +void kvm_lose_fpu(struct kvm_vcpu *vcpu) +{ + /* + * FPU & MSA get disabled in root context (hardware) when it is disabled + * in guest context (software), but the register state in the hardware + * may still be in use. This is why we explicitly re-enable the hardware + * before saving. + */ + + preempt_disable(); + if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + __kvm_save_msa(&vcpu->arch); + + /* Disable MSA & FPU */ + disable_msa(); + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + clear_c0_status(ST0_CU1 | ST0_FR); + vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA); + } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + set_c0_status(ST0_CU1); + enable_fpu_hazard(); + + __kvm_save_fpu(&vcpu->arch); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + + /* Disable FPU */ + clear_c0_status(ST0_CU1 | ST0_FR); + } + preempt_enable(); +} + +/* + * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are + * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP + * exception if cause bits are set in the value being written. + */ +static int kvm_mips_csr_die_notify(struct notifier_block *self, + unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + unsigned long pc; + + /* Only interested in FPE and MSAFPE */ + if (cmd != DIE_FP && cmd != DIE_MSAFP) + return NOTIFY_DONE; + + /* Return immediately if guest context isn't active */ + if (!(current->flags & PF_VCPU)) + return NOTIFY_DONE; + + /* Should never get here from user mode */ + BUG_ON(user_mode(regs)); + + pc = instruction_pointer(regs); + switch (cmd) { + case DIE_FP: + /* match 2nd instruction in __kvm_restore_fcsr */ + if (pc != (unsigned long)&__kvm_restore_fcsr + 4) + return NOTIFY_DONE; + break; + case DIE_MSAFP: + /* match 2nd/3rd instruction in __kvm_restore_msacsr */ + if (!cpu_has_msa || + pc < (unsigned long)&__kvm_restore_msacsr + 4 || + pc > (unsigned long)&__kvm_restore_msacsr + 8) + return NOTIFY_DONE; + break; + } + + /* Move PC forward a little and continue executing */ + instruction_pointer(regs) += 4; + + return NOTIFY_STOP; +} + +static struct notifier_block kvm_mips_csr_die_notifier = { + .notifier_call = kvm_mips_csr_die_notify, +}; + int __init kvm_mips_init(void) { int ret; @@ -1161,6 +1622,8 @@ int __init kvm_mips_init(void) if (ret) return ret; + register_die_notifier(&kvm_mips_csr_die_notifier); + /* * On MIPS, kernel modules are executed from "mapped space", which * requires TLBs. The TLB handling code is statically linked with @@ -1173,7 +1636,6 @@ int __init kvm_mips_init(void) kvm_mips_release_pfn_clean = kvm_release_pfn_clean; kvm_mips_is_error_pfn = is_error_pfn; - pr_info("KVM/MIPS Initialized\n"); return 0; } @@ -1185,7 +1647,7 @@ void __exit kvm_mips_exit(void) kvm_mips_release_pfn_clean = NULL; kvm_mips_is_error_pfn = NULL; - pr_info("KVM/MIPS unloaded\n"); + unregister_die_notifier(&kvm_mips_csr_die_notifier); } module_init(kvm_mips_init); diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S new file mode 100644 index 000000000000..d02f0c6cc2cc --- /dev/null +++ b/arch/mips/kvm/msa.S @@ -0,0 +1,161 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * MIPS SIMD Architecture (MSA) context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/asmmacro.h> +#include <asm/regdef.h> + + .set noreorder + .set noat + +LEAF(__kvm_save_msa) + st_d 0, VCPU_FPR0, a0 + st_d 1, VCPU_FPR1, a0 + st_d 2, VCPU_FPR2, a0 + st_d 3, VCPU_FPR3, a0 + st_d 4, VCPU_FPR4, a0 + st_d 5, VCPU_FPR5, a0 + st_d 6, VCPU_FPR6, a0 + st_d 7, VCPU_FPR7, a0 + st_d 8, VCPU_FPR8, a0 + st_d 9, VCPU_FPR9, a0 + st_d 10, VCPU_FPR10, a0 + st_d 11, VCPU_FPR11, a0 + st_d 12, VCPU_FPR12, a0 + st_d 13, VCPU_FPR13, a0 + st_d 14, VCPU_FPR14, a0 + st_d 15, VCPU_FPR15, a0 + st_d 16, VCPU_FPR16, a0 + st_d 17, VCPU_FPR17, a0 + st_d 18, VCPU_FPR18, a0 + st_d 19, VCPU_FPR19, a0 + st_d 20, VCPU_FPR20, a0 + st_d 21, VCPU_FPR21, a0 + st_d 22, VCPU_FPR22, a0 + st_d 23, VCPU_FPR23, a0 + st_d 24, VCPU_FPR24, a0 + st_d 25, VCPU_FPR25, a0 + st_d 26, VCPU_FPR26, a0 + st_d 27, VCPU_FPR27, a0 + st_d 28, VCPU_FPR28, a0 + st_d 29, VCPU_FPR29, a0 + st_d 30, VCPU_FPR30, a0 + st_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_save_msa) + +LEAF(__kvm_restore_msa) + ld_d 0, VCPU_FPR0, a0 + ld_d 1, VCPU_FPR1, a0 + ld_d 2, VCPU_FPR2, a0 + ld_d 3, VCPU_FPR3, a0 + ld_d 4, VCPU_FPR4, a0 + ld_d 5, VCPU_FPR5, a0 + ld_d 6, VCPU_FPR6, a0 + ld_d 7, VCPU_FPR7, a0 + ld_d 8, VCPU_FPR8, a0 + ld_d 9, VCPU_FPR9, a0 + ld_d 10, VCPU_FPR10, a0 + ld_d 11, VCPU_FPR11, a0 + ld_d 12, VCPU_FPR12, a0 + ld_d 13, VCPU_FPR13, a0 + ld_d 14, VCPU_FPR14, a0 + ld_d 15, VCPU_FPR15, a0 + ld_d 16, VCPU_FPR16, a0 + ld_d 17, VCPU_FPR17, a0 + ld_d 18, VCPU_FPR18, a0 + ld_d 19, VCPU_FPR19, a0 + ld_d 20, VCPU_FPR20, a0 + ld_d 21, VCPU_FPR21, a0 + ld_d 22, VCPU_FPR22, a0 + ld_d 23, VCPU_FPR23, a0 + ld_d 24, VCPU_FPR24, a0 + ld_d 25, VCPU_FPR25, a0 + ld_d 26, VCPU_FPR26, a0 + ld_d 27, VCPU_FPR27, a0 + ld_d 28, VCPU_FPR28, a0 + ld_d 29, VCPU_FPR29, a0 + ld_d 30, VCPU_FPR30, a0 + ld_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_restore_msa) + + .macro kvm_restore_msa_upper wr, off, base + .set push + .set noat +#ifdef CONFIG_64BIT + ld $1, \off(\base) + insert_d \wr, 1 +#elif defined(CONFIG_CPU_LITTLE_ENDIAN) + lw $1, \off(\base) + insert_w \wr, 2 + lw $1, (\off+4)(\base) + insert_w \wr, 3 +#else /* CONFIG_CPU_BIG_ENDIAN */ + lw $1, (\off+4)(\base) + insert_w \wr, 2 + lw $1, \off(\base) + insert_w \wr, 3 +#endif + .set pop + .endm + +LEAF(__kvm_restore_msa_upper) + kvm_restore_msa_upper 0, VCPU_FPR0 +8, a0 + kvm_restore_msa_upper 1, VCPU_FPR1 +8, a0 + kvm_restore_msa_upper 2, VCPU_FPR2 +8, a0 + kvm_restore_msa_upper 3, VCPU_FPR3 +8, a0 + kvm_restore_msa_upper 4, VCPU_FPR4 +8, a0 + kvm_restore_msa_upper 5, VCPU_FPR5 +8, a0 + kvm_restore_msa_upper 6, VCPU_FPR6 +8, a0 + kvm_restore_msa_upper 7, VCPU_FPR7 +8, a0 + kvm_restore_msa_upper 8, VCPU_FPR8 +8, a0 + kvm_restore_msa_upper 9, VCPU_FPR9 +8, a0 + kvm_restore_msa_upper 10, VCPU_FPR10+8, a0 + kvm_restore_msa_upper 11, VCPU_FPR11+8, a0 + kvm_restore_msa_upper 12, VCPU_FPR12+8, a0 + kvm_restore_msa_upper 13, VCPU_FPR13+8, a0 + kvm_restore_msa_upper 14, VCPU_FPR14+8, a0 + kvm_restore_msa_upper 15, VCPU_FPR15+8, a0 + kvm_restore_msa_upper 16, VCPU_FPR16+8, a0 + kvm_restore_msa_upper 17, VCPU_FPR17+8, a0 + kvm_restore_msa_upper 18, VCPU_FPR18+8, a0 + kvm_restore_msa_upper 19, VCPU_FPR19+8, a0 + kvm_restore_msa_upper 20, VCPU_FPR20+8, a0 + kvm_restore_msa_upper 21, VCPU_FPR21+8, a0 + kvm_restore_msa_upper 22, VCPU_FPR22+8, a0 + kvm_restore_msa_upper 23, VCPU_FPR23+8, a0 + kvm_restore_msa_upper 24, VCPU_FPR24+8, a0 + kvm_restore_msa_upper 25, VCPU_FPR25+8, a0 + kvm_restore_msa_upper 26, VCPU_FPR26+8, a0 + kvm_restore_msa_upper 27, VCPU_FPR27+8, a0 + kvm_restore_msa_upper 28, VCPU_FPR28+8, a0 + kvm_restore_msa_upper 29, VCPU_FPR29+8, a0 + kvm_restore_msa_upper 30, VCPU_FPR30+8, a0 + kvm_restore_msa_upper 31, VCPU_FPR31+8, a0 + jr ra + nop + END(__kvm_restore_msa_upper) + +LEAF(__kvm_restore_msacsr) + lw t0, VCPU_MSA_CSR(a0) + /* + * The ctcmsa must stay at this offset in __kvm_restore_msacsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an MSA FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + _ctcmsa MSA_CSR, t0 + jr ra + nop + END(__kvm_restore_msacsr) diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index a74d6024c5ad..888bb67070ac 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -25,6 +25,10 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = { "System Call", "Reserved Inst", "Break Inst", + "Trap Inst", + "MSA FPE", + "FPE", + "MSA Disabled", "D-Cache Flushes", }; diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index bbcd82242059..aed0ac2a4972 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -216,6 +216,7 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, if (idx > current_cpu_data.tlbsize) { kvm_err("%s: Invalid Index: %d\n", __func__, idx); kvm_mips_dump_host_tlbs(); + local_irq_restore(flags); return -1; } @@ -732,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } } + /* restore guest state to registers */ + kvm_mips_callbacks->vcpu_set_regs(vcpu); + local_irq_restore(flags); } @@ -750,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.preempt_entryhi = read_c0_entryhi(); vcpu->arch.last_sched_cpu = cpu; + /* save guest state in registers */ + kvm_mips_callbacks->vcpu_get_regs(vcpu); + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & ASID_VERSION_MASK)) { kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index c1388d40663b..bd6437f67dc0 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -24,18 +24,18 @@ TRACE_EVENT(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), TP_ARGS(vcpu, reason), TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) + __field(unsigned long, pc) __field(unsigned int, reason) ), TP_fast_assign( - __entry->vcpu = vcpu; + __entry->pc = vcpu->arch.pc; __entry->reason = reason; ), TP_printk("[%s]PC: 0x%08lx", kvm_mips_exit_types_str[__entry->reason], - __entry->vcpu->arch.pc) + __entry->pc) ); #endif /* _TRACE_KVM_H */ diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index fd7257b70e65..d836ed5b0bc7 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { + struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; unsigned long cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; - if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) - er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); - else + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* FPU Unusable */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) { + /* + * Unusable/no FPU in guest: + * deliver guest COP1 Unusable Exception + */ + er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); + } else { + /* Restore FPU state */ + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + } else { er = kvm_mips_emulate_inst(cause, opc, run, vcpu); + } switch (er) { case EMULATE_DONE: @@ -330,6 +344,107 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +/** + * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled. + */ +static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) { + /* + * No MSA in guest, or FPU enabled and not in FR=1 mode, + * guest reserved instruction exception + */ + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + } else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) { + /* MSA disabled by guest, guest MSA disabled exception */ + er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu); + } else { + /* Restore MSA/FPU state */ + kvm_own_msa(vcpu); + er = EMULATE_DONE; + } + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + static int kvm_trap_emul_vm_init(struct kvm *kvm) { return 0; @@ -351,8 +466,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) * guest will come up as expected, for now we simulate a MIPS 24kc */ kvm_write_c0_guest_prid(cop0, 0x00019300); - kvm_write_c0_guest_config(cop0, - MIPS_CONFIG0 | (0x1 << CP0C0_AR) | + /* Have config1, Cacheable, noncoherent, write-back, write allocate */ + kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) | + (0x1 << CP0C0_AR) | (MMU_TYPE_R4000 << CP0C0_MT)); /* Read the cache characteristics from the host Config1 Register */ @@ -368,10 +484,18 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) (1 << CP0C1_WR) | (1 << CP0C1_CA)); kvm_write_c0_guest_config1(cop0, config1); - kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2); - /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */ - kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) | - (1 << CP0C3_ULRI)); + /* Have config3, no tertiary/secondary caches implemented */ + kvm_write_c0_guest_config2(cop0, MIPS_CONF_M); + /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */ + + /* Have config4, UserLocal */ + kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI); + + /* Have config5 */ + kvm_write_c0_guest_config4(cop0, MIPS_CONF_M); + + /* No config6 */ + kvm_write_c0_guest_config5(cop0, 0); /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); @@ -416,6 +540,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, { struct mips_coproc *cop0 = vcpu->arch.cop0; int ret = 0; + unsigned int cur, change; switch (reg->id) { case KVM_REG_MIPS_CP0_COUNT: @@ -444,6 +569,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, kvm_write_c0_guest_cause(cop0, v); } break; + case KVM_REG_MIPS_CP0_CONFIG: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG1: + cur = kvm_read_c0_guest_config1(cop0); + change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config1(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG3: + cur = kvm_read_c0_guest_config3(cop0); + change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config3(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + cur = kvm_read_c0_guest_config4(cop0); + change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config4(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + cur = kvm_read_c0_guest_config5(cop0); + change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config5(cop0, v); + } + break; case KVM_REG_MIPS_COUNT_CTL: ret = kvm_mips_set_count_ctl(vcpu, v); break; @@ -459,6 +622,18 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, return ret; } +static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu) +{ + kvm_lose_fpu(vcpu); + + return 0; +} + +static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu) +{ + return 0; +} + static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { /* exit handlers */ .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, @@ -470,6 +645,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_syscall = kvm_trap_emul_handle_syscall, .handle_res_inst = kvm_trap_emul_handle_res_inst, .handle_break = kvm_trap_emul_handle_break, + .handle_trap = kvm_trap_emul_handle_trap, + .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, + .handle_fpe = kvm_trap_emul_handle_fpe, + .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, .vm_init = kvm_trap_emul_vm_init, .vcpu_init = kvm_trap_emul_vcpu_init, @@ -483,6 +662,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .irq_clear = kvm_mips_irq_clear_cb, .get_one_reg = kvm_trap_emul_get_one_reg, .set_one_reg = kvm_trap_emul_set_one_reg, + .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs, + .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs, }; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 3b7f65cc4218..cf9b4633257e 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -75,11 +75,11 @@ static int rtctmp; int proc_dolasatrtc(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct timespec ts; + struct timespec64 ts; int r; if (!write) { - read_persistent_clock(&ts); + read_persistent_clock64(&ts); rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h index 20fb1cf2dab6..642462144872 100644 --- a/arch/nios2/include/asm/ptrace.h +++ b/arch/nios2/include/asm/ptrace.h @@ -15,7 +15,54 @@ #include <uapi/asm/ptrace.h> +/* This struct defines the way the registers are stored on the + stack during a system call. */ + #ifndef __ASSEMBLY__ +struct pt_regs { + unsigned long r8; /* r8-r15 Caller-saved GP registers */ + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long r1; /* Assembler temporary */ + unsigned long r2; /* Retval LS 32bits */ + unsigned long r3; /* Retval MS 32bits */ + unsigned long r4; /* r4-r7 Register arguments */ + unsigned long r5; + unsigned long r6; + unsigned long r7; + unsigned long orig_r2; /* Copy of r2 ?? */ + unsigned long ra; /* Return address */ + unsigned long fp; /* Frame pointer */ + unsigned long sp; /* Stack pointer */ + unsigned long gp; /* Global pointer */ + unsigned long estatus; + unsigned long ea; /* Exception return address (pc) */ + unsigned long orig_r7; +}; + +/* + * This is the extended stack used by signal handlers and the context + * switcher: it's pushed after the normal "struct pt_regs". + */ +struct switch_stack { + unsigned long r16; /* r16-r23 Callee-saved GP registers */ + unsigned long r17; + unsigned long r18; + unsigned long r19; + unsigned long r20; + unsigned long r21; + unsigned long r22; + unsigned long r23; + unsigned long fp; + unsigned long gp; + unsigned long ra; +}; + #define user_mode(regs) (((regs)->estatus & ESTATUS_EU)) #define instruction_pointer(regs) ((regs)->ra) diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h index 1f266575beb5..a16e55cbd8ad 100644 --- a/arch/nios2/include/asm/thread_info.h +++ b/arch/nios2/include/asm/thread_info.h @@ -47,7 +47,6 @@ struct thread_info { 0-0x7FFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; struct pt_regs *regs; }; @@ -64,9 +63,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/nios2/include/asm/ucontext.h b/arch/nios2/include/asm/ucontext.h deleted file mode 100644 index 2c87614b0f6e..000000000000 --- a/arch/nios2/include/asm/ucontext.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2010 Tobias Klauser <tklauser@distanz.ch> - * Copyright (C) 2004 Microtronix Datacom Ltd - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef _ASM_NIOS2_UCONTEXT_H -#define _ASM_NIOS2_UCONTEXT_H - -typedef int greg_t; -#define NGREG 32 -typedef greg_t gregset_t[NGREG]; - -struct mcontext { - int version; - gregset_t gregs; -}; - -#define MCONTEXT_VERSION 2 - -struct ucontext { - unsigned long uc_flags; - struct ucontext *uc_link; - stack_t uc_stack; - struct mcontext uc_mcontext; - sigset_t uc_sigmask; /* mask last for extensibility */ -}; - -#endif diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index 4f07ca3f8d10..e0bb972a50d7 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -1,4 +1,5 @@ include include/uapi/asm-generic/Kbuild.asm header-y += elf.h -header-y += ucontext.h + +generic-y += ucontext.h diff --git a/arch/nios2/include/uapi/asm/elf.h b/arch/nios2/include/uapi/asm/elf.h index a5b91ae5cf56..6f06d3b2949e 100644 --- a/arch/nios2/include/uapi/asm/elf.h +++ b/arch/nios2/include/uapi/asm/elf.h @@ -50,9 +50,7 @@ typedef unsigned long elf_greg_t; -#define ELF_NGREG \ - ((sizeof(struct pt_regs) + sizeof(struct switch_stack)) / \ - sizeof(elf_greg_t)) +#define ELF_NGREG 49 typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef unsigned long elf_fpregset_t; diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h index e83a7c9d1c36..eff00e67c0a2 100644 --- a/arch/nios2/include/uapi/asm/ptrace.h +++ b/arch/nios2/include/uapi/asm/ptrace.h @@ -60,60 +60,21 @@ #define PTR_IPENDING 37 #define PTR_CPUID 38 #define PTR_CTL6 39 -#define PTR_CTL7 40 +#define PTR_EXCEPTION 40 #define PTR_PTEADDR 41 #define PTR_TLBACC 42 #define PTR_TLBMISC 43 +#define PTR_ECCINJ 44 +#define PTR_BADADDR 45 +#define PTR_CONFIG 46 +#define PTR_MPUBASE 47 +#define PTR_MPUACC 48 -#define NUM_PTRACE_REG (PTR_TLBMISC + 1) +#define NUM_PTRACE_REG (PTR_MPUACC + 1) -/* this struct defines the way the registers are stored on the - stack during a system call. - - There is a fake_regs in setup.c that has to match pt_regs.*/ - -struct pt_regs { - unsigned long r8; /* r8-r15 Caller-saved GP registers */ - unsigned long r9; - unsigned long r10; - unsigned long r11; - unsigned long r12; - unsigned long r13; - unsigned long r14; - unsigned long r15; - unsigned long r1; /* Assembler temporary */ - unsigned long r2; /* Retval LS 32bits */ - unsigned long r3; /* Retval MS 32bits */ - unsigned long r4; /* r4-r7 Register arguments */ - unsigned long r5; - unsigned long r6; - unsigned long r7; - unsigned long orig_r2; /* Copy of r2 ?? */ - unsigned long ra; /* Return address */ - unsigned long fp; /* Frame pointer */ - unsigned long sp; /* Stack pointer */ - unsigned long gp; /* Global pointer */ - unsigned long estatus; - unsigned long ea; /* Exception return address (pc) */ - unsigned long orig_r7; -}; - -/* - * This is the extended stack used by signal handlers and the context - * switcher: it's pushed after the normal "struct pt_regs". - */ -struct switch_stack { - unsigned long r16; /* r16-r23 Callee-saved GP registers */ - unsigned long r17; - unsigned long r18; - unsigned long r19; - unsigned long r20; - unsigned long r21; - unsigned long r22; - unsigned long r23; - unsigned long fp; - unsigned long gp; - unsigned long ra; +/* User structures for general purpose registers. */ +struct user_pt_regs { + __u32 regs[49]; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/nios2/include/uapi/asm/sigcontext.h b/arch/nios2/include/uapi/asm/sigcontext.h index 7b8bb41867d4..b67944a50927 100644 --- a/arch/nios2/include/uapi/asm/sigcontext.h +++ b/arch/nios2/include/uapi/asm/sigcontext.h @@ -15,14 +15,16 @@ * details. */ -#ifndef _ASM_NIOS2_SIGCONTEXT_H -#define _ASM_NIOS2_SIGCONTEXT_H +#ifndef _UAPI__ASM_SIGCONTEXT_H +#define _UAPI__ASM_SIGCONTEXT_H -#include <asm/ptrace.h> +#include <linux/types.h> + +#define MCONTEXT_VERSION 2 struct sigcontext { - struct pt_regs regs; - unsigned long sc_mask; /* old sigmask */ + int version; + unsigned long gregs[32]; }; #endif diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index 7729bd3f2e79..27b006c52e12 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -161,7 +161,7 @@ ENTRY(inthandler) *********************************************************************** */ ENTRY(handle_trap) - ldw r24, -4(ea) /* instruction that caused the exception */ + ldwio r24, -4(ea) /* instruction that caused the exception */ srli r24, r24, 4 andi r24, r24, 0x7c movia r9,trap_table diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 2d0ea25be171..20662b0f6c9e 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -39,11 +39,11 @@ static inline int rt_restore_ucontext(struct pt_regs *regs, struct ucontext *uc, int *pr2) { int temp; - greg_t *gregs = uc->uc_mcontext.gregs; + unsigned long *gregs = uc->uc_mcontext.gregs; int err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err = __get_user(temp, &uc->uc_mcontext.version); if (temp != MCONTEXT_VERSION) @@ -127,7 +127,7 @@ badframe: static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; - greg_t *gregs = uc->uc_mcontext.gregs; + unsigned long *gregs = uc->uc_mcontext.gregs; int err = 0; err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version); diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c index 2ae482b42669..796642932e2e 100644 --- a/arch/nios2/mm/cacheflush.c +++ b/arch/nios2/mm/cacheflush.c @@ -23,9 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end) end += (cpuinfo.dcache_line_size - 1); end &= ~(cpuinfo.dcache_line_size - 1); - if (end > start + cpuinfo.dcache_size) - end = start + cpuinfo.dcache_size; - for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) { __asm__ __volatile__ (" flushda 0(%0)\n" : /* Outputs */ diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 0d231adfe576..0c9b6afe69e9 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -126,7 +126,6 @@ good_area: break; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -220,11 +219,6 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } if (!user_mode(regs)) goto no_context; pagefault_out_of_memory(); diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index f213f5b4c423..d17437238a2c 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER); -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 actual_pgd += PTRS_PER_PGD; /* Populate first pmd with allocated memory. We mark it * with PxD_FLAG_ATTACHED as a signal to the system that this @@ -45,7 +45,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 pgd -= PTRS_PER_PGD; #endif free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); @@ -72,12 +72,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { -#ifdef CONFIG_64BIT if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) - /* This is the permanent pmd attached to the pgd; - * cannot free it */ + /* + * This is the permanent pmd attached to the pgd; + * cannot free it. + * Increment the counter to compensate for the decrement + * done by generic mm code. + */ + mm_inc_nr_pmds(mm); return; -#endif free_pages((unsigned long)pmd, PMD_ORDER); } @@ -99,7 +102,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { -#ifdef CONFIG_64BIT +#if PT_NLEVELS == 3 /* preserve the gateway marker if this is the beginning of * the permanent pmd */ if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED) diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 5a8997d63899..8eefb12d1d33 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -55,8 +55,8 @@ #define ENTRY_COMP(_name_) .word sys_##_name_ #endif - ENTRY_SAME(restart_syscall) /* 0 */ - ENTRY_SAME(exit) +90: ENTRY_SAME(restart_syscall) /* 0 */ +91: ENTRY_SAME(exit) ENTRY_SAME(fork_wrapper) ENTRY_SAME(read) ENTRY_SAME(write) @@ -439,7 +439,10 @@ ENTRY_SAME(bpf) ENTRY_COMP(execveat) - /* Nothing yet */ + +.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) +.error "size of syscall table does not fit value of __NR_Linux_syscalls" +.endif #undef ENTRY_SAME #undef ENTRY_DIFF diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index 2bf8e9307be9..4c8ad592ae33 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads) static inline int cpu_nr_cores(void) { - return NR_CPUS >> threads_shift; + return nr_cpu_ids >> threads_shift; } static inline cpumask_t cpu_online_cores_map(void) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 9cfa3706a1b8..f1ea5972f6ec 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -113,6 +113,7 @@ extern void iommu_register_group(struct iommu_table *tbl, int pci_domain_number, unsigned long pe_num); extern int iommu_add_device(struct device *dev); extern void iommu_del_device(struct device *dev); +extern int __init tce_iommu_bus_notifier_init(void); #else static inline void iommu_register_group(struct iommu_table *tbl, int pci_domain_number, @@ -128,6 +129,11 @@ static inline int iommu_add_device(struct device *dev) static inline void iommu_del_device(struct device *dev) { } + +static inline int __init tce_iommu_bus_notifier_init(void) +{ + return 0; +} #endif /* !CONFIG_IOMMU_API */ static inline void set_iommu_table_base_and_group(struct device *dev, diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h new file mode 100644 index 000000000000..744fd54de374 --- /dev/null +++ b/arch/powerpc/include/asm/irq_work.h @@ -0,0 +1,9 @@ +#ifndef _ASM_POWERPC_IRQ_WORK_H +#define _ASM_POWERPC_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return true; +} + +#endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 03cd858a401c..4cbe23af400a 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -153,6 +153,7 @@ #define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff #define PPC_INST_MFTMR 0x7c0002dc #define PPC_INST_MSGSND 0x7c00019c +#define PPC_INST_MSGCLR 0x7c0001dc #define PPC_INST_MSGSNDP 0x7c00011c #define PPC_INST_MTTMR 0x7c0003dc #define PPC_INST_NOP 0x60000000 @@ -309,6 +310,8 @@ ___PPC_RB(b) | __PPC_EH(eh)) #define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \ ___PPC_RB(b)) +#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \ + ___PPC_RB(b)) #define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \ ___PPC_RB(b)) #define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1c874fb533bb..af56b5c6c81a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -608,13 +608,16 @@ #define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */ #define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */ #define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */ +#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */ #define SRR1_WAKESYSERR 0x00300000 /* System error */ #define SRR1_WAKEEE 0x00200000 /* External interrupt */ #define SRR1_WAKEMT 0x00280000 /* mtctrl */ #define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */ #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ +#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */ #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ #define SRR1_WAKERESET 0x00100000 /* System reset */ +#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ #define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, * may not be recoverable */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f337666768a7..f83046878336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 DD1: Does not support doorbell IPIs */ .pvr_mask = 0xffffff00, .pvr_value = 0x004d0100, diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index f4217819cc31..2128f3a96c32 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -17,6 +17,7 @@ #include <asm/dbell.h> #include <asm/irq_regs.h> +#include <asm/kvm_ppc.h> #ifdef CONFIG_SMP void doorbell_setup_this_cpu(void) @@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs) may_hard_irq_enable(); + kvmppc_set_host_ipi(smp_processor_id(), 0); __this_cpu_inc(irq_stat.doorbell_irqs); smp_ipi_demux(); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c2df8150bd7a..9519e6bdc6d7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,7 +1408,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5d3968c4d799..b054f33ab1fb 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1175,4 +1175,30 @@ void iommu_del_device(struct device *dev) } EXPORT_SYMBOL_GPL(iommu_del_device); +static int tce_iommu_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + return iommu_add_device(dev); + case BUS_NOTIFY_DEL_DEVICE: + if (dev->iommu_group) + iommu_del_device(dev); + return 0; + default: + return 0; + } +} + +static struct notifier_block tce_iommu_bus_nb = { + .notifier_call = tce_iommu_bus_notifier, +}; + +int __init tce_iommu_bus_notifier_init(void) +{ + bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); + return 0; +} #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6e19afa35a15..ec9ec2058d2d 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -541,8 +541,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) if (smp_ops->give_timebase) smp_ops->give_timebase(); - /* Wait until cpu puts itself in the online map */ - while (!cpu_online(cpu)) + /* Wait until cpu puts itself in the online & active maps */ + while (!cpu_online(cpu) || !cpu_active(cpu)) cpu_relax(); return 0; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index de4018a1bc4b..de747563d29d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; if (lppaca) - yield_count = lppaca->yield_count; + yield_count = be32_to_cpu(lppaca->yield_count); spin_unlock(&vcpu->arch.vpa_update_lock); return yield_count; } @@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, bool preserve_top32) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; + mutex_lock(&kvm->lock); spin_lock(&vc->lock); /* * If ILE (interrupt little-endian) has changed, update the * MSR_LE bit in the intr_msr for each vcpu in this vcore. */ if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *vcpu; int i; - mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->arch.vcore != vc) continue; @@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, else vcpu->arch.intr_msr &= ~MSR_LE; } - mutex_unlock(&kvm->lock); } /* @@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); + mutex_unlock(&kvm->lock); } static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bb94e6f20c81..6cbf1630cb70 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Save HEIR (HV emulation assist reg) in emul_inst if this is an HEI (HV emulation interrupt, e40) */ li r3,KVM_INST_FETCH_FAILED + stw r3,VCPU_LAST_INST(r9) cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST bne 11f mfspr r3,SPRN_HEIR diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 39b3a8f816f2..6249cdc834d1 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -34,7 +34,7 @@ #include <asm/kvm_para.h> #include <asm/kvm_host.h> #include <asm/kvm_ppc.h> -#include "iodev.h" +#include <kvm/iodev.h> #define MAX_CPU 32 #define MAX_SRC 256 @@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ) clear_bit(n_IRQ, q->queue); } -static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ) -{ - return test_bit(n_IRQ, q->queue); -} - static void IRQ_check(struct openpic *opp, struct irq_queue *q) { int irq = -1; @@ -1374,8 +1369,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) return -ENXIO; } -static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, - int len, void *ptr) +static int kvm_mpic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; @@ -1415,8 +1411,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, return ret; } -static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, - int len, const void *ptr) +static int kvm_mpic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0face86f4..24bfe401373e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 0509bca5e830..fcbe899fe299 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -9,11 +9,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/jump_label.h> #include <asm/ppc_asm.h> #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> -#include <asm/jump_label.h> .section ".text" diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index e69142f4af08..54323d6b5166 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -836,30 +836,4 @@ void __init pnv_pci_init(void) #endif } -static int tce_iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - return iommu_add_device(dev); - case BUS_NOTIFY_DEL_DEVICE: - if (dev->iommu_group) - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = tce_iommu_bus_notifier, -}; - -static int __init tce_iommu_bus_notifier_init(void) -{ - bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb); - return 0; -} machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index fc34025ef822..38a45088f633 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -33,6 +33,8 @@ #include <asm/runlatch.h> #include <asm/code-patching.h> #include <asm/dbell.h> +#include <asm/kvm_ppc.h> +#include <asm/ppc-opcode.h> #include "powernv.h" @@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void) static void pnv_smp_cpu_kill_self(void) { unsigned int cpu; - unsigned long srr1; + unsigned long srr1, wmask; u32 idle_states; /* Standard hot unplug procedure */ @@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void) generic_set_cpu_dead(cpu); smp_wmb(); + wmask = SRR1_WAKEMASK; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + wmask = SRR1_WAKEMASK_P8; + idle_states = pnv_get_supported_cpuidle_states(); /* We don't want to take decrementer interrupts while we are offline, * so clear LPCR:PECE1. We keep PECE2 enabled. @@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void) * having finished executing in a KVM guest, then srr1 * contains 0. */ - if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) { + if ((srr1 & wmask) == SRR1_WAKEEE) { icp_native_flush_interrupt(); local_paca->irq_happened &= PACA_IRQ_HARD_DIS; smp_mb(); + } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + asm volatile(PPC_MSGCLR(%0) : : "r" (msg)); + kvmppc_set_host_ipi(cpu, 0); } if (cpu_core_split_required()) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index ccd53f91e8aa..74b5b8e239c8 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -7,12 +7,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/jump_label.h> #include <asm/hvcall.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/jump_label.h> .section ".text" diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 1d3d52dc3ff3..7803a19adb31 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1340,3 +1340,5 @@ static int __init disable_multitce(char *str) } __setup("multitce=", disable_multitce); + +machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b5682fd6c984..b7a67e3d2201 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -26,7 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/console.h> #include <linux/export.h> -#include <linux/static_key.h> +#include <linux/jump_label.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 90cf3dcbd9f2..8f35d525cede 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -25,10 +25,10 @@ static struct kobject *mobility_kobj; struct update_props_workarea { - u32 phandle; - u32 state; - u64 reserved; - u32 nprops; + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; } __packed; #define NODE_ACTION_MASK 0xff000000 @@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(u32 phandle) +static int delete_dt_node(__be32 phandle) { struct device_node *dn; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) return -ENOENT; @@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(u32 phandle, s32 scope) +static int update_dt_node(__be32 phandle, s32 scope) { struct update_props_workarea *upwa; struct device_node *dn; @@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope) char *prop_data; char *rtas_buf; int update_properties_token; + u32 nprops; u32 vd; update_properties_token = rtas_token("ibm,update-properties"); @@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) { kfree(rtas_buf); return -ENOENT; @@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope) break; prop_data = rtas_buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); /* On the first call to ibm,update-properties for a node the * the first property value descriptor contains an empty @@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope) */ if (*prop_data == 0) { prop_data++; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += vd + sizeof(vd); - upwa->nprops--; + nprops--; } - for (i = 0; i < upwa->nprops; i++) { + for (i = 0; i < nprops; i++) { char *prop_name; prop_name = prop_data; prop_data += strlen(prop_name) + 1; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += sizeof(vd); switch (vd) { @@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope) return 0; } -static int add_dt_node(u32 parent_phandle, u32 drc_index) +static int add_dt_node(__be32 parent_phandle, __be32 drc_index) { struct device_node *dn; struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(parent_phandle); + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); if (!parent_dn) return -ENOENT; @@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index) int pseries_devicetree_update(s32 scope) { char *rtas_buf; - u32 *data; + __be32 *data; int update_nodes_token; int rc; @@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope) if (rc && rc != 1) break; - data = (u32 *)rtas_buf + 4; - while (*data & NODE_ACTION_MASK) { + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { int i; - u32 action = *data & NODE_ACTION_MASK; - int node_count = *data & NODE_COUNT_MASK; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; data++; for (i = 0; i < node_count; i++) { - u32 phandle = *data++; - u32 drc_index; + __be32 phandle = *data++; + __be32 drc_index; switch (action) { case DELETE_DT_NODE: diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9df40b5c0ac..c9c875d9ed31 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -211,7 +211,7 @@ do { \ extern unsigned long mmap_rnd_mask; -#define STACK_RND_MASK (mmap_rnd_mask) +#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) #define ARCH_DLINFO \ do { \ diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 58642fd29c87..2b77e235b5fb 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -1,6 +1,8 @@ #ifndef _ASM_S390_JUMP_LABEL_H #define _ASM_S390_JUMP_LABEL_H +#ifndef __ASSEMBLY__ + #include <linux/types.h> #define JUMP_LABEL_NOP_SIZE 6 @@ -39,4 +41,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d84559e31f32..d01fc588b5c3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -172,7 +172,9 @@ struct kvm_s390_sie_block { __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[30]; /* 0x01c0 */ + __u8 reserved1c0[8]; /* 0x01c0 */ + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ @@ -183,11 +185,17 @@ struct kvm_s390_itdb { __u8 data[256]; } __packed; +struct kvm_s390_vregs { + __vector128 vrs[32]; + __u8 reserved200[512]; /* for future vector expansion */ +} __packed; + struct sie_page { struct kvm_s390_sie_block sie_block; __u8 reserved200[1024]; /* 0x0200 */ struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ + __u8 reserved700[1280]; /* 0x0700 */ + struct kvm_s390_vregs vregs; /* 0x0c00 */ } __packed; struct kvm_vcpu_stat { @@ -238,6 +246,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_stop; u32 instruction_sigp_stop_store_status; u32 instruction_sigp_store_status; + u32 instruction_sigp_store_adtl_status; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; @@ -270,6 +279,7 @@ struct kvm_vcpu_stat { #define PGM_SPECIAL_OPERATION 0x13 #define PGM_OPERAND 0x15 #define PGM_TRACE_TABEL 0x16 +#define PGM_VECTOR_PROCESSING 0x1b #define PGM_SPACE_SWITCH 0x1c #define PGM_HFP_SQUARE_ROOT 0x1d #define PGM_PC_TRANSLATION_SPEC 0x1f @@ -334,6 +344,11 @@ enum irq_types { IRQ_PEND_COUNT }; +/* We have 2M for virtio device descriptor pages. Smallest amount of + * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381 + */ +#define KVM_S390_MAX_VIRTIO_IRQS 87381 + /* * Repressible (non-floating) machine check interrupts * subclass bits in MCIC @@ -411,13 +426,32 @@ struct kvm_s390_local_interrupt { unsigned long pending_irqs; }; +#define FIRQ_LIST_IO_ISC_0 0 +#define FIRQ_LIST_IO_ISC_1 1 +#define FIRQ_LIST_IO_ISC_2 2 +#define FIRQ_LIST_IO_ISC_3 3 +#define FIRQ_LIST_IO_ISC_4 4 +#define FIRQ_LIST_IO_ISC_5 5 +#define FIRQ_LIST_IO_ISC_6 6 +#define FIRQ_LIST_IO_ISC_7 7 +#define FIRQ_LIST_PFAULT 8 +#define FIRQ_LIST_VIRTIO 9 +#define FIRQ_LIST_COUNT 10 +#define FIRQ_CNTR_IO 0 +#define FIRQ_CNTR_SERVICE 1 +#define FIRQ_CNTR_VIRTIO 2 +#define FIRQ_CNTR_PFAULT 3 +#define FIRQ_MAX_COUNT 4 + struct kvm_s390_float_interrupt { + unsigned long pending_irqs; spinlock_t lock; - struct list_head list; - atomic_t active; + struct list_head lists[FIRQ_LIST_COUNT]; + int counters[FIRQ_MAX_COUNT]; + struct kvm_s390_mchk_info mchk; + struct kvm_s390_ext_info srv_signal; int next_rr_cpu; unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; - unsigned int irq_count; }; struct kvm_hw_wp_info_arch { @@ -465,6 +499,7 @@ struct kvm_vcpu_arch { s390_fp_regs host_fpregs; unsigned int host_acrs[NUM_ACRS]; s390_fp_regs guest_fpregs; + struct kvm_s390_vregs *host_vregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; @@ -515,15 +550,15 @@ struct s390_io_adapter { #define S390_ARCH_FAC_MASK_SIZE_U64 \ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) -struct s390_model_fac { - /* facilities used in SIE context */ - __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64]; - /* subset enabled by kvm */ - __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64]; +struct kvm_s390_fac { + /* facility list requested by guest */ + __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; + /* facility mask supported by kvm & hosting machine */ + __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; }; struct kvm_s390_cpu_model { - struct s390_model_fac *fac; + struct kvm_s390_fac *fac; struct cpuid cpu_id; unsigned short ibc; }; @@ -553,6 +588,7 @@ struct kvm_arch{ int use_cmma; int user_cpu_state_ctrl; int user_sigp; + int user_stsi; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; int ipte_lock_count; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f49b71954654..8fb3802f8fad 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -62,6 +62,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); + S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); if (prev == next) return; if (MACHINE_HAS_TLB_LC) @@ -73,7 +74,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, atomic_dec(&prev->context.attach_count); if (MACHINE_HAS_TLB_LC) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 7b2ac6e44166..53eacbd4f09b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end #endif } -static inline void clear_page(void *page) -{ - register unsigned long reg1 asm ("1") = 0; - register void *reg2 asm ("2") = page; - register unsigned long reg3 asm ("3") = 4096; - asm volatile( - " mvcl 2,0" - : "+d" (reg2), "+d" (reg3) : "d" (reg1) - : "memory", "cc"); -} +#define clear_page(page) memset((page), 0, PAGE_SIZE) /* * copy_page uses the mvcl instruction with 0xb0 padding byte in order to diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9c77e60b9a26..ef1a5fcc6c66 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -150,6 +150,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_CRS (1UL << 3) #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) +#define KVM_SYNC_VRS (1UL << 6) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -164,6 +165,9 @@ struct kvm_sync_regs { __u64 pft; /* pfault token [PFAULT] */ __u64 pfs; /* pfault select [PFAULT] */ __u64 pfc; /* pfault compare [PFAULT] */ + __u64 vrs[32][2]; /* vector registers */ + __u8 reserved[512]; /* for future vector expansion */ + __u32 fpc; /* only valid with vector registers */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index d4096fdfc6ab..ee69c0854c88 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -230,7 +230,7 @@ * and returns a key, which can be used to find a mnemonic name * of the instruction in the icpt_insn_codes table. */ -#define icpt_insn_decoder(insn) \ +#define icpt_insn_decoder(insn) ( \ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \ @@ -239,6 +239,6 @@ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \ - INSN_DECODE(insn) + INSN_DECODE(insn)) #endif /* _UAPI_ASM_S390_SIE_H */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e07e91605353..8dc4db10d160 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -171,6 +171,7 @@ int main(void) #else /* CONFIG_32BIT */ DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); + DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr)); DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 82c19899574f..6c79f1b44fe7 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -57,6 +57,44 @@ unsigned long ftrace_plt; +static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) +{ +#ifdef CC_USING_HOTPATCH + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +#else + /* stg r14,8(r15) */ + insn->opc = 0xe3e0; + insn->disp = 0xf0080024; +#endif +} + +static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + if (insn->opc == BREAKPOINT_INSTRUCTION) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_NOP; +#endif +} + +static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_CALL; +#endif +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ -#ifdef CC_USING_HOTPATCH - /* We expect to see brcl 0,0 */ - ftrace_generate_nop_insn(&orig); -#else - /* We expect to see stg r14,8(r15) */ - orig.opc = 0xe3e0; - orig.disp = 0xf0080024; -#endif + ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (old.opc == BREAKPOINT_INSTRUCTION) { + } else if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * bytes of the original instruction so that the kprobes * handler can execute a nop, if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_CALL; - new.disp = KPROBE_ON_FTRACE_NOP; + ftrace_generate_kprobe_call_insn(&orig); + ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (old.opc == BREAKPOINT_INSTRUCTION) { + if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * bytes of the original instruction so that the kprobes * handler can execute a brasl if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_NOP; - new.disp = KPROBE_ON_FTRACE_CALL; + ftrace_generate_kprobe_nop_insn(&orig); + ftrace_generate_kprobe_call_insn(&new); } else { /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index cb2d51e779df..830066f936c8 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,16 +36,20 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) insn->offset = (entry->target - entry->code) >> 1; } -static void jump_label_bug(struct jump_entry *entry, struct insn *insn) +static void jump_label_bug(struct jump_entry *entry, struct insn *expected, + struct insn *new) { unsigned char *ipc = (unsigned char *)entry->code; - unsigned char *ipe = (unsigned char *)insn; + unsigned char *ipe = (unsigned char *)expected; + unsigned char *ipn = (unsigned char *)new; pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n", ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]); pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n", ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]); + pr_emerg("New: %02x %02x %02x %02x %02x %02x\n", + ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]); panic("Corrupted kernel text"); } @@ -69,10 +73,10 @@ static void __jump_label_transform(struct jump_entry *entry, } if (init) { if (memcmp((void *)entry->code, &orignop, sizeof(orignop))) - jump_label_bug(entry, &old); + jump_label_bug(entry, &orignop, &new); } else { if (memcmp((void *)entry->code, &old, sizeof(old))) - jump_label_bug(entry, &old); + jump_label_bug(entry, &old, &new); } probe_kernel_write((void *)entry->code, &new, sizeof(new)); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 36154a2f1814..2ca95862e336 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -436,6 +436,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + jump_label_apply_nops(me); vfree(me->arch.syminfo); me->arch.syminfo = NULL; return 0; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c3f8d157cb0d..e6a1578fc000 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, NULL, }; @@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void) return -EINVAL; } - if (si.ad) + if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + cpumsf_pmu_events_attr[1] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 26108232fcaa..dc488e13b7e3 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); -void cpu_relax(void) +void notrace cpu_relax(void) { if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) asm volatile("diag 0,0,0x44"); diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 6b09fdffbd2f..ca6294645dd3 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -177,6 +177,17 @@ restart_entry: lhi %r1,1 sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 +#ifdef CONFIG_SMP + larl %r1,smp_cpu_mt_shift + icm %r1,15,0(%r1) + jz smt_done + llgfr %r1,%r1 +smt_loop: + sigp %r1,%r0,SIGP_SET_MULTI_THREADING + brc 8,smt_done /* accepted */ + brc 2,smt_loop /* busy, try again */ +smt_done: +#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 20660dddb2d6..170ddd2018b3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk) { u64 nsecps; - if (tk->tkr.clock != &clocksource_tod) + if (tk->tkr_mono.clock != &clocksource_tod) return; /* Make userspace gettimeofday spin until we're done. */ ++vdso_data->tb_update_count; smp_wmb(); - vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; + vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last; vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; + vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; vdso_data->wtom_clock_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + - + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); - nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; + vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec + + + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); + nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift; while (vdso_data->wtom_clock_nsec >= nsecps) { vdso_data->wtom_clock_nsec -= nsecps; vdso_data->wtom_clock_sec++; @@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_coarse_sec = tk->xtime_sec; vdso_data->xtime_coarse_nsec = - (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); vdso_data->wtom_coarse_sec = vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; vdso_data->wtom_coarse_nsec = @@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_coarse_sec++; } - vdso_data->tk_mult = tk->tkr.mult; - vdso_data->tk_shift = tk->tkr.shift; + vdso_data->tk_mult = tk->tkr_mono.mult; + vdso_data->tk_shift = tk->tkr_mono.shift; smp_wmb(); ++vdso_data->tb_update_count; } @@ -283,7 +283,7 @@ void __init time_init(void) if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) panic("Couldn't request external interrupt 0x1406"); - if (clocksource_register(&clocksource_tod) != 0) + if (__clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); /* Enable TOD clock interrupts on the boot cpu. */ diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9254afff250c..fc7ec95848c3 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm)); + rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) @@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 3 contains the virtqueue index (passed as datamatch) * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS, vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); @@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; + int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 267523cac6de..a7559f7207df 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -10,6 +10,7 @@ #include <asm/pgtable.h> #include "kvm-s390.h" #include "gaccess.h" +#include <asm/switch_to.h> union asce { unsigned long val; @@ -207,6 +208,54 @@ union raddress { unsigned long pfra : 52; /* Page-Frame Real Address */ }; +union alet { + u32 val; + struct { + u32 reserved : 7; + u32 p : 1; + u32 alesn : 8; + u32 alen : 16; + }; +}; + +union ald { + u32 val; + struct { + u32 : 1; + u32 alo : 24; + u32 all : 7; + }; +}; + +struct ale { + unsigned long i : 1; /* ALEN-Invalid Bit */ + unsigned long : 5; + unsigned long fo : 1; /* Fetch-Only Bit */ + unsigned long p : 1; /* Private Bit */ + unsigned long alesn : 8; /* Access-List-Entry Sequence Number */ + unsigned long aleax : 16; /* Access-List-Entry Authorization Index */ + unsigned long : 32; + unsigned long : 1; + unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ + unsigned long : 6; + unsigned long astesn : 32; /* ASTE Sequence Number */ +} __packed; + +struct aste { + unsigned long i : 1; /* ASX-Invalid Bit */ + unsigned long ato : 29; /* Authority-Table Origin */ + unsigned long : 1; + unsigned long b : 1; /* Base-Space Bit */ + unsigned long ax : 16; /* Authorization Index */ + unsigned long atl : 12; /* Authority-Table Length */ + unsigned long : 2; + unsigned long ca : 1; /* Controlled-ASN Bit */ + unsigned long ra : 1; /* Reusable-ASN Bit */ + unsigned long asce : 64; /* Address-Space-Control Element */ + unsigned long ald : 32; + unsigned long astesn : 32; + /* .. more fields there */ +} __packed; int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu) ipte_unlock_simple(vcpu); } -static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu) +static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, + int write) +{ + union alet alet; + struct ale ale; + struct aste aste; + unsigned long ald_addr, authority_table_addr; + union ald ald; + int eax, rc; + u8 authority_table; + + if (ar >= NUM_ACRS) + return -EINVAL; + + save_access_regs(vcpu->run->s.regs.acrs); + alet.val = vcpu->run->s.regs.acrs[ar]; + + if (ar == 0 || alet.val == 0) { + asce->val = vcpu->arch.sie_block->gcr[1]; + return 0; + } else if (alet.val == 1) { + asce->val = vcpu->arch.sie_block->gcr[7]; + return 0; + } + + if (alet.reserved) + return PGM_ALET_SPECIFICATION; + + if (alet.p) + ald_addr = vcpu->arch.sie_block->gcr[5]; + else + ald_addr = vcpu->arch.sie_block->gcr[2]; + ald_addr &= 0x7fffffc0; + + rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald)); + if (rc) + return rc; + + if (alet.alen / 8 > ald.all) + return PGM_ALEN_TRANSLATION; + + if (0x7fffffff - ald.alo * 128 < alet.alen * 16) + return PGM_ADDRESSING; + + rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale, + sizeof(struct ale)); + if (rc) + return rc; + + if (ale.i == 1) + return PGM_ALEN_TRANSLATION; + if (ale.alesn != alet.alesn) + return PGM_ALE_SEQUENCE; + + rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste)); + if (rc) + return rc; + + if (aste.i) + return PGM_ASTE_VALIDITY; + if (aste.astesn != ale.astesn) + return PGM_ASTE_SEQUENCE; + + if (ale.p == 1) { + eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff; + if (ale.aleax != eax) { + if (eax / 16 > aste.atl) + return PGM_EXTENDED_AUTHORITY; + + authority_table_addr = aste.ato * 4 + eax / 4; + + rc = read_guest_real(vcpu, authority_table_addr, + &authority_table, + sizeof(u8)); + if (rc) + return rc; + + if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0) + return PGM_EXTENDED_AUTHORITY; + } + } + + if (ale.fo == 1 && write) + return PGM_PROTECTION; + + asce->val = aste.asce; + return 0; +} + +struct trans_exc_code_bits { + unsigned long addr : 52; /* Translation-exception Address */ + unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ + unsigned long : 6; + unsigned long b60 : 1; + unsigned long b61 : 1; + unsigned long as : 2; /* ASCE Identifier */ +}; + +enum { + FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ + FSI_STORE = 1, /* Exception was due to store operation */ + FSI_FETCH = 2 /* Exception was due to fetch operation */ +}; + +static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, + ar_t ar, int write) { + int rc; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + struct trans_exc_code_bits *tec_bits; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw_bits(*psw).as; + + if (!psw_bits(*psw).t) { + asce->val = 0; + asce->r = 1; + return 0; + } + switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { case PSW_AS_PRIMARY: - return vcpu->arch.sie_block->gcr[1]; + asce->val = vcpu->arch.sie_block->gcr[1]; + return 0; case PSW_AS_SECONDARY: - return vcpu->arch.sie_block->gcr[7]; + asce->val = vcpu->arch.sie_block->gcr[7]; + return 0; case PSW_AS_HOME: - return vcpu->arch.sie_block->gcr[13]; + asce->val = vcpu->arch.sie_block->gcr[13]; + return 0; + case PSW_AS_ACCREG: + rc = ar_translation(vcpu, asce, ar, write); + switch (rc) { + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + vcpu->arch.pgm.exc_access_id = ar; + break; + case PGM_PROTECTION: + tec_bits->b60 = 1; + tec_bits->b61 = 1; + break; + } + if (rc > 0) + pgm->code = rc; + return rc; } return 0; } @@ -330,10 +521,11 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @vcpu: virtual cpu * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored + * @asce: effective asce * @write: indicates if access is a write access * * Translate a guest virtual address into a guest absolute address by means - * of dynamic address translation as specified by the architecuture. + * of dynamic address translation as specified by the architecture. * If the resulting absolute address is not available in the configuration * an addressing exception is indicated and @gpa will not be changed. * @@ -345,7 +537,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * by the architecture */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, - unsigned long *gpa, int write) + unsigned long *gpa, const union asce asce, + int write) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -354,12 +547,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, union ctlreg0 ctlreg0; unsigned long ptr; int edat1, edat2; - union asce asce; ctlreg0.val = vcpu->arch.sie_block->gcr[0]; edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); - asce.val = get_vcpu_asce(vcpu); if (asce.r) goto real_address; ptr = asce.origin * 4096; @@ -506,48 +697,30 @@ static inline int is_low_address(unsigned long ga) return (ga & ~0x11fful) == 0; } -static int low_address_protection_enabled(struct kvm_vcpu *vcpu) +static int low_address_protection_enabled(struct kvm_vcpu *vcpu, + const union asce asce) { union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; psw_t *psw = &vcpu->arch.sie_block->gpsw; - union asce asce; if (!ctlreg0.lap) return 0; - asce.val = get_vcpu_asce(vcpu); if (psw_bits(*psw).t && asce.p) return 0; return 1; } -struct trans_exc_code_bits { - unsigned long addr : 52; /* Translation-exception Address */ - unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ - unsigned long : 7; - unsigned long b61 : 1; - unsigned long as : 2; /* ASCE Identifier */ -}; - -enum { - FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ - FSI_STORE = 1, /* Exception was due to store operation */ - FSI_FETCH = 2 /* Exception was due to fetch operation */ -}; - static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - int write) + const union asce asce, int write) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; struct trans_exc_code_bits *tec_bits; int lap_enabled, rc; - memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; - lap_enabled = low_address_protection_enabled(vcpu); + lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; @@ -557,7 +730,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, write); + rc = guest_translate(vcpu, ga, pages, asce, write); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -578,7 +751,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, return 0; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len, int write) { psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -591,20 +764,19 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, if (!len) return 0; - /* Access register mode is not supported yet. */ - if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) - return -EOPNOTSUPP; + rc = get_vcpu_asce(vcpu, &asce, ar, write); + if (rc) + return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; pages = pages_array; if (nr_pages > ARRAY_SIZE(pages_array)) pages = vmalloc(nr_pages * sizeof(unsigned long)); if (!pages) return -ENOMEM; - asce.val = get_vcpu_asce(vcpu); need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); @@ -652,7 +824,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, int write) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; @@ -661,26 +833,21 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, union asce asce; int rc; - /* Access register mode is not supported yet. */ - if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) - return -EOPNOTSUPP; - gva = kvm_s390_logical_to_effective(vcpu, gva); - memset(pgm, 0, sizeof(*pgm)); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec->as = psw_bits(*psw).as; - tec->fsi = write ? FSI_STORE : FSI_FETCH; + rc = get_vcpu_asce(vcpu, &asce, ar, write); tec->addr = gva >> PAGE_SHIFT; - if (is_low_address(gva) && low_address_protection_enabled(vcpu)) { + if (rc) + return rc; + if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { if (write) { rc = pgm->code = PGM_PROTECTION; return rc; } } - asce.val = get_vcpu_asce(vcpu); if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, write); + rc = guest_translate(vcpu, gva, gpa, asce, write); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; @@ -697,28 +864,51 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, } /** - * kvm_s390_check_low_addr_protection - check for low-address protection - * @ga: Guest address + * check_gva_range - test a range of guest virtual addresses for accessibility + */ +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, + unsigned long length, int is_write) +{ + unsigned long gpa; + unsigned long currlen; + int rc = 0; + + ipte_lock(vcpu); + while (length > 0 && !rc) { + currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); + rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + gva += currlen; + length -= currlen; + } + ipte_unlock(vcpu); + + return rc; +} + +/** + * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @gra: Guest real address * * Checks whether an address is subject to low-address protection and set * up vcpu->arch.pgm accordingly if necessary. * * Return: 0 if no protection exception, or PGM_PROTECTION if protected. */ -int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga) +int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; struct trans_exc_code_bits *tec_bits; + union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; - if (!is_low_address(ga) || !low_address_protection_enabled(vcpu)) + if (!ctlreg0.lap || !is_low_address(gra)) return 0; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; tec_bits->fsi = FSI_STORE; tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = ga >> PAGE_SHIFT; + tec_bits->addr = gra >> PAGE_SHIFT; pgm->code = PGM_PROTECTION; return pgm->code; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 0149cf15058a..ef03726cc661 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -156,9 +156,11 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, } int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, int write); +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, + unsigned long length, int is_write); -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len, int write); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, @@ -168,6 +170,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * write_guest - copy data from kernel space to guest space * @vcpu: virtual cpu * @ga: guest address + * @ar: access register * @data: source address in kernel space * @len: number of bytes to copy * @@ -176,8 +179,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * If DAT is off data will be copied to guest real or absolute memory. * If DAT is on data will be copied to the address space as specified by * the address space bits of the PSW: - * Primary, secondory or home space (access register mode is currently not - * implemented). + * Primary, secondary, home space or access register mode. * The addressing mode of the PSW is also inspected, so that address wrap * around is taken into account for 24-, 31- and 64-bit addressing mode, * if the to be copied data crosses page boundaries in guest address space. @@ -210,16 +212,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * if data has been changed in guest space in case of an exception. */ static inline __must_check -int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, 1); } /** * read_guest - copy data from guest space to kernel space * @vcpu: virtual cpu * @ga: guest address + * @ar: access register * @data: destination address in kernel space * @len: number of bytes to copy * @@ -229,10 +232,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, * data will be copied from guest space to kernel space. */ static inline __must_check -int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, 0); } /** @@ -330,6 +333,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, void ipte_lock(struct kvm_vcpu *vcpu); void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); -int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga); +int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 3e8d4092ce30..e97b3455d7e6 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu, if (!wp_info->old_data) return -ENOMEM; /* try to backup the original value */ - ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data, - wp_info->len); + ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data, + wp_info->len); if (ret) { kfree(wp_info->old_data); wp_info->old_data = NULL; @@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) continue; /* refetch the wp data and compare it to the old value */ - if (!read_guest(vcpu, wp_info->phys_addr, temp, - wp_info->len)) { + if (!read_guest_abs(vcpu, wp_info->phys_addr, temp, + wp_info->len)) { if (memcmp(temp, wp_info->old_data, wp_info->len)) { kfree(temp); return wp_info; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index bebd2157edd0..9e3779e3e496 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; pgm_info->mon_code = vcpu->arch.sie_block->tecmc; break; + case PGM_VECTOR_PROCESSING: case PGM_DATA: pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; break; @@ -319,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - &srcaddr, 0); + reg2, &srcaddr, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -328,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - &dstaddr, 1); + reg1, &dstaddr, 1); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 073b5f387d1d..9de47265ef73 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008,2014 + * Copyright IBM Corp. 2008, 2015 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -17,9 +17,12 @@ #include <linux/signal.h> #include <linux/slab.h> #include <linux/bitmap.h> +#include <linux/vmalloc.h> #include <asm/asm-offsets.h> +#include <asm/dis.h> #include <asm/uaccess.h> #include <asm/sclp.h> +#include <asm/isc.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -32,11 +35,6 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 -static int is_ioint(u64 type) -{ - return ((type & 0xfffe0000u) != 0xfffe0000u); -} - int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -72,70 +70,45 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) return 1; } -static u64 int_word_to_isc_bits(u32 int_word) +static int ckc_irq_pending(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ckc < + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + return 0; + return ckc_interrupts_enabled(vcpu); +} + +static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) +{ + return !psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x400ul); +} + +static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.sie_block->cputm >> 63) && + cpu_timer_interrupts_enabled(vcpu); +} + +static inline int is_ioirq(unsigned long irq_type) { - u8 isc = (int_word & 0x38000000) >> 27; + return ((irq_type >= IRQ_PEND_IO_ISC_0) && + (irq_type <= IRQ_PEND_IO_ISC_7)); +} +static uint64_t isc_to_isc_bits(int isc) +{ return (0x80 >> isc) << 24; } -static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static inline u8 int_word_to_isc(u32 int_word) { - switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) - return 1; - return 0; - case KVM_S390_INT_EMERGENCY: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) - return 1; - return 0; - case KVM_S390_INT_CLOCK_COMP: - return ckc_interrupts_enabled(vcpu); - case KVM_S390_INT_CPU_TIMER: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x400ul) - return 1; - return 0; - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) - return 1; - return 0; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_SIGP_SET_PREFIX: - case KVM_S390_RESTART: - return 1; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) - return 1; - return 0; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[6] & - int_word_to_isc_bits(inti->io.io_int_word)) - return 1; - return 0; - default: - printk(KERN_WARNING "illegal interrupt type %llx\n", - inti->type); - BUG(); - } - return 0; + return (int_word & 0x38000000) >> 27; +} + +static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.float_int.pending_irqs; } static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) @@ -143,12 +116,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) return vcpu->arch.local_int.pending_irqs; } -static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +static unsigned long disable_iscs(struct kvm_vcpu *vcpu, + unsigned long active_mask) +{ + int i; + + for (i = 0; i <= MAX_ISC; i++) + if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) + active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + + return active_mask; +} + +static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) { - unsigned long active_mask = pending_local_irqs(vcpu); + unsigned long active_mask; + + active_mask = pending_local_irqs(vcpu); + active_mask |= pending_floating_irqs(vcpu); if (psw_extint_disabled(vcpu)) active_mask &= ~IRQ_PEND_EXT_MASK; + if (psw_ioint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_IO_MASK; + else + active_mask = disable_iscs(vcpu, active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) @@ -157,8 +149,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + if (!(vcpu->arch.sie_block->gcr[14] & + vcpu->kvm->arch.float_int.mchk.cr14)) + __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); /* * STOP irqs will never be actively delivered. They are triggered via @@ -200,6 +197,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) +{ + if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK)) + return; + else if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; +} + static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) { if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) @@ -226,47 +233,17 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) __set_cpuflag(vcpu, CPUSTAT_STOP_INT); } -/* Set interception request for non-deliverable local interrupts */ -static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +/* Set interception request for non-deliverable interrupts */ +static void set_intercept_indicators(struct kvm_vcpu *vcpu) { + set_intercept_indicators_io(vcpu); set_intercept_indicators_ext(vcpu); set_intercept_indicators_mchk(vcpu); set_intercept_indicators_stop(vcpu); } -static void __set_intercept_indicator(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - switch (inti->type) { - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_EXT_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR0; - break; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - vcpu->arch.sie_block->ictl |= ICTL_LPSW; - else - vcpu->arch.sie_block->lctl |= LCTL_CR14; - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_IO_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR6; - break; - default: - BUG(); - } -} - static u16 get_ilc(struct kvm_vcpu *vcpu) { - const unsigned short table[] = { 2, 4, 4, 6 }; - switch (vcpu->arch.sie_block->icptcode) { case ICPT_INST: case ICPT_INSTPROGI: @@ -274,7 +251,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) case ICPT_PARTEXEC: case ICPT_IOINST: /* last instruction only stored for these icptcodes */ - return table[vcpu->arch.sie_block->ipa >> 14]; + return insn_length(vcpu->arch.sie_block->ipa >> 8); case ICPT_PROGI: return vcpu->arch.sie_block->pgmilc; default: @@ -350,38 +327,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) { + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_mchk_info mchk; - int rc; + struct kvm_s390_mchk_info mchk = {}; + unsigned long adtl_status_addr; + int deliver = 0; + int rc = 0; + spin_lock(&fi->lock); spin_lock(&li->lock); - mchk = li->irq.mchk; + if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) || + test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) { + /* + * If there was an exigent machine check pending, then any + * repressible machine checks that might have been pending + * are indicated along with it, so always clear bits for + * repressible and exigent interrupts + */ + mchk = li->irq.mchk; + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + deliver = 1; + } /* - * If there was an exigent machine check pending, then any repressible - * machine checks that might have been pending are indicated along - * with it, so always clear both bits + * We indicate floating repressible conditions along with + * other pending conditions. Channel Report Pending and Channel + * Subsystem damage are the only two and and are indicated by + * bits in mcic and masked in cr14. */ - clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); - clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); - memset(&li->irq.mchk, 0, sizeof(mchk)); + if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + mchk.mcic |= fi->mchk.mcic; + mchk.cr14 |= fi->mchk.cr14; + memset(&fi->mchk, 0, sizeof(mchk)); + deliver = 1; + } spin_unlock(&li->lock); + spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk.cr14, mchk.mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, mchk.mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk.failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk.fixed_logout, sizeof(mchk.fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (deliver) { + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, + &adtl_status_addr, + sizeof(unsigned long)); + rc |= kvm_s390_vcpu_store_adtl_status(vcpu, + adtl_status_addr); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, + sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + } return rc ? -EFAULT : 0; } @@ -484,7 +495,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; - int rc = 0; + int rc = 0, nullifying = false; u16 ilc = get_ilc(vcpu); spin_lock(&li->lock); @@ -509,6 +520,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) case PGM_LX_TRANSLATION: case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: + nullifying = true; + /* fall through */ case PGM_SPACE_SWITCH: rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); @@ -521,6 +534,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) case PGM_EXTENDED_AUTHORITY: rc = put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); + nullifying = true; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -534,6 +548,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *)__LC_EXC_ACCESS_ID); rc |= put_guest_lc(vcpu, pgm_info.op_access_id, (u8 *)__LC_OP_ACCESS_ID); + nullifying = true; break; case PGM_MONITOR: rc = put_guest_lc(vcpu, pgm_info.mon_class_nr, @@ -541,6 +556,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, pgm_info.mon_code, (u64 *)__LC_MON_CODE); break; + case PGM_VECTOR_PROCESSING: case PGM_DATA: rc = put_guest_lc(vcpu, pgm_info.data_exc_code, (u32 *)__LC_DATA_EXC_CODE); @@ -551,6 +567,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; + case PGM_STACK_FULL: + case PGM_STACK_EMPTY: + case PGM_STACK_SPECIFICATION: + case PGM_STACK_TYPE: + case PGM_STACK_OPERATION: + case PGM_TRACE_TABEL: + case PGM_CRYPTO_OPERATION: + nullifying = true; + break; } if (pgm_info.code & PGM_PER) { @@ -564,7 +589,12 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } + if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) + kvm_s390_rewind_psw(vcpu, ilc); + rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, + (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, @@ -574,16 +604,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } -static int __must_check __deliver_service(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_service(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_ext_info ext; + int rc = 0; + + spin_lock(&fi->lock); + if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) { + spin_unlock(&fi->lock); + return 0; + } + ext = fi->srv_signal; + memset(&fi->srv_signal, 0, sizeof(ext)); + clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); + ext.ext_params); vcpu->stat.deliver_service_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, + ext.ext_params, 0); rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); @@ -591,106 +632,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, + rc |= put_guest_lc(vcpu, ext.ext_params, (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, - KVM_S390_INT_PFAULT_DONE, 0, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT], + struct kvm_s390_interrupt_info, + list); + if (inti) { + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_PFAULT] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_PFAULT])) + clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } -static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO], + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, + "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_VIRTIO] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO])) + clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } static int __must_check __deliver_io(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) + unsigned long irq_type) { - int rc; + struct list_head *isc_list; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti = NULL; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr, - ((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word); - - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - return rc ? -EFAULT : 0; -} + fi = &vcpu->kvm->arch.float_int; -static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - struct kvm_s390_mchk_info *mchk = &inti->mchk; - int rc; + spin_lock(&fi->lock); + isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + inti = list_first_entry_or_null(isc_list, + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + } + if (list_empty(isc_list)) + clear_bit(irq_type, &fi->pending_irqs); + spin_unlock(&fi->lock); + + if (inti) { + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + kfree(inti); + } - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk->mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk->cr14, mchk->mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, mchk->mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk->failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk->fixed_logout, sizeof(mchk->fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); return rc ? -EFAULT : 0; } @@ -698,6 +779,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_MCHK_REP] = __deliver_machine_check, [IRQ_PEND_PROG] = __deliver_prog, [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, @@ -706,36 +788,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_RESTART] = __deliver_restart, [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, + [IRQ_PEND_EXT_SERVICE] = __deliver_service, + [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, + [IRQ_PEND_VIRTIO] = __deliver_virtio, }; -static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - int rc; - - switch (inti->type) { - case KVM_S390_INT_SERVICE: - rc = __deliver_service(vcpu, inti); - break; - case KVM_S390_INT_PFAULT_DONE: - rc = __deliver_pfault_done(vcpu, inti); - break; - case KVM_S390_INT_VIRTIO: - rc = __deliver_virtio(vcpu, inti); - break; - case KVM_S390_MCHK: - rc = __deliver_mchk_floating(vcpu, inti); - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - rc = __deliver_io(vcpu, inti); - break; - default: - BUG(); - } - - return rc; -} - /* Check whether an external call is pending (deliverable or not) */ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -751,21 +808,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) { - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *inti; int rc; - rc = !!deliverable_local_irqs(vcpu); - - if ((!rc) && atomic_read(&fi->active)) { - spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&fi->lock); - } + rc = !!deliverable_irqs(vcpu); if (!rc && kvm_cpu_has_pending_timer(vcpu)) rc = 1; @@ -784,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) - return 0; - if (!ckc_interrupts_enabled(vcpu)) - return 0; - return 1; + return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) @@ -884,60 +924,45 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; deliver_irq_t func; - int deliver; int rc = 0; unsigned long irq_type; - unsigned long deliverable_irqs; + unsigned long irqs; __reset_intercept_indicators(vcpu); /* pending ckc conditions might have been invalidated */ clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); - if (kvm_cpu_has_pending_timer(vcpu)) + if (ckc_irq_pending(vcpu)) set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + /* pending cpu timer conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + if (cpu_timer_irq_pending(vcpu)) + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + do { - deliverable_irqs = deliverable_local_irqs(vcpu); + irqs = deliverable_irqs(vcpu); /* bits are in the order of interrupt priority */ - irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); if (irq_type == IRQ_PEND_COUNT) break; - func = deliver_irq_funcs[irq_type]; - if (!func) { - WARN_ON_ONCE(func == NULL); - clear_bit(irq_type, &li->pending_irqs); - continue; + if (is_ioirq(irq_type)) { + rc = __deliver_io(vcpu, irq_type); + } else { + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); } - rc = func(vcpu); - } while (!rc && irq_type != IRQ_PEND_COUNT); + if (rc) + break; + } while (!rc); - set_intercept_indicators_local(vcpu); - - if (!rc && atomic_read(&fi->active)) { - do { - deliver = 0; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - fi->irq_count--; - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); - if (deliver) { - rc = __deliver_floating_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } + set_intercept_indicators(vcpu); return rc; } @@ -1172,80 +1197,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) return 0; } +static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, + int isc, u32 schid) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + struct kvm_s390_interrupt_info *iter; + u16 id = (schid & 0xffff0000U) >> 16; + u16 nr = schid & 0x0000ffffU; + spin_lock(&fi->lock); + list_for_each_entry(iter, isc_list, list) { + if (schid && (id != iter->io.subchannel_id || + nr != iter->io.subchannel_nr)) + continue; + /* found an appropriate entry */ + list_del_init(&iter->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + if (list_empty(isc_list)) + clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + spin_unlock(&fi->lock); + return iter; + } + spin_unlock(&fi->lock); + return NULL; +} + +/* + * Dequeue and return an I/O interrupt matching any of the interruption + * subclasses as designated by the isc mask in cr6 and the schid (if != 0). + */ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid) + u64 isc_mask, u32 schid) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int isc; + + for (isc = 0; isc <= MAX_ISC && !inti; isc++) { + if (isc_mask & isc_to_isc_bits(isc)) + inti = get_io_int(kvm, isc, schid); + } + return inti; +} + +#define SCCB_MASK 0xFFFFFFF8 +#define SCCB_EVENT_PENDING 0x3 + +static int __inject_service(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; + /* + * Early versions of the QEMU s390 bios will inject several + * service interrupts after another without handling a + * condition code indicating busy. + * We will silently ignore those superfluous sccb values. + * A future version of QEMU will take care of serialization + * of servc requests + */ + if (fi->srv_signal.ext_params & SCCB_MASK) + goto out; + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK; + set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); +out: + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_virtio(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_VIRTIO] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]); + set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +static int __inject_pfault_done(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_PFAULT] >= + (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_PFAULT] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]); + set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +#define CR_PENDING_SUBCLASS 28 +static int __inject_float_mchk(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS); + fi->mchk.mcic |= inti->mchk.mcic; + set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs); + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct list_head *list; + int isc; - if ((!schid && !cr6) || (schid && cr6)) - return NULL; fi = &kvm->arch.float_int; spin_lock(&fi->lock); - inti = NULL; - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (cr6 && - ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) - continue; - if (schid) { - if (((schid & 0x00000000ffff0000) >> 16) != - iter->io.subchannel_id) - continue; - if ((schid & 0x000000000000ffff) != - iter->io.subchannel_nr) - continue; - } - inti = iter; - break; - } - if (inti) { - list_del_init(&inti->list); - fi->irq_count--; + if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); + fi->counters[FIRQ_CNTR_IO] += 1; + + isc = int_word_to_isc(inti->io.io_int_word); + list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + list_add_tail(&inti->list, list); + set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); spin_unlock(&fi->lock); - return inti; + return 0; } static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *iter; struct kvm_vcpu *dst_vcpu = NULL; int sigcpu; - int rc = 0; + u64 type = READ_ONCE(inti->type); + int rc; fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + + switch (type) { + case KVM_S390_MCHK: + rc = __inject_float_mchk(kvm, inti); + break; + case KVM_S390_INT_VIRTIO: + rc = __inject_virtio(kvm, inti); + break; + case KVM_S390_INT_SERVICE: + rc = __inject_service(kvm, inti); + break; + case KVM_S390_INT_PFAULT_DONE: + rc = __inject_pfault_done(kvm, inti); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + rc = __inject_io(kvm, inti); + break; + default: rc = -EINVAL; - goto unlock_fi; } - fi->irq_count++; - if (!is_ioint(inti->type)) { - list_add_tail(&inti->list, &fi->list); - } else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + if (rc) + return rc; - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - if (atomic_read(&kvm->online_vcpus) == 0) - goto unlock_fi; sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { do { @@ -1257,7 +1384,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - switch (inti->type) { + switch (type) { case KVM_S390_MCHK: atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); break; @@ -1270,9 +1397,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); -unlock_fi: - spin_unlock(&fi->lock); - return rc; + return 0; + } int kvm_s390_inject_vm(struct kvm *kvm, @@ -1332,10 +1458,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, return rc; } -void kvm_s390_reinject_io_int(struct kvm *kvm, +int kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { - __inject_vm(kvm, inti); + return __inject_vm(kvm, inti); } int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, @@ -1388,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) spin_unlock(&li->lock); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc; - spin_lock(&li->lock); switch (irq->type) { case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", @@ -1433,83 +1557,130 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) default: rc = -EINVAL; } + + return rc; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + spin_lock(&li->lock); + rc = do_inject_vcpu(vcpu, irq); spin_unlock(&li->lock); if (!rc) kvm_s390_vcpu_wakeup(vcpu); return rc; } -void kvm_s390_clear_float_irqs(struct kvm *kvm) +static inline void clear_irq_list(struct list_head *_list) { - struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *n, *inti = NULL; + struct kvm_s390_interrupt_info *inti, *n; - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { + list_for_each_entry_safe(inti, n, _list, list) { list_del(&inti->list); kfree(inti); } - fi->irq_count = 0; - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); } -static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, - u8 *addr) +static void inti_to_irq(struct kvm_s390_interrupt_info *inti, + struct kvm_s390_irq *irq) { - struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; - struct kvm_s390_irq irq = {0}; - - irq.type = inti->type; + irq->type = inti->type; switch (inti->type) { case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_SERVICE: - irq.u.ext = inti->ext; + irq->u.ext = inti->ext; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - irq.u.io = inti->io; + irq->u.io = inti->io; break; - case KVM_S390_MCHK: - irq.u.mchk = inti->mchk; - break; - default: - return -EINVAL; } +} - if (copy_to_user(uptr, &irq, sizeof(irq))) - return -EFAULT; +void kvm_s390_clear_float_irqs(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + int i; - return 0; -} + spin_lock(&fi->lock); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + clear_irq_list(&fi->lists[i]); + for (i = 0; i < FIRQ_MAX_COUNT; i++) + fi->counters[i] = 0; + spin_unlock(&fi->lock); +}; -static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; + struct kvm_s390_irq *buf; + struct kvm_s390_irq *irq; + int max_irqs; int ret = 0; int n = 0; + int i; + + if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) + return -EINVAL; + + /* + * We are already using -ENOMEM to signal + * userspace it may retry with a bigger buffer, + * so we need to use something else for this case + */ + buf = vzalloc(len); + if (!buf) + return -ENOBUFS; + + max_irqs = len / sizeof(struct kvm_s390_irq); fi = &kvm->arch.float_int; spin_lock(&fi->lock); - - list_for_each_entry(inti, &fi->list, list) { - if (len < sizeof(struct kvm_s390_irq)) { + for (i = 0; i < FIRQ_LIST_COUNT; i++) { + list_for_each_entry(inti, &fi->lists[i], list) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + inti_to_irq(inti, &buf[n]); + n++; + } + } + if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) { + if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; - break; + goto out; } - ret = copy_irq_to_user(inti, buf); - if (ret) - break; - buf += sizeof(struct kvm_s390_irq); - len -= sizeof(struct kvm_s390_irq); + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_INT_SERVICE; + irq->u.ext = fi->srv_signal; n++; } + if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_MCHK; + irq->u.mchk = fi->mchk; + n++; +} +out: spin_unlock(&fi->lock); + if (!ret && n > 0) { + if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) + ret = -EFAULT; + } + vfree(buf); return ret < 0 ? ret : n; } @@ -1520,7 +1691,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_FLIC_GET_ALL_IRQS: - r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, attr->attr); break; default: @@ -1952,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, { return -EINVAL; } + +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq *buf; + int r = 0; + int n; + + buf = vmalloc(len); + if (!buf) + return -ENOMEM; + + if (copy_from_user((void *) buf, irqstate, len)) { + r = -EFAULT; + goto out_free; + } + + /* + * Don't allow setting the interrupt state + * when there are already interrupts pending + */ + spin_lock(&li->lock); + if (li->pending_irqs) { + r = -EBUSY; + goto out_unlock; + } + + for (n = 0; n < len / sizeof(*buf); n++) { + r = do_inject_vcpu(vcpu, &buf[n]); + if (r) + break; + } + +out_unlock: + spin_unlock(&li->lock); +out_free: + vfree(buf); + + return r; +} + +static void store_local_irq(struct kvm_s390_local_interrupt *li, + struct kvm_s390_irq *irq, + unsigned long irq_type) +{ + switch (irq_type) { + case IRQ_PEND_MCHK_EX: + case IRQ_PEND_MCHK_REP: + irq->type = KVM_S390_MCHK; + irq->u.mchk = li->irq.mchk; + break; + case IRQ_PEND_PROG: + irq->type = KVM_S390_PROGRAM_INT; + irq->u.pgm = li->irq.pgm; + break; + case IRQ_PEND_PFAULT_INIT: + irq->type = KVM_S390_INT_PFAULT_INIT; + irq->u.ext = li->irq.ext; + break; + case IRQ_PEND_EXT_EXTERNAL: + irq->type = KVM_S390_INT_EXTERNAL_CALL; + irq->u.extcall = li->irq.extcall; + break; + case IRQ_PEND_EXT_CLOCK_COMP: + irq->type = KVM_S390_INT_CLOCK_COMP; + break; + case IRQ_PEND_EXT_CPU_TIMER: + irq->type = KVM_S390_INT_CPU_TIMER; + break; + case IRQ_PEND_SIGP_STOP: + irq->type = KVM_S390_SIGP_STOP; + irq->u.stop = li->irq.stop; + break; + case IRQ_PEND_RESTART: + irq->type = KVM_S390_RESTART; + break; + case IRQ_PEND_SET_PREFIX: + irq->type = KVM_S390_SIGP_SET_PREFIX; + irq->u.prefix = li->irq.prefix; + break; + } +} + +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) +{ + uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + unsigned long pending_irqs; + struct kvm_s390_irq irq; + unsigned long irq_type; + int cpuaddr; + int n = 0; + + spin_lock(&li->lock); + pending_irqs = li->pending_irqs; + memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending, + sizeof(sigp_emerg_pending)); + spin_unlock(&li->lock); + + for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) { + memset(&irq, 0, sizeof(irq)); + if (irq_type == IRQ_PEND_EXT_EMERGENCY) + continue; + if (n + sizeof(irq) > len) + return -ENOBUFS; + store_local_irq(&vcpu->arch.local_int, &irq, irq_type); + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) { + for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) { + memset(&irq, 0, sizeof(irq)); + if (n + sizeof(irq) > len) + return -ENOBUFS; + irq.type = KVM_S390_INT_EMERGENCY; + irq.u.emerg.code = cpuaddr; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + } + + if ((sigp_ctrl & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & + CPUSTAT_ECALL_PEND)) { + if (n + sizeof(irq) > len) + return -ENOBUFS; + memset(&irq, 0, sizeof(irq)); + irq.type = KVM_S390_INT_EXTERNAL_CALL; + irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + return n; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0c3623927563..afa2bd750ffc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -25,11 +25,13 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> +#include <linux/vmalloc.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> #include <asm/pgtable.h> #include <asm/nmi.h> #include <asm/switch_to.h> +#include <asm/isc.h> #include <asm/sclp.h> #include "kvm-s390.h" #include "gaccess.h" @@ -38,6 +40,11 @@ #include "trace.h" #include "trace-s390.h" +#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ +#define LOCAL_IRQS 32 +#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ + (KVM_MAX_VCPUS + LOCAL_IRQS)) + #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -87,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, + { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, @@ -101,8 +109,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[] = { - 0xff82fffbf4fc2000UL, - 0x005c000000000000UL, + 0xffe6fffbfcfdfc40UL, + 0x205c800000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) @@ -165,16 +173,22 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: - case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: + case KVM_CAP_S390_USER_STSI: + case KVM_CAP_S390_SKEYS: + case KVM_CAP_S390_IRQ_STATE: r = 1; break; + case KVM_CAP_S390_MEM_OP: + r = MEM_OP_MAX_SIZE; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; @@ -185,6 +199,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; + case KVM_CAP_S390_VECTOR_REGISTERS: + r = MACHINE_HAS_VX; + break; default: r = 0; } @@ -265,6 +282,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) kvm->arch.user_sigp = 1; r = 0; break; + case KVM_CAP_S390_VECTOR_REGISTERS: + if (MACHINE_HAS_VX) { + set_kvm_facility(kvm->arch.model.fac->mask, 129); + set_kvm_facility(kvm->arch.model.fac->list, 129); + r = 0; + } else + r = -EINVAL; + break; + case KVM_CAP_S390_USER_STSI: + kvm->arch.user_stsi = 1; + r = 0; + break; default: r = -EINVAL; break; @@ -522,7 +551,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, sizeof(struct cpuid)); kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->kvm, proc->fac_list, + memcpy(kvm->arch.model.fac->list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -556,7 +585,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -576,10 +605,10 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp_get_ibc(); - memcpy(&mach->fac_mask, kvm_s390_fac_list_mask, - kvm_s390_fac_list_mask_size() * sizeof(u64)); + memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_U64); + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) ret = -EFAULT; kfree(mach); @@ -709,6 +738,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) return ret; } +static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) +{ + uint8_t *keys; + uint64_t hva; + unsigned long curkey; + int i, r = 0; + + if (args->flags != 0) + return -EINVAL; + + /* Is this guest using storage keys? */ + if (!mm_use_skey(current->mm)) + return KVM_S390_GET_SKEYS_NONE; + + /* Enforce sane limit on memory allocation */ + if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) + return -EINVAL; + + keys = kmalloc_array(args->count, sizeof(uint8_t), + GFP_KERNEL | __GFP_NOWARN); + if (!keys) + keys = vmalloc(sizeof(uint8_t) * args->count); + if (!keys) + return -ENOMEM; + + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + goto out; + } + + curkey = get_guest_storage_key(current->mm, hva); + if (IS_ERR_VALUE(curkey)) { + r = curkey; + goto out; + } + keys[i] = curkey; + } + + r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, + sizeof(uint8_t) * args->count); + if (r) + r = -EFAULT; +out: + kvfree(keys); + return r; +} + +static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) +{ + uint8_t *keys; + uint64_t hva; + int i, r = 0; + + if (args->flags != 0) + return -EINVAL; + + /* Enforce sane limit on memory allocation */ + if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) + return -EINVAL; + + keys = kmalloc_array(args->count, sizeof(uint8_t), + GFP_KERNEL | __GFP_NOWARN); + if (!keys) + keys = vmalloc(sizeof(uint8_t) * args->count); + if (!keys) + return -ENOMEM; + + r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, + sizeof(uint8_t) * args->count); + if (r) { + r = -EFAULT; + goto out; + } + + /* Enable storage key handling for the guest */ + s390_enable_skey(); + + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + goto out; + } + + /* Lowest order bit is reserved */ + if (keys[i] & 0x01) { + r = -EINVAL; + goto out; + } + + r = set_guest_storage_key(current->mm, hva, + (unsigned long)keys[i], 0); + if (r) + goto out; + } +out: + kvfree(keys); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -768,6 +899,26 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_vm_has_attr(kvm, &attr); break; } + case KVM_S390_GET_SKEYS: { + struct kvm_s390_skeys args; + + r = -EFAULT; + if (copy_from_user(&args, argp, + sizeof(struct kvm_s390_skeys))) + break; + r = kvm_s390_get_skeys(kvm, &args); + break; + } + case KVM_S390_SET_SKEYS: { + struct kvm_s390_skeys args; + + r = -EFAULT; + if (copy_from_user(&args, argp, + sizeof(struct kvm_s390_skeys))) + break; + r = kvm_s390_set_skeys(kvm, &args); + break; + } default: r = -ENOTTY; } @@ -778,15 +929,18 @@ long kvm_arch_vm_ioctl(struct file *filp, static int kvm_s390_query_ap_config(u8 *config) { u32 fcn_code = 0x04000000UL; - u32 cc; + u32 cc = 0; + memset(config, 0, 128); asm volatile( "lgr 0,%1\n" "lgr 2,%2\n" ".long 0xb2af0000\n" /* PQAP(QCI) */ - "ipm %0\n" + "0: ipm %0\n" "srl %0,28\n" - : "=r" (cc) + "1:\n" + EX_TABLE(0b, 1b) + : "+r" (cc) : "r" (fcn_code), "r" (config) : "cc", "0", "2", "memory" ); @@ -839,9 +993,13 @@ static int kvm_s390_crypto_init(struct kvm *kvm) kvm_s390_set_crycb_format(kvm); - /* Disable AES/DEA protected key functions by default */ - kvm->arch.crypto.aes_kw = 0; - kvm->arch.crypto.dea_kw = 0; + /* Enable AES/DEA protected key functions by default */ + kvm->arch.crypto.aes_kw = 1; + kvm->arch.crypto.dea_kw = 1; + get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); return 0; } @@ -881,53 +1039,43 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); if (!kvm->arch.dbf) - goto out_nodbf; + goto out_err; /* * The architectural maximum amount of facilities is 16 kbit. To store * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the active copy (arch.model.fac->sie) and the current - * facilities set (arch.model.fac->kvm). Its address size has to be + * page to hold the guest facility list (arch.model.fac->list) and the + * facility mask (arch.model.fac->mask). Its address size has to be * 31 bits and word aligned. */ kvm->arch.model.fac = - (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!kvm->arch.model.fac) - goto out_nofac; - - memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_U64); - - /* - * If this KVM host runs *not* in a LPAR, relax the facility bits - * of the kvm facility mask by all missing facilities. This will allow - * to determine the right CPU model by means of the remaining facilities. - * Live guest migration must prohibit the migration of KVMs running in - * a LPAR to non LPAR hosts. - */ - if (!MACHINE_IS_LPAR) - for (i = 0; i < kvm_s390_fac_list_mask_size(); i++) - kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i]; + goto out_err; - /* - * Apply the kvm facility mask to limit the kvm supported/tolerated - * facility list. - */ + /* Populate the facility mask initially. */ + memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, + S390_ARCH_FAC_LIST_SIZE_BYTE); for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->kvm[i] = 0UL; + kvm->arch.model.fac->mask[i] = 0UL; } + /* Populate the facility list initially. */ + memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + S390_ARCH_FAC_LIST_SIZE_BYTE); + kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; if (kvm_s390_crypto_init(kvm) < 0) - goto out_crypto; + goto out_err; spin_lock_init(&kvm->arch.float_int.lock); - INIT_LIST_HEAD(&kvm->arch.float_int.list); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); init_waitqueue_head(&kvm->arch.ipte_wq); mutex_init(&kvm->arch.ipte_mutex); @@ -939,7 +1087,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else { kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); if (!kvm->arch.gmap) - goto out_nogmap; + goto out_err; kvm->arch.gmap->private = kvm; kvm->arch.gmap->pfault_enabled = 0; } @@ -951,15 +1099,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.start_stop_lock); return 0; -out_nogmap: +out_err: kfree(kvm->arch.crypto.crycb); -out_crypto: free_page((unsigned long)kvm->arch.model.fac); -out_nofac: debug_unregister(kvm->arch.dbf); -out_nodbf: free_page((unsigned long)(kvm->arch.sca)); -out_err: return rc; } @@ -1039,6 +1183,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 129)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1049,10 +1195,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); - save_fp_regs(vcpu->arch.host_fpregs.fprs); + if (test_kvm_facility(vcpu->kvm, 129)) + save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + else + save_fp_regs(vcpu->arch.host_fpregs.fprs); save_access_regs(vcpu->arch.host_acrs); - restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + if (test_kvm_facility(vcpu->kvm, 129)) { + restore_fp_ctl(&vcpu->run->s.regs.fpc); + restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + } else { + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + } restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); @@ -1062,11 +1216,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); + if (test_kvm_facility(vcpu->kvm, 129)) { + save_fp_ctl(&vcpu->run->s.regs.fpc); + save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + } else { + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + } save_access_regs(vcpu->run->s.regs.acrs); restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); - restore_fp_regs(vcpu->arch.host_fpregs.fprs); + if (test_kvm_facility(vcpu->kvm, 129)) + restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + else + restore_fp_regs(vcpu->arch.host_fpregs.fprs); restore_access_regs(vcpu->arch.host_acrs); } @@ -1134,6 +1296,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) return 0; } +static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; + + vcpu->arch.cpu_id = model->cpu_id; + vcpu->arch.sie_block->ibc = model->ibc; + vcpu->arch.sie_block->fac = (int) (long) model->fac->list; +} + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { int rc = 0; @@ -1142,6 +1313,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) CPUSTAT_SM | CPUSTAT_STOPPED | CPUSTAT_GED); + kvm_s390_vcpu_setup_model(vcpu); + vcpu->arch.sie_block->ecb = 6; if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; @@ -1152,8 +1325,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp_has_sigpif()) vcpu->arch.sie_block->eca |= 0x10000000U; - vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | - ICTL_TPROT; + if (test_kvm_facility(vcpu->kvm, 129)) { + vcpu->arch.sie_block->eca |= 0x00020000; + vcpu->arch.sie_block->ecd |= 0x20000000; + } + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (kvm_s390_cmma_enabled(vcpu->kvm)) { rc = kvm_s390_vcpu_setup_cmma(vcpu); @@ -1163,13 +1339,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; - mutex_lock(&vcpu->kvm->lock); - vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id; - memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm, - S390_ARCH_FAC_LIST_SIZE_BYTE); - vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc; - mutex_unlock(&vcpu->kvm->lock); - kvm_s390_vcpu_crypto_setup(vcpu); return rc; @@ -1197,6 +1366,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.host_vregs = &sie_page->vregs; vcpu->arch.sie_block->icpua = id; if (!kvm_is_ucontrol(kvm)) { @@ -1212,7 +1382,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); } - vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; @@ -1732,6 +1901,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) return 0; } +static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + u8 opcode; + int rc; + + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + + /* + * We want to inject an addressing exception, which is defined as a + * suppressing or terminating exception. However, since we came here + * by a DAT access exception, the PSW still points to the faulting + * instruction since DAT exceptions are nullifying. So we've got + * to look up the current opcode to get the length of the instruction + * to be able to forward the PSW. + */ + rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + psw->addr = __rewind_psw(*psw, -insn_length(opcode)); + + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); +} + static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { int rc = -1; @@ -1763,11 +1957,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) } } - if (rc == -1) { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - trace_kvm_s390_sie_fault(vcpu); - rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } + if (rc == -1) + rc = vcpu_post_run_fault_in_sie(vcpu); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); @@ -1983,6 +2174,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return kvm_s390_store_status_unloaded(vcpu, addr); } +/* + * store additional status at address + */ +int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, + unsigned long gpa) +{ + /* Only bits 0-53 are used for address formation */ + if (!(gpa & ~0x3ff)) + return 0; + + return write_guest_abs(vcpu, gpa & ~0x3ff, + (void *)&vcpu->run->s.regs.vrs, 512); +} + +int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!test_kvm_facility(vcpu->kvm, 129)) + return 0; + + /* + * The guest VXRS are in the host VXRs due to the lazy + * copying in vcpu load/put. Let's update our copies before we save + * it into the save area. + */ + save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + + return kvm_s390_store_adtl_status_unloaded(vcpu, addr); +} + static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); @@ -2107,6 +2327,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + void *tmpbuf = NULL; + int r, srcu_idx; + const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + + if (mop->flags & ~supported_flags) + return -EINVAL; + + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) + return -ENOMEM; + } + + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + switch (mop->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + break; + } + r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } + break; + case KVM_S390_MEMOP_LOGICAL_WRITE: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + break; + } + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } + r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + break; + default: + r = -EINVAL; + } + + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + + if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) + kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + + vfree(tmpbuf); + return r; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2116,6 +2395,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, long r; switch (ioctl) { + case KVM_S390_IRQ: { + struct kvm_s390_irq s390irq; + + r = -EFAULT; + if (copy_from_user(&s390irq, argp, sizeof(s390irq))) + break; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; + } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; struct kvm_s390_irq s390irq; @@ -2206,6 +2494,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_S390_MEM_OP: { + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) + r = kvm_s390_guest_mem_op(vcpu, &mem_op); + else + r = -EFAULT; + break; + } + case KVM_S390_SET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len > VCPU_IRQS_MAX_BUF || + irq_state.len == 0 || + irq_state.len % sizeof(struct kvm_s390_irq) > 0) { + r = -EINVAL; + break; + } + r = kvm_s390_set_irq_state(vcpu, + (void __user *) irq_state.buf, + irq_state.len); + break; + } + case KVM_S390_GET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len == 0) { + r = -EINVAL; + break; + } + r = kvm_s390_get_irq_state(vcpu, + (__u8 __user *) irq_state.buf, + irq_state.len); + break; + } default: r = -ENOTTY; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 985c2114d7ef..ca108b90ae56 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } -static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) +typedef u8 __bitwise ar_t; + +static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, - u64 *address1, u64 *address2) + u64 *address1, u64 *address2, + ar_t *ar_b1, ar_t *ar_b2) { u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; @@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; + + if (ar_b1) + *ar_b1 = base1; + if (ar_b2) + *ar_b2 = base2; } static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) @@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2 *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; } -static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) +static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + @@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) if (disp2 & 0x80000) disp2+=0xfff00000; + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; } -static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) +static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } @@ -125,10 +142,22 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } -/* test availability of facility in a kvm intance */ +/* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->kvm); + return __test_facility(nr, kvm->arch.model.fac->mask) && + __test_facility(nr, kvm->arch.model.fac->list); +} + +static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return -EINVAL; + ptr = (unsigned char *) fac_list + (nr >> 3); + *ptr |= (0x80UL >> (nr & 7)); + return 0; } /* are cpu states controlled by user space */ @@ -149,9 +178,9 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid); -void kvm_s390_reinject_io_int(struct kvm *kvm, - struct kvm_s390_interrupt_info *inti); + u64 isc_mask, u32 schid); +int kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ @@ -176,7 +205,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, + unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); void s390_vcpu_block(struct kvm_vcpu *vcpu); @@ -240,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, + void __user *buf, int len); +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, + __u8 __user *buf, int len); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index bdd9b5b17e03..d22d8ee1ff9d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) struct kvm_vcpu *cpup; s64 hostclk, val; int i, rc; + ar_t ar; u64 op2; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - op2 = kvm_s390_get_base_disp_s(vcpu); + op2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (op2 & 7) /* Operand must be on a doubleword boundary */ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, op2, &val, sizeof(val)); + rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; + ar_t ar; vcpu->stat.instruction_spx++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); /* must be word boundary */ if (operand2 & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - rc = read_guest(vcpu, operand2, &address, sizeof(address)); + rc = read_guest(vcpu, operand2, ar, &address, sizeof(address)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; + ar_t ar; vcpu->stat.instruction_stpx++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); /* must be word boundary */ if (operand2 & 3) @@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) address = kvm_s390_get_prefix(vcpu); /* get the value */ - rc = write_guest(vcpu, operand2, &address, sizeof(address)); + rc = write_guest(vcpu, operand2, ar, &address, sizeof(address)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) u16 vcpu_id = vcpu->vcpu_id; u64 ga; int rc; + ar_t ar; vcpu->stat.instruction_stap++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_s(vcpu); + ga = kvm_s390_get_base_disp_s(vcpu, &ar); if (ga & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id)); + rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -207,7 +211,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, NULL, ®2); addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; addr = kvm_s390_logical_to_effective(vcpu, addr); - if (kvm_s390_check_low_addr_protection(vcpu, addr)) + if (kvm_s390_check_low_addr_prot_real(vcpu, addr)) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); addr = kvm_s390_real_to_abs(vcpu, addr); @@ -229,18 +233,20 @@ static int handle_tpi(struct kvm_vcpu *vcpu) struct kvm_s390_interrupt_info *inti; unsigned long len; u32 tpi_data[3]; - int cc, rc; + int rc; u64 addr; + ar_t ar; - rc = 0; - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - cc = 0; + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); - if (!inti) - goto no_interrupt; - cc = 1; + if (!inti) { + kvm_s390_set_psw_cc(vcpu, 0); + return 0; + } + tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr; tpi_data[1] = inti->io.io_int_parm; tpi_data[2] = inti->io.io_int_word; @@ -250,40 +256,51 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * provided area. */ len = sizeof(tpi_data) - 4; - rc = write_guest(vcpu, addr, &tpi_data, len); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); + rc = write_guest(vcpu, addr, ar, &tpi_data, len); + if (rc) { + rc = kvm_s390_inject_prog_cond(vcpu, rc); + goto reinject_interrupt; + } } else { /* * Store the three-word I/O interruption code into * the appropriate lowcore area. */ len = sizeof(tpi_data); - if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) + if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) { + /* failed writes to the low core are not recoverable */ rc = -EFAULT; + goto reinject_interrupt; + } } + + /* irq was successfully handed to the guest */ + kfree(inti); + kvm_s390_set_psw_cc(vcpu, 1); + return 0; +reinject_interrupt: /* * If we encounter a problem storing the interruption code, the * instruction is suppressed from the guest's view: reinject the * interrupt. */ - if (!rc) + if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) { kfree(inti); - else - kvm_s390_reinject_io_int(vcpu->kvm, inti); -no_interrupt: - /* Set condition code and we're done. */ - if (!rc) - kvm_s390_set_psw_cc(vcpu, cc); + rc = -EFAULT; + } + /* don't set the cc, a pgm irq was injected or we drop to user space */ return rc ? -EFAULT : 0; } static int handle_tsch(struct kvm_vcpu *vcpu) { - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt_info *inti = NULL; + const u64 isc_mask = 0xffUL << 24; /* all iscs set */ - inti = kvm_s390_get_io_int(vcpu->kvm, 0, - vcpu->run->s.regs.gprs[1]); + /* a valid schid has at least one bit set */ + if (vcpu->run->s.regs.gprs[1]) + inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask, + vcpu->run->s.regs.gprs[1]); /* * Prepare exit to userspace. @@ -348,7 +365,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->sie >> 32; + fac = *vcpu->kvm->arch.model.fac->list >> 32; rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) @@ -386,15 +403,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) psw_compat_t new_psw; u64 addr; int rc; + ar_t ar; if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); if (!(new_psw.mask & PSW32_MASK_BASE)) @@ -412,14 +430,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; int rc; + ar_t ar; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); vcpu->arch.sie_block->gpsw = new_psw; @@ -433,18 +452,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu) u64 stidp_data = vcpu->arch.stidp_data; u64 operand2; int rc; + ar_t ar; vcpu->stat.instruction_stidp++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (operand2 & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data)); + rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -467,6 +487,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) for (n = mem->count - 1; n > 0 ; n--) memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + memset(&mem->vm[0], 0, sizeof(mem->vm[0])); mem->vm[0].cpus_total = cpus; mem->vm[0].cpus_configured = cpus; mem->vm[0].cpus_standby = 0; @@ -478,6 +499,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) ASCEBC(mem->vm[0].cpi, 16); } +static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar, + u8 fc, u8 sel1, u16 sel2) +{ + vcpu->run->exit_reason = KVM_EXIT_S390_STSI; + vcpu->run->s390_stsi.addr = addr; + vcpu->run->s390_stsi.ar = ar; + vcpu->run->s390_stsi.fc = fc; + vcpu->run->s390_stsi.sel1 = sel1; + vcpu->run->s390_stsi.sel2 = sel2; +} + static int handle_stsi(struct kvm_vcpu *vcpu) { int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; @@ -486,6 +518,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) unsigned long mem = 0; u64 operand2; int rc = 0; + ar_t ar; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -508,7 +541,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return 0; } - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (operand2 & 0xfff) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -532,16 +565,20 @@ static int handle_stsi(struct kvm_vcpu *vcpu) break; } - rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE); + rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); if (rc) { rc = kvm_s390_inject_prog_cond(vcpu, rc); goto out; } + if (vcpu->kvm->arch.user_stsi) { + insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2); + rc = -EREMOTE; + } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; - return 0; + return rc; out_no_data: kvm_s390_set_psw_cc(vcpu, 3); out: @@ -670,7 +707,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_protection(vcpu, start)) + if (kvm_s390_check_low_addr_prot_real(vcpu, start)) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); } @@ -776,13 +813,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_lctl++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rs(vcpu); + ga = kvm_s390_get_base_disp_rs(vcpu, &ar); if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -791,7 +829,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; - rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; @@ -814,13 +852,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_stctl++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rs(vcpu); + ga = kvm_s390_get_base_disp_rs(vcpu, &ar); if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -836,7 +875,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) break; reg = (reg + 1) % 16; } while (1); - rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32)); return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } @@ -847,13 +886,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_lctlg++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rsy(vcpu); + ga = kvm_s390_get_base_disp_rsy(vcpu, &ar); if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -862,7 +902,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; - rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; @@ -884,13 +924,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_stctg++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rsy(vcpu); + ga = kvm_s390_get_base_disp_rsy(vcpu, &ar); if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -906,7 +947,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) break; reg = (reg + 1) % 16; } while (1); - rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64)); return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } @@ -931,13 +972,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) unsigned long hva, gpa; int ret = 0, cc = 0; bool writable; + ar_t ar; vcpu->stat.instruction_tprot++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL); /* we only handle the Linux memory detection case: * access key == 0 @@ -946,11 +988,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only... */ cc = 1; - ret = guest_translate_address(vcpu, address1, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 23b1e86b2122..72e58bd2bee7 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) case SIGP_STORE_STATUS_AT_ADDRESS: vcpu->stat.instruction_sigp_store_status++; break; + case SIGP_STORE_ADDITIONAL_STATUS: + vcpu->stat.instruction_sigp_store_adtl_status++; + break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; break; @@ -431,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - order_code = kvm_s390_get_base_disp_rs(vcpu); + order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); if (handle_sigp_order_in_user_space(vcpu, order_code)) return -EOPNOTSUPP; @@ -473,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) int r3 = vcpu->arch.sie_block->ipa & 0x000f; u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; struct kvm_vcpu *dest_vcpu; - u8 order_code = kvm_s390_get_base_disp_rs(vcpu); + u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 753a56731951..f0b85443e060 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -287,7 +287,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev, addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); return (void __iomem *) addr + offset; } -EXPORT_SYMBOL_GPL(pci_iomap_range); +EXPORT_SYMBOL(pci_iomap_range); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { @@ -309,7 +309,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) } spin_unlock(&zpci_iomap_lock); } -EXPORT_SYMBOL_GPL(pci_iounmap); +EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) @@ -483,9 +483,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } -static void zpci_map_resources(struct zpci_dev *zdev) +static void zpci_map_resources(struct pci_dev *pdev) { - struct pci_dev *pdev = zdev->pdev; resource_size_t len; int i; @@ -499,9 +498,8 @@ static void zpci_map_resources(struct zpci_dev *zdev) } } -static void zpci_unmap_resources(struct zpci_dev *zdev) +static void zpci_unmap_resources(struct pci_dev *pdev) { - struct pci_dev *pdev = zdev->pdev; resource_size_t len; int i; @@ -651,7 +649,7 @@ int pcibios_add_device(struct pci_dev *pdev) zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; - zpci_map_resources(zdev); + zpci_map_resources(pdev); for (i = 0; i < PCI_BAR_COUNT; i++) { res = &pdev->resource[i]; @@ -663,6 +661,11 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } +void pcibios_release_device(struct pci_dev *pdev) +{ + zpci_unmap_resources(pdev); +} + int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = get_zdev(pdev); @@ -670,7 +673,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); - zpci_map_resources(zdev); return pci_enable_resources(pdev, mask); } @@ -679,7 +681,6 @@ void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); - zpci_unmap_resources(zdev); zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); zdev->pdev = NULL; @@ -688,7 +689,8 @@ void pcibios_disable_device(struct pci_dev *pdev) #ifdef CONFIG_HIBERNATE_CALLBACKS static int zpci_restore(struct device *dev) { - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); int ret = 0; if (zdev->state != ZPCI_FN_STATE_ONLINE) @@ -698,7 +700,7 @@ static int zpci_restore(struct device *dev) if (ret) goto out; - zpci_map_resources(zdev); + zpci_map_resources(pdev); zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, zdev->start_dma + zdev->iommu_size - 1, (u64) zdev->dma_table); @@ -709,12 +711,14 @@ out: static int zpci_freeze(struct device *dev) { - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); if (zdev->state != ZPCI_FN_STATE_ONLINE) return 0; zpci_unregister_ioat(zdev, 0); + zpci_unmap_resources(pdev); return clp_disable_fh(zdev); } diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 8aa271b3d1ad..b1bb2b72302c 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -64,8 +64,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (copy_from_user(buf, user_buffer, length)) goto out; - memcpy_toio(io_addr, buf, length); - ret = 0; + ret = zpci_memcpy_toio(io_addr, buf, length); out: if (buf != local_buf) kfree(buf); @@ -98,16 +97,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, goto out; io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); - ret = -EFAULT; - if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) + if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) { + ret = -EFAULT; goto out; - - memcpy_fromio(buf, io_addr, length); - - if (copy_to_user(user_buffer, buf, length)) + } + ret = zpci_memcpy_fromio(buf, io_addr, length); + if (ret) goto out; + if (copy_to_user(user_buffer, buf, length)) + ret = -EFAULT; - ret = 0; out: if (buf != local_buf) kfree(buf); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 96ac69c5eba0..efb00ec75805 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -86,6 +86,9 @@ config ARCH_DEFCONFIG default "arch/sparc/configs/sparc32_defconfig" if SPARC32 default "arch/sparc/configs/sparc64_defconfig" if SPARC64 +config ARCH_PROC_KCORE_TEXT + def_bool y + config IOMMU_HELPER bool default y if SPARC64 diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 4f6725ff4c33..f5b6537306f0 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, unsigned long reg_val); #endif + +#define HV_FAST_M7_GET_PERFREG 0x43 +#define HV_FAST_M7_SET_PERFREG 0x44 + +#ifndef __ASSEMBLY__ +unsigned long sun4v_m7_get_perfreg(unsigned long reg_num, + unsigned long *reg_val); +unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, + unsigned long reg_val); +#endif + /* Function numbers for HV_CORE_TRAP. */ #define HV_CORE_SET_VER 0x00 #define HV_CORE_PUTCHAR 0x01 @@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num, #define HV_GRP_SDIO 0x0108 #define HV_GRP_SDIO_ERR 0x0109 #define HV_GRP_REBOOT_DATA 0x0110 +#define HV_GRP_M7_PERF 0x0114 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 #define HV_GRP_N2_CPU 0x0202 diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h index 9b672be70dda..50d4840d9aeb 100644 --- a/arch/sparc/include/asm/io_64.h +++ b/arch/sparc/include/asm/io_64.h @@ -407,16 +407,16 @@ static inline void iounmap(volatile void __iomem *addr) { } -#define ioread8(X) readb(X) -#define ioread16(X) readw(X) -#define ioread16be(X) __raw_readw(X) -#define ioread32(X) readl(X) -#define ioread32be(X) __raw_readl(X) -#define iowrite8(val,X) writeb(val,X) -#define iowrite16(val,X) writew(val,X) -#define iowrite16be(val,X) __raw_writew(val,X) -#define iowrite32(val,X) writel(val,X) -#define iowrite32be(val,X) __raw_writel(val,X) +#define ioread8 readb +#define ioread16 readw +#define ioread16be __raw_readw +#define ioread32 readl +#define ioread32be __raw_readl +#define iowrite8 writeb +#define iowrite16 writew +#define iowrite16be __raw_writew +#define iowrite32 writel +#define iowrite32be __raw_writel /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr); diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index ec2e2e2aba7d..cc9b04a2b11b 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_SPARC_JUMP_LABEL_H #define _ASM_SPARC_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/types.h> @@ -22,8 +22,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u32 jump_label_t; struct jump_entry { @@ -32,4 +30,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h index c100dc27a0a9..176fa0ad19f1 100644 --- a/arch/sparc/include/asm/starfire.h +++ b/arch/sparc/include/asm/starfire.h @@ -12,7 +12,6 @@ extern int this_is_starfire; void check_if_starfire(void); -int starfire_hard_smp_processor_id(void); void starfire_hookup(int); unsigned int starfire_translate(unsigned long imap, unsigned int upaid); diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 88d322b67fac..07cc49e541f4 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -98,11 +98,7 @@ void sun4v_do_mna(struct pt_regs *regs, void do_privop(struct pt_regs *regs); void do_privact(struct pt_regs *regs); void do_cee(struct pt_regs *regs); -void do_cee_tl1(struct pt_regs *regs); -void do_dae_tl1(struct pt_regs *regs); -void do_iae_tl1(struct pt_regs *regs); void do_div0_tl1(struct pt_regs *regs); -void do_fpdis_tl1(struct pt_regs *regs); void do_fpieee_tl1(struct pt_regs *regs); void do_fpother_tl1(struct pt_regs *regs); void do_ill_tl1(struct pt_regs *regs); diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 5c55145bfbf0..662500fa555f 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -48,6 +48,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_VT_CPU, }, { .group = HV_GRP_T5_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, + { .group = HV_GRP_M7_PERF, }, }; static DEFINE_SPINLOCK(hvapi_lock); diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index caedf8320416..afbaba52d2f1 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg) retl nop ENDPROC(sun4v_t5_set_perfreg) + +ENTRY(sun4v_m7_get_perfreg) + mov %o1, %o4 + mov HV_FAST_M7_GET_PERFREG, %o5 + ta HV_FAST_TRAP + stx %o1, [%o4] + retl + nop +ENDPROC(sun4v_m7_get_perfreg) + +ENTRY(sun4v_m7_set_perfreg) + mov HV_FAST_M7_SET_PERFREG, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(sun4v_m7_set_perfreg) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 9ce5afe167ff..b36365f49478 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -639,10 +639,7 @@ static void pci_claim_bus_resources(struct pci_bus *bus) (unsigned long long)r->end, (unsigned int)r->flags); - if (pci_claim_resource(dev, i) == 0) - continue; - - pci_claim_bridge_resource(dev, i); + pci_claim_resource(dev, i); } } diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 7e967c8018c8..eb978c77c76a 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = { .pcr_nmi_disable = PCR_N4_PICNPT, }; +static u64 m7_pcr_read(unsigned long reg_num) +{ + unsigned long val; + + (void) sun4v_m7_get_perfreg(reg_num, &val); + + return val; +} + +static void m7_pcr_write(unsigned long reg_num, u64 val) +{ + (void) sun4v_m7_set_perfreg(reg_num, val); +} + +static const struct pcr_ops m7_pcr_ops = { + .read_pcr = m7_pcr_read, + .write_pcr = m7_pcr_write, + .read_pic = n4_pic_read, + .write_pic = n4_pic_write, + .nmi_picl_value = n4_picl_value, + .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE | + PCR_N4_UTRACE | PCR_N4_TOE | + (26 << PCR_N4_SL_SHIFT)), + .pcr_nmi_disable = PCR_N4_PICNPT, +}; static unsigned long perf_hsvc_group; static unsigned long perf_hsvc_major; @@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_T5_CPU; break; + case SUN4V_CHIP_SPARC_M7: + perf_hsvc_group = HV_GRP_M7_PERF; + break; + default: return -ENODEV; } @@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void) pcr_ops = &n5_pcr_ops; break; + case SUN4V_CHIP_SPARC_M7: + pcr_ops = &m7_pcr_ops; + break; + default: ret = -ENODEV; break; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 46a5e4508752..86eebfa3b158 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = { .num_pic_regs = 4, }; +static void sparc_m7_write_pmc(int idx, u64 val) +{ + u64 pcr; + + pcr = pcr_ops->read_pcr(idx); + /* ensure ov and ntc are reset */ + pcr &= ~(PCR_N4_OV | PCR_N4_NTC); + + pcr_ops->write_pic(idx, val & 0xffffffff); + + pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu sparc_m7_pmu = { + .event_map = niagara4_event_map, + .cache_map = &niagara4_cache_map, + .max_events = ARRAY_SIZE(niagara4_perfmon_event_map), + .read_pmc = sparc_vt_read_pmc, + .write_pmc = sparc_m7_write_pmc, + .upper_shift = 5, + .lower_shift = 5, + .event_mask = 0x7ff, + .user_bit = PCR_N4_UTRACE, + .priv_bit = PCR_N4_STRACE, + + /* We explicitly don't support hypervisor tracing. */ + .hv_bit = 0, + + .irq_bit = PCR_N4_TOE, + .upper_nop = 0, + .lower_nop = 0, + .flags = 0, + .max_hw_events = 4, + .num_pcrs = 4, + .num_pic_regs = 4, +}; static const struct sparc_pmu *sparc_pmu __read_mostly; static u64 event_encoding(u64 event_id, int idx) @@ -960,6 +996,8 @@ out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; } +static void sparc_pmu_start(struct perf_event *event, int flags); + /* On this PMU each PIC has it's own PCR control register. */ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) { @@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) struct perf_event *cp = cpuc->event[i]; struct hw_perf_event *hwc = &cp->hw; int idx = hwc->idx; - u64 enc; if (cpuc->current_idx[i] != PIC_NO_INDEX) continue; - sparc_perf_event_set_period(cp, hwc, idx); cpuc->current_idx[i] = idx; - enc = perf_event_get_enc(cpuc->events[i]); - cpuc->pcr[idx] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) - cpuc->pcr[idx] |= nop_for_index(idx); - else - cpuc->pcr[idx] |= event_encoding(enc, idx); + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: for (i = 0; i < cpuc->n_events; i++) { @@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) int i; local_irq_save(flags); - perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) } } - perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) unsigned long flags; local_irq_save(flags); - perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= sparc_pmu->max_hw_events) @@ -1394,7 +1422,6 @@ nocheck: ret = 0; out: - perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara4_pmu; return true; } + if (!strcmp(sparc_pmu_type, "sparc-m7")) { + sparc_pmu = &sparc_m7_pmu; + return true; + } return false; } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 0be7bf978cb1..46a59643bb1c 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self) printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", gp->tpc, gp->o7, gp->i7, gp->rpc); } + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); @@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void) (cpu == this_cpu ? '*' : ' '), cpu, pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3], pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]); + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index da6f1a7fc4db..61139d9924ca 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1406,11 +1406,32 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) scheduler_ipi(); } -/* This is a nop because we capture all other cpus - * anyways when making the PROM active. - */ +static void stop_this_cpu(void *dummy) +{ + prom_stopself(); +} + void smp_send_stop(void) { + int cpu; + + if (tlb_type == hypervisor) { + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; +#ifdef CONFIG_SUN_LDOMS + if (ldom_domaining_enabled) { + unsigned long hv_err; + hv_err = sun4v_cpu_stop(cpu); + if (hv_err) + printk(KERN_ERR "sun4v_cpu_stop() " + "failed err=%lu\n", hv_err); + } else +#endif + prom_stopcpu_cpuid(cpu); + } + } else + smp_call_function(stop_this_cpu, NULL, 0); } /** diff --git a/arch/sparc/kernel/starfire.c b/arch/sparc/kernel/starfire.c index 82281a566bb8..167fdfd9c837 100644 --- a/arch/sparc/kernel/starfire.c +++ b/arch/sparc/kernel/starfire.c @@ -28,11 +28,6 @@ void check_if_starfire(void) this_is_starfire = 1; } -int starfire_hard_smp_processor_id(void) -{ - return upa_readl(0x1fff40000d0UL); -} - /* * Each Starfire board has 32 registers which perform translation * and delivery of traditional interrupt packets into the extended diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c85403d0496c..30e7ddb27a3a 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -333,7 +333,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second long err; /* No need for backward compatibility. We can start fresh... */ - if (call <= SEMCTL) { + if (call <= SEMTIMEDOP) { switch (call) { case SEMOP: err = sys_semtimedop(first, ptr, diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c index 2f80d23a0a44..18147a5523d9 100644 --- a/arch/sparc/kernel/time_32.c +++ b/arch/sparc/kernel/time_32.c @@ -181,17 +181,13 @@ static struct clocksource timer_cs = { .rating = 100, .read = timer_cs_read, .mask = CLOCKSOURCE_MASK(64), - .shift = 2, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; static __init int setup_timer_cs(void) { timer_cs_enabled = 1; - timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, - timer_cs.shift); - - return clocksource_register(&timer_cs); + return clocksource_register_hz(&timer_cs, sparc_config.clock_rate); } #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index a27651e866e7..0e699745d643 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2427,6 +2427,8 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) } user_instruction_dump ((unsigned int __user *) regs->tpc); } + if (panic_on_oops) + panic("Fatal exception"); if (regs->tstate & TSTATE_PRIV) do_exit(SIGKILL); do_exit(SIGSEGV); @@ -2564,27 +2566,6 @@ void do_cee(struct pt_regs *regs) die_if_kernel("TL0: Cache Error Exception", regs); } -void do_cee_tl1(struct pt_regs *regs) -{ - exception_enter(); - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - die_if_kernel("TL1: Cache Error Exception", regs); -} - -void do_dae_tl1(struct pt_regs *regs) -{ - exception_enter(); - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - die_if_kernel("TL1: Data Access Exception", regs); -} - -void do_iae_tl1(struct pt_regs *regs) -{ - exception_enter(); - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - die_if_kernel("TL1: Instruction Access Exception", regs); -} - void do_div0_tl1(struct pt_regs *regs) { exception_enter(); @@ -2592,13 +2573,6 @@ void do_div0_tl1(struct pt_regs *regs) die_if_kernel("TL1: DIV0 Exception", regs); } -void do_fpdis_tl1(struct pt_regs *regs) -{ - exception_enter(); - dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); - die_if_kernel("TL1: FPU Disabled", regs); -} - void do_fpieee_tl1(struct pt_regs *regs) { exception_enter(); diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index b7f6334e159f..857ad4f8905f 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -8,9 +8,11 @@ .text ENTRY(memmove) /* o0=dst o1=src o2=len */ - mov %o0, %g1 + brz,pn %o2, 99f + mov %o0, %g1 + cmp %o0, %o1 - bleu,pt %xcc, memcpy + bleu,pt %xcc, 2f add %o1, %o2, %g7 cmp %g7, %o0 bleu,pt %xcc, memcpy @@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */ stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 - +99: retl mov %g1, %o0 + + /* We can't just call memcpy for these memmove cases. On some + * chips the memcpy uses cache initializing stores and when dst + * and src are close enough, those can clobber the source data + * before we've loaded it in. + */ +2: or %o0, %o1, %g7 + or %o2, %g7, %g7 + andcc %g7, 0x7, %g0 + bne,pn %xcc, 4f + nop + +3: ldx [%o1], %g7 + add %o1, 8, %o1 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bne,pt %icc, 3b + stx %g7, [%o0 - 0x8] + ba,a,pt %xcc, 99b + +4: ldub [%o1], %g7 + add %o1, 1, %o1 + subcc %o2, 1, %o2 + add %o0, 1, %o0 + bne,pt %icc, 4b + stb %g7, [%o0 - 0x1] + ba,a,pt %xcc, 99b ENDPROC(memmove) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 3ea267c53320..4ca0d6ba5ec8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2820,7 +2820,7 @@ static int __init report_memory(void) return 0; } -device_initcall(report_memory); +arch_initcall(report_memory); #ifdef CONFIG_SMP #define do_flush_tlb_kernel_range smp_flush_tlb_kernel_range diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index d412b0856c0a..00178ecf9aea 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -257,34 +257,34 @@ void update_vsyscall_tz(void) void update_vsyscall(struct timekeeper *tk) { - if (tk->tkr.clock != &cycle_counter_cs) + if (tk->tkr_mono.clock != &cycle_counter_cs) return; write_seqcount_begin(&vdso_data->tb_seq); - vdso_data->cycle_last = tk->tkr.cycle_last; - vdso_data->mask = tk->tkr.mask; - vdso_data->mult = tk->tkr.mult; - vdso_data->shift = tk->tkr.shift; + vdso_data->cycle_last = tk->tkr_mono.cycle_last; + vdso_data->mask = tk->tkr_mono.mask; + vdso_data->mult = tk->tkr_mono.mult; + vdso_data->shift = tk->tkr_mono.shift; vdso_data->wall_time_sec = tk->xtime_sec; - vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; + vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec; vdso_data->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec + vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->tkr.shift); + << tk->tkr_mono.shift); while (vdso_data->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { vdso_data->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr.shift; + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; vdso_data->monotonic_time_sec++; } vdso_data->wall_time_coarse_sec = tk->xtime_sec; - vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> - tk->tkr.shift); + vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdso_data->monotonic_time_coarse_sec = vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c2fb8a87dccb..867bc5bea8dc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -235,12 +235,10 @@ config ARCH_WANT_GENERAL_HUGETLB def_bool y config ZONE_DMA32 - bool - default X86_64 + def_bool y if X86_64 config AUDIT_ARCH - bool - default X86_64 + def_bool y if X86_64 config ARCH_SUPPORTS_OPTIMIZED_INLINING def_bool y @@ -499,6 +497,7 @@ config X86_INTEL_QUARK depends on X86_IO_APIC select IOSF_MBI select INTEL_IMR + select COMMON_CLK ---help--- Select to include support for Quark X1000 SoC. Say Y here if you have a Quark based system such as the Arduino @@ -890,7 +889,8 @@ config UP_LATE_INIT depends on !SMP && X86_LOCAL_APIC config X86_UP_APIC - bool "Local APIC support on uniprocessors" + bool "Local APIC support on uniprocessors" if !PCI_MSI + default PCI_MSI depends on X86_32 && !SMP && !X86_32_NON_STANDARD ---help--- A local APIC (Advanced Programmable Interrupt Controller) is an @@ -902,10 +902,6 @@ config X86_UP_APIC performance counters), and the NMI watchdog which detects hard lockups. -config X86_UP_APIC_MSI - def_bool y - select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI - config X86_UP_IOAPIC bool "IO-APIC support on uniprocessors" depends on X86_UP_APIC @@ -924,8 +920,8 @@ config X86_LOCAL_APIC select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ config X86_IO_APIC - def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC - depends on X86_LOCAL_APIC + def_bool y + depends on X86_LOCAL_APIC || X86_UP_IOAPIC select IRQ_DOMAIN config X86_REROUTE_FOR_BROKEN_BOOT_IRQS @@ -1144,10 +1140,10 @@ config MICROCODE_OLD_INTERFACE depends on MICROCODE config MICROCODE_INTEL_EARLY - def_bool n + bool config MICROCODE_AMD_EARLY - def_bool n + bool config MICROCODE_EARLY bool "Early load microcode" @@ -1746,14 +1742,11 @@ config KEXEC_VERIFY_SIG depends on KEXEC_FILE ---help--- This option makes kernel signature verification mandatory for - kexec_file_load() syscall. If kernel is signature can not be - verified, kexec_file_load() will fail. - - This option enforces signature verification at generic level. - One needs to enable signature verification for type of kernel - image being loaded to make sure it works. For example, enable - bzImage signature verification option to be able to load and - verify signatures of bzImage. Otherwise kernel loading will fail. + the kexec_file_load() syscall. + + In addition to that option, you need to enable signature + verification for the corresponding kernel image type being + loaded in order for this to work. config KEXEC_BZIMAGE_VERIFY_SIG bool "Enable bzImage signature verification support" diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 7083c16cccba..d7b1f655b3ef 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -14,13 +14,6 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; -struct kaslr_setup_data { - __u64 next; - __u32 type; - __u32 len; - __u8 data[1]; -} kaslr_setup_data; - #define I8254_PORT_CONTROL 0x43 #define I8254_PORT_COUNTER0 0x40 #define I8254_CMD_READBACK 0xC0 @@ -302,28 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled) -{ - struct setup_data *data; - - kaslr_setup_data.type = SETUP_KASLR; - kaslr_setup_data.len = 1; - kaslr_setup_data.next = 0; - kaslr_setup_data.data[0] = enabled; - - data = (struct setup_data *)(unsigned long)params->hdr.setup_data; - - while (data && data->next) - data = (struct setup_data *)(unsigned long)data->next; - - if (data) - data->next = (unsigned long)&kaslr_setup_data; - else - params->hdr.setup_data = (unsigned long)&kaslr_setup_data; - -} - -unsigned char *choose_kernel_location(struct boot_params *params, +unsigned char *choose_kernel_location(struct boot_params *boot_params, unsigned char *input, unsigned long input_size, unsigned char *output, @@ -335,17 +307,16 @@ unsigned char *choose_kernel_location(struct boot_params *params, #ifdef CONFIG_HIBERNATION if (!cmdline_find_option_bool("kaslr")) { debug_putstr("KASLR disabled by default...\n"); - add_kaslr_setup_data(params, 0); goto out; } #else if (cmdline_find_option_bool("nokaslr")) { debug_putstr("KASLR disabled by cmdline...\n"); - add_kaslr_setup_data(params, 0); goto out; } #endif - add_kaslr_setup_data(params, 1); + + boot_params->hdr.loadflags |= KASLR_FLAG; /* Record the various known unsafe memory ranges. */ mem_avoid_init((unsigned long)input, input_size, diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 1d7fbbcc196d..8ef964ddc18e 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -29,6 +29,7 @@ #include <asm/page_types.h> #include <asm/boot.h> #include <asm/asm-offsets.h> +#include <asm/bootparam.h> __HEAD ENTRY(startup_32) @@ -102,7 +103,7 @@ preferred_addr: * Test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) + testb $KEEP_SEGMENTS, BP_loadflags(%esi) jnz 1f cli diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 6b1766c6c082..b0c0d16ef58d 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -31,6 +31,7 @@ #include <asm/msr.h> #include <asm/processor-flags.h> #include <asm/asm-offsets.h> +#include <asm/bootparam.h> __HEAD .code32 @@ -46,7 +47,7 @@ ENTRY(startup_32) * Test KEEP_SEGMENTS flag to see if the bootloader is asking * us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) + testb $KEEP_SEGMENTS, BP_loadflags(%esi) jnz 1f cli @@ -164,7 +165,7 @@ ENTRY(startup_32) /* After gdt is loaded */ xorl %eax, %eax lldt %ax - movl $0x20, %eax + movl $__BOOT_TSS, %eax ltr %ax /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 5903089c818f..a107b935e22f 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, real_mode = rmode; + /* Clear it for solely in-kernel use */ + real_mode->hdr.loadflags &= ~KASLR_FLAG; + sanitize_boot_params(real_mode); if (real_mode->screen_info.orig_video_mode == 7) { @@ -401,8 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(real_mode, input_data, input_len, - output, + output = choose_kernel_location(real_mode, input_data, input_len, output, output_len > run_size ? output_len : run_size); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index ee3576b2666b..89dd0d78013a 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -57,7 +57,7 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* aslr.c */ -unsigned char *choose_kernel_location(struct boot_params *params, +unsigned char *choose_kernel_location(struct boot_params *boot_params, unsigned char *input, unsigned long input_size, unsigned char *output, @@ -66,7 +66,7 @@ unsigned char *choose_kernel_location(struct boot_params *params, bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(struct boot_params *params, +unsigned char *choose_kernel_location(struct boot_params *boot_params, unsigned char *input, unsigned long input_size, unsigned char *output, diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 493f3fd9f139..318b8465d302 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2) int delta = 0; while (*s1 || *s2) { - delta = *s2 - *s1; + delta = *s1 - *s2; if (delta) return delta; s1++; diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c index 748e8d06290a..aa8a96b052e3 100644 --- a/arch/x86/boot/video-mode.c +++ b/arch/x86/boot/video-mode.c @@ -22,10 +22,8 @@ /* * Common variables */ -int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ -u16 video_segment; +int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */ int force_x, force_y; /* Don't query the BIOS for cols/rows */ - int do_restore; /* Screen contents changed during mode flip */ int graphic_mode; /* Graphic mode with linear frame buffer */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 43eda284d27f..05111bb8d018 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -17,6 +17,8 @@ #include "video.h" #include "vesa.h" +static u16 video_segment; + static void store_cursor_position(void) { struct biosregs ireg, oreg; diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index 0bb25491262d..b54e0328c449 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -91,7 +91,6 @@ int mode_defined(u16 mode); /* video.c */ #define ADAPTER_VGA 2 extern int adapter; -extern u16 video_segment; extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ extern int do_restore; /* Restore screen contents */ extern int graphic_mode; /* Graphics mode with linear frame buffer */ diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 419819d6dab3..aaa1118bf01e 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -248,7 +248,7 @@ CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=y CONFIG_USB_PRINTER=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 4c311ddd973b..315b86106572 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -243,7 +243,7 @@ CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y -# CONFIG_USB_EHCI_TT_NEWSCHED is not set +CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_UHCI_HCD=y CONFIG_USB_PRINTER=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 947c6bf52c33..54f60ab41c63 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1155,7 +1155,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); if (!src) return -ENOMEM; - assoc = (src + req->cryptlen + auth_tag_len); + assoc = (src + req->cryptlen); scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); scatterwalk_map_and_copy(assoc, req->assoc, 0, req->assoclen, 0); @@ -1180,7 +1180,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) scatterwalk_done(&src_sg_walk, 0, 0); scatterwalk_done(&assoc_sg_walk, 0, 0); } else { - scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1); kfree(src); } return retval; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 26d49ebae040..225be06edc80 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -178,7 +178,7 @@ continue_block: ## 2a) PROCESS FULL BLOCKS: ################################################################ full_block: - movq $128,%rax + movl $128,%eax lea 128*8*2(block_0), block_1 lea 128*8*3(block_0), block_2 add $128*8*1, block_0 diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index a039d21986a2..a350c990dc86 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk) movq R1, 8(%rsi) popq R1 - movq $1,%rax + movl $1,%eax ret ENDPROC(twofish_enc_blk) @@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk) movq R1, 8(%rsi) popq R1 - movq $1,%rax + movl $1,%eax ret ENDPROC(twofish_dec_blk) diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile index e785b422b766..bb635c641869 100644 --- a/arch/x86/ia32/Makefile +++ b/arch/x86/ia32/Makefile @@ -3,7 +3,6 @@ # obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o -obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o obj-$(CONFIG_IA32_AOUT) += ia32_aout.o diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index d0165c9a2932..c81d35e6c7f1 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) } static int ia32_restore_sigcontext(struct pt_regs *regs, - struct sigcontext_ia32 __user *sc, - unsigned int *pax) + struct sigcontext_ia32 __user *sc) { unsigned int tmpflags, err = 0; void __user *buf; @@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, RELOAD_SEG(es); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(dx); COPY(cx); COPY(ip); COPY(ax); /* Don't touch extended registers */ COPY_SEG_CPL3(cs); @@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - - get_user_ex(*pax, &sc->ax); } get_user_catch(err); err |= restore_xstate_sig(buf, 1); + force_iret(); + return err; } @@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; - unsigned int ax; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void) set_current_blocked(&set); - if (ia32_restore_sigcontext(regs, &frame->sc, &ax)) + if (ia32_restore_sigcontext(regs, &frame->sc)) goto badframe; - return ax; + return regs->ax; badframe: signal_fault(regs, frame, "32bit sigreturn"); @@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; sigset_t set; - unsigned int ax; frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); @@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void) set_current_blocked(&set); - if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) + if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - return ax; + return regs->ax; badframe: signal_fault(regs, frame, "32bit rt sigreturn"); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 156ebcab4ada..a821b1cd4fa7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -30,24 +30,13 @@ .section .entry.text, "ax" - .macro IA32_ARG_FIXUP noebp=0 - movl %edi,%r8d - .if \noebp - .else - movl %ebp,%r9d - .endif - xchg %ecx,%esi - movl %ebx,%edi - movl %edx,%edx /* zero extension */ - .endm - - /* clobbers %eax */ - .macro CLEAR_RREGS offset=0, _r9=rax + /* clobbers %rax */ + .macro CLEAR_RREGS _r9=rax xorl %eax,%eax - movq %rax,\offset+R11(%rsp) - movq %rax,\offset+R10(%rsp) - movq %\_r9,\offset+R9(%rsp) - movq %rax,\offset+R8(%rsp) + movq %rax,R11(%rsp) + movq %rax,R10(%rsp) + movq %\_r9,R9(%rsp) + movq %rax,R8(%rsp) .endm /* @@ -60,14 +49,14 @@ * If it's -1 to make us punt the syscall, then (u32)-1 is still * an appropriately invalid value. */ - .macro LOAD_ARGS32 offset, _r9=0 + .macro LOAD_ARGS32 _r9=0 .if \_r9 - movl \offset+16(%rsp),%r9d + movl R9(%rsp),%r9d .endif - movl \offset+40(%rsp),%ecx - movl \offset+48(%rsp),%edx - movl \offset+56(%rsp),%esi - movl \offset+64(%rsp),%edi + movl RCX(%rsp),%ecx + movl RDX(%rsp),%edx + movl RSI(%rsp),%esi + movl RDI(%rsp),%edi movl %eax,%eax /* zero extension */ .endm @@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit) /* * 32bit SYSENTER instruction entry. * + * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. + * IF and VM in rflags are cleared (IOW: interrupts are off). + * SYSENTER does not save anything on the stack, + * and does not save old rip (!!!) and rflags. + * * Arguments: - * %eax System call number. - * %ebx Arg1 - * %ecx Arg2 - * %edx Arg3 - * %esi Arg4 - * %edi Arg5 - * %ebp user stack - * 0(%ebp) Arg6 - * - * Interrupts off. - * + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp user stack + * 0(%ebp) arg6 + * * This is purely a fast path. For anything complicated we use the int 0x80 - * path below. Set up a complete hardware stack frame to share code + * path below. We set up a complete hardware stack frame to share code * with the int 0x80 path. - */ + */ ENTRY(ia32_sysenter_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME CFI_DEF_CFA rsp,0 CFI_REGISTER rsp,rbp - SWAPGS_UNSAFE_STACK - movq PER_CPU_VAR(kernel_stack), %rsp - addq $(KERNEL_STACK_OFFSET),%rsp + /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs, here we enable it straight after entry: + * Interrupts are off on entry. + * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, + * it is too small to ever cause noticeable irq latency. */ + SWAPGS_UNSAFE_STACK + movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp ENABLE_INTERRUPTS(CLBR_NONE) - movl %ebp,%ebp /* zero extension */ - pushq_cfi $__USER32_DS - /*CFI_REL_OFFSET ss,0*/ - pushq_cfi %rbp - CFI_REL_OFFSET rsp,0 - pushfq_cfi - /*CFI_REL_OFFSET rflags,0*/ - movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d - CFI_REGISTER rip,r10 - pushq_cfi $__USER32_CS - /*CFI_REL_OFFSET cs,0*/ + + /* Zero-extending 32-bit regs, do not remove */ + movl %ebp, %ebp movl %eax, %eax - pushq_cfi %r10 - CFI_REL_OFFSET rip,0 - pushq_cfi %rax + + movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d + CFI_REGISTER rip,r10 + + /* Construct struct pt_regs on stack */ + pushq_cfi $__USER32_DS /* pt_regs->ss */ + pushq_cfi %rbp /* pt_regs->sp */ + CFI_REL_OFFSET rsp,0 + pushfq_cfi /* pt_regs->flags */ + pushq_cfi $__USER32_CS /* pt_regs->cs */ + pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */ + CFI_REL_OFFSET rip,0 + pushq_cfi_reg rax /* pt_regs->orig_ax */ + pushq_cfi_reg rdi /* pt_regs->di */ + pushq_cfi_reg rsi /* pt_regs->si */ + pushq_cfi_reg rdx /* pt_regs->dx */ + pushq_cfi_reg rcx /* pt_regs->cx */ + pushq_cfi_reg rax /* pt_regs->ax */ cld - SAVE_ARGS 0,1,0 - /* no need to do an access_ok check here because rbp has been - 32bit zero extended */ + sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + CFI_ADJUST_CFA_OFFSET 10*8 + + /* + * no need to do an access_ok check here because rbp has been + * 32bit zero extended + */ ASM_STAC 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) @@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target) * ourselves. To save a few cycles, we can check whether * NT was set instead of doing an unconditional popfq. */ - testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) + testl $X86_EFLAGS_NT,EFLAGS(%rsp) jnz sysenter_fix_flags sysenter_flags_fixed: - orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz sysenter_tracesys cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys sysenter_do_call: - IA32_ARG_FIXUP + /* 32bit syscall -> 64bit C ABI argument conversion */ + movl %edi,%r8d /* arg5 */ + movl %ebp,%r9d /* arg6 */ + xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ + movl %ebx,%edi /* arg1 */ + movl %edx,%edx /* arg3 (zero extension) */ sysenter_dispatch: call *ia32_sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) + movq %rax,RAX(%rsp) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz sysexit_audit sysexit_from_sys_call: - andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - /* clear IF, that popfq doesn't enable interrupts early */ - andl $~0x200,EFLAGS-ARGOFFSET(%rsp) - movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ - CFI_REGISTER rip,rdx - RESTORE_ARGS 0,24,0,0,0,0 + /* + * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an + * NMI between STI and SYSEXIT has poorly specified behavior, + * and and NMI followed by an IRQ with usergs is fatal. So + * we just pretend we're using SYSEXIT but we really use + * SYSRETL instead. + * + * This code path is still called 'sysexit' because it pairs + * with 'sysenter' and it uses the SYSENTER calling convention. + */ + andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + movl RIP(%rsp),%ecx /* User %eip */ + CFI_REGISTER rip,rcx + RESTORE_RSI_RDI + xorl %edx,%edx /* avoid info leaks */ xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 - xorq %r11,%r11 - popfq_cfi + movl EFLAGS(%rsp),%r11d /* User eflags */ /*CFI_RESTORE rflags*/ - popq_cfi %rcx /* User %esp */ - CFI_REGISTER rsp,rcx TRACE_IRQS_ON - ENABLE_INTERRUPTS_SYSEXIT32 + + /* + * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT, + * since it avoids a dicey window with interrupts enabled. + */ + movl RSP(%rsp),%esp + + /* + * USERGS_SYSRET32 does: + * gsbase = user's gs base + * eip = ecx + * rflags = r11 + * cs = __USER32_CS + * ss = __USER_DS + * + * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does: + * + * pop %ebp + * pop %edx + * pop %ecx + * + * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to + * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's + * address (already known to user code), and R12-R15 are + * callee-saved and therefore don't contain any interesting + * kernel data. + */ + USERGS_SYSRET32 CFI_RESTORE_STATE @@ -205,18 +247,18 @@ sysexit_from_sys_call: movl %ebx,%esi /* 2nd arg: 1st syscall arg */ movl %eax,%edi /* 1st arg: syscall number */ call __audit_syscall_entry - movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ + movl RAX(%rsp),%eax /* reload syscall number */ cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys movl %ebx,%edi /* reload 1st syscall arg */ - movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ - movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ - movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ - movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ + movl RCX(%rsp),%esi /* reload 2nd syscall arg */ + movl RDX(%rsp),%edx /* reload 3rd syscall arg */ + movl RSI(%rsp),%ecx /* reload 4th syscall arg */ + movl RDI(%rsp),%r8d /* reload 5th syscall arg */ .endm .macro auditsys_exit exit - testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_ret_from_sys_call TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) @@ -227,13 +269,13 @@ sysexit_from_sys_call: 1: setbe %al /* 1 if error, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ call __audit_syscall_exit - movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ + movq RAX(%rsp),%rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jz \exit - CLEAR_RREGS -ARGOFFSET + CLEAR_RREGS jmp int_with_check .endm @@ -253,16 +295,16 @@ sysenter_fix_flags: sysenter_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jz sysenter_auditsys #endif - SAVE_REST + SAVE_EXTRA_REGS CLEAR_RREGS movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ - RESTORE_REST + LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ + RESTORE_EXTRA_REGS cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call @@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target) /* * 32bit SYSCALL instruction entry. * + * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, + * then loads new ss, cs, and rip from previously programmed MSRs. + * rflags gets masked by a value from another MSR (so CLD and CLAC + * are not needed). SYSCALL does not save anything on the stack + * and does not change rsp. + * + * Note: rflags saving+masking-with-MSR happens only in Long mode + * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it). + * Don't get confused: rflags saving+masking depends on Long Mode Active bit + * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes + * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). + * * Arguments: - * %eax System call number. - * %ebx Arg1 - * %ecx return EIP - * %edx Arg3 - * %esi Arg4 - * %edi Arg5 - * %ebp Arg2 [note: not saved in the stack frame, should not be touched] - * %esp user stack - * 0(%esp) Arg6 - * - * Interrupts off. - * + * eax system call number + * ecx return address + * ebx arg1 + * ebp arg2 (note: not saved in the stack frame, should not be touched) + * edx arg3 + * esi arg4 + * edi arg5 + * esp user stack + * 0(%esp) arg6 + * * This is purely a fast path. For anything complicated we use the int 0x80 - * path below. Set up a complete hardware stack frame to share code - * with the int 0x80 path. - */ + * path below. We set up a complete hardware stack frame to share code + * with the int 0x80 path. + */ ENTRY(ia32_cstar_target) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ + + /* + * Interrupts are off on entry. + * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, + * it is too small to ever cause noticeable irq latency. + */ SWAPGS_UNSAFE_STACK movl %esp,%r8d CFI_REGISTER rsp,r8 movq PER_CPU_VAR(kernel_stack),%rsp - /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: - */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,0,0 - movl %eax,%eax /* zero extension */ - movq %rax,ORIG_RAX-ARGOFFSET(%rsp) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET - movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ + + /* Zero-extending 32-bit regs, do not remove */ + movl %eax,%eax + + /* Construct struct pt_regs on stack */ + pushq_cfi $__USER32_DS /* pt_regs->ss */ + pushq_cfi %r8 /* pt_regs->sp */ + CFI_REL_OFFSET rsp,0 + pushq_cfi %r11 /* pt_regs->flags */ + pushq_cfi $__USER32_CS /* pt_regs->cs */ + pushq_cfi %rcx /* pt_regs->ip */ + CFI_REL_OFFSET rip,0 + pushq_cfi_reg rax /* pt_regs->orig_ax */ + pushq_cfi_reg rdi /* pt_regs->di */ + pushq_cfi_reg rsi /* pt_regs->si */ + pushq_cfi_reg rdx /* pt_regs->dx */ + pushq_cfi_reg rbp /* pt_regs->cx */ movl %ebp,%ecx - movq $__USER32_CS,CS-ARGOFFSET(%rsp) - movq $__USER32_DS,SS-ARGOFFSET(%rsp) - movq %r11,EFLAGS-ARGOFFSET(%rsp) - /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - movq %r8,RSP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rsp,RSP-ARGOFFSET - /* no need to do an access_ok check here because r8 has been - 32bit zero extended */ - /* hardware stack frame is complete now */ + pushq_cfi_reg rax /* pt_regs->ax */ + sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + CFI_ADJUST_CFA_OFFSET 10*8 + + /* + * no need to do an access_ok check here because r8 has been + * 32bit zero extended + */ ASM_STAC 1: movl (%r8),%r9d _ASM_EXTABLE(1b,ia32_badarg) ASM_CLAC - orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) CFI_REMEMBER_STATE jnz cstar_tracesys cmpq $IA32_NR_syscalls-1,%rax ja ia32_badsys cstar_do_call: - IA32_ARG_FIXUP 1 + /* 32bit syscall -> 64bit C ABI argument conversion */ + movl %edi,%r8d /* arg5 */ + /* r9 already loaded */ /* arg6 */ + xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ + movl %ebx,%edi /* arg1 */ + movl %edx,%edx /* arg3 (zero extension) */ cstar_dispatch: call *ia32_sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) + movq %rax,RAX(%rsp) DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF - testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz sysretl_audit sysretl_from_sys_call: - andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - RESTORE_ARGS 0,-ARG_SKIP,0,0,0 - movl RIP-ARGOFFSET(%rsp),%ecx + andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + RESTORE_RSI_RDI_RDX + movl RIP(%rsp),%ecx CFI_REGISTER rip,rcx - movl EFLAGS-ARGOFFSET(%rsp),%r11d + movl EFLAGS(%rsp),%r11d /*CFI_REGISTER rflags,r11*/ xorq %r10,%r10 xorq %r9,%r9 xorq %r8,%r8 TRACE_IRQS_ON - movl RSP-ARGOFFSET(%rsp),%esp + movl RSP(%rsp),%esp CFI_RESTORE rsp + /* + * 64bit->32bit SYSRET restores eip from ecx, + * eflags from r11 (but RF and VM bits are forced to 0), + * cs and ss are loaded from MSRs. + * (Note: 32bit->32bit SYSRET is different: since r11 + * does not exist, it merely sets eflags.IF=1). + */ USERGS_SYSRET32 - + #ifdef CONFIG_AUDITSYSCALL cstar_auditsys: CFI_RESTORE_STATE - movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ + movl %r9d,R9(%rsp) /* register to be clobbered by call */ auditsys_entry_common - movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ + movl R9(%rsp),%r9d /* reload 6th syscall arg */ jmp cstar_dispatch sysretl_audit: @@ -368,17 +444,17 @@ sysretl_audit: cstar_tracesys: #ifdef CONFIG_AUDITSYSCALL - testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jz cstar_auditsys #endif xchgl %r9d,%ebp - SAVE_REST - CLEAR_RREGS 0, r9 + SAVE_EXTRA_REGS + CLEAR_RREGS r9 movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ - RESTORE_REST + LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ + RESTORE_EXTRA_REGS xchgl %ebp,%r9d cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ @@ -391,78 +467,94 @@ ia32_badarg: jmp ia32_sysret CFI_ENDPROC -/* - * Emulated IA32 system calls via int 0x80. +/* + * Emulated IA32 system calls via int 0x80. * - * Arguments: - * %eax System call number. - * %ebx Arg1 - * %ecx Arg2 - * %edx Arg3 - * %esi Arg4 - * %edi Arg5 - * %ebp Arg6 [note: not saved in the stack frame, should not be touched] + * Arguments: + * eax system call number + * ebx arg1 + * ecx arg2 + * edx arg3 + * esi arg4 + * edi arg5 + * ebp arg6 (note: not saved in the stack frame, should not be touched) * * Notes: - * Uses the same stack frame as the x86-64 version. - * All registers except %eax must be saved (but ptrace may violate that) + * Uses the same stack frame as the x86-64 version. + * All registers except eax must be saved (but ptrace may violate that). * Arguments are zero extended. For system calls that want sign extension and * take long arguments a wrapper is needed. Most calls can just be called * directly. - * Assumes it is only called from user space and entered with interrupts off. - */ + * Assumes it is only called from user space and entered with interrupts off. + */ ENTRY(ia32_syscall) CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-RIP - /*CFI_REL_OFFSET ss,SS-RIP*/ - CFI_REL_OFFSET rsp,RSP-RIP - /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ - /*CFI_REL_OFFSET cs,CS-RIP*/ - CFI_REL_OFFSET rip,RIP-RIP - PARAVIRT_ADJUST_EXCEPTION_FRAME - SWAPGS + CFI_DEF_CFA rsp,5*8 + /*CFI_REL_OFFSET ss,4*8 */ + CFI_REL_OFFSET rsp,3*8 + /*CFI_REL_OFFSET rflags,2*8 */ + /*CFI_REL_OFFSET cs,1*8 */ + CFI_REL_OFFSET rip,0*8 + /* - * No need to follow this irqs on/off section: the syscall - * disabled irqs and here we enable it straight after entry: + * Interrupts are off on entry. + * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, + * it is too small to ever cause noticeable irq latency. */ + PARAVIRT_ADJUST_EXCEPTION_FRAME + SWAPGS ENABLE_INTERRUPTS(CLBR_NONE) - movl %eax,%eax - pushq_cfi %rax + + /* Zero-extending 32-bit regs, do not remove */ + movl %eax,%eax + + /* Construct struct pt_regs on stack (iret frame is already on stack) */ + pushq_cfi_reg rax /* pt_regs->orig_ax */ + pushq_cfi_reg rdi /* pt_regs->di */ + pushq_cfi_reg rsi /* pt_regs->si */ + pushq_cfi_reg rdx /* pt_regs->dx */ + pushq_cfi_reg rcx /* pt_regs->cx */ + pushq_cfi_reg rax /* pt_regs->ax */ cld - /* note the registers are not zero extended to the sf. - this could be a problem. */ - SAVE_ARGS 0,1,0 - orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ + CFI_ADJUST_CFA_OFFSET 10*8 + + orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz ia32_tracesys cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys ia32_do_call: - IA32_ARG_FIXUP + /* 32bit syscall -> 64bit C ABI argument conversion */ + movl %edi,%r8d /* arg5 */ + movl %ebp,%r9d /* arg6 */ + xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ + movl %ebx,%edi /* arg1 */ + movl %edx,%edx /* arg3 (zero extension) */ call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: - movq %rax,RAX-ARGOFFSET(%rsp) + movq %rax,RAX(%rsp) ia32_ret_from_sys_call: - CLEAR_RREGS -ARGOFFSET - jmp int_ret_from_sys_call + CLEAR_RREGS + jmp int_ret_from_sys_call -ia32_tracesys: - SAVE_REST +ia32_tracesys: + SAVE_EXTRA_REGS CLEAR_RREGS movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter - LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ - RESTORE_REST + LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ + RESTORE_EXTRA_REGS cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call END(ia32_syscall) ia32_badsys: - movq $0,ORIG_RAX-ARGOFFSET(%rsp) + movq $0,ORIG_RAX(%rsp) movq $-ENOSYS,%rax jmp ia32_sysret @@ -479,8 +571,6 @@ GLOBAL(\label) PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn PTREGSCALL stub32_sigreturn, sys32_sigreturn - PTREGSCALL stub32_execve, compat_sys_execve - PTREGSCALL stub32_execveat, compat_sys_execveat PTREGSCALL stub32_fork, sys_fork PTREGSCALL stub32_vfork, sys_vfork @@ -492,24 +582,23 @@ GLOBAL(stub32_clone) ALIGN ia32_ptregs_common: - popq %r11 CFI_ENDPROC CFI_STARTPROC32 simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-ARGOFFSET - CFI_REL_OFFSET rax,RAX-ARGOFFSET - CFI_REL_OFFSET rcx,RCX-ARGOFFSET - CFI_REL_OFFSET rdx,RDX-ARGOFFSET - CFI_REL_OFFSET rsi,RSI-ARGOFFSET - CFI_REL_OFFSET rdi,RDI-ARGOFFSET - CFI_REL_OFFSET rip,RIP-ARGOFFSET -/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ -/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ - CFI_REL_OFFSET rsp,RSP-ARGOFFSET -/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ - SAVE_REST + CFI_DEF_CFA rsp,SIZEOF_PTREGS + CFI_REL_OFFSET rax,RAX + CFI_REL_OFFSET rcx,RCX + CFI_REL_OFFSET rdx,RDX + CFI_REL_OFFSET rsi,RSI + CFI_REL_OFFSET rdi,RDI + CFI_REL_OFFSET rip,RIP +/* CFI_REL_OFFSET cs,CS*/ +/* CFI_REL_OFFSET rflags,EFLAGS*/ + CFI_REL_OFFSET rsp,RSP +/* CFI_REL_OFFSET ss,SS*/ + SAVE_EXTRA_REGS 8 call *%rax - RESTORE_REST - jmp ia32_sysret /* misbalances the return cache */ + RESTORE_EXTRA_REGS 8 + ret CFI_ENDPROC END(ia32_ptregs_common) diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c deleted file mode 100644 index 51ecd5b4e787..000000000000 --- a/arch/x86/ia32/nosyscall.c +++ /dev/null @@ -1,7 +0,0 @@ -#include <linux/kernel.h> -#include <linux/errno.h> - -long compat_ni_syscall(void) -{ - return -ENOSYS; -} diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 8e0ceecdc957..719cd702b0a4 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, advice); } -long sys32_vm86_warning(void) -{ - struct task_struct *me = current; - static char lastcomm[sizeof(me->comm)]; - - if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) { - compat_printk(KERN_INFO - "%s: vm86 mode not supported on 64 bit kernel\n", - me->comm); - strncpy(lastcomm, me->comm, sizeof(lastcomm)); - } - return -ENOSYS; -} - asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, size_t count) { diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c deleted file mode 100644 index 4754ba0f5d9f..000000000000 --- a/arch/x86/ia32/syscall_ia32.c +++ /dev/null @@ -1,25 +0,0 @@ -/* System call table for ia32 emulation. */ - -#include <linux/linkage.h> -#include <linux/sys.h> -#include <linux/cache.h> -#include <asm/asm-offsets.h> - -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ; -#include <asm/syscalls_32.h> -#undef __SYSCALL_I386 - -#define __SYSCALL_I386(nr, sym, compat) [nr] = compat, - -typedef void (*sys_call_ptr_t)(void); - -extern void compat_ni_syscall(void); - -const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { - /* - * Smells like a compiler bug -- it doesn't work - * when the & below is removed. - */ - [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall, -#include <asm/syscalls_32.h> -}; diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 372231c22a47..bdf02eeee765 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -18,12 +18,63 @@ .endm #endif -.macro altinstruction_entry orig alt feature orig_len alt_len +.macro altinstruction_entry orig alt feature orig_len alt_len pad_len .long \orig - . .long \alt - . .word \feature .byte \orig_len .byte \alt_len + .byte \pad_len +.endm + +.macro ALTERNATIVE oldinstr, newinstr, feature +140: + \oldinstr +141: + .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr +144: + .popsection +.endm + +#define old_len 141b-140b +#define new_len1 144f-143f +#define new_len2 145f-144f + +/* + * max without conditionals. Idea adapted from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + */ +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) + +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +140: + \oldinstr +141: + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_short(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + .popsection .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 473bdbee378a..ba32af062f61 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -48,8 +48,9 @@ struct alt_instr { s32 repl_offset; /* offset to replacement instruction */ u16 cpuid; /* cpuid bit set for replacement */ u8 instrlen; /* length of original instruction */ - u8 replacementlen; /* length of new instruction, <= instrlen */ -}; + u8 replacementlen; /* length of new instruction */ + u8 padlen; /* length of build-time padding */ +} __packed; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); @@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end) } #endif /* CONFIG_SMP */ -#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" +#define b_replacement(num) "664"#num +#define e_replacement(num) "665"#num -#define b_replacement(number) "663"#number -#define e_replacement(number) "664"#number +#define alt_end_marker "663" +#define alt_slen "662b-661b" +#define alt_pad_len alt_end_marker"b-662b" +#define alt_total_slen alt_end_marker"b-661b" +#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" -#define alt_slen "662b-661b" -#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" +#define __OLDINSTR(oldinstr, num) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ + "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" -#define ALTINSTR_ENTRY(feature, number) \ +#define OLDINSTR(oldinstr, num) \ + __OLDINSTR(oldinstr, num) \ + alt_end_marker ":\n" + +/* + * max without conditionals. Idea adapted from: + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax + * + * The additional "-" is needed because gas works with s32s. + */ +#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))" + +/* + * Pad the second replacement alternative with additional NOPs if it is + * additionally longer than the first replacement alternative. + */ +#define OLDINSTR_2(oldinstr, num1, num2) \ + "661:\n\t" oldinstr "\n662:\n" \ + ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ + alt_end_marker ":\n" + +#define ALTINSTR_ENTRY(feature, num) \ " .long 661b - .\n" /* label */ \ - " .long " b_replacement(number)"f - .\n" /* new instruction */ \ + " .long " b_replacement(num)"f - .\n" /* new instruction */ \ " .word " __stringify(feature) "\n" /* feature bit */ \ - " .byte " alt_slen "\n" /* source len */ \ - " .byte " alt_rlen(number) "\n" /* replacement len */ - -#define DISCARD_ENTRY(number) /* rlen <= slen */ \ - " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n" + " .byte " alt_total_slen "\n" /* source len */ \ + " .byte " alt_rlen(num) "\n" /* replacement len */ \ + " .byte " alt_pad_len "\n" /* pad len */ -#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ - b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ - OLDINSTR(oldinstr) \ + OLDINSTR(oldinstr, 1) \ ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" \ - ".pushsection .discard,\"aw\",@progbits\n" \ - DISCARD_ENTRY(1) \ - ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ ".popsection" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ - OLDINSTR(oldinstr) \ + OLDINSTR_2(oldinstr, 1, 2) \ ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature1, 1) \ ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" \ - ".pushsection .discard,\"aw\",@progbits\n" \ - DISCARD_ENTRY(1) \ - DISCARD_ENTRY(2) \ - ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ @@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alternative(oldinstr, newinstr, feature) \ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") +#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ + asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") + /* * Alternative inline assembly with input. * diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index efc3b22d896e..976b86a325e5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) { volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); - alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, + alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP, ASM_OUTPUT2("=r" (v), "=m" (*addr)), ASM_OUTPUT2("0" (v), "m" (*addr))); } @@ -204,7 +204,6 @@ extern void clear_local_APIC(void); extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); -extern int verify_local_APIC(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void setup_local_APIC(void); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 2ab1eb33106e..959e45b81fe2 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -95,13 +95,11 @@ do { \ * Stop RDTSC speculation. This is needed when you need to use RDTSC * (or get_cycles or vread that possibly accesses the TSC) in a defined * code region. - * - * (Could use an alternative three way for this if there was one.) */ static __always_inline void rdtsc_barrier(void) { - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, + "lfence", X86_FEATURE_LFENCE_RDTSC); } #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 1f1297b46f83..1c8b50edb2db 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -#define R15 0 -#define R14 8 -#define R13 16 -#define R12 24 -#define RBP 32 -#define RBX 40 - -/* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 48 -#define R10 56 -#define R9 64 -#define R8 72 -#define RAX 80 -#define RCX 88 -#define RDX 96 -#define RSI 104 -#define RDI 112 -#define ORIG_RAX 120 /* + error_code */ -/* end of arguments */ - -/* cpu exception frame or undefined in case of fast syscall: */ -#define RIP 128 -#define CS 136 -#define EFLAGS 144 -#define RSP 152 -#define SS 160 - -#define ARGOFFSET R11 - - .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 - subq $9*8+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET 9*8+\addskip - movq_cfi rdi, 8*8 - movq_cfi rsi, 7*8 - movq_cfi rdx, 6*8 - - .if \save_rcx - movq_cfi rcx, 5*8 - .endif +/* The layout forms the "struct pt_regs" on the stack: */ +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ +#define R15 0*8 +#define R14 1*8 +#define R13 2*8 +#define R12 3*8 +#define RBP 4*8 +#define RBX 5*8 +/* These regs are callee-clobbered. Always saved on kernel entry. */ +#define R11 6*8 +#define R10 7*8 +#define R9 8*8 +#define R8 9*8 +#define RAX 10*8 +#define RCX 11*8 +#define RDX 12*8 +#define RSI 13*8 +#define RDI 14*8 +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ +#define ORIG_RAX 15*8 +/* Return frame for iretq */ +#define RIP 16*8 +#define CS 17*8 +#define EFLAGS 18*8 +#define RSP 19*8 +#define SS 20*8 + +#define SIZEOF_PTREGS 21*8 + + .macro ALLOC_PT_GPREGS_ON_STACK addskip=0 + subq $15*8+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET 15*8+\addskip + .endm - .if \rax_enosys - movq $-ENOSYS, 4*8(%rsp) - .else - movq_cfi rax, 4*8 + .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1 + .if \r11 + movq_cfi r11, 6*8+\offset .endif - - .if \save_r891011 - movq_cfi r8, 3*8 - movq_cfi r9, 2*8 - movq_cfi r10, 1*8 - movq_cfi r11, 0*8 + .if \r8910 + movq_cfi r10, 7*8+\offset + movq_cfi r9, 8*8+\offset + movq_cfi r8, 9*8+\offset + .endif + .if \rax + movq_cfi rax, 10*8+\offset + .endif + .if \rcx + movq_cfi rcx, 11*8+\offset .endif + movq_cfi rdx, 12*8+\offset + movq_cfi rsi, 13*8+\offset + movq_cfi rdi, 14*8+\offset + .endm + .macro SAVE_C_REGS offset=0 + SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1 + .endm + .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0 + SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1 + .endm + .macro SAVE_C_REGS_EXCEPT_R891011 + SAVE_C_REGS_HELPER 0, 1, 1, 0, 0 + .endm + .macro SAVE_C_REGS_EXCEPT_RCX_R891011 + SAVE_C_REGS_HELPER 0, 1, 0, 0, 0 + .endm + .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11 + SAVE_C_REGS_HELPER 0, 0, 0, 1, 0 + .endm + + .macro SAVE_EXTRA_REGS offset=0 + movq_cfi r15, 0*8+\offset + movq_cfi r14, 1*8+\offset + movq_cfi r13, 2*8+\offset + movq_cfi r12, 3*8+\offset + movq_cfi rbp, 4*8+\offset + movq_cfi rbx, 5*8+\offset + .endm + .macro SAVE_EXTRA_REGS_RBP offset=0 + movq_cfi rbp, 4*8+\offset + .endm + .macro RESTORE_EXTRA_REGS offset=0 + movq_cfi_restore 0*8+\offset, r15 + movq_cfi_restore 1*8+\offset, r14 + movq_cfi_restore 2*8+\offset, r13 + movq_cfi_restore 3*8+\offset, r12 + movq_cfi_restore 4*8+\offset, rbp + movq_cfi_restore 5*8+\offset, rbx .endm -#define ARG_SKIP (9*8) + .macro ZERO_EXTRA_REGS + xorl %r15d, %r15d + xorl %r14d, %r14d + xorl %r13d, %r13d + xorl %r12d, %r12d + xorl %ebp, %ebp + xorl %ebx, %ebx + .endm - .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ - rstor_r8910=1, rstor_rdx=1 + .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1 .if \rstor_r11 - movq_cfi_restore 0*8, r11 + movq_cfi_restore 6*8, r11 .endif - .if \rstor_r8910 - movq_cfi_restore 1*8, r10 - movq_cfi_restore 2*8, r9 - movq_cfi_restore 3*8, r8 + movq_cfi_restore 7*8, r10 + movq_cfi_restore 8*8, r9 + movq_cfi_restore 9*8, r8 .endif - .if \rstor_rax - movq_cfi_restore 4*8, rax + movq_cfi_restore 10*8, rax .endif - .if \rstor_rcx - movq_cfi_restore 5*8, rcx + movq_cfi_restore 11*8, rcx .endif - .if \rstor_rdx - movq_cfi_restore 6*8, rdx - .endif - - movq_cfi_restore 7*8, rsi - movq_cfi_restore 8*8, rdi - - .if ARG_SKIP+\addskip > 0 - addq $ARG_SKIP+\addskip, %rsp - CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) + movq_cfi_restore 12*8, rdx .endif + movq_cfi_restore 13*8, rsi + movq_cfi_restore 14*8, rdi .endm - - .macro LOAD_ARGS offset, skiprax=0 - movq \offset(%rsp), %r11 - movq \offset+8(%rsp), %r10 - movq \offset+16(%rsp), %r9 - movq \offset+24(%rsp), %r8 - movq \offset+40(%rsp), %rcx - movq \offset+48(%rsp), %rdx - movq \offset+56(%rsp), %rsi - movq \offset+64(%rsp), %rdi - .if \skiprax - .else - movq \offset+72(%rsp), %rax - .endif + .macro RESTORE_C_REGS + RESTORE_C_REGS_HELPER 1,1,1,1,1 .endm - -#define REST_SKIP (6*8) - - .macro SAVE_REST - subq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET REST_SKIP - movq_cfi rbx, 5*8 - movq_cfi rbp, 4*8 - movq_cfi r12, 3*8 - movq_cfi r13, 2*8 - movq_cfi r14, 1*8 - movq_cfi r15, 0*8 + .macro RESTORE_C_REGS_EXCEPT_RAX + RESTORE_C_REGS_HELPER 0,1,1,1,1 .endm - - .macro RESTORE_REST - movq_cfi_restore 0*8, r15 - movq_cfi_restore 1*8, r14 - movq_cfi_restore 2*8, r13 - movq_cfi_restore 3*8, r12 - movq_cfi_restore 4*8, rbp - movq_cfi_restore 5*8, rbx - addq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET -(REST_SKIP) + .macro RESTORE_C_REGS_EXCEPT_RCX + RESTORE_C_REGS_HELPER 1,0,1,1,1 .endm - - .macro SAVE_ALL - SAVE_ARGS - SAVE_REST + .macro RESTORE_C_REGS_EXCEPT_R11 + RESTORE_C_REGS_HELPER 1,1,0,1,1 + .endm + .macro RESTORE_C_REGS_EXCEPT_RCX_R11 + RESTORE_C_REGS_HELPER 1,0,0,1,1 + .endm + .macro RESTORE_RSI_RDI + RESTORE_C_REGS_HELPER 0,0,0,0,0 + .endm + .macro RESTORE_RSI_RDI_RDX + RESTORE_C_REGS_HELPER 0,0,0,0,1 .endm - .macro RESTORE_ALL addskip=0 - RESTORE_REST - RESTORE_ARGS 1, \addskip + .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0 + addq $15*8+\addskip, %rsp + CFI_ADJUST_CFA_OFFSET -(15*8+\addskip) .endm .macro icebp @@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with */ .macro SAVE_ALL - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl_cfi_reg eax + pushl_cfi_reg ebp + pushl_cfi_reg edi + pushl_cfi_reg esi + pushl_cfi_reg edx + pushl_cfi_reg ecx + pushl_cfi_reg ebx .endm .macro RESTORE_ALL - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %ecx - CFI_RESTORE ecx - popl_cfi %edx - CFI_RESTORE edx - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi - popl_cfi %ebp - CFI_RESTORE ebp - popl_cfi %eax - CFI_RESTORE eax + popl_cfi_reg ebx + popl_cfi_reg ecx + popl_cfi_reg edx + popl_cfi_reg esi + popl_cfi_reg edi + popl_cfi_reg ebp + popl_cfi_reg eax .endm #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 59c6c401f79f..acdee09228b3 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) sp = task_pt_regs(current)->sp; } else { /* -128 for the x32 ABI redzone */ - sp = this_cpu_read(old_rsp) - 128; + sp = task_pt_regs(current)->sp - 128; } return (void __user *)round_down(sp - len, 16); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 90a54851aedc..854c04b3c9c2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -231,7 +231,9 @@ #define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ @@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) " .word %P0\n" /* 1: do replace */ " .byte 2b - 1b\n" /* source len */ " .byte 0\n" /* replacement len */ + " .byte 0\n" /* pad len */ ".previous\n" /* skipping size check since replacement size = 0 */ : : "i" (X86_FEATURE_ALWAYS) : : t_warn); @@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) " .word %P0\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 0\n" /* replacement len */ + " .byte 0\n" /* pad len */ ".previous\n" /* skipping size check since replacement size = 0 */ : : "i" (bit) : : t_no); @@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) " .word %P1\n" /* feature bit */ " .byte 2b - 1b\n" /* source len */ " .byte 4f - 3f\n" /* replacement len */ + " .byte 0\n" /* pad len */ ".previous\n" ".section .discard,\"aw\",@progbits\n" " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ @@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) static __always_inline __pure bool _static_cpu_has_safe(u16 bit) { #ifdef CC_HAVE_ASM_GOTO -/* - * We need to spell the jumps to the compiler because, depending on the offset, - * the replacement jump can be bigger than the original jump, and this we cannot - * have. Thus, we force the jump to the widest, 4-byte, signed relative - * offset even though the last would often fit in less bytes. - */ - asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + asm_volatile_goto("1: jmp %l[t_dynamic]\n" "2:\n" + ".skip -(((5f-4f) - (2b-1b)) > 0) * " + "((5f-4f) - (2b-1b)),0x90\n" + "3:\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ - " .long 3f - .\n" /* repl offset */ + " .long 4f - .\n" /* repl offset */ " .word %P1\n" /* always replace */ - " .byte 2b - 1b\n" /* src len */ - " .byte 4f - 3f\n" /* repl len */ + " .byte 3b - 1b\n" /* src len */ + " .byte 5f - 4f\n" /* repl len */ + " .byte 3b - 2b\n" /* pad len */ ".previous\n" ".section .altinstr_replacement,\"ax\"\n" - "3: .byte 0xe9\n .long %l[t_no] - 2b\n" - "4:\n" + "4: jmp %l[t_no]\n" + "5:\n" ".previous\n" ".section .altinstructions,\"a\"\n" " .long 1b - .\n" /* src offset */ " .long 0\n" /* no replacement */ " .word %P0\n" /* feature bit */ - " .byte 2b - 1b\n" /* src len */ + " .byte 3b - 1b\n" /* src len */ " .byte 0\n" /* repl len */ + " .byte 0\n" /* pad len */ ".previous\n" : : "i" (bit), "i" (X86_FEATURE_ALWAYS) : : t_dynamic, t_no); @@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) " .word %P2\n" /* always replace */ " .byte 2b - 1b\n" /* source len */ " .byte 4f - 3f\n" /* replacement len */ + " .byte 0\n" /* pad len */ ".previous\n" ".section .discard,\"aw\",@progbits\n" " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ @@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) " .word %P1\n" /* feature bit */ " .byte 4b - 3b\n" /* src len */ " .byte 6f - 5f\n" /* repl len */ + " .byte 0\n" /* pad len */ ".previous\n" ".section .discard,\"aw\",@progbits\n" " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index a94b82e8f156..a0bf89fd2647 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr, * Pentium F0 0F bugfix can have resulted in the mapped * IDT being write-protected. */ -#define set_intr_gate(n, addr) \ +#define set_intr_gate_notrace(n, addr) \ do { \ BUG_ON((unsigned)n > 0xFF); \ _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ __KERNEL_CS); \ + } while (0) + +#define set_intr_gate(n, addr) \ + do { \ + set_intr_gate_notrace(n, addr); \ _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ 0, 0, __KERNEL_CS); \ } while (0) diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index f6f15986df6c..de1cdaf4d743 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -86,11 +86,23 @@ CFI_ADJUST_CFA_OFFSET 8 .endm + .macro pushq_cfi_reg reg + pushq %\reg + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET \reg, 0 + .endm + .macro popq_cfi reg popq \reg CFI_ADJUST_CFA_OFFSET -8 .endm + .macro popq_cfi_reg reg + popq %\reg + CFI_ADJUST_CFA_OFFSET -8 + CFI_RESTORE \reg + .endm + .macro pushfq_cfi pushfq CFI_ADJUST_CFA_OFFSET 8 @@ -116,11 +128,23 @@ CFI_ADJUST_CFA_OFFSET 4 .endm + .macro pushl_cfi_reg reg + pushl %\reg + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET \reg, 0 + .endm + .macro popl_cfi reg popl \reg CFI_ADJUST_CFA_OFFSET -4 .endm + .macro popl_cfi_reg reg + popl %\reg + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE \reg + .endm + .macro pushfl_cfi pushfl CFI_ADJUST_CFA_OFFSET 4 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 25bce45c6fc4..3738b138b843 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -2,6 +2,8 @@ #define _ASM_X86_EFI_H #include <asm/i387.h> +#include <asm/pgtable.h> + /* * We map the EFI regions needed for runtime services non-contiguously, * with preserved alignment on virtual addresses starting from -4G down @@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, extern struct efi_scratch efi_scratch; extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); extern int __init efi_memblock_x86_reserve_range(void); -extern void __init efi_call_phys_prolog(void); -extern void __init efi_call_phys_epilog(void); +extern pgd_t * __init efi_call_phys_prolog(void); +extern void __init efi_call_phys_epilog(pgd_t *save_pgd); extern void __init efi_unmap_memmap(void); extern void __init efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index ca3347a9dab5..3563107b5060 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -171,10 +171,11 @@ do { \ static inline void elf_common_init(struct thread_struct *t, struct pt_regs *regs, const u16 ds) { - regs->ax = regs->bx = regs->cx = regs->dx = 0; - regs->si = regs->di = regs->bp = 0; + /* Commented-out registers are cleared in stub_execve */ + /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0; + regs->si = regs->di /*= regs->bp*/ = 0; regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; - regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; + /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/ t->fs = t->gs = 0; t->fsindex = t->gsindex = 0; t->ds = t->es = ds; diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 0dbc08282291..72ba21a8b5fc 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -370,7 +370,7 @@ static inline void drop_fpu(struct task_struct *tsk) preempt_disable(); tsk->thread.fpu_counter = 0; __drop_fpu(tsk); - clear_used_math(); + clear_stopped_child_used_math(tsk); preempt_enable(); } diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 9662290e0b20..e9571ddabc4f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *); extern __visible void smp_invalidate_interrupt(struct pt_regs *); #endif -extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR - - FIRST_EXTERNAL_VECTOR])(void); +extern char irq_entries_start[]; #ifdef CONFIG_TRACING -#define trace_interrupt interrupt +#define trace_irq_entries_start irq_entries_start #endif #define VECTOR_UNDEFINED (-1) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 47f29b1d1846..e7814b74caf8 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -69,7 +69,7 @@ struct insn { const insn_byte_t *next_byte; }; -#define MAX_INSN_SIZE 16 +#define MAX_INSN_SIZE 15 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f42a04735a0a..e37d6b3ad983 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -79,11 +79,12 @@ struct iommu_table_entry { * d). Similar to the 'init', except that this gets called from pci_iommu_init * where we do have a memory allocator. * - * The standard vs the _FINISH differs in that the _FINISH variant will - * continue detecting other IOMMUs in the call list after the - * the detection routine returns a positive number. The _FINISH will - * stop the execution chain. Both will still call the 'init' and - * 'late_init' functions if they are set. + * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant + * in that the former will continue detecting other IOMMUs in the call + * list after the detection routine returns a positive number, while the + * latter will stop the execution chain upon first successful detection. + * Both variants will still call the 'init' and 'late_init' functions if + * they are set. */ #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 0a8b519226b8..b77f5edb03b0 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void) #define USERGS_SYSRET32 \ swapgs; \ sysretl -#define ENABLE_INTERRUPTS_SYSEXIT32 \ - swapgs; \ - sti; \ - sysexit #else #define INTERRUPT_RETURN iret @@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(flags); } +#endif /* !__ASSEMBLY__ */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_TRACE_IRQFLAGS +# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; +# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; #else - -#ifdef CONFIG_X86_64 -#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_X86_64 +# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +# define LOCKDEP_SYS_EXIT_IRQ \ TRACE_IRQS_ON; \ sti; \ - SAVE_REST; \ - LOCKDEP_SYS_EXIT; \ - RESTORE_REST; \ + call lockdep_sys_exit_thunk; \ cli; \ TRACE_IRQS_OFF; - -#else -#define ARCH_LOCKDEP_SYS_EXIT \ +# else +# define LOCKDEP_SYS_EXIT \ pushl %eax; \ pushl %ecx; \ pushl %edx; \ @@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void) popl %edx; \ popl %ecx; \ popl %eax; - -#define ARCH_LOCKDEP_SYS_EXIT_IRQ -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -# define TRACE_IRQS_ON call trace_hardirqs_on_thunk; -# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk; +# define LOCKDEP_SYS_EXIT_IRQ +# endif #else -# define TRACE_IRQS_ON -# define TRACE_IRQS_OFF -#endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ -# else # define LOCKDEP_SYS_EXIT # define LOCKDEP_SYS_EXIT_IRQ -# endif - +#endif #endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 6a2cefb4395a..a4c1cf7e93f8 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/stringify.h> #include <linux/types.h> @@ -30,8 +30,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - #ifdef CONFIG_X86_64 typedef u64 jump_label_t; #else @@ -44,4 +42,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39cc385..dea2e7e962e3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } -#define SELECTOR_TI_MASK (1 << 2) -#define SELECTOR_RPL_MASK 0x03 - -#define IOPL_SHIFT 12 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 @@ -345,6 +340,7 @@ struct kvm_pmu { enum { KVM_DEBUGREG_BP_ENABLED = 1, KVM_DEBUGREG_WONT_EXIT = 2, + KVM_DEBUGREG_RELOAD = 4, }; struct kvm_vcpu_arch { @@ -431,6 +427,9 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + + int maxphyaddr; + /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; @@ -550,11 +549,20 @@ struct kvm_arch_memory_slot { struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; +/* + * We use as the mode the number of bits allocated in the LDR for the + * logical processor ID. It happens that these are all powers of two. + * This makes it is very easy to detect cases where the APICs are + * configured for multiple modes; in that case, we cannot use the map and + * hence cannot use kvm_irq_delivery_to_apic_fast either. + */ +#define KVM_APIC_MODE_XAPIC_CLUSTER 4 +#define KVM_APIC_MODE_XAPIC_FLAT 8 +#define KVM_APIC_MODE_X2APIC 16 + struct kvm_apic_map { struct rcu_head rcu; - u8 ldr_bits; - /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask, broadcast; + u8 mode; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; @@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot); +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, @@ -933,6 +943,7 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_vcpu_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e62cf897f781..c1adf33fdd0d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void) static inline bool kvm_para_available(void) { - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index a1410db38a1a..653dfa7662e1 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) :: "a" (eax), "c" (ecx)); } +static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +{ + trace_hardirqs_on(); + /* "mwait %eax, %ecx;" */ + asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" + :: "a" (eax), "c" (ecx)); +} + /* * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, * which can obviate IPI to trigger checking of need_resched. diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 95e11f79f123..f97fbe3abb67 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -51,8 +51,6 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; -extern bool kaslr_enabled; - static inline phys_addr_t get_max_mapped(void) { return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 965c47d254aa..5f6051d5d139 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -976,11 +976,6 @@ extern void default_banner(void); PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) - -#define ENABLE_INTERRUPTS_SYSEXIT32 \ - PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ - CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) #endif /* CONFIG_X86_32 */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index fa1195dae425..164e3f8d3c3d 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock; extern int (*pcibios_enable_irq)(struct pci_dev *dev); extern void (*pcibios_disable_irq)(struct pci_dev *dev); +extern bool mp_should_keep_irq(struct device *dev); + struct pci_raw_ops { int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, int reg, int len, u32 *val); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ec1c93588cef..d2203b5d9538 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -210,8 +210,23 @@ struct x86_hw_tss { unsigned long sp0; unsigned short ss0, __ss0h; unsigned long sp1; - /* ss1 caches MSR_IA32_SYSENTER_CS: */ - unsigned short ss1, __ss1h; + + /* + * We don't use ring 1, so ss1 is a convenient scratch space in + * the same cacheline as sp0. We use ss1 to cache the value in + * MSR_IA32_SYSENTER_CS. When we context switch + * MSR_IA32_SYSENTER_CS, we first check if the new value being + * written matches ss1, and, if it's not, then we wrmsr the new + * value and update ss1. + * + * The only reason we context switch MSR_IA32_SYSENTER_CS is + * that we set it to zero in vm86 tasks to avoid corrupting the + * stack if we were to go through the sysenter path from vm86 + * mode. + */ + unsigned short ss1; /* MSR_IA32_SYSENTER_CS */ + + unsigned short __ss1h; unsigned long sp2; unsigned short ss2, __ss2h; unsigned long __cr3; @@ -276,13 +291,17 @@ struct tss_struct { unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; /* - * .. and then another 0x100 bytes for the emergency kernel stack: + * Space for the temporary SYSENTER stack: */ - unsigned long stack[64]; + unsigned long SYSENTER_stack[64]; } ____cacheline_aligned; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); +DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); + +#ifdef CONFIG_X86_32 +DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); +#endif /* * Save the original ist values for checking stack pointers during debugging @@ -474,7 +493,6 @@ struct thread_struct { #ifdef CONFIG_X86_32 unsigned long sysenter_cs; #else - unsigned long usersp; /* Copy from PDA */ unsigned short es; unsigned short ds; unsigned short fsindex; @@ -564,6 +582,16 @@ static inline void native_swapgs(void) #endif } +static inline unsigned long current_top_of_stack(void) +{ +#ifdef CONFIG_X86_64 + return this_cpu_read_stable(cpu_tss.x86_tss.sp0); +#else + /* sp0 on x86_32 is special in and around vm86 mode. */ + return this_cpu_read_stable(cpu_current_top_of_stack); +#endif +} + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else @@ -761,10 +789,10 @@ extern char ignore_fpu_irq; #define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 -# define BASE_PREFETCH ASM_NOP4 +# define BASE_PREFETCH "" # define ARCH_HAS_PREFETCH #else -# define BASE_PREFETCH "prefetcht0 (%1)" +# define BASE_PREFETCH "prefetcht0 %P1" #endif /* @@ -775,10 +803,9 @@ extern char ignore_fpu_irq; */ static inline void prefetch(const void *x) { - alternative_input(BASE_PREFETCH, - "prefetchnta (%1)", + alternative_input(BASE_PREFETCH, "prefetchnta %P1", X86_FEATURE_XMM, - "r" (x)); + "m" (*(const char *)x)); } /* @@ -788,10 +815,9 @@ static inline void prefetch(const void *x) */ static inline void prefetchw(const void *x) { - alternative_input(BASE_PREFETCH, - "prefetchw (%1)", - X86_FEATURE_3DNOW, - "r" (x)); + alternative_input(BASE_PREFETCH, "prefetchw %P1", + X86_FEATURE_3DNOWPREFETCH, + "m" (*(const char *)x)); } static inline void spin_lock_prefetch(const void *x) @@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x) prefetchw(x); } +#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ + TOP_OF_KERNEL_STACK_PADDING) + #ifdef CONFIG_X86_32 /* * User space process size: 3GB (default). @@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x) #define STACK_TOP_MAX STACK_TOP #define INIT_THREAD { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ + .sp0 = TOP_OF_INIT_STACK, \ .vm86_info = NULL, \ .sysenter_cs = __KERNEL_CS, \ .io_bitmap_ptr = NULL, \ } -/* - * Note that the .io_bitmap member must be extra-big. This is because - * the CPU will access an additional byte beyond the end of the IO - * permission bitmap. The extra byte must be all 1 bits, and must - * be within the limit. - */ -#define INIT_TSS { \ - .x86_tss = { \ - .sp0 = sizeof(init_stack) + (long)&init_stack, \ - .ss0 = __KERNEL_DS, \ - .ss1 = __KERNEL_CS, \ - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ - }, \ - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \ -} - extern unsigned long thread_saved_pc(struct task_struct *tsk); -#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long)) -#define KSTK_TOP(info) \ -({ \ - unsigned long *__ptr = (unsigned long *)(info); \ - (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \ -}) - /* - * The below -8 is to reserve 8 bytes on top of the ring0 stack. + * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack. * This is necessary to guarantee that the entire "struct pt_regs" * is accessible even if the CPU haven't stored the SS/ESP registers * on the stack (interrupt gate does not save these registers @@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); * "struct pt_regs" is possible, but they may contain the * completely wrong values. */ -#define task_pt_regs(task) \ -({ \ - struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ - __regs__ - 1; \ +#define task_pt_regs(task) \ +({ \ + unsigned long __ptr = (unsigned long)task_stack_page(task); \ + __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \ + ((struct pt_regs *)__ptr) - 1; \ }) #define KSTK_ESP(task) (task_pt_regs(task)->sp) @@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ - .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ -} - -#define INIT_TSS { \ - .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ + .sp0 = TOP_OF_INIT_STACK \ } /* @@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) extern unsigned long KSTK_ESP(struct task_struct *task); -/* - * User space RSP while inside the SYSCALL fast path - */ -DECLARE_PER_CPU(unsigned long, old_rsp); - #endif /* CONFIG_X86_64 */ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 86fc2bb82287..19507ffa5d28 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -31,13 +31,17 @@ struct pt_regs { #else /* __i386__ */ struct pt_regs { +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long bp; unsigned long bx; -/* arguments: non interrupts/non tracing syscalls only save up to here*/ +/* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; @@ -47,9 +51,12 @@ struct pt_regs { unsigned long dx; unsigned long si; unsigned long di; +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ unsigned long orig_ax; -/* end of arguments */ -/* cpu exception frame or undefined */ +/* Return frame for iretq */ unsigned long ip; unsigned long cs; unsigned long flags; @@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) } /* - * user_mode_vm(regs) determines whether a register set came from user mode. - * This is true if V8086 mode was enabled OR if the register set was from - * protected mode with RPL-3 CS value. This tricky test checks that with - * one comparison. Many places in the kernel can bypass this full check - * if they have already ruled out V8086 mode, so user_mode(regs) can be used. + * user_mode(regs) determines whether a register set came from user + * mode. On x86_32, this is true if V8086 mode was enabled OR if the + * register set was from protected mode with RPL-3 CS value. This + * tricky test checks that with one comparison. + * + * On x86_64, vm86 mode is mercifully nonexistent, and we don't need + * the extra check. */ static inline int user_mode(struct pt_regs *regs) { @@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs) #endif } -static inline int user_mode_vm(struct pt_regs *regs) -{ -#ifdef CONFIG_X86_32 - return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= - USER_RPL; -#else - return user_mode(regs); -#endif -} - static inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 @@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } -#define current_user_stack_pointer() this_cpu_read(old_rsp) -/* ia32 vs. x32 difference */ -#define compat_user_stack_pointer() \ - (test_thread_flag(TIF_IA32) \ - ? current_pt_regs()->sp \ - : this_cpu_read(old_rsp)) +#define current_user_stack_pointer() current_pt_regs()->sp +#define compat_user_stack_pointer() current_pt_regs()->sp #endif #ifdef CONFIG_X86_32 @@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, */ #define arch_ptrace_stop_needed(code, info) \ ({ \ - set_thread_flag(TIF_NOTIFY_RESUME); \ + force_iret(); \ false; \ }) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e9fa28..25b1cc07d496 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; + u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index db257a58571f..5a9856eb12ba 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -3,8 +3,10 @@ #include <linux/const.h> -/* Constructor for a conventional segment GDT (or LDT) entry */ -/* This is a macro so it can be used in initializers */ +/* + * Constructor for a conventional segment GDT (or LDT) entry. + * This is a macro so it can be used in initializers. + */ #define GDT_ENTRY(flags, base, limit) \ ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ @@ -12,198 +14,228 @@ (((base) & _AC(0x00ffffff,ULL)) << 16) | \ (((limit) & _AC(0x0000ffff,ULL)))) -/* Simple and small GDT entries for booting only */ +/* Simple and small GDT entries for booting only: */ #define GDT_ENTRY_BOOT_CS 2 -#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) +#define GDT_ENTRY_BOOT_DS 3 +#define GDT_ENTRY_BOOT_TSS 4 +#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8) +#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8) +#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8) + +/* + * Bottom two bits of selector give the ring + * privilege level + */ +#define SEGMENT_RPL_MASK 0x3 -#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) -#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) +/* User mode is privilege level 3: */ +#define USER_RPL 0x3 -#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) -#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) +/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */ +#define SEGMENT_TI_MASK 0x4 +/* LDT segment has TI set ... */ +#define SEGMENT_LDT 0x4 +/* ... GDT has it cleared */ +#define SEGMENT_GDT 0x0 -#define SEGMENT_RPL_MASK 0x3 /* - * Bottom two bits of selector give the ring - * privilege level - */ -#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */ -#define USER_RPL 0x3 /* User mode is privilege level 3 */ -#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */ -#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */ +#define GDT_ENTRY_INVALID_SEG 0 #ifdef CONFIG_X86_32 /* * The layout of the per-CPU GDT under Linux: * - * 0 - null + * 0 - null <=== cacheline #1 * 1 - reserved * 2 - reserved * 3 - reserved * - * 4 - unused <==== new cacheline + * 4 - unused <=== cacheline #2 * 5 - unused * * ------- start of TLS (Thread-Local Storage) segments: * * 6 - TLS segment #1 [ glibc's TLS segment ] * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] - * 8 - TLS segment #3 + * 8 - TLS segment #3 <=== cacheline #3 * 9 - reserved * 10 - reserved * 11 - reserved * * ------- start of kernel segments: * - * 12 - kernel code segment <==== new cacheline + * 12 - kernel code segment <=== cacheline #4 * 13 - kernel data segment * 14 - default user CS * 15 - default user DS - * 16 - TSS + * 16 - TSS <=== cacheline #5 * 17 - LDT * 18 - PNPBIOS support (16->32 gate) * 19 - PNPBIOS support - * 20 - PNPBIOS support + * 20 - PNPBIOS support <=== cacheline #6 * 21 - PNPBIOS support * 22 - PNPBIOS support * 23 - APM BIOS support - * 24 - APM BIOS support + * 24 - APM BIOS support <=== cacheline #7 * 25 - APM BIOS support * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - stack_canary-20 [ for stack protector ] + * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8 * 29 - unused * 30 - unused * 31 - TSS for double fault handler */ -#define GDT_ENTRY_TLS_MIN 6 -#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) +#define GDT_ENTRY_TLS_MIN 6 +#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) +#define GDT_ENTRY_KERNEL_CS 12 +#define GDT_ENTRY_KERNEL_DS 13 #define GDT_ENTRY_DEFAULT_USER_CS 14 - #define GDT_ENTRY_DEFAULT_USER_DS 15 +#define GDT_ENTRY_TSS 16 +#define GDT_ENTRY_LDT 17 +#define GDT_ENTRY_PNPBIOS_CS32 18 +#define GDT_ENTRY_PNPBIOS_CS16 19 +#define GDT_ENTRY_PNPBIOS_DS 20 +#define GDT_ENTRY_PNPBIOS_TS1 21 +#define GDT_ENTRY_PNPBIOS_TS2 22 +#define GDT_ENTRY_APMBIOS_BASE 23 + +#define GDT_ENTRY_ESPFIX_SS 26 +#define GDT_ENTRY_PERCPU 27 +#define GDT_ENTRY_STACK_CANARY 28 + +#define GDT_ENTRY_DOUBLEFAULT_TSS 31 -#define GDT_ENTRY_KERNEL_BASE (12) +/* + * Number of entries in the GDT table: + */ +#define GDT_ENTRIES 32 -#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) +/* + * Segment selector values corresponding to the above entries: + */ -#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) -#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) -#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5) +/* segment for calling fn: */ +#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8) +/* code segment for BIOS: */ +#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8) -#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6) -#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11) +/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */ +#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32) -#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14) -#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) +/* data segment for BIOS: */ +#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8) +/* transfer data segment: */ +#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8) +/* another data segment: */ +#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8) -#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15) #ifdef CONFIG_SMP -#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) +# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8) #else -#define __KERNEL_PERCPU 0 +# define __KERNEL_PERCPU 0 #endif -#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16) #ifdef CONFIG_CC_STACKPROTECTOR -#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) +# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) #else -#define __KERNEL_STACK_CANARY 0 +# define __KERNEL_STACK_CANARY 0 #endif -#define GDT_ENTRY_DOUBLEFAULT_TSS 31 - -/* - * The GDT has 32 entries - */ -#define GDT_ENTRIES 32 +#else /* 64-bit: */ -/* The PnP BIOS entries in the GDT */ -#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0) -#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1) -#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2) -#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3) -#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4) - -/* The PnP BIOS selectors */ -#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */ -#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */ -#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */ -#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */ -#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */ +#include <asm/cache.h> +#define GDT_ENTRY_KERNEL32_CS 1 +#define GDT_ENTRY_KERNEL_CS 2 +#define GDT_ENTRY_KERNEL_DS 3 /* - * Matching rules for certain types of segments. + * We cannot use the same code segment descriptor for user and kernel mode, + * not even in long flat mode, because of different DPL. + * + * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes + * selectors: + * + * if returning to 32-bit userspace: cs = STAR.SYSRET_CS, + * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16, + * + * ss = STAR.SYSRET_CS+8 (in either case) + * + * thus USER_DS should be between 32-bit and 64-bit code selectors: */ +#define GDT_ENTRY_DEFAULT_USER32_CS 4 +#define GDT_ENTRY_DEFAULT_USER_DS 5 +#define GDT_ENTRY_DEFAULT_USER_CS 6 -/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ -#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) - +/* Needs two entries */ +#define GDT_ENTRY_TSS 8 +/* Needs two entries */ +#define GDT_ENTRY_LDT 10 -#else -#include <asm/cache.h> - -#define GDT_ENTRY_KERNEL32_CS 1 -#define GDT_ENTRY_KERNEL_CS 2 -#define GDT_ENTRY_KERNEL_DS 3 +#define GDT_ENTRY_TLS_MIN 12 +#define GDT_ENTRY_TLS_MAX 14 -#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) +/* Abused to load per CPU data from limit */ +#define GDT_ENTRY_PER_CPU 15 /* - * we cannot use the same code segment descriptor for user and kernel - * -- not even in the long flat mode, because of different DPL /kkeil - * The segment offset needs to contain a RPL. Grr. -AK - * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets) + * Number of entries in the GDT table: */ -#define GDT_ENTRY_DEFAULT_USER32_CS 4 -#define GDT_ENTRY_DEFAULT_USER_DS 5 -#define GDT_ENTRY_DEFAULT_USER_CS 6 -#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) -#define __USER32_DS __USER_DS - -#define GDT_ENTRY_TSS 8 /* needs two entries */ -#define GDT_ENTRY_LDT 10 /* needs two entries */ -#define GDT_ENTRY_TLS_MIN 12 -#define GDT_ENTRY_TLS_MAX 14 - -#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */ -#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3) +#define GDT_ENTRIES 16 -/* TLS indexes for 64bit - hardcoded in arch_prctl */ -#define FS_TLS 0 -#define GS_TLS 1 - -#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) -#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) - -#define GDT_ENTRIES 16 +/* + * Segment selector values corresponding to the above entries: + * + * Note, selectors also need to have a correct RPL, + * expressed with the +3 value for user-space selectors: + */ +#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8) +#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) +#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3) +#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) +#define __USER32_DS __USER_DS +#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3) + +/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */ +#define FS_TLS 0 +#define GS_TLS 1 + +#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) +#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) #endif -#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8) -#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) -#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3) -#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3) #ifndef CONFIG_PARAVIRT -#define get_kernel_rpl() 0 +# define get_kernel_rpl() 0 #endif -#define IDT_ENTRIES 256 -#define NUM_EXCEPTION_VECTORS 32 -/* Bitmask of exception vectors which push an error code on the stack */ -#define EXCEPTION_ERRCODE_MASK 0x00027d00 -#define GDT_SIZE (GDT_ENTRIES * 8) -#define GDT_ENTRY_TLS_ENTRIES 3 -#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) +#define IDT_ENTRIES 256 +#define NUM_EXCEPTION_VECTORS 32 + +/* Bitmask of exception vectors which push an error code on the stack: */ +#define EXCEPTION_ERRCODE_MASK 0x00027d00 + +#define GDT_SIZE (GDT_ENTRIES*8) +#define GDT_ENTRY_TLS_ENTRIES 3 +#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) #ifdef __KERNEL__ #ifndef __ASSEMBLY__ + extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; #ifdef CONFIG_TRACING -#define trace_early_idt_handlers early_idt_handlers +# define trace_early_idt_handlers early_idt_handlers #endif /* @@ -228,37 +260,30 @@ do { \ } while (0) /* - * Save a segment register away + * Save a segment register away: */ #define savesegment(seg, value) \ asm("mov %%" #seg ",%0":"=r" (value) : : "memory") /* - * x86_32 user gs accessors. + * x86-32 user GS accessors: */ #ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_32_LAZY_GS -#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) -#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) -#define task_user_gs(tsk) ((tsk)->thread.gs) -#define lazy_save_gs(v) savesegment(gs, (v)) -#define lazy_load_gs(v) loadsegment(gs, (v)) -#else /* X86_32_LAZY_GS */ -#define get_user_gs(regs) (u16)((regs)->gs) -#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) -#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) -#define lazy_save_gs(v) do { } while (0) -#define lazy_load_gs(v) do { } while (0) -#endif /* X86_32_LAZY_GS */ +# ifdef CONFIG_X86_32_LAZY_GS +# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; }) +# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) +# define task_user_gs(tsk) ((tsk)->thread.gs) +# define lazy_save_gs(v) savesegment(gs, (v)) +# define lazy_load_gs(v) loadsegment(gs, (v)) +# else /* X86_32_LAZY_GS */ +# define get_user_gs(regs) (u16)((regs)->gs) +# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) +# define task_user_gs(tsk) (task_pt_regs(tsk)->gs) +# define lazy_save_gs(v) do { } while (0) +# define lazy_load_gs(v) do { } while (0) +# endif /* X86_32_LAZY_GS */ #endif /* X86_32 */ -static inline unsigned long get_limit(unsigned long segment) -{ - unsigned long __limit; - asm("lsll %1,%0" : "=r" (__limit) : "r" (segment)); - return __limit + 1; -} - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ff4e7b236e21..f69e06b283fb 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { } */ extern struct boot_params boot_params; +static inline bool kaslr_enabled(void) +{ + return !!(boot_params.hdr.loadflags & KASLR_FLAG); +} + /* * Do NOT EVER look at the BIOS memory size location. * It does not work on many machines. diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 9dfce4e0417d..6fe6b182c998 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -57,9 +57,9 @@ struct sigcontext { unsigned long ip; unsigned long flags; unsigned short cs; - unsigned short gs; - unsigned short fs; - unsigned short __pad0; + unsigned short __pad2; /* Was called gs, but was always zero. */ + unsigned short __pad1; /* Was called fs, but was always zero. */ + unsigned short ss; unsigned long err; unsigned long trapno; unsigned long oldmask; diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 7a958164088c..89db46752a8f 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -13,9 +13,7 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); - -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, - unsigned long *pax); +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 8d3120f4e270..ba665ebd17bb 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -27,23 +27,11 @@ #ifdef CONFIG_X86_SMAP -#define ASM_CLAC \ - 661: ASM_NOP3 ; \ - .pushsection .altinstr_replacement, "ax" ; \ - 662: __ASM_CLAC ; \ - .popsection ; \ - .pushsection .altinstructions, "a" ; \ - altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ - .popsection - -#define ASM_STAC \ - 661: ASM_NOP3 ; \ - .pushsection .altinstr_replacement, "ax" ; \ - 662: __ASM_STAC ; \ - .popsection ; \ - .pushsection .altinstructions, "a" ; \ - altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ - .popsection +#define ASM_CLAC \ + ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP + +#define ASM_STAC \ + ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP #else /* CONFIG_X86_SMAP */ @@ -61,20 +49,20 @@ static __always_inline void clac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); + alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP); } static __always_inline void stac(void) { /* Note: a barrier is implicit in alternative() */ - alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); + alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ - ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) + ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP) #define ASM_STAC \ - ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) + ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP) #else /* CONFIG_X86_SMAP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 8cd1cc3bc835..81d02fc7dafa 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu); void native_smp_prepare_boot_cpu(void); void native_smp_prepare_cpus(unsigned int max_cpus); void native_smp_cpus_done(unsigned int max_cpus); +void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); void native_cpu_die(unsigned int cpu); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 6a4b00fafb00..aeb4666e0c0a 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -4,6 +4,8 @@ #ifdef __KERNEL__ +#include <asm/nops.h> + static inline void native_clts(void) { asm volatile("clts"); @@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p) "+m" (*(volatile char __force *)__p)); } +static inline void clwb(volatile void *__p) +{ + volatile struct { char x[64]; } *p = __p; + + asm volatile(ALTERNATIVE_2( + ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])", + ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */ + X86_FEATURE_CLFLUSHOPT, + ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */ + X86_FEATURE_CLWB) + : [p] "+m" (*p) + : [pax] "a" (p)); +} + +static inline void pcommit_sfence(void) +{ + alternative(ASM_NOP7, + ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */ + "sfence", + X86_FEATURE_PCOMMIT); +} + #define nop() asm volatile ("nop") diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1d4e4f279a32..ea2dbe82cba3 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -13,6 +13,33 @@ #include <asm/types.h> /* + * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we + * reserve at the top of the kernel stack. We do it because of a nasty + * 32-bit corner case. On x86_32, the hardware stack frame is + * variable-length. Except for vm86 mode, struct pt_regs assumes a + * maximum-length frame. If we enter from CPL 0, the top 8 bytes of + * pt_regs don't actually exist. Ordinarily this doesn't matter, but it + * does in at least one case: + * + * If we take an NMI early enough in SYSENTER, then we can end up with + * pt_regs that extends above sp0. On the way out, in the espfix code, + * we can read the saved SS value, but that value will be above sp0. + * Without this offset, that can result in a page fault. (We are + * careful that, in this case, the value we read doesn't matter.) + * + * In vm86 mode, the hardware frame is much longer still, but we neither + * access the extra members from NMI context, nor do we write such a + * frame at sp0 at all. + * + * x86_64 has a fixed-length stack frame. + */ +#ifdef CONFIG_X86_32 +# define TOP_OF_KERNEL_STACK_PADDING 8 +#else +# define TOP_OF_KERNEL_STACK_PADDING 0 +#endif + +/* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages @@ -145,7 +172,6 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) -#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) /* * macros/functions for gaining access to the thread information structure @@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack); static inline struct thread_info *current_thread_info(void) { - struct thread_info *ti; - ti = (void *)(this_cpu_read_stable(kernel_stack) + - KERNEL_STACK_OFFSET - THREAD_SIZE); - return ti; + return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE); } static inline unsigned long current_stack_pointer(void) @@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void) #else /* !__ASSEMBLY__ */ -/* how to get the thread information struct from ASM */ +/* Load thread_info address into "reg" */ #define GET_THREAD_INFO(reg) \ _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ - _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; + _ASM_SUB $(THREAD_SIZE),reg ; /* - * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in - * a certain register (to be used in assembler memory operands). + * ASM operand which evaluates to a 'thread_info' address of + * the current task, if it is known that "reg" is exactly "off" + * bytes below the top of the stack currently. + * + * ( The kernel stack's size is known at build time, it is usually + * 2 or 4 pages, and the bottom of the kernel stack contains + * the thread_info structure. So to access the thread_info very + * quickly from assembly code we can calculate down from the + * top of the kernel stack to the bottom, using constant, + * build-time calculations only. ) + * + * For example, to fetch the current thread_info->flags value into %eax + * on x86-64 defconfig kernels, in syscall entry code where RSP is + * currently at exactly SIZEOF_PTREGS bytes away from the top of the + * stack: + * + * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax + * + * will translate to: + * + * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax + * + * which is below the current RSP by almost 16K. */ -#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) +#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg) #endif @@ -236,6 +280,16 @@ static inline bool is_ia32_task(void) #endif return false; } + +/* + * Force syscall return via IRET by making it look as if there was + * some work pending. IRET is our most capable (but slowest) syscall + * return path, which is able to restore modified SS, CS and certain + * EFLAGS values that other (fast) syscall return instructions + * are not able to restore properly. + */ +#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) + #endif /* !__ASSEMBLY__ */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 12a26b979bf1..f2f9b39b274a 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src, } unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); +copy_user_handle_tail(char *to, char *from, unsigned len); #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 5fa9770035dc..c9a6d68b8d62 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -82,18 +82,15 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XSAVES"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XSAVE"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); - - asm volatile(xstate_fault - : "0" (0) - : "memory"); - return err; } @@ -112,18 +109,15 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XRSTORS"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XRSTOR"\n\t" "2:\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); - - asm volatile(xstate_fault - : "0" (0) - : "memory"); - return err; } @@ -149,9 +143,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask) */ alternative_input_2( "1:"XSAVE, - "1:"XSAVEOPT, + XSAVEOPT, X86_FEATURE_XSAVEOPT, - "1:"XSAVES, + XSAVES, X86_FEATURE_XSAVES, [fx] "D" (fx), "a" (lmask), "d" (hmask) : "memory"); @@ -178,7 +172,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) */ alternative_input( "1: " XRSTOR, - "1: " XRSTORS, + XRSTORS, X86_FEATURE_XSAVES, "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 44e6dd7e36a2..ab456dc233b5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -7,7 +7,6 @@ #define SETUP_DTB 2 #define SETUP_PCI 3 #define SETUP_EFI 4 -#define SETUP_KASLR 5 /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -16,6 +15,7 @@ /* loadflags */ #define LOADED_HIGH (1<<0) +#define KASLR_FLAG (1<<1) #define QUIET_FLAG (1<<5) #define KEEP_SEGMENTS (1<<6) #define CAN_USE_HEAP (1<<7) diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h index 7b0a55a88851..580aee3072e0 100644 --- a/arch/x86/include/uapi/asm/ptrace-abi.h +++ b/arch/x86/include/uapi/asm/ptrace-abi.h @@ -25,13 +25,17 @@ #else /* __i386__ */ #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ #define R15 0 #define R14 8 #define R13 16 #define R12 24 #define RBP 32 #define RBX 40 -/* arguments: interrupts/non tracing syscalls only save up to here*/ +/* These regs are callee-clobbered. Always saved on kernel entry. */ #define R11 48 #define R10 56 #define R9 64 @@ -41,15 +45,17 @@ #define RDX 96 #define RSI 104 #define RDI 112 -#define ORIG_RAX 120 /* = ERROR */ -/* end of arguments */ -/* cpu exception frame or undefined in case of fast syscall. */ +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ +#define ORIG_RAX 120 +/* Return frame for iretq */ #define RIP 128 #define CS 136 #define EFLAGS 144 #define RSP 152 #define SS 160 -#define ARGOFFSET R11 #endif /* __ASSEMBLY__ */ /* top of stack page */ diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h index ac4b9aa4d999..bc16115af39b 100644 --- a/arch/x86/include/uapi/asm/ptrace.h +++ b/arch/x86/include/uapi/asm/ptrace.h @@ -41,13 +41,17 @@ struct pt_regs { #ifndef __KERNEL__ struct pt_regs { +/* + * C ABI says these regs are callee-preserved. They aren't saved on kernel entry + * unless syscall needs a complete, fully filled "struct pt_regs". + */ unsigned long r15; unsigned long r14; unsigned long r13; unsigned long r12; unsigned long rbp; unsigned long rbx; -/* arguments: non interrupts/non tracing syscalls only save up to here*/ +/* These regs are callee-clobbered. Always saved on kernel entry. */ unsigned long r11; unsigned long r10; unsigned long r9; @@ -57,9 +61,12 @@ struct pt_regs { unsigned long rdx; unsigned long rsi; unsigned long rdi; +/* + * On syscall entry, this is syscall#. On CPU exception, this is error code. + * On hw interrupt, it's IRQ number: + */ unsigned long orig_rax; -/* end of arguments */ -/* cpu exception frame or undefined */ +/* Return frame for iretq */ unsigned long rip; unsigned long cs; unsigned long eflags; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index d8b9f9081e86..16dc4e8a2cd3 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -177,9 +177,24 @@ struct sigcontext { __u64 rip; __u64 eflags; /* RFLAGS */ __u16 cs; - __u16 gs; - __u16 fs; - __u16 __pad0; + + /* + * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"), + * Linux saved and restored fs and gs in these slots. This + * was counterproductive, as fsbase and gsbase were never + * saved, so arch_prctl was presumably unreliable. + * + * If these slots are ever needed for any other purpose, there + * is some risk that very old 64-bit binaries could get + * confused. I doubt that many such binaries still work, + * though, since the same patch in 2.5.64 also removed the + * 64-bit set_thread_area syscall, so it appears that there is + * no TLS API that works in both pre- and post-2.5.64 kernels. + */ + __u16 __pad2; /* Was gs. */ + __u16 __pad1; /* Was fs. */ + + __u16 ss; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1deb91a..1fe92181ee9e 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cdb1b70ddad0..c887cd944f0c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += mcount_64.o obj-y += syscall_$(BITS).o vsyscall_gtod.o +obj-$(CONFIG_IA32_EMULATION) += syscall_32.o obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3d525c6124f6..803b684676ff 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) } /* + * ACPI offers an alternative platform interface model that removes + * ACPI hardware requirements for platforms that do not implement + * the PC Architecture. + * + * We initialize the Hardware-reduced ACPI model here: + */ +static void __init acpi_reduced_hw_init(void) +{ + if (acpi_gbl_reduced_hardware) { + /* + * Override x86_init functions and bypass legacy pic + * in Hardware-reduced ACPI mode + */ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; + legacy_pic = &null_legacy_pic; + } +} + +/* * If your system is blacklisted here, but you find that acpi=force * works for you, please contact linux-acpi@vger.kernel.org */ @@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void) */ early_acpi_process_madt(); + /* + * Hardware-reduced ACPI mode initialization: + */ + acpi_reduced_hw_init(); + return 0; } diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 703130f469ec..aef653193160 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str) __setup("noreplace-paravirt", setup_noreplace_paravirt); #endif -#define DPRINTK(fmt, ...) \ -do { \ - if (debug_alternative) \ - printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ +#define DPRINTK(fmt, args...) \ +do { \ + if (debug_alternative) \ + printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \ +} while (0) + +#define DUMP_BYTES(buf, len, fmt, args...) \ +do { \ + if (unlikely(debug_alternative)) { \ + int j; \ + \ + if (!(len)) \ + break; \ + \ + printk(KERN_DEBUG fmt, ##args); \ + for (j = 0; j < (len) - 1; j++) \ + printk(KERN_CONT "%02hhx ", buf[j]); \ + printk(KERN_CONT "%02hhx\n", buf[j]); \ + } \ } while (0) /* @@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void *text_poke_early(void *addr, const void *opcode, size_t len); -/* Replace instructions with better alternatives for this CPU type. - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that asymmetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ +/* + * Are we looking at a near JMP with a 1 or 4-byte displacement. + */ +static inline bool is_jmp(const u8 opcode) +{ + return opcode == 0xeb || opcode == 0xe9; +} + +static void __init_or_module +recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) +{ + u8 *next_rip, *tgt_rip; + s32 n_dspl, o_dspl; + int repl_len; + + if (a->replacementlen != 5) + return; + + o_dspl = *(s32 *)(insnbuf + 1); + + /* next_rip of the replacement JMP */ + next_rip = repl_insn + a->replacementlen; + /* target rip of the replacement JMP */ + tgt_rip = next_rip + o_dspl; + n_dspl = tgt_rip - orig_insn; + + DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl); + + if (tgt_rip - orig_insn >= 0) { + if (n_dspl - 2 <= 127) + goto two_byte_jmp; + else + goto five_byte_jmp; + /* negative offset */ + } else { + if (((n_dspl - 2) & 0xff) == (n_dspl - 2)) + goto two_byte_jmp; + else + goto five_byte_jmp; + } + +two_byte_jmp: + n_dspl -= 2; + + insnbuf[0] = 0xeb; + insnbuf[1] = (s8)n_dspl; + add_nops(insnbuf + 2, 3); + + repl_len = 2; + goto done; + +five_byte_jmp: + n_dspl -= 5; + + insnbuf[0] = 0xe9; + *(s32 *)&insnbuf[1] = n_dspl; + repl_len = 5; + +done: + + DPRINTK("final displ: 0x%08x, JMP 0x%lx", + n_dspl, (unsigned long)orig_insn + n_dspl + repl_len); +} + +static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) +{ + if (instr[0] != 0x90) + return; + + add_nops(instr + (a->instrlen - a->padlen), a->padlen); + + DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", + instr, a->instrlen - a->padlen, a->padlen); +} + +/* + * Replace instructions with better alternatives for this CPU type. This runs + * before SMP is initialized to avoid SMP problems with self modifying code. + * This implies that asymmetric systems where APs have less capabilities than + * the boot processor are not handled. Tough. Make sure you disable such + * features by hand. + */ void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end) { @@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start, u8 *instr, *replacement; u8 insnbuf[MAX_PATCH_LEN]; - DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); + DPRINTK("alt table %p -> %p", start, end); /* * The scan order should be from start to end. A later scanned - * alternative code can overwrite a previous scanned alternative code. + * alternative code can overwrite previously scanned alternative code. * Some kernel functions (e.g. memcpy, memset, etc) use this order to * patch code. * @@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { + int insnbuf_sz = 0; + instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - BUG_ON(a->replacementlen > a->instrlen); BUG_ON(a->instrlen > sizeof(insnbuf)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); - if (!boot_cpu_has(a->cpuid)) + if (!boot_cpu_has(a->cpuid)) { + if (a->padlen > 1) + optimize_nops(a, instr); + continue; + } + + DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d", + a->cpuid >> 5, + a->cpuid & 0x1f, + instr, a->instrlen, + replacement, a->replacementlen, a->padlen); + + DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr); + DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement); memcpy(insnbuf, replacement, a->replacementlen); + insnbuf_sz = a->replacementlen; /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) - *(s32 *)(insnbuf + 1) += replacement - instr; + if (*insnbuf == 0xe8 && a->replacementlen == 5) { + *(s32 *)(insnbuf + 1) += replacement - instr; + DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", + *(s32 *)(insnbuf + 1), + (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5); + } + + if (a->replacementlen && is_jmp(replacement[0])) + recompute_jump(a, instr, replacement, insnbuf); - add_nops(insnbuf + a->replacementlen, - a->instrlen - a->replacementlen); + if (a->instrlen > a->replacementlen) { + add_nops(insnbuf + a->replacementlen, + a->instrlen - a->replacementlen); + insnbuf_sz += a->instrlen - a->replacementlen; + } + DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr); - text_poke_early(instr, insnbuf, a->instrlen); + text_poke_early(instr, insnbuf, insnbuf_sz); } } #ifdef CONFIG_SMP - static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) { @@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, smp->locks_end = locks_end; smp->text = text; smp->text_end = text_end; - DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", - __func__, smp->locks, smp->locks_end, + DPRINTK("locks %p -> %p, text %p -> %p, name %s\n", + smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); list_add_tail(&smp->next, &smp_alt_modules); @@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end) return 0; } -#endif +#endif /* CONFIG_SMP */ #ifdef CONFIG_PARAVIRT void __init_or_module apply_paravirt(struct paravirt_patch_site *start, @@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs) if (likely(!bp_patching_in_progress)) return 0; - if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr) + if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) return 0; /* set up the specified breakpoint handler */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ad3639ae1b9b..dcb52850a28f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1084,67 +1084,6 @@ void lapic_shutdown(void) local_irq_restore(flags); } -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ apic->apic_id_mask)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - /** * sync_Arb_IDs - synchronize APIC bus arbitration IDs */ @@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void) disable_ioapic_support(); default_setup_apic_routing(); - verify_local_APIC(); apic_bsp_setup(true); return 0; } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index c2fd21fed002..017149cded07 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -37,10 +37,12 @@ static const struct apic apic_numachip; static unsigned int get_apic_id(unsigned long x) { unsigned long value; - unsigned int id; + unsigned int id = (x >> 24) & 0xff; - rdmsrl(MSR_FAM10H_NODE_ID, value); - id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U); + if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + rdmsrl(MSR_FAM10H_NODE_ID, value); + id |= (value << 2) & 0xff00; + } return id; } @@ -155,10 +157,18 @@ static int __init numachip_probe(void) static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) { - if (c->phys_proc_id != node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; + u64 val; + u32 nodes = 1; + + this_cpu_write(cpu_llc_id, node); + + /* Account for nodes per socket in multi-core-module processors */ + if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) { + rdmsrl(MSR_FAM10H_NODE_ID, val); + nodes = ((val >> 3) & 7) + 1; } + + c->phys_proc_id = node / nodes; } static int __init numachip_system_init(void) diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index e658f21681c8..d9d0bd2faaf4 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -135,12 +135,12 @@ static void init_x2apic_ldr(void) per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); - __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); for_each_online_cpu(cpu) { if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) continue; - __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu)); - __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu)); + cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); } } @@ -195,7 +195,7 @@ static int x2apic_init_cpu_notifier(void) BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu)); - __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu)); + cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); register_hotcpu_notifier(&x2apic_cpu_notifier); return 1; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 8e9dcfd630e4..c8d92950bc04 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void) static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int pnodeid, is_uv1, is_uv2, is_uv3; - - is_uv1 = !strcmp(oem_id, "SGI"); - is_uv2 = !strcmp(oem_id, "SGI2"); - is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */ - if (is_uv1 || is_uv2 || is_uv3) { - uv_hub_info->hub_revision = - (is_uv1 ? UV1_HUB_REVISION_BASE : - (is_uv2 ? UV2_HUB_REVISION_BASE : - UV3_HUB_REVISION_BASE)); - pnodeid = early_get_pnodeid(); - early_get_apic_pnode_shift(); - x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; - x86_platform.nmi_init = uv_nmi_init; - if (!strcmp(oem_table_id, "UVL")) - uv_system_type = UV_LEGACY_APIC; - else if (!strcmp(oem_table_id, "UVX")) - uv_system_type = UV_X2APIC; - else if (!strcmp(oem_table_id, "UVH")) { - __this_cpu_write(x2apic_extra_bits, - pnodeid << uvh_apicid.s.pnode_shift); - uv_system_type = UV_NON_UNIQUE_APIC; - uv_set_apicid_hibit(); - return 1; - } + int pnodeid; + int uv_apic; + + if (strncmp(oem_id, "SGI", 3) != 0) + return 0; + + /* + * Determine UV arch type. + * SGI: UV100/1000 + * SGI2: UV2000/3000 + * SGI3: UV300 (truncated to 4 chars because of different varieties) + */ + uv_hub_info->hub_revision = + !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE : + !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : + !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0; + + if (uv_hub_info->hub_revision == 0) + goto badbios; + + pnodeid = early_get_pnodeid(); + early_get_apic_pnode_shift(); + x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; + x86_platform.nmi_init = uv_nmi_init; + + if (!strcmp(oem_table_id, "UVX")) { /* most common */ + uv_system_type = UV_X2APIC; + uv_apic = 0; + + } else if (!strcmp(oem_table_id, "UVH")) { /* only UV1 systems */ + uv_system_type = UV_NON_UNIQUE_APIC; + __this_cpu_write(x2apic_extra_bits, + pnodeid << uvh_apicid.s.pnode_shift); + uv_set_apicid_hibit(); + uv_apic = 1; + + } else if (!strcmp(oem_table_id, "UVL")) { /* only used for */ + uv_system_type = UV_LEGACY_APIC; /* very small systems */ + uv_apic = 0; + + } else { + goto badbios; } - return 0; + + pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", + oem_id, oem_table_id, uv_system_type, + uv_min_hub_revision_id, uv_apic); + + return uv_apic; + +badbios: + pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id); + pr_err("Current BIOS not supported, update kernel and/or BIOS\n"); + BUG(); } enum uv_system_type get_uv_system_type(void) @@ -854,10 +881,14 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; unsigned char n_lshift; - char *hub = (is_uv1_hub() ? "UV1" : - (is_uv2_hub() ? "UV2" : - "UV3")); + char *hub = (is_uv1_hub() ? "UV100/1000" : + (is_uv2_hub() ? "UV2000/3000" : + (is_uv3_hub() ? "UV300" : NULL))); + if (!hub) { + pr_err("UV: Unknown/unsupported UV hub\n"); + return; + } pr_info("UV: Found %s hub\n", hub); map_low_mmrs(); diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 3b3b9d33ac1d..47703aed74cf 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -68,7 +68,7 @@ void foo(void) /* Offset from the sysenter stack to tss.sp0 */ DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - - sizeof(struct tss_struct)); + offsetofend(struct tss_struct, SYSENTER_stack)); #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index fdcbb4d27c9f..5ce6f2da8763 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -81,6 +81,7 @@ int main(void) #undef ENTRY OFFSET(TSS_ist, tss_struct, x86_tss.ist); + OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); BLANK(); DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a220239cea65..dd9e50500297 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_APIC_C1E); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); + + /* 3DNow or LM implies PREFETCHW */ + if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH)) + if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM)) + set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2346c95c6ab1..3f70538012e2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif } -#ifdef CONFIG_X86_64 -#ifdef CONFIG_IA32_EMULATION -/* May not be __init: called during resume */ -static void syscall32_cpu_init(void) -{ - /* Load these always in case some future AMD CPU supports - SYSENTER from compat mode too. */ - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - - wrmsrl(MSR_CSTAR, ia32_cstar_target); -} -#endif /* CONFIG_IA32_EMULATION */ -#endif /* CONFIG_X86_64 */ - +/* + * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions + * on 32-bit kernels: + */ #ifdef CONFIG_X86_32 void enable_sep_cpu(void) { - int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss; + int cpu; - if (!boot_cpu_has(X86_FEATURE_SEP)) { - put_cpu(); - return; - } + cpu = get_cpu(); + tss = &per_cpu(cpu_tss, cpu); + + if (!boot_cpu_has(X86_FEATURE_SEP)) + goto out; + + /* + * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field -- + * see the big comment in struct x86_hw_tss's definition. + */ tss->x86_tss.ss1 = __KERNEL_CS; - tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); - wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); + wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); + + wrmsr(MSR_IA32_SYSENTER_ESP, + (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), + 0); + + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0); + +out: put_cpu(); } #endif @@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg) __setup("clearcpuid=", setup_disablecpuid); DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; + (unsigned long)&init_thread_union + THREAD_SIZE; EXPORT_PER_CPU_SYMBOL(kernel_stack); #ifdef CONFIG_X86_64 @@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; /* - * The following four percpu variables are hot. Align current_task to - * cacheline size such that all four fall in the same cacheline. + * The following percpu variables are hot. Align current_task to + * cacheline size such that they fall in the same cacheline. */ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; @@ -1171,10 +1170,23 @@ void syscall_init(void) */ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); wrmsrl(MSR_LSTAR, system_call); - wrmsrl(MSR_CSTAR, ignore_sysret); #ifdef CONFIG_IA32_EMULATION - syscall32_cpu_init(); + wrmsrl(MSR_CSTAR, ia32_cstar_target); + /* + * This only works on Intel CPUs. + * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. + * This does not cause SYSENTER to jump to the wrong location, because + * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). + */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); +#else + wrmsrl(MSR_CSTAR, ignore_sysret); + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); #endif /* Flags to clear on syscall */ @@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +/* + * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find + * the top of the kernel stack. Use an extra percpu variable to track the + * top of the kernel stack directly. + */ +DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = + (unsigned long)&init_thread_union + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); + #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif @@ -1307,7 +1328,7 @@ void cpu_init(void) */ load_ucode_ap(); - t = &per_cpu(init_tss, cpu); + t = &per_cpu(cpu_tss, cpu); oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA @@ -1391,7 +1412,7 @@ void cpu_init(void) { int cpu = smp_processor_id(); struct task_struct *curr = current; - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = &per_cpu(cpu_tss, cpu); struct thread_struct *thread = &curr->thread; wait_for_master_cpu(cpu); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 659643376dbf..edcb0e28c336 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -7,16 +7,14 @@ * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. */ -#include <linux/init.h> #include <linux/slab.h> -#include <linux/device.h> -#include <linux/compiler.h> +#include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/sched.h> +#include <linux/sysfs.h> #include <linux/pci.h> #include <asm/processor.h> -#include <linux/smp.h> #include <asm/amd_nb.h> #include <asm/smp.h> @@ -116,10 +114,10 @@ static const struct _cache_table cache_table[] = enum _cache_type { - CACHE_TYPE_NULL = 0, - CACHE_TYPE_DATA = 1, - CACHE_TYPE_INST = 2, - CACHE_TYPE_UNIFIED = 3 + CTYPE_NULL = 0, + CTYPE_DATA = 1, + CTYPE_INST = 2, + CTYPE_UNIFIED = 3 }; union _cpuid4_leaf_eax { @@ -159,11 +157,6 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; }; -struct _cpuid4_info { - struct _cpuid4_info_regs base; - DECLARE_BITMAP(shared_cpu_map, NR_CPUS); -}; - unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -220,6 +213,13 @@ static const unsigned short assocs[] = { static const unsigned char levels[] = { 1, 1, 2, 3 }; static const unsigned char types[] = { 1, 2, 3, 3 }; +static const enum cache_type cache_type_map[] = { + [CTYPE_NULL] = CACHE_TYPE_NOCACHE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INST] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; + static void amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, @@ -291,14 +291,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -struct _cache_attr { - struct attribute attr; - ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int); - ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count, - unsigned int); -}; - #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) + /* * L3 cache descriptors */ @@ -325,20 +319,6 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb) l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } -static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) -{ - int node; - - /* only for L3, and not in virtualized environments */ - if (index < 3) - return; - - node = amd_get_nb_id(smp_processor_id()); - this_leaf->nb = node_to_amd_nb(node); - if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) - amd_calc_l3_indices(this_leaf->nb); -} - /* * check whether a slot used for disabling an L3 index is occupied. * @l3: L3 cache descriptor @@ -359,15 +339,13 @@ int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) return -1; } -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, +static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf, unsigned int slot) { int index; + struct amd_northbridge *nb = this_leaf->priv; - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return -EINVAL; - - index = amd_get_l3_disable_slot(this_leaf->base.nb, slot); + index = amd_get_l3_disable_slot(nb, slot); if (index >= 0) return sprintf(buf, "%d\n", index); @@ -376,9 +354,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, #define SHOW_CACHE_DISABLE(slot) \ static ssize_t \ -show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \ - unsigned int cpu) \ +cache_disable_##slot##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ { \ + struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ return show_cache_disable(this_leaf, buf, slot); \ } SHOW_CACHE_DISABLE(0) @@ -446,25 +425,23 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, return 0; } -static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, - const char *buf, size_t count, - unsigned int slot) +static ssize_t store_cache_disable(struct cacheinfo *this_leaf, + const char *buf, size_t count, + unsigned int slot) { unsigned long val = 0; int cpu, err = 0; + struct amd_northbridge *nb = this_leaf->priv; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return -EINVAL; - - cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + cpu = cpumask_first(&this_leaf->shared_cpu_map); if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; - err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); + err = amd_set_l3_disable_slot(nb, cpu, slot, val); if (err) { if (err == -EEXIST) pr_warning("L3 slot %d in use/index already disabled!\n", @@ -476,41 +453,36 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, #define STORE_CACHE_DISABLE(slot) \ static ssize_t \ -store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ - const char *buf, size_t count, \ - unsigned int cpu) \ +cache_disable_##slot##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ { \ + struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ return store_cache_disable(this_leaf, buf, count, slot); \ } STORE_CACHE_DISABLE(0) STORE_CACHE_DISABLE(1) -static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, - show_cache_disable_0, store_cache_disable_0); -static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, - show_cache_disable_1, store_cache_disable_1); - -static ssize_t -show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) +static ssize_t subcaches_show(struct device *dev, + struct device_attribute *attr, char *buf) { - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return -EINVAL; + struct cacheinfo *this_leaf = dev_get_drvdata(dev); + int cpu = cpumask_first(&this_leaf->shared_cpu_map); return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); } -static ssize_t -store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, - unsigned int cpu) +static ssize_t subcaches_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { + struct cacheinfo *this_leaf = dev_get_drvdata(dev); + int cpu = cpumask_first(&this_leaf->shared_cpu_map); unsigned long val; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return -EINVAL; - if (kstrtoul(buf, 16, &val) < 0) return -EINVAL; @@ -520,9 +492,92 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, return count; } -static struct _cache_attr subcaches = - __ATTR(subcaches, 0644, show_subcaches, store_subcaches); +static DEVICE_ATTR_RW(cache_disable_0); +static DEVICE_ATTR_RW(cache_disable_1); +static DEVICE_ATTR_RW(subcaches); + +static umode_t +cache_private_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct cacheinfo *this_leaf = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (!this_leaf->priv) + return 0; + + if ((attr == &dev_attr_subcaches.attr) && + amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + return mode; + + if ((attr == &dev_attr_cache_disable_0.attr || + attr == &dev_attr_cache_disable_1.attr) && + amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + return mode; + + return 0; +} + +static struct attribute_group cache_private_group = { + .is_visible = cache_private_attrs_is_visible, +}; + +static void init_amd_l3_attrs(void) +{ + int n = 1; + static struct attribute **amd_l3_attrs; + + if (amd_l3_attrs) /* already initialized */ + return; + + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + n += 2; + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + n += 1; + + amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); + if (!amd_l3_attrs) + return; + + n = 0; + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { + amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; + amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; + } + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + amd_l3_attrs[n++] = &dev_attr_subcaches.attr; + cache_private_group.attrs = amd_l3_attrs; +} + +const struct attribute_group * +cache_get_priv_group(struct cacheinfo *this_leaf) +{ + struct amd_northbridge *nb = this_leaf->priv; + + if (this_leaf->level < 3 || !nb) + return NULL; + + if (nb && nb->l3_cache.indices) + init_amd_l3_attrs(); + + return &cache_private_group; +} + +static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) +{ + int node; + + /* only for L3, and not in virtualized environments */ + if (index < 3) + return; + + node = amd_get_nb_id(smp_processor_id()); + this_leaf->nb = node_to_amd_nb(node); + if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) + amd_calc_l3_indices(this_leaf->nb); +} #else #define amd_init_l3_cache(x, y) #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ @@ -546,7 +601,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); } - if (eax.split.type == CACHE_TYPE_NULL) + if (eax.split.type == CTYPE_NULL) return -EIO; /* better error ? */ this_leaf->eax = eax; @@ -575,7 +630,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) /* Do cpuid(op) loop to find out num_cache_leaves */ cpuid_count(op, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; - } while (cache_eax.split.type != CACHE_TYPE_NULL); + } while (cache_eax.split.type != CTYPE_NULL); return i; } @@ -626,9 +681,9 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) switch (this_leaf.eax.split.level) { case 1: - if (this_leaf.eax.split.type == CACHE_TYPE_DATA) + if (this_leaf.eax.split.type == CTYPE_DATA) new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == CACHE_TYPE_INST) + else if (this_leaf.eax.split.type == CTYPE_INST) new_l1i = this_leaf.size/1024; break; case 2: @@ -747,55 +802,52 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) return l2; } -#ifdef CONFIG_SYSFS - -/* pointer to _cpuid4_info array (for each cache leaf) */ -static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) - -#ifdef CONFIG_SMP - -static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) +static int __cache_amd_cpumap_setup(unsigned int cpu, int index, + struct _cpuid4_info_regs *base) { - struct _cpuid4_info *this_leaf; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf; int i, sibling; if (cpu_has_topoext) { unsigned int apicid, nshared, first, last; - if (!per_cpu(ici_cpuid4_info, cpu)) - return 0; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - nshared = this_leaf->base.eax.split.num_threads_sharing + 1; + this_leaf = this_cpu_ci->info_list + index; + nshared = base->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).apicid; first = apicid - (apicid % nshared); last = first + nshared - 1; for_each_online_cpu(i) { + this_cpu_ci = get_cpu_cacheinfo(i); + if (!this_cpu_ci->info_list) + continue; + apicid = cpu_data(i).apicid; if ((apicid < first) || (apicid > last)) continue; - if (!per_cpu(ici_cpuid4_info, i)) - continue; - this_leaf = CPUID4_INFO_IDX(i, index); + + this_leaf = this_cpu_ci->info_list + index; for_each_online_cpu(sibling) { apicid = cpu_data(sibling).apicid; if ((apicid < first) || (apicid > last)) continue; - set_bit(sibling, this_leaf->shared_cpu_map); + cpumask_set_cpu(sibling, + &this_leaf->shared_cpu_map); } } } else if (index == 3) { for_each_cpu(i, cpu_llc_shared_mask(cpu)) { - if (!per_cpu(ici_cpuid4_info, i)) + this_cpu_ci = get_cpu_cacheinfo(i); + if (!this_cpu_ci->info_list) continue; - this_leaf = CPUID4_INFO_IDX(i, index); + this_leaf = this_cpu_ci->info_list + index; for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { if (!cpu_online(sibling)) continue; - set_bit(sibling, this_leaf->shared_cpu_map); + cpumask_set_cpu(sibling, + &this_leaf->shared_cpu_map); } } } else @@ -804,457 +856,86 @@ static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) return 1; } -static void cache_shared_cpu_map_setup(unsigned int cpu, int index) +static void __cache_cpumap_setup(unsigned int cpu, int index, + struct _cpuid4_info_regs *base) { - struct _cpuid4_info *this_leaf, *sibling_leaf; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; int index_msb, i; struct cpuinfo_x86 *c = &cpu_data(cpu); if (c->x86_vendor == X86_VENDOR_AMD) { - if (cache_shared_amd_cpu_map_setup(cpu, index)) + if (__cache_amd_cpumap_setup(cpu, index, base)) return; } - this_leaf = CPUID4_INFO_IDX(cpu, index); - num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; + this_leaf = this_cpu_ci->info_list + index; + num_threads_sharing = 1 + base->eax.split.num_threads_sharing; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); if (num_threads_sharing == 1) - cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); - else { - index_msb = get_count_order(num_threads_sharing); - - for_each_online_cpu(i) { - if (cpu_data(i).apicid >> index_msb == - c->apicid >> index_msb) { - cpumask_set_cpu(i, - to_cpumask(this_leaf->shared_cpu_map)); - if (i != cpu && per_cpu(ici_cpuid4_info, i)) { - sibling_leaf = - CPUID4_INFO_IDX(i, index); - cpumask_set_cpu(cpu, to_cpumask( - sibling_leaf->shared_cpu_map)); - } - } - } - } -} -static void cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ - struct _cpuid4_info *this_leaf, *sibling_leaf; - int sibling; - - this_leaf = CPUID4_INFO_IDX(cpu, index); - for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { - sibling_leaf = CPUID4_INFO_IDX(sibling, index); - cpumask_clear_cpu(cpu, - to_cpumask(sibling_leaf->shared_cpu_map)); - } -} -#else -static void cache_shared_cpu_map_setup(unsigned int cpu, int index) -{ -} - -static void cache_remove_shared_cpu_map(unsigned int cpu, int index) -{ -} -#endif - -static void free_cache_attributes(unsigned int cpu) -{ - int i; - - for (i = 0; i < num_cache_leaves; i++) - cache_remove_shared_cpu_map(cpu, i); - - kfree(per_cpu(ici_cpuid4_info, cpu)); - per_cpu(ici_cpuid4_info, cpu) = NULL; -} - -static void get_cpu_leaves(void *_retval) -{ - int j, *retval = _retval, cpu = smp_processor_id(); + return; - /* Do cpuid and store the results */ - for (j = 0; j < num_cache_leaves; j++) { - struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j); + index_msb = get_count_order(num_threads_sharing); - *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base); - if (unlikely(*retval < 0)) { - int i; + for_each_online_cpu(i) + if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) { + struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); - for (i = 0; i < j; i++) - cache_remove_shared_cpu_map(cpu, i); - break; + if (i == cpu || !sib_cpu_ci->info_list) + continue;/* skip if itself or no cacheinfo */ + sibling_leaf = sib_cpu_ci->info_list + index; + cpumask_set_cpu(i, &this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map); } - cache_shared_cpu_map_setup(cpu, j); - } } -static int detect_cache_attributes(unsigned int cpu) +static void ci_leaf_init(struct cacheinfo *this_leaf, + struct _cpuid4_info_regs *base) { - int retval; - - if (num_cache_leaves == 0) - return -ENOENT; - - per_cpu(ici_cpuid4_info, cpu) = kzalloc( - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(ici_cpuid4_info, cpu) == NULL) - return -ENOMEM; - - smp_call_function_single(cpu, get_cpu_leaves, &retval, true); - if (retval) { - kfree(per_cpu(ici_cpuid4_info, cpu)); - per_cpu(ici_cpuid4_info, cpu) = NULL; - } - - return retval; + this_leaf->level = base->eax.split.level; + this_leaf->type = cache_type_map[base->eax.split.type]; + this_leaf->coherency_line_size = + base->ebx.split.coherency_line_size + 1; + this_leaf->ways_of_associativity = + base->ebx.split.ways_of_associativity + 1; + this_leaf->size = base->size; + this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1; + this_leaf->physical_line_partition = + base->ebx.split.physical_line_partition + 1; + this_leaf->priv = base->nb; } -#include <linux/kobject.h> -#include <linux/sysfs.h> -#include <linux/cpu.h> - -/* pointer to kobject for cpuX/cache */ -static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); - -struct _index_kobject { - struct kobject kobj; - unsigned int cpu; - unsigned short index; -}; - -/* pointer to array of kobjects for cpuX/cache/indexY */ -static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) - -#define show_one_plus(file_name, object, val) \ -static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \ - unsigned int cpu) \ -{ \ - return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ -} - -show_one_plus(level, base.eax.split.level, 0); -show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1); -show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1); -show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1); -show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1); - -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, - unsigned int cpu) -{ - return sprintf(buf, "%luK\n", this_leaf->base.size / 1024); -} - -static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, - int type, char *buf) -{ - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int ret; - - if (type) - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", - cpumask_pr_args(mask)); - else - ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb", - cpumask_pr_args(mask)); - buf[ret++] = '\n'; - buf[ret] = '\0'; - return ret; -} - -static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, - unsigned int cpu) +static int __init_cache_level(unsigned int cpu) { - return show_shared_cpu_map_func(leaf, 0, buf); -} - -static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf, - unsigned int cpu) -{ - return show_shared_cpu_map_func(leaf, 1, buf); -} + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, - unsigned int cpu) -{ - switch (this_leaf->base.eax.split.type) { - case CACHE_TYPE_DATA: - return sprintf(buf, "Data\n"); - case CACHE_TYPE_INST: - return sprintf(buf, "Instruction\n"); - case CACHE_TYPE_UNIFIED: - return sprintf(buf, "Unified\n"); - default: - return sprintf(buf, "Unknown\n"); - } -} - -#define to_object(k) container_of(k, struct _index_kobject, kobj) -#define to_attr(a) container_of(a, struct _cache_attr, attr) - -#define define_one_ro(_name) \ -static struct _cache_attr _name = \ - __ATTR(_name, 0444, show_##_name, NULL) - -define_one_ro(level); -define_one_ro(type); -define_one_ro(coherency_line_size); -define_one_ro(physical_line_partition); -define_one_ro(ways_of_associativity); -define_one_ro(number_of_sets); -define_one_ro(size); -define_one_ro(shared_cpu_map); -define_one_ro(shared_cpu_list); - -static struct attribute *default_attrs[] = { - &type.attr, - &level.attr, - &coherency_line_size.attr, - &physical_line_partition.attr, - &ways_of_associativity.attr, - &number_of_sets.attr, - &size.attr, - &shared_cpu_map.attr, - &shared_cpu_list.attr, - NULL -}; - -#ifdef CONFIG_AMD_NB -static struct attribute **amd_l3_attrs(void) -{ - static struct attribute **attrs; - int n; - - if (attrs) - return attrs; - - n = ARRAY_SIZE(default_attrs); - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - n += 1; - - attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); - if (attrs == NULL) - return attrs = default_attrs; - - for (n = 0; default_attrs[n]; n++) - attrs[n] = default_attrs[n]; - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { - attrs[n++] = &cache_disable_0.attr; - attrs[n++] = &cache_disable_1.attr; - } - - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - attrs[n++] = &subcaches.attr; - - return attrs; -} -#endif - -static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->show ? - fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, this_leaf->cpu) : - 0; - return ret; -} - -static ssize_t store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t count) -{ - struct _cache_attr *fattr = to_attr(attr); - struct _index_kobject *this_leaf = to_object(kobj); - ssize_t ret; - - ret = fattr->store ? - fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), - buf, count, this_leaf->cpu) : - 0; - return ret; -} - -static const struct sysfs_ops sysfs_ops = { - .show = show, - .store = store, -}; - -static struct kobj_type ktype_cache = { - .sysfs_ops = &sysfs_ops, - .default_attrs = default_attrs, -}; - -static struct kobj_type ktype_percpu_entry = { - .sysfs_ops = &sysfs_ops, -}; - -static void cpuid4_cache_sysfs_exit(unsigned int cpu) -{ - kfree(per_cpu(ici_cache_kobject, cpu)); - kfree(per_cpu(ici_index_kobject, cpu)); - per_cpu(ici_cache_kobject, cpu) = NULL; - per_cpu(ici_index_kobject, cpu) = NULL; - free_cache_attributes(cpu); -} - -static int cpuid4_cache_sysfs_init(unsigned int cpu) -{ - int err; - - if (num_cache_leaves == 0) + if (!num_cache_leaves) return -ENOENT; - - err = detect_cache_attributes(cpu); - if (err) - return err; - - /* Allocate all required memory */ - per_cpu(ici_cache_kobject, cpu) = - kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) - goto err_out; - - per_cpu(ici_index_kobject, cpu) = kzalloc( - sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); - if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) - goto err_out; - + if (!this_cpu_ci) + return -EINVAL; + this_cpu_ci->num_levels = 3; + this_cpu_ci->num_leaves = num_cache_leaves; return 0; - -err_out: - cpuid4_cache_sysfs_exit(cpu); - return -ENOMEM; } -static DECLARE_BITMAP(cache_dev_map, NR_CPUS); - -/* Add/Remove cache interface for CPU device */ -static int cache_add_dev(struct device *dev) +static int __populate_cache_leaves(unsigned int cpu) { - unsigned int cpu = dev->id; - unsigned long i, j; - struct _index_kobject *this_object; - struct _cpuid4_info *this_leaf; - int retval; - - retval = cpuid4_cache_sysfs_init(cpu); - if (unlikely(retval < 0)) - return retval; - - retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), - &ktype_percpu_entry, - &dev->kobj, "%s", "cache"); - if (retval < 0) { - cpuid4_cache_sysfs_exit(cpu); - return retval; - } + unsigned int idx, ret; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct _cpuid4_info_regs id4_regs = {}; - for (i = 0; i < num_cache_leaves; i++) { - this_object = INDEX_KOBJECT_PTR(cpu, i); - this_object->cpu = cpu; - this_object->index = i; - - this_leaf = CPUID4_INFO_IDX(cpu, i); - - ktype_cache.default_attrs = default_attrs; -#ifdef CONFIG_AMD_NB - if (this_leaf->base.nb) - ktype_cache.default_attrs = amd_l3_attrs(); -#endif - retval = kobject_init_and_add(&(this_object->kobj), - &ktype_cache, - per_cpu(ici_cache_kobject, cpu), - "index%1lu", i); - if (unlikely(retval)) { - for (j = 0; j < i; j++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); - kobject_put(per_cpu(ici_cache_kobject, cpu)); - cpuid4_cache_sysfs_exit(cpu); - return retval; - } - kobject_uevent(&(this_object->kobj), KOBJ_ADD); + for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { + ret = cpuid4_cache_lookup_regs(idx, &id4_regs); + if (ret) + return ret; + ci_leaf_init(this_leaf++, &id4_regs); + __cache_cpumap_setup(cpu, idx, &id4_regs); } - cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); - - kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); return 0; } -static void cache_remove_dev(struct device *dev) -{ - unsigned int cpu = dev->id; - unsigned long i; - - if (per_cpu(ici_cpuid4_info, cpu) == NULL) - return; - if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) - return; - cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); - - for (i = 0; i < num_cache_leaves; i++) - kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); - kobject_put(per_cpu(ici_cache_kobject, cpu)); - cpuid4_cache_sysfs_exit(cpu); -} - -static int cacheinfo_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - - dev = get_cpu_device(cpu); - switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - cache_add_dev(dev); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - cache_remove_dev(dev); - break; - } - return NOTIFY_OK; -} - -static struct notifier_block cacheinfo_cpu_notifier = { - .notifier_call = cacheinfo_cpu_callback, -}; - -static int __init cache_sysfs_init(void) -{ - int i, err = 0; - - if (num_cache_leaves == 0) - return 0; - - cpu_notifier_register_begin(); - for_each_online_cpu(i) { - struct device *dev = get_cpu_device(i); - - err = cache_add_dev(dev); - if (err) - goto out; - } - __register_hotcpu_notifier(&cacheinfo_cpu_notifier); - -out: - cpu_notifier_register_done(); - return err; -} - -device_initcall(cache_sysfs_init); - -#endif +DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) +DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 36d99a337b49..3f20710a5b23 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -6,7 +6,7 @@ IN=$1 OUT=$2 -function dump_array() +dump_array() { ARRAY=$1 SIZE=$2 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b71a7f86d68a..e2888a3ad1e3 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2147,24 +2147,24 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) static unsigned long code_segment_base(struct pt_regs *regs) { /* + * For IA32 we look at the GDT/LDT segment base to convert the + * effective IP to a linear address. + */ + +#ifdef CONFIG_X86_32 + /* * If we are in VM86 mode, add the segment offset to convert to a * linear address. */ if (regs->flags & X86_VM_MASK) return 0x10 * regs->cs; - /* - * For IA32 we look at the GDT/LDT segment base to convert the - * effective IP to a linear address. - */ -#ifdef CONFIG_X86_32 if (user_mode(regs) && regs->cs != __USER_CS) return get_segment_base(regs->cs); #else - if (test_thread_flag(TIF_IA32)) { - if (user_mode(regs) && regs->cs != __USER32_CS) - return get_segment_base(regs->cs); - } + if (user_mode(regs) && !user_64bit_mode(regs) && + regs->cs != __USER32_CS) + return get_segment_base(regs->cs); #endif return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 498b6d967138..258990688a5e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -212,11 +212,11 @@ static struct event_constraint intel_hsw_event_constraints[] = { INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ - INTEL_EVENT_CONSTRAINT(0x08a3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ - INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4), + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ - INTEL_EVENT_CONSTRAINT(0x04a3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), EVENT_CONSTRAINT_END }; @@ -1649,11 +1649,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; - c = intel_pebs_constraints(event); + c = intel_shared_regs_constraints(cpuc, event); if (c) return c; - c = intel_shared_regs_constraints(cpuc, event); + c = intel_pebs_constraints(event); if (c) return c; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index aceb2f90c716..c76d3e37c6e1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 81b3932edbdc..9c30acfadae2 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -279,7 +279,7 @@ int __die(const char *str, struct pt_regs *regs, long err) print_modules(); show_regs(regs); #ifdef CONFIG_X86_32 - if (user_mode_vm(regs)) { + if (user_mode(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; } else { @@ -308,7 +308,7 @@ void die(const char *str, struct pt_regs *regs, long err) unsigned long flags = oops_begin(); int sig = SIGSEGV; - if (!user_mode_vm(regs)) + if (!user_mode(regs)) report_bug(regs->ip, regs); if (__die(str, regs, err)) diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index efff5ed6045d..464ffd69b92e 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -126,13 +126,13 @@ void show_regs(struct pt_regs *regs) int i; show_regs_print_info(KERN_EMERG); - __show_regs(regs, !user_mode_vm(regs)); + __show_regs(regs, !user_mode(regs)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index a62536a1be88..49ff55ef9b26 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -95,20 +95,6 @@ static unsigned long early_serial_base = 0x3f8; /* ttyS0 */ #define DLL 0 /* Divisor Latch Low */ #define DLH 1 /* Divisor latch High */ -static void mem32_serial_out(unsigned long addr, int offset, int value) -{ - uint32_t *vaddr = (uint32_t *)addr; - /* shift implied by pointer type */ - writel(value, vaddr + offset); -} - -static unsigned int mem32_serial_in(unsigned long addr, int offset) -{ - uint32_t *vaddr = (uint32_t *)addr; - /* shift implied by pointer type */ - return readl(vaddr + offset); -} - static unsigned int io_serial_in(unsigned long addr, int offset) { return inb(addr + offset); @@ -205,6 +191,20 @@ static __init void early_serial_init(char *s) } #ifdef CONFIG_PCI +static void mem32_serial_out(unsigned long addr, int offset, int value) +{ + u32 *vaddr = (u32 *)addr; + /* shift implied by pointer type */ + writel(value, vaddr + offset); +} + +static unsigned int mem32_serial_in(unsigned long addr, int offset) +{ + u32 *vaddr = (u32 *)addr; + /* shift implied by pointer type */ + return readl(vaddr + offset); +} + /* * early_pci_serial_init() * @@ -217,8 +217,8 @@ static __init void early_pci_serial_init(char *s) unsigned divisor; unsigned long baud = DEFAULT_BAUD; u8 bus, slot, func; - uint32_t classcode, bar0; - uint16_t cmdreg; + u32 classcode, bar0; + u16 cmdreg; char *e; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 31e2d5bf3e38..1c309763e321 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -395,10 +395,13 @@ sysenter_past_esp: /*CFI_REL_OFFSET cs, 0*/ /* * Push current_thread_info()->sysenter_return to the stack. - * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. + * A tiny bit of offset fixup is necessary: TI_sysenter_return + * is relative to thread_info, which is at the bottom of the + * kernel stack page. 4*4 means the 4 words pushed above; + * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack; + * and THREAD_SIZE takes us to the bottom. */ - pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax @@ -432,7 +435,7 @@ sysenter_after_call: TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testl $_TIF_ALLWORK_MASK, %ecx - jne sysexit_audit + jnz sysexit_audit sysenter_exit: /* if something modifies registers it must also disable sysexit */ movl PT_EIP(%esp), %edx @@ -460,7 +463,7 @@ sysenter_audit: sysexit_audit: testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jne syscall_exit_work + jnz syscall_exit_work TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) movl %eax,%edx /* second arg, syscall return value */ @@ -472,7 +475,7 @@ sysexit_audit: TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx - jne syscall_exit_work + jnz syscall_exit_work movl PT_EAX(%esp),%eax /* reload syscall return value */ jmp sysenter_exit #endif @@ -510,7 +513,7 @@ syscall_exit: TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testl $_TIF_ALLWORK_MASK, %ecx # current->work - jne syscall_exit_work + jnz syscall_exit_work restore_all: TRACE_IRQS_IRET @@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and #ifdef CONFIG_VM86 testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) movl %esp, %eax - jne work_notifysig_v86 # returning to kernel-space or + jnz work_notifysig_v86 # returning to kernel-space or # vm86-space 1: #else @@ -720,43 +723,22 @@ END(sysenter_badsys) .endm /* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. + * Build the entry stubs with some assembler magic. + * We pack 1 stub into every 8-byte block. */ -.section .init.rodata,"a" -ENTRY(interrupt) -.section .entry.text, "ax" - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT + .align 8 ENTRY(irq_entries_start) RING0_INT_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < FIRST_SYSTEM_VECTOR - .if vector <> FIRST_EXTERNAL_VECTOR + vector=FIRST_EXTERNAL_VECTOR + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_interrupt CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .long 1b - .section .entry.text, "ax" -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr + .align 8 + .endr END(irq_entries_start) -.previous -END(interrupt) -.previous - /* * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: @@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error) pushl_cfi $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ -661: pushl_cfi $do_general_protection -662: -.section .altinstructions,"a" - altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f -.previous -.section .altinstr_replacement,"ax" -663: pushl $do_simd_coprocessor_error -664: -.previous + ALTERNATIVE "pushl_cfi $do_general_protection", \ + "pushl $do_simd_coprocessor_error", \ + X86_FEATURE_XMM #else pushl_cfi $do_simd_coprocessor_error #endif @@ -1240,20 +1216,13 @@ error_code: /*CFI_REL_OFFSET es, 0*/ pushl_cfi %ds /*CFI_REL_OFFSET ds, 0*/ - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ebp - CFI_REL_OFFSET ebp, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl_cfi_reg eax + pushl_cfi_reg ebp + pushl_cfi_reg edi + pushl_cfi_reg esi + pushl_cfi_reg edx + pushl_cfi_reg ecx + pushl_cfi_reg ebx cld movl $(__KERNEL_PERCPU), %ecx movl %ecx, %fs diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 10074ad9ebf8..c7b238494b31 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -14,27 +14,14 @@ * NOTE: This code handles signal-recognition, which happens every time * after an interrupt and after each system call. * - * Normal syscalls and interrupts don't save a full stack frame, this is - * only done for syscall tracing, signals or fork/exec et.al. - * * A note on terminology: - * - top of stack: Architecture defined interrupt frame from SS to RIP + * - iret frame: Architecture defined interrupt frame from SS to RIP * at the top of the kernel process stack. - * - partial stack frame: partially saved registers up to R11. - * - full stack frame: Like partial stack frame, but all register saved. * * Some macro usage: * - CFI macros are used to generate dwarf2 unwind information for better * backtraces. They don't change any code. - * - SAVE_ALL/RESTORE_ALL - Save/restore all registers - * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. - * There are unfortunately lots of special cases where some registers - * not touched. The macro is a big mess that should be cleaned up. - * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. - * Gives a full stack frame. * - ENTRY/END Define functions in the symbol table. - * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack - * frame that is otherwise undefined after a SYSCALL * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. * - idtentry - Define exception entry points. */ @@ -70,10 +57,6 @@ .section .entry.text, "ax" -#ifndef CONFIG_PREEMPT -#define retint_kernel retint_restore_args -#endif - #ifdef CONFIG_PARAVIRT ENTRY(native_usergs_sysret64) swapgs @@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_IRETQ offset=ARGOFFSET +.macro TRACE_IRQS_IRETQ #ifdef CONFIG_TRACE_IRQFLAGS - bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ + bt $9,EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: @@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64) call debug_stack_reset .endm -.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET - bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ +.macro TRACE_IRQS_IRETQ_DEBUG + bt $9,EFLAGS(%rsp) /* interrupts off? */ jnc 1f TRACE_IRQS_ON_DEBUG 1: @@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64) #endif /* - * C code is not supposed to know about undefined top of stack. Every time - * a C function with an pt_regs argument is called from the SYSCALL based - * fast path FIXUP_TOP_OF_STACK is needed. - * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs - * manipulation. - */ - - /* %rsp:at FRAMEEND */ - .macro FIXUP_TOP_OF_STACK tmp offset=0 - movq PER_CPU_VAR(old_rsp),\tmp - movq \tmp,RSP+\offset(%rsp) - movq $__USER_DS,SS+\offset(%rsp) - movq $__USER_CS,CS+\offset(%rsp) - movq RIP+\offset(%rsp),\tmp /* get rip */ - movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */ - movq R11+\offset(%rsp),\tmp /* get eflags */ - movq \tmp,EFLAGS+\offset(%rsp) - .endm - - .macro RESTORE_TOP_OF_STACK tmp offset=0 - movq RSP+\offset(%rsp),\tmp - movq \tmp,PER_CPU_VAR(old_rsp) - movq EFLAGS+\offset(%rsp),\tmp - movq \tmp,R11+\offset(%rsp) - .endm - -/* - * initial frame state for interrupts (and exceptions without error code) + * empty frame */ .macro EMPTY_FRAME start=1 offset=0 .if \start @@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64) * initial frame state for interrupts (and exceptions without error code) */ .macro INTR_FRAME start=1 offset=0 - EMPTY_FRAME \start, SS+8+\offset-RIP - /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ - CFI_REL_OFFSET rsp, RSP+\offset-RIP - /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ - /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ - CFI_REL_OFFSET rip, RIP+\offset-RIP + EMPTY_FRAME \start, 5*8+\offset + /*CFI_REL_OFFSET ss, 4*8+\offset*/ + CFI_REL_OFFSET rsp, 3*8+\offset + /*CFI_REL_OFFSET rflags, 2*8+\offset*/ + /*CFI_REL_OFFSET cs, 1*8+\offset*/ + CFI_REL_OFFSET rip, 0*8+\offset .endm /* @@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64) * with vector already pushed) */ .macro XCPT_FRAME start=1 offset=0 - INTR_FRAME \start, RIP+\offset-ORIG_RAX - .endm - -/* - * frame that enables calling into C. - */ - .macro PARTIAL_FRAME start=1 offset=0 - XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET - CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET - CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET - CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET - CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET - CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET - CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET - CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET - CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET - CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET + INTR_FRAME \start, 1*8+\offset .endm /* * frame that enables passing a complete pt_regs to a C function. */ .macro DEFAULT_FRAME start=1 offset=0 - PARTIAL_FRAME \start, R11+\offset-R15 + XCPT_FRAME \start, ORIG_RAX+\offset + CFI_REL_OFFSET rdi, RDI+\offset + CFI_REL_OFFSET rsi, RSI+\offset + CFI_REL_OFFSET rdx, RDX+\offset + CFI_REL_OFFSET rcx, RCX+\offset + CFI_REL_OFFSET rax, RAX+\offset + CFI_REL_OFFSET r8, R8+\offset + CFI_REL_OFFSET r9, R9+\offset + CFI_REL_OFFSET r10, R10+\offset + CFI_REL_OFFSET r11, R11+\offset CFI_REL_OFFSET rbx, RBX+\offset CFI_REL_OFFSET rbp, RBP+\offset CFI_REL_OFFSET r12, R12+\offset @@ -218,102 +167,30 @@ ENDPROC(native_usergs_sysret64) CFI_REL_OFFSET r15, R15+\offset .endm -ENTRY(save_paranoid) - XCPT_FRAME 1 RDI+8 - cld - movq %rdi, RDI+8(%rsp) - movq %rsi, RSI+8(%rsp) - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq %r8, R8+8(%rsp) - movq %r9, R9+8(%rsp) - movq %r10, R10+8(%rsp) - movq %r11, R11+8(%rsp) - movq_cfi rbx, RBX+8 - movq %rbp, RBP+8(%rsp) - movq %r12, R12+8(%rsp) - movq %r13, R13+8(%rsp) - movq %r14, R14+8(%rsp) - movq %r15, R15+8(%rsp) - movl $1,%ebx - movl $MSR_GS_BASE,%ecx - rdmsr - testl %edx,%edx - js 1f /* negative -> in kernel */ - SWAPGS - xorl %ebx,%ebx -1: ret - CFI_ENDPROC -END(save_paranoid) - /* - * A newly forked process directly context switches into this address. + * 64bit SYSCALL instruction entry. Up to 6 arguments in registers. * - * rdi: prev task we switched from - */ -ENTRY(ret_from_fork) - DEFAULT_FRAME - - LOCK ; btr $TIF_FORK,TI_flags(%r8) - - pushq_cfi $0x0002 - popfq_cfi # reset kernel eflags - - call schedule_tail # rdi: 'prev' task parameter - - GET_THREAD_INFO(%rcx) - - RESTORE_REST - - testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? - jz 1f - - testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET - jnz int_ret_from_sys_call - - RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call # go to the SYSRET fastpath - -1: - subq $REST_SKIP, %rsp # leave space for volatiles - CFI_ADJUST_CFA_OFFSET REST_SKIP - movq %rbp, %rdi - call *%rbx - movl $0, RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(ret_from_fork) - -/* - * System call entry. Up to 6 arguments in registers are supported. + * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, + * then loads new ss, cs, and rip from previously programmed MSRs. + * rflags gets masked by a value from another MSR (so CLD and CLAC + * are not needed). SYSCALL does not save anything on the stack + * and does not change rsp. * - * SYSCALL does not save anything on the stack and does not change the - * stack pointer. However, it does mask the flags register for us, so - * CLD and CLAC are not needed. - */ - -/* - * Register setup: + * Registers on entry: * rax system call number + * rcx return address + * r11 saved rflags (note: r11 is callee-clobbered register in C ABI) * rdi arg0 - * rcx return address for syscall/sysret, C arg3 * rsi arg1 * rdx arg2 - * r10 arg3 (--> moved to rcx for C) + * r10 arg3 (needs to be moved to rcx to conform to C ABI) * r8 arg4 * r9 arg5 - * r11 eflags for syscall/sysret, temporary for C - * r12-r15,rbp,rbx saved by C code, not touched. + * (note: r12-r15,rbp,rbx are callee-preserved in C ABI) * - * Interrupts are off on entry. * Only called from user space. * - * XXX if we had a free scratch register we could save the RSP into the stack frame - * and report it properly in ps. Unfortunately we haven't. - * - * When user can change the frames always force IRET. That is because + * When user can change pt_regs->foo always force IRET. That is because * it deals with uncanonical addresses better. SYSRET has trouble * with them due to bugs in both AMD and Intel CPUs. */ @@ -321,9 +198,15 @@ END(ret_from_fork) ENTRY(system_call) CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_DEF_CFA rsp,0 CFI_REGISTER rip,rcx /*CFI_REGISTER rflags,r11*/ + + /* + * Interrupts are off on entry. + * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, + * it is too small to ever cause noticeable irq latency. + */ SWAPGS_UNSAFE_STACK /* * A hypervisor implementation might want to use a label @@ -332,18 +215,38 @@ ENTRY(system_call) */ GLOBAL(system_call_after_swapgs) - movq %rsp,PER_CPU_VAR(old_rsp) + movq %rsp,PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(kernel_stack),%rsp + + /* Construct struct pt_regs on stack */ + pushq_cfi $__USER_DS /* pt_regs->ss */ + pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ /* - * No need to follow this irqs off/on section - it's straight - * and short: + * Re-enable interrupts. + * We use 'rsp_scratch' as a scratch space, hence irq-off block above + * must execute atomically in the face of possible interrupt-driven + * task preemption. We must enable interrupts only after we're done + * with using rsp_scratch: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8, 0, rax_enosys=1 - movq_cfi rax,(ORIG_RAX-ARGOFFSET) - movq %rcx,RIP-ARGOFFSET(%rsp) - CFI_REL_OFFSET rip,RIP-ARGOFFSET - testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) + pushq_cfi %r11 /* pt_regs->flags */ + pushq_cfi $__USER_CS /* pt_regs->cs */ + pushq_cfi %rcx /* pt_regs->ip */ + CFI_REL_OFFSET rip,0 + pushq_cfi_reg rax /* pt_regs->orig_ax */ + pushq_cfi_reg rdi /* pt_regs->di */ + pushq_cfi_reg rsi /* pt_regs->si */ + pushq_cfi_reg rdx /* pt_regs->dx */ + pushq_cfi_reg rcx /* pt_regs->cx */ + pushq_cfi $-ENOSYS /* pt_regs->ax */ + pushq_cfi_reg r8 /* pt_regs->r8 */ + pushq_cfi_reg r9 /* pt_regs->r9 */ + pushq_cfi_reg r10 /* pt_regs->r10 */ + pushq_cfi_reg r11 /* pt_regs->r11 */ + sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */ + CFI_ADJUST_CFA_OFFSET 6*8 + + testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) jnz tracesys system_call_fastpath: #if __SYSCALL_MASK == ~0 @@ -352,82 +255,96 @@ system_call_fastpath: andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja ret_from_sys_call /* and return regs->ax */ + ja 1f /* return -ENOSYS (already in pt_regs->ax) */ movq %r10,%rcx - call *sys_call_table(,%rax,8) # XXX: rip relative - movq %rax,RAX-ARGOFFSET(%rsp) + call *sys_call_table(,%rax,8) + movq %rax,RAX(%rsp) +1: /* - * Syscall return path ending with SYSRET (fast path) - * Has incomplete stack frame and undefined top of stack. + * Syscall return path ending with SYSRET (fast path). + * Has incompletely filled pt_regs. */ -ret_from_sys_call: - testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) - jnz int_ret_from_sys_call_fixup /* Go the the slow path */ - LOCKDEP_SYS_EXIT + /* + * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, + * it is too small to ever cause noticeable irq latency. + */ DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - CFI_REMEMBER_STATE + /* - * sysretq will re-enable interrupts: + * We must check ti flags with interrupts (or at least preemption) + * off because we must *never* return to userspace without + * processing exit work that is enqueued if we're preempted here. + * In particular, returning to userspace with any of the one-shot + * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is + * very bad. */ - TRACE_IRQS_ON - movq RIP-ARGOFFSET(%rsp),%rcx + testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) + jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */ + + CFI_REMEMBER_STATE + + RESTORE_C_REGS_EXCEPT_RCX_R11 + movq RIP(%rsp),%rcx CFI_REGISTER rip,rcx - RESTORE_ARGS 1,-ARG_SKIP,0 + movq EFLAGS(%rsp),%r11 /*CFI_REGISTER rflags,r11*/ - movq PER_CPU_VAR(old_rsp), %rsp + movq RSP(%rsp),%rsp + /* + * 64bit SYSRET restores rip from rcx, + * rflags from r11 (but RF and VM bits are forced to 0), + * cs and ss are loaded from MSRs. + * Restoration of rflags re-enables interrupts. + */ USERGS_SYSRET64 CFI_RESTORE_STATE -int_ret_from_sys_call_fixup: - FIXUP_TOP_OF_STACK %r11, -ARGOFFSET - jmp int_ret_from_sys_call - - /* Do syscall tracing */ + /* Do syscall entry tracing */ tracesys: - leaq -REST_SKIP(%rsp), %rdi - movq $AUDIT_ARCH_X86_64, %rsi + movq %rsp, %rdi + movl $AUDIT_ARCH_X86_64, %esi call syscall_trace_enter_phase1 test %rax, %rax jnz tracesys_phase2 /* if needed, run the slow path */ - LOAD_ARGS 0 /* else restore clobbered regs */ + RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */ + movq ORIG_RAX(%rsp), %rax jmp system_call_fastpath /* and return to the fast path */ tracesys_phase2: - SAVE_REST - FIXUP_TOP_OF_STACK %rdi + SAVE_EXTRA_REGS movq %rsp, %rdi - movq $AUDIT_ARCH_X86_64, %rsi + movl $AUDIT_ARCH_X86_64, %esi movq %rax,%rdx call syscall_trace_enter_phase2 /* - * Reload arg registers from stack in case ptrace changed them. + * Reload registers from stack in case ptrace changed them. * We don't reload %rax because syscall_trace_entry_phase2() returned * the value it wants us to use in the table lookup. */ - LOAD_ARGS ARGOFFSET, 1 - RESTORE_REST + RESTORE_C_REGS_EXCEPT_RAX + RESTORE_EXTRA_REGS #if __SYSCALL_MASK == ~0 cmpq $__NR_syscall_max,%rax #else andl $__SYSCALL_MASK,%eax cmpl $__NR_syscall_max,%eax #endif - ja int_ret_from_sys_call /* RAX(%rsp) is already set */ + ja 1f /* return -ENOSYS (already in pt_regs->ax) */ movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) - /* Use IRET because user could have changed frame */ + movq %rax,RAX(%rsp) +1: + /* Use IRET because user could have changed pt_regs->foo */ /* * Syscall return path ending with IRET. - * Has correct top of stack, but partial stack frame. + * Has correct iret frame. */ GLOBAL(int_ret_from_sys_call) DISABLE_INTERRUPTS(CLBR_NONE) +int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */ TRACE_IRQS_OFF movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ @@ -437,8 +354,8 @@ GLOBAL(int_with_check) movl TI_flags(%rcx),%edx andl %edi,%edx jnz int_careful - andl $~TS_COMPAT,TI_status(%rcx) - jmp retint_swapgs + andl $~TS_COMPAT,TI_status(%rcx) + jmp syscall_return /* Either reschedule or signal or syscall exit tracking needed. */ /* First do a reschedule test. */ @@ -455,12 +372,11 @@ int_careful: TRACE_IRQS_OFF jmp int_with_check - /* handle signals and tracing -- both require a full stack frame */ + /* handle signals and tracing -- both require a full pt_regs */ int_very_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) -int_check_syscall_exit_work: - SAVE_REST + SAVE_EXTRA_REGS /* Check for syscall exit trace */ testl $_TIF_WORK_SYSCALL_EXIT,%edx jz int_signal @@ -479,86 +395,192 @@ int_signal: call do_notify_resume 1: movl $_TIF_WORK_MASK,%edi int_restore_rest: - RESTORE_REST + RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF jmp int_with_check + +syscall_return: + /* The IRETQ could re-enable interrupts: */ + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_IRETQ + + /* + * Try to use SYSRET instead of IRET if we're returning to + * a completely clean 64-bit userspace context. + */ + movq RCX(%rsp),%rcx + cmpq %rcx,RIP(%rsp) /* RCX == RIP */ + jne opportunistic_sysret_failed + + /* + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP + * in kernel space. This essentially lets the user take over + * the kernel, since userspace controls RSP. It's not worth + * testing for canonicalness exactly -- this check detects any + * of the 17 high bits set, which is true for non-canonical + * or kernel addresses. (This will pessimize vsyscall=native. + * Big deal.) + * + * If virtual addresses ever become wider, this will need + * to be updated to remain correct on both old and new CPUs. + */ + .ifne __VIRTUAL_MASK_SHIFT - 47 + .error "virtual address width changed -- SYSRET checks need update" + .endif + shr $__VIRTUAL_MASK_SHIFT, %rcx + jnz opportunistic_sysret_failed + + cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */ + jne opportunistic_sysret_failed + + movq R11(%rsp),%r11 + cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */ + jne opportunistic_sysret_failed + + /* + * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET, + * restoring TF results in a trap from userspace immediately after + * SYSRET. This would cause an infinite loop whenever #DB happens + * with register state that satisfies the opportunistic SYSRET + * conditions. For example, single-stepping this user code: + * + * movq $stuck_here,%rcx + * pushfq + * popq %r11 + * stuck_here: + * + * would never get past 'stuck_here'. + */ + testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11 + jnz opportunistic_sysret_failed + + /* nothing to check for RSP */ + + cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */ + jne opportunistic_sysret_failed + + /* + * We win! This label is here just for ease of understanding + * perf profiles. Nothing jumps here. + */ +syscall_return_via_sysret: + CFI_REMEMBER_STATE + /* r11 is already restored (see code above) */ + RESTORE_C_REGS_EXCEPT_R11 + movq RSP(%rsp),%rsp + USERGS_SYSRET64 + CFI_RESTORE_STATE + +opportunistic_sysret_failed: + SWAPGS + jmp restore_c_regs_and_iret CFI_ENDPROC END(system_call) + .macro FORK_LIKE func ENTRY(stub_\func) CFI_STARTPROC - popq %r11 /* save return address */ - PARTIAL_FRAME 0 - SAVE_REST - pushq %r11 /* put it back on stack */ - FIXUP_TOP_OF_STACK %r11, 8 - DEFAULT_FRAME 0 8 /* offset 8: return address */ - call sys_\func - RESTORE_TOP_OF_STACK %r11, 8 - ret $REST_SKIP /* pop extended registers */ + DEFAULT_FRAME 0, 8 /* offset 8: return address */ + SAVE_EXTRA_REGS 8 + jmp sys_\func CFI_ENDPROC END(stub_\func) .endm - .macro FIXED_FRAME label,func -ENTRY(\label) - CFI_STARTPROC - PARTIAL_FRAME 0 8 /* offset 8: return address */ - FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET - call \func - RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET - ret - CFI_ENDPROC -END(\label) - .endm - FORK_LIKE clone FORK_LIKE fork FORK_LIKE vfork - FIXED_FRAME stub_iopl, sys_iopl ENTRY(stub_execve) CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - call sys_execve - movq %rax,RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call + DEFAULT_FRAME 0, 8 + call sys_execve +return_from_execve: + testl %eax, %eax + jz 1f + /* exec failed, can use fast SYSRET code path in this case */ + ret +1: + /* must use IRET code path (pt_regs->cs may have changed) */ + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 + ZERO_EXTRA_REGS + movq %rax,RAX(%rsp) + jmp int_ret_from_sys_call CFI_ENDPROC END(stub_execve) - -ENTRY(stub_execveat) +/* + * Remaining execve stubs are only 7 bytes long. + * ENTRY() often aligns to 16 bytes, which in this case has no benefits. + */ + .align 8 +GLOBAL(stub_execveat) CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - call sys_execveat - RESTORE_TOP_OF_STACK %r11 - movq %rax,RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call + DEFAULT_FRAME 0, 8 + call sys_execveat + jmp return_from_execve CFI_ENDPROC END(stub_execveat) +#ifdef CONFIG_X86_X32_ABI + .align 8 +GLOBAL(stub_x32_execve) + CFI_STARTPROC + DEFAULT_FRAME 0, 8 + call compat_sys_execve + jmp return_from_execve + CFI_ENDPROC +END(stub_x32_execve) + .align 8 +GLOBAL(stub_x32_execveat) + CFI_STARTPROC + DEFAULT_FRAME 0, 8 + call compat_sys_execveat + jmp return_from_execve + CFI_ENDPROC +END(stub_x32_execveat) +#endif + +#ifdef CONFIG_IA32_EMULATION + .align 8 +GLOBAL(stub32_execve) + CFI_STARTPROC + call compat_sys_execve + jmp return_from_execve + CFI_ENDPROC +END(stub32_execve) + .align 8 +GLOBAL(stub32_execveat) + CFI_STARTPROC + call compat_sys_execveat + jmp return_from_execve + CFI_ENDPROC +END(stub32_execveat) +#endif + /* * sigreturn is special because it needs to restore all registers on return. * This cannot be done with SYSRET, so use the IRET return path instead. */ ENTRY(stub_rt_sigreturn) CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 + DEFAULT_FRAME 0, 8 + /* + * SAVE_EXTRA_REGS result is not normally needed: + * sigreturn overwrites all pt_regs->GPREGS. + * But sigreturn can fail (!), and there is no easy way to detect that. + * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error, + * we SAVE_EXTRA_REGS here. + */ + SAVE_EXTRA_REGS 8 call sys_rt_sigreturn - movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer - RESTORE_REST +return_from_stub: + addq $8, %rsp + CFI_ADJUST_CFA_OFFSET -8 + RESTORE_EXTRA_REGS + movq %rax,RAX(%rsp) jmp int_ret_from_sys_call CFI_ENDPROC END(stub_rt_sigreturn) @@ -566,86 +588,70 @@ END(stub_rt_sigreturn) #ifdef CONFIG_X86_X32_ABI ENTRY(stub_x32_rt_sigreturn) CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 + DEFAULT_FRAME 0, 8 + SAVE_EXTRA_REGS 8 call sys32_x32_rt_sigreturn - movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer - RESTORE_REST - jmp int_ret_from_sys_call + jmp return_from_stub CFI_ENDPROC END(stub_x32_rt_sigreturn) +#endif -ENTRY(stub_x32_execve) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - call compat_sys_execve - RESTORE_TOP_OF_STACK %r11 - movq %rax,RAX(%rsp) - RESTORE_REST - jmp int_ret_from_sys_call - CFI_ENDPROC -END(stub_x32_execve) +/* + * A newly forked process directly context switches into this address. + * + * rdi: prev task we switched from + */ +ENTRY(ret_from_fork) + DEFAULT_FRAME -ENTRY(stub_x32_execveat) - CFI_STARTPROC - addq $8, %rsp - PARTIAL_FRAME 0 - SAVE_REST - FIXUP_TOP_OF_STACK %r11 - call compat_sys_execveat - RESTORE_TOP_OF_STACK %r11 - movq %rax,RAX(%rsp) - RESTORE_REST + LOCK ; btr $TIF_FORK,TI_flags(%r8) + + pushq_cfi $0x0002 + popfq_cfi # reset kernel eflags + + call schedule_tail # rdi: 'prev' task parameter + + RESTORE_EXTRA_REGS + + testl $3,CS(%rsp) # from kernel_thread? + + /* + * By the time we get here, we have no idea whether our pt_regs, + * ti flags, and ti status came from the 64-bit SYSCALL fast path, + * the slow path, or one of the ia32entry paths. + * Use IRET code path to return, since it can safely handle + * all of the above. + */ + jnz int_ret_from_sys_call + + /* We came from kernel_thread */ + /* nb: we depend on RESTORE_EXTRA_REGS above */ + movq %rbp, %rdi + call *%rbx + movl $0, RAX(%rsp) + RESTORE_EXTRA_REGS jmp int_ret_from_sys_call CFI_ENDPROC -END(stub_x32_execveat) - -#endif +END(ret_from_fork) /* - * Build the entry stubs and pointer table with some assembler magic. - * We pack 7 stubs into a single 32-byte chunk, which will fit in a - * single cache line on all modern x86 implementations. + * Build the entry stubs with some assembler magic. + * We pack 1 stub into every 8-byte block. */ - .section .init.rodata,"a" -ENTRY(interrupt) - .section .entry.text - .p2align 5 - .p2align CONFIG_X86_L1_CACHE_SHIFT + .align 8 ENTRY(irq_entries_start) INTR_FRAME -vector=FIRST_EXTERNAL_VECTOR -.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7 - .balign 32 - .rept 7 - .if vector < FIRST_SYSTEM_VECTOR - .if vector <> FIRST_EXTERNAL_VECTOR + vector=FIRST_EXTERNAL_VECTOR + .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR) + pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_interrupt CFI_ADJUST_CFA_OFFSET -8 - .endif -1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */ - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 - jmp 2f - .endif - .previous - .quad 1b - .section .entry.text -vector=vector+1 - .endif - .endr -2: jmp common_interrupt -.endr + .align 8 + .endr CFI_ENDPROC END(irq_entries_start) -.previous -END(interrupt) -.previous - /* * Interrupt entry/exit. * @@ -656,47 +662,45 @@ END(interrupt) /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - /* reserve pt_regs for scratch regs and rbp */ - subq $ORIG_RAX-RBP, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP cld - /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, (RDI-RBP) - movq_cfi rsi, (RSI-RBP) - movq_cfi rdx, (RDX-RBP) - movq_cfi rcx, (RCX-RBP) - movq_cfi rax, (RAX-RBP) - movq_cfi r8, (R8-RBP) - movq_cfi r9, (R9-RBP) - movq_cfi r10, (R10-RBP) - movq_cfi r11, (R11-RBP) - - /* Save rbp so that we can unwind from get_irq_regs() */ - movq_cfi rbp, 0 - - /* Save previous stack value */ - movq %rsp, %rsi + /* + * Since nothing in interrupt handling code touches r12...r15 members + * of "struct pt_regs", and since interrupts can nest, we can save + * four stack slots and simultaneously provide + * an unwind-friendly stack layout by saving "truncated" pt_regs + * exactly up to rbp slot, without these members. + */ + ALLOC_PT_GPREGS_ON_STACK -RBP + SAVE_C_REGS -RBP + /* this goes to 0(%rsp) for unwinder, not for saving the value: */ + SAVE_EXTRA_REGS_RBP -RBP - leaq -RBP(%rsp),%rdi /* arg1 for handler */ - testl $3, CS-RBP(%rsi) + leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */ + + testl $3, CS-RBP(%rsp) je 1f SWAPGS +1: /* + * Save previous stack pointer, optionally switch to interrupt stack. * irq_count is used to check if a CPU is already on an interrupt stack * or not. While this is essentially redundant with preempt_count it is * a little cheaper to use a separate counter in the PDA (short of * moving irq_enter into assembly, which would be too much work) */ -1: incl PER_CPU_VAR(irq_count) + movq %rsp, %rsi + incl PER_CPU_VAR(irq_count) cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp CFI_DEF_CFA_REGISTER rsi - - /* Store previous stack value */ pushq %rsi + /* + * For debugger: + * "CFA (Current Frame Address) is the value on stack + offset" + */ CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ - 0x77 /* DW_OP_breg7 */, 0, \ + 0x77 /* DW_OP_breg7 (rsp) */, 0, \ 0x06 /* DW_OP_deref */, \ - 0x08 /* DW_OP_const1u */, SS+8-RBP, \ + 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \ 0x22 /* DW_OP_plus */ /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF @@ -714,7 +718,7 @@ common_interrupt: ASM_CLAC addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ - /* 0(%rsp): old_rsp-ARGOFFSET */ + /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -722,19 +726,18 @@ ret_from_intr: /* Restore saved previous stack */ popq %rsi - CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */ - leaq ARGOFFSET-RBP(%rsi), %rsp + CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */ + /* return code expects complete pt_regs - adjust rsp accordingly: */ + leaq -RBP(%rsi),%rsp CFI_DEF_CFA_REGISTER rsp - CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET + CFI_ADJUST_CFA_OFFSET RBP -exit_intr: - GET_THREAD_INFO(%rcx) - testl $3,CS-ARGOFFSET(%rsp) + testl $3,CS(%rsp) je retint_kernel - /* Interrupt came from user space */ + + GET_THREAD_INFO(%rcx) /* - * Has a correct top of stack, but a partial stack frame * %rcx: thread info. Interrupts off. */ retint_with_reschedule: @@ -753,70 +756,34 @@ retint_swapgs: /* return to user-space */ DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_IRETQ - /* - * Try to use SYSRET instead of IRET if we're returning to - * a completely clean 64-bit userspace context. - */ - movq (RCX-R11)(%rsp), %rcx - cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */ - jne opportunistic_sysret_failed - - /* - * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP - * in kernel space. This essentially lets the user take over - * the kernel, since userspace controls RSP. It's not worth - * testing for canonicalness exactly -- this check detects any - * of the 17 high bits set, which is true for non-canonical - * or kernel addresses. (This will pessimize vsyscall=native. - * Big deal.) - * - * If virtual addresses ever become wider, this will need - * to be updated to remain correct on both old and new CPUs. - */ - .ifne __VIRTUAL_MASK_SHIFT - 47 - .error "virtual address width changed -- sysret checks need update" - .endif - shr $__VIRTUAL_MASK_SHIFT, %rcx - jnz opportunistic_sysret_failed - - cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */ - jne opportunistic_sysret_failed - - movq (R11-ARGOFFSET)(%rsp), %r11 - cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */ - jne opportunistic_sysret_failed - - testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */ - jnz opportunistic_sysret_failed - - /* nothing to check for RSP */ - - cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */ - jne opportunistic_sysret_failed - - /* - * We win! This label is here just for ease of understanding - * perf profiles. Nothing jumps here. - */ -irq_return_via_sysret: - CFI_REMEMBER_STATE - RESTORE_ARGS 1,8,1 - movq (RSP-RIP)(%rsp),%rsp - USERGS_SYSRET64 - CFI_RESTORE_STATE - -opportunistic_sysret_failed: SWAPGS - jmp restore_args + jmp restore_c_regs_and_iret -retint_restore_args: /* return to kernel space */ - DISABLE_INTERRUPTS(CLBR_ANY) +/* Returning to kernel space */ +retint_kernel: +#ifdef CONFIG_PREEMPT + /* Interrupts are off */ + /* Check if we need preemption */ + bt $9,EFLAGS(%rsp) /* interrupts were off? */ + jnc 1f +0: cmpl $0,PER_CPU_VAR(__preempt_count) + jnz 1f + call preempt_schedule_irq + jmp 0b +1: +#endif /* * The iretq could re-enable interrupts: */ TRACE_IRQS_IRETQ -restore_args: - RESTORE_ARGS 1,8,1 + +/* + * At this label, code paths which return to kernel and to user, + * which come from interrupts/exception and from syscalls, merge. + */ +restore_c_regs_and_iret: + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 irq_return: INTERRUPT_RETURN @@ -887,28 +854,17 @@ retint_signal: jz retint_swapgs TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_REST + SAVE_EXTRA_REGS movq $-1,ORIG_RAX(%rsp) xorl %esi,%esi # oldset movq %rsp,%rdi # &pt_regs call do_notify_resume - RESTORE_REST + RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) jmp retint_with_reschedule -#ifdef CONFIG_PREEMPT - /* Returning to kernel space. Check if we need preemption */ - /* rcx: threadinfo. interrupts off. */ -ENTRY(retint_kernel) - cmpl $0,PER_CPU_VAR(__preempt_count) - jnz retint_restore_args - bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ - jnc retint_restore_args - call preempt_schedule_irq - jmp exit_intr -#endif CFI_ENDPROC END(common_interrupt) @@ -997,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \ /* * Exception entry points. */ -#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) +#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8) .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) @@ -1019,8 +975,7 @@ ENTRY(\sym) pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ .endif - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 + ALLOC_PT_GPREGS_ON_STACK .if \paranoid .if \paranoid == 1 @@ -1028,10 +983,11 @@ ENTRY(\sym) testl $3, CS(%rsp) /* If coming from userspace, switch */ jnz 1f /* stacks. */ .endif - call save_paranoid + call paranoid_entry .else call error_entry .endif + /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */ DEFAULT_FRAME 0 @@ -1053,19 +1009,20 @@ ENTRY(\sym) .endif .if \shift_ist != -1 - subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist) + subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) .endif call \do_sym .if \shift_ist != -1 - addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist) + addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) .endif + /* these procedures expect "no swapgs" flag in ebx */ .if \paranoid - jmp paranoid_exit /* %ebx: no swapgs flag */ + jmp paranoid_exit .else - jmp error_exit /* %ebx: no swapgs flag */ + jmp error_exit .endif .if \paranoid == 1 @@ -1269,7 +1226,9 @@ ENTRY(xen_failsafe_callback) addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 pushq_cfi $-1 /* orig_ax = -1 => not a system call */ - SAVE_ALL + ALLOC_PT_GPREGS_ON_STACK + SAVE_C_REGS + SAVE_EXTRA_REGS jmp error_exit CFI_ENDPROC END(xen_failsafe_callback) @@ -1301,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip) #endif - /* - * "Paranoid" exit path from exception stack. This is invoked - * only on return from non-NMI IST interrupts that came - * from kernel space. - * - * We may be returning to very strange contexts (e.g. very early - * in syscall entry), so checking for preemption here would - * be complicated. Fortunately, we there's no good reason - * to try to handle preemption here. - */ +/* + * Save all registers in pt_regs, and switch gs if needed. + * Use slow, but surefire "are we in kernel?" check. + * Return: ebx=0: need swapgs on exit, ebx=1: otherwise + */ +ENTRY(paranoid_entry) + XCPT_FRAME 1 15*8 + cld + SAVE_C_REGS 8 + SAVE_EXTRA_REGS 8 + movl $1,%ebx + movl $MSR_GS_BASE,%ecx + rdmsr + testl %edx,%edx + js 1f /* negative -> in kernel */ + SWAPGS + xorl %ebx,%ebx +1: ret + CFI_ENDPROC +END(paranoid_entry) - /* ebx: no swapgs flag */ +/* + * "Paranoid" exit path from exception stack. This is invoked + * only on return from non-NMI IST interrupts that came + * from kernel space. + * + * We may be returning to very strange contexts (e.g. very early + * in syscall entry), so checking for preemption here would + * be complicated. Fortunately, we there's no good reason + * to try to handle preemption here. + */ +/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(paranoid_exit) DEFAULT_FRAME DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF_DEBUG testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_restore - TRACE_IRQS_IRETQ 0 + jnz paranoid_exit_no_swapgs + TRACE_IRQS_IRETQ SWAPGS_UNSAFE_STACK - RESTORE_ALL 8 - INTERRUPT_RETURN -paranoid_restore: - TRACE_IRQS_IRETQ_DEBUG 0 - RESTORE_ALL 8 + jmp paranoid_exit_restore +paranoid_exit_no_swapgs: + TRACE_IRQS_IRETQ_DEBUG +paranoid_exit_restore: + RESTORE_EXTRA_REGS + RESTORE_C_REGS + REMOVE_PT_GPREGS_FROM_STACK 8 INTERRUPT_RETURN CFI_ENDPROC END(paranoid_exit) /* - * Exception entry point. This expects an error code/orig_rax on the stack. - * returns in "no swapgs flag" in %ebx. + * Save all registers in pt_regs, and switch gs if needed. + * Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(error_entry) - XCPT_FRAME - CFI_ADJUST_CFA_OFFSET 15*8 - /* oldrax contains error code */ + XCPT_FRAME 1 15*8 cld - movq %rdi, RDI+8(%rsp) - movq %rsi, RSI+8(%rsp) - movq %rdx, RDX+8(%rsp) - movq %rcx, RCX+8(%rsp) - movq %rax, RAX+8(%rsp) - movq %r8, R8+8(%rsp) - movq %r9, R9+8(%rsp) - movq %r10, R10+8(%rsp) - movq %r11, R11+8(%rsp) - movq_cfi rbx, RBX+8 - movq %rbp, RBP+8(%rsp) - movq %r12, R12+8(%rsp) - movq %r13, R13+8(%rsp) - movq %r14, R14+8(%rsp) - movq %r15, R15+8(%rsp) + SAVE_C_REGS 8 + SAVE_EXTRA_REGS 8 xorl %ebx,%ebx testl $3,CS+8(%rsp) je error_kernelspace @@ -1363,12 +1329,12 @@ error_sti: TRACE_IRQS_OFF ret -/* - * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. B stepping K8s sometimes report a - * truncated RIP for IRET exceptions returning to compat mode. Check - * for these here too. - */ + /* + * There are two places in the kernel that can potentially fault with + * usergs. Handle them here. B stepping K8s sometimes report a + * truncated RIP for IRET exceptions returning to compat mode. Check + * for these here too. + */ error_kernelspace: CFI_REL_OFFSET rcx, RCX+8 incl %ebx @@ -1398,11 +1364,11 @@ error_bad_iret: END(error_entry) -/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */ ENTRY(error_exit) DEFAULT_FRAME movl %ebx,%eax - RESTORE_REST + RESTORE_EXTRA_REGS DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) @@ -1417,19 +1383,7 @@ ENTRY(error_exit) CFI_ENDPROC END(error_exit) -/* - * Test if a given stack is an NMI stack or not. - */ - .macro test_in_nmi reg stack nmi_ret normal_ret - cmpq %\reg, \stack - ja \normal_ret - subq $EXCEPTION_STKSZ, %\reg - cmpq %\reg, \stack - jb \normal_ret - jmp \nmi_ret - .endm - - /* runs on exception stack */ +/* Runs on exception stack */ ENTRY(nmi) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME @@ -1465,7 +1419,7 @@ ENTRY(nmi) * NMI. */ - /* Use %rdx as out temp variable throughout */ + /* Use %rdx as our temp variable throughout */ pushq_cfi %rdx CFI_REL_OFFSET rdx, 0 @@ -1490,8 +1444,17 @@ ENTRY(nmi) * We check the variable because the first NMI could be in a * breakpoint routine using a breakpoint stack. */ - lea 6*8(%rsp), %rdx - test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi + lea 6*8(%rsp), %rdx + /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ + cmpq %rdx, 4*8(%rsp) + /* If the stack pointer is above the NMI stack, this is a normal NMI */ + ja first_nmi + subq $EXCEPTION_STKSZ, %rdx + cmpq %rdx, 4*8(%rsp) + /* If it is below the NMI stack, it is a normal NMI */ + jb first_nmi + /* Ah, it is within the NMI stack, treat it as nested */ + CFI_REMEMBER_STATE nested_nmi: @@ -1584,7 +1547,7 @@ first_nmi: .rept 5 pushq_cfi 11*8(%rsp) .endr - CFI_DEF_CFA_OFFSET SS+8-RIP + CFI_DEF_CFA_OFFSET 5*8 /* Everything up to here is safe from nested NMIs */ @@ -1612,7 +1575,7 @@ repeat_nmi: pushq_cfi -6*8(%rsp) .endr subq $(5*8), %rsp - CFI_DEF_CFA_OFFSET SS+8-RIP + CFI_DEF_CFA_OFFSET 5*8 end_repeat_nmi: /* @@ -1621,16 +1584,16 @@ end_repeat_nmi: * so that we repeat another NMI. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ - subq $ORIG_RAX-R15, %rsp - CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 + ALLOC_PT_GPREGS_ON_STACK + /* - * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit + * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit * as we should not be calling schedule in NMI context. * Even with normal interrupts enabled. An NMI should not be * setting NEED_RESCHED or anything that normal interrupts and * exceptions might do. */ - call save_paranoid + call paranoid_entry DEFAULT_FRAME 0 /* @@ -1661,8 +1624,10 @@ end_repeat_nmi: nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: + RESTORE_EXTRA_REGS + RESTORE_C_REGS /* Pop the extra iret frame at once */ - RESTORE_ALL 6*8 + REMOVE_PT_GPREGS_FROM_STACK 6*8 /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c4f8d4659070..2b55ee6db053 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) - early_printk("Kernel alive\n"); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f36bd42d6f0c..d031bad9e07e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -22,6 +22,7 @@ #include <asm/cpufeature.h> #include <asm/percpu.h> #include <asm/nops.h> +#include <asm/bootparam.h> /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -90,7 +91,7 @@ ENTRY(startup_32) /* test KEEP_SEGMENTS flag to see if the bootloader is asking us to not reload segments */ - testb $(1<<6), BP_loadflags(%esi) + testb $KEEP_SEGMENTS, BP_loadflags(%esi) jnz 2f /* diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 6fd514d9f69a..ae6588b301c2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -1,5 +1,5 @@ /* - * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit + * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit * * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> @@ -56,7 +56,7 @@ startup_64: * %rsi holds a physical pointer to real_mode_data. * * We come here either directly from a 64bit bootloader, or from - * arch/x86_64/boot/compressed/head.S. + * arch/x86/boot/compressed/head_64.S. * * We only come here initially at boot nothing else comes here. * @@ -146,7 +146,7 @@ startup_64: leaq level2_kernel_pgt(%rip), %rdi leaq 4096(%rdi), %r8 /* See if it is a valid page table entry */ -1: testq $1, 0(%rdi) +1: testb $1, 0(%rdi) jz 2f addq %rbp, 0(%rdi) /* Go to the next page */ diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index d5651fce0b71..29c740deafec 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void) static inline bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); - return regs && user_mode_vm(regs); + return regs && user_mode(regs); } /* diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 4ddaf66ea35f..37dae792dbbe 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = &per_cpu(cpu_tss, get_cpu()); if (turn_on) bitmap_clear(t->io_bitmap_ptr, from, num); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 67b1cbe0093a..e5952c225532 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -295,7 +295,7 @@ int check_irq_vectors_for_cpu_disable(void) this_cpu = smp_processor_id(); cpumask_copy(&online_new, cpu_online_mask); - cpu_clear(this_cpu, online_new); + cpumask_clear_cpu(this_cpu, &online_new); this_count = 0; for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { @@ -307,7 +307,7 @@ int check_irq_vectors_for_cpu_disable(void) data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); - cpu_clear(this_cpu, affinity_new); + cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ if (!irq_has_action(irq) || irqd_is_per_cpu(data)) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 28d28f5eb8f4..f9fd86a7fcc7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) if (unlikely(!desc)) return false; - if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) { + if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); desc->handle_irq(irq, desc); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index e4b503d5558c..394e643d7830 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) u64 estack_top, estack_bottom; u64 curbase = (u64)task_stack_page(current); - if (user_mode_vm(regs)) + if (user_mode(regs)) return; if (regs->sp >= curbase + sizeof(struct thread_info) + diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 70e181ea1eac..cd10a6437264 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -178,7 +178,8 @@ void __init native_init_IRQ(void) #endif for_each_clear_bit_from(i, used_vectors, first_system_vector) { /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, irq_entries_start + + 8 * (i - FIRST_EXTERNAL_VECTOR)); } #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, used_vectors, NR_VECTORS) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 7ec1d5f8d283..d6178d9791db 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { "bx", 8, offsetof(struct pt_regs, bx) }, { "cx", 8, offsetof(struct pt_regs, cx) }, { "dx", 8, offsetof(struct pt_regs, dx) }, - { "si", 8, offsetof(struct pt_regs, dx) }, + { "si", 8, offsetof(struct pt_regs, si) }, { "di", 8, offsetof(struct pt_regs, di) }, { "bp", 8, offsetof(struct pt_regs, bp) }, { "sp", 8, offsetof(struct pt_regs, sp) }, @@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) #ifdef CONFIG_X86_32 switch (regno) { case GDB_SS: - if (!user_mode_vm(regs)) + if (!user_mode(regs)) *(unsigned long *)mem = __KERNEL_DS; break; case GDB_SP: - if (!user_mode_vm(regs)) + if (!user_mode(regs)) *(unsigned long *)mem = kernel_stack_pointer(regs); break; case GDB_GS: diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4e3d5a9621fe..24d079604fd5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs) struct kprobe *p; struct kprobe_ctlblk *kcb; - if (user_mode_vm(regs)) + if (user_mode(regs)) return 0; addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); @@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, struct die_args *args = data; int ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; if (val == DIE_GPF) { diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 9bbb9b35c144..005c03e93fc5 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -33,6 +33,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/setup.h> #if 0 #define DEBUGP(fmt, ...) \ @@ -53,7 +54,7 @@ static DEFINE_MUTEX(module_kaslr_mutex); static unsigned long int get_module_load_offset(void) { - if (kaslr_enabled) { + if (kaslr_enabled()) { mutex_lock(&module_kaslr_mutex); /* * Calculate the module_load_offset the first time this diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 781861cc5ee8..da8cb987b973 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user, } /* - * RIP, flags, and the argument registers are usually saved. - * orig_ax is probably okay, too. + * These registers are always saved on 64-bit syscall entry. + * On 32-bit entry points, they are saved too except r8..r11. */ regs_user_copy->ip = user_regs->ip; + regs_user_copy->ax = user_regs->ax; regs_user_copy->cx = user_regs->cx; regs_user_copy->dx = user_regs->dx; regs_user_copy->si = user_regs->si; @@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user, regs_user_copy->r11 = user_regs->r11; regs_user_copy->orig_ax = user_regs->orig_ax; regs_user_copy->flags = user_regs->flags; + regs_user_copy->sp = user_regs->sp; + regs_user_copy->cs = user_regs->cs; + regs_user_copy->ss = user_regs->ss; /* - * Don't even try to report the "rest" regs. + * Most system calls don't save these registers, don't report them. */ regs_user_copy->bx = -1; regs_user_copy->bp = -1; @@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user, /* * For this to be at all useful, we need a reasonable guess for - * sp and the ABI. Be careful: we're in NMI context, and we're + * the ABI. Be careful: we're in NMI context, and we're * considering current to be the current task, so we should * be careful not to look at any other percpu variables that might * change during context switches. */ - if (IS_ENABLED(CONFIG_IA32_EMULATION) && - task_thread_info(current)->status & TS_COMPAT) { - /* Easy case: we're in a compat syscall. */ - regs_user->abi = PERF_SAMPLE_REGS_ABI_32; - regs_user_copy->sp = user_regs->sp; - regs_user_copy->cs = user_regs->cs; - regs_user_copy->ss = user_regs->ss; - } else if (user_regs->orig_ax != -1) { - /* - * We're probably in a 64-bit syscall. - * Warning: this code is severely racy. At least it's better - * than just blindly copying user_regs. - */ - regs_user->abi = PERF_SAMPLE_REGS_ABI_64; - regs_user_copy->sp = this_cpu_read(old_rsp); - regs_user_copy->cs = __USER_CS; - regs_user_copy->ss = __USER_DS; - regs_user_copy->cx = -1; /* usually contains garbage */ - } else { - /* We're probably in an interrupt or exception. */ - regs_user->abi = user_64bit_mode(user_regs) ? - PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; - regs_user_copy->sp = user_regs->sp; - regs_user_copy->cs = user_regs->cs; - regs_user_copy->ss = user_regs->ss; - } + regs_user->abi = user_64bit_mode(user_regs) ? + PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32; regs_user->regs = regs_user_copy; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 046e2d620bbe..0c8992dbead5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,7 +9,7 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/pm.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/random.h> #include <linux/user-return-notifier.h> #include <linux/dmi.h> @@ -24,6 +24,7 @@ #include <asm/syscalls.h> #include <asm/idle.h> #include <asm/uaccess.h> +#include <asm/mwait.h> #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/debugreg.h> @@ -37,7 +38,26 @@ * section. Since TSS's are completely CPU-local, we want them * on exact cacheline boundaries, to eliminate cacheline ping-pong. */ -__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS; +__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { + .x86_tss = { + .sp0 = TOP_OF_INIT_STACK, +#ifdef CONFIG_X86_32 + .ss0 = __KERNEL_DS, + .ss1 = __KERNEL_CS, + .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, +#endif + }, +#ifdef CONFIG_X86_32 + /* + * Note that the .io_bitmap member must be extra-big. This is because + * the CPU will access an additional byte beyond the end of the IO + * permission bitmap. The extra byte must be all 1 bits, and must + * be within the limit. + */ + .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, +#endif +}; +EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); #ifdef CONFIG_X86_64 static DEFINE_PER_CPU(unsigned char, is_idle); @@ -109,7 +129,7 @@ void exit_thread(void) unsigned long *bp = t->io_bitmap_ptr; if (bp) { - struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); t->io_bitmap_ptr = NULL; clear_thread_flag(TIF_IO_BITMAP); @@ -377,14 +397,11 @@ static void amd_e400_idle(void) if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { cpumask_set_cpu(cpu, amd_e400_c1e_mask); - /* - * Force broadcast so ACPI can not interfere. - */ - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - &cpu); + /* Force broadcast so ACPI can not interfere. */ + tick_broadcast_force(); pr_info("Switch to broadcast mode on CPU%d\n", cpu); } - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enter(); default_idle(); @@ -393,12 +410,59 @@ static void amd_e400_idle(void) * called with interrupts disabled. */ local_irq_disable(); - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); local_irq_enable(); } else default_idle(); } +/* + * Intel Core2 and older machines prefer MWAIT over HALT for C1. + * We can't rely on cpuidle installing MWAIT, because it will not load + * on systems that support only C1 -- so the boot default must be MWAIT. + * + * Some AMD machines are the opposite, they depend on using HALT. + * + * So for default C1, which is used during boot until cpuidle loads, + * use MWAIT-C1 on Intel HW that has it, else use HALT. + */ +static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) +{ + if (c->x86_vendor != X86_VENDOR_INTEL) + return 0; + + if (!cpu_has(c, X86_FEATURE_MWAIT)) + return 0; + + return 1; +} + +/* + * MONITOR/MWAIT with no hints, used for default default C1 state. + * This invokes MWAIT with interrutps enabled and no flags, + * which is backwards compatible with the original MWAIT implementation. + */ + +static void mwait_idle(void) +{ + if (!current_set_polling_and_test()) { + if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) { + smp_mb(); /* quirk */ + clflush((void *)¤t_thread_info()->flags); + smp_mb(); /* quirk */ + } + + __monitor((void *)¤t_thread_info()->flags, 0, 0); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else { + local_irq_enable(); + } + __current_clr_polling(); +} + void select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP @@ -412,6 +476,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c) /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); x86_idle = amd_e400_idle; + } else if (prefer_mwait_c1_over_halt(c)) { + pr_info("using mwait in idle threads\n"); + x86_idle = mwait_idle; } else x86_idle = default_idle; } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 603c4f99cb5a..8ed2106b06da 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long sp; unsigned short ss, gs; - if (user_mode_vm(regs)) { + if (user_mode(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; gs = get_user_gs(regs); @@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ip = new_ip; regs->sp = new_sp; regs->flags = X86_EFLAGS_IF; - /* - * force it to the iret return path by making it look as if there was - * some work pending. - */ - set_thread_flag(TIF_NOTIFY_RESUME); + force_iret(); } EXPORT_SYMBOL_GPL(start_thread); @@ -248,7 +244,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss, cpu); fpu_switch_t fpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -256,11 +252,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) fpu = switch_fpu_prepare(prev_p, next_p, cpu); /* - * Reload esp0. - */ - load_sp0(tss, next); - - /* * Save away %gs. No need to save %fs, as it was saved on the * stack on entry. No need to save %es and %ds, as those are * always kernel segments while inside the kernel. Doing this @@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); + /* + * Reload esp0, kernel_stack, and current_top_of_stack. This changes + * current_thread_info(). + */ + load_sp0(tss, next); this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - KERNEL_STACK_OFFSET); + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE); + this_cpu_write(cpu_current_top_of_stack, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE); /* * Restore %gs if needed (which is common) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 67fcc43577d2..4baaa972f52a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,7 +52,7 @@ asmlinkage extern void ret_from_fork(void); -__visible DEFINE_PER_CPU(unsigned long, old_rsp); +__visible DEFINE_PER_CPU(unsigned long, rsp_scratch); /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, int all) @@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - p->thread.usersp = me->thread.usersp; set_tsk_thread_flag(p, TIF_FORK); p->thread.io_bitmap_ptr = NULL; @@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, */ if (clone_flags & CLONE_SETTLS) { #ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) + if (is_ia32_task()) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); else @@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, loadsegment(es, _ds); loadsegment(ds, _ds); load_gs_index(0); - current->thread.usersp = new_sp; regs->ip = new_ip; regs->sp = new_sp; - this_cpu_write(old_rsp, new_sp); regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; + force_iret(); } void @@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &per_cpu(cpu_tss, cpu); unsigned fsindex, gsindex; fpu_switch_t fpu; fpu = switch_fpu_prepare(prev_p, next_p, cpu); - /* Reload esp0 and ss1. */ - load_sp0(tss, next); - /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). * @@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - prev->usersp = this_cpu_read(old_rsp); - this_cpu_write(old_rsp, next->usersp); this_cpu_write(current_task, next_p); /* @@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count); this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count); + /* Reload esp0 and ss1. This changes current_thread_info(). */ + load_sp0(tss, next); + this_cpu_write(kernel_stack, - (unsigned long)task_stack_page(next_p) + - THREAD_SIZE - KERNEL_STACK_OFFSET); + (unsigned long)task_stack_page(next_p) + THREAD_SIZE); /* * Now maybe reload the debug registers and handle I/O bitmaps @@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr) unsigned long KSTK_ESP(struct task_struct *task) { - return (test_tsk_thread_flag(task, TIF_IA32)) ? - (task_pt_regs(task)->sp) : ((task)->thread.usersp); + return task_pt_regs(task)->sp; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e510618b2e91..a7bc79480719 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task, case offsetof(struct user_regs_struct,cs): if (unlikely(value == 0)) return -EIO; -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - task_pt_regs(task)->cs = value; -#endif + task_pt_regs(task)->cs = value; break; case offsetof(struct user_regs_struct,ss): if (unlikely(value == 0)) return -EIO; -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - task_pt_regs(task)->ss = value; -#endif + task_pt_regs(task)->ss = value; break; } @@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk, memset(info, 0, sizeof(*info)); info->si_signo = SIGTRAP; info->si_code = si_code; - info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; + info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; } void user_single_step_siginfo(struct task_struct *tsk, diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d229a58..e5ecd20e72dd 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } +static struct pvclock_vsyscall_time_info *pvclock_vdso_info; + +static struct pvclock_vsyscall_time_info * +pvclock_get_vsyscall_user_time_info(int cpu) +{ + if (!pvclock_vdso_info) { + BUG(); + return NULL; + } + + return &pvclock_vdso_info[cpu]; +} + +struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) +{ + return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; +} + #ifdef CONFIG_X86_64 +static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, + void *v) +{ + struct task_migration_notifier *mn = v; + struct pvclock_vsyscall_time_info *pvti; + + pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); + + /* this is NULL when pvclock vsyscall is not initialized */ + if (unlikely(pvti == NULL)) + return NOTIFY_DONE; + + pvti->migrate_count++; + + return NOTIFY_DONE; +} + +static struct notifier_block pvclock_migrate = { + .notifier_call = pvclock_task_migrate, +}; + /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); + pvclock_vdso_info = i; + for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } + + register_task_migration_notifier(&pvclock_migrate); + return 0; } #endif diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index bae6c609888e..86db4bcd7ce5 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, + /* ASRock */ + { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ + .callback = set_pci_reboot, + .ident = "ASRock Q1900DC-ITX", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"), + DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"), + }, + }, + /* ASUS */ { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index e13f8e7c22a6..77630d57e7bf 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -226,23 +226,23 @@ swap_pages: movl (%ebx), %ecx addl $4, %ebx 1: - testl $0x1, %ecx /* is it a destination page */ + testb $0x1, %cl /* is it a destination page */ jz 2f movl %ecx, %edi andl $0xfffff000, %edi jmp 0b 2: - testl $0x2, %ecx /* is it an indirection page */ + testb $0x2, %cl /* is it an indirection page */ jz 2f movl %ecx, %ebx andl $0xfffff000, %ebx jmp 0b 2: - testl $0x4, %ecx /* is it the done indicator */ + testb $0x4, %cl /* is it the done indicator */ jz 2f jmp 3f 2: - testl $0x8, %ecx /* is it the source indicator */ + testb $0x8, %cl /* is it the source indicator */ jz 0b /* Ignore it otherwise */ movl %ecx, %esi /* For every source page do a copy */ andl $0xfffff000, %esi diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 3fd2c693e475..98111b38ebfd 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -123,7 +123,7 @@ identity_mapped: * Set cr4 to a known state: * - physical address extension enabled */ - movq $X86_CR4_PAE, %rax + movl $X86_CR4_PAE, %eax movq %rax, %cr4 jmp 1f @@ -221,23 +221,23 @@ swap_pages: movq (%rbx), %rcx addq $8, %rbx 1: - testq $0x1, %rcx /* is it a destination page? */ + testb $0x1, %cl /* is it a destination page? */ jz 2f movq %rcx, %rdi andq $0xfffffffffffff000, %rdi jmp 0b 2: - testq $0x2, %rcx /* is it an indirection page? */ + testb $0x2, %cl /* is it an indirection page? */ jz 2f movq %rcx, %rbx andq $0xfffffffffffff000, %rbx jmp 0b 2: - testq $0x4, %rcx /* is it the done indicator? */ + testb $0x4, %cl /* is it the done indicator? */ jz 2f jmp 3f 2: - testq $0x8, %rcx /* is it the source indicator? */ + testb $0x8, %cl /* is it the source indicator? */ jz 0b /* Ignore it otherwise */ movq %rcx, %rsi /* For ever source page do a copy */ andq $0xfffffffffffff000, %rsi @@ -246,17 +246,17 @@ swap_pages: movq %rsi, %rax movq %r10, %rdi - movq $512, %rcx + movl $512, %ecx rep ; movsq movq %rax, %rdi movq %rdx, %rsi - movq $512, %rcx + movl $512, %ecx rep ; movsq movq %rdx, %rdi movq %r10, %rsi - movq $512, %rcx + movl $512, %ecx rep ; movsq lea PAGE_SIZE(%rax), %rsi diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 98dc9317286e..014466b152b5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -122,8 +122,6 @@ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; -bool __read_mostly kaslr_enabled = false; - #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); #endif @@ -427,11 +425,6 @@ static void __init reserve_initrd(void) } #endif /* CONFIG_BLK_DEV_INITRD */ -static void __init parse_kaslr_setup(u64 pa_data, u32 data_len) -{ - kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data)); -} - static void __init parse_setup_data(void) { struct setup_data *data; @@ -457,9 +450,6 @@ static void __init parse_setup_data(void) case SETUP_EFI: parse_efi_setup(pa_data, data_len); break; - case SETUP_KASLR: - parse_kaslr_setup(pa_data, data_len); - break; default: break; } @@ -842,14 +832,15 @@ static void __init trim_low_memory_range(void) static int dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) { - if (kaslr_enabled) + if (kaslr_enabled()) { pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n", (unsigned long)&_text - __START_KERNEL, __START_KERNEL, __START_KERNEL_map, MODULES_VADDR-1); - else + } else { pr_emerg("Kernel Offset: disabled\n"); + } return 0; } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e5042463c1bc..53cc4085c3d7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -61,8 +61,7 @@ regs->seg = GET_SEG(seg) | 3; \ } while (0) -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, - unsigned long *pax) +int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { void __user *buf; unsigned int tmpflags; @@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, #endif /* CONFIG_X86_32 */ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); - COPY(dx); COPY(cx); COPY(ip); + COPY(dx); COPY(cx); COPY(ip); COPY(ax); #ifdef CONFIG_X86_64 COPY(r8); @@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, COPY(r15); #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - - get_user_ex(*pax, &sc->ax); } get_user_catch(err); err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + force_iret(); + return err; } @@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, #else /* !CONFIG_X86_32 */ put_user_ex(regs->flags, &sc->flags); put_user_ex(regs->cs, &sc->cs); - put_user_ex(0, &sc->gs); - put_user_ex(0, &sc->fs); + put_user_ex(0, &sc->__pad2); + put_user_ex(0, &sc->__pad1); + put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, regs->sp = (unsigned long)frame; - /* Set up the CS register to run signal handlers in 64-bit mode, - even if the handler happens to be interrupting 32-bit code. */ + /* + * Set up the CS and SS registers to run signal handlers in + * 64-bit mode, even if the handler happens to be interrupting + * 32-bit or 16-bit code. + * + * SS is subtle. In 64-bit mode, we don't need any particular + * SS descriptor, but we do need SS to be valid. It's possible + * that the old SS is entirely bogus -- this can happen if the + * signal we're trying to deliver is #GP or #SS caused by a bad + * SS value. + */ regs->cs = __USER_CS; + regs->ss = __USER_DS; return 0; } @@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void) { struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; - unsigned long ax; sigset_t set; frame = (struct sigframe __user *)(regs->sp - 8); @@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc, &ax)) + if (restore_sigcontext(regs, &frame->sc)) goto badframe; - return ax; + return regs->ax; badframe: signal_fault(regs, frame, "sigreturn"); @@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; - unsigned long ax; sigset_t set; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); @@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) goto badframe; - return ax; + return regs->ax; badframe: signal_fault(regs, frame, "rt_sigreturn"); @@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; - unsigned long ax; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - return ax; + return regs->ax; badframe: signal_fault(regs, frame, "x32 rt_sigreturn"); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index febc6aabc72e..7035f6b21c3f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -779,6 +779,26 @@ out: return boot_error; } +void common_cpu_up(unsigned int cpu, struct task_struct *idle) +{ + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); + + per_cpu(current_task, cpu) = idle; + +#ifdef CONFIG_X86_32 + /* Stack for startup_32 can be just as for start_secondary onwards */ + irq_ctx_init(cpu); + per_cpu(cpu_current_top_of_stack, cpu) = + (unsigned long)task_stack_page(idle) + THREAD_SIZE; +#else + clear_tsk_thread_flag(idle, TIF_FORK); + initial_gs = per_cpu_offset(cpu); +#endif + per_cpu(kernel_stack, cpu) = + (unsigned long)task_stack_page(idle) + THREAD_SIZE; +} + /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) int cpu0_nmi_registered = 0; unsigned long timeout; - /* Just in case we booted with a single CPU. */ - alternatives_enable_smp(); - idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); - per_cpu(current_task, cpu) = idle; -#ifdef CONFIG_X86_32 - /* Stack for startup_32 can be just as for start_secondary onwards */ - irq_ctx_init(cpu); -#else - clear_tsk_thread_flag(idle, TIF_FORK); - initial_gs = per_cpu_offset(cpu); -#endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; @@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) /* the FPU context is blank, nobody can own it */ __cpu_disable_lazy_restore(cpu); + common_cpu_up(cpu, tidle); + err = do_boot_cpu(apicid, cpu, tidle); if (err) { pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); @@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus) return SMP_NO_APIC; } - verify_local_APIC(); - /* * If SMP should be disabled, then really disable it! */ diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c index e9bcd57d8a9e..3777189c4a19 100644 --- a/arch/x86/kernel/syscall_32.c +++ b/arch/x86/kernel/syscall_32.c @@ -5,21 +5,29 @@ #include <linux/cache.h> #include <asm/asm-offsets.h> -#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; +#ifdef CONFIG_IA32_EMULATION +#define SYM(sym, compat) compat +#else +#define SYM(sym, compat) sym +#define ia32_sys_call_table sys_call_table +#define __NR_ia32_syscall_max __NR_syscall_max +#endif + +#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ; #include <asm/syscalls_32.h> #undef __SYSCALL_I386 -#define __SYSCALL_I386(nr, sym, compat) [nr] = sym, +#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat), typedef asmlinkage void (*sys_call_ptr_t)(void); extern asmlinkage void sys_ni_syscall(void); -__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { +__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = { /* * Smells like a compiler bug -- it doesn't work * when the & below is removed. */ - [0 ... __NR_syscall_max] = &sys_ni_syscall, + [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall, #include <asm/syscalls_32.h> }; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 25adc0e16eaa..d39c09119db6 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (!user_mode_vm(regs) && in_lock_functions(pc)) { + if (!user_mode(regs) && in_lock_functions(pc)) { #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 9d2073e2ecc9..6751c5c58eec 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) { enum ctx_state prev_state; - if (user_mode_vm(regs)) { + if (user_mode(regs)) { /* Other than that, we're just an exception. */ prev_state = exception_enter(); } else { @@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) /* Must be before exception_exit. */ preempt_count_sub(HARDIRQ_OFFSET); - if (user_mode_vm(regs)) + if (user_mode(regs)) return exception_exit(prev_state); else rcu_nmi_exit(); @@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) * * IST exception handlers normally cannot schedule. As a special * exception, if the exception interrupted userspace code (i.e. - * user_mode_vm(regs) would return true) and the exception was not + * user_mode(regs) would return true) and the exception was not * a double fault, it can be safe to schedule. ist_begin_non_atomic() * begins a non-atomic section within an ist_enter()/ist_exit() region. * Callers are responsible for enabling interrupts themselves inside @@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state) */ void ist_begin_non_atomic(struct pt_regs *regs) { - BUG_ON(!user_mode_vm(regs)); + BUG_ON(!user_mode(regs)); /* * Sanity check: we need to be on the normal thread stack. This * will catch asm bugs and any attempt to use ist_preempt_enable * from double_fault. */ - BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack)) - & ~(THREAD_SIZE - 1)) != 0); + BUG_ON((unsigned long)(current_top_of_stack() - + current_stack_pointer()) >= THREAD_SIZE); preempt_count_sub(HARDIRQ_OFFSET); } @@ -194,8 +194,7 @@ static nokprobe_inline int do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, struct pt_regs *regs, long error_code) { -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { /* * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. @@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, } return -1; } -#endif + if (!user_mode(regs)) { if (!fixup_exception(regs)) { tsk->thread.error_code = error_code; @@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code) prev_state = exception_enter(); conditional_sti(regs); -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); goto exit; } -#endif tsk = current; if (!user_mode(regs)) { @@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) /* Copy the remainder of the stack from the current stack. */ memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); - BUG_ON(!user_mode_vm(&new_stack->regs)); + BUG_ON(!user_mode(&new_stack->regs)); return new_stack; } NOKPROBE_SYMBOL(fixup_bad_iret); @@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - if (regs->flags & X86_VM_MASK) { + if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); preempt_conditional_cli(regs); @@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; conditional_sti(regs); - if (!user_mode_vm(regs)) + if (!user_mode(regs)) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; @@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) /* Set of traps needed for early debugging. */ void __init early_trap_init(void) { - set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); + /* + * Don't use IST to set DEBUG_STACK as it doesn't work until TSS + * is ready in cpu_init() <-- trap_init(). Before trap_init(), + * CPU runs at ring 0 so it is impossible to hit an invalid + * stack. Using the original stack works well enough at this + * early stage. DEBUG_STACK will be equipped after cpu_init() in + * trap_init(). + * + * We don't need to set trace_idt_table like set_intr_gate(), + * since we don't have trace_debug and it will be reset to + * 'debug' in trap_init() by set_intr_gate_ist(). + */ + set_intr_gate_notrace(X86_TRAP_DB, debug); /* int3 can be called from all */ - set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); + set_system_intr_gate(X86_TRAP_BP, &int3); #ifdef CONFIG_X86_32 set_intr_gate(X86_TRAP_PF, page_fault); #endif @@ -1005,6 +1014,15 @@ void __init trap_init(void) */ cpu_init(); + /* + * X86_TRAP_DB and X86_TRAP_BP have been set + * in early_trap_init(). However, ITS works only after + * cpu_init() loads TSS. See comments in early_trap_init(). + */ + set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); + /* int3 can be called from all */ + set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); + x86_init.irqs.trap_init(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 81f8adb0679e..0b81ad67da07 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, int ret = NOTIFY_DONE; /* We are only interested in userspace traps */ - if (regs && !user_mode_vm(regs)) + if (regs && !user_mode(regs)) return NOTIFY_DONE; switch (val) { diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index e8edcf52e069..fc9db6ef2a95 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = &per_cpu(cpu_tss, get_cpu()); current->thread.sp0 = current->thread.saved_sp0; current->thread.sysenter_cs = __KERNEL_CS; load_sp0(tss, ¤t->thread); @@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.saved_fs = info->regs32->fs; tsk->thread.saved_gs = get_user_gs(info->regs32); - tss = &per_cpu(init_tss, get_cpu()); + tss = &per_cpu(cpu_tss, get_cpu()); tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c7d791f32b98..51e330416995 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; - vdata->cycle_last = tk->tkr.cycle_last; - vdata->mask = tk->tkr.mask; - vdata->mult = tk->tkr.mult; - vdata->shift = tk->tkr.shift; + vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr_mono.cycle_last; + vdata->mask = tk->tkr_mono.mask; + vdata->mult = tk->tkr_mono.mult; + vdata->shift = tk->tkr_mono.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->tkr.xtime_nsec; + vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->tkr.shift); + << tk->tkr_mono.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->tkr.shift; + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> - tk->tkr.shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 34f66e58a896..cdc6cf903078 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Sanitize the copied state etc. */ - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; @@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ drop_fpu(tsk); - if (__copy_from_user(xsave, buf_fx, state_size) || + if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { + fpu_finit(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - set_used_math(); } + set_used_math(); if (use_eager_fpu()) { preempt_disable(); math_state_restore(); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 08f790dfadc9..16e8f962eaad 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,5 +1,5 @@ -ccflags-y += -Ivirt/kvm -Iarch/x86/kvm +ccflags-y += -Iarch/x86/kvm CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8a80737ee6e6..59b69f6a2844 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) ((best->eax & 0xff00) >> 8) != 0) return -EINVAL; + /* Update physical-address width */ + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_pmu_cpuid_update(vcpu); return 0; } @@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) } } +int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); + if (!best || best->eax < 0x80000008) + goto not_found; + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best) + return best->eax & 0xff; +not_found: + return 36; +} +EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr); + /* when an old userspace process fills a new kernel module */ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, @@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); -int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); - if (!best || best->eax < 0x80000008) - goto not_found; - best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best) - return best->eax & 0xff; -not_found: - return 36; -} -EXPORT_SYMBOL_GPL(cpuid_maxphyaddr); - /* * If no match is found, check whether we exceed the vCPU's limit * and return the content of the highest valid _standard_ leaf instead. diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 4452eedfaedd..c3b1ad9fca81 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 __user *entries); void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); + +static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.maxphyaddr; +} static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; if (!static_cpu_has(X86_FEATURE_XSAVE)) - return 0; + return false; best = kvm_find_cpuid_entry(vcpu, 1, 0); return best && (best->ecx & bit(X86_FEATURE_XSAVE)); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e0b794a84c35..630bcb0d7a04 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -248,27 +248,7 @@ struct mode_dual { struct opcode mode64; }; -/* EFLAGS bit definitions. */ -#define EFLG_ID (1<<21) -#define EFLG_VIP (1<<20) -#define EFLG_VIF (1<<19) -#define EFLG_AC (1<<18) -#define EFLG_VM (1<<17) -#define EFLG_RF (1<<16) -#define EFLG_IOPL (3<<12) -#define EFLG_NT (1<<14) -#define EFLG_OF (1<<11) -#define EFLG_DF (1<<10) -#define EFLG_IF (1<<9) -#define EFLG_TF (1<<8) -#define EFLG_SF (1<<7) -#define EFLG_ZF (1<<6) -#define EFLG_AF (1<<4) -#define EFLG_PF (1<<2) -#define EFLG_CF (1<<0) - #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a -#define EFLG_RESERVED_ONE_MASK 2 enum x86_transfer_type { X86_TRANSFER_NONE, @@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * These EFLAGS bits are restored from saved value during emulation, and * any changes are written back to the saved value after emulation. */ -#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) +#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\ + X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 #define ON64(x) x @@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask) *dest = (*dest & ~mask) | (src & mask); } +static void assign_register(unsigned long *reg, u64 val, int bytes) +{ + /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ + switch (bytes) { + case 1: + *(u8 *)reg = (u8)val; + break; + case 2: + *(u16 *)reg = (u16)val; + break; + case 4: + *reg = (u32)val; + break; /* 64b: zero-extend */ + case 8: + *reg = val; + break; + } +} + static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) { return (1UL << (ctxt->ad_bytes << 3)) - 1; @@ -943,6 +943,22 @@ FASTOP2(xadd); FASTOP2R(cmp, cmp_r); +static int em_bsf_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsf); +} + +static int em_bsr_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsr); +} + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, unsigned int in_page, n; unsigned int count = ctxt->rep_prefix ? address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; - in_page = (ctxt->eflags & EFLG_DF) ? + in_page = (ctxt->eflags & X86_EFLAGS_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); @@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, } if (ctxt->rep_prefix && (ctxt->d & String) && - !(ctxt->eflags & EFLG_DF)) { + !(ctxt->eflags & X86_EFLAGS_DF)) { ctxt->dst.data = rc->data + rc->pos; ctxt->dst.type = OP_MEM_STR; ctxt->dst.count = (rc->end - rc->pos) / size; @@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, static void write_register_operand(struct operand *op) { - /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ - switch (op->bytes) { - case 1: - *(u8 *)op->addr.reg = (u8)op->val; - break; - case 2: - *(u16 *)op->addr.reg = (u16)op->val; - break; - case 4: - *op->addr.reg = (u32)op->val; - break; /* 64b: zero-extend */ - case 8: - *op->addr.reg = op->val; - break; - } + return assign_register(op->addr.reg, op->val, op->bytes); } static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) @@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, { int rc; unsigned long val, change_mask; - int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; - change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF - | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; + change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF | + X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT | + X86_EFLAGS_AC | X86_EFLAGS_ID; switch(ctxt->mode) { case X86EMUL_MODE_PROT64: case X86EMUL_MODE_PROT32: case X86EMUL_MODE_PROT16: if (cpl == 0) - change_mask |= EFLG_IOPL; + change_mask |= X86_EFLAGS_IOPL; if (cpl <= iopl) - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; case X86EMUL_MODE_VM86: if (iopl < 3) return emulate_gp(ctxt, 0); - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; default: /* real mode */ - change_mask |= (EFLG_IOPL | EFLG_IF); + change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF); break; } @@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; + ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM; return em_push(ctxt); } @@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; + u32 val; while (reg >= VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { @@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); + rc = emulate_pop(ctxt, &val, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) break; + assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes); --reg; } return rc; @@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); + ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC); ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); @@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) unsigned long temp_eip = 0; unsigned long temp_eflags = 0; unsigned long cs = 0; - unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF | - EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF | - EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */ - unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP; + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | + X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF | + X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF | + X86_EFLAGS_AC | X86_EFLAGS_ID | + X86_EFLAGS_FIXED; + unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF | + X86_EFLAGS_VIP; /* TODO: Add stack limit check */ @@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) ctxt->_eip = temp_eip; - if (ctxt->op_bytes == 4) ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); else if (ctxt->op_bytes == 2) { @@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) } ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED; ctxt->ops->set_nmi_mask(ctxt, false); return rc; @@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); - ctxt->eflags &= ~EFLG_ZF; + ctxt->eflags &= ~X86_EFLAGS_ZF; } else { ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | (u32) reg_read(ctxt, VCPU_REGS_RBX); - ctxt->eflags |= EFLG_ZF; + ctxt->eflags |= X86_EFLAGS_ZF; } return X86EMUL_CONTINUE; } @@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); - if (ctxt->eflags & EFLG_ZF) { + if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ ctxt->src.type = OP_NONE; ctxt->dst.val = ctxt->src.orig_val; @@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED; #endif } else { /* legacy mode */ ops->get_msr(ctxt, MSR_STAR, &msr_data); ctxt->_eip = (u32)msr_data; - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } return X86EMUL_CONTINUE; @@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); - cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); + cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; if (efer & EFER_LMA) { cs.d = 0; @@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); break; } - cs_sel |= SELECTOR_RPL_MASK; - ss_sel |= SELECTOR_RPL_MASK; + cs_sel |= SEGMENT_RPL_MASK; + ss_sel |= SEGMENT_RPL_MASK; ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); @@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) return false; if (ctxt->mode == X86EMUL_MODE_VM86) return true; - iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; return ctxt->ops->cpl(ctxt) > iopl; } @@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, return ret; ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, X86_TRANSFER_TASK_SWITCH, NULL); - if (ret != X86EMUL_CONTINUE) - return ret; - return X86EMUL_CONTINUE; + return ret; } static int task_switch_32(struct x86_emulate_ctxt *ctxt, @@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, struct operand *op) { - int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; + int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count; register_address_increment(ctxt, reg, df * op->bytes); op->addr.mem.ea = register_address(ctxt, reg); @@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_vmcall(struct x86_emulate_ctxt *ctxt) +static int em_hypercall(struct x86_emulate_ctxt *ctxt) { int rc = ctxt->ops->fix_hypercall(ctxt); @@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) return em_lgdt_lidt(ctxt, true); } -static int em_vmmcall(struct x86_emulate_ctxt *ctxt) -{ - int rc; - - rc = ctxt->ops->fix_hypercall(ctxt); - - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; - return rc; -} - static int em_lidt(struct x86_emulate_ctxt *ctxt) { return em_lgdt_lidt(ctxt, false); @@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt) { u32 flags; - flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; + flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | + X86_EFLAGS_SF; flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; ctxt->eflags &= ~0xffUL; @@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) static const struct opcode group7_rm0[] = { N, - I(SrcNone | Priv | EmulateOnUD, em_vmcall), + I(SrcNone | Priv | EmulateOnUD, em_hypercall), N, N, N, N, N, N, }; @@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = { static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), - II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), + II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | Prot | Priv, stgi, check_svme), @@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = { N, N, G(BitOp, group8), F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), - F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), + I(DstReg | SrcMem | ModRM, em_bsf_c), + I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), @@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || (ctxt->b == 0xae) || (ctxt->b == 0xaf)) && (((ctxt->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) + ((ctxt->eflags & X86_EFLAGS_ZF) == 0)) || ((ctxt->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) + ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF)))) return true; return false; @@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* All REP prefixes have the same first termination condition */ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; goto done; } } @@ -4950,7 +4948,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } } - ctxt->dst.orig_val = ctxt->dst.val; + /* Copy full 64-bit value for CMPXCHG8B. */ + ctxt->dst.orig_val64 = ctxt->dst.val64; special_insn: @@ -4962,9 +4961,9 @@ special_insn: } if (ctxt->rep_prefix && (ctxt->d & String)) - ctxt->eflags |= EFLG_RF; + ctxt->eflags |= X86_EFLAGS_RF; else - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { if (ctxt->d & Fastop) { @@ -5013,7 +5012,7 @@ special_insn: rc = emulate_int(ctxt, ctxt->src.val); break; case 0xce: /* into */ - if (ctxt->eflags & EFLG_OF) + if (ctxt->eflags & X86_EFLAGS_OF) rc = emulate_int(ctxt, 4); break; case 0xe9: /* jmp rel */ @@ -5026,19 +5025,19 @@ special_insn: break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ - ctxt->eflags ^= EFLG_CF; + ctxt->eflags ^= X86_EFLAGS_CF; break; case 0xf8: /* clc */ - ctxt->eflags &= ~EFLG_CF; + ctxt->eflags &= ~X86_EFLAGS_CF; break; case 0xf9: /* stc */ - ctxt->eflags |= EFLG_CF; + ctxt->eflags |= X86_EFLAGS_CF; break; case 0xfc: /* cld */ - ctxt->eflags &= ~EFLG_DF; + ctxt->eflags &= ~X86_EFLAGS_DF; break; case 0xfd: /* std */ - ctxt->eflags |= EFLG_DF; + ctxt->eflags |= X86_EFLAGS_DF; break; default: goto cannot_emulate; @@ -5099,7 +5098,7 @@ writeback: } goto done; /* skip rip writeback */ } - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; } ctxt->eip = ctxt->_eip; @@ -5136,8 +5135,7 @@ twobyte_insn: case 0x40 ... 0x4f: /* cmov */ if (test_cc(ctxt->b, ctxt->eflags)) ctxt->dst.val = ctxt->src.val; - else if (ctxt->mode != X86EMUL_MODE_PROT64 || - ctxt->op_bytes != 4) + else if (ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 298781d4cfb4..4dce6f8b6129 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr) (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); } -static int pit_ioport_write(struct kvm_io_device *this, +static int pit_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this, return 0; } -static int pit_ioport_read(struct kvm_io_device *this, +static int pit_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this, return 0; } -static int speaker_ioport_write(struct kvm_io_device *this, +static int speaker_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = speaker_to_pit(this); @@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this, return 0; } -static int speaker_ioport_read(struct kvm_io_device *this, - gpa_t addr, int len, void *data) +static int speaker_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *data) { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index dd1b16b611b0..c84990b42b5b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -3,7 +3,7 @@ #include <linux/kthread.h> -#include "iodev.h" +#include <kvm/iodev.h> struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cc31f7c06d3d..fef922ff2635 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s, return -EOPNOTSUPP; if (len != 1) { + memset(val, 0, len); pr_pic_unimpl("non byte read\n"); return 0; } @@ -528,42 +529,42 @@ static int picdev_read(struct kvm_pic *s, return 0; } -static int picdev_master_write(struct kvm_io_device *dev, +static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_master_read(struct kvm_io_device *dev, +static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_slave_write(struct kvm_io_device *dev, +static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_slave_read(struct kvm_io_device *dev, +static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_eclr_write(struct kvm_io_device *dev, +static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), addr, len, val); } -static int picdev_eclr_read(struct kvm_io_device *dev, +static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index b1947e0f3e10..28146f03c514 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; + if (edge) + ioapic->irr_delivered &= ~mask; if ((edge && old_irr == ioapic->irr) || (!edge && entry.fields.remote_irr)) { ret = 0; @@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.shorthand = 0; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) - ioapic->irr &= ~(1 << irq); + ioapic->irr_delivered |= 1 << irq; if (irq == RTC_GSI && line_status) { /* @@ -422,6 +424,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, struct kvm_ioapic *ioapic, int vector, int trigger_mode) { int i; + struct kvm_lapic *apic = vcpu->arch.apic; for (i = 0; i < IOAPIC_NUM_PINS; i++) { union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; @@ -443,7 +446,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); spin_lock(&ioapic->lock); - if (trigger_mode != IOAPIC_LEVEL_TRIG) + if (trigger_mode != IOAPIC_LEVEL_TRIG || + kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) continue; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); @@ -471,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, } } -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - return test_bit(vector, ioapic->handled_vectors); -} - void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; @@ -498,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); } -static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, - void *val) +static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 result; @@ -541,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, const void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 data; @@ -597,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; ioapic->ioregsel = 0; ioapic->irr = 0; + ioapic->irr_delivered = 0; ioapic->id = 0; memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); rtc_irq_eoi_tracking_reset(ioapic); @@ -654,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + state->irr &= ~ioapic->irr_delivered; spin_unlock(&ioapic->lock); return 0; } @@ -667,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); ioapic->irr = 0; + ioapic->irr_delivered = 0; update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); kvm_ioapic_inject_all(ioapic, state->irr); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index c2e36d934af4..ca0b0b4e6256 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,7 +3,7 @@ #include <linux/kvm_host.h> -#include "iodev.h" +#include <kvm/iodev.h> struct kvm; struct kvm_vcpu; @@ -77,6 +77,7 @@ struct kvm_ioapic { struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; + u32 irr_delivered; }; #ifdef DEBUG @@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2d03568e9498..ad68c73008c5 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -27,7 +27,7 @@ #include <linux/kvm_host.h> #include <linux/spinlock.h> -#include "iodev.h" +#include <kvm/iodev.h> #include "ioapic.h" #include "lapic.h" diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e55b5fc344eb..d67206a7b99a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +/* The logical map is definitely wrong if we have multiple + * modes at the same time. (Physical map is always right.) + */ +static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) +{ + return !(map->mode & (map->mode - 1)); +} + +static inline void +apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) +{ + unsigned lid_bits; + + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); + BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); + lid_bits = map->mode; + + *cid = dest_id >> lid_bits; + *lid = dest_id & ((1 << lid_bits) - 1); +} + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm) if (!new) goto out; - new->ldr_bits = 8; - /* flat mode is default */ - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - new->broadcast = APIC_BROADCAST; - - kvm_for_each_vcpu(i, vcpu, kvm) { - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!kvm_apic_present(vcpu)) - continue; - - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = new->lid_mask = 0xffff; - new->broadcast = X2APIC_BROADCAST; - } else if (kvm_apic_get_reg(apic, APIC_LDR)) { - if (kvm_apic_get_reg(apic, APIC_DFR) == - APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - } else { - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - } - } - - /* - * All APICs have to be configured in the same mode by an OS. - * We take advatage of this while building logical id loockup - * table. After reset APICs are in software disabled mode, so if - * we find apic with different setting we assume this is the mode - * OS wants all apics to be in; build lookup table accordingly. - */ - if (kvm_apic_sw_enabled(apic)) - break; - } - kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; u16 cid, lid; @@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm) aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); - cid = apic_cluster_id(new, ldr); - lid = apic_logical_id(new, ldr); if (aid < ARRAY_SIZE(new->phys_map)) new->phys_map[aid] = apic; + + if (apic_x2apic_mode(apic)) { + new->mode |= KVM_APIC_MODE_X2APIC; + } else if (ldr) { + ldr = GET_APIC_LOGICAL_ID(ldr); + if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) + new->mode |= KVM_APIC_MODE_XAPIC_FLAT; + else + new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; + } + + if (!kvm_apic_logical_map_valid(new)) + continue; + + apic_logical_id(new, ldr, &cid, &lid); + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } @@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) { - return dest == (apic_x2apic_mode(apic) ? - X2APIC_BROADCAST : APIC_BROADCAST); + if (apic_x2apic_mode(apic)) + return mda == X2APIC_BROADCAST; + + return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; } -static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) { - return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); + if (kvm_apic_broadcast(apic, mda)) + return true; + + if (apic_x2apic_mode(apic)) + return mda == kvm_apic_id(apic); + + return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); } static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) @@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) && (logical_id & mda & 0xffff) != 0; logical_id = GET_APIC_LOGICAL_ID(logical_id); + mda = GET_APIC_DEST_FIELD(mda); switch (kvm_apic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: @@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) } } +/* KVM APIC implementation has two quirks + * - dest always begins at 0 while xAPIC MDA has offset 24, + * - IOxAPIC messages have to be delivered (directly) to x2APIC. + */ +static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, + struct kvm_lapic *target) +{ + bool ipi = source != NULL; + bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); + + if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) + return X2APIC_BROADCAST; + + return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); +} + bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode) { struct kvm_lapic *target = vcpu->arch.apic; + u32 mda = kvm_apic_mda(dest, source, target); apic_debug("target %p, source %p, dest 0x%x, " "dest_mode 0x%x, short_hand 0x%x\n", @@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, switch (short_hand) { case APIC_DEST_NOSHORT: if (dest_mode == APIC_DEST_PHYSICAL) - return kvm_apic_match_physical_addr(target, dest); + return kvm_apic_match_physical_addr(target, mda); else - return kvm_apic_match_logical_addr(target, dest); + return kvm_apic_match_logical_addr(target, mda); case APIC_DEST_SELF: return target == source; case APIC_DEST_ALLINC: @@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic **dst; int i; bool ret = false; + bool x2apic_ipi = src && apic_x2apic_mode(src); *r = -1; @@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) + return false; + rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); if (!map) goto out; - if (irq->dest_id == map->broadcast) - goto out; - ret = true; if (irq->dest_mode == APIC_DEST_PHYSICAL) { @@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = &map->phys_map[irq->dest_id]; } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); - u16 cid = apic_cluster_id(map, mda); + u16 cid; + + if (!kvm_apic_logical_map_valid(map)) { + ret = false; + goto out; + } + + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); if (cid >= ARRAY_SIZE(map->logical_map)) goto out; dst = map->logical_map[cid]; - bitmap = apic_logical_id(map, mda); - if (irq->delivery_mode == APIC_DM_LOWEST) { int l = -1; for_each_set_bit(i, &bitmap, 16) { @@ -833,8 +858,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) { - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) trigger_mode = IOAPIC_LEVEL_TRIG; @@ -1038,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) addr < apic->base_address + LAPIC_MMIO_LENGTH; } -static int apic_mmio_read(struct kvm_io_device *this, +static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1358,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) return ret; } -static int apic_mmio_write(struct kvm_io_device *this, +static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1498,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base = value; /* update jump label if enable bit changes */ @@ -1572,7 +1594,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1782,7 +1804,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? + apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; if (kvm_x86_ops->hwapic_irr_update) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 0bc6c656625b..9d28383fc1e7 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -1,7 +1,7 @@ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> @@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) return kvm_x86_ops->vm_has_apicv(kvm); } -static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) -{ - u16 cid; - ldr >>= 32 - map->ldr_bits; - cid = (ldr >> map->cid_shift) & map->cid_mask; - - return cid; -} - -static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) -{ - ldr >>= (32 - map->ldr_bits); - return ldr & map->lid_mask; -} - static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { return vcpu->arch.apic->pending_events; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cee759299a35..146f295ee322 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, kvm_flush_remote_tlbs(kvm); } +static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, + unsigned long *rmapp) +{ + u64 *sptep; + struct rmap_iterator iter; + int need_tlb_flush = 0; + pfn_t pfn; + struct kvm_mmu_page *sp; + + for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + + sp = page_header(__pa(sptep)); + pfn = spte_to_pfn(*sptep); + + /* + * Only EPT supported for now; otherwise, one would need to + * find out efficiently whether the guest page tables are + * also using huge pages. + */ + if (sp->role.direct && + !kvm_is_reserved_pfn(pfn) && + PageTransCompound(pfn_to_page(pfn))) { + drop_spte(kvm, sptep); + sptep = rmap_get_first(*rmapp, &iter); + need_tlb_flush = 1; + } else + sptep = rmap_get_next(&iter); + } + + return need_tlb_flush; +} + +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + bool flush = false; + unsigned long *rmapp; + unsigned long last_index, index; + gfn_t gfn_start, gfn_end; + + spin_lock(&kvm->mmu_lock); + + gfn_start = memslot->base_gfn; + gfn_end = memslot->base_gfn + memslot->npages - 1; + + if (gfn_start >= gfn_end) + goto out; + + rmapp = memslot->arch.rmap[0]; + last_index = gfn_to_index(gfn_end, memslot->base_gfn, + PT_PAGE_TABLE_LEVEL); + + for (index = 0; index <= last_index; ++index, ++rmapp) { + if (*rmapp) + flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp); + + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + if (flush) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + cond_resched_lock(&kvm->mmu_lock); + } + } + + if (flush) + kvm_flush_remote_tlbs(kvm); + +out: + spin_unlock(&kvm->mmu_lock); +} + void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot) { diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 8e6b7d869d2f..29fbf9dfdc54 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping { }; /* mapping between fixed pmc index and arch_events array */ -int fixed_pmc_events[] = {1, 0, 7}; +static int fixed_pmc_events[] = {1, 0, 7}; static bool pmc_is_gp(struct kvm_pmc *pmc) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d319e0c24758..ce741b8650f6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_bsp(&svm->vcpu)) + if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; svm_init_osvw(&svm->vcpu); @@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; - skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); kvm_emulate_hypercall(&svm->vcpu); return 1; } @@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], - vcpu->arch.regs[VCPU_REGS_RAX]); + trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), + kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ - kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); + kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(&svm->vcpu); + return 1; +} + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(&svm->vcpu); @@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm) return 1; } -bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, + unsigned long val) { unsigned long cr0 = svm->vcpu.arch.cr0; bool ret = false; @@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; - cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; + if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) + cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; + else + cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr >= 16) { /* mov to cr */ @@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); u64 data; if (svm_get_msr(&svm->vcpu, ecx, &data)) { @@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, data); - svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; - svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); + kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; skip_emulated_instruction(&svm->vcpu); } @@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; - u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) - | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u64 data = kvm_read_edx_eax(&svm->vcpu); msr.data = data; msr.index = ecx; @@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, @@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, @@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); + WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -3649,11 +3656,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_hwapic_isr_update(struct kvm *kvm, int isr) -{ - return; -} - static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) { return; @@ -4403,7 +4405,6 @@ static struct kvm_x86_ops svm_x86_ops = { .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, .vm_has_apicv = svm_vm_has_apicv, .load_eoi_exitmap = svm_load_eoi_exitmap, - .hwapic_isr_update = svm_hwapic_isr_update, .sync_pir_to_irr = svm_sync_pir_to_irr, .set_tss_addr = svm_set_tss_addr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14c1a18d206a..f5e8dce8046c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) { unsigned long *msr_bitmap; - if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { + if (is_guest_mode(vcpu)) + msr_bitmap = vmx_msr_bitmap_nested; + else if (irqchip_in_kernel(vcpu->kvm) && + apic_x2apic_mode(vcpu->arch.apic)) { if (is_long_mode(vcpu)) msr_bitmap = vmx_msr_bitmap_longmode_x2apic; else @@ -2467,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -2476,8 +2480,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ vmx->nested.nested_vmx_secondary_ctls_high |= - SECONDARY_EXEC_ENABLE_EPT | - SECONDARY_EXEC_UNRESTRICTED_GUEST; + SECONDARY_EXEC_ENABLE_EPT; vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_INVEPT_BIT; @@ -2491,6 +2494,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) } else vmx->nested.nested_vmx_ept_caps = 0; + if (enable_unrestricted_guest) + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_UNRESTRICTED_GUEST; + /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, vmx->nested.nested_vmx_misc_low, @@ -3262,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, * default value. */ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) - save->selector &= ~SELECTOR_RPL_MASK; - save->dpl = save->selector & SELECTOR_RPL_MASK; + save->selector &= ~SEGMENT_RPL_MASK; + save->dpl = save->selector & SEGMENT_RPL_MASK; save->s = 1; } vmx_set_segment(vcpu, save, seg); @@ -3836,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) unsigned int cs_rpl; vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - cs_rpl = cs.selector & SELECTOR_RPL_MASK; + cs_rpl = cs.selector & SEGMENT_RPL_MASK; if (cs.unusable) return false; @@ -3864,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) unsigned int ss_rpl; vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - ss_rpl = ss.selector & SELECTOR_RPL_MASK; + ss_rpl = ss.selector & SEGMENT_RPL_MASK; if (ss.unusable) return true; @@ -3886,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) unsigned int rpl; vmx_get_segment(vcpu, &var, seg); - rpl = var.selector & SELECTOR_RPL_MASK; + rpl = var.selector & SEGMENT_RPL_MASK; if (var.unusable) return true; @@ -3913,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu) if (tr.unusable) return false; - if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; @@ -3931,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) if (ldtr.unusable) return true; - if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) return false; @@ -3948,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - return ((cs.selector & SELECTOR_RPL_MASK) == - (ss.selector & SELECTOR_RPL_MASK)); + return ((cs.selector & SEGMENT_RPL_MASK) == + (ss.selector & SEGMENT_RPL_MASK)); } /* @@ -4367,6 +4374,18 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) return 0; } +static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_SMP + if (vcpu->mode == IN_GUEST_MODE) { + apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), + POSTED_INTR_VECTOR); + return true; + } +#endif + return false; +} + static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { @@ -4375,9 +4394,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { /* the PIR and ON have been set by L1. */ - if (vcpu->mode == IN_GUEST_MODE) - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), - POSTED_INTR_VECTOR); + kvm_vcpu_trigger_posted_interrupt(vcpu); /* * If a posted intr is not recognized by hardware, * we will accomplish it in the next vmentry. @@ -4409,12 +4426,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) r = pi_test_and_set_on(&vmx->pi_desc); kvm_make_request(KVM_REQ_EVENT, vcpu); -#ifdef CONFIG_SMP - if (!r && (vcpu->mode == IN_GUEST_MODE)) - apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), - POSTED_INTR_VECTOR); - else -#endif + if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu)) kvm_vcpu_kick(vcpu); } @@ -4700,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_bsp(&vmx->vcpu)) + if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) apic_base_msr.data |= MSR_IA32_APICBASE_BSP; apic_base_msr.host_initiated = true; kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); @@ -4995,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); } return 1; } @@ -5060,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_invalid_opcode(intr_info)) { + if (is_guest_mode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); @@ -5079,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 2; + vcpu->run->internal.ndata = 3; vcpu->run->internal.data[0] = vect_info; vcpu->run->internal.data[1] = intr_info; + vcpu->run->internal.data[2] = error_code; return 0; } @@ -5522,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) static int handle_halt(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } @@ -5559,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) static int handle_wbinvd(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_wbinvd(vcpu); return 1; } @@ -5817,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa_t gpa; gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { skip_emulated_instruction(vcpu); return 1; } @@ -5898,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_emulate_halt(vcpu); + ret = kvm_vcpu_halt(vcpu); goto out; } @@ -7307,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, else if (port < 0x10000) bitmap = vmcs12->io_bitmap_b; else - return 1; + return true; bitmap += (port & 0x7fff) / 8; if (last_bitmap != bitmap) if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) - return 1; + return true; if (b & (1 << (port & 7))) - return 1; + return true; port++; size--; last_bitmap = bitmap; } - return 0; + return false; } /* @@ -7337,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) - return 1; + return true; /* * The MSR_BITMAP page is divided into four 1024-byte bitmaps, @@ -7356,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, if (msr_index < 1024*8) { unsigned char b; if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) - return 1; + return true; return 1 & (b >> (msr_index & 7)); } else - return 1; /* let L1 handle the wrong parameter */ + return true; /* let L1 handle the wrong parameter */ } /* @@ -7381,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, case 0: if (vmcs12->cr0_guest_host_mask & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; break; case 3: if ((vmcs12->cr3_target_count >= 1 && @@ -7392,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, vmcs12->cr3_target_value2 == val) || (vmcs12->cr3_target_count >= 4 && vmcs12->cr3_target_value3 == val)) - return 0; + return false; if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) - return 1; + return true; break; case 4: if (vmcs12->cr4_guest_host_mask & (vmcs12->cr4_read_shadow ^ val)) - return 1; + return true; break; case 8: if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) - return 1; + return true; break; } break; case 2: /* clts */ if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && (vmcs12->cr0_read_shadow & X86_CR0_TS)) - return 1; + return true; break; case 1: /* mov from cr */ switch (cr) { case 3: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR3_STORE_EXITING) - return 1; + return true; break; case 8: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR8_STORE_EXITING) - return 1; + return true; break; } break; @@ -7433,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; if ((vmcs12->cr0_guest_host_mask & 0x1) && !(vmcs12->cr0_read_shadow & 0x1) && (val & 0x1)) - return 1; + return true; break; } - return 0; + return false; } /* @@ -7463,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) KVM_ISA_VMX); if (vmx->nested.nested_run_pending) - return 0; + return false; if (unlikely(vmx->fail)) { pr_info_ratelimited("%s failed vm entry %x\n", __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); - return 1; + return true; } switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (!is_exception(intr_info)) - return 0; + return false; else if (is_page_fault(intr_info)) return enable_ept; else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) - return 0; + return false; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: - return 0; + return false; case EXIT_REASON_TRIPLE_FAULT: - return 1; + return true; case EXIT_REASON_PENDING_INTERRUPT: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: - return 1; + return true; case EXIT_REASON_CPUID: if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) - return 0; - return 1; + return false; + return true; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); case EXIT_REASON_INVD: - return 1; + return true; case EXIT_REASON_INVLPG: return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -7516,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! */ - return 1; + return true; case EXIT_REASON_CR_ACCESS: return nested_vmx_exit_handled_cr(vcpu, vmcs12); case EXIT_REASON_DR_ACCESS: @@ -7527,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); case EXIT_REASON_INVALID_STATE: - return 1; + return true; case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_INSTRUCTION: @@ -7537,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) nested_cpu_has2(vmcs12, SECONDARY_EXEC_PAUSE_LOOP_EXITING); case EXIT_REASON_MCE_DURING_VMENTRY: - return 0; + return false; case EXIT_REASON_TPR_BELOW_THRESHOLD: return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: @@ -7546,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_APIC_WRITE: case EXIT_REASON_EOI_INDUCED: /* apic_write and eoi_induced should exit unconditionally. */ - return 1; + return true; case EXIT_REASON_EPT_VIOLATION: /* * L0 always deals with the EPT violation. If nested EPT is @@ -7554,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * missing in the guest EPT table (EPT12), the EPT violation * will be injected with nested_ept_inject_page_fault() */ - return 0; + return false; case EXIT_REASON_EPT_MISCONFIG: /* * L2 never uses directly L1's EPT, but rather L0's own EPT @@ -7562,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * (EPT on EPT). So any problems with the structure of the * table is L0's fault. */ - return 0; + return false; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: - return 1; + return true; case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: /* * This should never happen, since it is not possible to @@ -7576,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: - return 1; + return true; } } @@ -8511,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (nested && !vmx->rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ @@ -8611,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + int maxphyaddr = cpuid_maxphyaddr(vcpu); if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - /* TODO: Also verify bits beyond physical address width are 0 */ - if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) + if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || + vmcs12->apic_access_addr >> maxphyaddr) return false; /* @@ -8630,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - /* TODO: Also verify bits beyond physical address width are 0 */ - if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) + if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || + vmcs12->virtual_apic_page_addr >> maxphyaddr) return false; if (vmx->nested.virtual_apic_page) /* shouldn't happen */ @@ -8654,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has_posted_intr(vmcs12)) { - if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64)) + if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || + vmcs12->posted_intr_desc_addr >> maxphyaddr) return false; if (vmx->nested.pi_desc_page) { /* shouldn't happen */ @@ -8853,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, unsigned long count_field, - unsigned long addr_field, - int maxphyaddr) + unsigned long addr_field) { + int maxphyaddr; u64 count, addr; if (vmcs12_read_any(vcpu, count_field, &count) || @@ -8865,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, } if (count == 0) return 0; + maxphyaddr = cpuid_maxphyaddr(vcpu); if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { pr_warn_ratelimited( @@ -8878,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int maxphyaddr; - if (vmcs12->vm_exit_msr_load_count == 0 && vmcs12->vm_exit_msr_store_count == 0 && vmcs12->vm_entry_msr_load_count == 0) return 0; /* Fast path */ - maxphyaddr = cpuid_maxphyaddr(vcpu); if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, - VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) || + VM_EXIT_MSR_LOAD_ADDR) || nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, - VM_EXIT_MSR_STORE_ADDR, maxphyaddr) || + VM_EXIT_MSR_STORE_ADDR) || nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, - VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr)) + VM_ENTRY_MSR_LOAD_ADDR)) return -EINVAL; return 0; } @@ -9140,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; @@ -9213,9 +9231,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS && - nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) { - vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested)); + exec_control & CPU_BASED_USE_MSR_BITMAPS) { + nested_vmx_merge_msr_bitmap(vcpu, vmcs12); + /* MSR_BITMAP will be set by following vmx_set_efer. */ } else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; @@ -9374,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { - /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } @@ -9513,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12->launch_state = 1; if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70be41b3..e1a81267f3f6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr0123(struct kvm_vcpu *vcpu) +{ + int i; + + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + for (i = 0; i < KVM_NR_DB_REGS; i++) + vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; + } +} + static void kvm_update_dr6(struct kvm_vcpu *vcpu) { if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) @@ -1070,19 +1081,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->tkr.cycle_last; - vdata->clock.mask = tk->tkr.mask; - vdata->clock.mult = tk->tkr.mult; - vdata->clock.shift = tk->tkr.shift; + vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr_mono.cycle_last; + vdata->clock.mask = tk->tkr_mono.mask; + vdata->clock.mult = tk->tkr_mono.mult; + vdata->clock.shift = tk->tkr_mono.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->tkr.xtime_nsec; + vdata->nsec_base = tk->tkr_mono.xtime_nsec; write_seqcount_end(&vdata->seq); } @@ -2744,7 +2755,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_USER_NMI: case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: - case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_IOEVENTFD_NO_LENGTH: case KVM_CAP_PIT2: @@ -3150,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return -EINVAL; memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); + kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; @@ -4115,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) break; handled += n; addr += n; @@ -4135,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, + addr, n, v)) + && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); handled += n; @@ -4476,7 +4488,8 @@ mmio: return X86EMUL_CONTINUE; } -int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, +static int emulator_read_write(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, const struct read_write_emulator_ops *ops) @@ -4539,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, exception, &read_emultor); } -int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, +static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, const void *val, unsigned int bytes, @@ -4630,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) int r; if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, + r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); else - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); return r; @@ -4706,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } -int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) { if (!need_emulate_wbinvd(vcpu)) return X86EMUL_CONTINUE; @@ -4723,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) wbinvd(); return X86EMUL_CONTINUE; } + +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_emulate_wbinvd_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); + + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); + kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) +static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long *dest) { return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) +static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value) { return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); @@ -5817,7 +5840,7 @@ void kvm_arch_exit(void) free_percpu(shared_msrs); } -int kvm_emulate_halt(struct kvm_vcpu *vcpu) +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; if (irqchip_in_kernel(vcpu->kvm)) { @@ -5828,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) return 0; } } +EXPORT_SYMBOL_GPL(kvm_vcpu_halt); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_vcpu_halt(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_halt); int kvm_hv_hypercall(struct kvm_vcpu *vcpu) @@ -5904,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.dest_id = apicid; lapic_irq.delivery_mode = APIC_DM_REMRD; - kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); + kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) @@ -5912,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit, r = 1; + kvm_x86_ops->skip_emulated_instruction(vcpu); + if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -6165,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, } /* - * Returns 1 to let __vcpu_run() continue the guest execution loop without + * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the * userspace. */ @@ -6302,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } trace_kvm_entry(vcpu->vcpu_id); @@ -6383,42 +6416,47 @@ out: return r; } +static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) +{ + if (!kvm_arch_vcpu_runnable(vcpu)) { + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_block(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) + return 1; + } + + kvm_apic_accept_events(vcpu); + switch(vcpu->arch.mp_state) { + case KVM_MP_STATE_HALTED: + vcpu->arch.pv.pv_unhalted = false; + vcpu->arch.mp_state = + KVM_MP_STATE_RUNNABLE; + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.apf.halted = false; + break; + case KVM_MP_STATE_INIT_RECEIVED: + break; + default: + return -EINTR; + break; + } + return 1; +} -static int __vcpu_run(struct kvm_vcpu *vcpu) +static int vcpu_run(struct kvm_vcpu *vcpu) { int r; struct kvm *kvm = vcpu->kvm; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - r = 1; - while (r > 0) { + for (;;) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) r = vcpu_enter_guest(vcpu); - else { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_vcpu_block(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { - kvm_apic_accept_events(vcpu); - switch(vcpu->arch.mp_state) { - case KVM_MP_STATE_HALTED: - vcpu->arch.pv.pv_unhalted = false; - vcpu->arch.mp_state = - KVM_MP_STATE_RUNNABLE; - case KVM_MP_STATE_RUNNABLE: - vcpu->arch.apf.halted = false; - break; - case KVM_MP_STATE_INIT_RECEIVED: - break; - default: - r = -EINTR; - break; - } - } - } - + else + r = vcpu_block(kvm, vcpu); if (r <= 0) break; @@ -6430,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) r = -EINTR; vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.request_irq_exits; + break; } kvm_check_async_pf_completion(vcpu); @@ -6438,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) r = -EINTR; vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; + break; } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); @@ -6569,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); - r = __vcpu_run(vcpu); + r = vcpu_run(vcpu); out: post_kvm_run_save(vcpu); @@ -7076,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_clear_exception_queue(vcpu); memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); + kvm_update_dr0123(vcpu); vcpu->arch.dr6 = DR6_INIT; kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); + vcpu->arch.cr2 = 0; + kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.apf.msr_val = 0; vcpu->arch.st.msr_val = 0; @@ -7241,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) + if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; @@ -7289,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = 0; vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); @@ -7429,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { - kvm_kvfree(free->arch.rmap[i]); + kvfree(free->arch.rmap[i]); free->arch.rmap[i] = NULL; } if (i == 0) @@ -7437,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, if (!dont || free->arch.lpage_info[i - 1] != dont->arch.lpage_info[i - 1]) { - kvm_kvfree(free->arch.lpage_info[i - 1]); + kvfree(free->arch.lpage_info[i - 1]); free->arch.lpage_info[i - 1] = NULL; } } @@ -7491,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, out_free: for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { - kvm_kvfree(slot->arch.rmap[i]); + kvfree(slot->arch.rmap[i]); slot->arch.rmap[i] = NULL; if (i == 0) continue; - kvm_kvfree(slot->arch.lpage_info[i - 1]); + kvfree(slot->arch.lpage_info[i - 1]); slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; @@ -7619,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, new = id_to_memslot(kvm->memslots, mem->slot); /* + * Dirty logging tracks sptes in 4k granularity, meaning that large + * sptes have to be split. If live migration is successful, the guest + * in the source machine will be destroyed and large sptes will be + * created in the destination. However, if the guest continues to run + * in the source machine (for example if live migration fails), small + * sptes will remain around and cause bad performance. + * + * Scan sptes if dirty logging has been stopped, dropping those + * which can be collapsed into a single large-page spte. Later + * page faults will create the large-page sptes. + */ + if ((change != KVM_MR_DELETE) && + (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_zap_collapsible_sptes(kvm, new); + + /* * Set up write protection and/or dirty logging for the new slot. * * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ac4453d8520e..717908b16037 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void) /* Some systems map "vectors" to interrupts weirdly. Not us! */ __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR); if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); + set_intr_gate(i, irq_entries_start + + 8 * (i - FIRST_EXTERNAL_VECTOR)); } /* @@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss, { lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0, THREAD_SIZE / PAGE_SIZE); + tss->x86_tss.sp0 = thread->sp0; } /* Let's just say, I wouldn't do debugging under a Guest. */ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index f5cc9eb1d51b..082a85167a5b 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -13,16 +13,6 @@ #include <asm/alternative-asm.h> #include <asm/dwarf2.h> -.macro SAVE reg - pushl_cfi %\reg - CFI_REL_OFFSET \reg, 0 -.endm - -.macro RESTORE reg - popl_cfi %\reg - CFI_RESTORE \reg -.endm - .macro read64 reg movl %ebx, %eax movl %ecx, %edx @@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8) .macro addsub_return func ins insc ENTRY(atomic64_\func\()_return_cx8) CFI_STARTPROC - SAVE ebp - SAVE ebx - SAVE esi - SAVE edi + pushl_cfi_reg ebp + pushl_cfi_reg ebx + pushl_cfi_reg esi + pushl_cfi_reg edi movl %eax, %esi movl %edx, %edi @@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - RESTORE edi - RESTORE esi - RESTORE ebx - RESTORE ebp + popl_cfi_reg edi + popl_cfi_reg esi + popl_cfi_reg ebx + popl_cfi_reg ebp ret CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) @@ -104,7 +94,7 @@ addsub_return sub sub sbb .macro incdec_return func ins insc ENTRY(atomic64_\func\()_return_cx8) CFI_STARTPROC - SAVE ebx + pushl_cfi_reg ebx read64 %esi 1: @@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8) 10: movl %ebx, %eax movl %ecx, %edx - RESTORE ebx + popl_cfi_reg ebx ret CFI_ENDPROC ENDPROC(atomic64_\func\()_return_cx8) @@ -130,7 +120,7 @@ incdec_return dec sub sbb ENTRY(atomic64_dec_if_positive_cx8) CFI_STARTPROC - SAVE ebx + pushl_cfi_reg ebx read64 %esi 1: @@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8) 2: movl %ebx, %eax movl %ecx, %edx - RESTORE ebx + popl_cfi_reg ebx ret CFI_ENDPROC ENDPROC(atomic64_dec_if_positive_cx8) ENTRY(atomic64_add_unless_cx8) CFI_STARTPROC - SAVE ebp - SAVE ebx + pushl_cfi_reg ebp + pushl_cfi_reg ebx /* these just push these two parameters on the stack */ - SAVE edi - SAVE ecx + pushl_cfi_reg edi + pushl_cfi_reg ecx movl %eax, %ebp movl %edx, %edi @@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8) 3: addl $8, %esp CFI_ADJUST_CFA_OFFSET -8 - RESTORE ebx - RESTORE ebp + popl_cfi_reg ebx + popl_cfi_reg ebp ret 4: cmpl %edx, 4(%esp) @@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8) ENTRY(atomic64_inc_not_zero_cx8) CFI_STARTPROC - SAVE ebx + pushl_cfi_reg ebx read64 %esi 1: @@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8) movl $1, %eax 3: - RESTORE ebx + popl_cfi_reg ebx ret CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index e78b8eee6615..9bc944a91274 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) */ ENTRY(csum_partial) CFI_STARTPROC - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl_cfi_reg esi + pushl_cfi_reg ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: unsigned char *buff @@ -127,14 +125,12 @@ ENTRY(csum_partial) 6: addl %ecx,%eax adcl $0, %eax 7: - testl $1, 12(%esp) + testb $1, 12(%esp) jz 8f roll $8, %eax 8: - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %esi - CFI_RESTORE esi + popl_cfi_reg ebx + popl_cfi_reg esi ret CFI_ENDPROC ENDPROC(csum_partial) @@ -145,10 +141,8 @@ ENDPROC(csum_partial) ENTRY(csum_partial) CFI_STARTPROC - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl_cfi_reg esi + pushl_cfi_reg ebx movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len movl 12(%esp),%esi # Function arg: const unsigned char *buf @@ -251,14 +245,12 @@ ENTRY(csum_partial) addl %ebx,%eax adcl $0,%eax 80: - testl $1, 12(%esp) + testb $1, 12(%esp) jz 90f roll $8, %eax 90: - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %esi - CFI_RESTORE esi + popl_cfi_reg ebx + popl_cfi_reg esi ret CFI_ENDPROC ENDPROC(csum_partial) @@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic) CFI_STARTPROC subl $4,%esp CFI_ADJUST_CFA_OFFSET 4 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 + pushl_cfi_reg edi + pushl_cfi_reg esi + pushl_cfi_reg ebx movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len movl ARGBASE+4(%esp),%esi # src @@ -412,12 +401,9 @@ DST( movb %cl, (%edi) ) .previous - popl_cfi %ebx - CFI_RESTORE ebx - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi + popl_cfi_reg ebx + popl_cfi_reg esi + popl_cfi_reg edi popl_cfi %ecx # equivalent to addl $4,%esp ret CFI_ENDPROC @@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic) ENTRY(csum_partial_copy_generic) CFI_STARTPROC - pushl_cfi %ebx - CFI_REL_OFFSET ebx, 0 - pushl_cfi %edi - CFI_REL_OFFSET edi, 0 - pushl_cfi %esi - CFI_REL_OFFSET esi, 0 + pushl_cfi_reg ebx + pushl_cfi_reg edi + pushl_cfi_reg esi movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst movl ARGBASE+12(%esp),%ecx #len @@ -506,12 +489,9 @@ DST( movb %dl, (%edi) ) jmp 7b .previous - popl_cfi %esi - CFI_RESTORE esi - popl_cfi %edi - CFI_RESTORE edi - popl_cfi %ebx - CFI_RESTORE ebx + popl_cfi_reg esi + popl_cfi_reg edi + popl_cfi_reg ebx ret CFI_ENDPROC ENDPROC(csum_partial_copy_generic) diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index f2145cfa12a6..e67e579c93bd 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,31 +1,35 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> #include <asm/alternative-asm.h> /* - * Zero a page. - * rdi page - */ -ENTRY(clear_page_c) + * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is + * recommended to use this when possible and we do use them by default. + * If enhanced REP MOVSB/STOSB is not available, try to use fast string. + * Otherwise, use original. + */ + +/* + * Zero a page. + * %rdi - page + */ +ENTRY(clear_page) CFI_STARTPROC + + ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \ + "jmp clear_page_c_e", X86_FEATURE_ERMS + movl $4096/8,%ecx xorl %eax,%eax rep stosq ret CFI_ENDPROC -ENDPROC(clear_page_c) +ENDPROC(clear_page) -ENTRY(clear_page_c_e) +ENTRY(clear_page_orig) CFI_STARTPROC - movl $4096,%ecx - xorl %eax,%eax - rep stosb - ret - CFI_ENDPROC -ENDPROC(clear_page_c_e) -ENTRY(clear_page) - CFI_STARTPROC xorl %eax,%eax movl $4096/64,%ecx .p2align 4 @@ -45,29 +49,13 @@ ENTRY(clear_page) nop ret CFI_ENDPROC -.Lclear_page_end: -ENDPROC(clear_page) - - /* - * Some CPUs support enhanced REP MOVSB/STOSB instructions. - * It is recommended to use this when possible. - * If enhanced REP MOVSB/STOSB is not available, try to use fast string. - * Otherwise, use original function. - * - */ +ENDPROC(clear_page_orig) -#include <asm/cpufeature.h> - - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ -2: .byte 0xeb /* jmp <disp8> */ - .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ -3: - .previous - .section .altinstructions,"a" - altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ - .Lclear_page_end-clear_page, 2b-1b - altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ - .Lclear_page_end-clear_page,3b-2b - .previous +ENTRY(clear_page_c_e) + CFI_STARTPROC + movl $4096,%ecx + xorl %eax,%eax + rep stosb + ret + CFI_ENDPROC +ENDPROC(clear_page_c_e) diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S index 176cca67212b..8239dbcbf984 100644 --- a/arch/x86/lib/copy_page_64.S +++ b/arch/x86/lib/copy_page_64.S @@ -2,23 +2,26 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> +#include <asm/cpufeature.h> #include <asm/alternative-asm.h> +/* + * Some CPUs run faster using the string copy instructions (sane microcode). + * It is also a lot simpler. Use this when possible. But, don't use streaming + * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the + * prefetch distance based on SMP/UP. + */ ALIGN -copy_page_rep: +ENTRY(copy_page) CFI_STARTPROC + ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD movl $4096/8, %ecx rep movsq ret CFI_ENDPROC -ENDPROC(copy_page_rep) - -/* - * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD. - * Could vary the prefetch distance based on SMP/UP. -*/ +ENDPROC(copy_page) -ENTRY(copy_page) +ENTRY(copy_page_regs) CFI_STARTPROC subq $2*8, %rsp CFI_ADJUST_CFA_OFFSET 2*8 @@ -90,21 +93,5 @@ ENTRY(copy_page) addq $2*8, %rsp CFI_ADJUST_CFA_OFFSET -2*8 ret -.Lcopy_page_end: CFI_ENDPROC -ENDPROC(copy_page) - - /* Some CPUs run faster using the string copy instructions. - It is also a lot simpler. Use this when possible */ - -#include <asm/cpufeature.h> - - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */ -2: - .previous - .section .altinstructions,"a" - altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \ - .Lcopy_page_end-copy_page, 2b-1b - .previous +ENDPROC(copy_page_regs) diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dee945d55594..fa997dfaef24 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -8,9 +8,6 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> - -#define FIX_ALIGNMENT 1 - #include <asm/current.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> @@ -19,33 +16,7 @@ #include <asm/asm.h> #include <asm/smap.h> -/* - * By placing feature2 after feature1 in altinstructions section, we logically - * implement: - * If CPU has feature2, jmp to alt2 is used - * else if CPU has feature1, jmp to alt1 is used - * else jmp to orig is used. - */ - .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 -0: - .byte 0xe9 /* 32bit jump */ - .long \orig-1f /* by default jump to orig */ -1: - .section .altinstr_replacement,"ax" -2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt1-1b /* offset */ /* or alternatively to alt1 */ -3: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt2-1b /* offset */ /* or alternatively to alt2 */ - .previous - - .section .altinstructions,"a" - altinstruction_entry 0b,2b,\feature1,5,5 - altinstruction_entry 0b,3b,\feature2,5,5 - .previous - .endm - .macro ALIGN_DESTINATION -#ifdef FIX_ALIGNMENT /* check for bad alignment of destination */ movl %edi,%ecx andl $7,%ecx @@ -67,7 +38,6 @@ _ASM_EXTABLE(100b,103b) _ASM_EXTABLE(101b,103b) -#endif .endm /* Standard copy_to_user with segment limit checking */ @@ -79,9 +49,11 @@ ENTRY(_copy_to_user) jc bad_to_user cmpq TI_addr_limit(%rax),%rcx ja bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string + ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ + "jmp copy_user_generic_string", \ + X86_FEATURE_REP_GOOD, \ + "jmp copy_user_enhanced_fast_string", \ + X86_FEATURE_ERMS CFI_ENDPROC ENDPROC(_copy_to_user) @@ -94,9 +66,11 @@ ENTRY(_copy_from_user) jc bad_from_user cmpq TI_addr_limit(%rax),%rcx ja bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string + ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \ + "jmp copy_user_generic_string", \ + X86_FEATURE_REP_GOOD, \ + "jmp copy_user_enhanced_fast_string", \ + X86_FEATURE_ERMS CFI_ENDPROC ENDPROC(_copy_from_user) diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 2419d5fefae3..9734182966f3 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic) /* handle last odd byte */ .Lhandle_1: - testl $1, %r10d + testb $1, %r10b jz .Lende xorl %ebx, %ebx source diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 1313ae6b478b..8f72b334aea0 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -52,6 +52,13 @@ */ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) { + /* + * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid + * even if the input buffer is long enough to hold them. + */ + if (buf_len > MAX_INSN_SIZE) + buf_len = MAX_INSN_SIZE; + memset(insn, 0, sizeof(*insn)); insn->kaddr = kaddr; insn->end_kaddr = kaddr + buf_len; @@ -164,6 +171,12 @@ found: /* VEX.W overrides opnd_size */ insn->opnd_bytes = 8; } else { + /* + * For VEX2, fake VEX3-like byte#2. + * Makes it easier to decode vex.W, vex.vvvv, + * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. + */ + insn->vex_prefix.bytes[2] = b2 & 0x7f; insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 89b53c9968e7..b046664f5a1c 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -1,12 +1,20 @@ /* Copyright 2002 Andi Kleen */ #include <linux/linkage.h> - #include <asm/cpufeature.h> #include <asm/dwarf2.h> #include <asm/alternative-asm.h> /* + * We build a jump to memcpy_orig by default which gets NOPped out on + * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which + * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs + * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. + */ + +.weak memcpy + +/* * memcpy - Copy a memory block. * * Input: @@ -17,15 +25,11 @@ * Output: * rax original destination */ +ENTRY(__memcpy) +ENTRY(memcpy) + ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ + "jmp memcpy_erms", X86_FEATURE_ERMS -/* - * memcpy_c() - fast string ops (REP MOVSQ) based variant. - * - * This gets patched over the unrolled variant (below) via the - * alternative instructions framework: - */ - .section .altinstr_replacement, "ax", @progbits -.Lmemcpy_c: movq %rdi, %rax movq %rdx, %rcx shrq $3, %rcx @@ -34,29 +38,21 @@ movl %edx, %ecx rep movsb ret -.Lmemcpy_e: - .previous +ENDPROC(memcpy) +ENDPROC(__memcpy) /* - * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than - * memcpy_c. Use memcpy_c_e when possible. - * - * This gets patched over the unrolled variant (below) via the - * alternative instructions framework: + * memcpy_erms() - enhanced fast string memcpy. This is faster and + * simpler than memcpy. Use memcpy_erms when possible. */ - .section .altinstr_replacement, "ax", @progbits -.Lmemcpy_c_e: +ENTRY(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb ret -.Lmemcpy_e_e: - .previous - -.weak memcpy +ENDPROC(memcpy_erms) -ENTRY(__memcpy) -ENTRY(memcpy) +ENTRY(memcpy_orig) CFI_STARTPROC movq %rdi, %rax @@ -183,26 +179,4 @@ ENTRY(memcpy) .Lend: retq CFI_ENDPROC -ENDPROC(memcpy) -ENDPROC(__memcpy) - - /* - * Some CPUs are adding enhanced REP MOVSB/STOSB feature - * If the feature is supported, memcpy_c_e() is the first choice. - * If enhanced rep movsb copy is not available, use fast string copy - * memcpy_c() when possible. This is faster and code is simpler than - * original memcpy(). - * Otherwise, original memcpy() is used. - * In .altinstructions section, ERMS feature is placed after REG_GOOD - * feature to implement the right patch order. - * - * Replace only beginning, memcpy is used to apply alternatives, - * so it is silly to overwrite itself with nops - reboot is the - * only outcome... - */ - .section .altinstructions, "a" - altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ - .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c - altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ - .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e - .previous +ENDPROC(memcpy_orig) diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 9c4b530575da..0f8a0d0331b9 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -5,7 +5,6 @@ * This assembly file is re-written from memmove_64.c file. * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> */ -#define _STRING_C #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/cpufeature.h> @@ -44,6 +43,8 @@ ENTRY(__memmove) jg 2f .Lmemmove_begin_forward: + ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS + /* * movsq instruction have many startup latency * so we handle small size by general register. @@ -207,21 +208,5 @@ ENTRY(__memmove) 13: retq CFI_ENDPROC - - .section .altinstr_replacement,"ax" -.Lmemmove_begin_forward_efs: - /* Forward moving data. */ - movq %rdx, %rcx - rep movsb - retq -.Lmemmove_end_forward_efs: - .previous - - .section .altinstructions,"a" - altinstruction_entry .Lmemmove_begin_forward, \ - .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \ - .Lmemmove_end_forward-.Lmemmove_begin_forward, \ - .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs - .previous ENDPROC(__memmove) ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 6f44935c6a60..93118fb23976 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -5,19 +5,30 @@ #include <asm/cpufeature.h> #include <asm/alternative-asm.h> +.weak memset + /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is * simpler and shorter than the orignal function as well. - * + * * rdi destination - * rsi value (char) - * rdx count (bytes) - * + * rsi value (char) + * rdx count (bytes) + * * rax original destination - */ - .section .altinstr_replacement, "ax", @progbits -.Lmemset_c: + */ +ENTRY(memset) +ENTRY(__memset) + /* + * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended + * to use it when possible. If not available, use fast string instructions. + * + * Otherwise, use original memset function. + */ + ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ + "jmp memset_erms", X86_FEATURE_ERMS + movq %rdi,%r9 movq %rdx,%rcx andl $7,%edx @@ -31,8 +42,8 @@ rep stosb movq %r9,%rax ret -.Lmemset_e: - .previous +ENDPROC(memset) +ENDPROC(__memset) /* * ISO C memset - set a memory block to a byte value. This function uses @@ -45,21 +56,16 @@ * * rax original destination */ - .section .altinstr_replacement, "ax", @progbits -.Lmemset_c_e: +ENTRY(memset_erms) movq %rdi,%r9 movb %sil,%al movq %rdx,%rcx rep stosb movq %r9,%rax ret -.Lmemset_e_e: - .previous - -.weak memset +ENDPROC(memset_erms) -ENTRY(memset) -ENTRY(__memset) +ENTRY(memset_orig) CFI_STARTPROC movq %rdi,%r10 @@ -134,23 +140,4 @@ ENTRY(__memset) jmp .Lafter_bad_alignment .Lfinal: CFI_ENDPROC -ENDPROC(memset) -ENDPROC(__memset) - - /* Some CPUs support enhanced REP MOVSB/STOSB feature. - * It is recommended to use this when possible. - * - * If enhanced REP MOVSB/STOSB feature is not available, use fast string - * instructions. - * - * Otherwise, use original memset function. - * - * In .altinstructions section, ERMS feature is placed after REG_GOOD - * feature to implement the right patch order. - */ - .section .altinstructions,"a" - altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ - .Lfinal-__memset,.Lmemset_e-.Lmemset_c - altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ - .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e - .previous +ENDPROC(memset_orig) diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index f6d13eefad10..3ca5218fbece 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -14,8 +14,8 @@ .macro op_safe_regs op ENTRY(\op\()_safe_regs) CFI_STARTPROC - pushq_cfi %rbx - pushq_cfi %rbp + pushq_cfi_reg rbx + pushq_cfi_reg rbp movq %rdi, %r10 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax @@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs) movl %ebp, 20(%r10) movl %esi, 24(%r10) movl %edi, 28(%r10) - popq_cfi %rbp - popq_cfi %rbx + popq_cfi_reg rbp + popq_cfi_reg rbx ret 3: CFI_RESTORE_STATE @@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs) .macro op_safe_regs op ENTRY(\op\()_safe_regs) CFI_STARTPROC - pushl_cfi %ebx - pushl_cfi %ebp - pushl_cfi %esi - pushl_cfi %edi + pushl_cfi_reg ebx + pushl_cfi_reg ebp + pushl_cfi_reg esi + pushl_cfi_reg edi pushl_cfi $0 /* Return value */ pushl_cfi %eax movl 4(%eax), %ecx @@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs) movl %esi, 24(%eax) movl %edi, 28(%eax) popl_cfi %eax - popl_cfi %edi - popl_cfi %esi - popl_cfi %ebp - popl_cfi %ebx + popl_cfi_reg edi + popl_cfi_reg esi + popl_cfi_reg ebp + popl_cfi_reg ebx ret 3: CFI_RESTORE_STATE diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index 5dff5f042468..2322abe4da3b 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -34,10 +34,10 @@ */ #define save_common_regs \ - pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 + pushl_cfi_reg ecx #define restore_common_regs \ - popl_cfi %ecx; CFI_RESTORE ecx + popl_cfi_reg ecx /* Avoid uglifying the argument copying x86-64 needs to do. */ .macro movq src, dst @@ -64,22 +64,22 @@ */ #define save_common_regs \ - pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ - pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ - pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ - pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ - pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ - pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ - pushq_cfi %r11; CFI_REL_OFFSET r11, 0 + pushq_cfi_reg rdi; \ + pushq_cfi_reg rsi; \ + pushq_cfi_reg rcx; \ + pushq_cfi_reg r8; \ + pushq_cfi_reg r9; \ + pushq_cfi_reg r10; \ + pushq_cfi_reg r11 #define restore_common_regs \ - popq_cfi %r11; CFI_RESTORE r11; \ - popq_cfi %r10; CFI_RESTORE r10; \ - popq_cfi %r9; CFI_RESTORE r9; \ - popq_cfi %r8; CFI_RESTORE r8; \ - popq_cfi %rcx; CFI_RESTORE rcx; \ - popq_cfi %rsi; CFI_RESTORE rsi; \ - popq_cfi %rdi; CFI_RESTORE rdi + popq_cfi_reg r11; \ + popq_cfi_reg r10; \ + popq_cfi_reg r9; \ + popq_cfi_reg r8; \ + popq_cfi_reg rcx; \ + popq_cfi_reg rsi; \ + popq_cfi_reg rdi #endif @@ -87,12 +87,10 @@ ENTRY(call_rwsem_down_read_failed) CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi) %__ASM_REG(dx) - CFI_REL_OFFSET __ASM_REG(dx), 0 + __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) movq %rax,%rdi call rwsem_down_read_failed - __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) - CFI_RESTORE __ASM_REG(dx) + __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) restore_common_regs ret CFI_ENDPROC @@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake) ENTRY(call_rwsem_downgrade_wake) CFI_STARTPROC save_common_regs - __ASM_SIZE(push,_cfi) %__ASM_REG(dx) - CFI_REL_OFFSET __ASM_REG(dx), 0 + __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx) movq %rax,%rdi call rwsem_downgrade_wake - __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) - CFI_RESTORE __ASM_REG(dx) + __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx) restore_common_regs ret CFI_ENDPROC diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index e28cdaf5ac2c..5eb715087b80 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -13,12 +13,9 @@ .globl \name \name: CFI_STARTPROC - pushl_cfi %eax - CFI_REL_OFFSET eax, 0 - pushl_cfi %ecx - CFI_REL_OFFSET ecx, 0 - pushl_cfi %edx - CFI_REL_OFFSET edx, 0 + pushl_cfi_reg eax + pushl_cfi_reg ecx + pushl_cfi_reg edx .if \put_ret_addr_in_eax /* Place EIP in the arg1 */ @@ -26,12 +23,9 @@ .endif call \func - popl_cfi %edx - CFI_RESTORE edx - popl_cfi %ecx - CFI_RESTORE ecx - popl_cfi %eax - CFI_RESTORE eax + popl_cfi_reg edx + popl_cfi_reg ecx + popl_cfi_reg eax ret CFI_ENDPROC _ASM_NOKPROBE(\name) diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index b30b5ebd614a..f89ba4e93025 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -17,9 +17,18 @@ CFI_STARTPROC /* this one pushes 9 elems, the next one would be %rIP */ - SAVE_ARGS + pushq_cfi_reg rdi + pushq_cfi_reg rsi + pushq_cfi_reg rdx + pushq_cfi_reg rcx + pushq_cfi_reg rax + pushq_cfi_reg r8 + pushq_cfi_reg r9 + pushq_cfi_reg r10 + pushq_cfi_reg r11 .if \put_ret_addr_in_rdi + /* 9*8(%rsp) is return addr on stack */ movq_cfi_restore 9*8, rdi .endif @@ -45,11 +54,22 @@ #endif #endif - /* SAVE_ARGS below is used only for the .cfi directives it contains. */ +#if defined(CONFIG_TRACE_IRQFLAGS) \ + || defined(CONFIG_DEBUG_LOCK_ALLOC) \ + || defined(CONFIG_PREEMPT) CFI_STARTPROC - SAVE_ARGS + CFI_ADJUST_CFA_OFFSET 9*8 restore: - RESTORE_ARGS + popq_cfi_reg r11 + popq_cfi_reg r10 + popq_cfi_reg r9 + popq_cfi_reg r8 + popq_cfi_reg rax + popq_cfi_reg rcx + popq_cfi_reg rdx + popq_cfi_reg rsi + popq_cfi_reg rdi ret CFI_ENDPROC _ASM_NOKPROBE(restore) +#endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index c905e89e19fe..1f33b3d1fd68 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user); * it is not necessary to optimize tail handling. */ __visible unsigned long -copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) +copy_user_handle_tail(char *to, char *from, unsigned len) { - char c; - unsigned zero_len; - for (; len; --len, to++) { + char c; + if (__get_user_nocheck(c, from++, sizeof(char))) break; if (__put_user_nocheck(c, to, sizeof(char))) break; } - - for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) - if (__put_user_nocheck(c, to++, sizeof(char))) - break; clac(); + + /* If the destination is a kernel buffer, we always clear the end */ + if ((unsigned long)to >= TASK_SIZE_MAX) + memset(to, 0, len); return len; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1a2be7c6895d..816488c0b97e 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -273,6 +273,9 @@ dd: ESC de: ESC df: ESC # 0xe0 - 0xef +# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix +# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation +# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. e0: LOOPNE/LOOPNZ Jb (f64) e1: LOOPE/LOOPZ Jb (f64) e2: LOOP Jb (f64) @@ -281,6 +284,10 @@ e4: IN AL,Ib e5: IN eAX,Ib e6: OUT Ib,AL e7: OUT Ib,eAX +# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset +# in "near" jumps and calls is 16-bit. For CALL, +# push of return address is 16-bit wide, RSP is decremented by 2 +# but is not truncated to 16 bits, unlike RIP. e8: CALL Jz (f64) e9: JMP-near Jz (f64) ea: JMP-far Ap (i64) @@ -456,6 +463,7 @@ AVXcode: 1 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) # 0x0f 0x80-0x8f +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 80: JO Jz (f64) 81: JNO Jz (f64) 82: JB/JC/JNAE Jz (f64) @@ -842,6 +850,7 @@ EndTable GrpTable: Grp5 0: INC Ev 1: DEC Ev +# Note: "forced64" is Intel CPU behavior (see comment about CALL insn). 2: CALLN Ev (f64) 3: CALLF Ep 4: JMPN Ev (f64) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ede025fb46f1..181c53bac3a7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) return 0; while (instr < max_instr) { @@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) if (error_code & PF_USER) return false; - if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) return false; return true; @@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ - if (user_mode_vm(regs)) { + if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; flags |= FAULT_FLAG_USER; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a110efca6d06..52417e771af9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void) if (cpu_has_pge) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; - } + } else + __supported_pte_mask &= ~_PAGE_GLOBAL; } #ifdef CONFIG_X86_32 diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 5d04be5efb64..4e664bdb535a 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) { struct stack_frame *head = (struct stack_frame *)frame_pointer(regs); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { unsigned long stack = kernel_stack_pointer(regs); if (depth) dump_trace(NULL, regs, (unsigned long *)stack, 0, diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 6ac273832f28..e4695985f9de 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -331,7 +331,7 @@ static void probe_pci_root_info(struct pci_root_info *info, struct list_head *list) { int ret; - struct resource_entry *entry; + struct resource_entry *entry, *tmp; sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum); info->bridge = device; @@ -345,8 +345,13 @@ static void probe_pci_root_info(struct pci_root_info *info, dev_dbg(&device->dev, "no IO and memory resources present in _CRS\n"); else - resource_list_for_each_entry(entry, list) - entry->res->name = info->name; + resource_list_for_each_entry_safe(entry, tmp, list) { + if ((entry->res->flags & IORESOURCE_WINDOW) == 0 || + (entry->res->flags & IORESOURCE_DISABLED)) + resource_list_destroy_entry(entry); + else + entry->res->name = info->name; + } } struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 3d2612b68694..2fb384724ebb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -513,31 +513,6 @@ void __init pcibios_set_cache_line_size(void) } } -/* - * Some device drivers assume dev->irq won't change after calling - * pci_disable_device(). So delay releasing of IRQ resource to driver - * unbinding time. Otherwise it will break PM subsystem and drivers - * like xen-pciback etc. - */ -static int pci_irq_notifier(struct notifier_block *nb, unsigned long action, - void *data) -{ - struct pci_dev *dev = to_pci_dev(data); - - if (action != BUS_NOTIFY_UNBOUND_DRIVER) - return NOTIFY_DONE; - - if (pcibios_disable_irq) - pcibios_disable_irq(dev); - - return NOTIFY_OK; -} - -static struct notifier_block pci_irq_nb = { - .notifier_call = pci_irq_notifier, - .priority = INT_MIN, -}; - int __init pcibios_init(void) { if (!raw_pci_ops) { @@ -550,9 +525,6 @@ int __init pcibios_init(void) if (pci_bf_sort >= pci_force_bf) pci_sort_breadthfirst(); - - bus_register_notifier(&pci_bus_type, &pci_irq_nb); - return 0; } @@ -711,6 +683,12 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) return 0; } +void pcibios_disable_device (struct pci_dev *dev) +{ + if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) + pcibios_disable_irq(dev); +} + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index efb849323c74..852aa4c92da0 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -234,10 +234,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (dev->irq_managed && dev->irq > 0) { + if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed && + dev->irq > 0) { mp_unmap_irq(dev->irq); dev->irq_managed = 0; - dev->irq = 0; } } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index e71b3dbd87b8..5dc6ca5e1741 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1256,9 +1256,22 @@ static int pirq_enable_irq(struct pci_dev *dev) return 0; } +bool mp_should_keep_irq(struct device *dev) +{ + if (dev->power.is_prepared) + return true; +#ifdef CONFIG_PM + if (dev->power.runtime_status == RPM_SUSPENDING) + return true; +#endif + + return false; +} + static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) { + if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) && + dev->irq_managed && dev->irq) { mp_unmap_irq(dev->irq); dev->irq = 0; dev->irq_managed = 0; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index dbc8627a5cdf..02744df576d5 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map( efi_memory_desc_t *virtual_map) { efi_status_t status; + unsigned long flags; + pgd_t *save_pgd; - efi_call_phys_prolog(); + save_pgd = efi_call_phys_prolog(); + + /* Disable interrupts around EFI calls: */ + local_irq_save(flags); status = efi_call_phys(efi_phys.set_virtual_address_map, memory_map_size, descriptor_size, descriptor_version, virtual_map); - efi_call_phys_epilog(); + local_irq_restore(flags); + + efi_call_phys_epilog(save_pgd); + return status; } @@ -491,7 +499,8 @@ void __init efi_init(void) if (efi_memmap_init()) return; - print_efi_memmap(); + if (efi_enabled(EFI_DBG)) + print_efi_memmap(); } void __init efi_late_init(void) @@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str) { if (parse_option_str(str, "old_map")) set_bit(EFI_OLD_MEMMAP, &efi.flags); + if (parse_option_str(str, "debug")) + set_bit(EFI_DBG, &efi.flags); return 0; } diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e7cda52936..ed5b67338294 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -33,11 +33,10 @@ /* * To make EFI call EFI runtime service in physical addressing mode we need - * prolog/epilog before/after the invocation to disable interrupt, to - * claim EFI runtime service handler exclusively and to duplicate a memory in - * low memory space say 0 - 3G. + * prolog/epilog before/after the invocation to claim the EFI runtime service + * handler exclusively and to duplicate a memory mapping in low memory space, + * say 0 - 3G. */ -static unsigned long efi_rt_eflags; void efi_sync_low_kernel_mappings(void) {} void __init efi_dump_pagetable(void) {} @@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md) void __init efi_map_region_fixed(efi_memory_desc_t *md) {} void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} -void __init efi_call_phys_prolog(void) +pgd_t * __init efi_call_phys_prolog(void) { struct desc_ptr gdt_descr; + pgd_t *save_pgd; - local_irq_save(efi_rt_eflags); - + /* Current pgd is swapper_pg_dir, we'll restore it later: */ + save_pgd = swapper_pg_dir; load_cr3(initial_page_table); __flush_tlb_all(); gdt_descr.address = __pa(get_cpu_gdt_table(0)); gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); + + return save_pgd; } -void __init efi_call_phys_epilog(void) +void __init efi_call_phys_epilog(pgd_t *save_pgd) { struct desc_ptr gdt_descr; @@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void) gdt_descr.size = GDT_SIZE - 1; load_gdt(&gdt_descr); - load_cr3(swapper_pg_dir); + load_cr3(save_pgd); __flush_tlb_all(); - - local_irq_restore(efi_rt_eflags); } void __init efi_runtime_mkexec(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 17e80d829df0..a0ac0f9c307f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -41,9 +41,6 @@ #include <asm/realmode.h> #include <asm/time.h> -static pgd_t *save_pgd __initdata; -static unsigned long efi_flags __initdata; - /* * We allocate runtime services regions bottom-up, starting from -4G, i.e. * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. @@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable) } } -void __init efi_call_phys_prolog(void) +pgd_t * __init efi_call_phys_prolog(void) { unsigned long vaddress; + pgd_t *save_pgd; + int pgd; int n_pgds; if (!efi_enabled(EFI_OLD_MEMMAP)) - return; + return NULL; early_code_mapping_set_exec(1); - local_irq_save(efi_flags); n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL); @@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void) set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); } __flush_tlb_all(); + + return save_pgd; } -void __init efi_call_phys_epilog(void) +void __init efi_call_phys_epilog(pgd_t *save_pgd) { /* * After the lock is released, the original page table is restored. */ - int pgd; - int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + int pgd_idx; + int nr_pgds; - if (!efi_enabled(EFI_OLD_MEMMAP)) + if (!save_pgd) return; - for (pgd = 0; pgd < n_pgds; pgd++) - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) + set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + kfree(save_pgd); + __flush_tlb_all(); - local_irq_restore(efi_flags); early_code_mapping_set_exec(0); } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 994798548b1a..3b6ec42718e4 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -415,7 +415,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) struct reset_args reset_args; reset_args.sender = sender; - cpus_clear(*mask); + cpumask_clear(mask); /* find a single cpu for each uvhub in this distribution mask */ maskbits = sizeof(struct pnmask) * BITSPERBYTE; /* each bit is a pnode relative to the partition base pnode */ @@ -425,7 +425,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) continue; apnode = pnode + bcp->partition_base_pnode; cpu = pnode_to_first_cpu(apnode, smaster); - cpu_set(cpu, *mask); + cpumask_set_cpu(cpu, mask); } /* IPI all cpus; preemption is already disabled */ @@ -1126,7 +1126,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, /* don't actually do a shootdown of the local cpu */ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); - if (cpu_isset(cpu, *cpumask)) + if (cpumask_test_cpu(cpu, cpumask)) stat->s_ntargself++; bau_desc = bcp->descriptor_base; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 3e32ed5648a0..757678fb26e1 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -134,7 +134,7 @@ static void do_fpu_end(void) static void fix_processor_context(void) { int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); + struct tss_struct *t = &per_cpu(cpu_tss, cpu); #ifdef CONFIG_X86_64 struct desc_struct *desc = get_cpu_gdt_table(cpu); tss_desc tss; diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index b3560ece1c9f..ef8187f9d28d 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -119,7 +119,7 @@ 110 i386 iopl sys_iopl 111 i386 vhangup sys_vhangup 112 i386 idle -113 i386 vm86old sys_vm86old sys32_vm86_warning +113 i386 vm86old sys_vm86old sys_ni_syscall 114 i386 wait4 sys_wait4 compat_sys_wait4 115 i386 swapoff sys_swapoff 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo @@ -172,7 +172,7 @@ 163 i386 mremap sys_mremap 164 i386 setresuid sys_setresuid16 165 i386 getresuid sys_getresuid16 -166 i386 vm86 sys_vm86 sys32_vm86_warning +166 i386 vm86 sys_vm86 sys_ni_syscall 167 i386 query_module 168 i386 poll sys_poll 169 i386 nfsservctl diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 8d656fbb57aa..9ef32d5f1b19 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -178,7 +178,7 @@ 169 common reboot sys_reboot 170 common sethostname sys_sethostname 171 common setdomainname sys_setdomainname -172 common iopl stub_iopl +172 common iopl sys_iopl 173 common ioperm sys_ioperm 174 64 create_module 175 common init_module sys_init_module diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 2d7d9a1f5b53..8ffd2146fa6a 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -64,8 +64,8 @@ */ static inline void rdtsc_barrier(void) { - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); + alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, + "lfence", X86_FEATURE_LFENCE_RDTSC); } #endif diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 5cdfa9db2217..a75d8700472a 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -16,7 +16,7 @@ */ /* Not going to be implemented by UML, since we have no hardware. */ -#define stub_iopl sys_ni_syscall +#define sys_iopl sys_ni_syscall #define sys_ioperm sys_ni_syscall /* diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 9793322751e0..40d2473836c9 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; + u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * Note: hypervisor must guarantee that: - * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. - * 2. that per-CPU pvclock time info is updated if the - * underlying CPU changes. - * 3. that version is increased whenever underlying CPU - * changes. - * + * When looping to get a consistent (time-info, tsc) pair, we + * also need to deal with the possibility we can switch vcpus, + * so make sure we always re-fetch time-info for the current vcpu. */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - pvti = get_pvti(cpu); + /* Make sure migrate_count will change if we leave the VCPU. */ + do { + pvti = get_pvti(cpu); + migrate_count = pvti->migrate_count; + + cpu1 = cpu; + cpu = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1)); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * We could have been migrated just after the first - * vgetcpu but before fetching the version, so we - * wouldn't notice a version change. + * - We must read TSC of pvti's VCPU. + * - KVM doesn't follow the versioning protocol, so data could + * change before version if we left the VCPU. */ - cpu1 = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1 || - (pvti->pvti.version & 1) || - pvti->pvti.version != version)); + smp_rmb(); + } while (unlikely((pvti->pvti.version & 1) || + pvti->pvti.version != version || + pvti->migrate_count != migrate_count)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S index 31776d0efc8c..d7ec4e251c0a 100644 --- a/arch/x86/vdso/vdso32/sigreturn.S +++ b/arch/x86/vdso/vdso32/sigreturn.S @@ -17,6 +17,7 @@ .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function + nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ ALIGN __kernel_sigreturn: .LSTART_sigreturn: diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5240f563076d..81665c9f2132 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss, mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); xen_mc_issue(PARAVIRT_LAZY_CPU); + tss->x86_tss.sp0 = thread->sp0; } static void xen_set_iopl_mask(unsigned mask) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 740ae3026a14..b47124d4cd67 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -91,6 +91,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); unsigned long xen_max_p2m_pfn __read_mostly; EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT +#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT +#else +#define P2M_LIMIT 0 +#endif + static DEFINE_SPINLOCK(p2m_update_lock); static unsigned long *p2m_mid_missing_mfn; @@ -385,9 +391,11 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m) void __init xen_vmalloc_p2m_tree(void) { static struct vm_struct vm; + unsigned long p2m_limit; + p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; vm.flags = VM_ALLOC; - vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn, + vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), PMD_SIZE * PMDS_PER_MID_PAGE); vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); @@ -563,7 +571,7 @@ static bool alloc_p2m(unsigned long pfn) if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) p2m_init(p2m); else - p2m_init_identity(p2m, pfn); + p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1)); spin_lock_irqsave(&p2m_update_lock, flags); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 08e8489c47f1..7413ee3706d0 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -445,15 +445,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) { int rc; - per_cpu(current_task, cpu) = idle; -#ifdef CONFIG_X86_32 - irq_ctx_init(cpu); -#else - clear_tsk_thread_flag(idle, TIF_FORK); -#endif - per_cpu(kernel_stack, cpu) = - (unsigned long)task_stack_page(idle) - - KERNEL_STACK_OFFSET + THREAD_SIZE; + common_cpu_up(cpu, idle); xen_setup_runstate_info(cpu); xen_setup_timer(cpu); @@ -468,10 +460,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; - if (num_online_cpus() == 1) - /* Just in case we booted with a single CPU. */ - alternatives_enable_smp(); - rc = xen_smp_intr_init(cpu); if (rc) return rc; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index c4df9dbd63b7..d9497698645a 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,5 +1,5 @@ #include <linux/types.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <xen/interface/xen.h> #include <xen/grant_table.h> @@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { - unsigned long reason = (unsigned long)data; - /* Boot processor notified via generic timekeeping_resume() */ - if ( smp_processor_id() == 0) + if (smp_processor_id() == 0) return; - clockevents_notify(reason, NULL); + tick_resume_local(); } void xen_arch_resume(void) { - on_each_cpu(xen_vcpu_notify_restore, - (void *)CLOCK_EVT_NOTIFY_RESUME, 1); + on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 53adefda4275..985fc3ee0973 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -68,11 +68,11 @@ ENTRY(xen_sysret64) * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back */ - movq %rsp, PER_CPU_VAR(old_rsp) + movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER_DS - pushq PER_CPU_VAR(old_rsp) + pushq PER_CPU_VAR(rsp_scratch) pushq %r11 pushq $__USER_CS pushq %rcx @@ -87,11 +87,11 @@ ENTRY(xen_sysret32) * We're already on the usermode stack at this point, but * still with the kernel gs, so we can easily switch back */ - movq %rsp, PER_CPU_VAR(old_rsp) + movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(kernel_stack), %rsp pushq $__USER32_DS - pushq PER_CPU_VAR(old_rsp) + pushq PER_CPU_VAR(rsp_scratch) pushq %r11 pushq $__USER32_CS pushq %rcx diff --git a/block/blk-merge.c b/block/blk-merge.c index fc1ff3b1ea1f..fd3fee81c23c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { struct bio_vec *bprev; - bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; + bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1]; if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) return false; } diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index d53a764b05ea..be3290cc0644 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data, /* * We're out of tags on this hardware queue, kick any * pending IO submits before going to sleep waiting for - * some to complete. + * some to complete. Note that hctx can be NULL here for + * reserved tag allocation. */ - blk_mq_run_hw_queue(hctx, false); + if (hctx) + blk_mq_run_hw_queue(hctx, false); /* * Retry tag allocation after running the hardware queue, diff --git a/block/blk-mq.c b/block/blk-mq.c index 4f4bea21052e..33c428530193 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1457,7 +1457,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, do { page = alloc_pages_node(set->numa_node, - GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO, this_order); if (page) break; @@ -1479,8 +1479,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, left -= to_do * rq_size; for (j = 0; j < to_do; j++) { tags->rqs[i] = p; - tags->rqs[i]->atomic_flags = 0; - tags->rqs[i]->cmd_flags = 0; if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, tags->rqs[i], hctx_idx, i, @@ -1938,7 +1936,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) */ if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, PERCPU_REF_INIT_ATOMIC, GFP_KERNEL)) - goto err_map; + goto err_mq_usage; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); blk_queue_rq_timeout(q, 30000); @@ -1981,7 +1979,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) blk_mq_init_cpu_queues(q, set->nr_hw_queues); if (blk_mq_init_hw_queues(q, set)) - goto err_hw; + goto err_mq_usage; mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); @@ -1993,7 +1991,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) return q; -err_hw: +err_mq_usage: blk_cleanup_queue(q); err_hctxs: kfree(map); diff --git a/block/blk-settings.c b/block/blk-settings.c index 6ed2cbe5e8c9..12600bfffca9 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -585,7 +585,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->physical_block_size); t->io_min = max(t->io_min, b->io_min); - t->io_opt = lcm(t->io_opt, b->io_opt); + t->io_opt = lcm_not_zero(t->io_opt, b->io_opt); t->cluster &= b->cluster; t->discard_zeroes_data &= b->discard_zeroes_data; @@ -616,7 +616,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->raid_partial_stripes_expensive); /* Find lowest common alignment_offset */ - t->alignment_offset = lcm(t->alignment_offset, alignment) + t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment) % max(t->physical_block_size, t->io_min); /* Verify that new alignment_offset is on a logical block boundary */ @@ -643,7 +643,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); - t->discard_alignment = lcm(t->discard_alignment, alignment) % + t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % t->discard_granularity; } diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 657964e8ab7e..37fb19047603 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -65,6 +65,7 @@ struct lpss_private_data; struct lpss_device_desc { unsigned int flags; + const char *clk_con_id; unsigned int prv_offset; size_t prv_size_override; void (*setup)(struct lpss_private_data *pdata); @@ -140,6 +141,7 @@ static struct lpss_device_desc lpt_i2c_dev_desc = { static struct lpss_device_desc lpt_uart_dev_desc = { .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR, + .clk_con_id = "baudclk", .prv_offset = 0x800, .setup = lpss_uart_setup, }; @@ -156,6 +158,7 @@ static struct lpss_device_desc byt_pwm_dev_desc = { static struct lpss_device_desc byt_uart_dev_desc = { .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX, + .clk_con_id = "baudclk", .prv_offset = 0x800, .setup = lpss_uart_setup, }; @@ -313,7 +316,7 @@ out: return PTR_ERR(clk); pdata->clk = clk; - clk_register_clkdev(clk, NULL, devname); + clk_register_clkdev(clk, dev_desc->clk_con_id, devname); return 0; } diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index c7b105c0e1d3..6bc9cbc01ad6 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -26,7 +26,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/cpu.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/slab.h> #include <linux/acpi.h> #include <asm/mwait.h> @@ -41,8 +41,6 @@ static unsigned long power_saving_mwait_eax; static unsigned char tsc_detected_unstable; static unsigned char tsc_marked_unstable; -static unsigned char lapic_detected_unstable; -static unsigned char lapic_marked_unstable; static void power_saving_mwait_init(void) { @@ -82,13 +80,10 @@ static void power_saving_mwait_init(void) */ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) tsc_detected_unstable = 1; - if (!boot_cpu_has(X86_FEATURE_ARAT)) - lapic_detected_unstable = 1; break; default: - /* TSC & LAPIC could halt in idle */ + /* TSC could halt in idle */ tsc_detected_unstable = 1; - lapic_detected_unstable = 1; } #endif } @@ -155,7 +150,6 @@ static int power_saving_thread(void *data) sched_setscheduler(current, SCHED_RR, ¶m); while (!kthread_should_stop()) { - int cpu; unsigned long expire_time; try_to_freeze(); @@ -177,28 +171,15 @@ static int power_saving_thread(void *data) mark_tsc_unstable("TSC halts in idle"); tsc_marked_unstable = 1; } - if (lapic_detected_unstable && !lapic_marked_unstable) { - int i; - /* LAPIC could halt in idle, so notify users */ - for_each_online_cpu(i) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_ON, - &i); - lapic_marked_unstable = 1; - } local_irq_disable(); - cpu = smp_processor_id(); - if (lapic_marked_unstable) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enable(); + tick_broadcast_enter(); stop_critical_timings(); mwait_idle_with_hints(power_saving_mwait_eax, 1); start_critical_timings(); - if (lapic_marked_unstable) - clockevents_notify( - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); local_irq_enable(); if (time_before(expire_time, jiffies)) { diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index e7f718d6918a..b1def411c0b8 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -485,6 +485,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev) if (!pin || !dev->irq_managed || dev->irq <= 0) return; + /* Keep IOAPIC pin configuration when suspending */ + if (dev->dev.power.is_prepared) + return; +#ifdef CONFIG_PM + if (dev->dev.power.runtime_status == RPM_SUSPENDING) + return; +#endif + entry = acpi_pci_irq_lookup(dev, pin); if (!entry) return; @@ -505,6 +513,5 @@ void acpi_pci_irq_disable(struct pci_dev *dev) if (gsi >= 0) { acpi_unregister_gsi(gsi); dev->irq_managed = 0; - dev->irq = 0; } } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index c6bb9f1257c9..39e0c8e36244 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -32,7 +32,7 @@ #include <linux/acpi.h> #include <linux/dmi.h> #include <linux/sched.h> /* need_resched() */ -#include <linux/clockchips.h> +#include <linux/tick.h> #include <linux/cpuidle.h> #include <linux/syscore_ops.h> #include <acpi/processor.h> @@ -157,12 +157,11 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, static void __lapic_timer_propagate_broadcast(void *arg) { struct acpi_processor *pr = (struct acpi_processor *) arg; - unsigned long reason; - reason = pr->power.timer_broadcast_on_state < INT_MAX ? - CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; - - clockevents_notify(reason, &pr->id); + if (pr->power.timer_broadcast_on_state < INT_MAX) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) @@ -179,11 +178,10 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr, int state = cx - pr->power.states; if (state >= pr->power.timer_broadcast_on_state) { - unsigned long reason; - - reason = broadcast ? CLOCK_EVT_NOTIFY_BROADCAST_ENTER : - CLOCK_EVT_NOTIFY_BROADCAST_EXIT; - clockevents_notify(reason, &pr->id); + if (broadcast) + tick_broadcast_enter(); + else + tick_broadcast_exit(); } } @@ -922,7 +920,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) return -EINVAL; drv->safe_state_index = -1; - for (i = 0; i < CPUIDLE_STATE_MAX; i++) { + for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) { drv->states[i].name[0] = '\0'; drv->states[i].desc[0] = '\0'; } diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index c723668e3e27..5589a6e2a023 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -42,8 +42,10 @@ static bool acpi_dev_resource_len_valid(u64 start, u64 end, u64 len, bool io) * CHECKME: len might be required to check versus a minimum * length as well. 1 for io is fine, but for memory it does * not make any sense at all. + * Note: some BIOSes report incorrect length for ACPI address space + * descriptor, so remove check of 'reslen == len' to avoid regression. */ - if (len && reslen && reslen == len && start <= end) + if (len && reslen && start <= end) return true; pr_debug("ACPI: invalid or unassigned resource %s [%016llx - %016llx] length [%016llx]\n", diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index debd30917010..26eb70c8f518 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -2110,7 +2110,8 @@ static int __init intel_opregion_present(void) int acpi_video_register(void) { - int result = 0; + int ret; + if (register_count) { /* * if the function of acpi_video_register is already called, @@ -2122,9 +2123,9 @@ int acpi_video_register(void) mutex_init(&video_list_lock); INIT_LIST_HEAD(&video_bus_head); - result = acpi_bus_register_driver(&acpi_video_bus); - if (result < 0) - return -ENODEV; + ret = acpi_bus_register_driver(&acpi_video_bus); + if (ret) + return ret; /* * When the acpi_video_bus is loaded successfully, increase @@ -2176,6 +2177,17 @@ EXPORT_SYMBOL(acpi_video_unregister_backlight); static int __init acpi_video_init(void) { + /* + * Let the module load even if ACPI is disabled (e.g. due to + * a broken BIOS) so that i915.ko can still be loaded on such + * old systems without an AcpiOpRegion. + * + * acpi_video_register() will report -ENODEV later as well due + * to acpi_disabled when i915.ko tries to register itself afterwards. + */ + if (acpi_disabled) + return 0; + dmi_check_system(video_dmi_table); if (intel_opregion_present()) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 33b09b6568a4..6607f3c6ace1 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -551,7 +551,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, { void *page_addr; unsigned long user_page_addr; - struct vm_struct tmp_area; struct page **page; struct mm_struct *mm; @@ -600,10 +599,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, proc->pid, page_addr); goto err_alloc_page_failed; } - tmp_area.addr = page_addr; - tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; - ret = map_vm_area(&tmp_area, PAGE_KERNEL, page); - if (ret) { + ret = map_kernel_range_noflush((unsigned long)page_addr, + PAGE_SIZE, PAGE_KERNEL, page); + flush_cache_vmap((unsigned long)page_addr, + (unsigned long)page_addr + PAGE_SIZE); + if (ret != 1) { pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", proc->pid, page_addr); goto err_map_kernel_failed; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4c35f0822d06..23dac3babfe3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4204,9 +4204,18 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "PIONEER DVD-RW DVR-216D", NULL, ATA_HORKAGE_NOSETXFER }, /* devices that don't properly handle queued TRIM commands */ - { "Micron_M[56]*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Micron_M5[15]0*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 850 PRO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Crucial_CT*SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT @@ -4226,6 +4235,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { */ { "INTEL*SSDSC2MH*", NULL, 0, }, + { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, @@ -4737,7 +4748,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) return NULL; /* libsas case */ - if (!ap->scsi_host) { + if (ap->flags & ATA_FLAG_SAS_HOST) { tag = ata_sas_allocate_tag(ap); if (tag < 0) return NULL; @@ -4776,7 +4787,7 @@ void ata_qc_free(struct ata_queued_cmd *qc) tag = qc->tag; if (likely(ata_tag_valid(tag))) { qc->tag = ATA_TAG_POISON; - if (!ap->scsi_host) + if (ap->flags & ATA_FLAG_SAS_HOST) ata_sas_free_tag(tag, ap); } } diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index f9054cd36a72..5389579c5120 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -869,6 +869,8 @@ try_offline_again: */ ata_msleep(ap, 1); + sata_set_spd(link); + /* * Now, bring the host controller online again, this can take time * as PHY reset and communication establishment, 1st D2H FIS and diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ba4abbe4693c..45937f88e77c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2242,7 +2242,7 @@ static void rtpm_status_str(struct seq_file *s, struct device *dev) } static int pm_genpd_summary_one(struct seq_file *s, - struct generic_pm_domain *gpd) + struct generic_pm_domain *genpd) { static const char * const status_lookup[] = { [GPD_STATE_ACTIVE] = "on", @@ -2256,26 +2256,26 @@ static int pm_genpd_summary_one(struct seq_file *s, struct gpd_link *link; int ret; - ret = mutex_lock_interruptible(&gpd->lock); + ret = mutex_lock_interruptible(&genpd->lock); if (ret) return -ERESTARTSYS; - if (WARN_ON(gpd->status >= ARRAY_SIZE(status_lookup))) + if (WARN_ON(genpd->status >= ARRAY_SIZE(status_lookup))) goto exit; - seq_printf(s, "%-30s %-15s ", gpd->name, status_lookup[gpd->status]); + seq_printf(s, "%-30s %-15s ", genpd->name, status_lookup[genpd->status]); /* * Modifications on the list require holding locks on both * master and slave, so we are safe. - * Also gpd->name is immutable. + * Also genpd->name is immutable. */ - list_for_each_entry(link, &gpd->master_links, master_node) { + list_for_each_entry(link, &genpd->master_links, master_node) { seq_printf(s, "%s", link->slave->name); - if (!list_is_last(&link->master_node, &gpd->master_links)) + if (!list_is_last(&link->master_node, &genpd->master_links)) seq_puts(s, ", "); } - list_for_each_entry(pm_data, &gpd->dev_list, list_node) { + list_for_each_entry(pm_data, &genpd->dev_list, list_node) { kobj_path = kobject_get_path(&pm_data->dev->kobj, GFP_KERNEL); if (kobj_path == NULL) continue; @@ -2287,14 +2287,14 @@ static int pm_genpd_summary_one(struct seq_file *s, seq_puts(s, "\n"); exit: - mutex_unlock(&gpd->lock); + mutex_unlock(&genpd->lock); return 0; } static int pm_genpd_summary_show(struct seq_file *s, void *data) { - struct generic_pm_domain *gpd; + struct generic_pm_domain *genpd; int ret = 0; seq_puts(s, " domain status slaves\n"); @@ -2305,8 +2305,8 @@ static int pm_genpd_summary_show(struct seq_file *s, void *data) if (ret) return -ERESTARTSYS; - list_for_each_entry(gpd, &gpd_list, gpd_list_node) { - ret = pm_genpd_summary_one(s, gpd); + list_for_each_entry(genpd, &gpd_list, gpd_list_node) { + ret = pm_genpd_summary_one(s, genpd); if (ret) break; } diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index c2744b30d5d9..aab7158d2afe 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -730,6 +730,7 @@ void pm_system_wakeup(void) pm_abort_suspend = true; freeze_wake(); } +EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_wakeup_clear(void) { diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index beb8b27d4621..a13587b5c2be 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops; extern struct regcache_ops regcache_lzo_ops; extern struct regcache_ops regcache_flat_ops; +static inline const char *regmap_name(const struct regmap *map) +{ + if (map->dev) + return dev_name(map->dev); + + return map->name; +} + #endif diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index d453a2c98ad0..81751a49d8bf 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -307,7 +307,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (pos == 0) { memmove(blk + offset * map->cache_word_size, blk, rbnode->blklen * map->cache_word_size); - bitmap_shift_right(present, present, offset, blklen); + bitmap_shift_left(present, present, offset, blklen); } /* update the rbnode block, its size and the base register */ diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index f373c35f9e1d..87db9893b463 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -218,7 +218,7 @@ int regcache_read(struct regmap *map, ret = map->cache_ops->read(map, reg, value); if (ret == 0) - trace_regmap_reg_read_cache(map->dev, reg, *value); + trace_regmap_reg_read_cache(map, reg, *value); return ret; } @@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map) dev_dbg(map->dev, "Syncing %s cache\n", map->cache_ops->name); name = map->cache_ops->name; - trace_regcache_sync(map->dev, name, "start"); + trace_regcache_sync(map, name, "start"); if (!map->cache_dirty) goto out; @@ -346,7 +346,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop"); + trace_regcache_sync(map, name, "stop"); return ret; } @@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min, name = map->cache_ops->name; dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max); - trace_regcache_sync(map->dev, name, "start region"); + trace_regcache_sync(map, name, "start region"); if (!map->cache_dirty) goto out; @@ -401,7 +401,7 @@ out: regmap_async_complete(map); - trace_regcache_sync(map->dev, name, "stop region"); + trace_regcache_sync(map, name, "stop region"); return ret; } @@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min, map->lock(map->lock_arg); - trace_regcache_drop_region(map->dev, min, max); + trace_regcache_drop_region(map, min, max); ret = map->cache_ops->drop(map, min, max); @@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_bypass && enable); map->cache_only = enable; - trace_regmap_cache_only(map->dev, enable); + trace_regmap_cache_only(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_only); @@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable) map->lock(map->lock_arg); WARN_ON(map->cache_only && enable); map->cache_bypass = enable; - trace_regmap_cache_bypass(map->dev, enable); + trace_regmap_cache_bypass(map, enable); map->unlock(map->lock_arg); } EXPORT_SYMBOL_GPL(regcache_cache_bypass); @@ -608,7 +608,8 @@ static int regcache_sync_block_single(struct regmap *map, void *block, for (i = start; i < end; i++) { regtmp = block_base + (i * map->reg_stride); - if (!regcache_reg_present(cache_present, i)) + if (!regcache_reg_present(cache_present, i) || + !regmap_writeable(map, regtmp)) continue; val = regcache_get_val(map, block, i); @@ -677,7 +678,8 @@ static int regcache_sync_block_raw(struct regmap *map, void *block, for (i = start; i < end; i++) { regtmp = block_base + (i * map->reg_stride); - if (!regcache_reg_present(cache_present, i)) { + if (!regcache_reg_present(cache_present, i) || + !regmap_writeable(map, regtmp)) { ret = regcache_sync_block_raw_flush(map, &data, base, regtmp); if (ret != 0) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 6299a50a5960..a6c3f75b4b01 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -499,7 +499,8 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, goto err_alloc; } - ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags, + ret = request_threaded_irq(irq, NULL, regmap_irq_thread, + irq_flags | IRQF_ONESHOT, chip->name, d); if (ret != 0) { dev_err(map->dev, "Failed to request IRQ %d for %s: %d\n", diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index f99b098ddabf..dbfe6a69c3da 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, if (map->async && map->bus->async_write) { struct regmap_async *async; - trace_regmap_async_write_start(map->dev, reg, val_len); + trace_regmap_async_write_start(map, reg, val_len); spin_lock_irqsave(&map->async_lock, flags); async = list_first_entry_or_null(&map->async_free, @@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, return ret; } - trace_regmap_hw_write_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes); /* If we're doing a single register write we can probably just * send the work_buf directly, otherwise try to do a gather @@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, kfree(buf); } - trace_regmap_hw_write_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg, map->format.format_write(map, reg, val); - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); ret = map->bus->write(map->bus_context, map->work_buf, map->format.buf_size); - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); return ret; } @@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x <= %x\n", reg, val); #endif - trace_regmap_reg_write(map->dev, reg, val); + trace_regmap_reg_write(map, reg, val); return map->reg_write(context, reg, val); } @@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; int val = regs[i].def; - trace_regmap_hw_write_start(map->dev, reg, 1); + trace_regmap_hw_write_start(map, reg, 1); map->format.format_reg(u8, reg, map->reg_shift); u8 += reg_bytes + pad_bytes; map->format.format_val(u8, val, 0); @@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { int reg = regs[i].reg; - trace_regmap_hw_write_done(map->dev, reg, 1); + trace_regmap_hw_write_done(map, reg, 1); } return ret; } @@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, */ u8[0] |= map->read_flag_mask; - trace_regmap_hw_read_start(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes); ret = map->bus->read(map->bus_context, map->work_buf, map->format.reg_bytes + map->format.pad_bytes, val, val_len); - trace_regmap_hw_read_done(map->dev, reg, - val_len / map->format.val_bytes); + trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes); return ret; } @@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg, dev_info(map->dev, "%x => %x\n", reg, *val); #endif - trace_regmap_reg_read(map->dev, reg, *val); + trace_regmap_reg_read(map, reg, *val); if (!map->cache_bypass) regcache_write(map, reg, *val); @@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret) struct regmap *map = async->map; bool wake; - trace_regmap_async_io_complete(map->dev); + trace_regmap_async_io_complete(map); spin_lock(&map->async_lock); list_move(&async->list, &map->async_free); @@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map) if (!map->bus || !map->bus->async_write) return 0; - trace_regmap_async_complete_start(map->dev); + trace_regmap_async_complete_start(map); wait_event(map->async_waitq, regmap_async_is_done(map)); @@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map) map->async_ret = 0; spin_unlock_irqrestore(&map->async_lock, flags); - trace_regmap_async_complete_done(map->dev); + trace_regmap_async_complete_done(map); return ret; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4bc2a5cb9935..a98c41f72c63 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -803,10 +803,6 @@ static int __init nbd_init(void) return -EINVAL; } - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) - return -ENOMEM; - part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -828,6 +824,10 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index ceb32dd52a6c..e23be20a3417 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) } get_device(dev->device); + INIT_LIST_HEAD(&dev->node); INIT_WORK(&dev->probe_work, nvme_async_probe); schedule_work(&dev->probe_work); return 0; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b87688881143..8bfc4c2bba87 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -272,6 +272,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ + { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW }, diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c index 79524ed2a3cb..8753b0f6a317 100644 --- a/drivers/char/ipmi/ipmi_powernv.c +++ b/drivers/char/ipmi/ipmi_powernv.c @@ -125,6 +125,7 @@ static int ipmi_powernv_recv(struct ipmi_smi_powernv *smi) spin_lock_irqsave(&smi->msg_lock, flags); if (!smi->cur_msg) { + spin_unlock_irqrestore(&smi->msg_lock, flags); pr_warn("no current message?\n"); return 0; } diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index f6646ed3047e..518585c1ce94 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -263,6 +263,11 @@ struct smi_info { bool supports_event_msg_buff; /* + * Can we clear the global enables receive irq bit? + */ + bool cannot_clear_recv_irq_bit; + + /* * Did we get an attention that we did not handle? */ bool got_attn; @@ -461,6 +466,9 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val) * allocate messages, we just leave them in the BMC and run the system * polled until we can allocate some memory. Once we have some * memory, we will re-enable the interrupt. + * + * Note that we cannot just use disable_irq(), since the interrupt may + * be shared. */ static inline bool disable_si_irq(struct smi_info *smi_info) { @@ -549,20 +557,15 @@ static u8 current_global_enables(struct smi_info *smi_info, u8 base, if (smi_info->supports_event_msg_buff) enables |= IPMI_BMC_EVT_MSG_BUFF; - else - enables &= ~IPMI_BMC_EVT_MSG_BUFF; - if (smi_info->irq && !smi_info->interrupt_disabled) + if ((smi_info->irq && !smi_info->interrupt_disabled) || + smi_info->cannot_clear_recv_irq_bit) enables |= IPMI_BMC_RCV_MSG_INTR; - else - enables &= ~IPMI_BMC_RCV_MSG_INTR; if (smi_info->supports_event_msg_buff && smi_info->irq && !smi_info->interrupt_disabled) enables |= IPMI_BMC_EVT_MSG_INTR; - else - enables &= ~IPMI_BMC_EVT_MSG_INTR; *irq_on = enables & (IPMI_BMC_EVT_MSG_INTR | IPMI_BMC_RCV_MSG_INTR); @@ -2900,6 +2903,96 @@ static int try_get_dev_id(struct smi_info *smi_info) return rv; } +/* + * Some BMCs do not support clearing the receive irq bit in the global + * enables (even if they don't support interrupts on the BMC). Check + * for this and handle it properly. + */ +static void check_clr_rcv_irq(struct smi_info *smi_info) +{ + unsigned char msg[3]; + unsigned char *resp; + unsigned long resp_len; + int rv; + + resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL); + if (!resp) { + printk(KERN_WARNING PFX "Out of memory allocating response for" + " global enables command, cannot check recv irq bit" + " handling.\n"); + return; + } + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING PFX "Error getting response from get" + " global enables command, cannot check recv irq bit" + " handling.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 4 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD || + resp[2] != 0) { + printk(KERN_WARNING PFX "Invalid return from get global" + " enables command, cannot check recv irq bit" + " handling.\n"); + rv = -EINVAL; + goto out; + } + + if ((resp[3] & IPMI_BMC_RCV_MSG_INTR) == 0) + /* Already clear, should work ok. */ + goto out; + + msg[0] = IPMI_NETFN_APP_REQUEST << 2; + msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD; + msg[2] = resp[3] & ~IPMI_BMC_RCV_MSG_INTR; + smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3); + + rv = wait_for_msg_done(smi_info); + if (rv) { + printk(KERN_WARNING PFX "Error getting response from set" + " global enables command, cannot check recv irq bit" + " handling.\n"); + goto out; + } + + resp_len = smi_info->handlers->get_result(smi_info->si_sm, + resp, IPMI_MAX_MSG_LENGTH); + + if (resp_len < 3 || + resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || + resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) { + printk(KERN_WARNING PFX "Invalid return from get global" + " enables command, cannot check recv irq bit" + " handling.\n"); + rv = -EINVAL; + goto out; + } + + if (resp[2] != 0) { + /* + * An error when setting the event buffer bit means + * clearing the bit is not supported. + */ + printk(KERN_WARNING PFX "The BMC does not support clearing" + " the recv irq bit, compensating, but the BMC needs to" + " be fixed.\n"); + smi_info->cannot_clear_recv_irq_bit = true; + } + out: + kfree(resp); +} + static int try_enable_event_buffer(struct smi_info *smi_info) { unsigned char msg[3]; @@ -3395,6 +3488,8 @@ static int try_smi_init(struct smi_info *new_smi) goto out_err; } + check_clr_rcv_irq(new_smi); + setup_oem_data_handler(new_smi); setup_xaction_handlers(new_smi); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f6e378dac5f5..f40e3bd2c69c 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -468,11 +468,13 @@ static int ipmi_ssif_thread(void *data) int result; /* Wait for something to do */ - wait_for_completion(&ssif_info->wake_thread); - init_completion(&ssif_info->wake_thread); - + result = wait_for_completion_interruptible( + &ssif_info->wake_thread); if (ssif_info->stopping) break; + if (result == -ERESTARTSYS) + continue; + init_completion(&ssif_info->wake_thread); if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) { result = i2c_smbus_write_block_data( diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 1d278ccd751f..e096e9cddb40 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -140,24 +140,24 @@ static int tpm_dev_add_device(struct tpm_chip *chip) { int rc; - rc = device_add(&chip->dev); + rc = cdev_add(&chip->cdev, chip->dev.devt, 1); if (rc) { dev_err(&chip->dev, - "unable to device_register() %s, major %d, minor %d, err=%d\n", + "unable to cdev_add() %s, major %d, minor %d, err=%d\n", chip->devname, MAJOR(chip->dev.devt), MINOR(chip->dev.devt), rc); + device_unregister(&chip->dev); return rc; } - rc = cdev_add(&chip->cdev, chip->dev.devt, 1); + rc = device_add(&chip->dev); if (rc) { dev_err(&chip->dev, - "unable to cdev_add() %s, major %d, minor %d, err=%d\n", + "unable to device_register() %s, major %d, minor %d, err=%d\n", chip->devname, MAJOR(chip->dev.devt), MINOR(chip->dev.devt), rc); - device_unregister(&chip->dev); return rc; } @@ -174,27 +174,17 @@ static void tpm_dev_del_device(struct tpm_chip *chip) * tpm_chip_register() - create a character device for the TPM chip * @chip: TPM chip to use. * - * Creates a character device for the TPM chip and adds sysfs interfaces for - * the device, PPI and TCPA. As the last step this function adds the - * chip to the list of TPM chips available for use. + * Creates a character device for the TPM chip and adds sysfs attributes for + * the device. As the last step this function adds the chip to the list of TPM + * chips available for in-kernel use. * - * NOTE: This function should be only called after the chip initialization - * is complete. - * - * Called from tpm_<specific>.c probe function only for devices - * the driver has determined it should claim. Prior to calling - * this function the specific probe function has called pci_enable_device - * upon errant exit from this function specific probe function should call - * pci_disable_device + * This function should be only called after the chip initialization is + * complete. */ int tpm_chip_register(struct tpm_chip *chip) { int rc; - rc = tpm_dev_add_device(chip); - if (rc) - return rc; - /* Populate sysfs for TPM1 devices. */ if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { rc = tpm_sysfs_add_device(chip); @@ -208,6 +198,10 @@ int tpm_chip_register(struct tpm_chip *chip) chip->bios_dir = tpm_bios_log_setup(chip->devname); } + rc = tpm_dev_add_device(chip); + if (rc) + return rc; + /* Make the chip available. */ spin_lock(&driver_lock); list_add_rcu(&chip->list, &tpm_chip_list); diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index b1e53e3aece5..42ffa5e7a1e0 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -124,7 +124,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct ibmvtpm_dev *ibmvtpm; struct ibmvtpm_crq crq; - u64 *word = (u64 *) &crq; + __be64 *word = (__be64 *)&crq; int rc; ibmvtpm = (struct ibmvtpm_dev *)TPM_VPRIV(chip); @@ -145,11 +145,11 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count); crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_TPM_COMMAND; - crq.len = (u16)count; - crq.data = ibmvtpm->rtce_dma_handle; + crq.len = cpu_to_be16(count); + crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle); - rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(word[0]), - cpu_to_be64(word[1])); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]), + be64_to_cpu(word[1])); if (rc != H_SUCCESS) { dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); rc = 0; diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h index f595f14426bf..6af92890518f 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.h +++ b/drivers/char/tpm/tpm_ibmvtpm.h @@ -22,9 +22,9 @@ struct ibmvtpm_crq { u8 valid; u8 msg; - u16 len; - u32 data; - u64 reserved; + __be16 len; + __be32 data; + __be64 reserved; } __attribute__((packed, aligned(8))); struct ibmvtpm_crq_queue { diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index fae2dbbf5745..72d7028f779b 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -142,6 +142,7 @@ struct ports_device { * notification */ struct work_struct control_work; + struct work_struct config_work; struct list_head ports; @@ -1837,10 +1838,21 @@ static void config_intr(struct virtio_device *vdev) portdev = vdev->priv; + if (!use_multiport(portdev)) + schedule_work(&portdev->config_work); +} + +static void config_work_handler(struct work_struct *work) +{ + struct ports_device *portdev; + + portdev = container_of(work, struct ports_device, control_work); if (!use_multiport(portdev)) { + struct virtio_device *vdev; struct port *port; u16 rows, cols; + vdev = portdev->vdev; virtio_cread(vdev, struct virtio_console_config, cols, &cols); virtio_cread(vdev, struct virtio_console_config, rows, &rows); @@ -2040,12 +2052,14 @@ static int virtcons_probe(struct virtio_device *vdev) virtio_device_ready(portdev->vdev); + INIT_WORK(&portdev->config_work, &config_work_handler); + INIT_WORK(&portdev->control_work, &control_work_handler); + if (multiport) { unsigned int nr_added_bufs; spin_lock_init(&portdev->c_ivq_lock); spin_lock_init(&portdev->c_ovq_lock); - INIT_WORK(&portdev->control_work, &control_work_handler); nr_added_bufs = fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); @@ -2113,6 +2127,8 @@ static void virtcons_remove(struct virtio_device *vdev) /* Finish up work that's lined up */ if (use_multiport(portdev)) cancel_work_sync(&portdev->control_work); + else + cancel_work_sync(&portdev->config_work); list_for_each_entry_safe(port, port2, &portdev->ports, list) unplug_port(port); @@ -2164,6 +2180,7 @@ static int virtcons_freeze(struct virtio_device *vdev) virtqueue_disable_cb(portdev->c_ivq); cancel_work_sync(&portdev->control_work); + cancel_work_sync(&portdev->config_work); /* * Once more: if control_work_handler() was running, it would * enable the cb as the last step. diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index f07c8152e5cc..3f27d21fb729 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -89,12 +89,29 @@ static int pmc_irq_set_type(struct irq_data *d, unsigned type) return 0; } +static void pmc_irq_suspend(struct irq_data *d) +{ + struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); + + pmc->imr = pmc_read(pmc, AT91_PMC_IMR); + pmc_write(pmc, AT91_PMC_IDR, pmc->imr); +} + +static void pmc_irq_resume(struct irq_data *d) +{ + struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); + + pmc_write(pmc, AT91_PMC_IER, pmc->imr); +} + static struct irq_chip pmc_irq = { .name = "PMC", .irq_disable = pmc_irq_mask, .irq_mask = pmc_irq_mask, .irq_unmask = pmc_irq_unmask, .irq_set_type = pmc_irq_set_type, + .irq_suspend = pmc_irq_suspend, + .irq_resume = pmc_irq_resume, }; static struct lock_class_key pmc_lock_class; @@ -224,7 +241,8 @@ static struct at91_pmc *__init at91_pmc_init(struct device_node *np, goto out_free_pmc; pmc_write(pmc, AT91_PMC_IDR, 0xffffffff); - if (request_irq(pmc->virq, pmc_irq_handler, IRQF_SHARED, "pmc", pmc)) + if (request_irq(pmc->virq, pmc_irq_handler, + IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc)) goto out_remove_irqdomain; return pmc; diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h index 52d2041fa3f6..69abb08cf146 100644 --- a/drivers/clk/at91/pmc.h +++ b/drivers/clk/at91/pmc.h @@ -33,6 +33,7 @@ struct at91_pmc { spinlock_t lock; const struct at91_pmc_caps *caps; struct irq_domain *irqdomain; + u32 imr; }; static inline void pmc_lock(struct at91_pmc *pmc) diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index db7f8bce7467..25006a8bb8e6 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -144,12 +144,6 @@ static unsigned long clk_divider_recalc_rate(struct clk_hw *hw, divider->flags); } -/* - * The reverse of DIV_ROUND_UP: The maximum number which - * divided by m is r - */ -#define MULT_ROUND_UP(r, m) ((r) * (m) + (m) - 1) - static bool _is_valid_table_div(const struct clk_div_table *table, unsigned int div) { @@ -225,19 +219,24 @@ static int _div_round_closest(const struct clk_div_table *table, unsigned long parent_rate, unsigned long rate, unsigned long flags) { - int up, down, div; + int up, down; + unsigned long up_rate, down_rate; - up = down = div = DIV_ROUND_CLOSEST(parent_rate, rate); + up = DIV_ROUND_UP(parent_rate, rate); + down = parent_rate / rate; if (flags & CLK_DIVIDER_POWER_OF_TWO) { - up = __roundup_pow_of_two(div); - down = __rounddown_pow_of_two(div); + up = __roundup_pow_of_two(up); + down = __rounddown_pow_of_two(down); } else if (table) { - up = _round_up_table(table, div); - down = _round_down_table(table, div); + up = _round_up_table(table, up); + down = _round_down_table(table, down); } - return (up - div) <= (div - down) ? up : down; + up_rate = DIV_ROUND_UP(parent_rate, up); + down_rate = DIV_ROUND_UP(parent_rate, down); + + return (rate - up_rate) <= (down_rate - rate) ? up : down; } static int _div_round(const struct clk_div_table *table, @@ -313,7 +312,7 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate, return i; } parent_rate = __clk_round_rate(__clk_get_parent(hw->clk), - MULT_ROUND_UP(rate, i)); + rate * i); now = DIV_ROUND_UP(parent_rate, i); if (_is_best_div(rate, now, best, flags)) { bestdiv = i; @@ -353,7 +352,7 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate, bestdiv = readl(divider->reg) >> divider->shift; bestdiv &= div_mask(divider->width); bestdiv = _get_div(divider->table, bestdiv, divider->flags); - return bestdiv; + return DIV_ROUND_UP(*prate, bestdiv); } return divider_round_rate(hw, rate, prate, divider->table, diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index eb0152961d3c..237f23f68bfc 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1350,7 +1350,6 @@ static unsigned long clk_core_get_rate(struct clk_core *clk) return rate; } -EXPORT_SYMBOL_GPL(clk_core_get_rate); /** * clk_get_rate - return the rate of clk @@ -2171,6 +2170,32 @@ int clk_get_phase(struct clk *clk) } /** + * clk_is_match - check if two clk's point to the same hardware clock + * @p: clk compared against q + * @q: clk compared against p + * + * Returns true if the two struct clk pointers both point to the same hardware + * clock node. Put differently, returns true if struct clk *p and struct clk *q + * share the same struct clk_core object. + * + * Returns false otherwise. Note that two NULL clks are treated as matching. + */ +bool clk_is_match(const struct clk *p, const struct clk *q) +{ + /* trivial case: identical struct clk's or both NULL */ + if (p == q) + return true; + + /* true if clk->core pointers match. Avoid derefing garbage */ + if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q)) + if (p->core == q->core) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(clk_is_match); + +/** * __clk_init - initialize the data structures in a struct clk * @dev: device initializing this clk, placeholder for now * @clk: clk being initialized diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c index b0b562b9ce0e..e60feffc10a1 100644 --- a/drivers/clk/qcom/gcc-msm8960.c +++ b/drivers/clk/qcom/gcc-msm8960.c @@ -48,6 +48,17 @@ static struct clk_pll pll3 = { }, }; +static struct clk_regmap pll4_vote = { + .enable_reg = 0x34c0, + .enable_mask = BIT(4), + .hw.init = &(struct clk_init_data){ + .name = "pll4_vote", + .parent_names = (const char *[]){ "pll4" }, + .num_parents = 1, + .ops = &clk_pll_vote_ops, + }, +}; + static struct clk_pll pll8 = { .l_reg = 0x3144, .m_reg = 0x3148, @@ -3023,6 +3034,7 @@ static struct clk_branch rpm_msg_ram_h_clk = { static struct clk_regmap *gcc_msm8960_clks[] = { [PLL3] = &pll3.clkr, + [PLL4_VOTE] = &pll4_vote, [PLL8] = &pll8.clkr, [PLL8_VOTE] = &pll8_vote, [PLL14] = &pll14.clkr, @@ -3247,6 +3259,7 @@ static const struct qcom_reset_map gcc_msm8960_resets[] = { static struct clk_regmap *gcc_apq8064_clks[] = { [PLL3] = &pll3.clkr, + [PLL4_VOTE] = &pll4_vote, [PLL8] = &pll8.clkr, [PLL8_VOTE] = &pll8_vote, [PLL14] = &pll14.clkr, diff --git a/drivers/clk/qcom/lcc-ipq806x.c b/drivers/clk/qcom/lcc-ipq806x.c index 121ffde25dc3..c9ff27b4648b 100644 --- a/drivers/clk/qcom/lcc-ipq806x.c +++ b/drivers/clk/qcom/lcc-ipq806x.c @@ -462,7 +462,6 @@ static struct platform_driver lcc_ipq806x_driver = { .remove = lcc_ipq806x_remove, .driver = { .name = "lcc-ipq806x", - .owner = THIS_MODULE, .of_match_table = lcc_ipq806x_match_table, }, }; diff --git a/drivers/clk/qcom/lcc-msm8960.c b/drivers/clk/qcom/lcc-msm8960.c index a75a408cfccd..e2c863295f00 100644 --- a/drivers/clk/qcom/lcc-msm8960.c +++ b/drivers/clk/qcom/lcc-msm8960.c @@ -417,8 +417,8 @@ static struct clk_rcg slimbus_src = { .mnctr_en_bit = 8, .mnctr_reset_bit = 7, .mnctr_mode_shift = 5, - .n_val_shift = 16, - .m_val_shift = 16, + .n_val_shift = 24, + .m_val_shift = 8, .width = 8, }, .p = { @@ -547,7 +547,7 @@ static int lcc_msm8960_probe(struct platform_device *pdev) return PTR_ERR(regmap); /* Use the correct frequency plan depending on speed of PLL4 */ - val = regmap_read(regmap, 0x4, &val); + regmap_read(regmap, 0x4, &val); if (val == 0x12) { slimbus_src.freq_tbl = clk_tbl_aif_osr_492; mi2s_osr_src.freq_tbl = clk_tbl_aif_osr_492; @@ -574,7 +574,6 @@ static struct platform_driver lcc_msm8960_driver = { .remove = lcc_msm8960_remove, .driver = { .name = "lcc-msm8960", - .owner = THIS_MODULE, .of_match_table = lcc_msm8960_match_table, }, }; diff --git a/drivers/clk/ti/fapll.c b/drivers/clk/ti/fapll.c index 6ef89639a9f6..d21640634adf 100644 --- a/drivers/clk/ti/fapll.c +++ b/drivers/clk/ti/fapll.c @@ -84,7 +84,7 @@ static int ti_fapll_enable(struct clk_hw *hw) struct fapll_data *fd = to_fapll(hw); u32 v = readl_relaxed(fd->base); - v |= (1 << FAPLL_MAIN_PLLEN); + v |= FAPLL_MAIN_PLLEN; writel_relaxed(v, fd->base); return 0; @@ -95,7 +95,7 @@ static void ti_fapll_disable(struct clk_hw *hw) struct fapll_data *fd = to_fapll(hw); u32 v = readl_relaxed(fd->base); - v &= ~(1 << FAPLL_MAIN_PLLEN); + v &= ~FAPLL_MAIN_PLLEN; writel_relaxed(v, fd->base); } @@ -104,7 +104,7 @@ static int ti_fapll_is_enabled(struct clk_hw *hw) struct fapll_data *fd = to_fapll(hw); u32 v = readl_relaxed(fd->base); - return v & (1 << FAPLL_MAIN_PLLEN); + return v & FAPLL_MAIN_PLLEN; } static unsigned long ti_fapll_recalc_rate(struct clk_hw *hw, diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 68161f7a07d6..a0b036ccb118 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -192,6 +192,7 @@ config SYS_SUPPORTS_EM_STI config SH_TIMER_CMT bool "Renesas CMT timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_CMT help This enables build of a clocksource and clockevent driver for @@ -201,6 +202,7 @@ config SH_TIMER_CMT config SH_TIMER_MTU2 bool "Renesas MTU2 timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_MTU2 help This enables build of a clockevent driver for the Multi-Function @@ -210,6 +212,7 @@ config SH_TIMER_MTU2 config SH_TIMER_TMU bool "Renesas TMU timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM default SYS_SUPPORTS_SH_TMU help This enables build of a clocksource and clockevent driver for diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index a3025e7ae35f..266469691e58 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -661,17 +661,17 @@ static const struct of_device_id arch_timer_mem_of_match[] __initconst = { }; static bool __init -arch_timer_probed(int type, const struct of_device_id *matches) +arch_timer_needs_probing(int type, const struct of_device_id *matches) { struct device_node *dn; - bool probed = true; + bool needs_probing = false; dn = of_find_matching_node(NULL, matches); if (dn && of_device_is_available(dn) && !(arch_timers_present & type)) - probed = false; + needs_probing = true; of_node_put(dn); - return probed; + return needs_probing; } static void __init arch_timer_common_init(void) @@ -680,9 +680,9 @@ static void __init arch_timer_common_init(void) /* Wait until both nodes are probed if we have two timers */ if ((arch_timers_present & mask) != mask) { - if (!arch_timer_probed(ARCH_MEM_TIMER, arch_timer_mem_of_match)) + if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match)) return; - if (!arch_timer_probed(ARCH_CP15_TIMER, arch_timer_of_match)) + if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match)) return; } diff --git a/drivers/clocksource/dw_apb_timer_of.c b/drivers/clocksource/dw_apb_timer_of.c index d305fb089767..a19a3f619cc7 100644 --- a/drivers/clocksource/dw_apb_timer_of.c +++ b/drivers/clocksource/dw_apb_timer_of.c @@ -108,7 +108,7 @@ static void __init add_clocksource(struct device_node *source_timer) static u64 notrace read_sched_clock(void) { - return ~__raw_readl(sched_io_base); + return ~readl_relaxed(sched_io_base); } static const struct of_device_id sptimer_ids[] __initconst = { diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index d0a7bd66b8b9..dc3c6ee04aaa 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs) ret = em_sti_start(p, USER_CLOCKSOURCE); if (!ret) - __clocksource_updatefreq_hz(cs, p->rate); + __clocksource_update_freq_hz(cs, p->rate); return ret; } diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 2bd13b53b727..b8ff3c64cc45 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs) ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); if (!ret) { - __clocksource_updatefreq_hz(cs, ch->rate); + __clocksource_update_freq_hz(cs, ch->rate); ch->cs_enabled = true; } return ret; diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index f150ca82bfaf..b6b8fa3cd211 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs) ret = sh_tmu_enable(ch); if (!ret) { - __clocksource_updatefreq_hz(cs, ch->rate); + __clocksource_update_freq_hz(cs, ch->rate); ch->cs_enabled = true; } diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index f4a9c0058b4d..1928a8912584 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -170,7 +170,15 @@ static void __init sun4i_timer_init(struct device_node *node) TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M), timer_base + TIMER_CTL_REG(1)); - sched_clock_register(sun4i_timer_sched_read, 32, rate); + /* + * sched_clock_register does not have priorities, and on sun6i and + * later there is a better sched_clock registered by arm_arch_timer.c + */ + if (of_machine_is_compatible("allwinner,sun4i-a10") || + of_machine_is_compatible("allwinner,sun5i-a13") || + of_machine_is_compatible("allwinner,sun5i-a10s")) + sched_clock_register(sun4i_timer_sched_read, 32, rate); + clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name, rate, 350, 32, clocksource_mmio_readl_down); diff --git a/drivers/clocksource/tegra20_timer.c b/drivers/clocksource/tegra20_timer.c index d2616ef16770..5a112d72fc2d 100644 --- a/drivers/clocksource/tegra20_timer.c +++ b/drivers/clocksource/tegra20_timer.c @@ -51,15 +51,15 @@ static void __iomem *timer_reg_base; static void __iomem *rtc_base; -static struct timespec persistent_ts; +static struct timespec64 persistent_ts; static u64 persistent_ms, last_persistent_ms; static struct delay_timer tegra_delay_timer; #define timer_writel(value, reg) \ - __raw_writel(value, timer_reg_base + (reg)) + writel_relaxed(value, timer_reg_base + (reg)) #define timer_readl(reg) \ - __raw_readl(timer_reg_base + (reg)) + readl_relaxed(timer_reg_base + (reg)) static int tegra_timer_set_next_event(unsigned long cycles, struct clock_event_device *evt) @@ -120,26 +120,25 @@ static u64 tegra_rtc_read_ms(void) } /* - * tegra_read_persistent_clock - Return time from a persistent clock. + * tegra_read_persistent_clock64 - Return time from a persistent clock. * * Reads the time from a source which isn't disabled during PM, the * 32k sync timer. Convert the cycles elapsed since last read into - * nsecs and adds to a monotonically increasing timespec. + * nsecs and adds to a monotonically increasing timespec64. * Care must be taken that this funciton is not called while the * tegra_rtc driver could be executing to avoid race conditions * on the RTC shadow register */ -static void tegra_read_persistent_clock(struct timespec *ts) +static void tegra_read_persistent_clock64(struct timespec64 *ts) { u64 delta; - struct timespec *tsp = &persistent_ts; last_persistent_ms = persistent_ms; persistent_ms = tegra_rtc_read_ms(); delta = persistent_ms - last_persistent_ms; - timespec_add_ns(tsp, delta * NSEC_PER_MSEC); - *ts = *tsp; + timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC); + *ts = persistent_ts; } static unsigned long tegra_delay_timer_read_counter_long(void) @@ -252,7 +251,7 @@ static void __init tegra20_init_rtc(struct device_node *np) else clk_prepare_enable(clk); - register_persistent_clock(NULL, tegra_read_persistent_clock); + register_persistent_clock(NULL, tegra_read_persistent_clock64); } CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc); diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index bba62f9deefb..5b6e3d5644c9 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -111,7 +111,7 @@ static irqreturn_t efm32_clock_event_handler(int irq, void *dev_id) static struct efm32_clock_event_ddata clock_event_ddata = { .evtdev = { .name = "efm32 clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_MODE_PERIODIC, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .set_mode = efm32_clock_event_set_mode, .set_next_event = efm32_clock_event_set_next_event, .rating = 200, @@ -225,12 +225,12 @@ static int __init efm32_clockevent_init(struct device_node *np) clock_event_ddata.base = base; clock_event_ddata.periodic_top = DIV_ROUND_CLOSEST(rate, 1024 * HZ); - setup_irq(irq, &efm32_clock_event_irq); - clockevents_config_and_register(&clock_event_ddata.evtdev, DIV_ROUND_CLOSEST(rate, 1024), 0xf, 0xffff); + setup_irq(irq, &efm32_clock_event_irq); + return 0; err_get_irq: diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index b5b4d4585c9a..c0304ff608b0 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -61,12 +61,12 @@ static inline struct pit_data *clkevt_to_pit_data(struct clock_event_device *clk static inline unsigned int pit_read(void __iomem *base, unsigned int reg_offset) { - return __raw_readl(base + reg_offset); + return readl_relaxed(base + reg_offset); } static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsigned long value) { - __raw_writel(value, base + reg_offset); + writel_relaxed(value, base + reg_offset); } /* diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 02268448dc85..28aa4b7bb602 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -17,7 +17,7 @@ #include <linux/irq.h> #include <linux/irqreturn.h> #include <linux/reset.h> -#include <linux/sched_clock.h> +#include <linux/slab.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> @@ -37,8 +37,31 @@ #define TIMER_SYNC_TICKS 3 -static void __iomem *timer_base; -static u32 ticks_per_jiffy; +struct sun5i_timer { + void __iomem *base; + struct clk *clk; + struct notifier_block clk_rate_cb; + u32 ticks_per_jiffy; +}; + +#define to_sun5i_timer(x) \ + container_of(x, struct sun5i_timer, clk_rate_cb) + +struct sun5i_timer_clksrc { + struct sun5i_timer timer; + struct clocksource clksrc; +}; + +#define to_sun5i_timer_clksrc(x) \ + container_of(x, struct sun5i_timer_clksrc, clksrc) + +struct sun5i_timer_clkevt { + struct sun5i_timer timer; + struct clock_event_device clkevt; +}; + +#define to_sun5i_timer_clkevt(x) \ + container_of(x, struct sun5i_timer_clkevt, clkevt) /* * When we disable a timer, we need to wait at least for 2 cycles of @@ -46,30 +69,30 @@ static u32 ticks_per_jiffy; * that is already setup and runs at the same frequency than the other * timers, and we never will be disabled. */ -static void sun5i_clkevt_sync(void) +static void sun5i_clkevt_sync(struct sun5i_timer_clkevt *ce) { - u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1)); + u32 old = readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1)); - while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS) + while ((old - readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS) cpu_relax(); } -static void sun5i_clkevt_time_stop(u8 timer) +static void sun5i_clkevt_time_stop(struct sun5i_timer_clkevt *ce, u8 timer) { - u32 val = readl(timer_base + TIMER_CTL_REG(timer)); - writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer)); + u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer)); + writel(val & ~TIMER_CTL_ENABLE, ce->timer.base + TIMER_CTL_REG(timer)); - sun5i_clkevt_sync(); + sun5i_clkevt_sync(ce); } -static void sun5i_clkevt_time_setup(u8 timer, u32 delay) +static void sun5i_clkevt_time_setup(struct sun5i_timer_clkevt *ce, u8 timer, u32 delay) { - writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer)); + writel(delay, ce->timer.base + TIMER_INTVAL_LO_REG(timer)); } -static void sun5i_clkevt_time_start(u8 timer, bool periodic) +static void sun5i_clkevt_time_start(struct sun5i_timer_clkevt *ce, u8 timer, bool periodic) { - u32 val = readl(timer_base + TIMER_CTL_REG(timer)); + u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer)); if (periodic) val &= ~TIMER_CTL_ONESHOT; @@ -77,80 +100,230 @@ static void sun5i_clkevt_time_start(u8 timer, bool periodic) val |= TIMER_CTL_ONESHOT; writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, - timer_base + TIMER_CTL_REG(timer)); + ce->timer.base + TIMER_CTL_REG(timer)); } static void sun5i_clkevt_mode(enum clock_event_mode mode, - struct clock_event_device *clk) + struct clock_event_device *clkevt) { + struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt); + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_setup(0, ticks_per_jiffy); - sun5i_clkevt_time_start(0, true); + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_setup(ce, 0, ce->timer.ticks_per_jiffy); + sun5i_clkevt_time_start(ce, 0, true); break; case CLOCK_EVT_MODE_ONESHOT: - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_start(0, false); + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_start(ce, 0, false); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: default: - sun5i_clkevt_time_stop(0); + sun5i_clkevt_time_stop(ce, 0); break; } } static int sun5i_clkevt_next_event(unsigned long evt, - struct clock_event_device *unused) + struct clock_event_device *clkevt) { - sun5i_clkevt_time_stop(0); - sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS); - sun5i_clkevt_time_start(0, false); + struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt); + + sun5i_clkevt_time_stop(ce, 0); + sun5i_clkevt_time_setup(ce, 0, evt - TIMER_SYNC_TICKS); + sun5i_clkevt_time_start(ce, 0, false); return 0; } -static struct clock_event_device sun5i_clockevent = { - .name = "sun5i_tick", - .rating = 340, - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = sun5i_clkevt_mode, - .set_next_event = sun5i_clkevt_next_event, -}; - - static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id) { - struct clock_event_device *evt = (struct clock_event_device *)dev_id; + struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id; - writel(0x1, timer_base + TIMER_IRQ_ST_REG); - evt->event_handler(evt); + writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG); + ce->clkevt.event_handler(&ce->clkevt); return IRQ_HANDLED; } -static struct irqaction sun5i_timer_irq = { - .name = "sun5i_timer0", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = sun5i_timer_interrupt, - .dev_id = &sun5i_clockevent, -}; +static cycle_t sun5i_clksrc_read(struct clocksource *clksrc) +{ + struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc); + + return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1)); +} -static u64 sun5i_timer_sched_read(void) +static int sun5i_rate_cb_clksrc(struct notifier_block *nb, + unsigned long event, void *data) { - return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1)); + struct clk_notifier_data *ndata = data; + struct sun5i_timer *timer = to_sun5i_timer(nb); + struct sun5i_timer_clksrc *cs = container_of(timer, struct sun5i_timer_clksrc, timer); + + switch (event) { + case PRE_RATE_CHANGE: + clocksource_unregister(&cs->clksrc); + break; + + case POST_RATE_CHANGE: + clocksource_register_hz(&cs->clksrc, ndata->new_rate); + break; + + default: + break; + } + + return NOTIFY_DONE; +} + +static int __init sun5i_setup_clocksource(struct device_node *node, + void __iomem *base, + struct clk *clk, int irq) +{ + struct sun5i_timer_clksrc *cs; + unsigned long rate; + int ret; + + cs = kzalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return -ENOMEM; + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clock\n"); + goto err_free; + } + + rate = clk_get_rate(clk); + + cs->timer.base = base; + cs->timer.clk = clk; + cs->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clksrc; + cs->timer.clk_rate_cb.next = NULL; + + ret = clk_notifier_register(clk, &cs->timer.clk_rate_cb); + if (ret) { + pr_err("Unable to register clock notifier.\n"); + goto err_disable_clk; + } + + writel(~0, base + TIMER_INTVAL_LO_REG(1)); + writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, + base + TIMER_CTL_REG(1)); + + cs->clksrc.name = node->name; + cs->clksrc.rating = 340; + cs->clksrc.read = sun5i_clksrc_read; + cs->clksrc.mask = CLOCKSOURCE_MASK(32); + cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + ret = clocksource_register_hz(&cs->clksrc, rate); + if (ret) { + pr_err("Couldn't register clock source.\n"); + goto err_remove_notifier; + } + + return 0; + +err_remove_notifier: + clk_notifier_unregister(clk, &cs->timer.clk_rate_cb); +err_disable_clk: + clk_disable_unprepare(clk); +err_free: + kfree(cs); + return ret; +} + +static int sun5i_rate_cb_clkevt(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct clk_notifier_data *ndata = data; + struct sun5i_timer *timer = to_sun5i_timer(nb); + struct sun5i_timer_clkevt *ce = container_of(timer, struct sun5i_timer_clkevt, timer); + + if (event == POST_RATE_CHANGE) { + clockevents_update_freq(&ce->clkevt, ndata->new_rate); + ce->timer.ticks_per_jiffy = DIV_ROUND_UP(ndata->new_rate, HZ); + } + + return NOTIFY_DONE; +} + +static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base, + struct clk *clk, int irq) +{ + struct sun5i_timer_clkevt *ce; + unsigned long rate; + int ret; + u32 val; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) + return -ENOMEM; + + ret = clk_prepare_enable(clk); + if (ret) { + pr_err("Couldn't enable parent clock\n"); + goto err_free; + } + + rate = clk_get_rate(clk); + + ce->timer.base = base; + ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); + ce->timer.clk = clk; + ce->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clkevt; + ce->timer.clk_rate_cb.next = NULL; + + ret = clk_notifier_register(clk, &ce->timer.clk_rate_cb); + if (ret) { + pr_err("Unable to register clock notifier.\n"); + goto err_disable_clk; + } + + ce->clkevt.name = node->name; + ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + ce->clkevt.set_next_event = sun5i_clkevt_next_event; + ce->clkevt.set_mode = sun5i_clkevt_mode; + ce->clkevt.rating = 340; + ce->clkevt.irq = irq; + ce->clkevt.cpumask = cpu_possible_mask; + + /* Enable timer0 interrupt */ + val = readl(base + TIMER_IRQ_EN_REG); + writel(val | TIMER_IRQ_EN(0), base + TIMER_IRQ_EN_REG); + + clockevents_config_and_register(&ce->clkevt, rate, + TIMER_SYNC_TICKS, 0xffffffff); + + ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL, + "sun5i_timer0", ce); + if (ret) { + pr_err("Unable to register interrupt\n"); + goto err_remove_notifier; + } + + return 0; + +err_remove_notifier: + clk_notifier_unregister(clk, &ce->timer.clk_rate_cb); +err_disable_clk: + clk_disable_unprepare(clk); +err_free: + kfree(ce); + return ret; } static void __init sun5i_timer_init(struct device_node *node) { struct reset_control *rstc; - unsigned long rate; + void __iomem *timer_base; struct clk *clk; - int ret, irq; - u32 val; + int irq; - timer_base = of_iomap(node, 0); + timer_base = of_io_request_and_map(node, 0, of_node_full_name(node)); if (!timer_base) panic("Can't map registers"); @@ -161,36 +334,13 @@ static void __init sun5i_timer_init(struct device_node *node) clk = of_clk_get(node, 0); if (IS_ERR(clk)) panic("Can't get timer clock"); - clk_prepare_enable(clk); - rate = clk_get_rate(clk); rstc = of_reset_control_get(node, NULL); if (!IS_ERR(rstc)) reset_control_deassert(rstc); - writel(~0, timer_base + TIMER_INTVAL_LO_REG(1)); - writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, - timer_base + TIMER_CTL_REG(1)); - - sched_clock_register(sun5i_timer_sched_read, 32, rate); - clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name, - rate, 340, 32, clocksource_mmio_readl_down); - - ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); - - ret = setup_irq(irq, &sun5i_timer_irq); - if (ret) - pr_warn("failed to setup irq %d\n", irq); - - /* Enable timer0 interrupt */ - val = readl(timer_base + TIMER_IRQ_EN_REG); - writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); - - sun5i_clockevent.cpumask = cpu_possible_mask; - sun5i_clockevent.irq = irq; - - clockevents_config_and_register(&sun5i_clockevent, rate, - TIMER_SYNC_TICKS, 0xffffffff); + sun5i_setup_clocksource(node, timer_base, clk, irq); + sun5i_setup_clockevent(node, timer_base, clk, irq); } CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", sun5i_timer_init); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 28e59a48b35f..8ae655c364f4 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1698,15 +1698,18 @@ void cpufreq_resume(void) || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS)) pr_err("%s: Failed to start governor for policy: %p\n", __func__, policy); - - /* - * schedule call cpufreq_update_policy() for boot CPU, i.e. last - * policy in list. It will verify that the current freq is in - * sync with what we believe it to be. - */ - if (list_is_last(&policy->policy_list, &cpufreq_policy_list)) - schedule_work(&policy->update); } + + /* + * schedule call cpufreq_update_policy() for first-online CPU, as that + * wouldn't be hotplugged-out on suspend. It will verify that the + * current freq is in sync with what we believe it to be. + */ + policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask)); + if (WARN_ON(!policy)) + return; + + schedule_work(&policy->update); } /** diff --git a/drivers/cpufreq/exynos-cpufreq.c b/drivers/cpufreq/exynos-cpufreq.c index 5e98c6b1f284..82d2fbb20f7e 100644 --- a/drivers/cpufreq/exynos-cpufreq.c +++ b/drivers/cpufreq/exynos-cpufreq.c @@ -159,7 +159,7 @@ static struct cpufreq_driver exynos_driver = { static int exynos_cpufreq_probe(struct platform_device *pdev) { - struct device_node *cpus, *np; + struct device_node *cpu0; int ret = -EINVAL; exynos_info = kzalloc(sizeof(*exynos_info), GFP_KERNEL); @@ -206,28 +206,19 @@ static int exynos_cpufreq_probe(struct platform_device *pdev) if (ret) goto err_cpufreq_reg; - cpus = of_find_node_by_path("/cpus"); - if (!cpus) { - pr_err("failed to find cpus node\n"); + cpu0 = of_get_cpu_node(0, NULL); + if (!cpu0) { + pr_err("failed to find cpu0 node\n"); return 0; } - np = of_get_next_child(cpus, NULL); - if (!np) { - pr_err("failed to find cpus child node\n"); - of_node_put(cpus); - return 0; - } - - if (of_find_property(np, "#cooling-cells", NULL)) { - cdev = of_cpufreq_cooling_register(np, + if (of_find_property(cpu0, "#cooling-cells", NULL)) { + cdev = of_cpufreq_cooling_register(cpu0, cpu_present_mask); if (IS_ERR(cdev)) pr_err("running cpufreq without cooling device: %ld\n", PTR_ERR(cdev)); } - of_node_put(np); - of_node_put(cpus); return 0; diff --git a/drivers/cpufreq/ppc-corenet-cpufreq.c b/drivers/cpufreq/ppc-corenet-cpufreq.c index bee5df7794d3..7cb4b766cf94 100644 --- a/drivers/cpufreq/ppc-corenet-cpufreq.c +++ b/drivers/cpufreq/ppc-corenet-cpufreq.c @@ -22,6 +22,8 @@ #include <linux/smp.h> #include <sysdev/fsl_soc.h> +#include <asm/smp.h> /* for get_hard_smp_processor_id() in UP configs */ + /** * struct cpu_data - per CPU data struct * @parent: the parent node of cpu clock diff --git a/drivers/cpuidle/cpuidle-mvebu-v7.c b/drivers/cpuidle/cpuidle-mvebu-v7.c index 38e68618513a..980151f34707 100644 --- a/drivers/cpuidle/cpuidle-mvebu-v7.c +++ b/drivers/cpuidle/cpuidle-mvebu-v7.c @@ -37,11 +37,11 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev, deepidle = true; ret = mvebu_v7_cpu_suspend(deepidle); + cpu_pm_exit(); + if (ret) return ret; - cpu_pm_exit(); - return index; } @@ -50,17 +50,17 @@ static struct cpuidle_driver armadaxp_idle_driver = { .states[0] = ARM_CPUIDLE_WFI_STATE, .states[1] = { .enter = mvebu_v7_enter_idle, - .exit_latency = 10, + .exit_latency = 100, .power_usage = 50, - .target_residency = 100, + .target_residency = 1000, .name = "MV CPU IDLE", .desc = "CPU power down", }, .states[2] = { .enter = mvebu_v7_enter_idle, - .exit_latency = 100, + .exit_latency = 1000, .power_usage = 5, - .target_residency = 1000, + .target_residency = 10000, .flags = MVEBU_V7_FLAG_DEEP_IDLE, .name = "MV CPU DEEP IDLE", .desc = "CPU and L2 Fabric power down", diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 4d534582514e..7a73a279e179 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -44,6 +44,12 @@ void disable_cpuidle(void) off = 1; } +bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + return off || !initialized || !drv || !dev || !dev->enabled; +} + /** * cpuidle_play_dead - cpu off-lining * @@ -66,14 +72,8 @@ int cpuidle_play_dead(void) return -ENODEV; } -/** - * cpuidle_find_deepest_state - Find deepest state meeting specific conditions. - * @drv: cpuidle driver for the given CPU. - * @dev: cpuidle device for the given CPU. - * @freeze: Whether or not the state should be suitable for suspend-to-idle. - */ -static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, - struct cpuidle_device *dev, bool freeze) +static int find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev, bool freeze) { unsigned int latency_req = 0; int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1; @@ -92,6 +92,17 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv, return ret; } +/** + * cpuidle_find_deepest_state - Find the deepest available idle state. + * @drv: cpuidle driver for the given CPU. + * @dev: cpuidle device for the given CPU. + */ +int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + return find_deepest_state(drv, dev, false); +} + static void enter_freeze_proper(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) { @@ -113,15 +124,14 @@ static void enter_freeze_proper(struct cpuidle_driver *drv, /** * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle. + * @drv: cpuidle driver for the given CPU. + * @dev: cpuidle device for the given CPU. * * If there are states with the ->enter_freeze callback, find the deepest of - * them and enter it with frozen tick. Otherwise, find the deepest state - * available and enter it normally. + * them and enter it with frozen tick. */ -void cpuidle_enter_freeze(void) +int cpuidle_enter_freeze(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int index; /* @@ -129,24 +139,11 @@ void cpuidle_enter_freeze(void) * that interrupts won't be enabled when it exits and allows the tick to * be frozen safely. */ - index = cpuidle_find_deepest_state(drv, dev, true); - if (index >= 0) { - enter_freeze_proper(drv, dev, index); - return; - } - - /* - * It is not safe to freeze the tick, find the deepest state available - * at all and try to enter it normally. - */ - index = cpuidle_find_deepest_state(drv, dev, false); + index = find_deepest_state(drv, dev, true); if (index >= 0) - cpuidle_enter(drv, dev, index); - else - arch_cpu_idle(); + enter_freeze_proper(drv, dev, index); - /* Interrupts are enabled again here. */ - local_irq_disable(); + return index; } /** @@ -205,12 +202,6 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, */ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - if (off || !initialized) - return -ENODEV; - - if (!drv || !dev || !dev->enabled) - return -EBUSY; - return cpuidle_curr_governor->select(drv, dev); } @@ -339,9 +330,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!dev->registered) return -EINVAL; - if (!dev->state_count) - dev->state_count = drv->state_count; - ret = cpuidle_add_device_sysfs(dev); if (ret) return ret; diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 2697e87d5b34..5db147859b90 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -13,7 +13,7 @@ #include <linux/sched.h> #include <linux/cpuidle.h> #include <linux/cpumask.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include "cpuidle.h" @@ -130,21 +130,20 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) #endif /** - * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer + * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu * @arg: a void pointer used to match the SMP cross call API * - * @arg is used as a value of type 'long' with one of the two values: - * - CLOCK_EVT_NOTIFY_BROADCAST_ON - * - CLOCK_EVT_NOTIFY_BROADCAST_OFF + * If @arg is NULL broadcast is disabled otherwise enabled * - * Set the broadcast timer notification for the current CPU. This function - * is executed per CPU by an SMP cross call. It not supposed to be called - * directly. + * This function is executed per CPU by an SMP cross call. It's not + * supposed to be called directly. */ static void cpuidle_setup_broadcast_timer(void *arg) { - int cpu = smp_processor_id(); - clockevents_notify((long)(arg), &cpu); + if (arg) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } /** @@ -239,7 +238,7 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) if (drv->bctimer) on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1); + (void *)1, 1); poll_idle_init(drv); @@ -263,7 +262,7 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) if (drv->bctimer) { drv->bctimer = 0; on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer, - (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1); + NULL, 1); } __cpuidle_unset_driver(drv); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 97c5903b4606..832a2c3f01ff 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -401,7 +401,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ - for (i = 0; i < device->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); if (!kobj) goto error_state; @@ -433,9 +433,10 @@ error_state: */ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) { + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); int i; - for (i = 0; i < device->state_count; i++) + for (i = 0; i < drv->state_count; i++) cpuidle_free_state_kobj(device, i); } diff --git a/drivers/dma-buf/fence.c b/drivers/dma-buf/fence.c index e5541117b3e9..50ef8bd8708b 100644 --- a/drivers/dma-buf/fence.c +++ b/drivers/dma-buf/fence.c @@ -159,6 +159,9 @@ fence_wait_timeout(struct fence *fence, bool intr, signed long timeout) if (WARN_ON(timeout < 0)) return -EINVAL; + if (timeout == 0) + return fence_is_signaled(fence); + trace_fence_wait_start(fence); ret = fence->ops->wait(fence, intr, timeout); trace_fence_wait_end(fence); diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 3c97c8fa8d02..39920d77f288 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -327,6 +327,9 @@ long reservation_object_wait_timeout_rcu(struct reservation_object *obj, unsigned seq, shared_count, i = 0; long ret = timeout; + if (!timeout) + return reservation_object_test_signaled_rcu(obj, wait_all); + retry: fence = NULL; shared_count = 0; @@ -402,8 +405,6 @@ reservation_object_test_signaled_single(struct fence *passed_fence) int ret = 1; if (!test_bit(FENCE_FLAG_SIGNALED_BIT, &lfence->flags)) { - int ret; - fence = fence_get_rcu(lfence); if (!fence) return -1; diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 4a5fd245014e..83aa55d6fa5d 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -97,6 +97,12 @@ #define DRIVER_NAME "pl08xdmac" +#define PL80X_DMA_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + static struct amba_driver pl08x_amba_driver; struct pl08x_driver_data; @@ -2070,6 +2076,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) pl08x->memcpy.device_pause = pl08x_pause; pl08x->memcpy.device_resume = pl08x_resume; pl08x->memcpy.device_terminate_all = pl08x_terminate_all; + pl08x->memcpy.src_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM); + pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; /* Initialize slave engine */ dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask); @@ -2086,6 +2096,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) pl08x->slave.device_pause = pl08x_pause; pl08x->slave.device_resume = pl08x_resume; pl08x->slave.device_terminate_all = pl08x_terminate_all; + pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS; + pl08x->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + pl08x->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 1e1a4c567542..0b4fc6fb48ce 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -238,93 +238,126 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) } /* - * atc_get_current_descriptors - - * locate the descriptor which equal to physical address in DSCR - * @atchan: the channel we want to start - * @dscr_addr: physical descriptor address in DSCR + * atc_get_desc_by_cookie - get the descriptor of a cookie + * @atchan: the DMA channel + * @cookie: the cookie to get the descriptor for */ -static struct at_desc *atc_get_current_descriptors(struct at_dma_chan *atchan, - u32 dscr_addr) +static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, + dma_cookie_t cookie) { - struct at_desc *desc, *_desc, *child, *desc_cur = NULL; + struct at_desc *desc, *_desc; - list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { - if (desc->lli.dscr == dscr_addr) { - desc_cur = desc; - break; - } + list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) { + if (desc->txd.cookie == cookie) + return desc; + } - list_for_each_entry(child, &desc->tx_list, desc_node) { - if (child->lli.dscr == dscr_addr) { - desc_cur = child; - break; - } - } + list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) { + if (desc->txd.cookie == cookie) + return desc; } - return desc_cur; + return NULL; } -/* - * atc_get_bytes_left - - * Get the number of bytes residue in dma buffer, - * @chan: the channel we want to start +/** + * atc_calc_bytes_left - calculates the number of bytes left according to the + * value read from CTRLA. + * + * @current_len: the number of bytes left before reading CTRLA + * @ctrla: the value of CTRLA + * @desc: the descriptor containing the transfer width + */ +static inline int atc_calc_bytes_left(int current_len, u32 ctrla, + struct at_desc *desc) +{ + return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); +} + +/** + * atc_calc_bytes_left_from_reg - calculates the number of bytes left according + * to the current value of CTRLA. + * + * @current_len: the number of bytes left before reading CTRLA + * @atchan: the channel to read CTRLA for + * @desc: the descriptor containing the transfer width + */ +static inline int atc_calc_bytes_left_from_reg(int current_len, + struct at_dma_chan *atchan, struct at_desc *desc) +{ + u32 ctrla = channel_readl(atchan, CTRLA); + + return atc_calc_bytes_left(current_len, ctrla, desc); +} + +/** + * atc_get_bytes_left - get the number of bytes residue for a cookie + * @chan: DMA channel + * @cookie: transaction identifier to check status of */ -static int atc_get_bytes_left(struct dma_chan *chan) +static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) { struct at_dma_chan *atchan = to_at_dma_chan(chan); - struct at_dma *atdma = to_at_dma(chan->device); - int chan_id = atchan->chan_common.chan_id; struct at_desc *desc_first = atc_first_active(atchan); - struct at_desc *desc_cur; - int ret = 0, count = 0; + struct at_desc *desc; + int ret; + u32 ctrla, dscr; /* - * Initialize necessary values in the first time. - * remain_desc record remain desc length. + * If the cookie doesn't match to the currently running transfer then + * we can return the total length of the associated DMA transfer, + * because it is still queued. */ - if (atchan->remain_desc == 0) - /* First descriptor embedds the transaction length */ - atchan->remain_desc = desc_first->len; + desc = atc_get_desc_by_cookie(atchan, cookie); + if (desc == NULL) + return -EINVAL; + else if (desc != desc_first) + return desc->total_len; - /* - * This happens when current descriptor transfer complete. - * The residual buffer size should reduce current descriptor length. - */ - if (unlikely(test_bit(ATC_IS_BTC, &atchan->status))) { - clear_bit(ATC_IS_BTC, &atchan->status); - desc_cur = atc_get_current_descriptors(atchan, - channel_readl(atchan, DSCR)); - if (!desc_cur) { - ret = -EINVAL; - goto out; - } + /* cookie matches to the currently running transfer */ + ret = desc_first->total_len; - count = (desc_cur->lli.ctrla & ATC_BTSIZE_MAX) - << desc_first->tx_width; - if (atchan->remain_desc < count) { - ret = -EINVAL; - goto out; + if (desc_first->lli.dscr) { + /* hardware linked list transfer */ + + /* + * Calculate the residue by removing the length of the child + * descriptors already transferred from the total length. + * To get the current child descriptor we can use the value of + * the channel's DSCR register and compare it against the value + * of the hardware linked list structure of each child + * descriptor. + */ + + ctrla = channel_readl(atchan, CTRLA); + rmb(); /* ensure CTRLA is read before DSCR */ + dscr = channel_readl(atchan, DSCR); + + /* for the first descriptor we can be more accurate */ + if (desc_first->lli.dscr == dscr) + return atc_calc_bytes_left(ret, ctrla, desc_first); + + ret -= desc_first->len; + list_for_each_entry(desc, &desc_first->tx_list, desc_node) { + if (desc->lli.dscr == dscr) + break; + + ret -= desc->len; } - atchan->remain_desc -= count; - ret = atchan->remain_desc; - } else { /* - * Get residual bytes when current - * descriptor transfer in progress. + * For the last descriptor in the chain we can calculate + * the remaining bytes using the channel's register. + * Note that the transfer width of the first and last + * descriptor may differ. */ - count = (channel_readl(atchan, CTRLA) & ATC_BTSIZE_MAX) - << (desc_first->tx_width); - ret = atchan->remain_desc - count; + if (!desc->lli.dscr) + ret = atc_calc_bytes_left_from_reg(ret, atchan, desc); + } else { + /* single transfer */ + ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first); } - /* - * Check fifo empty. - */ - if (!(dma_readl(atdma, CHSR) & AT_DMA_EMPT(chan_id))) - atc_issue_pending(chan); -out: return ret; } @@ -539,8 +572,6 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) /* Give information to tasklet */ set_bit(ATC_IS_ERROR, &atchan->status); } - if (pending & AT_DMA_BTC(i)) - set_bit(ATC_IS_BTC, &atchan->status); tasklet_schedule(&atchan->tasklet); ret = IRQ_HANDLED; } @@ -653,14 +684,18 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lli.ctrlb = ctrlb; desc->txd.cookie = 0; + desc->len = xfer_count << src_width; atc_desc_chain(&first, &prev, desc); } /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; - first->len = len; + first->total_len = len; + + /* set transfer width for the calculation of the residue */ first->tx_width = src_width; + prev->tx_width = src_width; /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -752,6 +787,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | ATC_SRC_WIDTH(mem_width) | len >> mem_width; desc->lli.ctrlb = ctrlb; + desc->len = len; atc_desc_chain(&first, &prev, desc); total_len += len; @@ -792,6 +828,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | ATC_DST_WIDTH(mem_width) | len >> reg_width; desc->lli.ctrlb = ctrlb; + desc->len = len; atc_desc_chain(&first, &prev, desc); total_len += len; @@ -806,8 +843,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; - first->len = total_len; + first->total_len = total_len; + + /* set transfer width for the calculation of the residue */ first->tx_width = reg_width; + prev->tx_width = reg_width; /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -872,6 +912,7 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, | ATC_FC_MEM2PER | ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if); + desc->len = period_len; break; case DMA_DEV_TO_MEM: @@ -883,6 +924,7 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, | ATC_FC_PER2MEM | ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if); + desc->len = period_len; break; default: @@ -964,7 +1006,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; - first->len = buf_len; + first->total_len = buf_len; first->tx_width = reg_width; return &first->txd; @@ -1118,7 +1160,7 @@ atc_tx_status(struct dma_chan *chan, spin_lock_irqsave(&atchan->lock, flags); /* Get number of bytes left in the active transactions */ - bytes = atc_get_bytes_left(chan); + bytes = atc_get_bytes_left(chan, cookie); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1214,7 +1256,6 @@ static int atc_alloc_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&atchan->lock, flags); atchan->descs_allocated = i; - atchan->remain_desc = 0; list_splice(&tmp_list, &atchan->free_list); dma_cookie_init(chan); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1257,7 +1298,6 @@ static void atc_free_chan_resources(struct dma_chan *chan) list_splice_init(&atchan->free_list, &list); atchan->descs_allocated = 0; atchan->status = 0; - atchan->remain_desc = 0; dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index d6bba6c636c2..2727ca560572 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -181,8 +181,9 @@ struct at_lli { * @at_lli: hardware lli structure * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list - * @len: total transaction bytecount + * @len: descriptor byte count * @tx_width: transfer width + * @total_len: total transaction byte count */ struct at_desc { /* FIRST values the hardware uses */ @@ -194,6 +195,7 @@ struct at_desc { struct list_head desc_node; size_t len; u32 tx_width; + size_t total_len; }; static inline struct at_desc * @@ -213,7 +215,6 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) enum atc_status { ATC_IS_ERROR = 0, ATC_IS_PAUSED = 1, - ATC_IS_BTC = 2, ATC_IS_CYCLIC = 24, }; @@ -231,7 +232,6 @@ enum atc_status { * @save_cfg: configuration register that is saved on suspend/resume cycle * @save_dscr: for cyclic operations, preserve next descriptor address in * the cyclic list on suspend/resume cycle - * @remain_desc: to save remain desc length * @dma_sconfig: configuration for slave transfers, passed via * .device_config * @lock: serializes enqueue/dequeue operations to descriptors lists @@ -251,7 +251,6 @@ struct at_dma_chan { struct tasklet_struct tasklet; u32 save_cfg; u32 save_dscr; - u32 remain_desc; struct dma_slave_config dma_sconfig; spinlock_t lock; diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 09e2825a547a..d9891d3461f6 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -664,7 +664,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, struct at_xdmac_desc *first = NULL, *prev = NULL; unsigned int periods = buf_len / period_len; int i; - u32 cfg; dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n", __func__, &buf_addr, buf_len, period_len, @@ -700,17 +699,17 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, if (direction == DMA_DEV_TO_MEM) { desc->lld.mbr_sa = atchan->per_src_addr; desc->lld.mbr_da = buf_addr + i * period_len; - cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG]; + desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG]; } else { desc->lld.mbr_sa = buf_addr + i * period_len; desc->lld.mbr_da = atchan->per_dst_addr; - cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG]; + desc->lld.mbr_cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG]; } desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN | AT_XDMAC_MBR_UBC_NDE - | period_len >> at_xdmac_get_dwidth(cfg); + | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n", diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index 0723096fb50a..c92d6a70ccf3 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -475,6 +475,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan) * c->desc is NULL and exit.) */ if (c->desc) { + bcm2835_dma_desc_free(&c->desc->vd); c->desc = NULL; bcm2835_dma_abort(c->chan_base); diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 512cb8e2805e..ceedafbd23e0 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -903,6 +903,11 @@ static const struct cppi_glue_infos *get_glue_info(struct device *dev) return of_id->data; } +#define CPPI41_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + static int cppi41_dma_probe(struct platform_device *pdev) { struct cppi41_dd *cdd; @@ -926,6 +931,10 @@ static int cppi41_dma_probe(struct platform_device *pdev) cdd->ddev.device_issue_pending = cppi41_dma_issue_pending; cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg; cdd->ddev.device_terminate_all = cppi41_stop_chan; + cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS; + cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS; + cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; cdd->ddev.dev = dev; INIT_LIST_HEAD(&cdd->ddev.channels); cpp41_dma_info.dma_cap = cdd->ddev.cap_mask; diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index 4527a3ebeac4..84884418fd30 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -511,6 +511,9 @@ static void jz4740_dma_desc_free(struct virt_dma_desc *vdesc) kfree(container_of(vdesc, struct jz4740_dma_desc, vdesc)); } +#define JZ4740_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + static int jz4740_dma_probe(struct platform_device *pdev) { struct jz4740_dmaengine_chan *chan; @@ -548,6 +551,10 @@ static int jz4740_dma_probe(struct platform_device *pdev) dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic; dd->device_config = jz4740_dma_slave_config; dd->device_terminate_all = jz4740_dma_terminate_all; + dd->src_addr_widths = JZ4740_DMA_BUSWIDTHS; + dd->dst_addr_widths = JZ4740_DMA_BUSWIDTHS; + dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; dd->dev = &pdev->dev; INIT_LIST_HEAD(&dd->channels); diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index f15712f2fec6..ac336a961dea 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -859,9 +859,6 @@ int dma_async_device_register(struct dma_device *device) BUG_ON(!device->device_issue_pending); BUG_ON(!device->dev); - WARN(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->directions, - "this driver doesn't support generic slave capabilities reporting\n"); - /* note: this only matters in the * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case */ diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 455b7a4f1e87..a8ad05291b27 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -626,7 +626,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status); /* Check if we have any interrupt from the DMAC */ - if (!status) + if (!status || !dw->in_use) return IRQ_NONE; /* diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 6565a361e7e5..b2c3ae071429 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -26,6 +26,8 @@ #include "internal.h" +#define DRV_NAME "dw_dmac" + static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { @@ -284,7 +286,7 @@ static struct platform_driver dw_driver = { .remove = dw_remove, .shutdown = dw_shutdown, .driver = { - .name = "dw_dmac", + .name = DRV_NAME, .pm = &dw_dev_pm_ops, .of_match_table = of_match_ptr(dw_dma_of_id_table), .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), @@ -305,3 +307,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 276157f22612..53dbd3b3384c 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -260,6 +260,13 @@ static int edma_terminate_all(struct dma_chan *chan) */ if (echan->edesc) { int cyclic = echan->edesc->cyclic; + + /* + * free the running request descriptor + * since it is not in any of the vdesc lists + */ + edma_desc_free(&echan->edesc->vdesc); + echan->edesc = NULL; edma_stop(echan->ch_num); /* Move the cyclic channel back to default queue */ diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 18c0a131e4e4..66a0efb9651d 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -531,6 +531,10 @@ static int sdma_run_channel0(struct sdma_engine *sdma) dev_err(sdma->dev, "Timeout waiting for CH0 ready\n"); } + /* Set bits of CONFIG register with dynamic context switching */ + if (readl(sdma->regs + SDMA_H_CONFIG) == 0) + writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG); + return ret ? 0 : -ETIMEDOUT; } @@ -1394,9 +1398,6 @@ static int sdma_init(struct sdma_engine *sdma) writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR); - /* Set bits of CONFIG register with given context switching mode */ - writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG); - /* Initializes channel's priorities */ sdma_set_channel_priority(&sdma->channel[0], 7); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 77a6dcf25b98..194ec20c9408 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -230,6 +230,10 @@ static bool is_bwd_noraid(struct pci_dev *pdev) switch (pdev->device) { case PCI_DEVICE_ID_INTEL_IOAT_BWD2: case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: return true; default: return false; diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index 8926f271904e..eb410044e1af 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -219,6 +219,9 @@ static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id) while (dint) { i = __ffs(dint); + /* only handle interrupts belonging to pdma driver*/ + if (i >= pdev->dma_channels) + break; dint &= (dint - 1); phy = &pdev->phy[i]; ret = mmp_pdma_chan_handler(irq, phy); @@ -999,6 +1002,9 @@ static int mmp_pdma_probe(struct platform_device *op) struct resource *iores; int i, ret, irq = 0; int dma_channels = 0, irq_num = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); if (!pdev) @@ -1066,6 +1072,10 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.src_addr_widths = widths; + pdev->device.dst_addr_widths = widths; + pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; if (pdev->dev->coherent_dma_mask) dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask); diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index 70c2fa9963cd..b6f4e1fc9c78 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -110,7 +110,7 @@ struct mmp_tdma_chan { struct tasklet_struct tasklet; struct mmp_tdma_desc *desc_arr; - phys_addr_t desc_arr_phys; + dma_addr_t desc_arr_phys; int desc_num; enum dma_transfer_direction dir; dma_addr_t dev_addr; @@ -166,9 +166,12 @@ static void mmp_tdma_enable_chan(struct mmp_tdma_chan *tdmac) static int mmp_tdma_disable_chan(struct dma_chan *chan) { struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + u32 tdcr; - writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN, - tdmac->reg_base + TDCR); + tdcr = readl(tdmac->reg_base + TDCR); + tdcr |= TDCR_ABR; + tdcr &= ~TDCR_CHANEN; + writel(tdcr, tdmac->reg_base + TDCR); tdmac->status = DMA_COMPLETE; @@ -296,12 +299,27 @@ static int mmp_tdma_clear_chan_irq(struct mmp_tdma_chan *tdmac) return -EAGAIN; } +static size_t mmp_tdma_get_pos(struct mmp_tdma_chan *tdmac) +{ + size_t reg; + + if (tdmac->idx == 0) { + reg = __raw_readl(tdmac->reg_base + TDSAR); + reg -= tdmac->desc_arr[0].src_addr; + } else if (tdmac->idx == 1) { + reg = __raw_readl(tdmac->reg_base + TDDAR); + reg -= tdmac->desc_arr[0].dst_addr; + } else + return -EINVAL; + + return reg; +} + static irqreturn_t mmp_tdma_chan_handler(int irq, void *dev_id) { struct mmp_tdma_chan *tdmac = dev_id; if (mmp_tdma_clear_chan_irq(tdmac) == 0) { - tdmac->pos = (tdmac->pos + tdmac->period_len) % tdmac->buf_len; tasklet_schedule(&tdmac->tasklet); return IRQ_HANDLED; } else @@ -343,7 +361,7 @@ static void mmp_tdma_free_descriptor(struct mmp_tdma_chan *tdmac) int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc); gpool = tdmac->pool; - if (tdmac->desc_arr) + if (gpool && tdmac->desc_arr) gen_pool_free(gpool, (unsigned long)tdmac->desc_arr, size); tdmac->desc_arr = NULL; @@ -499,6 +517,7 @@ static enum dma_status mmp_tdma_tx_status(struct dma_chan *chan, { struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); + tdmac->pos = mmp_tdma_get_pos(tdmac); dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, tdmac->buf_len - tdmac->pos); @@ -610,7 +629,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) int i, ret; int irq = 0, irq_num = 0; int chan_num = TDMA_CHANNEL_NUM; - struct gen_pool *pool; + struct gen_pool *pool = NULL; of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev); if (of_id) diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c index 15cab7d79525..b4634109e010 100644 --- a/drivers/dma/moxart-dma.c +++ b/drivers/dma/moxart-dma.c @@ -193,8 +193,10 @@ static int moxart_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&ch->vc.lock, flags); - if (ch->desc) + if (ch->desc) { + moxart_dma_desc_free(&ch->desc->vd); ch->desc = NULL; + } ctrl = readl(ch->base + REG_OFF_CTRL); ctrl &= ~(APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN); diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 7dd6dd121681..167dbaf65742 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -981,6 +981,7 @@ static int omap_dma_terminate_all(struct dma_chan *chan) * c->desc is NULL and exit.) */ if (c->desc) { + omap_dma_desc_free(&c->desc->vd); c->desc = NULL; /* Avoid stopping the dma twice */ if (!c->paused) diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c index d7a33b3ac466..9c914d625906 100644 --- a/drivers/dma/qcom_bam_dma.c +++ b/drivers/dma/qcom_bam_dma.c @@ -162,9 +162,9 @@ static const struct reg_offset_data bam_v1_4_reg_info[] = { [BAM_P_IRQ_STTS] = { 0x1010, 0x1000, 0x00, 0x00 }, [BAM_P_IRQ_CLR] = { 0x1014, 0x1000, 0x00, 0x00 }, [BAM_P_IRQ_EN] = { 0x1018, 0x1000, 0x00, 0x00 }, - [BAM_P_EVNT_DEST_ADDR] = { 0x102C, 0x00, 0x1000, 0x00 }, - [BAM_P_EVNT_REG] = { 0x1018, 0x00, 0x1000, 0x00 }, - [BAM_P_SW_OFSTS] = { 0x1000, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_DEST_ADDR] = { 0x182C, 0x00, 0x1000, 0x00 }, + [BAM_P_EVNT_REG] = { 0x1818, 0x00, 0x1000, 0x00 }, + [BAM_P_SW_OFSTS] = { 0x1800, 0x00, 0x1000, 0x00 }, [BAM_P_DATA_FIFO_ADDR] = { 0x1824, 0x00, 0x1000, 0x00 }, [BAM_P_DESC_FIFO_ADDR] = { 0x181C, 0x00, 0x1000, 0x00 }, [BAM_P_EVNT_GEN_TRSHLD] = { 0x1828, 0x00, 0x1000, 0x00 }, @@ -1143,6 +1143,10 @@ static int bam_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, bdev->common.cap_mask); /* initialize dmaengine apis */ + bdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + bdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + bdev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; + bdev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES; bdev->common.device_alloc_chan_resources = bam_alloc_chan; bdev->common.device_free_chan_resources = bam_free_chan; bdev->common.device_prep_slave_sg = bam_prep_slave_sg; diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c index b2431aa30033..9f1d4c7dbab8 100644 --- a/drivers/dma/sh/shdmac.c +++ b/drivers/dma/sh/shdmac.c @@ -582,15 +582,12 @@ static void sh_dmae_chan_remove(struct sh_dmae_device *shdev) } } -static void sh_dmae_shutdown(struct platform_device *pdev) -{ - struct sh_dmae_device *shdev = platform_get_drvdata(pdev); - sh_dmae_ctl_stop(shdev); -} - #ifdef CONFIG_PM static int sh_dmae_runtime_suspend(struct device *dev) { + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + sh_dmae_ctl_stop(shdev); return 0; } @@ -605,6 +602,9 @@ static int sh_dmae_runtime_resume(struct device *dev) #ifdef CONFIG_PM_SLEEP static int sh_dmae_suspend(struct device *dev) { + struct sh_dmae_device *shdev = dev_get_drvdata(dev); + + sh_dmae_ctl_stop(shdev); return 0; } @@ -929,13 +929,12 @@ static int sh_dmae_remove(struct platform_device *pdev) } static struct platform_driver sh_dmae_driver = { - .driver = { + .driver = { .pm = &sh_dmae_pm, .name = SH_DMAE_DRV_NAME, .of_match_table = sh_dmae_of_match, }, .remove = sh_dmae_remove, - .shutdown = sh_dmae_shutdown, }; static int __init sh_dmae_init(void) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index c5f7b4e9eb6c..6e45a43ffe84 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -17,7 +17,9 @@ */ static const char dmi_empty_string[] = " "; -static u16 __initdata dmi_ver; +static u32 dmi_ver __initdata; +static u32 dmi_len; +static u16 dmi_num; /* * Catch too early calls to dmi_check_system(): */ @@ -78,7 +80,7 @@ static const char * __init dmi_string(const struct dmi_header *dm, u8 s) * We have to be cautious here. We have seen BIOSes with DMI pointers * pointing to completely the wrong place for example */ -static void dmi_table(u8 *buf, int len, int num, +static void dmi_table(u8 *buf, void (*decode)(const struct dmi_header *, void *), void *private_data) { @@ -86,36 +88,38 @@ static void dmi_table(u8 *buf, int len, int num, int i = 0; /* - * Stop when we see all the items the table claimed to have - * OR we run off the end of the table (also happens) + * Stop when we have seen all the items the table claimed to have + * (SMBIOS < 3.0 only) OR we reach an end-of-table marker OR we run + * off the end of the table (should never happen but sometimes does + * on bogus implementations.) */ - while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) { + while ((!dmi_num || i < dmi_num) && + (data - buf + sizeof(struct dmi_header)) <= dmi_len) { const struct dmi_header *dm = (const struct dmi_header *)data; /* - * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0] - */ - if (dm->type == DMI_ENTRY_END_OF_TABLE) - break; - - /* * We want to know the total length (formatted area and * strings) before decoding to make sure we won't run off the * table in dmi_decode or dmi_string */ data += dm->length; - while ((data - buf < len - 1) && (data[0] || data[1])) + while ((data - buf < dmi_len - 1) && (data[0] || data[1])) data++; - if (data - buf < len - 1) + if (data - buf < dmi_len - 1) decode(dm, private_data); + + /* + * 7.45 End-of-Table (Type 127) [SMBIOS reference spec v3.0.0] + */ + if (dm->type == DMI_ENTRY_END_OF_TABLE) + break; + data += 2; i++; } } static phys_addr_t dmi_base; -static u16 dmi_len; -static u16 dmi_num; static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, void *)) @@ -126,7 +130,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *, if (buf == NULL) return -1; - dmi_table(buf, dmi_len, dmi_num, decode, NULL); + dmi_table(buf, decode, NULL); add_device_randomness(buf, dmi_len); @@ -197,7 +201,7 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, * the UUID are supposed to be little-endian encoded. The specification * says that this is the defacto standard. */ - if (dmi_ver >= 0x0206) + if (dmi_ver >= 0x020600) sprintf(s, "%pUL", d); else sprintf(s, "%pUB", d); @@ -469,7 +473,7 @@ static void __init dmi_format_ids(char *buf, size_t len) */ static int __init dmi_present(const u8 *buf) { - int smbios_ver; + u32 smbios_ver; if (memcmp(buf, "_SM_", 4) == 0 && buf[5] < 32 && dmi_checksum(buf, buf[5])) { @@ -502,14 +506,16 @@ static int __init dmi_present(const u8 *buf) if (dmi_walk_early(dmi_decode) == 0) { if (smbios_ver) { dmi_ver = smbios_ver; - pr_info("SMBIOS %d.%d present.\n", - dmi_ver >> 8, dmi_ver & 0xFF); + pr_info("SMBIOS %d.%d%s present.\n", + dmi_ver >> 8, dmi_ver & 0xFF, + (dmi_ver < 0x0300) ? "" : ".x"); } else { dmi_ver = (buf[14] & 0xF0) << 4 | (buf[14] & 0x0F); pr_info("Legacy DMI %d.%d present.\n", dmi_ver >> 8, dmi_ver & 0xFF); } + dmi_ver <<= 8; dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string)); printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string); return 0; @@ -527,25 +533,16 @@ static int __init dmi_smbios3_present(const u8 *buf) { if (memcmp(buf, "_SM3_", 5) == 0 && buf[6] < 32 && dmi_checksum(buf, buf[6])) { - dmi_ver = get_unaligned_be16(buf + 7); + dmi_ver = get_unaligned_be32(buf + 6); + dmi_ver &= 0xFFFFFF; + dmi_num = 0; /* No longer specified */ dmi_len = get_unaligned_le32(buf + 12); dmi_base = get_unaligned_le64(buf + 16); - /* - * The 64-bit SMBIOS 3.0 entry point no longer has a field - * containing the number of structures present in the table. - * Instead, it defines the table size as a maximum size, and - * relies on the end-of-table structure type (#127) to be used - * to signal the end of the table. - * So let's define dmi_num as an upper bound as well: each - * structure has a 4 byte header, so dmi_len / 4 is an upper - * bound for the number of structures in the table. - */ - dmi_num = dmi_len / 4; - if (dmi_walk_early(dmi_decode) == 0) { - pr_info("SMBIOS %d.%d present.\n", - dmi_ver >> 8, dmi_ver & 0xFF); + pr_info("SMBIOS %d.%d.%d present.\n", + dmi_ver >> 16, (dmi_ver >> 8) & 0xFF, + dmi_ver & 0xFF); dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string)); pr_debug("DMI: %s\n", dmi_ids_string); return 0; @@ -900,7 +897,7 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *), if (buf == NULL) return -1; - dmi_table(buf, dmi_len, dmi_num, decode, private_data); + dmi_table(buf, decode, private_data); dmi_unmap(buf); return 0; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index dcae482a9a17..e29560e6b40b 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -175,7 +175,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, unsigned long initrd_addr; u64 initrd_size = 0; unsigned long fdt_addr = 0; /* Original DTB */ - u64 fdt_size = 0; /* We don't get size from configuration table */ + unsigned long fdt_size = 0; char *cmdline_ptr = NULL; int cmdline_size = 0; unsigned long new_fdt_addr; @@ -239,8 +239,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, } else { status = handle_cmdline_files(sys_table, image, cmdline_ptr, "dtb=", - ~0UL, (unsigned long *)&fdt_addr, - (unsigned long *)&fdt_size); + ~0UL, &fdt_addr, &fdt_size); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Failed to load device tree!\n"); @@ -252,7 +251,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi(sys_table, "Using DTB from command line\n"); } else { /* Look for a device tree configuration table entry. */ - fdt_addr = (uintptr_t)get_fdt(sys_table); + fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size); if (fdt_addr) pr_efi(sys_table, "Using DTB from configuration table\n"); } diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 2fe195002021..f07d4a67fa76 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -179,12 +179,12 @@ again: start = desc->phys_addr; end = start + desc->num_pages * (1UL << EFI_PAGE_SHIFT); - if ((start + size) > end || (start + size) > max) - continue; - - if (end - size > max) + if (end > max) end = max; + if ((start + size) > end) + continue; + if (round_down(end - size, align) < start) continue; diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 47437b16b186..e334a01cf92f 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -41,7 +41,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long fdt_addr, unsigned long fdt_size); -void *get_fdt(efi_system_table_t *sys_table); +void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size); void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, unsigned long desc_size, efi_memory_desc_t *runtime_map, diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 91da56c4fd54..ef5d764e2a27 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -323,7 +323,7 @@ fail: return EFI_LOAD_ERROR; } -void *get_fdt(efi_system_table_t *sys_table) +void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size) { efi_guid_t fdt_guid = DEVICE_TREE_GUID; efi_config_table_t *tables; @@ -336,6 +336,11 @@ void *get_fdt(efi_system_table_t *sys_table) for (i = 0; i < sys_table->nr_tables; i++) if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) { fdt = (void *) tables[i].table; + if (fdt_check_header(fdt) != 0) { + pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n"); + return NULL; + } + *fdt_size = fdt_totalsize(fdt); break; } diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index a6952ba343a8..a65b75161aa4 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -334,7 +334,7 @@ static struct irq_domain_ops mpc8xxx_gpio_irq_ops = { .xlate = irq_domain_xlate_twocell, }; -static struct of_device_id mpc8xxx_gpio_ids[] __initdata = { +static struct of_device_id mpc8xxx_gpio_ids[] = { { .compatible = "fsl,mpc8349-gpio", }, { .compatible = "fsl,mpc8572-gpio", }, { .compatible = "fsl,mpc8610-gpio", }, diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index 257e2989215c..045a952576c7 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -219,7 +219,7 @@ static int syscon_gpio_probe(struct platform_device *pdev) ret = of_property_read_u32_index(np, "gpio,syscon-dev", 2, &priv->dir_reg_offset); if (ret) - dev_err(dev, "can't read the dir register offset!\n"); + dev_dbg(dev, "can't read the dir register offset!\n"); priv->dir_reg_offset <<= 3; } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index c0929d938ced..df990f29757a 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -201,6 +201,10 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (!handler) return AE_BAD_PARAMETER; + pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin); + if (pin < 0) + return AE_BAD_PARAMETER; + desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->dev, "Failed to request GPIO\n"); @@ -551,6 +555,12 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, struct gpio_desc *desc; bool found; + pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin); + if (pin < 0) { + status = AE_BAD_PARAMETER; + goto out; + } + mutex_lock(&achip->conn_lock); found = false; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 910ff8ab9c9c..d8135adb2238 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -645,6 +645,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id); pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id); + init_sdma_vm(dqm, q, qpd); retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval != 0) { @@ -652,7 +653,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, return retval; } - init_sdma_vm(dqm, q, qpd); + retval = mqd->load_mqd(mqd, q->mqd, 0, + 0, NULL); + if (retval != 0) { + deallocate_sdma_queue(dqm, q->sdma_id); + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + return retval; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index e415a2a9207e..c7d298e62c96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -44,7 +44,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, BUG_ON(!kq || !dev); BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); - pr_debug("kfd: In func %s initializing queue type %d size %d\n", + pr_debug("amdkfd: In func %s initializing queue type %d size %d\n", __func__, KFD_QUEUE_TYPE_HIQ, queue_size); nop.opcode = IT_NOP; @@ -69,12 +69,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); - if (prop.doorbell_ptr == NULL) + if (prop.doorbell_ptr == NULL) { + pr_err("amdkfd: error init doorbell"); goto err_get_kernel_doorbell; + } retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq); - if (retval != 0) + if (retval != 0) { + pr_err("amdkfd: error init pq queues size (%d)\n", queue_size); goto err_pq_allocate_vidmem; + } kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; @@ -165,10 +169,8 @@ err_rptr_allocate_vidmem: err_eop_allocate_vidmem: kfd_gtt_sa_free(dev, kq->pq); err_pq_allocate_vidmem: - pr_err("kfd: error init pq\n"); kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); err_get_kernel_doorbell: - pr_err("kfd: error init doorbell"); return false; } @@ -187,6 +189,8 @@ static void uninitialize(struct kernel_queue *kq) else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ) kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj); + kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj); + kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); kq->ops_asic_specific.uninitialize(kq); @@ -211,7 +215,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); - pr_debug("kfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", + pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", __func__, rptr, wptr, queue_address); available_size = (rptr - 1 - wptr + queue_size_dwords) % @@ -296,7 +300,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, } if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { - pr_err("kfd: failed to init kernel queue\n"); + pr_err("amdkfd: failed to init kernel queue\n"); kfree(kq); return NULL; } @@ -319,7 +323,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) BUG_ON(!dev); - pr_err("kfd: starting kernel queue test\n"); + pr_err("amdkfd: starting kernel queue test\n"); kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); BUG_ON(!kq); @@ -330,7 +334,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev) buffer[i] = kq->nop_packet; kq->ops.submit_packet(kq); - pr_err("kfd: ending kernel queue test\n"); + pr_err("amdkfd: ending kernel queue test\n"); } diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 6b6b07ff720b..b6f076b213bc 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -43,9 +43,10 @@ #include "drm_crtc_internal.h" #include "drm_internal.h" -static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, - struct drm_mode_fb_cmd2 *r, - struct drm_file *file_priv); +static struct drm_framebuffer * +internal_framebuffer_create(struct drm_device *dev, + struct drm_mode_fb_cmd2 *r, + struct drm_file *file_priv); /* Avoid boilerplate. I'm tired of typing. */ #define DRM_ENUM_NAME_FN(fnname, list) \ @@ -524,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb) } EXPORT_SYMBOL(drm_framebuffer_reference); -static void drm_framebuffer_free_bug(struct kref *kref) -{ - BUG(); -} - -static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) -{ - DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount)); - kref_put(&fb->refcount, drm_framebuffer_free_bug); -} - /** * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr * @fb: fb to unregister @@ -1319,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane) return; } /* disconnect the plane from the fb and crtc: */ - __drm_framebuffer_unreference(plane->old_fb); + drm_framebuffer_unreference(plane->old_fb); plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; @@ -2131,7 +2121,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, connector = drm_connector_find(dev, out_resp->connector_id); if (!connector) { ret = -ENOENT; - goto out; + goto out_unlock; } for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) @@ -2211,6 +2201,8 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, out: drm_modeset_unlock(&dev->mode_config.connection_mutex); + +out_unlock: mutex_unlock(&dev->mode_config.mutex); return ret; @@ -2908,13 +2900,11 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, */ if (req->flags & DRM_MODE_CURSOR_BO) { if (req->handle) { - fb = add_framebuffer_internal(dev, &fbreq, file_priv); + fb = internal_framebuffer_create(dev, &fbreq, file_priv); if (IS_ERR(fb)) { DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n"); return PTR_ERR(fb); } - - drm_framebuffer_reference(fb); } else { fb = NULL; } @@ -3267,9 +3257,10 @@ static int framebuffer_check(const struct drm_mode_fb_cmd2 *r) return 0; } -static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, - struct drm_mode_fb_cmd2 *r, - struct drm_file *file_priv) +static struct drm_framebuffer * +internal_framebuffer_create(struct drm_device *dev, + struct drm_mode_fb_cmd2 *r, + struct drm_file *file_priv) { struct drm_mode_config *config = &dev->mode_config; struct drm_framebuffer *fb; @@ -3301,12 +3292,6 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, return fb; } - mutex_lock(&file_priv->fbs_lock); - r->fb_id = fb->base.id; - list_add(&fb->filp_head, &file_priv->fbs); - DRM_DEBUG_KMS("[FB:%d]\n", fb->base.id); - mutex_unlock(&file_priv->fbs_lock); - return fb; } @@ -3328,15 +3313,24 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, int drm_mode_addfb2(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_mode_fb_cmd2 *r = data; struct drm_framebuffer *fb; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; - fb = add_framebuffer_internal(dev, data, file_priv); + fb = internal_framebuffer_create(dev, r, file_priv); if (IS_ERR(fb)) return PTR_ERR(fb); + /* Transfer ownership to the filp for reaping on close */ + + DRM_DEBUG_KMS("[FB:%d]\n", fb->base.id); + mutex_lock(&file_priv->fbs_lock); + r->fb_id = fb->base.id; + list_add(&fb->filp_head, &file_priv->fbs); + mutex_unlock(&file_priv->fbs_lock); + return 0; } diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 9a5b68717ec8..379ab4555756 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -733,10 +733,14 @@ static bool check_txmsg_state(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_sideband_msg_tx *txmsg) { bool ret; - mutex_lock(&mgr->qlock); + + /* + * All updates to txmsg->state are protected by mgr->qlock, and the two + * cases we check here are terminal states. For those the barriers + * provided by the wake_up/wait_event pair are enough. + */ ret = (txmsg->state == DRM_DP_SIDEBAND_TX_RX || txmsg->state == DRM_DP_SIDEBAND_TX_TIMEOUT); - mutex_unlock(&mgr->qlock); return ret; } @@ -1363,12 +1367,13 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, return 0; } -/* must be called holding qlock */ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) { struct drm_dp_sideband_msg_tx *txmsg; int ret; + WARN_ON(!mutex_is_locked(&mgr->qlock)); + /* construct a chunk from the first msg in the tx_msg queue */ if (list_empty(&mgr->tx_msg_downq)) { mgr->tx_down_in_progress = false; diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c index 732cb6f8e653..4c0aa97aaf03 100644 --- a/drivers/gpu/drm/drm_edid_load.c +++ b/drivers/gpu/drm/drm_edid_load.c @@ -287,6 +287,7 @@ int drm_load_edid_firmware(struct drm_connector *connector) drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); + drm_edid_to_eld(connector, edid); kfree(edid); return ret; diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 04a209e2b66d..1134526286c8 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -91,29 +91,29 @@ */ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, enum drm_mm_search_flags flags); static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, - unsigned long end, + u64 start, + u64 end, enum drm_mm_search_flags flags); static void drm_mm_insert_helper(struct drm_mm_node *hole_node, struct drm_mm_node *node, - unsigned long size, unsigned alignment, + u64 size, unsigned alignment, unsigned long color, enum drm_mm_allocator_flags flags) { struct drm_mm *mm = hole_node->mm; - unsigned long hole_start = drm_mm_hole_node_start(hole_node); - unsigned long hole_end = drm_mm_hole_node_end(hole_node); - unsigned long adj_start = hole_start; - unsigned long adj_end = hole_end; + u64 hole_start = drm_mm_hole_node_start(hole_node); + u64 hole_end = drm_mm_hole_node_end(hole_node); + u64 adj_start = hole_start; + u64 adj_end = hole_end; BUG_ON(node->allocated); @@ -124,12 +124,15 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node, adj_start = adj_end - size; if (alignment) { - unsigned tmp = adj_start % alignment; - if (tmp) { + u64 tmp = adj_start; + unsigned rem; + + rem = do_div(tmp, alignment); + if (rem) { if (flags & DRM_MM_CREATE_TOP) - adj_start -= tmp; + adj_start -= rem; else - adj_start += alignment - tmp; + adj_start += alignment - rem; } } @@ -176,9 +179,9 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node, int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node) { struct drm_mm_node *hole; - unsigned long end = node->start + node->size; - unsigned long hole_start; - unsigned long hole_end; + u64 end = node->start + node->size; + u64 hole_start; + u64 hole_end; BUG_ON(node == NULL); @@ -227,7 +230,7 @@ EXPORT_SYMBOL(drm_mm_reserve_node); * 0 on success, -ENOSPC if there's no suitable hole. */ int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, + u64 size, unsigned alignment, unsigned long color, enum drm_mm_search_flags sflags, enum drm_mm_allocator_flags aflags) @@ -246,16 +249,16 @@ EXPORT_SYMBOL(drm_mm_insert_node_generic); static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, struct drm_mm_node *node, - unsigned long size, unsigned alignment, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, unsigned long end, + u64 start, u64 end, enum drm_mm_allocator_flags flags) { struct drm_mm *mm = hole_node->mm; - unsigned long hole_start = drm_mm_hole_node_start(hole_node); - unsigned long hole_end = drm_mm_hole_node_end(hole_node); - unsigned long adj_start = hole_start; - unsigned long adj_end = hole_end; + u64 hole_start = drm_mm_hole_node_start(hole_node); + u64 hole_end = drm_mm_hole_node_end(hole_node); + u64 adj_start = hole_start; + u64 adj_end = hole_end; BUG_ON(!hole_node->hole_follows || node->allocated); @@ -271,12 +274,15 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, mm->color_adjust(hole_node, color, &adj_start, &adj_end); if (alignment) { - unsigned tmp = adj_start % alignment; - if (tmp) { + u64 tmp = adj_start; + unsigned rem; + + rem = do_div(tmp, alignment); + if (rem) { if (flags & DRM_MM_CREATE_TOP) - adj_start -= tmp; + adj_start -= rem; else - adj_start += alignment - tmp; + adj_start += alignment - rem; } } @@ -324,9 +330,9 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, * 0 on success, -ENOSPC if there's no suitable hole. */ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, unsigned long end, + u64 start, u64 end, enum drm_mm_search_flags sflags, enum drm_mm_allocator_flags aflags) { @@ -387,32 +393,34 @@ void drm_mm_remove_node(struct drm_mm_node *node) } EXPORT_SYMBOL(drm_mm_remove_node); -static int check_free_hole(unsigned long start, unsigned long end, - unsigned long size, unsigned alignment) +static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment) { if (end - start < size) return 0; if (alignment) { - unsigned tmp = start % alignment; - if (tmp) - start += alignment - tmp; + u64 tmp = start; + unsigned rem; + + rem = do_div(tmp, alignment); + if (rem) + start += alignment - rem; } return end >= start + size; } static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, enum drm_mm_search_flags flags) { struct drm_mm_node *entry; struct drm_mm_node *best; - unsigned long adj_start; - unsigned long adj_end; - unsigned long best_size; + u64 adj_start; + u64 adj_end; + u64 best_size; BUG_ON(mm->scanned_blocks); @@ -421,7 +429,7 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm, __drm_mm_for_each_hole(entry, mm, adj_start, adj_end, flags & DRM_MM_SEARCH_BELOW) { - unsigned long hole_size = adj_end - adj_start; + u64 hole_size = adj_end - adj_start; if (mm->color_adjust) { mm->color_adjust(entry, color, &adj_start, &adj_end); @@ -445,18 +453,18 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm, } static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, - unsigned long end, + u64 start, + u64 end, enum drm_mm_search_flags flags) { struct drm_mm_node *entry; struct drm_mm_node *best; - unsigned long adj_start; - unsigned long adj_end; - unsigned long best_size; + u64 adj_start; + u64 adj_end; + u64 best_size; BUG_ON(mm->scanned_blocks); @@ -465,7 +473,7 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_ __drm_mm_for_each_hole(entry, mm, adj_start, adj_end, flags & DRM_MM_SEARCH_BELOW) { - unsigned long hole_size = adj_end - adj_start; + u64 hole_size = adj_end - adj_start; if (adj_start < start) adj_start = start; @@ -561,7 +569,7 @@ EXPORT_SYMBOL(drm_mm_replace_node); * adding/removing nodes to/from the scan list are allowed. */ void drm_mm_init_scan(struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color) { @@ -594,11 +602,11 @@ EXPORT_SYMBOL(drm_mm_init_scan); * adding/removing nodes to/from the scan list are allowed. */ void drm_mm_init_scan_with_range(struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, - unsigned long end) + u64 start, + u64 end) { mm->scan_color = color; mm->scan_alignment = alignment; @@ -627,8 +635,8 @@ bool drm_mm_scan_add_block(struct drm_mm_node *node) { struct drm_mm *mm = node->mm; struct drm_mm_node *prev_node; - unsigned long hole_start, hole_end; - unsigned long adj_start, adj_end; + u64 hole_start, hole_end; + u64 adj_start, adj_end; mm->scanned_blocks++; @@ -731,7 +739,7 @@ EXPORT_SYMBOL(drm_mm_clean); * * Note that @mm must be cleared to 0 before calling this function. */ -void drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size) +void drm_mm_init(struct drm_mm * mm, u64 start, u64 size) { INIT_LIST_HEAD(&mm->hole_stack); mm->scanned_blocks = 0; @@ -766,18 +774,17 @@ void drm_mm_takedown(struct drm_mm * mm) } EXPORT_SYMBOL(drm_mm_takedown); -static unsigned long drm_mm_debug_hole(struct drm_mm_node *entry, - const char *prefix) +static u64 drm_mm_debug_hole(struct drm_mm_node *entry, + const char *prefix) { - unsigned long hole_start, hole_end, hole_size; + u64 hole_start, hole_end, hole_size; if (entry->hole_follows) { hole_start = drm_mm_hole_node_start(entry); hole_end = drm_mm_hole_node_end(entry); hole_size = hole_end - hole_start; - printk(KERN_DEBUG "%s 0x%08lx-0x%08lx: %8lu: free\n", - prefix, hole_start, hole_end, - hole_size); + pr_debug("%s %#llx-%#llx: %llu: free\n", prefix, hole_start, + hole_end, hole_size); return hole_size; } @@ -792,35 +799,34 @@ static unsigned long drm_mm_debug_hole(struct drm_mm_node *entry, void drm_mm_debug_table(struct drm_mm *mm, const char *prefix) { struct drm_mm_node *entry; - unsigned long total_used = 0, total_free = 0, total = 0; + u64 total_used = 0, total_free = 0, total = 0; total_free += drm_mm_debug_hole(&mm->head_node, prefix); drm_mm_for_each_node(entry, mm) { - printk(KERN_DEBUG "%s 0x%08lx-0x%08lx: %8lu: used\n", - prefix, entry->start, entry->start + entry->size, - entry->size); + pr_debug("%s %#llx-%#llx: %llu: used\n", prefix, entry->start, + entry->start + entry->size, entry->size); total_used += entry->size; total_free += drm_mm_debug_hole(entry, prefix); } total = total_free + total_used; - printk(KERN_DEBUG "%s total: %lu, used %lu free %lu\n", prefix, total, - total_used, total_free); + pr_debug("%s total: %llu, used %llu free %llu\n", prefix, total, + total_used, total_free); } EXPORT_SYMBOL(drm_mm_debug_table); #if defined(CONFIG_DEBUG_FS) -static unsigned long drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry) +static u64 drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry) { - unsigned long hole_start, hole_end, hole_size; + u64 hole_start, hole_end, hole_size; if (entry->hole_follows) { hole_start = drm_mm_hole_node_start(entry); hole_end = drm_mm_hole_node_end(entry); hole_size = hole_end - hole_start; - seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: free\n", - hole_start, hole_end, hole_size); + seq_printf(m, "%#llx-%#llx: %llu: free\n", hole_start, + hole_end, hole_size); return hole_size; } @@ -835,20 +841,20 @@ static unsigned long drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *en int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) { struct drm_mm_node *entry; - unsigned long total_used = 0, total_free = 0, total = 0; + u64 total_used = 0, total_free = 0, total = 0; total_free += drm_mm_dump_hole(m, &mm->head_node); drm_mm_for_each_node(entry, mm) { - seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: used\n", - entry->start, entry->start + entry->size, - entry->size); + seq_printf(m, "%#016llx-%#016llx: %llu: used\n", entry->start, + entry->start + entry->size, entry->size); total_used += entry->size; total_free += drm_mm_dump_hole(m, entry); } total = total_free + total_used; - seq_printf(m, "total: %lu, used %lu free %lu\n", total, total_used, total_free); + seq_printf(m, "total: %llu, used %llu free %llu\n", total, + total_used, total_free); return 0; } EXPORT_SYMBOL(drm_mm_dump_table); diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 6591d48c1b9d..3fee587bc284 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -174,6 +174,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect struct edid *edid = (struct edid *) connector->edid_blob_ptr->data; count = drm_add_edid_modes(connector, edid); + drm_edid_to_eld(connector, edid); } else count = (*connector_funcs->get_modes)(connector); } diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index a5e74612100e..0a6780367d28 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -50,7 +50,7 @@ config DRM_EXYNOS_DSI config DRM_EXYNOS_DP bool "EXYNOS DRM DP driver support" - depends on (DRM_EXYNOS_FIMD || DRM_EXYNOS7DECON) && ARCH_EXYNOS && (DRM_PTN3460=n || DRM_PTN3460=y || DRM_PTN3460=DRM_EXYNOS) + depends on (DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON) && ARCH_EXYNOS && (DRM_PTN3460=n || DRM_PTN3460=y || DRM_PTN3460=DRM_EXYNOS) default DRM_EXYNOS select DRM_PANEL help diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index 63f02e2380ae..970046199608 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -888,8 +888,8 @@ static int decon_probe(struct platform_device *pdev) of_node_put(i80_if_timings); ctx->regs = of_iomap(dev->of_node, 0); - if (IS_ERR(ctx->regs)) { - ret = PTR_ERR(ctx->regs); + if (!ctx->regs) { + ret = -ENOMEM; goto err_del_component; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c deleted file mode 100644 index ba9b3d5ed672..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_connector.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2011 Samsung Electronics Co., Ltd. - * Authors: - * Inki Dae <inki.dae@samsung.com> - * Joonyoung Shim <jy0922.shim@samsung.com> - * Seung-Woo Kim <sw0312.kim@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include <drm/drmP.h> -#include <drm/drm_crtc_helper.h> - -#include <drm/exynos_drm.h> -#include "exynos_drm_drv.h" -#include "exynos_drm_encoder.h" -#include "exynos_drm_connector.h" - -#define to_exynos_connector(x) container_of(x, struct exynos_drm_connector,\ - drm_connector) - -struct exynos_drm_connector { - struct drm_connector drm_connector; - uint32_t encoder_id; - struct exynos_drm_display *display; -}; - -static int exynos_drm_connector_get_modes(struct drm_connector *connector) -{ - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - struct exynos_drm_display *display = exynos_connector->display; - struct edid *edid = NULL; - unsigned int count = 0; - int ret; - - /* - * if get_edid() exists then get_edid() callback of hdmi side - * is called to get edid data through i2c interface else - * get timing from the FIMD driver(display controller). - * - * P.S. in case of lcd panel, count is always 1 if success - * because lcd panel has only one mode. - */ - if (display->ops->get_edid) { - edid = display->ops->get_edid(display, connector); - if (IS_ERR_OR_NULL(edid)) { - ret = PTR_ERR(edid); - edid = NULL; - DRM_ERROR("Panel operation get_edid failed %d\n", ret); - goto out; - } - - count = drm_add_edid_modes(connector, edid); - if (!count) { - DRM_ERROR("Add edid modes failed %d\n", count); - goto out; - } - - drm_mode_connector_update_edid_property(connector, edid); - } else { - struct exynos_drm_panel_info *panel; - struct drm_display_mode *mode = drm_mode_create(connector->dev); - if (!mode) { - DRM_ERROR("failed to create a new display mode.\n"); - return 0; - } - - if (display->ops->get_panel) - panel = display->ops->get_panel(display); - else { - drm_mode_destroy(connector->dev, mode); - return 0; - } - - drm_display_mode_from_videomode(&panel->vm, mode); - mode->width_mm = panel->width_mm; - mode->height_mm = panel->height_mm; - connector->display_info.width_mm = mode->width_mm; - connector->display_info.height_mm = mode->height_mm; - - mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; - drm_mode_set_name(mode); - drm_mode_probed_add(connector, mode); - - count = 1; - } - -out: - kfree(edid); - return count; -} - -static int exynos_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - struct exynos_drm_display *display = exynos_connector->display; - int ret = MODE_BAD; - - DRM_DEBUG_KMS("%s\n", __FILE__); - - if (display->ops->check_mode) - if (!display->ops->check_mode(display, mode)) - ret = MODE_OK; - - return ret; -} - -static struct drm_encoder *exynos_drm_best_encoder( - struct drm_connector *connector) -{ - struct drm_device *dev = connector->dev; - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - return drm_encoder_find(dev, exynos_connector->encoder_id); -} - -static struct drm_connector_helper_funcs exynos_connector_helper_funcs = { - .get_modes = exynos_drm_connector_get_modes, - .mode_valid = exynos_drm_connector_mode_valid, - .best_encoder = exynos_drm_best_encoder, -}; - -static int exynos_drm_connector_fill_modes(struct drm_connector *connector, - unsigned int max_width, unsigned int max_height) -{ - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - struct exynos_drm_display *display = exynos_connector->display; - unsigned int width, height; - - width = max_width; - height = max_height; - - /* - * if specific driver want to find desired_mode using maxmum - * resolution then get max width and height from that driver. - */ - if (display->ops->get_max_resol) - display->ops->get_max_resol(display, &width, &height); - - return drm_helper_probe_single_connector_modes(connector, width, - height); -} - -/* get detection status of display device. */ -static enum drm_connector_status -exynos_drm_connector_detect(struct drm_connector *connector, bool force) -{ - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - struct exynos_drm_display *display = exynos_connector->display; - enum drm_connector_status status = connector_status_disconnected; - - if (display->ops->is_connected) { - if (display->ops->is_connected(display)) - status = connector_status_connected; - else - status = connector_status_disconnected; - } - - return status; -} - -static void exynos_drm_connector_destroy(struct drm_connector *connector) -{ - struct exynos_drm_connector *exynos_connector = - to_exynos_connector(connector); - - drm_connector_unregister(connector); - drm_connector_cleanup(connector); - kfree(exynos_connector); -} - -static struct drm_connector_funcs exynos_connector_funcs = { - .dpms = drm_helper_connector_dpms, - .fill_modes = exynos_drm_connector_fill_modes, - .detect = exynos_drm_connector_detect, - .destroy = exynos_drm_connector_destroy, -}; - -struct drm_connector *exynos_drm_connector_create(struct drm_device *dev, - struct drm_encoder *encoder) -{ - struct exynos_drm_connector *exynos_connector; - struct exynos_drm_display *display = exynos_drm_get_display(encoder); - struct drm_connector *connector; - int type; - int err; - - exynos_connector = kzalloc(sizeof(*exynos_connector), GFP_KERNEL); - if (!exynos_connector) - return NULL; - - connector = &exynos_connector->drm_connector; - - switch (display->type) { - case EXYNOS_DISPLAY_TYPE_HDMI: - type = DRM_MODE_CONNECTOR_HDMIA; - connector->interlace_allowed = true; - connector->polled = DRM_CONNECTOR_POLL_HPD; - break; - case EXYNOS_DISPLAY_TYPE_VIDI: - type = DRM_MODE_CONNECTOR_VIRTUAL; - connector->polled = DRM_CONNECTOR_POLL_HPD; - break; - default: - type = DRM_MODE_CONNECTOR_Unknown; - break; - } - - drm_connector_init(dev, connector, &exynos_connector_funcs, type); - drm_connector_helper_add(connector, &exynos_connector_helper_funcs); - - err = drm_connector_register(connector); - if (err) - goto err_connector; - - exynos_connector->encoder_id = encoder->base.id; - exynos_connector->display = display; - connector->dpms = DRM_MODE_DPMS_OFF; - connector->encoder = encoder; - - err = drm_mode_connector_attach_encoder(connector, encoder); - if (err) { - DRM_ERROR("failed to attach a connector to a encoder\n"); - goto err_sysfs; - } - - DRM_DEBUG_KMS("connector has been created\n"); - - return connector; - -err_sysfs: - drm_connector_unregister(connector); -err_connector: - drm_connector_cleanup(connector); - kfree(exynos_connector); - return NULL; -} diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.h b/drivers/gpu/drm/exynos/exynos_drm_connector.h deleted file mode 100644 index 4eb20d78379a..000000000000 --- a/drivers/gpu/drm/exynos/exynos_drm_connector.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright (c) 2011 Samsung Electronics Co., Ltd. - * Authors: - * Inki Dae <inki.dae@samsung.com> - * Joonyoung Shim <jy0922.shim@samsung.com> - * Seung-Woo Kim <sw0312.kim@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_CONNECTOR_H_ -#define _EXYNOS_DRM_CONNECTOR_H_ - -struct drm_connector *exynos_drm_connector_create(struct drm_device *dev, - struct drm_encoder *encoder); - -#endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 925fc69af1a0..33a10ce967ea 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -147,6 +147,7 @@ struct fimd_win_data { unsigned int ovl_height; unsigned int fb_width; unsigned int fb_height; + unsigned int fb_pitch; unsigned int bpp; unsigned int pixel_format; dma_addr_t dma_addr; @@ -284,14 +285,9 @@ static void fimd_clear_channel(struct fimd_context *ctx) } } -static int fimd_ctx_initialize(struct fimd_context *ctx, +static int fimd_iommu_attach_devices(struct fimd_context *ctx, struct drm_device *drm_dev) { - struct exynos_drm_private *priv; - priv = drm_dev->dev_private; - - ctx->drm_dev = drm_dev; - ctx->pipe = priv->pipe++; /* attach this sub driver to iommu mapping if supported. */ if (is_drm_iommu_supported(ctx->drm_dev)) { @@ -313,7 +309,7 @@ static int fimd_ctx_initialize(struct fimd_context *ctx, return 0; } -static void fimd_ctx_remove(struct fimd_context *ctx) +static void fimd_iommu_detach_devices(struct fimd_context *ctx) { /* detach this sub driver from iommu mapping if supported. */ if (is_drm_iommu_supported(ctx->drm_dev)) @@ -537,13 +533,14 @@ static void fimd_win_mode_set(struct exynos_drm_crtc *crtc, win_data->offset_y = plane->crtc_y; win_data->ovl_width = plane->crtc_width; win_data->ovl_height = plane->crtc_height; + win_data->fb_pitch = plane->pitch; win_data->fb_width = plane->fb_width; win_data->fb_height = plane->fb_height; win_data->dma_addr = plane->dma_addr[0] + offset; win_data->bpp = plane->bpp; win_data->pixel_format = plane->pixel_format; - win_data->buf_offsize = (plane->fb_width - plane->crtc_width) * - (plane->bpp >> 3); + win_data->buf_offsize = + plane->pitch - (plane->crtc_width * (plane->bpp >> 3)); win_data->line_size = plane->crtc_width * (plane->bpp >> 3); DRM_DEBUG_KMS("offset_x = %d, offset_y = %d\n", @@ -709,7 +706,7 @@ static void fimd_win_commit(struct exynos_drm_crtc *crtc, int zpos) writel(val, ctx->regs + VIDWx_BUF_START(win, 0)); /* buffer end address */ - size = win_data->fb_width * win_data->ovl_height * (win_data->bpp >> 3); + size = win_data->fb_pitch * win_data->ovl_height * (win_data->bpp >> 3); val = (unsigned long)(win_data->dma_addr + size); writel(val, ctx->regs + VIDWx_BUF_END(win, 0)); @@ -1056,25 +1053,23 @@ static int fimd_bind(struct device *dev, struct device *master, void *data) { struct fimd_context *ctx = dev_get_drvdata(dev); struct drm_device *drm_dev = data; + struct exynos_drm_private *priv = drm_dev->dev_private; int ret; - ret = fimd_ctx_initialize(ctx, drm_dev); - if (ret) { - DRM_ERROR("fimd_ctx_initialize failed.\n"); - return ret; - } + ctx->drm_dev = drm_dev; + ctx->pipe = priv->pipe++; ctx->crtc = exynos_drm_crtc_create(drm_dev, ctx->pipe, EXYNOS_DISPLAY_TYPE_LCD, &fimd_crtc_ops, ctx); - if (IS_ERR(ctx->crtc)) { - fimd_ctx_remove(ctx); - return PTR_ERR(ctx->crtc); - } if (ctx->display) exynos_drm_create_enc_conn(drm_dev, ctx->display); + ret = fimd_iommu_attach_devices(ctx, drm_dev); + if (ret) + return ret; + return 0; } @@ -1086,10 +1081,10 @@ static void fimd_unbind(struct device *dev, struct device *master, fimd_dpms(ctx->crtc, DRM_MODE_DPMS_OFF); + fimd_iommu_detach_devices(ctx); + if (ctx->display) exynos_dpi_remove(ctx->display); - - fimd_ctx_remove(ctx); } static const struct component_ops fimd_component_ops = { diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index a5616872eee7..8ad5b7294eb4 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -175,7 +175,7 @@ static int exynos_disable_plane(struct drm_plane *plane) struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane); struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(plane->crtc); - if (exynos_crtc->ops->win_disable) + if (exynos_crtc && exynos_crtc->ops->win_disable) exynos_crtc->ops->win_disable(exynos_crtc, exynos_plane->zpos); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 3518bc4654c5..2e3bc57ea50e 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -55,6 +55,7 @@ struct hdmi_win_data { unsigned int fb_x; unsigned int fb_y; unsigned int fb_width; + unsigned int fb_pitch; unsigned int fb_height; unsigned int src_width; unsigned int src_height; @@ -438,7 +439,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) } else { luma_addr[0] = win_data->dma_addr; chroma_addr[0] = win_data->dma_addr - + (win_data->fb_width * win_data->fb_height); + + (win_data->fb_pitch * win_data->fb_height); } if (win_data->scan_flags & DRM_MODE_FLAG_INTERLACE) { @@ -447,8 +448,8 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) luma_addr[1] = luma_addr[0] + 0x40; chroma_addr[1] = chroma_addr[0] + 0x40; } else { - luma_addr[1] = luma_addr[0] + win_data->fb_width; - chroma_addr[1] = chroma_addr[0] + win_data->fb_width; + luma_addr[1] = luma_addr[0] + win_data->fb_pitch; + chroma_addr[1] = chroma_addr[0] + win_data->fb_pitch; } } else { ctx->interlace = false; @@ -469,10 +470,10 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) vp_reg_writemask(res, VP_MODE, val, VP_MODE_FMT_MASK); /* setting size of input image */ - vp_reg_write(res, VP_IMG_SIZE_Y, VP_IMG_HSIZE(win_data->fb_width) | + vp_reg_write(res, VP_IMG_SIZE_Y, VP_IMG_HSIZE(win_data->fb_pitch) | VP_IMG_VSIZE(win_data->fb_height)); /* chroma height has to reduced by 2 to avoid chroma distorions */ - vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(win_data->fb_width) | + vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(win_data->fb_pitch) | VP_IMG_VSIZE(win_data->fb_height / 2)); vp_reg_write(res, VP_SRC_WIDTH, win_data->src_width); @@ -559,7 +560,7 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win) /* converting dma address base and source offset */ dma_addr = win_data->dma_addr + (win_data->fb_x * win_data->bpp >> 3) - + (win_data->fb_y * win_data->fb_width * win_data->bpp >> 3); + + (win_data->fb_y * win_data->fb_pitch); src_x_offset = 0; src_y_offset = 0; @@ -576,7 +577,8 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win) MXR_GRP_CFG_FORMAT_VAL(fmt), MXR_GRP_CFG_FORMAT_MASK); /* setup geometry */ - mixer_reg_write(res, MXR_GRAPHIC_SPAN(win), win_data->fb_width); + mixer_reg_write(res, MXR_GRAPHIC_SPAN(win), + win_data->fb_pitch / (win_data->bpp >> 3)); /* setup display size */ if (ctx->mxr_ver == MXR_VER_128_0_0_184 && @@ -961,6 +963,7 @@ static void mixer_win_mode_set(struct exynos_drm_crtc *crtc, win_data->fb_y = plane->fb_y; win_data->fb_width = plane->fb_width; win_data->fb_height = plane->fb_height; + win_data->fb_pitch = plane->pitch; win_data->src_width = plane->src_width; win_data->src_height = plane->src_height; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 96e811fe24ca..e8b18e542da4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -152,12 +152,12 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, " (pp"); else seq_puts(m, " (g"); - seq_printf(m, "gtt offset: %08lx, size: %08lx, type: %u)", + seq_printf(m, "gtt offset: %08llx, size: %08llx, type: %u)", vma->node.start, vma->node.size, vma->ggtt_view.type); } if (obj->stolen) - seq_printf(m, " (stolen: %08lx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->start); if (obj->pin_mappable || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_mappable) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 8039cec71fc2..5c66b568bb81 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -622,7 +622,7 @@ static int i915_drm_suspend(struct drm_device *dev) return 0; } -static int i915_drm_suspend_late(struct drm_device *drm_dev) +static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation) { struct drm_i915_private *dev_priv = drm_dev->dev_private; int ret; @@ -636,7 +636,17 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev) } pci_disable_device(drm_dev->pdev); - pci_set_power_state(drm_dev->pdev, PCI_D3hot); + /* + * During hibernation on some GEN4 platforms the BIOS may try to access + * the device even though it's already in D3 and hang the machine. So + * leave the device in D0 on those platforms and hope the BIOS will + * power down the device properly. Platforms where this was seen: + * Lenovo Thinkpad X301, X61s + */ + if (!(hibernation && + drm_dev->pdev->subsystem_vendor == PCI_VENDOR_ID_LENOVO && + INTEL_INFO(dev_priv)->gen == 4)) + pci_set_power_state(drm_dev->pdev, PCI_D3hot); return 0; } @@ -662,7 +672,7 @@ int i915_suspend_legacy(struct drm_device *dev, pm_message_t state) if (error) return error; - return i915_drm_suspend_late(dev); + return i915_drm_suspend_late(dev, false); } static int i915_drm_resume(struct drm_device *dev) @@ -950,7 +960,17 @@ static int i915_pm_suspend_late(struct device *dev) if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - return i915_drm_suspend_late(drm_dev); + return i915_drm_suspend_late(drm_dev, false); +} + +static int i915_pm_poweroff_late(struct device *dev) +{ + struct drm_device *drm_dev = dev_to_i915(dev)->dev; + + if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + return i915_drm_suspend_late(drm_dev, true); } static int i915_pm_resume_early(struct device *dev) @@ -1075,6 +1095,7 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv) /* Gunit-Display CZ domain, 0x182028-0x1821CF */ s->gu_ctl0 = I915_READ(VLV_GU_CTL0); s->gu_ctl1 = I915_READ(VLV_GU_CTL1); + s->pcbr = I915_READ(VLV_PCBR); s->clock_gate_dis2 = I915_READ(VLV_GUNIT_CLOCK_GATE2); /* @@ -1169,6 +1190,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv) /* Gunit-Display CZ domain, 0x182028-0x1821CF */ I915_WRITE(VLV_GU_CTL0, s->gu_ctl0); I915_WRITE(VLV_GU_CTL1, s->gu_ctl1); + I915_WRITE(VLV_PCBR, s->pcbr); I915_WRITE(VLV_GUNIT_CLOCK_GATE2, s->clock_gate_dis2); } @@ -1177,19 +1199,7 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on) u32 val; int err; - val = I915_READ(VLV_GTLC_SURVIVABILITY_REG); - WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on); - #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT) - /* Wait for a previous force-off to settle */ - if (force_on) { - err = wait_for(!COND, 20); - if (err) { - DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n", - I915_READ(VLV_GTLC_SURVIVABILITY_REG)); - return err; - } - } val = I915_READ(VLV_GTLC_SURVIVABILITY_REG); val &= ~VLV_GFX_CLK_FORCE_ON_BIT; @@ -1520,7 +1530,7 @@ static const struct dev_pm_ops i915_pm_ops = { .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, .poweroff = i915_pm_suspend, - .poweroff_late = i915_pm_suspend_late, + .poweroff_late = i915_pm_poweroff_late, .restore_early = i915_pm_resume_early, .restore = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8727086cf48c..b4faa2df9d3d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1094,6 +1094,7 @@ struct vlv_s0ix_state { /* Display 2 CZ domain */ u32 gu_ctl0; u32 gu_ctl1; + u32 pcbr; u32 clock_gate_dis2; }; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e5daad5f75fb..27ea6bdebce7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) WARN_ON(i915_verify_lists(ring->dev)); - /* Move any buffers on the active list that are no longer referenced - * by the ringbuffer to the flushing/inactive lists as appropriate, - * before we free the context associated with the requests. + /* Retire requests first as we use it above for the early return. + * If we retire requests last, we may use a later seqno and so clear + * the requests lists without clearing the active list, leading to + * confusion. */ - while (!list_empty(&ring->active_list)) { - struct drm_i915_gem_object *obj; - - obj = list_first_entry(&ring->active_list, - struct drm_i915_gem_object, - ring_list); - - if (!i915_gem_request_completed(obj->last_read_req, true)) - break; - - i915_gem_object_move_to_inactive(obj); - } - - while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; struct intel_ringbuffer *ringbuf; @@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) i915_gem_free_request(request); } + /* Move any buffers on the active list that are no longer referenced + * by the ringbuffer to the flushing/inactive lists as appropriate, + * before we free the context associated with the requests. + */ + while (!list_empty(&ring->active_list)) { + struct drm_i915_gem_object *obj; + + obj = list_first_entry(&ring->active_list, + struct drm_i915_gem_object, + ring_list); + + if (!i915_gem_request_completed(obj->last_read_req, true)) + break; + + i915_gem_object_move_to_inactive(obj); + } + if (unlikely(ring->trace_irq_req && i915_gem_request_completed(ring->trace_irq_req, true))) { ring->irq_put(ring); @@ -2936,9 +2940,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) req = obj->last_read_req; /* Do this after OLR check to make sure we make forward progress polling - * on this IOCTL with a timeout <=0 (like busy ioctl) + * on this IOCTL with a timeout == 0 (like busy ioctl) */ - if (args->timeout_ns <= 0) { + if (args->timeout_ns == 0) { ret = -ETIME; goto out; } @@ -2948,7 +2952,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) i915_gem_request_reference(req); mutex_unlock(&dev->struct_mutex); - ret = __i915_wait_request(req, reset_counter, true, &args->timeout_ns, + ret = __i915_wait_request(req, reset_counter, true, + args->timeout_ns > 0 ? &args->timeout_ns : NULL, file->driver_priv); mutex_lock(&dev->struct_mutex); i915_gem_request_unreference(req); @@ -4792,6 +4797,9 @@ i915_gem_init_hw(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; + /* Double layer security blanket, see i915_gem_init() */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + if (dev_priv->ellc_size) I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); @@ -4824,7 +4832,7 @@ i915_gem_init_hw(struct drm_device *dev) for_each_ring(ring, dev_priv, i) { ret = ring->init_hw(ring); if (ret) - return ret; + goto out; } for (i = 0; i < NUM_L3_SLICES(dev); i++) @@ -4841,9 +4849,11 @@ i915_gem_init_hw(struct drm_device *dev) DRM_ERROR("Context enable failed %d\n", ret); i915_gem_cleanup_ringbuffer(dev); - return ret; + goto out; } +out: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; } @@ -4877,6 +4887,14 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.stop_ring = intel_logical_ring_stop; } + /* This is just a security blanket to placate dragons. + * On some systems, we very sporadically observe that the first TLBs + * used by the CS may be stale, despite us poking the TLB reset. If + * we hold the forcewake during initialisation these problems + * just magically go away. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + ret = i915_gem_init_userptr(dev); if (ret) goto out_unlock; @@ -4903,6 +4921,7 @@ int i915_gem_init(struct drm_device *dev) } out_unlock: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev->struct_mutex); return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index b773368fc62c..38a742532c4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1487,7 +1487,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err; } - if (i915_needs_cmd_parser(ring)) { + if (i915_needs_cmd_parser(ring) && args->batch_len) { batch_obj = i915_gem_execbuffer_parse(ring, &shadow_exec_entry, eb, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 746f77fb57a3..dccdc8aad2e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1145,7 +1145,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true); - DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n", + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); @@ -1713,8 +1713,8 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) static void i915_gtt_color_adjust(struct drm_mm_node *node, unsigned long color, - unsigned long *start, - unsigned long *end) + u64 *start, + u64 *end) { if (node->color != color) *start += 4096; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3117679299a6..f75173c20f47 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_trace.h" +#include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_dp_helper.h> #include <drm/drm_crtc_helper.h> @@ -2416,6 +2417,14 @@ out_unref_obj: return false; } +/* Update plane->state->fb to match plane->fb after driver-internal updates */ +static void +update_state_fb(struct drm_plane *plane) +{ + if (plane->fb != plane->state->fb) + drm_atomic_set_fb_for_plane(plane->state, plane->fb); +} + static void intel_find_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2429,8 +2438,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, if (!intel_crtc->base.primary->fb) return; - if (intel_alloc_plane_obj(intel_crtc, plane_config)) + if (intel_alloc_plane_obj(intel_crtc, plane_config)) { + struct drm_plane *primary = intel_crtc->base.primary; + + primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; + update_state_fb(primary); + return; + } kfree(intel_crtc->base.primary->fb); intel_crtc->base.primary->fb = NULL; @@ -2453,15 +2469,21 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc, continue; if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { + struct drm_plane *primary = intel_crtc->base.primary; + if (obj->tiling_mode != I915_TILING_NONE) dev_priv->preserve_bios_swizzle = true; drm_framebuffer_reference(c->primary->fb); - intel_crtc->base.primary->fb = c->primary->fb; + primary->fb = c->primary->fb; + primary->state->crtc = &intel_crtc->base; + primary->crtc = &intel_crtc->base; obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe); break; } } + + update_state_fb(intel_crtc->base.primary); } static void i9xx_update_primary_plane(struct drm_crtc *crtc, @@ -6602,6 +6624,10 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; + val = I915_READ(DSPCNTR(plane)); + if (!(val & DISPLAY_PLANE_ENABLE)) + return; + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) { DRM_DEBUG_KMS("failed to alloc fb\n"); @@ -6610,8 +6636,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, fb = &intel_fb->base; - val = I915_READ(DSPCNTR(plane)); - if (INTEL_INFO(dev)->gen >= 4) if (val & DISPPLANE_TILED) plane_config->tiling = I915_TILING_X; @@ -7643,6 +7667,9 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb = &intel_fb->base; val = I915_READ(PLANE_CTL(pipe, 0)); + if (!(val & PLANE_CTL_ENABLE)) + goto error; + if (val & PLANE_CTL_TILED_MASK) plane_config->tiling = I915_TILING_X; @@ -7730,6 +7757,10 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, struct drm_framebuffer *fb; struct intel_framebuffer *intel_fb; + val = I915_READ(DSPCNTR(pipe)); + if (!(val & DISPLAY_PLANE_ENABLE)) + return; + intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) { DRM_DEBUG_KMS("failed to alloc fb\n"); @@ -7738,8 +7769,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, fb = &intel_fb->base; - val = I915_READ(DSPCNTR(pipe)); - if (INTEL_INFO(dev)->gen >= 4) if (val & DISPPLANE_TILED) plane_config->tiling = I915_TILING_X; @@ -8713,6 +8742,7 @@ retry: old->release_fb->funcs->destroy(old->release_fb); goto fail; } + crtc->primary->crtc = crtc; /* let the connector get through one full cycle before testing */ intel_wait_for_vblank(dev, intel_crtc->pipe); @@ -9715,7 +9745,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe) struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - WARN_ON(!in_irq()); + WARN_ON(!in_interrupt()); if (crtc == NULL) return; @@ -9815,6 +9845,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, drm_gem_object_reference(&obj->base); crtc->primary->fb = fb; + update_state_fb(crtc->primary); work->pending_flip_obj = obj; @@ -9883,6 +9914,7 @@ cleanup_unpin: cleanup_pending: atomic_dec(&intel_crtc->unpin_work_count); crtc->primary->fb = old_fb; + update_state_fb(crtc->primary); drm_gem_object_unreference(&work->old_fb_obj->base); drm_gem_object_unreference(&obj->base); mutex_unlock(&dev->struct_mutex); @@ -13717,6 +13749,7 @@ void intel_modeset_gem_init(struct drm_device *dev) to_intel_crtc(c)->pipe); drm_framebuffer_unreference(c->primary->fb); c->primary->fb = NULL; + update_state_fb(c->primary); } } mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 04e248dd2259..54daa66c6970 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -282,16 +282,6 @@ bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv, return ret; } -static bool -__cpu_fifo_underrun_reporting_enabled(struct drm_i915_private *dev_priv, - enum pipe pipe) -{ - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - return !intel_crtc->cpu_fifo_underrun_disabled; -} - /** * intel_set_pch_fifo_underrun_reporting - set PCH fifo underrun reporting state * @dev_priv: i915 device instance @@ -352,9 +342,15 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { + struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + + /* We may be called too early in init, thanks BIOS! */ + if (crtc == NULL) + return; + /* GMCH can't disable fifo underruns, filter them. */ if (HAS_GMCH_DISPLAY(dev_priv->dev) && - !__cpu_fifo_underrun_reporting_enabled(dev_priv, pipe)) + to_intel_crtc(crtc)->cpu_fifo_underrun_disabled) return; if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 0a52c44ad03d..9c5451c97942 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1322,7 +1322,7 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); plane = drm_plane_find(dev, set->plane_id); - if (!plane) { + if (!plane || plane->type != DRM_PLANE_TYPE_OVERLAY) { ret = -ENOENT; goto out_unlock; } @@ -1349,7 +1349,7 @@ int intel_sprite_get_colorkey(struct drm_device *dev, void *data, drm_modeset_lock_all(dev); plane = drm_plane_find(dev, get->plane_id); - if (!plane) { + if (!plane || plane->type != DRM_PLANE_TYPE_OVERLAY) { ret = -ENOENT; goto out_unlock; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c47a3baa53d5..4e8fb891d4ea 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1048,8 +1048,14 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev) /* We need to init first for ECOBUS access and then * determine later if we want to reinit, in case of MT access is - * not working + * not working. In this stage we don't know which flavour this + * ivb is, so it is better to reset also the gen6 fw registers + * before the ecobus check. */ + + __raw_i915_write32(dev_priv, FORCEWAKE, 0); + __raw_posting_read(dev_priv, ECOBUS); + fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_MT, FORCEWAKE_MT_ACK); diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c index 121d30ca2d44..87fe8ed92ebe 100644 --- a/drivers/gpu/drm/imx/dw_hdmi-imx.c +++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c @@ -70,7 +70,9 @@ static const struct dw_hdmi_curr_ctrl imx_cur_ctr[] = { 118800000, { 0x091c, 0x091c, 0x06dc }, }, { 216000000, { 0x06dc, 0x0b5c, 0x091c }, - } + }, { + ~0UL, { 0x0000, 0x0000, 0x0000 }, + }, }; static const struct dw_hdmi_sym_term imx_sym_term[] = { @@ -136,11 +138,34 @@ static struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = { .destroy = drm_encoder_cleanup, }; +static enum drm_mode_status imx6q_hdmi_mode_valid(struct drm_connector *con, + struct drm_display_mode *mode) +{ + if (mode->clock < 13500) + return MODE_CLOCK_LOW; + if (mode->clock > 266000) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static enum drm_mode_status imx6dl_hdmi_mode_valid(struct drm_connector *con, + struct drm_display_mode *mode) +{ + if (mode->clock < 13500) + return MODE_CLOCK_LOW; + if (mode->clock > 270000) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + static struct dw_hdmi_plat_data imx6q_hdmi_drv_data = { - .mpll_cfg = imx_mpll_cfg, - .cur_ctr = imx_cur_ctr, - .sym_term = imx_sym_term, - .dev_type = IMX6Q_HDMI, + .mpll_cfg = imx_mpll_cfg, + .cur_ctr = imx_cur_ctr, + .sym_term = imx_sym_term, + .dev_type = IMX6Q_HDMI, + .mode_valid = imx6q_hdmi_mode_valid, }; static struct dw_hdmi_plat_data imx6dl_hdmi_drv_data = { @@ -148,6 +173,7 @@ static struct dw_hdmi_plat_data imx6dl_hdmi_drv_data = { .cur_ctr = imx_cur_ctr, .sym_term = imx_sym_term, .dev_type = IMX6DL_HDMI, + .mode_valid = imx6dl_hdmi_mode_valid, }; static const struct of_device_id dw_hdmi_imx_dt_ids[] = { diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index 1b86aac0b341..2d6dc94e1e64 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -163,22 +163,7 @@ static void imx_ldb_encoder_prepare(struct drm_encoder *encoder) { struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder); struct imx_ldb *ldb = imx_ldb_ch->ldb; - struct drm_display_mode *mode = &encoder->crtc->hwmode; u32 pixel_fmt; - unsigned long serial_clk; - unsigned long di_clk = mode->clock * 1000; - int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->child, encoder); - - if (ldb->ldb_ctrl & LDB_SPLIT_MODE_EN) { - /* dual channel LVDS mode */ - serial_clk = 3500UL * mode->clock; - imx_ldb_set_clock(ldb, mux, 0, serial_clk, di_clk); - imx_ldb_set_clock(ldb, mux, 1, serial_clk, di_clk); - } else { - serial_clk = 7000UL * mode->clock; - imx_ldb_set_clock(ldb, mux, imx_ldb_ch->chno, serial_clk, - di_clk); - } switch (imx_ldb_ch->chno) { case 0: @@ -247,6 +232,9 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder, struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder); struct imx_ldb *ldb = imx_ldb_ch->ldb; int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN; + unsigned long serial_clk; + unsigned long di_clk = mode->clock * 1000; + int mux = imx_drm_encoder_get_mux_id(imx_ldb_ch->child, encoder); if (mode->clock > 170000) { dev_warn(ldb->dev, @@ -257,6 +245,16 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder, "%s: mode exceeds 85 MHz pixel clock\n", __func__); } + if (dual) { + serial_clk = 3500UL * mode->clock; + imx_ldb_set_clock(ldb, mux, 0, serial_clk, di_clk); + imx_ldb_set_clock(ldb, mux, 1, serial_clk, di_clk); + } else { + serial_clk = 7000UL * mode->clock; + imx_ldb_set_clock(ldb, mux, imx_ldb_ch->chno, serial_clk, + di_clk); + } + /* FIXME - assumes straight connections DI0 --> CH0, DI1 --> CH1 */ if (imx_ldb_ch == &ldb->channel[0]) { if (mode->flags & DRM_MODE_FLAG_NVSYNC) diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c index 5e83e007080f..900dda6a8e71 100644 --- a/drivers/gpu/drm/imx/parallel-display.c +++ b/drivers/gpu/drm/imx/parallel-display.c @@ -236,8 +236,11 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) } panel_node = of_parse_phandle(np, "fsl,panel", 0); - if (panel_node) + if (panel_node) { imxpd->panel = of_drm_find_panel(panel_node); + if (!imxpd->panel) + return -EPROBE_DEFER; + } imxpd->dev = dev; diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c index 8edd531cb621..7369ee7f0c55 100644 --- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_irq.c @@ -32,7 +32,10 @@ static void mdp4_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus) void mdp4_irq_preinstall(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); + mdp4_enable(mdp4_kms); mdp4_write(mdp4_kms, REG_MDP4_INTR_CLEAR, 0xffffffff); + mdp4_write(mdp4_kms, REG_MDP4_INTR_ENABLE, 0x00000000); + mdp4_disable(mdp4_kms); } int mdp4_irq_postinstall(struct msm_kms *kms) @@ -53,7 +56,9 @@ int mdp4_irq_postinstall(struct msm_kms *kms) void mdp4_irq_uninstall(struct msm_kms *kms) { struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms)); + mdp4_enable(mdp4_kms); mdp4_write(mdp4_kms, REG_MDP4_INTR_ENABLE, 0x00000000); + mdp4_disable(mdp4_kms); } irqreturn_t mdp4_irq(struct msm_kms *kms) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h index 09b4a25eb553..c276624290af 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h @@ -8,17 +8,9 @@ http://github.com/freedreno/envytools/ git clone https://github.com/freedreno/envytools.git The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/msm.xml ( 676 bytes, from 2014-12-05 15:34:49) -- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml ( 20908 bytes, from 2014-12-08 16:13:00) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml ( 2357 bytes, from 2014-12-08 16:13:00) -- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml ( 27208 bytes, from 2015-01-13 23:56:11) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml ( 11712 bytes, from 2013-08-17 17:13:43) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml ( 344 bytes, from 2013-08-11 19:26:32) -- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml ( 1686 bytes, from 2014-10-31 16:48:57) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml ( 600 bytes, from 2013-07-05 19:21:12) -- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml ( 26848 bytes, from 2015-01-13 23:55:57) -- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml ( 8253 bytes, from 2014-12-08 16:13:00) +- /local/mnt2/workspace2/sviau/envytools/rnndb/mdp/mdp5.xml ( 27229 bytes, from 2015-02-10 17:00:41) +- /local/mnt2/workspace2/sviau/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2014-06-02 18:31:15) +- /local/mnt2/workspace2/sviau/envytools/rnndb/mdp/mdp_common.xml ( 2357 bytes, from 2015-01-23 16:20:19) Copyright (C) 2013-2015 by the following authors: - Rob Clark <robdclark@gmail.com> (robclark) @@ -910,6 +902,7 @@ static inline uint32_t __offset_LM(uint32_t idx) case 2: return (mdp5_cfg->lm.base[2]); case 3: return (mdp5_cfg->lm.base[3]); case 4: return (mdp5_cfg->lm.base[4]); + case 5: return (mdp5_cfg->lm.base[5]); default: return INVALID_IDX(idx); } } diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 46fac545dc2b..2f2863cf8b45 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -62,8 +62,8 @@ struct mdp5_crtc { /* current cursor being scanned out: */ struct drm_gem_object *scanout_bo; - uint32_t width; - uint32_t height; + uint32_t width, height; + uint32_t x, y; } cursor; }; #define to_mdp5_crtc(x) container_of(x, struct mdp5_crtc, base) @@ -103,8 +103,8 @@ static void crtc_flush_all(struct drm_crtc *crtc) struct drm_plane *plane; uint32_t flush_mask = 0; - /* we could have already released CTL in the disable path: */ - if (!mdp5_crtc->ctl) + /* this should not happen: */ + if (WARN_ON(!mdp5_crtc->ctl)) return; drm_atomic_crtc_for_each_plane(plane, crtc) { @@ -143,6 +143,11 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file) drm_atomic_crtc_for_each_plane(plane, crtc) { mdp5_plane_complete_flip(plane); } + + if (mdp5_crtc->ctl && !crtc->state->enable) { + mdp5_ctl_release(mdp5_crtc->ctl); + mdp5_crtc->ctl = NULL; + } } static void unref_cursor_worker(struct drm_flip_work *work, void *val) @@ -386,14 +391,17 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc) mdp5_crtc->event = crtc->state->event; spin_unlock_irqrestore(&dev->event_lock, flags); + /* + * If no CTL has been allocated in mdp5_crtc_atomic_check(), + * it means we are trying to flush a CRTC whose state is disabled: + * nothing else needs to be done. + */ + if (unlikely(!mdp5_crtc->ctl)) + return; + blend_setup(crtc); crtc_flush_all(crtc); request_pending(crtc, PENDING_FLIP); - - if (mdp5_crtc->ctl && !crtc->state->enable) { - mdp5_ctl_release(mdp5_crtc->ctl); - mdp5_crtc->ctl = NULL; - } } static int mdp5_crtc_set_property(struct drm_crtc *crtc, @@ -403,6 +411,32 @@ static int mdp5_crtc_set_property(struct drm_crtc *crtc, return -EINVAL; } +static void get_roi(struct drm_crtc *crtc, uint32_t *roi_w, uint32_t *roi_h) +{ + struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); + uint32_t xres = crtc->mode.hdisplay; + uint32_t yres = crtc->mode.vdisplay; + + /* + * Cursor Region Of Interest (ROI) is a plane read from cursor + * buffer to render. The ROI region is determined by the visibility of + * the cursor point. In the default Cursor image the cursor point will + * be at the top left of the cursor image, unless it is specified + * otherwise using hotspot feature. + * + * If the cursor point reaches the right (xres - x < cursor.width) or + * bottom (yres - y < cursor.height) boundary of the screen, then ROI + * width and ROI height need to be evaluated to crop the cursor image + * accordingly. + * (xres-x) will be new cursor width when x > (xres - cursor.width) + * (yres-y) will be new cursor height when y > (yres - cursor.height) + */ + *roi_w = min(mdp5_crtc->cursor.width, xres - + mdp5_crtc->cursor.x); + *roi_h = min(mdp5_crtc->cursor.height, yres - + mdp5_crtc->cursor.y); +} + static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file, uint32_t handle, uint32_t width, uint32_t height) @@ -416,6 +450,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, unsigned int depth; enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL; uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); + uint32_t roi_w, roi_h; unsigned long flags; if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) { @@ -446,6 +481,12 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); old_bo = mdp5_crtc->cursor.scanout_bo; + mdp5_crtc->cursor.scanout_bo = cursor_bo; + mdp5_crtc->cursor.width = width; + mdp5_crtc->cursor.height = height; + + get_roi(crtc, &roi_w, &roi_h); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); @@ -453,19 +494,14 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, MDP5_LM_CURSOR_IMG_SIZE_SRC_H(height) | MDP5_LM_CURSOR_IMG_SIZE_SRC_W(width)); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), - MDP5_LM_CURSOR_SIZE_ROI_H(height) | - MDP5_LM_CURSOR_SIZE_ROI_W(width)); + MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | + MDP5_LM_CURSOR_SIZE_ROI_W(roi_w)); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BASE_ADDR(lm), cursor_addr); - blendcfg = MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_EN; - blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_TRANSP_EN; blendcfg |= MDP5_LM_CURSOR_BLEND_CONFIG_BLEND_ALPHA_SEL(cur_alpha); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_BLEND_CONFIG(lm), blendcfg); - mdp5_crtc->cursor.scanout_bo = cursor_bo; - mdp5_crtc->cursor.width = width; - mdp5_crtc->cursor.height = height; spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags); ret = mdp5_ctl_set_cursor(mdp5_crtc->ctl, true); @@ -489,31 +525,18 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) struct mdp5_kms *mdp5_kms = get_kms(crtc); struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc); uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0); - uint32_t xres = crtc->mode.hdisplay; - uint32_t yres = crtc->mode.vdisplay; uint32_t roi_w; uint32_t roi_h; unsigned long flags; - x = (x > 0) ? x : 0; - y = (y > 0) ? y : 0; + /* In case the CRTC is disabled, just drop the cursor update */ + if (unlikely(!crtc->state->enable)) + return 0; - /* - * Cursor Region Of Interest (ROI) is a plane read from cursor - * buffer to render. The ROI region is determined by the visiblity of - * the cursor point. In the default Cursor image the cursor point will - * be at the top left of the cursor image, unless it is specified - * otherwise using hotspot feature. - * - * If the cursor point reaches the right (xres - x < cursor.width) or - * bottom (yres - y < cursor.height) boundary of the screen, then ROI - * width and ROI height need to be evaluated to crop the cursor image - * accordingly. - * (xres-x) will be new cursor width when x > (xres - cursor.width) - * (yres-y) will be new cursor height when y > (yres - cursor.height) - */ - roi_w = min(mdp5_crtc->cursor.width, xres - x); - roi_h = min(mdp5_crtc->cursor.height, yres - y); + mdp5_crtc->cursor.x = x = max(x, 0); + mdp5_crtc->cursor.y = y = max(y, 0); + + get_roi(crtc, &roi_w, &roi_h); spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(mdp5_crtc->lm), @@ -544,8 +567,8 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = { static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = { .mode_fixup = mdp5_crtc_mode_fixup, .mode_set_nofb = mdp5_crtc_mode_set_nofb, - .prepare = mdp5_crtc_disable, - .commit = mdp5_crtc_enable, + .disable = mdp5_crtc_disable, + .enable = mdp5_crtc_enable, .atomic_check = mdp5_crtc_atomic_check, .atomic_begin = mdp5_crtc_atomic_begin, .atomic_flush = mdp5_crtc_atomic_flush, diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index d6a14bb99988..af0e02fa4f48 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -267,14 +267,14 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder) mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(intf), 1); spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags); - mdp5_encoder->enabled = false; + mdp5_encoder->enabled = true; } static const struct drm_encoder_helper_funcs mdp5_encoder_helper_funcs = { .mode_fixup = mdp5_encoder_mode_fixup, .mode_set = mdp5_encoder_mode_set, - .prepare = mdp5_encoder_disable, - .commit = mdp5_encoder_enable, + .disable = mdp5_encoder_disable, + .enable = mdp5_encoder_enable, }; /* initialize encoder */ diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index 70ac81edd40f..a9407105b9b7 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -34,7 +34,10 @@ static void mdp5_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus) void mdp5_irq_preinstall(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); + mdp5_enable(mdp5_kms); mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff); + mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); + mdp5_disable(mdp5_kms); } int mdp5_irq_postinstall(struct msm_kms *kms) @@ -57,7 +60,9 @@ int mdp5_irq_postinstall(struct msm_kms *kms) void mdp5_irq_uninstall(struct msm_kms *kms) { struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms)); + mdp5_enable(mdp5_kms); mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000); + mdp5_disable(mdp5_kms); } static void mdp5_irq_mdp(struct mdp_kms *mdp_kms) diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c index 871aa2108dc6..18fd643b6e69 100644 --- a/drivers/gpu/drm/msm/msm_atomic.c +++ b/drivers/gpu/drm/msm/msm_atomic.c @@ -219,8 +219,10 @@ int msm_atomic_commit(struct drm_device *dev, * mark our set of crtc's as busy: */ ret = start_atomic(dev->dev_private, c->crtc_mask); - if (ret) + if (ret) { + kfree(c); return ret; + } /* * This is the point of no return - everything below never fails except diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 79924e4b1b49..6751553abe4a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -418,7 +418,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, nouveau_fbcon_zfill(dev, fbcon); /* To allow resizeing without swapping buffers */ - NV_INFO(drm, "allocated %dx%d fb: 0x%lx, bo %p\n", + NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n", nouveau_fb->base.width, nouveau_fb->base.height, nvbo->bo.offset, nvbo); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 29bd539af183..6efa8f38ff54 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -340,11 +340,13 @@ nvkm_devobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine, /* switch mmio to cpu's native endianness */ #ifndef __BIG_ENDIAN - if (ioread32_native(map + 0x000004) != 0x00000000) + if (ioread32_native(map + 0x000004) != 0x00000000) { #else - if (ioread32_native(map + 0x000004) == 0x00000000) + if (ioread32_native(map + 0x000004) == 0x00000000) { #endif iowrite32_native(0x01000001, map + 0x000004); + ioread32_native(map); + } /* read boot0 and strapping information */ boot0 = ioread32_native(map + 0x000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/gm100.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/gm100.c index 539561ed3281..108d048da764 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/gm100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/gm100.c @@ -142,6 +142,49 @@ gm100_identify(struct nvkm_device *device) device->oclass[NVDEV_ENGINE_MSPPP ] = &gf100_msppp_oclass; #endif break; + case 0x126: + device->cname = "GM206"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nvkm_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = gk104_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = gm204_i2c_oclass; + device->oclass[NVDEV_SUBDEV_FUSE ] = &gm107_fuse_oclass; +#if 0 + /* looks to be some non-trivial changes */ + device->oclass[NVDEV_SUBDEV_CLK ] = &gk104_clk_oclass; + /* priv ring says no to 0x10eb14 writes */ + device->oclass[NVDEV_SUBDEV_THERM ] = &gm107_therm_oclass; +#endif + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = gm204_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = gk20a_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = gf100_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &gk20a_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = gm107_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTC ] = gm107_ltc_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &gk104_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_MMU ] = &gf100_mmu_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &gf100_bar_oclass; + device->oclass[NVDEV_SUBDEV_PMU ] = gk208_pmu_oclass; +#if 0 + device->oclass[NVDEV_SUBDEV_VOLT ] = &nv40_volt_oclass; +#endif + device->oclass[NVDEV_ENGINE_DMAOBJ ] = gf110_dmaeng_oclass; +#if 0 + device->oclass[NVDEV_ENGINE_FIFO ] = gk208_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = gf100_sw_oclass; + device->oclass[NVDEV_ENGINE_GR ] = gm107_gr_oclass; +#endif + device->oclass[NVDEV_ENGINE_DISP ] = gm204_disp_oclass; +#if 0 + device->oclass[NVDEV_ENGINE_CE0 ] = &gm204_ce0_oclass; + device->oclass[NVDEV_ENGINE_CE1 ] = &gm204_ce1_oclass; + device->oclass[NVDEV_ENGINE_CE2 ] = &gm204_ce2_oclass; + device->oclass[NVDEV_ENGINE_MSVLD ] = &gk104_msvld_oclass; + device->oclass[NVDEV_ENGINE_MSPDEC ] = &gk104_mspdec_oclass; + device->oclass[NVDEV_ENGINE_MSPPP ] = &gf100_msppp_oclass; +#endif + break; default: nv_fatal(device, "unknown Maxwell chipset\n"); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c index b038b6eb51db..043e4296084c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c @@ -502,72 +502,57 @@ nv04_fifo_intr(struct nvkm_subdev *subdev) { struct nvkm_device *device = nv_device(subdev); struct nv04_fifo_priv *priv = (void *)subdev; - uint32_t status, reassign; - int cnt = 0; + u32 mask = nv_rd32(priv, NV03_PFIFO_INTR_EN_0); + u32 stat = nv_rd32(priv, NV03_PFIFO_INTR_0) & mask; + u32 reassign, chid, get, sem; reassign = nv_rd32(priv, NV03_PFIFO_CACHES) & 1; - while ((status = nv_rd32(priv, NV03_PFIFO_INTR_0)) && (cnt++ < 100)) { - uint32_t chid, get; - - nv_wr32(priv, NV03_PFIFO_CACHES, 0); - - chid = nv_rd32(priv, NV03_PFIFO_CACHE1_PUSH1) & priv->base.max; - get = nv_rd32(priv, NV03_PFIFO_CACHE1_GET); + nv_wr32(priv, NV03_PFIFO_CACHES, 0); - if (status & NV_PFIFO_INTR_CACHE_ERROR) { - nv04_fifo_cache_error(device, priv, chid, get); - status &= ~NV_PFIFO_INTR_CACHE_ERROR; - } + chid = nv_rd32(priv, NV03_PFIFO_CACHE1_PUSH1) & priv->base.max; + get = nv_rd32(priv, NV03_PFIFO_CACHE1_GET); - if (status & NV_PFIFO_INTR_DMA_PUSHER) { - nv04_fifo_dma_pusher(device, priv, chid); - status &= ~NV_PFIFO_INTR_DMA_PUSHER; - } + if (stat & NV_PFIFO_INTR_CACHE_ERROR) { + nv04_fifo_cache_error(device, priv, chid, get); + stat &= ~NV_PFIFO_INTR_CACHE_ERROR; + } - if (status & NV_PFIFO_INTR_SEMAPHORE) { - uint32_t sem; + if (stat & NV_PFIFO_INTR_DMA_PUSHER) { + nv04_fifo_dma_pusher(device, priv, chid); + stat &= ~NV_PFIFO_INTR_DMA_PUSHER; + } - status &= ~NV_PFIFO_INTR_SEMAPHORE; - nv_wr32(priv, NV03_PFIFO_INTR_0, - NV_PFIFO_INTR_SEMAPHORE); + if (stat & NV_PFIFO_INTR_SEMAPHORE) { + stat &= ~NV_PFIFO_INTR_SEMAPHORE; + nv_wr32(priv, NV03_PFIFO_INTR_0, NV_PFIFO_INTR_SEMAPHORE); - sem = nv_rd32(priv, NV10_PFIFO_CACHE1_SEMAPHORE); - nv_wr32(priv, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1); + sem = nv_rd32(priv, NV10_PFIFO_CACHE1_SEMAPHORE); + nv_wr32(priv, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1); - nv_wr32(priv, NV03_PFIFO_CACHE1_GET, get + 4); - nv_wr32(priv, NV04_PFIFO_CACHE1_PULL0, 1); - } + nv_wr32(priv, NV03_PFIFO_CACHE1_GET, get + 4); + nv_wr32(priv, NV04_PFIFO_CACHE1_PULL0, 1); + } - if (device->card_type == NV_50) { - if (status & 0x00000010) { - status &= ~0x00000010; - nv_wr32(priv, 0x002100, 0x00000010); - } - - if (status & 0x40000000) { - nv_wr32(priv, 0x002100, 0x40000000); - nvkm_fifo_uevent(&priv->base); - status &= ~0x40000000; - } + if (device->card_type == NV_50) { + if (stat & 0x00000010) { + stat &= ~0x00000010; + nv_wr32(priv, 0x002100, 0x00000010); } - if (status) { - nv_warn(priv, "unknown intr 0x%08x, ch %d\n", - status, chid); - nv_wr32(priv, NV03_PFIFO_INTR_0, status); - status = 0; + if (stat & 0x40000000) { + nv_wr32(priv, 0x002100, 0x40000000); + nvkm_fifo_uevent(&priv->base); + stat &= ~0x40000000; } - - nv_wr32(priv, NV03_PFIFO_CACHES, reassign); } - if (status) { - nv_error(priv, "still angry after %d spins, halt\n", cnt); - nv_wr32(priv, 0x002140, 0); - nv_wr32(priv, 0x000140, 0); + if (stat) { + nv_warn(priv, "unknown intr 0x%08x\n", stat); + nv_mask(priv, NV03_PFIFO_INTR_EN_0, stat, 0x00000000); + nv_wr32(priv, NV03_PFIFO_INTR_0, stat); } - nv_wr32(priv, 0x000100, 0x00000100); + nv_wr32(priv, NV03_PFIFO_CACHES, reassign); } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 2e7ec389eea7..57e2c5b13123 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1032,9 +1032,9 @@ gf100_grctx_generate_bundle(struct gf100_grctx *info) const int s = 8; const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_refn(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s)); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index b52300d8861a..5e9454ba158f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -851,9 +851,9 @@ gk104_grctx_generate_bundle(struct gf100_grctx *info) const int s = 8; const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_refn(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s)); mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index 956f4dce960c..b2fae6e389e2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -871,9 +871,9 @@ gm107_grctx_generate_bundle(struct gf100_grctx *info) const int s = 8; const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); mmio_refn(info, 0x418e24, 0x00000000, s, b); - mmio_refn(info, 0x418e28, 0x80000000 | (impl->bundle_size >> s), 0, b); + mmio_wr32(info, 0x418e28, 0x80000000 | (impl->bundle_size >> s)); mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c index d1a89b2bd5c1..c4e1f085ee10 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c @@ -74,7 +74,11 @@ dcb_i2c_parse(struct nvkm_bios *bios, u8 idx, struct dcb_i2c_entry *info) u16 ent = dcb_i2c_entry(bios, idx, &ver, &len); if (ent) { if (ver >= 0x41) { - if (!(nv_ro32(bios, ent) & 0x80000000)) + u32 ent_value = nv_ro32(bios, ent); + u8 i2c_port = (ent_value >> 27) & 0x1f; + u8 dpaux_port = (ent_value >> 22) & 0x1f; + /* value 0x1f means unused according to DCB 4.x spec */ + if (i2c_port == 0x1f && dpaux_port == 0x1f) info->type = DCB_I2C_UNUSED; else info->type = DCB_I2C_PMGR; diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index ed644a4f6f57..86807ee91bd1 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1405,6 +1405,9 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, (x << 16) | y); viewport_w = crtc->mode.hdisplay; viewport_h = (crtc->mode.vdisplay + 1) & ~1; + if ((rdev->family >= CHIP_BONAIRE) && + (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE)) + viewport_h *= 2; WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset, (viewport_w << 16) | viewport_h); diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 7fe7b749e182..c39c1d0d9d4e 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1626,7 +1626,6 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) struct radeon_connector *radeon_connector = NULL; struct radeon_connector_atom_dig *radeon_dig_connector = NULL; bool travis_quirk = false; - int encoder_mode; if (connector) { radeon_connector = to_radeon_connector(connector); @@ -1722,13 +1721,6 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) } break; } - - encoder_mode = atombios_get_encoder_mode(encoder); - if (connector && (radeon_audio != 0) && - ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || - (ENCODER_MODE_IS_DP(encoder_mode) && - drm_detect_monitor_audio(radeon_connector_edid(connector))))) - radeon_audio_dpms(encoder, mode); } static void @@ -1737,10 +1729,19 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode) struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + int encoder_mode = atombios_get_encoder_mode(encoder); DRM_DEBUG_KMS("encoder dpms %d to mode %d, devices %08x, active_devices %08x\n", radeon_encoder->encoder_id, mode, radeon_encoder->devices, radeon_encoder->active_device); + + if (connector && (radeon_audio != 0) && + ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || + (ENCODER_MODE_IS_DP(encoder_mode) && + drm_detect_monitor_audio(radeon_connector_edid(connector))))) + radeon_audio_dpms(encoder, mode); + switch (radeon_encoder->encoder_id) { case ENCODER_OBJECT_ID_INTERNAL_TMDS1: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_TMDS1: @@ -2170,12 +2171,6 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3: case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA: /* handled in dpms */ - encoder_mode = atombios_get_encoder_mode(encoder); - if (connector && (radeon_audio != 0) && - ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || - (ENCODER_MODE_IS_DP(encoder_mode) && - drm_detect_monitor_audio(radeon_connector_edid(connector))))) - radeon_audio_mode_set(encoder, adjusted_mode); break; case ENCODER_OBJECT_ID_INTERNAL_DDI: case ENCODER_OBJECT_ID_INTERNAL_DVO1: @@ -2197,6 +2192,13 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder, } atombios_apply_encoder_quirks(encoder, adjusted_mode); + + encoder_mode = atombios_get_encoder_mode(encoder); + if (connector && (radeon_audio != 0) && + ((encoder_mode == ATOM_ENCODER_MODE_HDMI) || + (ENCODER_MODE_IS_DP(encoder_mode) && + drm_detect_monitor_audio(radeon_connector_edid(connector))))) + radeon_audio_mode_set(encoder, adjusted_mode); } static bool diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0c993da9c8fb..3e670d344a20 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7555,6 +7555,9 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, hpd5); WREG32(DC_HPD6_INT_CONTROL, hpd6); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index c648e1996dab..243a36c93b8f 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2129,6 +2129,7 @@ #define VCE_UENC_REG_CLOCK_GATING 0x207c0 #define VCE_SYS_INT_EN 0x21300 # define VCE_SYS_INT_TRAP_INTERRUPT_EN (1 << 3) +#define VCE_LMI_VCPU_CACHE_40BIT_BAR 0x2145c #define VCE_LMI_CTRL2 0x21474 #define VCE_LMI_CTRL 0x21498 #define VCE_LMI_VM_CTRL 0x214a0 diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 192c80389151..3adc2afe32aa 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -26,6 +26,9 @@ #include "radeon_audio.h" #include "sid.h" +#define DCE8_DCCG_AUDIO_DTO1_PHASE 0x05b8 +#define DCE8_DCCG_AUDIO_DTO1_MODULE 0x05bc + u32 dce6_endpoint_rreg(struct radeon_device *rdev, u32 block_offset, u32 reg) { @@ -252,72 +255,67 @@ void dce6_audio_enable(struct radeon_device *rdev, void dce6_hdmi_audio_set_dto(struct radeon_device *rdev, struct radeon_crtc *crtc, unsigned int clock) { - /* Two dtos; generally use dto0 for HDMI */ + /* Two dtos; generally use dto0 for HDMI */ u32 value = 0; - if (crtc) + if (crtc) value |= DCCG_AUDIO_DTO0_SOURCE_SEL(crtc->crtc_id); WREG32(DCCG_AUDIO_DTO_SOURCE, value); - /* Express [24MHz / target pixel clock] as an exact rational - * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE - * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator - */ - WREG32(DCCG_AUDIO_DTO0_PHASE, 24000); - WREG32(DCCG_AUDIO_DTO0_MODULE, clock); + /* Express [24MHz / target pixel clock] as an exact rational + * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE + * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator + */ + WREG32(DCCG_AUDIO_DTO0_PHASE, 24000); + WREG32(DCCG_AUDIO_DTO0_MODULE, clock); } void dce6_dp_audio_set_dto(struct radeon_device *rdev, struct radeon_crtc *crtc, unsigned int clock) { - /* Two dtos; generally use dto1 for DP */ + /* Two dtos; generally use dto1 for DP */ u32 value = 0; value |= DCCG_AUDIO_DTO_SEL; - if (crtc) + if (crtc) value |= DCCG_AUDIO_DTO0_SOURCE_SEL(crtc->crtc_id); WREG32(DCCG_AUDIO_DTO_SOURCE, value); - /* Express [24MHz / target pixel clock] as an exact rational - * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE - * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator - */ - WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); - WREG32(DCCG_AUDIO_DTO1_MODULE, clock); + /* Express [24MHz / target pixel clock] as an exact rational + * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE + * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator + */ + if (ASIC_IS_DCE8(rdev)) { + WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); + WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); + } else { + WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); + WREG32(DCCG_AUDIO_DTO1_MODULE, clock); + } } -void dce6_enable_dp_audio_packets(struct drm_encoder *encoder, bool enable) +void dce6_dp_enable(struct drm_encoder *encoder, bool enable) { struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - uint32_t offset; if (!dig || !dig->afmt) return; - offset = dig->afmt->offset; - if (enable) { - if (dig->afmt->enabled) - return; - - WREG32(EVERGREEN_DP_SEC_TIMESTAMP + offset, EVERGREEN_DP_SEC_TIMESTAMP_MODE(1)); - WREG32(EVERGREEN_DP_SEC_CNTL + offset, - EVERGREEN_DP_SEC_ASP_ENABLE | /* Audio packet transmission */ - EVERGREEN_DP_SEC_ATP_ENABLE | /* Audio timestamp packet transmission */ - EVERGREEN_DP_SEC_AIP_ENABLE | /* Audio infoframe packet transmission */ - EVERGREEN_DP_SEC_STREAM_ENABLE); /* Master enable for secondary stream engine */ - radeon_audio_enable(rdev, dig->afmt->pin, true); + WREG32(EVERGREEN_DP_SEC_TIMESTAMP + dig->afmt->offset, + EVERGREEN_DP_SEC_TIMESTAMP_MODE(1)); + WREG32(EVERGREEN_DP_SEC_CNTL + dig->afmt->offset, + EVERGREEN_DP_SEC_ASP_ENABLE | /* Audio packet transmission */ + EVERGREEN_DP_SEC_ATP_ENABLE | /* Audio timestamp packet transmission */ + EVERGREEN_DP_SEC_AIP_ENABLE | /* Audio infoframe packet transmission */ + EVERGREEN_DP_SEC_STREAM_ENABLE); /* Master enable for secondary stream engine */ } else { - if (!dig->afmt->enabled) - return; - - WREG32(EVERGREEN_DP_SEC_CNTL + offset, 0); - radeon_audio_enable(rdev, dig->afmt->pin, false); + WREG32(EVERGREEN_DP_SEC_CNTL + dig->afmt->offset, 0); } dig->afmt->enabled = enable; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 4c0e24b3bb90..973df064c14f 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4593,6 +4593,9 @@ int evergreen_irq_set(struct radeon_device *rdev) WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, afmt5); WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, afmt6); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 1d9aebc79595..c18d4ecbd95d 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -272,7 +272,7 @@ void dce4_hdmi_audio_set_dto(struct radeon_device *rdev, } void dce4_dp_audio_set_dto(struct radeon_device *rdev, - struct radeon_crtc *crtc, unsigned int clock) + struct radeon_crtc *crtc, unsigned int clock) { u32 value; @@ -294,7 +294,7 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev, * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); - WREG32(DCCG_AUDIO_DTO1_MODULE, rdev->clock.max_pixel_clock * 10); + WREG32(DCCG_AUDIO_DTO1_MODULE, clock); } void dce4_set_vbi_packet(struct drm_encoder *encoder, u32 offset) @@ -350,20 +350,9 @@ void dce4_set_audio_packet(struct drm_encoder *encoder, u32 offset) struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; - WREG32(HDMI_INFOFRAME_CONTROL0 + offset, - HDMI_AUDIO_INFO_SEND | /* enable audio info frames (frames won't be set until audio is enabled) */ - HDMI_AUDIO_INFO_CONT); /* required for audio info values to be updated */ - WREG32(AFMT_INFOFRAME_CONTROL0 + offset, AFMT_AUDIO_INFO_UPDATE); /* required for audio info values to be updated */ - WREG32(HDMI_INFOFRAME_CONTROL1 + offset, - HDMI_AUDIO_INFO_LINE(2)); /* anything other than 0 */ - - WREG32(HDMI_AUDIO_PACKET_CONTROL + offset, - HDMI_AUDIO_DELAY_EN(1) | /* set the default audio delay */ - HDMI_AUDIO_PACKETS_PER_LINE(3)); /* should be suffient for all audio modes and small enough for all hblanks */ - WREG32(AFMT_60958_0 + offset, AFMT_60958_CS_CHANNEL_NUMBER_L(1)); @@ -408,15 +397,19 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!dig || !dig->afmt) return; - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (enable && dig->afmt->enabled) - return; - if (!enable && !dig->afmt->enabled) - return; + if (enable) { + WREG32(HDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, + HDMI_AUDIO_INFO_LINE(2)); /* anything other than 0 */ + + WREG32(HDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, + HDMI_AUDIO_DELAY_EN(1) | /* set the default audio delay */ + HDMI_AUDIO_PACKETS_PER_LINE(3)); /* should be suffient for all audio modes and small enough for all hblanks */ - if (!enable && dig->afmt->pin) { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; + WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, + HDMI_AUDIO_INFO_SEND | /* enable audio info frames (frames won't be set until audio is enabled) */ + HDMI_AUDIO_INFO_CONT); /* required for audio info values to be updated */ + } else { + WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, 0); } dig->afmt->enabled = enable; @@ -425,33 +418,28 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) enable ? "En" : "Dis", dig->afmt->offset, radeon_encoder->encoder_id); } -void evergreen_enable_dp_audio_packets(struct drm_encoder *encoder, bool enable) +void evergreen_dp_enable(struct drm_encoder *encoder, bool enable) { struct drm_device *dev = encoder->dev; struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; - uint32_t offset; if (!dig || !dig->afmt) return; - offset = dig->afmt->offset; - if (enable) { struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *dig_connector; uint32_t val; - if (dig->afmt->enabled) - return; - - WREG32(EVERGREEN_DP_SEC_TIMESTAMP + offset, EVERGREEN_DP_SEC_TIMESTAMP_MODE(1)); + WREG32(EVERGREEN_DP_SEC_TIMESTAMP + dig->afmt->offset, + EVERGREEN_DP_SEC_TIMESTAMP_MODE(1)); if (radeon_connector->con_priv) { dig_connector = radeon_connector->con_priv; - val = RREG32(EVERGREEN_DP_SEC_AUD_N + offset); + val = RREG32(EVERGREEN_DP_SEC_AUD_N + dig->afmt->offset); val &= ~EVERGREEN_DP_SEC_N_BASE_MULTIPLE(0xf); if (dig_connector->dp_clock == 162000) @@ -459,21 +447,16 @@ void evergreen_enable_dp_audio_packets(struct drm_encoder *encoder, bool enable) else val |= EVERGREEN_DP_SEC_N_BASE_MULTIPLE(5); - WREG32(EVERGREEN_DP_SEC_AUD_N + offset, val); + WREG32(EVERGREEN_DP_SEC_AUD_N + dig->afmt->offset, val); } - WREG32(EVERGREEN_DP_SEC_CNTL + offset, + WREG32(EVERGREEN_DP_SEC_CNTL + dig->afmt->offset, EVERGREEN_DP_SEC_ASP_ENABLE | /* Audio packet transmission */ EVERGREEN_DP_SEC_ATP_ENABLE | /* Audio timestamp packet transmission */ EVERGREEN_DP_SEC_AIP_ENABLE | /* Audio infoframe packet transmission */ EVERGREEN_DP_SEC_STREAM_ENABLE); /* Master enable for secondary stream engine */ - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); } else { - if (!dig->afmt->enabled) - return; - - WREG32(EVERGREEN_DP_SEC_CNTL + offset, 0); - radeon_audio_enable(rdev, dig->afmt->pin, 0); + WREG32(EVERGREEN_DP_SEC_CNTL + dig->afmt->offset, 0); } dig->afmt->enabled = enable; diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 279801ca5110..04f2514f7564 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -728,6 +728,10 @@ int r100_irq_set(struct radeon_device *rdev) tmp |= RADEON_FP2_DETECT_MASK; } WREG32(RADEON_GEN_INT_CNTL, tmp); + + /* read back to post the write */ + RREG32(RADEON_GEN_INT_CNTL); + return 0; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 07a71a2488c9..2fcad344492f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3784,6 +3784,9 @@ int r600_irq_set(struct radeon_device *rdev) WREG32(RV770_CG_THERMAL_INT, thermal_int); } + /* posting read */ + RREG32(R_000E50_SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 62c91ed669ce..dd6606b8e23c 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -476,17 +476,6 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!dig || !dig->afmt) return; - /* Silent, r600_hdmi_enable will raise WARN for us */ - if (enable && dig->afmt->enabled) - return; - if (!enable && !dig->afmt->enabled) - return; - - if (!enable && dig->afmt->pin) { - radeon_audio_enable(rdev, dig->afmt->pin, 0); - dig->afmt->pin = NULL; - } - /* Older chipsets require setting HDMI and routing manually */ if (!ASIC_IS_DCE3(rdev)) { if (enable) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5587603b4a89..33d5a4f4eebd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1565,6 +1565,7 @@ struct radeon_dpm { int new_active_crtc_count; u32 current_active_crtcs; int current_active_crtc_count; + bool single_display; struct radeon_dpm_dynamic_state dyn_state; struct radeon_dpm_fan fan; u32 tdp_limit; diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index a3ceef6d9632..b21ef69a34ac 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -101,8 +101,8 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); void r600_hdmi_enable(struct drm_encoder *encoder, bool enable); void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable); -void evergreen_enable_dp_audio_packets(struct drm_encoder *encoder, bool enable); -void dce6_enable_dp_audio_packets(struct drm_encoder *encoder, bool enable); +void evergreen_dp_enable(struct drm_encoder *encoder, bool enable); +void dce6_dp_enable(struct drm_encoder *encoder, bool enable); static const u32 pin_offsets[7] = { @@ -210,7 +210,7 @@ static struct radeon_audio_funcs dce4_dp_funcs = { .set_avi_packet = evergreen_set_avi_packet, .set_audio_packet = dce4_set_audio_packet, .mode_set = radeon_audio_dp_mode_set, - .dpms = evergreen_enable_dp_audio_packets, + .dpms = evergreen_dp_enable, }; static struct radeon_audio_funcs dce6_hdmi_funcs = { @@ -240,7 +240,7 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .set_avi_packet = evergreen_set_avi_packet, .set_audio_packet = dce4_set_audio_packet, .mode_set = radeon_audio_dp_mode_set, - .dpms = dce6_enable_dp_audio_packets, + .dpms = dce6_dp_enable, }; static void radeon_audio_interface_init(struct radeon_device *rdev) @@ -452,7 +452,7 @@ void radeon_audio_enable(struct radeon_device *rdev, } void radeon_audio_detect(struct drm_connector *connector, - enum drm_connector_status status) + enum drm_connector_status status) { struct radeon_device *rdev; struct radeon_encoder *radeon_encoder; @@ -483,14 +483,11 @@ void radeon_audio_detect(struct drm_connector *connector, else radeon_encoder->audio = rdev->audio.hdmi_funcs; - radeon_audio_write_speaker_allocation(connector->encoder); - radeon_audio_write_sad_regs(connector->encoder); - if (connector->encoder->crtc) - radeon_audio_write_latency_fields(connector->encoder, - &connector->encoder->crtc->mode); + dig->afmt->pin = radeon_audio_get_pin(connector->encoder); radeon_audio_enable(rdev, dig->afmt->pin, 0xf); } else { radeon_audio_enable(rdev, dig->afmt->pin, 0); + dig->afmt->pin = NULL; } } @@ -694,23 +691,22 @@ static void radeon_audio_set_mute(struct drm_encoder *encoder, bool mute) * update the info frames with the data from the current display mode */ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode) + struct drm_display_mode *mode) { - struct radeon_device *rdev = encoder->dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; if (!dig || !dig->afmt) return; - /* disable audio prior to setting up hw */ - dig->afmt->pin = radeon_audio_get_pin(encoder); - radeon_audio_enable(rdev, dig->afmt->pin, 0); + radeon_audio_set_mute(encoder, true); + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); radeon_audio_set_dto(encoder, mode->clock); radeon_audio_set_vbi_packet(encoder); radeon_hdmi_set_color_depth(encoder); - radeon_audio_set_mute(encoder, false); radeon_audio_update_acr(encoder, mode->clock); radeon_audio_set_audio_packet(encoder); radeon_audio_select_pin(encoder); @@ -718,8 +714,7 @@ static void radeon_audio_hdmi_mode_set(struct drm_encoder *encoder, if (radeon_audio_set_avi_packet(encoder, mode) < 0) return; - /* enable audio after to setting up hw */ - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); + radeon_audio_set_mute(encoder, false); } static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, @@ -729,23 +724,26 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + struct radeon_connector_atom_dig *dig_connector = + radeon_connector->con_priv; if (!dig || !dig->afmt) return; - /* disable audio prior to setting up hw */ - dig->afmt->pin = radeon_audio_get_pin(encoder); - radeon_audio_enable(rdev, dig->afmt->pin, 0); - - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); + radeon_audio_write_speaker_allocation(encoder); + radeon_audio_write_sad_regs(encoder); + radeon_audio_write_latency_fields(encoder, mode); + if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) + radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); + else + radeon_audio_set_dto(encoder, dig_connector->dp_clock); radeon_audio_set_audio_packet(encoder); radeon_audio_select_pin(encoder); if (radeon_audio_set_avi_packet(encoder, mode) < 0) return; - - /* enable audio after to setting up hw */ - radeon_audio_enable(rdev, dig->afmt->pin, 0xf); } void radeon_audio_mode_set(struct drm_encoder *encoder, diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 63ccb8fa799c..d27e4ccb848c 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) static bool radeon_read_bios(struct radeon_device *rdev) { - uint8_t __iomem *bios; + uint8_t __iomem *bios, val1, val2; size_t size; rdev->bios = NULL; @@ -86,15 +86,19 @@ static bool radeon_read_bios(struct radeon_device *rdev) return false; } - if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { + val1 = readb(&bios[0]); + val2 = readb(&bios[1]); + + if (size == 0 || val1 != 0x55 || val2 != 0xaa) { pci_unmap_rom(rdev->pdev, bios); return false; } - rdev->bios = kmemdup(bios, size, GFP_KERNEL); + rdev->bios = kzalloc(size, GFP_KERNEL); if (rdev->bios == NULL) { pci_unmap_rom(rdev->pdev, bios); return false; } + memcpy_fromio(rdev->bios, bios, size); pci_unmap_rom(rdev->pdev, bios); return true; } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index a579ed379f20..4d0f96cc3da4 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -256,11 +256,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) u32 ring = RADEON_CS_RING_GFX; s32 priority = 0; + INIT_LIST_HEAD(&p->validated); + if (!cs->num_chunks) { return 0; } + /* get chunks */ - INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL; p->const_ib.sa_bo = NULL; diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index d13d1b5a859f..df09ca7c4889 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -1030,37 +1030,59 @@ static inline bool radeon_test_signaled(struct radeon_fence *fence) return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); } +struct radeon_wait_cb { + struct fence_cb base; + struct task_struct *task; +}; + +static void +radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb) +{ + struct radeon_wait_cb *wait = + container_of(cb, struct radeon_wait_cb, base); + + wake_up_process(wait->task); +} + static signed long radeon_fence_default_wait(struct fence *f, bool intr, signed long t) { struct radeon_fence *fence = to_radeon_fence(f); struct radeon_device *rdev = fence->rdev; - bool signaled; + struct radeon_wait_cb cb; - fence_enable_sw_signaling(&fence->base); + cb.task = current; - /* - * This function has to return -EDEADLK, but cannot hold - * exclusive_lock during the wait because some callers - * may already hold it. This means checking needs_reset without - * lock, and not fiddling with any gpu internals. - * - * The callback installed with fence_enable_sw_signaling will - * run before our wait_event_*timeout call, so we will see - * both the signaled fence and the changes to needs_reset. - */ + if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb)) + return t; + + while (t > 0) { + if (intr) + set_current_state(TASK_INTERRUPTIBLE); + else + set_current_state(TASK_UNINTERRUPTIBLE); + + /* + * radeon_test_signaled must be called after + * set_current_state to prevent a race with wake_up_process + */ + if (radeon_test_signaled(fence)) + break; + + if (rdev->needs_reset) { + t = -EDEADLK; + break; + } + + t = schedule_timeout(t); + + if (t > 0 && intr && signal_pending(current)) + t = -ERESTARTSYS; + } + + __set_current_state(TASK_RUNNING); + fence_remove_callback(f, &cb.base); - if (intr) - t = wait_event_interruptible_timeout(rdev->fence_queue, - ((signaled = radeon_test_signaled(fence)) || - rdev->needs_reset), t); - else - t = wait_event_timeout(rdev->fence_queue, - ((signaled = radeon_test_signaled(fence)) || - rdev->needs_reset), t); - - if (t > 0 && !signaled) - return -EDEADLK; return t; } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 061eaa9c19c7..122eb5693ba1 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -153,7 +153,7 @@ void radeon_kfd_device_init(struct radeon_device *rdev) .compute_vmid_bitmap = 0xFF00, .first_compute_pipe = 1, - .compute_pipe_count = 8 - 1, + .compute_pipe_count = 4 - 1, }; radeon_doorbell_get_kfd_info(rdev, diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index a69bd441dd2d..572b4dbec186 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -122,7 +122,6 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, it = interval_tree_iter_first(&rmn->objects, start, end); while (it) { struct radeon_bo *bo; - struct fence *fence; int r; bo = container_of(it, struct radeon_bo, mn_it); @@ -134,12 +133,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, continue; } - fence = reservation_object_get_excl(bo->tbo.resv); - if (fence) { - r = radeon_fence_wait((struct radeon_fence *)fence, false); - if (r) - DRM_ERROR("(%d) failed to wait for user bo\n", r); - } + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, + false, MAX_SCHEDULE_TIMEOUT); + if (r) + DRM_ERROR("(%d) failed to wait for user bo\n", r); radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 43e09942823e..318165d4855c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -173,17 +173,6 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) else rbo->placements[i].lpfn = 0; } - - /* - * Use two-ended allocation depending on the buffer size to - * improve fragmentation quality. - * 512kb was measured as the most optimal number. - */ - if (rbo->tbo.mem.size > 512 * 1024) { - for (i = 0; i < c; i++) { - rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; - } - } } int radeon_bo_create(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 33cf4108386d..c1ba83a8dd8c 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -837,12 +837,8 @@ static void radeon_dpm_thermal_work_handler(struct work_struct *work) radeon_pm_compute_clocks(rdev); } -static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, - enum radeon_pm_state_type dpm_state) +static bool radeon_dpm_single_display(struct radeon_device *rdev) { - int i; - struct radeon_ps *ps; - u32 ui_class; bool single_display = (rdev->pm.dpm.new_active_crtc_count < 2) ? true : false; @@ -858,6 +854,17 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, if (single_display && (r600_dpm_get_vrefresh(rdev) >= 120)) single_display = false; + return single_display; +} + +static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev, + enum radeon_pm_state_type dpm_state) +{ + int i; + struct radeon_ps *ps; + u32 ui_class; + bool single_display = radeon_dpm_single_display(rdev); + /* certain older asics have a separare 3D performance state, * so try that first if the user selected performance */ @@ -983,6 +990,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) struct radeon_ps *ps; enum radeon_pm_state_type dpm_state; int ret; + bool single_display = radeon_dpm_single_display(rdev); /* if dpm init failed */ if (!rdev->pm.dpm_enabled) @@ -1007,6 +1015,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev) /* vce just modifies an existing state so force a change */ if (ps->vce_active != rdev->pm.dpm.vce_active) goto force; + /* user has made a display change (such as timing) */ + if (rdev->pm.dpm.single_display != single_display) + goto force; if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) { /* for pre-BTC and APUs if the num crtcs changed but state is the same, * all we need to do is update the display configuration. @@ -1069,6 +1080,7 @@ force: rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; + rdev->pm.dpm.single_display = single_display; /* wait for the rings to drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 2456f69efd23..8c7872339c2a 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -495,7 +495,7 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); seq_printf(m, "%u dwords in ring\n", count); - if (!ring->ready) + if (!ring->ring) return 0; /* print 8 dw before current rptr as often it's the last executed diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index d02aa1d0f588..b292aca0f342 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -598,6 +598,10 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + /* double check that we don't free the table twice */ + if (!ttm->sg->sgl) + return; + /* free the sg table and pages again */ dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index d81182ad53ec..97a904835759 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -694,6 +694,10 @@ int rs600_irq_set(struct radeon_device *rdev) WREG32(R_007D18_DC_HOT_PLUG_DETECT2_INT_CONTROL, hpd2); if (ASIC_IS_DCE2(rdev)) WREG32(R_007408_HDMI0_AUDIO_PACKET_CONTROL, hdmi0); + + /* posting read */ + RREG32(R_000040_GEN_INT_CNTL); + return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index bcf516a8a2f1..a7fb2735d4a9 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6203,6 +6203,9 @@ int si_irq_set(struct radeon_device *rdev) WREG32(CG_THERMAL_INT, thermal_int); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } @@ -7127,8 +7130,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK); if (!vclk || !dclk) { - /* keep the Bypass mode, put PLL to sleep */ - WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + /* keep the Bypass mode */ return 0; } @@ -7144,8 +7146,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) /* set VCO_MODE to 1 */ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK); - /* toggle UPLL_SLEEP to 1 then back to 0 */ - WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + /* disable sleep mode */ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK); /* deassert UPLL_RESET */ diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index c27118cab16a..99a9835c9f61 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -912,8 +912,8 @@ #define DCCG_AUDIO_DTO0_PHASE 0x05b0 #define DCCG_AUDIO_DTO0_MODULE 0x05b4 -#define DCCG_AUDIO_DTO1_PHASE 0x05b8 -#define DCCG_AUDIO_DTO1_MODULE 0x05bc +#define DCCG_AUDIO_DTO1_PHASE 0x05c0 +#define DCCG_AUDIO_DTO1_MODULE 0x05c4 #define AFMT_AUDIO_SRC_CONTROL 0x713c #define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index 1ac7bb825a1b..fbbe78fbd087 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -156,6 +156,9 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32(VCE_LMI_SWAP_CNTL1, 0); WREG32(VCE_LMI_VM_CTRL, 0); + WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8); + + addr &= 0xff; size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE0, size); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d395b0bef73b..8d9b7de25613 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -74,7 +74,7 @@ static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type) pr_err(" has_type: %d\n", man->has_type); pr_err(" use_type: %d\n", man->use_type); pr_err(" flags: 0x%08X\n", man->flags); - pr_err(" gpu_offset: 0x%08lX\n", man->gpu_offset); + pr_err(" gpu_offset: 0x%08llX\n", man->gpu_offset); pr_err(" size: %llu\n", man->size); pr_err(" available_caching: 0x%08X\n", man->available_caching); pr_err(" default_caching: 0x%08X\n", man->default_caching); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 6c6b655defcf..e13b9cbc304e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -725,32 +725,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) goto out_err1; } - ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM, - (dev_priv->vram_size >> PAGE_SHIFT)); - if (unlikely(ret != 0)) { - DRM_ERROR("Failed initializing memory manager for VRAM.\n"); - goto out_err2; - } - - dev_priv->has_gmr = true; - if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || - refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, - VMW_PL_GMR) != 0) { - DRM_INFO("No GMR memory available. " - "Graphics memory resources are very limited.\n"); - dev_priv->has_gmr = false; - } - - if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) { - dev_priv->has_mob = true; - if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB, - VMW_PL_MOB) != 0) { - DRM_INFO("No MOB memory available. " - "3D will be disabled.\n"); - dev_priv->has_mob = false; - } - } - dev_priv->mmio_mtrr = arch_phys_wc_add(dev_priv->mmio_start, dev_priv->mmio_size); @@ -813,6 +787,33 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) goto out_no_fman; } + + ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM, + (dev_priv->vram_size >> PAGE_SHIFT)); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed initializing memory manager for VRAM.\n"); + goto out_no_vram; + } + + dev_priv->has_gmr = true; + if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || + refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, + VMW_PL_GMR) != 0) { + DRM_INFO("No GMR memory available. " + "Graphics memory resources are very limited.\n"); + dev_priv->has_gmr = false; + } + + if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) { + dev_priv->has_mob = true; + if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB, + VMW_PL_MOB) != 0) { + DRM_INFO("No MOB memory available. " + "3D will be disabled.\n"); + dev_priv->has_mob = false; + } + } + vmw_kms_save_vga(dev_priv); /* Start kms and overlay systems, needs fifo. */ @@ -838,6 +839,12 @@ out_no_fifo: vmw_kms_close(dev_priv); out_no_kms: vmw_kms_restore_vga(dev_priv); + if (dev_priv->has_mob) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); + if (dev_priv->has_gmr) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); +out_no_vram: vmw_fence_manager_takedown(dev_priv->fman); out_no_fman: if (dev_priv->capabilities & SVGA_CAP_IRQMASK) @@ -853,12 +860,6 @@ out_err4: iounmap(dev_priv->mmio_virt); out_err3: arch_phys_wc_del(dev_priv->mmio_mtrr); - if (dev_priv->has_mob) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); - if (dev_priv->has_gmr) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); - (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); -out_err2: (void)ttm_bo_device_release(&dev_priv->bdev); out_err1: vmw_ttm_global_release(dev_priv); @@ -887,6 +888,13 @@ static int vmw_driver_unload(struct drm_device *dev) } vmw_kms_close(dev_priv); vmw_overlay_close(dev_priv); + + if (dev_priv->has_mob) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); + if (dev_priv->has_gmr) + (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); + vmw_fence_manager_takedown(dev_priv->fman); if (dev_priv->capabilities & SVGA_CAP_IRQMASK) drm_irq_uninstall(dev_priv->dev); @@ -898,11 +906,6 @@ static int vmw_driver_unload(struct drm_device *dev) ttm_object_device_release(&dev_priv->tdev); iounmap(dev_priv->mmio_virt); arch_phys_wc_del(dev_priv->mmio_mtrr); - if (dev_priv->has_mob) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); - if (dev_priv->has_gmr) - (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); - (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); (void)ttm_bo_device_release(&dev_priv->bdev); vmw_ttm_global_release(dev_priv); @@ -1235,6 +1238,7 @@ static void vmw_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + pci_disable_device(pdev); drm_put_dev(dev); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33176d05db35..654c8daeb5ab 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -890,7 +890,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use MOB buffer.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_no_reloc; } bo = &vmw_bo->base; @@ -914,7 +915,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, out_no_reloc: vmw_dmabuf_unreference(&vmw_bo); - vmw_bo_p = NULL; + *vmw_bo_p = NULL; return ret; } @@ -951,7 +952,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo); if (unlikely(ret != 0)) { DRM_ERROR("Could not find or use GMR region.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_no_reloc; } bo = &vmw_bo->base; @@ -974,7 +976,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, out_no_reloc: vmw_dmabuf_unreference(&vmw_bo); - vmw_bo_p = NULL; + *vmw_bo_p = NULL; return ret; } @@ -2780,13 +2782,11 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data, NULL, arg->command_size, arg->throttle_us, (void __user *)(unsigned long)arg->fence_rep, NULL); - + ttm_read_unlock(&dev_priv->reservation_sem); if (unlikely(ret != 0)) - goto out_unlock; + return ret; vmw_kms_cursor_post_execbuf(dev_priv); -out_unlock: - ttm_read_unlock(&dev_priv->reservation_sem); - return ret; + return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 8725b79e7847..07cda8cbbddb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2033,23 +2033,17 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, int i; struct drm_mode_config *mode_config = &dev->mode_config; - ret = ttm_read_lock(&dev_priv->reservation_sem, true); - if (unlikely(ret != 0)) - return ret; - if (!arg->num_outputs) { struct drm_vmw_rect def_rect = {0, 0, 800, 600}; vmw_du_update_layout(dev_priv, 1, &def_rect); - goto out_unlock; + return 0; } rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect); rects = kcalloc(arg->num_outputs, sizeof(struct drm_vmw_rect), GFP_KERNEL); - if (unlikely(!rects)) { - ret = -ENOMEM; - goto out_unlock; - } + if (unlikely(!rects)) + return -ENOMEM; user_rects = (void __user *)(unsigned long)arg->rects; ret = copy_from_user(rects, user_rects, rects_size); @@ -2074,7 +2068,5 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, out_free: kfree(rects); -out_unlock: - ttm_read_unlock(&dev_priv->reservation_sem); return ret; } diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c index b61d6be97602..3ddfb3d0b64d 100644 --- a/drivers/gpu/ipu-v3/ipu-di.c +++ b/drivers/gpu/ipu-v3/ipu-di.c @@ -459,6 +459,8 @@ static void ipu_di_config_clock(struct ipu_di *di, clkrate = clk_get_rate(di->clk_ipu); div = DIV_ROUND_CLOSEST(clkrate, sig->mode.pixelclock); + if (div == 0) + div = 1; rate = clkrate / div; error = rate / (sig->mode.pixelclock / 1000); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 7c669c328c4c..56ce8c2b5530 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1959,6 +1959,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) }, { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) }, { HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 204312bfab2c..9c4786759f16 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -586,6 +586,7 @@ #define USB_VENDOR_ID_LOGITECH 0x046d #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e #define USB_DEVICE_ID_LOGITECH_T651 0xb00c +#define USB_DEVICE_ID_LOGITECH_C077 0xc007 #define USB_DEVICE_ID_LOGITECH_RECEIVER 0xc101 #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST 0xc110 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f @@ -898,6 +899,7 @@ #define USB_VENDOR_ID_TIVO 0x150a #define USB_DEVICE_ID_TIVO_SLIDE_BT 0x1200 #define USB_DEVICE_ID_TIVO_SLIDE 0x1201 +#define USB_DEVICE_ID_TIVO_SLIDE_PRO 0x1203 #define USB_VENDOR_ID_TOPSEED 0x0766 #define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204 diff --git a/drivers/hid/hid-tivo.c b/drivers/hid/hid-tivo.c index d790d8d71f7f..d98696927453 100644 --- a/drivers/hid/hid-tivo.c +++ b/drivers/hid/hid-tivo.c @@ -64,6 +64,7 @@ static const struct hid_device_id tivo_devices[] = { /* TiVo Slide Bluetooth remote, pairs with a Broadcom dongle */ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) }, { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) }, + { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) }, { } }; MODULE_DEVICE_TABLE(hid, tivo_devices); diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 9be99a67bfe2..a82127753461 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -78,6 +78,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET }, { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET }, + { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3_JP, HID_QUIRK_NO_INIT_REPORTS }, diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 046351cf17f3..bbe32d66e500 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -551,9 +551,13 @@ static int wacom_intuos_inout(struct wacom_wac *wacom) (features->type == CINTIQ && !(data[1] & 0x40))) return 1; - if (features->quirks & WACOM_QUIRK_MULTI_INPUT) + if (wacom->shared) { wacom->shared->stylus_in_proximity = true; + if (wacom->shared->touch_down) + return 1; + } + /* in Range while exiting */ if (((data[1] & 0xfe) == 0x20) && wacom->reporting_data) { input_report_key(input, BTN_TOUCH, 0); @@ -1043,27 +1047,28 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom) struct input_dev *input = wacom->input; unsigned char *data = wacom->data; int i; - int current_num_contacts = 0; + int current_num_contacts = data[61]; int contacts_to_send = 0; int num_contacts_left = 4; /* maximum contacts per packet */ int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET; int y_offset = 2; + static int contact_with_no_pen_down_count = 0; if (wacom->features.type == WACOM_27QHDT) { current_num_contacts = data[63]; num_contacts_left = 10; byte_per_packet = WACOM_BYTES_PER_QHDTHID_PACKET; y_offset = 0; - } else { - current_num_contacts = data[61]; } /* * First packet resets the counter since only the first * packet in series will have non-zero current_num_contacts. */ - if (current_num_contacts) + if (current_num_contacts) { wacom->num_contacts_left = current_num_contacts; + contact_with_no_pen_down_count = 0; + } contacts_to_send = min(num_contacts_left, wacom->num_contacts_left); @@ -1096,15 +1101,16 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom) input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h)); input_report_abs(input, ABS_MT_ORIENTATION, w > h); } + contact_with_no_pen_down_count++; } } input_mt_report_pointer_emulation(input, true); wacom->num_contacts_left -= contacts_to_send; - if (wacom->num_contacts_left <= 0) + if (wacom->num_contacts_left <= 0) { wacom->num_contacts_left = 0; - - wacom->shared->touch_down = (wacom->num_contacts_left > 0); + wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + } return 1; } @@ -1116,6 +1122,7 @@ static int wacom_mt_touch(struct wacom_wac *wacom) int current_num_contacts = data[2]; int contacts_to_send = 0; int x_offset = 0; + static int contact_with_no_pen_down_count = 0; /* MTTPC does not support Height and Width */ if (wacom->features.type == MTTPC || wacom->features.type == MTTPC_B) @@ -1125,8 +1132,10 @@ static int wacom_mt_touch(struct wacom_wac *wacom) * First packet resets the counter since only the first * packet in series will have non-zero current_num_contacts. */ - if (current_num_contacts) + if (current_num_contacts) { wacom->num_contacts_left = current_num_contacts; + contact_with_no_pen_down_count = 0; + } /* There are at most 5 contacts per packet */ contacts_to_send = min(5, wacom->num_contacts_left); @@ -1147,15 +1156,16 @@ static int wacom_mt_touch(struct wacom_wac *wacom) int y = get_unaligned_le16(&data[offset + x_offset + 9]); input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); + contact_with_no_pen_down_count++; } } input_mt_report_pointer_emulation(input, true); wacom->num_contacts_left -= contacts_to_send; - if (wacom->num_contacts_left < 0) + if (wacom->num_contacts_left <= 0) { wacom->num_contacts_left = 0; - - wacom->shared->touch_down = (wacom->num_contacts_left > 0); + wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); + } return 1; } @@ -1193,29 +1203,25 @@ static int wacom_tpc_single_touch(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; struct input_dev *input = wacom->input; - bool prox; + bool prox = !wacom->shared->stylus_in_proximity; int x = 0, y = 0; if (wacom->features.touch_max > 1 || len > WACOM_PKGLEN_TPC2FG) return 0; - if (!wacom->shared->stylus_in_proximity) { - if (len == WACOM_PKGLEN_TPC1FG) { - prox = data[0] & 0x01; - x = get_unaligned_le16(&data[1]); - y = get_unaligned_le16(&data[3]); - } else if (len == WACOM_PKGLEN_TPC1FG_B) { - prox = data[2] & 0x01; - x = get_unaligned_le16(&data[3]); - y = get_unaligned_le16(&data[5]); - } else { - prox = data[1] & 0x01; - x = le16_to_cpup((__le16 *)&data[2]); - y = le16_to_cpup((__le16 *)&data[4]); - } - } else - /* force touch out when pen is in prox */ - prox = 0; + if (len == WACOM_PKGLEN_TPC1FG) { + prox = prox && (data[0] & 0x01); + x = get_unaligned_le16(&data[1]); + y = get_unaligned_le16(&data[3]); + } else if (len == WACOM_PKGLEN_TPC1FG_B) { + prox = prox && (data[2] & 0x01); + x = get_unaligned_le16(&data[3]); + y = get_unaligned_le16(&data[5]); + } else { + prox = prox && (data[1] & 0x01); + x = le16_to_cpup((__le16 *)&data[2]); + y = le16_to_cpup((__le16 *)&data[4]); + } if (prox) { input_report_abs(input, ABS_X, x); @@ -1613,6 +1619,7 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) struct input_dev *pad_input = wacom->pad_input; unsigned char *data = wacom->data; int i; + int contact_with_no_pen_down_count = 0; if (data[0] != 0x02) return 0; @@ -1640,6 +1647,7 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) } input_report_abs(input, ABS_MT_POSITION_X, x); input_report_abs(input, ABS_MT_POSITION_Y, y); + contact_with_no_pen_down_count++; } } @@ -1649,11 +1657,12 @@ static int wacom_bpt_touch(struct wacom_wac *wacom) input_report_key(pad_input, BTN_FORWARD, (data[1] & 0x04) != 0); input_report_key(pad_input, BTN_BACK, (data[1] & 0x02) != 0); input_report_key(pad_input, BTN_RIGHT, (data[1] & 0x01) != 0); + wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); return 1; } -static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data) +static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, int last_touch_count) { struct wacom_features *features = &wacom->features; struct input_dev *input = wacom->input; @@ -1661,7 +1670,7 @@ static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data) int slot = input_mt_get_slot_by_key(input, data[0]); if (slot < 0) - return; + return 0; touch = touch && !wacom->shared->stylus_in_proximity; @@ -1693,7 +1702,9 @@ static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data) input_report_abs(input, ABS_MT_POSITION_Y, y); input_report_abs(input, ABS_MT_TOUCH_MAJOR, width); input_report_abs(input, ABS_MT_TOUCH_MINOR, height); + last_touch_count++; } + return last_touch_count; } static void wacom_bpt3_button_msg(struct wacom_wac *wacom, unsigned char *data) @@ -1718,6 +1729,7 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom) unsigned char *data = wacom->data; int count = data[1] & 0x07; int i; + int contact_with_no_pen_down_count = 0; if (data[0] != 0x02) return 0; @@ -1728,12 +1740,15 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom) int msg_id = data[offset]; if (msg_id >= 2 && msg_id <= 17) - wacom_bpt3_touch_msg(wacom, data + offset); + contact_with_no_pen_down_count = + wacom_bpt3_touch_msg(wacom, data + offset, + contact_with_no_pen_down_count); else if (msg_id == 128) wacom_bpt3_button_msg(wacom, data + offset); } input_mt_report_pointer_emulation(input, true); + wacom->shared->touch_down = (contact_with_no_pen_down_count > 0); return 1; } @@ -1759,6 +1774,9 @@ static int wacom_bpt_pen(struct wacom_wac *wacom) return 0; } + if (wacom->shared->touch_down) + return 0; + prox = (data[1] & 0x20) == 0x20; /* diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 5f1ff4cc5c34..7d7ae97476e2 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -17,27 +17,31 @@ #include <linux/acpi.h> #include <linux/i2c.h> #include <linux/interrupt.h> + #include <asm/iosf_mbi.h> + #include "i2c-designware-core.h" #define SEMAPHORE_TIMEOUT 100 #define PUNIT_SEMAPHORE 0x7 +#define PUNIT_SEMAPHORE_BIT BIT(0) +#define PUNIT_SEMAPHORE_ACQUIRE BIT(1) static unsigned long acquired; static int get_sem(struct device *dev, u32 *sem) { - u32 reg_val; + u32 data; int ret; ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ, PUNIT_SEMAPHORE, - ®_val); + &data); if (ret) { dev_err(dev, "iosf failed to read punit semaphore\n"); return ret; } - *sem = reg_val & 0x1; + *sem = data & PUNIT_SEMAPHORE_BIT; return 0; } @@ -52,27 +56,29 @@ static void reset_semaphore(struct device *dev) return; } - data = data & 0xfffffffe; + data &= ~PUNIT_SEMAPHORE_BIT; if (iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE, - PUNIT_SEMAPHORE, data)) + PUNIT_SEMAPHORE, data)) dev_err(dev, "iosf failed to reset punit semaphore during write\n"); } -int baytrail_i2c_acquire(struct dw_i2c_dev *dev) +static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) { - u32 sem = 0; + u32 sem; int ret; unsigned long start, end; + might_sleep(); + if (!dev || !dev->dev) return -ENODEV; - if (!dev->acquire_lock) + if (!dev->release_lock) return 0; - /* host driver writes 0x2 to side band semaphore register */ + /* host driver writes to side band semaphore register */ ret = iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_WRITE, - PUNIT_SEMAPHORE, 0x2); + PUNIT_SEMAPHORE, PUNIT_SEMAPHORE_ACQUIRE); if (ret) { dev_err(dev->dev, "iosf punit semaphore request failed\n"); return ret; @@ -81,7 +87,7 @@ int baytrail_i2c_acquire(struct dw_i2c_dev *dev) /* host driver waits for bit 0 to be set in semaphore register */ start = jiffies; end = start + msecs_to_jiffies(SEMAPHORE_TIMEOUT); - while (!time_after(jiffies, end)) { + do { ret = get_sem(dev->dev, &sem); if (!ret && sem) { acquired = jiffies; @@ -91,14 +97,14 @@ int baytrail_i2c_acquire(struct dw_i2c_dev *dev) } usleep_range(1000, 2000); - } + } while (time_before(jiffies, end)); dev_err(dev->dev, "punit semaphore timed out, resetting\n"); reset_semaphore(dev->dev); ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_BUNIT_READ, - PUNIT_SEMAPHORE, &sem); - if (!ret) + PUNIT_SEMAPHORE, &sem); + if (ret) dev_err(dev->dev, "iosf failed to read punit semaphore\n"); else dev_err(dev->dev, "PUNIT SEM: %d\n", sem); @@ -107,9 +113,8 @@ int baytrail_i2c_acquire(struct dw_i2c_dev *dev) return -ETIMEDOUT; } -EXPORT_SYMBOL(baytrail_i2c_acquire); -void baytrail_i2c_release(struct dw_i2c_dev *dev) +static void baytrail_i2c_release(struct dw_i2c_dev *dev) { if (!dev || !dev->dev) return; @@ -121,7 +126,6 @@ void baytrail_i2c_release(struct dw_i2c_dev *dev) dev_dbg(dev->dev, "punit semaphore held for %ums\n", jiffies_to_msecs(jiffies - acquired)); } -EXPORT_SYMBOL(baytrail_i2c_release); int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) { @@ -137,7 +141,6 @@ int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) return 0; status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host); - if (ACPI_FAILURE(status)) return 0; @@ -153,7 +156,6 @@ int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) return 0; } -EXPORT_SYMBOL(i2c_dw_eval_lock_support); MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>"); MODULE_DESCRIPTION("Baytrail I2C Semaphore driver"); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 210cf4874cb7..edf274cabe81 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -679,9 +679,6 @@ static int i2c_device_remove(struct device *dev) status = driver->remove(client); } - if (dev->of_node) - irq_dispose_mapping(client->irq); - dev_pm_domain_detach(&client->dev, true); return status; } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 1793aea4a7d2..6eb738ca6d2f 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1793,11 +1793,11 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->best_dsc_rw_freq = clamp_t(unsigned long, t, IDETAPE_DSC_RW_MIN, IDETAPE_DSC_RW_MAX); printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, " - "%lums tDSC%s\n", + "%ums tDSC%s\n", drive->name, tape->name, *(u16 *)&tape->caps[14], (*(u16 *)&tape->caps[16] * 512) / tape->buffer_size, tape->buffer_size / 1024, - tape->best_dsc_rw_freq * 1000 / HZ, + jiffies_to_msecs(tape->best_dsc_rw_freq), (drive->dev_flags & IDE_DFLAG_USING_DMA) ? ", DMA" : ""); ide_proc_register_driver(drive, tape->driver); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b0e58522780d..5c979d0667a2 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -55,7 +55,7 @@ #include <linux/kernel.h> #include <linux/cpuidle.h> -#include <linux/clockchips.h> +#include <linux/tick.h> #include <trace/events/power.h> #include <linux/sched.h> #include <linux/notifier.h> @@ -638,12 +638,12 @@ static int intel_idle(struct cpuidle_device *dev, leave_mm(cpu); if (!(lapic_timer_reliable_states & (1 << (cstate)))) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); + tick_broadcast_enter(); mwait_idle_with_hints(eax, ecx); if (!(lapic_timer_reliable_states & (1 << (cstate)))) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu); + tick_broadcast_exit(); return index; } @@ -665,13 +665,12 @@ static void intel_idle_freeze(struct cpuidle_device *dev, static void __setup_broadcast_timer(void *arg) { - unsigned long reason = (unsigned long)arg; - int cpu = smp_processor_id(); - - reason = reason ? - CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF; + unsigned long on = (unsigned long)arg; - clockevents_notify(reason, &cpu); + if (on) + tick_broadcast_enable(); + else + tick_broadcast_disable(); } static int cpu_hotplug_notify(struct notifier_block *n, diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c index 1096da327130..75c6d2103e07 100644 --- a/drivers/iio/accel/bma180.c +++ b/drivers/iio/accel/bma180.c @@ -659,7 +659,7 @@ static irqreturn_t bma180_trigger_handler(int irq, void *p) mutex_lock(&data->mutex); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = bma180_get_data_reg(data, bit); if (ret < 0) { diff --git a/drivers/iio/accel/bmc150-accel.c b/drivers/iio/accel/bmc150-accel.c index 066d0c04072c..75567fd457dc 100644 --- a/drivers/iio/accel/bmc150-accel.c +++ b/drivers/iio/accel/bmc150-accel.c @@ -168,14 +168,14 @@ static const struct { int val; int val2; u8 bw_bits; -} bmc150_accel_samp_freq_table[] = { {7, 810000, 0x08}, - {15, 630000, 0x09}, - {31, 250000, 0x0A}, - {62, 500000, 0x0B}, - {125, 0, 0x0C}, - {250, 0, 0x0D}, - {500, 0, 0x0E}, - {1000, 0, 0x0F} }; +} bmc150_accel_samp_freq_table[] = { {15, 620000, 0x08}, + {31, 260000, 0x09}, + {62, 500000, 0x0A}, + {125, 0, 0x0B}, + {250, 0, 0x0C}, + {500, 0, 0x0D}, + {1000, 0, 0x0E}, + {2000, 0, 0x0F} }; static const struct { int bw_bits; @@ -840,7 +840,7 @@ static int bmc150_accel_validate_trigger(struct iio_dev *indio_dev, } static IIO_CONST_ATTR_SAMP_FREQ_AVAIL( - "7.810000 15.630000 31.250000 62.500000 125 250 500 1000"); + "15.620000 31.260000 62.50000 125 250 500 1000 2000"); static struct attribute *bmc150_accel_attributes[] = { &iio_const_attr_sampling_frequency_available.dev_attr.attr, @@ -986,7 +986,7 @@ static irqreturn_t bmc150_accel_trigger_handler(int irq, void *p) int bit, ret, i = 0; mutex_lock(&data->mutex); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = i2c_smbus_read_word_data(data->client, BMC150_ACCEL_AXIS_TO_REG(bit)); diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index 567de269cc00..1a6379525fa4 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -956,7 +956,7 @@ static irqreturn_t kxcjk1013_trigger_handler(int irq, void *p) mutex_lock(&data->mutex); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = kxcjk1013_get_acc_reg(data, bit); if (ret < 0) { diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 202daf889be2..46379b1fb25b 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -137,7 +137,8 @@ config AXP288_ADC config CC10001_ADC tristate "Cosmic Circuits 10001 ADC driver" - depends on HAS_IOMEM || HAVE_CLK || REGULATOR + depends on HAVE_CLK || REGULATOR + depends on HAS_IOMEM select IIO_BUFFER select IIO_TRIGGERED_BUFFER help diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c index ff61ae55dd3f..8a0eb4a04fb5 100644 --- a/drivers/iio/adc/at91_adc.c +++ b/drivers/iio/adc/at91_adc.c @@ -544,7 +544,6 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) { struct iio_dev *idev = iio_trigger_get_drvdata(trig); struct at91_adc_state *st = iio_priv(idev); - struct iio_buffer *buffer = idev->buffer; struct at91_adc_reg_desc *reg = st->registers; u32 status = at91_adc_readl(st, reg->trigger_register); int value; @@ -564,7 +563,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) at91_adc_writel(st, reg->trigger_register, status | value); - for_each_set_bit(bit, buffer->scan_mask, + for_each_set_bit(bit, idev->active_scan_mask, st->num_channels) { struct iio_chan_spec const *chan = idev->channels + bit; at91_adc_writel(st, AT91_ADC_CHER, @@ -579,7 +578,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) at91_adc_writel(st, reg->trigger_register, status & ~value); - for_each_set_bit(bit, buffer->scan_mask, + for_each_set_bit(bit, idev->active_scan_mask, st->num_channels) { struct iio_chan_spec const *chan = idev->channels + bit; at91_adc_writel(st, AT91_ADC_CHDR, diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c index 51672256072b..b96c636470ef 100644 --- a/drivers/iio/adc/mcp3422.c +++ b/drivers/iio/adc/mcp3422.c @@ -58,20 +58,11 @@ .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ), \ } -/* LSB is in nV to eliminate floating point */ -static const u32 rates_to_lsb[] = {1000000, 250000, 62500, 15625}; - -/* - * scales calculated as: - * rates_to_lsb[sample_rate] / (1 << pga); - * pga is 1 for 0, 2 - */ - static const int mcp3422_scales[4][4] = { - { 1000000, 250000, 62500, 15625 }, - { 500000 , 125000, 31250, 7812 }, - { 250000 , 62500 , 15625, 3906 }, - { 125000 , 31250 , 7812 , 1953 } }; + { 1000000, 500000, 250000, 125000 }, + { 250000 , 125000, 62500 , 31250 }, + { 62500 , 31250 , 15625 , 7812 }, + { 15625 , 7812 , 3906 , 1953 } }; /* Constant msleep times for data acquisitions */ static const int mcp3422_read_times[4] = { diff --git a/drivers/iio/adc/qcom-spmi-iadc.c b/drivers/iio/adc/qcom-spmi-iadc.c index b9666f2f5e51..fabd24edc2a1 100644 --- a/drivers/iio/adc/qcom-spmi-iadc.c +++ b/drivers/iio/adc/qcom-spmi-iadc.c @@ -296,7 +296,8 @@ static int iadc_do_conversion(struct iadc_chip *iadc, int chan, u16 *data) if (iadc->poll_eoc) { ret = iadc_poll_wait_eoc(iadc, wait); } else { - ret = wait_for_completion_timeout(&iadc->complete, wait); + ret = wait_for_completion_timeout(&iadc->complete, + usecs_to_jiffies(wait)); if (!ret) ret = -ETIMEDOUT; else diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c index 2e5cc4409f78..a0e7161f040c 100644 --- a/drivers/iio/adc/ti_am335x_adc.c +++ b/drivers/iio/adc/ti_am335x_adc.c @@ -188,12 +188,11 @@ static int tiadc_buffer_preenable(struct iio_dev *indio_dev) static int tiadc_buffer_postenable(struct iio_dev *indio_dev) { struct tiadc_device *adc_dev = iio_priv(indio_dev); - struct iio_buffer *buffer = indio_dev->buffer; unsigned int enb = 0; u8 bit; tiadc_step_config(indio_dev); - for_each_set_bit(bit, buffer->scan_mask, adc_dev->channels) + for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels) enb |= (get_adc_step_bit(adc_dev, bit) << 1); adc_dev->buffer_en_ch_steps = enb; diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 8ec353c01d98..e63b8e76d4c3 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -141,9 +141,13 @@ struct vf610_adc { struct regulator *vref; struct vf610_adc_feature adc_feature; + u32 sample_freq_avail[5]; + struct completion completion; }; +static const u32 vf610_hw_avgs[] = { 1, 4, 8, 16, 32 }; + #define VF610_ADC_CHAN(_idx, _chan_type) { \ .type = (_chan_type), \ .indexed = 1, \ @@ -180,35 +184,47 @@ static const struct iio_chan_spec vf610_adc_iio_channels[] = { /* sentinel */ }; -/* - * ADC sample frequency, unit is ADCK cycles. - * ADC clk source is ipg clock, which is the same as bus clock. - * - * ADC conversion time = SFCAdder + AverageNum x (BCT + LSTAdder) - * SFCAdder: fixed to 6 ADCK cycles - * AverageNum: 1, 4, 8, 16, 32 samples for hardware average. - * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode - * LSTAdder(Long Sample Time): fixed to 3 ADCK cycles - * - * By default, enable 12 bit resolution mode, clock source - * set to ipg clock, So get below frequency group: - */ -static const u32 vf610_sample_freq_avail[5] = -{1941176, 559332, 286957, 145374, 73171}; +static inline void vf610_adc_calculate_rates(struct vf610_adc *info) +{ + unsigned long adck_rate, ipg_rate = clk_get_rate(info->clk); + int i; + + /* + * Calculate ADC sample frequencies + * Sample time unit is ADCK cycles. ADCK clk source is ipg clock, + * which is the same as bus clock. + * + * ADC conversion time = SFCAdder + AverageNum x (BCT + LSTAdder) + * SFCAdder: fixed to 6 ADCK cycles + * AverageNum: 1, 4, 8, 16, 32 samples for hardware average. + * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode + * LSTAdder(Long Sample Time): fixed to 3 ADCK cycles + */ + adck_rate = ipg_rate / info->adc_feature.clk_div; + for (i = 0; i < ARRAY_SIZE(vf610_hw_avgs); i++) + info->sample_freq_avail[i] = + adck_rate / (6 + vf610_hw_avgs[i] * (25 + 3)); +} static inline void vf610_adc_cfg_init(struct vf610_adc *info) { + struct vf610_adc_feature *adc_feature = &info->adc_feature; + /* set default Configuration for ADC controller */ - info->adc_feature.clk_sel = VF610_ADCIOC_BUSCLK_SET; - info->adc_feature.vol_ref = VF610_ADCIOC_VR_VREF_SET; + adc_feature->clk_sel = VF610_ADCIOC_BUSCLK_SET; + adc_feature->vol_ref = VF610_ADCIOC_VR_VREF_SET; + + adc_feature->calibration = true; + adc_feature->ovwren = true; + + adc_feature->res_mode = 12; + adc_feature->sample_rate = 1; + adc_feature->lpm = true; - info->adc_feature.calibration = true; - info->adc_feature.ovwren = true; + /* Use a save ADCK which is below 20MHz on all devices */ + adc_feature->clk_div = 8; - info->adc_feature.clk_div = 1; - info->adc_feature.res_mode = 12; - info->adc_feature.sample_rate = 1; - info->adc_feature.lpm = true; + vf610_adc_calculate_rates(info); } static void vf610_adc_cfg_post_set(struct vf610_adc *info) @@ -290,12 +306,10 @@ static void vf610_adc_cfg_set(struct vf610_adc *info) cfg_data = readl(info->regs + VF610_REG_ADC_CFG); - /* low power configuration */ cfg_data &= ~VF610_ADC_ADLPC_EN; if (adc_feature->lpm) cfg_data |= VF610_ADC_ADLPC_EN; - /* disable high speed */ cfg_data &= ~VF610_ADC_ADHSC_EN; writel(cfg_data, info->regs + VF610_REG_ADC_CFG); @@ -435,10 +449,27 @@ static irqreturn_t vf610_adc_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("1941176, 559332, 286957, 145374, 73171"); +static ssize_t vf610_show_samp_freq_avail(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct vf610_adc *info = iio_priv(dev_to_iio_dev(dev)); + size_t len = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(info->sample_freq_avail); i++) + len += scnprintf(buf + len, PAGE_SIZE - len, + "%u ", info->sample_freq_avail[i]); + + /* replace trailing space by newline */ + buf[len - 1] = '\n'; + + return len; +} + +static IIO_DEV_ATTR_SAMP_FREQ_AVAIL(vf610_show_samp_freq_avail); static struct attribute *vf610_attributes[] = { - &iio_const_attr_sampling_frequency_available.dev_attr.attr, + &iio_dev_attr_sampling_frequency_available.dev_attr.attr, NULL }; @@ -502,7 +533,7 @@ static int vf610_read_raw(struct iio_dev *indio_dev, return IIO_VAL_FRACTIONAL_LOG2; case IIO_CHAN_INFO_SAMP_FREQ: - *val = vf610_sample_freq_avail[info->adc_feature.sample_rate]; + *val = info->sample_freq_avail[info->adc_feature.sample_rate]; *val2 = 0; return IIO_VAL_INT; @@ -525,9 +556,9 @@ static int vf610_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_SAMP_FREQ: for (i = 0; - i < ARRAY_SIZE(vf610_sample_freq_avail); + i < ARRAY_SIZE(info->sample_freq_avail); i++) - if (val == vf610_sample_freq_avail[i]) { + if (val == info->sample_freq_avail[i]) { info->adc_feature.sample_rate = i; vf610_adc_sample_set(info); return 0; diff --git a/drivers/iio/common/ssp_sensors/ssp_dev.c b/drivers/iio/common/ssp_sensors/ssp_dev.c index 52d70435f5a1..55a90082a29b 100644 --- a/drivers/iio/common/ssp_sensors/ssp_dev.c +++ b/drivers/iio/common/ssp_sensors/ssp_dev.c @@ -640,6 +640,7 @@ static int ssp_remove(struct spi_device *spi) return 0; } +#ifdef CONFIG_PM_SLEEP static int ssp_suspend(struct device *dev) { int ret; @@ -688,6 +689,7 @@ static int ssp_resume(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ static const struct dev_pm_ops ssp_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(ssp_suspend, ssp_resume) diff --git a/drivers/iio/dac/ad5686.c b/drivers/iio/dac/ad5686.c index f57562aa396f..15c73e20272d 100644 --- a/drivers/iio/dac/ad5686.c +++ b/drivers/iio/dac/ad5686.c @@ -322,7 +322,7 @@ static int ad5686_probe(struct spi_device *spi) st = iio_priv(indio_dev); spi_set_drvdata(spi, indio_dev); - st->reg = devm_regulator_get(&spi->dev, "vcc"); + st->reg = devm_regulator_get_optional(&spi->dev, "vcc"); if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) diff --git a/drivers/iio/gyro/bmg160.c b/drivers/iio/gyro/bmg160.c index 60451b328242..ccf3ea7e1afa 100644 --- a/drivers/iio/gyro/bmg160.c +++ b/drivers/iio/gyro/bmg160.c @@ -822,7 +822,7 @@ static irqreturn_t bmg160_trigger_handler(int irq, void *p) int bit, ret, i = 0; mutex_lock(&data->mutex); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = i2c_smbus_read_word_data(data->client, BMG160_AXIS_TO_REG(bit)); diff --git a/drivers/iio/humidity/dht11.c b/drivers/iio/humidity/dht11.c index 623c145d8a97..7d79a1ac5f5f 100644 --- a/drivers/iio/humidity/dht11.c +++ b/drivers/iio/humidity/dht11.c @@ -29,6 +29,7 @@ #include <linux/wait.h> #include <linux/bitops.h> #include <linux/completion.h> +#include <linux/mutex.h> #include <linux/delay.h> #include <linux/gpio.h> #include <linux/of_gpio.h> @@ -39,8 +40,12 @@ #define DHT11_DATA_VALID_TIME 2000000000 /* 2s in ns */ -#define DHT11_EDGES_PREAMBLE 4 +#define DHT11_EDGES_PREAMBLE 2 #define DHT11_BITS_PER_READ 40 +/* + * Note that when reading the sensor actually 84 edges are detected, but + * since the last edge is not significant, we only store 83: + */ #define DHT11_EDGES_PER_READ (2*DHT11_BITS_PER_READ + DHT11_EDGES_PREAMBLE + 1) /* Data transmission timing (nano seconds) */ @@ -57,6 +62,7 @@ struct dht11 { int irq; struct completion completion; + struct mutex lock; s64 timestamp; int temperature; @@ -88,7 +94,7 @@ static int dht11_decode(struct dht11 *dht11, int offset) unsigned char temp_int, temp_dec, hum_int, hum_dec, checksum; /* Calculate timestamp resolution */ - for (i = 0; i < dht11->num_edges; ++i) { + for (i = 1; i < dht11->num_edges; ++i) { t = dht11->edges[i].ts - dht11->edges[i-1].ts; if (t > 0 && t < timeres) timeres = t; @@ -138,6 +144,27 @@ static int dht11_decode(struct dht11 *dht11, int offset) return 0; } +/* + * IRQ handler called on GPIO edges + */ +static irqreturn_t dht11_handle_irq(int irq, void *data) +{ + struct iio_dev *iio = data; + struct dht11 *dht11 = iio_priv(iio); + + /* TODO: Consider making the handler safe for IRQ sharing */ + if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) { + dht11->edges[dht11->num_edges].ts = iio_get_time_ns(); + dht11->edges[dht11->num_edges++].value = + gpio_get_value(dht11->gpio); + + if (dht11->num_edges >= DHT11_EDGES_PER_READ) + complete(&dht11->completion); + } + + return IRQ_HANDLED; +} + static int dht11_read_raw(struct iio_dev *iio_dev, const struct iio_chan_spec *chan, int *val, int *val2, long m) @@ -145,6 +172,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev, struct dht11 *dht11 = iio_priv(iio_dev); int ret; + mutex_lock(&dht11->lock); if (dht11->timestamp + DHT11_DATA_VALID_TIME < iio_get_time_ns()) { reinit_completion(&dht11->completion); @@ -157,8 +185,17 @@ static int dht11_read_raw(struct iio_dev *iio_dev, if (ret) goto err; + ret = request_irq(dht11->irq, dht11_handle_irq, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + iio_dev->name, iio_dev); + if (ret) + goto err; + ret = wait_for_completion_killable_timeout(&dht11->completion, HZ); + + free_irq(dht11->irq, iio_dev); + if (ret == 0 && dht11->num_edges < DHT11_EDGES_PER_READ - 1) { dev_err(&iio_dev->dev, "Only %d signal edges detected\n", @@ -185,6 +222,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev, ret = -EINVAL; err: dht11->num_edges = -1; + mutex_unlock(&dht11->lock); return ret; } @@ -193,27 +231,6 @@ static const struct iio_info dht11_iio_info = { .read_raw = dht11_read_raw, }; -/* - * IRQ handler called on GPIO edges -*/ -static irqreturn_t dht11_handle_irq(int irq, void *data) -{ - struct iio_dev *iio = data; - struct dht11 *dht11 = iio_priv(iio); - - /* TODO: Consider making the handler safe for IRQ sharing */ - if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) { - dht11->edges[dht11->num_edges].ts = iio_get_time_ns(); - dht11->edges[dht11->num_edges++].value = - gpio_get_value(dht11->gpio); - - if (dht11->num_edges >= DHT11_EDGES_PER_READ) - complete(&dht11->completion); - } - - return IRQ_HANDLED; -} - static const struct iio_chan_spec dht11_chan_spec[] = { { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), }, @@ -256,11 +273,6 @@ static int dht11_probe(struct platform_device *pdev) dev_err(dev, "GPIO %d has no interrupt\n", dht11->gpio); return -EINVAL; } - ret = devm_request_irq(dev, dht11->irq, dht11_handle_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - pdev->name, iio); - if (ret) - return ret; dht11->timestamp = iio_get_time_ns() - DHT11_DATA_VALID_TIME - 1; dht11->num_edges = -1; @@ -268,6 +280,7 @@ static int dht11_probe(struct platform_device *pdev) platform_set_drvdata(pdev, iio); init_completion(&dht11->completion); + mutex_init(&dht11->lock); iio->name = pdev->name; iio->dev.parent = &pdev->dev; iio->info = &dht11_iio_info; diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c index b54164677b89..fa3b809aff5e 100644 --- a/drivers/iio/humidity/si7020.c +++ b/drivers/iio/humidity/si7020.c @@ -45,12 +45,12 @@ static int si7020_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) { - struct i2c_client *client = iio_priv(indio_dev); + struct i2c_client **client = iio_priv(indio_dev); int ret; switch (mask) { case IIO_CHAN_INFO_RAW: - ret = i2c_smbus_read_word_data(client, + ret = i2c_smbus_read_word_data(*client, chan->type == IIO_TEMP ? SI7020CMD_TEMP_HOLD : SI7020CMD_RH_HOLD); @@ -126,7 +126,7 @@ static int si7020_probe(struct i2c_client *client, /* Wait the maximum power-up time after software reset. */ msleep(15); - indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*client)); + indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data)); if (!indio_dev) return -ENOMEM; diff --git a/drivers/iio/imu/adis16400_core.c b/drivers/iio/imu/adis16400_core.c index b70873de04ea..fa795dcd5f75 100644 --- a/drivers/iio/imu/adis16400_core.c +++ b/drivers/iio/imu/adis16400_core.c @@ -26,6 +26,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/debugfs.h> +#include <linux/bitops.h> #include <linux/iio/iio.h> #include <linux/iio/sysfs.h> @@ -414,7 +415,7 @@ static int adis16400_read_raw(struct iio_dev *indio_dev, mutex_unlock(&indio_dev->mlock); if (ret) return ret; - val16 = ((val16 & 0xFFF) << 4) >> 4; + val16 = sign_extend32(val16, 11); *val = val16; return IIO_VAL_INT; case IIO_CHAN_INFO_OFFSET: diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index e0017c22bb9c..f53e9a803a0e 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -60,7 +60,7 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); - indio_dev->trig = adis->trig; + indio_dev->trig = iio_trigger_get(adis->trig); if (ret) goto error_free_irq; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index f73e60b7a796..ef76afe2643c 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -410,42 +410,46 @@ error_read_raw: } } -static int inv_mpu6050_write_fsr(struct inv_mpu6050_state *st, int fsr) +static int inv_mpu6050_write_gyro_scale(struct inv_mpu6050_state *st, int val) { - int result; + int result, i; u8 d; - if (fsr < 0 || fsr > INV_MPU6050_MAX_GYRO_FS_PARAM) - return -EINVAL; - if (fsr == st->chip_config.fsr) - return 0; + for (i = 0; i < ARRAY_SIZE(gyro_scale_6050); ++i) { + if (gyro_scale_6050[i] == val) { + d = (i << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT); + result = inv_mpu6050_write_reg(st, + st->reg->gyro_config, d); + if (result) + return result; - d = (fsr << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT); - result = inv_mpu6050_write_reg(st, st->reg->gyro_config, d); - if (result) - return result; - st->chip_config.fsr = fsr; + st->chip_config.fsr = i; + return 0; + } + } - return 0; + return -EINVAL; } -static int inv_mpu6050_write_accel_fs(struct inv_mpu6050_state *st, int fs) +static int inv_mpu6050_write_accel_scale(struct inv_mpu6050_state *st, int val) { - int result; + int result, i; u8 d; - if (fs < 0 || fs > INV_MPU6050_MAX_ACCL_FS_PARAM) - return -EINVAL; - if (fs == st->chip_config.accl_fs) - return 0; + for (i = 0; i < ARRAY_SIZE(accel_scale); ++i) { + if (accel_scale[i] == val) { + d = (i << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT); + result = inv_mpu6050_write_reg(st, + st->reg->accl_config, d); + if (result) + return result; - d = (fs << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT); - result = inv_mpu6050_write_reg(st, st->reg->accl_config, d); - if (result) - return result; - st->chip_config.accl_fs = fs; + st->chip_config.accl_fs = i; + return 0; + } + } - return 0; + return -EINVAL; } static int inv_mpu6050_write_raw(struct iio_dev *indio_dev, @@ -471,10 +475,10 @@ static int inv_mpu6050_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: switch (chan->type) { case IIO_ANGL_VEL: - result = inv_mpu6050_write_fsr(st, val); + result = inv_mpu6050_write_gyro_scale(st, val2); break; case IIO_ACCEL: - result = inv_mpu6050_write_accel_fs(st, val); + result = inv_mpu6050_write_accel_scale(st, val2); break; default: result = -EINVAL; @@ -780,7 +784,11 @@ static int inv_mpu_probe(struct i2c_client *client, i2c_set_clientdata(client, indio_dev); indio_dev->dev.parent = &client->dev; - indio_dev->name = id->name; + /* id will be NULL when enumerated via ACPI */ + if (id) + indio_dev->name = (char *)id->name; + else + indio_dev->name = (char *)dev_name(&client->dev); indio_dev->channels = inv_mpu_channels; indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels); diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 0cd306a72a6e..ba27e277511f 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -24,6 +24,16 @@ #include <linux/poll.h> #include "inv_mpu_iio.h" +static void inv_clear_kfifo(struct inv_mpu6050_state *st) +{ + unsigned long flags; + + /* take the spin lock sem to avoid interrupt kick in */ + spin_lock_irqsave(&st->time_stamp_lock, flags); + kfifo_reset(&st->timestamps); + spin_unlock_irqrestore(&st->time_stamp_lock, flags); +} + int inv_reset_fifo(struct iio_dev *indio_dev) { int result; @@ -50,6 +60,10 @@ int inv_reset_fifo(struct iio_dev *indio_dev) INV_MPU6050_BIT_FIFO_RST); if (result) goto reset_fifo_fail; + + /* clear timestamps fifo */ + inv_clear_kfifo(st); + /* enable interrupt */ if (st->chip_config.accl_fifo_enable || st->chip_config.gyro_fifo_enable) { @@ -83,16 +97,6 @@ reset_fifo_fail: return result; } -static void inv_clear_kfifo(struct inv_mpu6050_state *st) -{ - unsigned long flags; - - /* take the spin lock sem to avoid interrupt kick in */ - spin_lock_irqsave(&st->time_stamp_lock, flags); - kfifo_reset(&st->timestamps); - spin_unlock_irqrestore(&st->time_stamp_lock, flags); -} - /** * inv_mpu6050_irq_handler() - Cache a timestamp at each data ready interrupt. */ @@ -184,7 +188,6 @@ end_session: flush_fifo: /* Flush HW and SW FIFOs. */ inv_reset_fifo(indio_dev); - inv_clear_kfifo(st); mutex_unlock(&indio_dev->mlock); iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c index 5cc3692acf37..b3a36376c719 100644 --- a/drivers/iio/imu/kmx61.c +++ b/drivers/iio/imu/kmx61.c @@ -1227,7 +1227,7 @@ static irqreturn_t kmx61_trigger_handler(int irq, void *p) base = KMX61_MAG_XOUT_L; mutex_lock(&data->lock); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = kmx61_read_measurement(data, base, bit); if (ret < 0) { diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index aaba9d3d980e..4df97f650e44 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -847,8 +847,7 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev, * @attr_list: List of IIO device attributes * * This function frees the memory allocated for each of the IIO device - * attributes in the list. Note: if you want to reuse the list after calling - * this function you have to reinitialize it using INIT_LIST_HEAD(). + * attributes in the list. */ void iio_free_chan_devattr_list(struct list_head *attr_list) { @@ -856,6 +855,7 @@ void iio_free_chan_devattr_list(struct list_head *attr_list) list_for_each_entry_safe(p, n, attr_list, l) { kfree(p->dev_attr.attr.name); + list_del(&p->l); kfree(p); } } @@ -936,6 +936,7 @@ static void iio_device_unregister_sysfs(struct iio_dev *indio_dev) iio_free_chan_devattr_list(&indio_dev->channel_attr_list); kfree(indio_dev->chan_attr_group.attrs); + indio_dev->chan_attr_group.attrs = NULL; } static void iio_dev_release(struct device *device) diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index a4b397048f71..a99692ba91bc 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -500,6 +500,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev) error_free_setup_event_lines: iio_free_chan_devattr_list(&indio_dev->event_interface->dev_attr_list); kfree(indio_dev->event_interface); + indio_dev->event_interface = NULL; return ret; } diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index ae68c64bdad3..a224afd6380c 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -73,6 +73,7 @@ config CM36651 config GP2AP020A00F tristate "Sharp GP2AP020A00F Proximity/ALS sensor" depends on I2C + select REGMAP_I2C select IIO_BUFFER select IIO_TRIGGERED_BUFFER select IRQ_WORK @@ -126,6 +127,7 @@ config HID_SENSOR_PROX config JSA1212 tristate "JSA1212 ALS and proximity sensor driver" depends on I2C + select REGMAP_I2C help Say Y here if you want to build a IIO driver for JSA1212 proximity & ALS sensor device. diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 4c7a4c52dd06..a5d6de72c523 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -18,6 +18,8 @@ config AK8975 config AK09911 tristate "Asahi Kasei AK09911 3-axis Compass" + depends on I2C + depends on GPIOLIB select AK8975 help Deprecated: AK09911 is now supported by AK8975 driver. diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c index 74dff4e4a11a..89fca3a70750 100644 --- a/drivers/iio/proximity/sx9500.c +++ b/drivers/iio/proximity/sx9500.c @@ -494,7 +494,7 @@ static irqreturn_t sx9500_trigger_handler(int irq, void *private) mutex_lock(&data->mutex); - for_each_set_bit(bit, indio_dev->buffer->scan_mask, + for_each_set_bit(bit, indio_dev->active_scan_mask, indio_dev->masklength) { ret = sx9500_read_proximity(data, &indio_dev->channels[bit], &val); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index aec7a6aa2951..8c014b5dab4c 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -99,6 +99,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if ((PAGE_ALIGN(addr + size) <= size) || + (PAGE_ALIGN(addr + size) <= addr)) + return ERR_PTR(-EINVAL); + if (!can_do_mlock()) return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index c7619716c31d..59040265e361 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -64,6 +64,14 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do {\ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -806,10 +814,14 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, static void edit_counter(struct mlx4_counter *cnt, struct ib_pma_portcounters *pma_cnt) { - pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2)); - pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2)); - pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames)); - pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data, + (be64_to_cpu(cnt->tx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data, + (be64_to_cpu(cnt->rx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets, + be64_to_cpu(cnt->tx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets, + be64_to_cpu(cnt->rx_frames)); } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ac6e2b710ea6..b972c0b41799 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2697,8 +2697,12 @@ static void handle_bonded_port_state_event(struct work_struct *work) spin_lock_bh(&ibdev->iboe.lock); for (i = 0; i < MLX4_MAX_PORTS; ++i) { struct net_device *curr_netdev = ibdev->iboe.netdevs[i]; + enum ib_port_state curr_port_state; - enum ib_port_state curr_port_state = + if (!curr_netdev) + continue; + + curr_port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; diff --git a/drivers/input/keyboard/tc3589x-keypad.c b/drivers/input/keyboard/tc3589x-keypad.c index 8ff612d160b0..563932500ff1 100644 --- a/drivers/input/keyboard/tc3589x-keypad.c +++ b/drivers/input/keyboard/tc3589x-keypad.c @@ -411,9 +411,9 @@ static int tc3589x_keypad_probe(struct platform_device *pdev) input_set_drvdata(input, keypad); - error = request_threaded_irq(irq, NULL, - tc3589x_keypad_irq, plat->irqtype, - "tc3589x-keypad", keypad); + error = request_threaded_irq(irq, NULL, tc3589x_keypad_irq, + plat->irqtype | IRQF_ONESHOT, + "tc3589x-keypad", keypad); if (error < 0) { dev_err(&pdev->dev, "Could not allocate irq %d,error %d\n", diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c index 59d4dcddf6de..98228773a111 100644 --- a/drivers/input/misc/mma8450.c +++ b/drivers/input/misc/mma8450.c @@ -187,6 +187,7 @@ static int mma8450_probe(struct i2c_client *c, idev->private = m; idev->input->name = MMA8450_DRV_NAME; idev->input->id.bustype = BUS_I2C; + idev->input->dev.parent = &c->dev; idev->poll = mma8450_poll; idev->poll_interval = POLL_INTERVAL; idev->poll_interval_max = POLL_INTERVAL_MAX; diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index d28726a0ef85..27bcdbc950c9 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1154,10 +1154,28 @@ out: mutex_unlock(&alps_mutex); } -static void alps_report_bare_ps2_packet(struct input_dev *dev, +static void alps_report_bare_ps2_packet(struct psmouse *psmouse, unsigned char packet[], bool report_buttons) { + struct alps_data *priv = psmouse->private; + struct input_dev *dev; + + /* Figure out which device to use to report the bare packet */ + if (priv->proto_version == ALPS_PROTO_V2 && + (priv->flags & ALPS_DUALPOINT)) { + /* On V2 devices the DualPoint Stick reports bare packets */ + dev = priv->dev2; + } else if (unlikely(IS_ERR_OR_NULL(priv->dev3))) { + /* Register dev3 mouse if we received PS/2 packet first time */ + if (!IS_ERR(priv->dev3)) + psmouse_queue_work(psmouse, &priv->dev3_register_work, + 0); + return; + } else { + dev = priv->dev3; + } + if (report_buttons) alps_report_buttons(dev, NULL, packet[0] & 1, packet[0] & 2, packet[0] & 4); @@ -1232,8 +1250,8 @@ static psmouse_ret_t alps_handle_interleaved_ps2(struct psmouse *psmouse) * de-synchronization. */ - alps_report_bare_ps2_packet(priv->dev2, - &psmouse->packet[3], false); + alps_report_bare_ps2_packet(psmouse, &psmouse->packet[3], + false); /* * Continue with the standard ALPS protocol handling, @@ -1289,18 +1307,9 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse) * properly we only do this if the device is fully synchronized. */ if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) { - - /* Register dev3 mouse if we received PS/2 packet first time */ - if (unlikely(!priv->dev3)) - psmouse_queue_work(psmouse, - &priv->dev3_register_work, 0); - if (psmouse->pktcnt == 3) { - /* Once dev3 mouse device is registered report data */ - if (likely(!IS_ERR_OR_NULL(priv->dev3))) - alps_report_bare_ps2_packet(priv->dev3, - psmouse->packet, - true); + alps_report_bare_ps2_packet(psmouse, psmouse->packet, + true); return PSMOUSE_FULL_PACKET; } return PSMOUSE_GOOD_DATA; @@ -2281,10 +2290,12 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->set_abs_params = alps_set_abs_params_mt; priv->nibble_commands = alps_v3_nibble_commands; priv->addr_command = PSMOUSE_CMD_RESET_WRAP; - priv->x_max = 1360; - priv->y_max = 660; priv->x_bits = 23; priv->y_bits = 12; + + if (alps_dolphin_get_device_area(psmouse, priv)) + return -EIO; + break; case ALPS_PROTO_V6: @@ -2303,9 +2314,8 @@ static int alps_set_protocol(struct psmouse *psmouse, priv->set_abs_params = alps_set_abs_params_mt; priv->nibble_commands = alps_v3_nibble_commands; priv->addr_command = PSMOUSE_CMD_RESET_WRAP; - - if (alps_dolphin_get_device_area(psmouse, priv)) - return -EIO; + priv->x_max = 0xfff; + priv->y_max = 0x7ff; if (priv->fw_ver[1] != 0xba) priv->flags |= ALPS_BUTTONPAD; @@ -2605,8 +2615,10 @@ int alps_detect(struct psmouse *psmouse, bool set_properties) return -ENOMEM; error = alps_identify(psmouse, priv); - if (error) + if (error) { + kfree(priv); return error; + } if (set_properties) { psmouse->vendor = "ALPS"; diff --git a/drivers/input/mouse/cyapa_gen3.c b/drivers/input/mouse/cyapa_gen3.c index 77e9d70a986b..1e2291c378fe 100644 --- a/drivers/input/mouse/cyapa_gen3.c +++ b/drivers/input/mouse/cyapa_gen3.c @@ -20,7 +20,7 @@ #include <linux/input/mt.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/unaligned/access_ok.h> +#include <asm/unaligned.h> #include "cyapa.h" diff --git a/drivers/input/mouse/cyapa_gen5.c b/drivers/input/mouse/cyapa_gen5.c index ddf5393a1180..5b611dd71e79 100644 --- a/drivers/input/mouse/cyapa_gen5.c +++ b/drivers/input/mouse/cyapa_gen5.c @@ -17,7 +17,7 @@ #include <linux/mutex.h> #include <linux/completion.h> #include <linux/slab.h> -#include <linux/unaligned/access_ok.h> +#include <asm/unaligned.h> #include <linux/crc-itu-t.h> #include "cyapa.h" @@ -1926,7 +1926,7 @@ static int cyapa_gen5_read_idac_data(struct cyapa *cyapa, electrodes_tx = cyapa->electrodes_x; max_element_cnt = ((cyapa->aligned_electrodes_rx + 7) & ~7u) * electrodes_tx; - } else if (idac_data_type == GEN5_RETRIEVE_SELF_CAP_PWC_DATA) { + } else { offset = 2; max_element_cnt = cyapa->electrodes_x + cyapa->electrodes_y; diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c index 757f78a94aec..23d259416f2f 100644 --- a/drivers/input/mouse/focaltech.c +++ b/drivers/input/mouse/focaltech.c @@ -67,9 +67,6 @@ static void focaltech_reset(struct psmouse *psmouse) #define FOC_MAX_FINGERS 5 -#define FOC_MAX_X 2431 -#define FOC_MAX_Y 1663 - /* * Current state of a single finger on the touchpad. */ @@ -129,9 +126,17 @@ static void focaltech_report_state(struct psmouse *psmouse) input_mt_slot(dev, i); input_mt_report_slot_state(dev, MT_TOOL_FINGER, active); if (active) { - input_report_abs(dev, ABS_MT_POSITION_X, finger->x); + unsigned int clamped_x, clamped_y; + /* + * The touchpad might report invalid data, so we clamp + * the resulting values so that we do not confuse + * userspace. + */ + clamped_x = clamp(finger->x, 0U, priv->x_max); + clamped_y = clamp(finger->y, 0U, priv->y_max); + input_report_abs(dev, ABS_MT_POSITION_X, clamped_x); input_report_abs(dev, ABS_MT_POSITION_Y, - FOC_MAX_Y - finger->y); + priv->y_max - clamped_y); } } input_mt_report_pointer_emulation(dev, true); @@ -180,16 +185,6 @@ static void focaltech_process_abs_packet(struct psmouse *psmouse, state->pressed = (packet[0] >> 4) & 1; - /* - * packet[5] contains some kind of tool size in the most - * significant nibble. 0xff is a special value (latching) that - * signals a large contact area. - */ - if (packet[5] == 0xff) { - state->fingers[finger].valid = false; - return; - } - state->fingers[finger].x = ((packet[1] & 0xf) << 8) | packet[2]; state->fingers[finger].y = (packet[3] << 8) | packet[4]; state->fingers[finger].valid = true; @@ -381,6 +376,23 @@ static int focaltech_read_size(struct psmouse *psmouse) return 0; } + +void focaltech_set_resolution(struct psmouse *psmouse, unsigned int resolution) +{ + /* not supported yet */ +} + +static void focaltech_set_rate(struct psmouse *psmouse, unsigned int rate) +{ + /* not supported yet */ +} + +static void focaltech_set_scale(struct psmouse *psmouse, + enum psmouse_scale scale) +{ + /* not supported yet */ +} + int focaltech_init(struct psmouse *psmouse) { struct focaltech_data *priv; @@ -415,6 +427,14 @@ int focaltech_init(struct psmouse *psmouse) psmouse->cleanup = focaltech_reset; /* resync is not supported yet */ psmouse->resync_time = 0; + /* + * rate/resolution/scale changes are not supported yet, and + * the generic implementations of these functions seem to + * confuse some touchpads + */ + psmouse->set_resolution = focaltech_set_resolution; + psmouse->set_rate = focaltech_set_rate; + psmouse->set_scale = focaltech_set_scale; return 0; diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 4ccd01d7a48d..8bc61237bc1b 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -454,6 +454,17 @@ static void psmouse_set_rate(struct psmouse *psmouse, unsigned int rate) } /* + * Here we set the mouse scaling. + */ + +static void psmouse_set_scale(struct psmouse *psmouse, enum psmouse_scale scale) +{ + ps2_command(&psmouse->ps2dev, NULL, + scale == PSMOUSE_SCALE21 ? PSMOUSE_CMD_SETSCALE21 : + PSMOUSE_CMD_SETSCALE11); +} + +/* * psmouse_poll() - default poll handler. Everyone except for ALPS uses it. */ @@ -689,6 +700,7 @@ static void psmouse_apply_defaults(struct psmouse *psmouse) psmouse->set_rate = psmouse_set_rate; psmouse->set_resolution = psmouse_set_resolution; + psmouse->set_scale = psmouse_set_scale; psmouse->poll = psmouse_poll; psmouse->protocol_handler = psmouse_process_byte; psmouse->pktsize = 3; @@ -1160,7 +1172,7 @@ static void psmouse_initialize(struct psmouse *psmouse) if (psmouse_max_proto != PSMOUSE_PS2) { psmouse->set_rate(psmouse, psmouse->rate); psmouse->set_resolution(psmouse, psmouse->resolution); - ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11); + psmouse->set_scale(psmouse, PSMOUSE_SCALE11); } } diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index c2ff137ecbdb..d02e1bdc9ae4 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -36,6 +36,11 @@ typedef enum { PSMOUSE_FULL_PACKET } psmouse_ret_t; +enum psmouse_scale { + PSMOUSE_SCALE11, + PSMOUSE_SCALE21 +}; + struct psmouse { void *private; struct input_dev *dev; @@ -67,6 +72,7 @@ struct psmouse { psmouse_ret_t (*protocol_handler)(struct psmouse *psmouse); void (*set_rate)(struct psmouse *psmouse, unsigned int rate); void (*set_resolution)(struct psmouse *psmouse, unsigned int resolution); + void (*set_scale)(struct psmouse *psmouse, enum psmouse_scale scale); int (*reconnect)(struct psmouse *psmouse); void (*disconnect)(struct psmouse *psmouse); diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index f2cceb6493a0..3b06c8a360b6 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -67,9 +67,6 @@ #define X_MAX_POSITIVE 8176 #define Y_MAX_POSITIVE 8176 -/* maximum ABS_MT_POSITION displacement (in mm) */ -#define DMAX 10 - /***************************************************************************** * Stuff we need even when we do not want native Synaptics support ****************************************************************************/ @@ -123,32 +120,46 @@ void synaptics_reset(struct psmouse *psmouse) static bool cr48_profile_sensor; +#define ANY_BOARD_ID 0 struct min_max_quirk { const char * const *pnp_ids; + struct { + unsigned long int min, max; + } board_id; int x_min, x_max, y_min, y_max; }; static const struct min_max_quirk min_max_pnpid_table[] = { { (const char * const []){"LEN0033", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, 1024, 5052, 2258, 4832 }, { - (const char * const []){"LEN0035", "LEN0042", NULL}, + (const char * const []){"LEN0042", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, 1232, 5710, 1156, 4696 }, { (const char * const []){"LEN0034", "LEN0036", "LEN0037", "LEN0039", "LEN2002", "LEN2004", NULL}, + {ANY_BOARD_ID, 2961}, 1024, 5112, 2024, 4832 }, { (const char * const []){"LEN2001", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, 1024, 5022, 2508, 4832 }, { (const char * const []){"LEN2006", NULL}, + {2691, 2691}, + 1024, 5045, 2457, 4832 + }, + { + (const char * const []){"LEN2006", NULL}, + {ANY_BOARD_ID, ANY_BOARD_ID}, 1264, 5675, 1171, 4688 }, { } @@ -175,9 +186,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0041", "LEN0042", /* Yoga */ "LEN0045", - "LEN0046", "LEN0047", - "LEN0048", "LEN0049", "LEN2000", "LEN2001", /* Edge E431 */ @@ -185,7 +194,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN2003", "LEN2004", /* L440 */ "LEN2005", - "LEN2006", + "LEN2006", /* Edge E440/E540 */ "LEN2007", "LEN2008", "LEN2009", @@ -235,18 +244,39 @@ static int synaptics_model_id(struct psmouse *psmouse) return 0; } +static int synaptics_more_extended_queries(struct psmouse *psmouse) +{ + struct synaptics_data *priv = psmouse->private; + unsigned char buf[3]; + + if (synaptics_send_cmd(psmouse, SYN_QUE_MEXT_CAPAB_10, buf)) + return -1; + + priv->ext_cap_10 = (buf[0]<<16) | (buf[1]<<8) | buf[2]; + + return 0; +} + /* - * Read the board id from the touchpad + * Read the board id and the "More Extended Queries" from the touchpad * The board id is encoded in the "QUERY MODES" response */ -static int synaptics_board_id(struct psmouse *psmouse) +static int synaptics_query_modes(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; unsigned char bid[3]; + /* firmwares prior 7.5 have no board_id encoded */ + if (SYN_ID_FULL(priv->identity) < 0x705) + return 0; + if (synaptics_send_cmd(psmouse, SYN_QUE_MODES, bid)) return -1; priv->board_id = ((bid[0] & 0xfc) << 6) | bid[1]; + + if (SYN_MEXT_CAP_BIT(bid[0])) + return synaptics_more_extended_queries(psmouse); + return 0; } @@ -346,7 +376,6 @@ static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; unsigned char resp[3]; - int i; if (SYN_ID_MAJOR(priv->identity) < 4) return 0; @@ -358,17 +387,6 @@ static int synaptics_resolution(struct psmouse *psmouse) } } - for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) { - if (psmouse_matches_pnp_id(psmouse, - min_max_pnpid_table[i].pnp_ids)) { - priv->x_min = min_max_pnpid_table[i].x_min; - priv->x_max = min_max_pnpid_table[i].x_max; - priv->y_min = min_max_pnpid_table[i].y_min; - priv->y_max = min_max_pnpid_table[i].y_max; - return 0; - } - } - if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 && SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) { if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) { @@ -377,23 +395,69 @@ static int synaptics_resolution(struct psmouse *psmouse) } else { priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); + psmouse_info(psmouse, + "queried max coordinates: x [..%d], y [..%d]\n", + priv->x_max, priv->y_max); } } - if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 && - SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) { + if (SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c) && + (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 || + /* + * Firmware v8.1 does not report proper number of extended + * capabilities, but has been proven to report correct min + * coordinates. + */ + SYN_ID_FULL(priv->identity) == 0x801)) { if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) { psmouse_warn(psmouse, "device claims to have min coordinates query, but I'm not able to read it.\n"); } else { priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); + psmouse_info(psmouse, + "queried min coordinates: x [%d..], y [%d..]\n", + priv->x_min, priv->y_min); } } return 0; } +/* + * Apply quirk(s) if the hardware matches + */ + +static void synaptics_apply_quirks(struct psmouse *psmouse) +{ + struct synaptics_data *priv = psmouse->private; + int i; + + for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) { + if (!psmouse_matches_pnp_id(psmouse, + min_max_pnpid_table[i].pnp_ids)) + continue; + + if (min_max_pnpid_table[i].board_id.min != ANY_BOARD_ID && + priv->board_id < min_max_pnpid_table[i].board_id.min) + continue; + + if (min_max_pnpid_table[i].board_id.max != ANY_BOARD_ID && + priv->board_id > min_max_pnpid_table[i].board_id.max) + continue; + + priv->x_min = min_max_pnpid_table[i].x_min; + priv->x_max = min_max_pnpid_table[i].x_max; + priv->y_min = min_max_pnpid_table[i].y_min; + priv->y_max = min_max_pnpid_table[i].y_max; + psmouse_info(psmouse, + "quirked min/max coordinates: x [%d..%d], y [%d..%d]\n", + priv->x_min, priv->x_max, + priv->y_min, priv->y_max); + break; + } +} + static int synaptics_query_hardware(struct psmouse *psmouse) { if (synaptics_identify(psmouse)) @@ -402,13 +466,15 @@ static int synaptics_query_hardware(struct psmouse *psmouse) return -1; if (synaptics_firmware_id(psmouse)) return -1; - if (synaptics_board_id(psmouse)) + if (synaptics_query_modes(psmouse)) return -1; if (synaptics_capability(psmouse)) return -1; if (synaptics_resolution(psmouse)) return -1; + synaptics_apply_quirks(psmouse); + return 0; } @@ -516,18 +582,22 @@ static int synaptics_is_pt_packet(unsigned char *buf) return (buf[0] & 0xFC) == 0x84 && (buf[3] & 0xCC) == 0xC4; } -static void synaptics_pass_pt_packet(struct serio *ptport, unsigned char *packet) +static void synaptics_pass_pt_packet(struct psmouse *psmouse, + struct serio *ptport, + unsigned char *packet) { + struct synaptics_data *priv = psmouse->private; struct psmouse *child = serio_get_drvdata(ptport); if (child && child->state == PSMOUSE_ACTIVATED) { - serio_interrupt(ptport, packet[1], 0); + serio_interrupt(ptport, packet[1] | priv->pt_buttons, 0); serio_interrupt(ptport, packet[4], 0); serio_interrupt(ptport, packet[5], 0); if (child->pktsize == 4) serio_interrupt(ptport, packet[2], 0); - } else + } else { serio_interrupt(ptport, packet[1], 0); + } } static void synaptics_pt_activate(struct psmouse *psmouse) @@ -605,6 +675,18 @@ static void synaptics_parse_agm(const unsigned char buf[], } } +static void synaptics_parse_ext_buttons(const unsigned char buf[], + struct synaptics_data *priv, + struct synaptics_hw_state *hw) +{ + unsigned int ext_bits = + (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) + 1) >> 1; + unsigned int ext_mask = GENMASK(ext_bits - 1, 0); + + hw->ext_buttons = buf[4] & ext_mask; + hw->ext_buttons |= (buf[5] & ext_mask) << ext_bits; +} + static bool is_forcepad; static int synaptics_parse_hw_state(const unsigned char buf[], @@ -691,28 +773,9 @@ static int synaptics_parse_hw_state(const unsigned char buf[], hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0; } - if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) && + if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) > 0 && ((buf[0] ^ buf[3]) & 0x02)) { - switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) { - default: - /* - * if nExtBtn is greater than 8 it should be - * considered invalid and treated as 0 - */ - break; - case 8: - hw->ext_buttons |= ((buf[5] & 0x08)) ? 0x80 : 0; - hw->ext_buttons |= ((buf[4] & 0x08)) ? 0x40 : 0; - case 6: - hw->ext_buttons |= ((buf[5] & 0x04)) ? 0x20 : 0; - hw->ext_buttons |= ((buf[4] & 0x04)) ? 0x10 : 0; - case 4: - hw->ext_buttons |= ((buf[5] & 0x02)) ? 0x08 : 0; - hw->ext_buttons |= ((buf[4] & 0x02)) ? 0x04 : 0; - case 2: - hw->ext_buttons |= ((buf[5] & 0x01)) ? 0x02 : 0; - hw->ext_buttons |= ((buf[4] & 0x01)) ? 0x01 : 0; - } + synaptics_parse_ext_buttons(buf, priv, hw); } } else { hw->x = (((buf[1] & 0x1f) << 8) | buf[2]); @@ -774,12 +837,54 @@ static void synaptics_report_semi_mt_data(struct input_dev *dev, } } +static void synaptics_report_ext_buttons(struct psmouse *psmouse, + const struct synaptics_hw_state *hw) +{ + struct input_dev *dev = psmouse->dev; + struct synaptics_data *priv = psmouse->private; + int ext_bits = (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) + 1) >> 1; + char buf[6] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + int i; + + if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap)) + return; + + /* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */ + if (SYN_ID_FULL(priv->identity) == 0x801 && + !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02)) + return; + + if (!SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10)) { + for (i = 0; i < ext_bits; i++) { + input_report_key(dev, BTN_0 + 2 * i, + hw->ext_buttons & (1 << i)); + input_report_key(dev, BTN_1 + 2 * i, + hw->ext_buttons & (1 << (i + ext_bits))); + } + return; + } + + /* + * This generation of touchpads has the trackstick buttons + * physically wired to the touchpad. Re-route them through + * the pass-through interface. + */ + if (!priv->pt_port) + return; + + /* The trackstick expects at most 3 buttons */ + priv->pt_buttons = SYN_CAP_EXT_BUTTON_STICK_L(hw->ext_buttons) | + SYN_CAP_EXT_BUTTON_STICK_R(hw->ext_buttons) << 1 | + SYN_CAP_EXT_BUTTON_STICK_M(hw->ext_buttons) << 2; + + synaptics_pass_pt_packet(psmouse, priv->pt_port, buf); +} + static void synaptics_report_buttons(struct psmouse *psmouse, const struct synaptics_hw_state *hw) { struct input_dev *dev = psmouse->dev; struct synaptics_data *priv = psmouse->private; - int i; input_report_key(dev, BTN_LEFT, hw->left); input_report_key(dev, BTN_RIGHT, hw->right); @@ -792,8 +897,7 @@ static void synaptics_report_buttons(struct psmouse *psmouse, input_report_key(dev, BTN_BACK, hw->down); } - for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++) - input_report_key(dev, BTN_0 + i, hw->ext_buttons & (1 << i)); + synaptics_report_ext_buttons(psmouse, hw); } static void synaptics_report_mt_data(struct psmouse *psmouse, @@ -813,7 +917,7 @@ static void synaptics_report_mt_data(struct psmouse *psmouse, pos[i].y = synaptics_invert_y(hw[i]->y); } - input_mt_assign_slots(dev, slot, pos, nsemi, DMAX * priv->x_res); + input_mt_assign_slots(dev, slot, pos, nsemi, 0); for (i = 0; i < nsemi; i++) { input_mt_slot(dev, slot[i]); @@ -1014,7 +1118,8 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse) if (SYN_CAP_PASS_THROUGH(priv->capabilities) && synaptics_is_pt_packet(psmouse->packet)) { if (priv->pt_port) - synaptics_pass_pt_packet(priv->pt_port, psmouse->packet); + synaptics_pass_pt_packet(psmouse, priv->pt_port, + psmouse->packet); } else synaptics_process_packet(psmouse); @@ -1116,8 +1221,9 @@ static void set_input_params(struct psmouse *psmouse, __set_bit(BTN_BACK, dev->keybit); } - for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++) - __set_bit(BTN_0 + i, dev->keybit); + if (!SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10)) + for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++) + __set_bit(BTN_0 + i, dev->keybit); __clear_bit(EV_REL, dev->evbit); __clear_bit(REL_X, dev->relbit); @@ -1125,7 +1231,8 @@ static void set_input_params(struct psmouse *psmouse, if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) { __set_bit(INPUT_PROP_BUTTONPAD, dev->propbit); - if (psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids)) + if (psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && + !SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10)) __set_bit(INPUT_PROP_TOPBUTTONPAD, dev->propbit); /* Clickpads report only left button */ __clear_bit(BTN_RIGHT, dev->keybit); diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h index aedc3299b14e..ee4bd0d12b26 100644 --- a/drivers/input/mouse/synaptics.h +++ b/drivers/input/mouse/synaptics.h @@ -22,6 +22,7 @@ #define SYN_QUE_EXT_CAPAB_0C 0x0c #define SYN_QUE_EXT_MAX_COORDS 0x0d #define SYN_QUE_EXT_MIN_COORDS 0x0f +#define SYN_QUE_MEXT_CAPAB_10 0x10 /* synatics modes */ #define SYN_BIT_ABSOLUTE_MODE (1 << 7) @@ -53,6 +54,7 @@ #define SYN_EXT_CAP_REQUESTS(c) (((c) & 0x700000) >> 20) #define SYN_CAP_MULTI_BUTTON_NO(ec) (((ec) & 0x00f000) >> 12) #define SYN_CAP_PRODUCT_ID(ec) (((ec) & 0xff0000) >> 16) +#define SYN_MEXT_CAP_BIT(m) ((m) & (1 << 1)) /* * The following describes response for the 0x0c query. @@ -89,6 +91,30 @@ #define SYN_CAP_REDUCED_FILTERING(ex0c) ((ex0c) & 0x000400) #define SYN_CAP_IMAGE_SENSOR(ex0c) ((ex0c) & 0x000800) +/* + * The following descibes response for the 0x10 query. + * + * byte mask name meaning + * ---- ---- ------- ------------ + * 1 0x01 ext buttons are stick buttons exported in the extended + * capability are actually meant to be used + * by the tracktick (pass-through). + * 1 0x02 SecurePad the touchpad is a SecurePad, so it + * contains a built-in fingerprint reader. + * 1 0xe0 more ext count how many more extented queries are + * available after this one. + * 2 0xff SecurePad width the width of the SecurePad fingerprint + * reader. + * 3 0xff SecurePad height the height of the SecurePad fingerprint + * reader. + */ +#define SYN_CAP_EXT_BUTTONS_STICK(ex10) ((ex10) & 0x010000) +#define SYN_CAP_SECUREPAD(ex10) ((ex10) & 0x020000) + +#define SYN_CAP_EXT_BUTTON_STICK_L(eb) (!!((eb) & 0x01)) +#define SYN_CAP_EXT_BUTTON_STICK_M(eb) (!!((eb) & 0x02)) +#define SYN_CAP_EXT_BUTTON_STICK_R(eb) (!!((eb) & 0x04)) + /* synaptics modes query bits */ #define SYN_MODE_ABSOLUTE(m) ((m) & (1 << 7)) #define SYN_MODE_RATE(m) ((m) & (1 << 6)) @@ -143,6 +169,7 @@ struct synaptics_data { unsigned long int capabilities; /* Capabilities */ unsigned long int ext_cap; /* Extended Capabilities */ unsigned long int ext_cap_0c; /* Ext Caps from 0x0c query */ + unsigned long int ext_cap_10; /* Ext Caps from 0x10 query */ unsigned long int identity; /* Identification */ unsigned int x_res, y_res; /* X/Y resolution in units/mm */ unsigned int x_max, y_max; /* Max coordinates (from FW) */ @@ -156,6 +183,7 @@ struct synaptics_data { bool disable_gesture; /* disable gestures */ struct serio *pt_port; /* Pass-through serio port */ + unsigned char pt_buttons; /* Pass-through buttons */ /* * Last received Advanced Gesture Mode (AGM) packet. An AGM packet diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 58917525126e..6261fd6d7c3c 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -943,6 +943,7 @@ config TOUCHSCREEN_SUN4I tristate "Allwinner sun4i resistive touchscreen controller support" depends on ARCH_SUNXI || COMPILE_TEST depends on HWMON + depends on THERMAL || !THERMAL_OF help This selects support for the resistive touchscreen controller found on Allwinner sunxi SoCs. diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index baa0d9786f50..1ae4e547b419 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,6 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE + depends on ARM || ARM64 || COMPILE_TEST help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -63,6 +64,7 @@ config MSM_IOMMU bool "MSM IOMMU Support" depends on ARM depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST + depends on BROKEN select IOMMU_API help Support for the IOMMUs found on certain Qualcomm SOCs. diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fc13dd56953e..a3adde6519f0 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1288,10 +1288,13 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return 0; spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS) + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS && + smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { ret = arm_smmu_iova_to_phys_hard(domain, iova); - else + } else { ret = ops->iova_to_phys(ops, iova); + } + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); return ret; @@ -1556,7 +1559,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if (smmu->version == 1 || (!(id & ID0_ATOSNS) && (id & ID0_S1TS))) { + if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 7ce52737c7a1..dc14fec4ede1 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1186,8 +1186,15 @@ static const struct iommu_ops exynos_iommu_ops = { static int __init exynos_iommu_init(void) { + struct device_node *np; int ret; + np = of_find_matching_node(NULL, sysmmu_of_match); + if (!np) + return 0; + + of_node_put(np); + lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); if (!lv2table_kmem_cache) { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ae4c1a854e57..2d1e05bdbb53 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1742,9 +1742,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; struct page *freelist = NULL; + int i; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1764,8 +1763,8 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); - for_each_active_iommu(iommu, drhd) - iommu_detach_domain(domain, iommu); + for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) + iommu_detach_domain(domain, g_iommus[i]); rcu_read_unlock(); dma_free_pagelist(freelist); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 5a500edf00cc..b610a8dee238 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -56,7 +56,8 @@ ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ * (d)->bits_per_level) + (d)->pg_shift) -#define ARM_LPAE_PAGES_PER_PGD(d) ((d)->pgd_size >> (d)->pg_shift) +#define ARM_LPAE_PAGES_PER_PGD(d) \ + DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) /* * Calculate the index at level l used to map virtual address a using the @@ -66,7 +67,7 @@ ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) #define ARM_LPAE_LVL_IDX(a,l,d) \ - (((a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ + (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) /* Calculate the block/page mapping size at level l for pagetable in d. */ diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 10186cac7716..bc39bdf7b99b 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -851,6 +851,7 @@ static int ipmmu_remove(struct platform_device *pdev) static const struct of_device_id ipmmu_of_ids[] = { { .compatible = "renesas,ipmmu-vmsa", }, + { } }; static struct platform_driver ipmmu_driver = { diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index f59f857b702e..a4ba851825c2 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1376,6 +1376,13 @@ static int __init omap_iommu_init(void) struct kmem_cache *p; const unsigned long flags = SLAB_HWCACHE_ALIGN; size_t align = 1 << 10; /* L2 pagetable alignement */ + struct device_node *np; + + np = of_find_matching_node(NULL, omap_iommu_of_match); + if (!np) + return 0; + + of_node_put(np); p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags, iopte_cachep_ctor); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 6a8b1ec4a48a..9f74fddcd304 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1015,8 +1015,15 @@ static struct platform_driver rk_iommu_driver = { static int __init rk_iommu_init(void) { + struct device_node *np; int ret; + np = of_find_matching_node(NULL, rk_iommu_dt_ids); + if (!np) + return 0; + + of_node_put(np); + ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops); if (ret) return ret; diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 463c235acbdc..4387dae14e45 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -69,6 +69,7 @@ static void __iomem *per_cpu_int_base; static void __iomem *main_int_base; static struct irq_domain *armada_370_xp_mpic_domain; static u32 doorbell_mask_reg; +static int parent_irq; #ifdef CONFIG_PCI_MSI static struct irq_domain *armada_370_xp_msi_domain; static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR); @@ -356,6 +357,7 @@ static int armada_xp_mpic_secondary_init(struct notifier_block *nfb, { if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) armada_xp_mpic_smp_cpu_init(); + return NOTIFY_OK; } @@ -364,6 +366,20 @@ static struct notifier_block armada_370_xp_mpic_cpu_notifier = { .priority = 100, }; +static int mpic_cascaded_secondary_init(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) + enable_percpu_irq(parent_irq, IRQ_TYPE_NONE); + + return NOTIFY_OK; +} + +static struct notifier_block mpic_cascaded_cpu_notifier = { + .notifier_call = mpic_cascaded_secondary_init, + .priority = 100, +}; + #endif /* CONFIG_SMP */ static struct irq_domain_ops armada_370_xp_mpic_irq_ops = { @@ -539,7 +555,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, struct device_node *parent) { struct resource main_int_res, per_cpu_int_res; - int parent_irq, nr_irqs, i; + int nr_irqs, i; u32 control; BUG_ON(of_address_to_resource(node, 0, &main_int_res)); @@ -587,6 +603,9 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, register_cpu_notifier(&armada_370_xp_mpic_cpu_notifier); #endif } else { +#ifdef CONFIG_SMP + register_cpu_notifier(&mpic_cascaded_cpu_notifier); +#endif irq_set_chained_handler(parent_irq, armada_370_xp_mpic_handle_cascade_irq); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d8996bdf0f61..9687f8afebff 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -169,7 +169,7 @@ static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) { - cmd->raw_cmd[0] &= ~(0xffffUL << 32); + cmd->raw_cmd[0] &= BIT_ULL(32) - 1; cmd->raw_cmd[0] |= ((u64)devid) << 32; } @@ -416,13 +416,14 @@ static void its_send_single_command(struct its_node *its, { struct its_cmd_block *cmd, *sync_cmd, *next_cmd; struct its_collection *sync_col; + unsigned long flags; - raw_spin_lock(&its->lock); + raw_spin_lock_irqsave(&its->lock, flags); cmd = its_allocate_entry(its); if (!cmd) { /* We're soooooo screewed... */ pr_err_ratelimited("ITS can't allocate, dropping command\n"); - raw_spin_unlock(&its->lock); + raw_spin_unlock_irqrestore(&its->lock, flags); return; } sync_col = builder(cmd, desc); @@ -442,7 +443,7 @@ static void its_send_single_command(struct its_node *its, post: next_cmd = its_post_commands(its); - raw_spin_unlock(&its->lock); + raw_spin_unlock_irqrestore(&its->lock, flags); its_wait_for_range_completion(its, cmd, next_cmd); } @@ -799,21 +800,44 @@ static int its_alloc_tables(struct its_node *its) { int err; int i; - int psz = PAGE_SIZE; + int psz = SZ_64K; u64 shr = GITS_BASER_InnerShareable; + u64 cache = GITS_BASER_WaWb; for (i = 0; i < GITS_BASER_NR_REGS; i++) { u64 val = readq_relaxed(its->base + GITS_BASER + i * 8); u64 type = GITS_BASER_TYPE(val); u64 entry_size = GITS_BASER_ENTRY_SIZE(val); + int order = get_order(psz); + int alloc_size; u64 tmp; void *base; if (type == GITS_BASER_TYPE_NONE) continue; - /* We're lazy and only allocate a single page for now */ - base = (void *)get_zeroed_page(GFP_KERNEL); + /* + * Allocate as many entries as required to fit the + * range of device IDs that the ITS can grok... The ID + * space being incredibly sparse, this results in a + * massive waste of memory. + * + * For other tables, only allocate a single page. + */ + if (type == GITS_BASER_TYPE_DEVICE) { + u64 typer = readq_relaxed(its->base + GITS_TYPER); + u32 ids = GITS_TYPER_DEVBITS(typer); + + order = get_order((1UL << ids) * entry_size); + if (order >= MAX_ORDER) { + order = MAX_ORDER - 1; + pr_warn("%s: Device Table too large, reduce its page order to %u\n", + its->msi_chip.of_node->full_name, order); + } + } + + alloc_size = (1 << order) * PAGE_SIZE; + base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!base) { err = -ENOMEM; goto out_free; @@ -825,7 +849,7 @@ retry_baser: val = (virt_to_phys(base) | (type << GITS_BASER_TYPE_SHIFT) | ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | - GITS_BASER_WaWb | + cache | shr | GITS_BASER_VALID); @@ -841,7 +865,7 @@ retry_baser: break; } - val |= (PAGE_SIZE / psz) - 1; + val |= (alloc_size / psz) - 1; writeq_relaxed(val, its->base + GITS_BASER + i * 8); tmp = readq_relaxed(its->base + GITS_BASER + i * 8); @@ -851,9 +875,12 @@ retry_baser: * Shareability didn't stick. Just use * whatever the read reported, which is likely * to be the only thing this redistributor - * supports. + * supports. If that's zero, make it + * non-cacheable as well. */ shr = tmp & GITS_BASER_SHAREABILITY_MASK; + if (!shr) + cache = GITS_BASER_nC; goto retry_baser; } @@ -882,7 +909,7 @@ retry_baser: } pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n", - (int)(PAGE_SIZE / entry_size), + (int)(alloc_size / entry_size), its_base_type_string[type], (unsigned long)virt_to_phys(base), psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); @@ -957,16 +984,39 @@ static void its_cpu_init_lpis(void) tmp = readq_relaxed(rbase + GICR_PROPBASER); if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { + if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + val &= ~(GICR_PROPBASER_SHAREABILITY_MASK | + GICR_PROPBASER_CACHEABILITY_MASK); + val |= GICR_PROPBASER_nC; + writeq_relaxed(val, rbase + GICR_PROPBASER); + } pr_info_once("GIC: using cache flushing for LPI property table\n"); gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING; } /* set PENDBASE */ val = (page_to_phys(pend_page) | - GICR_PROPBASER_InnerShareable | - GICR_PROPBASER_WaWb); + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_WaWb); writeq_relaxed(val, rbase + GICR_PENDBASER); + tmp = readq_relaxed(rbase + GICR_PENDBASER); + + if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must remove the + * cacheability attributes as well. + */ + val &= ~(GICR_PENDBASER_SHAREABILITY_MASK | + GICR_PENDBASER_CACHEABILITY_MASK); + val |= GICR_PENDBASER_nC; + writeq_relaxed(val, rbase + GICR_PENDBASER); + } /* Enable LPIs */ val = readl_relaxed(rbase + GICR_CTLR); @@ -1003,7 +1053,7 @@ static void its_cpu_init_collection(void) * This ITS wants a linear CPU number. */ target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER); - target = GICR_TYPER_CPU_NUMBER(target); + target = GICR_TYPER_CPU_NUMBER(target) << 16; } /* Perform collection mapping */ @@ -1020,8 +1070,9 @@ static void its_cpu_init_collection(void) static struct its_device *its_find_device(struct its_node *its, u32 dev_id) { struct its_device *its_dev = NULL, *tmp; + unsigned long flags; - raw_spin_lock(&its->lock); + raw_spin_lock_irqsave(&its->lock, flags); list_for_each_entry(tmp, &its->its_device_list, entry) { if (tmp->device_id == dev_id) { @@ -1030,7 +1081,7 @@ static struct its_device *its_find_device(struct its_node *its, u32 dev_id) } } - raw_spin_unlock(&its->lock); + raw_spin_unlock_irqrestore(&its->lock, flags); return its_dev; } @@ -1040,6 +1091,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, { struct its_device *dev; unsigned long *lpi_map; + unsigned long flags; void *itt; int lpi_base; int nr_lpis; @@ -1056,7 +1108,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, nr_ites = max(2UL, roundup_pow_of_two(nvecs)); sz = nr_ites * its->ite_size; sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; - itt = kmalloc(sz, GFP_KERNEL); + itt = kzalloc(sz, GFP_KERNEL); lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis); if (!dev || !itt || !lpi_map) { @@ -1075,9 +1127,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, dev->device_id = dev_id; INIT_LIST_HEAD(&dev->entry); - raw_spin_lock(&its->lock); + raw_spin_lock_irqsave(&its->lock, flags); list_add(&dev->entry, &its->its_device_list); - raw_spin_unlock(&its->lock); + raw_spin_unlock_irqrestore(&its->lock, flags); /* Bind the device to the first possible CPU */ cpu = cpumask_first(cpu_online_mask); @@ -1091,9 +1143,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, static void its_free_device(struct its_device *its_dev) { - raw_spin_lock(&its_dev->its->lock); + unsigned long flags; + + raw_spin_lock_irqsave(&its_dev->its->lock, flags); list_del(&its_dev->entry); - raw_spin_unlock(&its_dev->its->lock); + raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); kfree(its_dev->itt); kfree(its_dev); } @@ -1112,31 +1166,69 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) return 0; } +struct its_pci_alias { + struct pci_dev *pdev; + u32 dev_id; + u32 count; +}; + +static int its_pci_msi_vec_count(struct pci_dev *pdev) +{ + int msi, msix; + + msi = max(pci_msi_vec_count(pdev), 0); + msix = max(pci_msix_vec_count(pdev), 0); + + return max(msi, msix); +} + +static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + struct its_pci_alias *dev_alias = data; + + dev_alias->dev_id = alias; + if (pdev != dev_alias->pdev) + dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev); + + return 0; +} + static int its_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *info) { struct pci_dev *pdev; struct its_node *its; - u32 dev_id; struct its_device *its_dev; + struct its_pci_alias dev_alias; if (!dev_is_pci(dev)) return -EINVAL; pdev = to_pci_dev(dev); - dev_id = PCI_DEVID(pdev->bus->number, pdev->devfn); + dev_alias.pdev = pdev; + dev_alias.count = nvec; + + pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias); its = domain->parent->host_data; - its_dev = its_find_device(its, dev_id); - if (WARN_ON(its_dev)) - return -EINVAL; + its_dev = its_find_device(its, dev_alias.dev_id); + if (its_dev) { + /* + * We already have seen this ID, probably through + * another alias (PCI bridge of some sort). No need to + * create the device. + */ + dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id); + goto out; + } - its_dev = its_create_device(its, dev_id, nvec); + its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count); if (!its_dev) return -ENOMEM; - dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", nvec, ilog2(nvec)); - + dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", + dev_alias.count, ilog2(dev_alias.count)); +out: info->scratchpad[0].ptr = its_dev; info->scratchpad[1].ptr = dev; return 0; @@ -1255,6 +1347,34 @@ static const struct irq_domain_ops its_domain_ops = { .deactivate = its_irq_domain_deactivate, }; +static int its_force_quiescent(void __iomem *base) +{ + u32 count = 1000000; /* 1s */ + u32 val; + + val = readl_relaxed(base + GITS_CTLR); + if (val & GITS_CTLR_QUIESCENT) + return 0; + + /* Disable the generation of all interrupts to this ITS */ + val &= ~GITS_CTLR_ENABLE; + writel_relaxed(val, base + GITS_CTLR); + + /* Poll GITS_CTLR and wait until ITS becomes quiescent */ + while (1) { + val = readl_relaxed(base + GITS_CTLR); + if (val & GITS_CTLR_QUIESCENT) + return 0; + + count--; + if (!count) + return -EBUSY; + + cpu_relax(); + udelay(1); + } +} + static int its_probe(struct device_node *node, struct irq_domain *parent) { struct resource res; @@ -1283,6 +1403,13 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) goto out_unmap; } + err = its_force_quiescent(its_base); + if (err) { + pr_warn("%s: failed to quiesce, giving up\n", + node->full_name); + goto out_unmap; + } + pr_info("ITS: %s\n", node->full_name); its = kzalloc(sizeof(*its), GFP_KERNEL); @@ -1322,14 +1449,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) writeq_relaxed(baser, its->base + GITS_CBASER); tmp = readq_relaxed(its->base + GITS_CBASER); - writeq_relaxed(0, its->base + GITS_CWRITER); - writel_relaxed(1, its->base + GITS_CTLR); - if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) { + if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) { + if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + baser &= ~(GITS_CBASER_SHAREABILITY_MASK | + GITS_CBASER_CACHEABILITY_MASK); + baser |= GITS_CBASER_nC; + writeq_relaxed(baser, its->base + GITS_CBASER); + } pr_info("ITS: using cache flushing for cmd queue\n"); its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING; } + writeq_relaxed(0, its->base + GITS_CWRITER); + writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); + if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) { its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its); if (!its->domain) { @@ -1382,12 +1521,11 @@ static bool gic_rdists_supports_plpis(void) int its_cpu_init(void) { - if (!gic_rdists_supports_plpis()) { - pr_info("CPU%d: LPIs not supported\n", smp_processor_id()); - return -ENXIO; - } - if (!list_empty(&its_nodes)) { + if (!gic_rdists_supports_plpis()) { + pr_info("CPU%d: LPIs not supported\n", smp_processor_id()); + return -ENXIO; + } its_cpu_init_lpis(); its_cpu_init_collection(); } diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 1c6dea2fbc34..fd8850def1b8 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -466,7 +466,7 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, tlist |= 1 << (mpidr & 0xf); cpu = cpumask_next(cpu, mask); - if (cpu == nr_cpu_ids) + if (cpu >= nr_cpu_ids) goto out; mpidr = cpu_logical_map(cpu); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4634cf7d0ec3..471e1cdc1933 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -154,23 +154,25 @@ static inline unsigned int gic_irq(struct irq_data *d) static void gic_mask_irq(struct irq_data *d) { u32 mask = 1 << (gic_irq(d) % 32); + unsigned long flags; - raw_spin_lock(&irq_controller_lock); + raw_spin_lock_irqsave(&irq_controller_lock, flags); writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4); if (gic_arch_extn.irq_mask) gic_arch_extn.irq_mask(d); - raw_spin_unlock(&irq_controller_lock); + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } static void gic_unmask_irq(struct irq_data *d) { u32 mask = 1 << (gic_irq(d) % 32); + unsigned long flags; - raw_spin_lock(&irq_controller_lock); + raw_spin_lock_irqsave(&irq_controller_lock, flags); if (gic_arch_extn.irq_unmask) gic_arch_extn.irq_unmask(d); writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4); - raw_spin_unlock(&irq_controller_lock); + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); } static void gic_eoi_irq(struct irq_data *d) @@ -188,6 +190,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) { void __iomem *base = gic_dist_base(d); unsigned int gicirq = gic_irq(d); + unsigned long flags; int ret; /* Interrupt configuration for SGIs can't be changed */ @@ -199,14 +202,14 @@ static int gic_set_type(struct irq_data *d, unsigned int type) type != IRQ_TYPE_EDGE_RISING) return -EINVAL; - raw_spin_lock(&irq_controller_lock); + raw_spin_lock_irqsave(&irq_controller_lock, flags); if (gic_arch_extn.irq_set_type) gic_arch_extn.irq_set_type(d, type); ret = gic_configure_irq(gicirq, type, base, NULL); - raw_spin_unlock(&irq_controller_lock); + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); return ret; } @@ -227,6 +230,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3); unsigned int cpu, shift = (gic_irq(d) % 4) * 8; u32 val, mask, bit; + unsigned long flags; if (!force) cpu = cpumask_any_and(mask_val, cpu_online_mask); @@ -236,12 +240,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; - raw_spin_lock(&irq_controller_lock); + raw_spin_lock_irqsave(&irq_controller_lock, flags); mask = 0xff << shift; bit = gic_cpu_map[cpu] << shift; val = readl_relaxed(reg) & ~mask; writel_relaxed(val | bit, reg); - raw_spin_unlock(&irq_controller_lock); + raw_spin_unlock_irqrestore(&irq_controller_lock, flags); return IRQ_SET_MASK_OK; } diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 3c92780bda09..ff48da61c94c 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1755,7 +1755,7 @@ init_card(struct hfc_pci *hc) enable_hwirq(hc); spin_unlock_irqrestore(&hc->lock, flags); /* Timeout 80ms */ - current->state = TASK_UNINTERRUPTIBLE; + set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout((80 * HZ) / 1000); printk(KERN_INFO "HFC PCI: IRQ %d count %d\n", hc->irq, hc->irqcnt); diff --git a/drivers/isdn/icn/icn.c b/drivers/isdn/icn/icn.c index 6a7447c304ac..358a574d9e8b 100644 --- a/drivers/isdn/icn/icn.c +++ b/drivers/isdn/icn/icn.c @@ -1609,7 +1609,7 @@ icn_setup(char *line) if (ints[0] > 1) membase = (unsigned long)ints[2]; if (str && *str) { - strcpy(sid, str); + strlcpy(sid, str, sizeof(sid)); icn_id = sid; if ((p = strchr(sid, ','))) { *p++ = 0; diff --git a/drivers/lguest/Kconfig b/drivers/lguest/Kconfig index ee035ec4526b..169172d2ba05 100644 --- a/drivers/lguest/Kconfig +++ b/drivers/lguest/Kconfig @@ -1,6 +1,6 @@ config LGUEST tristate "Linux hypervisor example code" - depends on X86_32 && EVENTFD && TTY + depends on X86_32 && EVENTFD && TTY && PCI_DIRECT select HVC_DRIVER ---help--- This is a very simple module which allows you to run diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 37de0173b6d2..74adcd2c967e 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -289,9 +289,16 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, struct request_queue *q = bdev_get_queue(where->bdev); unsigned short logical_block_size = queue_logical_block_size(q); sector_t num_sectors; + unsigned int uninitialized_var(special_cmd_max_sectors); - /* Reject unsupported discard requests */ - if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) { + /* + * Reject unsupported discard and write same requests. + */ + if (rw & REQ_DISCARD) + special_cmd_max_sectors = q->limits.max_discard_sectors; + else if (rw & REQ_WRITE_SAME) + special_cmd_max_sectors = q->limits.max_write_same_sectors; + if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; } @@ -317,7 +324,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; } else if (rw & REQ_WRITE_SAME) { @@ -326,7 +333,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, */ dp->get_page(dp, &page, &len, &offset); bio_add_page(bio, page, logical_block_size, offset); - num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; offset = 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8b204ae216ab..f83a0f3fc365 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -20,6 +20,8 @@ #include <linux/log2.h> #include <linux/dm-kcopyd.h> +#include "dm.h" + #include "dm-exception-store.h" #define DM_MSG_PREFIX "snapshots" @@ -291,12 +293,23 @@ struct origin { }; /* + * This structure is allocated for each origin target + */ +struct dm_origin { + struct dm_dev *dev; + struct dm_target *ti; + unsigned split_boundary; + struct list_head hash_list; +}; + +/* * Size of the hash table for origin volumes. If we make this * the size of the minors list then it should be nearly perfect */ #define ORIGIN_HASH_SIZE 256 #define ORIGIN_MASK 0xFF static struct list_head *_origins; +static struct list_head *_dm_origins; static struct rw_semaphore _origins_lock; static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); @@ -310,12 +323,22 @@ static int init_origin_hash(void) _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), GFP_KERNEL); if (!_origins) { - DMERR("unable to allocate memory"); + DMERR("unable to allocate memory for _origins"); return -ENOMEM; } - for (i = 0; i < ORIGIN_HASH_SIZE; i++) INIT_LIST_HEAD(_origins + i); + + _dm_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), + GFP_KERNEL); + if (!_dm_origins) { + DMERR("unable to allocate memory for _dm_origins"); + kfree(_origins); + return -ENOMEM; + } + for (i = 0; i < ORIGIN_HASH_SIZE; i++) + INIT_LIST_HEAD(_dm_origins + i); + init_rwsem(&_origins_lock); return 0; @@ -324,6 +347,7 @@ static int init_origin_hash(void) static void exit_origin_hash(void) { kfree(_origins); + kfree(_dm_origins); } static unsigned origin_hash(struct block_device *bdev) @@ -350,6 +374,30 @@ static void __insert_origin(struct origin *o) list_add_tail(&o->hash_list, sl); } +static struct dm_origin *__lookup_dm_origin(struct block_device *origin) +{ + struct list_head *ol; + struct dm_origin *o; + + ol = &_dm_origins[origin_hash(origin)]; + list_for_each_entry (o, ol, hash_list) + if (bdev_equal(o->dev->bdev, origin)) + return o; + + return NULL; +} + +static void __insert_dm_origin(struct dm_origin *o) +{ + struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)]; + list_add_tail(&o->hash_list, sl); +} + +static void __remove_dm_origin(struct dm_origin *o) +{ + list_del(&o->hash_list); +} + /* * _origins_lock must be held when calling this function. * Returns number of snapshots registered using the supplied cow device, plus: @@ -1840,9 +1888,40 @@ static int snapshot_preresume(struct dm_target *ti) static void snapshot_resume(struct dm_target *ti) { struct dm_snapshot *s = ti->private; - struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; + struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL; + struct dm_origin *o; + struct mapped_device *origin_md = NULL; + bool must_restart_merging = false; down_read(&_origins_lock); + + o = __lookup_dm_origin(s->origin->bdev); + if (o) + origin_md = dm_table_get_md(o->ti->table); + if (!origin_md) { + (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging); + if (snap_merging) + origin_md = dm_table_get_md(snap_merging->ti->table); + } + if (origin_md == dm_table_get_md(ti->table)) + origin_md = NULL; + if (origin_md) { + if (dm_hold(origin_md)) + origin_md = NULL; + } + + up_read(&_origins_lock); + + if (origin_md) { + dm_internal_suspend_fast(origin_md); + if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) { + must_restart_merging = true; + stop_merge(snap_merging); + } + } + + down_read(&_origins_lock); + (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { down_write(&snap_src->lock); @@ -1851,8 +1930,16 @@ static void snapshot_resume(struct dm_target *ti) up_write(&snap_dest->lock); up_write(&snap_src->lock); } + up_read(&_origins_lock); + if (origin_md) { + if (must_restart_merging) + start_merge(snap_merging); + dm_internal_resume_fast(origin_md); + dm_put(origin_md); + } + /* Now we have correct chunk size, reregister */ reregister_snapshot(s); @@ -2133,11 +2220,6 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, * Origin: maps a linear range of a device, with hooks for snapshotting. */ -struct dm_origin { - struct dm_dev *dev; - unsigned split_boundary; -}; - /* * Construct an origin mapping: <dev_path> * The context for an origin is merely a 'struct dm_dev *' @@ -2166,6 +2248,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_open; } + o->ti = ti; ti->private = o; ti->num_flush_bios = 1; @@ -2180,6 +2263,7 @@ bad_alloc: static void origin_dtr(struct dm_target *ti) { struct dm_origin *o = ti->private; + dm_put_device(ti, o->dev); kfree(o); } @@ -2216,6 +2300,19 @@ static void origin_resume(struct dm_target *ti) struct dm_origin *o = ti->private; o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev); + + down_write(&_origins_lock); + __insert_dm_origin(o); + up_write(&_origins_lock); +} + +static void origin_postsuspend(struct dm_target *ti) +{ + struct dm_origin *o = ti->private; + + down_write(&_origins_lock); + __remove_dm_origin(o); + up_write(&_origins_lock); } static void origin_status(struct dm_target *ti, status_type_t type, @@ -2258,12 +2355,13 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 8, 1}, + .version = {1, 9, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, .map = origin_map, .resume = origin_resume, + .postsuspend = origin_postsuspend, .status = origin_status, .merge = origin_merge, .iterate_devices = origin_iterate_devices, @@ -2271,7 +2369,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2285,7 +2383,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 654773cb1eee..921aafd12aee 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2358,17 +2358,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; case -ENODATA: - if (get_pool_mode(tc->pool) == PM_READ_ONLY) { - /* - * This block isn't provisioned, and we have no way - * of doing so. - */ - handle_unserviceable_bio(tc->pool, bio); - cell_defer_no_holder(tc, virt_cell); - return DM_MAPIO_SUBMITTED; - } - /* fall through */ - case -EWOULDBLOCK: thin_defer_cell(tc, virt_cell); return DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 73f28802dc7a..8001fe9e3434 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode) dm_get(md); atomic_inc(&md->open_count); - out: spin_unlock(&_minor_lock); @@ -442,16 +441,20 @@ out: static void dm_blk_close(struct gendisk *disk, fmode_t mode) { - struct mapped_device *md = disk->private_data; + struct mapped_device *md; spin_lock(&_minor_lock); + md = disk->private_data; + if (WARN_ON(!md)) + goto out; + if (atomic_dec_and_test(&md->open_count) && (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) queue_work(deferred_remove_workqueue, &deferred_remove_work); dm_put(md); - +out: spin_unlock(&_minor_lock); } @@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md) int minor = MINOR(disk_devt(md->disk)); unlock_fs(md); - bdput(md->bdev); destroy_workqueue(md->wq); if (md->kworker_task) @@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->rq_pool); if (md->bs) bioset_free(md->bs); - blk_integrity_unregister(md->disk); - del_gendisk(md->disk); + cleanup_srcu_struct(&md->io_barrier); free_table_devices(&md->table_devices); - free_minor(minor); + dm_stats_cleanup(&md->stats); spin_lock(&_minor_lock); md->disk->private_data = NULL; spin_unlock(&_minor_lock); - + if (blk_get_integrity(md->disk)) + blk_integrity_unregister(md->disk); + del_gendisk(md->disk); put_disk(md->disk); blk_cleanup_queue(md->queue); - dm_stats_cleanup(&md->stats); + bdput(md->bdev); + free_minor(minor); + module_put(THIS_MODULE); kfree(md); } @@ -2616,6 +2621,19 @@ void dm_get(struct mapped_device *md) BUG_ON(test_bit(DMF_FREEING, &md->flags)); } +int dm_hold(struct mapped_device *md) +{ + spin_lock(&_minor_lock); + if (test_bit(DMF_FREEING, &md->flags)) { + spin_unlock(&_minor_lock); + return -EBUSY; + } + dm_get(md); + spin_unlock(&_minor_lock); + return 0; +} +EXPORT_SYMBOL_GPL(dm_hold); + const char *dm_device_name(struct mapped_device *md) { return md->name; @@ -2629,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) might_sleep(); - spin_lock(&_minor_lock); map = dm_get_live_table(md, &srcu_idx); + + spin_lock(&_minor_lock); idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); @@ -2638,10 +2657,16 @@ static void __dm_destroy(struct mapped_device *md, bool wait) if (dm_request_based(md)) flush_kthread_worker(&md->kworker); + /* + * Take suspend_lock so that presuspend and postsuspend methods + * do not race with internal suspend. + */ + mutex_lock(&md->suspend_lock); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } + mutex_unlock(&md->suspend_lock); /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); @@ -3115,6 +3140,7 @@ void dm_internal_suspend_fast(struct mapped_device *md) flush_workqueue(md->wq); dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); } +EXPORT_SYMBOL_GPL(dm_internal_suspend_fast); void dm_internal_resume_fast(struct mapped_device *md) { @@ -3126,6 +3152,7 @@ void dm_internal_resume_fast(struct mapped_device *md) done: mutex_unlock(&md->suspend_lock); } +EXPORT_SYMBOL_GPL(dm_internal_resume_fast); /*----------------------------------------------------------------- * Event notification. diff --git a/drivers/md/md.c b/drivers/md/md.c index cadf9cc02b25..e6178787ce3d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -249,6 +249,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio) const int rw = bio_data_dir(bio); struct mddev *mddev = q->queuedata; unsigned int sectors; + int cpu; if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { @@ -284,7 +285,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio) sectors = bio_sectors(bio); mddev->pers->make_request(mddev, bio); - generic_start_io_acct(rw, sectors, &mddev->gendisk->part0); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); + part_stat_unlock(); if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) wake_up(&mddev->sb_wait); @@ -5080,7 +5084,8 @@ int md_run(struct mddev *mddev) } if (err) { mddev_detach(mddev); - pers->free(mddev, mddev->private); + if (mddev->private) + pers->free(mddev, mddev->private); module_put(pers->owner); bitmap_destroy(mddev); return err; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index a13f738a7b39..3b5d7f704aa3 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -313,7 +313,7 @@ static struct strip_zone *find_zone(struct r0conf *conf, /* * remaps the bio to the target device. we separate two flows. - * power 2 flow and a general flow for the sake of perfromance + * power 2 flow and a general flow for the sake of performance */ static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone, sector_t sector, sector_t *sector_offset) @@ -467,8 +467,6 @@ static int raid0_run(struct mddev *mddev) dump_zones(mddev); ret = md_integrity_register(mddev); - if (ret) - raid0_free(mddev, conf); return ret; } @@ -526,6 +524,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + sector = bio->bi_iter.bi_sector; zone = find_zone(mddev->private, §or); tmp_dev = map_sector(mddev, zone, sector, §or); split->bi_bdev = tmp_dev->bdev; diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c index 5d2d8f45b4b6..67faa8d6950e 100644 --- a/drivers/media/dvb-frontends/rtl2832.c +++ b/drivers/media/dvb-frontends/rtl2832.c @@ -1240,7 +1240,7 @@ static int rtl2832_probe(struct i2c_client *client, dev->regmap_config.max_register = 5 * 0x100, dev->regmap_config.ranges = regmap_range_cfg, dev->regmap_config.num_ranges = ARRAY_SIZE(regmap_range_cfg), - dev->regmap_config.cache_type = REGCACHE_RBTREE, + dev->regmap_config.cache_type = REGCACHE_NONE, dev->regmap = regmap_init(&client->dev, ®map_bus, client, &dev->regmap_config); if (IS_ERR(dev->regmap)) { diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c index e4901a503c73..63c0ee5d0bf5 100644 --- a/drivers/media/pci/cx23885/cx23885-417.c +++ b/drivers/media/pci/cx23885/cx23885-417.c @@ -1339,14 +1339,13 @@ static int vidioc_querycap(struct file *file, void *priv, strlcpy(cap->driver, dev->name, sizeof(cap->driver)); strlcpy(cap->card, cx23885_boards[tsport->dev->board].name, sizeof(cap->card)); - sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci)); - cap->capabilities = - V4L2_CAP_VIDEO_CAPTURE | - V4L2_CAP_READWRITE | - V4L2_CAP_STREAMING | - 0; + sprintf(cap->bus_info, "PCIe:%s", pci_name(dev->pci)); + cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | + V4L2_CAP_STREAMING; if (dev->tuner_type != TUNER_ABSENT) - cap->capabilities |= V4L2_CAP_TUNER; + cap->device_caps |= V4L2_CAP_TUNER; + cap->capabilities = cap->device_caps | V4L2_CAP_VBI_CAPTURE | + V4L2_CAP_AUDIO | V4L2_CAP_DEVICE_CAPS; return 0; } diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 12f7452edce3..a92ff4249d10 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -1845,6 +1845,9 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx) struct s5p_jpeg_addr jpeg_addr; u32 pix_size, padding_bytes = 0; + jpeg_addr.cb = 0; + jpeg_addr.cr = 0; + pix_size = ctx->cap_q.w * ctx->cap_q.h; if (ctx->mode == S5P_JPEG_ENCODE) { diff --git a/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c b/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c index e8c2cad93962..0974b9a7a584 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c @@ -20,7 +20,7 @@ void exynos3250_jpeg_reset(void __iomem *regs) { - u32 reg = 0; + u32 reg = 1; int count = 1000; writel(1, regs + EXYNOS3250_SW_RESET); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index 8e44a59d8ec2..98374e8bad3e 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -833,6 +833,7 @@ static int s5p_mfc_open(struct file *file) q->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; q->io_modes = VB2_MMAP; q->drv_priv = &ctx->fh; + q->lock = &dev->mfc_mutex; if (vdev == dev->vfd_dec) { q->io_modes = VB2_MMAP; q->ops = get_dec_queue_ops(); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h index 15f7663dd9f5..24262bbb1a35 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h @@ -29,7 +29,7 @@ /* Offset base used to differentiate between CAPTURE and OUTPUT * while mmaping */ -#define DST_QUEUE_OFF_BASE (TASK_SIZE / 2) +#define DST_QUEUE_OFF_BASE (1 << 30) #define MFC_BANK1_ALLOC_CTX 0 #define MFC_BANK2_ALLOC_CTX 1 diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h b/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h index de2b8c69daa5..22dfb3effda8 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr.h @@ -302,7 +302,7 @@ struct s5p_mfc_hw_ops { void (*write_info)(struct s5p_mfc_ctx *ctx, unsigned int data, unsigned int ofs); unsigned int (*read_info)(struct s5p_mfc_ctx *ctx, - unsigned int ofs); + unsigned long ofs); int (*get_dspl_y_adr)(struct s5p_mfc_dev *dev); int (*get_dec_y_adr)(struct s5p_mfc_dev *dev); int (*get_dspl_status)(struct s5p_mfc_dev *dev); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c index 0c4fcf2dfd09..b09bcd140491 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c @@ -263,15 +263,15 @@ static void s5p_mfc_release_dev_context_buffer_v5(struct s5p_mfc_dev *dev) static void s5p_mfc_write_info_v5(struct s5p_mfc_ctx *ctx, unsigned int data, unsigned int ofs) { - writel(data, (volatile void __iomem *)(ctx->shm.virt + ofs)); + writel(data, (void *)(ctx->shm.virt + ofs)); wmb(); } static unsigned int s5p_mfc_read_info_v5(struct s5p_mfc_ctx *ctx, - unsigned int ofs) + unsigned long ofs) { rmb(); - return readl((volatile void __iomem *)(ctx->shm.virt + ofs)); + return readl((void *)(ctx->shm.virt + ofs)); } static void s5p_mfc_dec_calc_dpb_size_v5(struct s5p_mfc_ctx *ctx) diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c index d826c58b5d53..cefad184fe96 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c @@ -1852,17 +1852,17 @@ static void s5p_mfc_write_info_v6(struct s5p_mfc_ctx *ctx, unsigned int data, unsigned int ofs) { s5p_mfc_clock_on(); - writel(data, (volatile void __iomem *)((unsigned long)ofs)); + writel(data, (void *)((unsigned long)ofs)); s5p_mfc_clock_off(); } static unsigned int -s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned int ofs) +s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned long ofs) { int ret; s5p_mfc_clock_on(); - ret = readl((volatile void __iomem *)((unsigned long)ofs)); + ret = readl((void *)ofs); s5p_mfc_clock_off(); return ret; diff --git a/drivers/media/platform/s5p-tv/Kconfig b/drivers/media/platform/s5p-tv/Kconfig index 5a1835dd65e8..697aaed42486 100644 --- a/drivers/media/platform/s5p-tv/Kconfig +++ b/drivers/media/platform/s5p-tv/Kconfig @@ -20,6 +20,7 @@ if VIDEO_SAMSUNG_S5P_TV config VIDEO_SAMSUNG_S5P_HDMI tristate "Samsung HDMI Driver" depends on VIDEO_V4L2 + depends on I2C depends on VIDEO_SAMSUNG_S5P_TV select VIDEO_SAMSUNG_S5P_HDMIPHY help diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c index a901b6248557..2554f3719b9e 100644 --- a/drivers/media/platform/sh_veu.c +++ b/drivers/media/platform/sh_veu.c @@ -1158,6 +1158,7 @@ static int sh_veu_probe(struct platform_device *pdev) } *vdev = sh_veu_videodev; + vdev->v4l2_dev = &veu->v4l2_dev; spin_lock_init(&veu->lock); mutex_init(&veu->fop_lock); vdev->lock = &veu->fop_lock; diff --git a/drivers/media/platform/soc_camera/atmel-isi.c b/drivers/media/platform/soc_camera/atmel-isi.c index 8526bf5c8429..c835beb2a1a8 100644 --- a/drivers/media/platform/soc_camera/atmel-isi.c +++ b/drivers/media/platform/soc_camera/atmel-isi.c @@ -843,6 +843,8 @@ static int isi_camera_set_bus_param(struct soc_camera_device *icd) if (isi->pdata.full_mode) cfg1 |= ISI_CFG1_FULL_MODE; + cfg1 |= ISI_CFG1_THMASK_BEATS_16; + isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS); isi_writel(isi, ISI_CFG1, cfg1); diff --git a/drivers/media/platform/soc_camera/soc_camera.c b/drivers/media/platform/soc_camera/soc_camera.c index cee7b56f8404..66634b469c98 100644 --- a/drivers/media/platform/soc_camera/soc_camera.c +++ b/drivers/media/platform/soc_camera/soc_camera.c @@ -1665,7 +1665,7 @@ eclkreg: eaddpdev: platform_device_put(sasc->pdev); eallocpdev: - devm_kfree(ici->v4l2_dev.dev, sasc); + devm_kfree(ici->v4l2_dev.dev, info); dev_err(ici->v4l2_dev.dev, "group probe failed: %d\n", ret); return ret; diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 77dcfdf547ac..87fc0fe29ebd 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -780,8 +780,6 @@ static int rtl2832u_frontend_callback(void *adapter_priv, int component, case TUNER_RTL2832_TUA9001: return rtl2832u_tua9001_tuner_callback(d, cmd, arg); } - default: - return -EINVAL; } return 0; diff --git a/drivers/media/usb/gspca/Kconfig b/drivers/media/usb/gspca/Kconfig index 60af3b167f3b..3fd94fe7e1eb 100644 --- a/drivers/media/usb/gspca/Kconfig +++ b/drivers/media/usb/gspca/Kconfig @@ -1,6 +1,7 @@ menuconfig USB_GSPCA tristate "GSPCA based webcams" depends on VIDEO_V4L2 + depends on INPUT || INPUT=n default m ---help--- Say Y here if you want to enable selecting webcams based diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c index bc08a829bc13..cc16e76a2493 100644 --- a/drivers/media/v4l2-core/videobuf2-core.c +++ b/drivers/media/v4l2-core/videobuf2-core.c @@ -3230,18 +3230,13 @@ int vb2_thread_stop(struct vb2_queue *q) if (threadio == NULL) return 0; - call_void_qop(q, wait_finish, q); threadio->stop = true; - vb2_internal_streamoff(q, q->type); - call_void_qop(q, wait_prepare, q); + /* Wake up all pending sleeps in the thread */ + vb2_queue_error(q); err = kthread_stop(threadio->thread); - q->fileio = NULL; - fileio->req.count = 0; - vb2_reqbufs(q, &fileio->req); - kfree(fileio); + __vb2_cleanup_fileio(q); threadio->thread = NULL; kfree(threadio); - q->fileio = NULL; q->threadio = NULL; return err; } diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c index b481d20c8372..69e0483adfee 100644 --- a/drivers/media/v4l2-core/videobuf2-dma-contig.c +++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c @@ -632,8 +632,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr, } /* extract page list from userspace mapping */ - ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, - dma_dir == DMA_FROM_DEVICE); + ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir); if (ret) { unsigned long pfn; if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) { diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index f38ec424872e..5615522f8d62 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -739,7 +739,7 @@ static int __init kempld_init(void) for (id = kempld_dmi_table; id->matches[0].slot != DMI_NONE; id++) if (strstr(id->ident, force_device_id)) - if (id->callback && id->callback(id)) + if (id->callback && !id->callback(id)) break; if (id->matches[0].slot == DMI_NONE) return -ENODEV; diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c index ede50244f265..dbd907d7170e 100644 --- a/drivers/mfd/rtsx_usb.c +++ b/drivers/mfd/rtsx_usb.c @@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register); int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data) { u16 value; + u8 *buf; + int ret; if (!data) return -EINVAL; - *data = 0; + + buf = kzalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT; value = swab16(addr); - return usb_control_msg(ucr->pusb_dev, + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, 1, 100); + value, 0, buf, 1, 100); + *data = *buf; + + kfree(buf); + return ret; } EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register); @@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status) int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status) { int ret; + u16 *buf; if (!status) return -EINVAL; - if (polling_pipe == 0) + if (polling_pipe == 0) { + buf = kzalloc(sizeof(u16), GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(ucr->pusb_dev, usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_POLL, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0, 0, status, 2, 100); - else + 0, 0, buf, 2, 100); + *status = *buf; + + kfree(buf); + } else { ret = rtsx_usb_get_status_with_bulk(ucr, status); + } /* usb_control_msg may return positive when success */ if (ret < 0) diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 38552a31304a..65fed7146e9b 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -202,16 +202,17 @@ static void enclosure_remove_links(struct enclosure_component *cdev) { char name[ENCLOSURE_NAME_SIZE]; + enclosure_link_name(cdev, name); + /* * In odd circumstances, like multipath devices, something else may * already have removed the links, so check for this condition first. */ - if (!cdev->dev->kobj.sd) - return; + if (cdev->dev->kobj.sd) + sysfs_remove_link(&cdev->dev->kobj, name); - enclosure_link_name(cdev, name); - sysfs_remove_link(&cdev->dev->kobj, name); - sysfs_remove_link(&cdev->cdev.kobj, "device"); + if (cdev->cdev.kobj.sd) + sysfs_remove_link(&cdev->cdev.kobj, "device"); } static int enclosure_add_links(struct enclosure_component *cdev) diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 9306219d5675..6ad049a08e4d 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -341,6 +341,8 @@ void mei_stop(struct mei_device *dev) dev->dev_state = MEI_DEV_POWER_DOWN; mei_reset(dev); + /* move device to disabled state unconditionally */ + dev->dev_state = MEI_DEV_DISABLED; mutex_unlock(&dev->device_lock); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 82dc5748f873..7f327121e6d7 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -1210,7 +1210,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) if (((die_args->trapnr == X86_TRAP_MF) || (die_args->trapnr == X86_TRAP_XF)) && - !user_mode_vm(die_args->regs)) + !user_mode(die_args->regs)) xpc_die_deactivate(); break; diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c index e9f1d8d84613..c53f14a7ce54 100644 --- a/drivers/mmc/core/pwrseq_simple.c +++ b/drivers/mmc/core/pwrseq_simple.c @@ -124,7 +124,7 @@ int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev) PTR_ERR(pwrseq->reset_gpios[i]) != -ENOSYS) { ret = PTR_ERR(pwrseq->reset_gpios[i]); - while (--i) + while (i--) gpiod_put(pwrseq->reset_gpios[i]); goto clk_put; diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 5b76a173cd95..5897d8d8fa5a 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -526,6 +526,7 @@ config MTD_NAND_SUNXI config MTD_NAND_HISI504 tristate "Support for NAND controller on Hisilicon SoC Hip04" + depends on HAS_DMA help Enables support for NAND controller on Hisilicon SoC Hip04. diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 96b0b1d27df1..10b1f7a4fe50 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -480,6 +480,42 @@ static void disable_int(struct pxa3xx_nand_info *info, uint32_t int_mask) nand_writel(info, NDCR, ndcr | int_mask); } +static void drain_fifo(struct pxa3xx_nand_info *info, void *data, int len) +{ + if (info->ecc_bch) { + int timeout; + + /* + * According to the datasheet, when reading from NDDB + * with BCH enabled, after each 32 bytes reads, we + * have to make sure that the NDSR.RDDREQ bit is set. + * + * Drain the FIFO 8 32 bits reads at a time, and skip + * the polling on the last read. + */ + while (len > 8) { + __raw_readsl(info->mmio_base + NDDB, data, 8); + + for (timeout = 0; + !(nand_readl(info, NDSR) & NDSR_RDDREQ); + timeout++) { + if (timeout >= 5) { + dev_err(&info->pdev->dev, + "Timeout on RDDREQ while draining the FIFO\n"); + return; + } + + mdelay(1); + } + + data += 32; + len -= 8; + } + } + + __raw_readsl(info->mmio_base + NDDB, data, len); +} + static void handle_data_pio(struct pxa3xx_nand_info *info) { unsigned int do_bytes = min(info->data_size, info->chunk_size); @@ -496,14 +532,14 @@ static void handle_data_pio(struct pxa3xx_nand_info *info) DIV_ROUND_UP(info->oob_size, 4)); break; case STATE_PIO_READING: - __raw_readsl(info->mmio_base + NDDB, - info->data_buff + info->data_buff_pos, - DIV_ROUND_UP(do_bytes, 4)); + drain_fifo(info, + info->data_buff + info->data_buff_pos, + DIV_ROUND_UP(do_bytes, 4)); if (info->oob_size > 0) - __raw_readsl(info->mmio_base + NDDB, - info->oob_buff + info->oob_buff_pos, - DIV_ROUND_UP(info->oob_size, 4)); + drain_fifo(info, + info->oob_buff + info->oob_buff_pos, + DIV_ROUND_UP(info->oob_size, 4)); break; default: dev_err(&info->pdev->dev, "%s: invalid state %d\n", __func__, @@ -1572,6 +1608,8 @@ static int alloc_nand_resource(struct platform_device *pdev) int ret, irq, cs; pdata = dev_get_platdata(&pdev->dev); + if (pdata->num_cs <= 0) + return -ENODEV; info = devm_kzalloc(&pdev->dev, sizeof(*info) + (sizeof(*mtd) + sizeof(*host)) * pdata->num_cs, GFP_KERNEL); if (!info) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index da4c79259f67..16e34b37d134 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -425,9 +425,10 @@ retry: ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d", pnum, vol_id, lnum); err = -EBADMSG; - } else + } else { err = -EINVAL; ubi_ro_mode(ubi); + } } goto out_free; } else if (err == UBI_IO_BITFLIPS) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 84673ebcf428..df51d6025a90 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -157,7 +157,7 @@ config IPVLAN making it transparent to the connected L2 switch. Ipvlan devices can be added using the "ip" command from the - iproute2 package starting with the iproute2-X.Y.ZZ release: + iproute2 package starting with the iproute2-3.19 release: "ip link add link <main-dev> [ NAME ] type ipvlan" diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 4ce6ca5f3d36..dc6b78e5342f 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -40,7 +40,7 @@ config DEV_APPLETALK config LTPC tristate "Apple/Farallon LocalTalk PC support" - depends on DEV_APPLETALK && (ISA || EISA) && ISA_DMA_API + depends on DEV_APPLETALK && (ISA || EISA) && ISA_DMA_API && VIRT_TO_BUS help This allows you to use the AppleTalk PC card to connect to LocalTalk networks. The card is also known as the Farallon PhoneNet PC card. diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b979c265fc51..089a4028859d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3850,7 +3850,8 @@ static inline int bond_slave_override(struct bonding *bond, /* Find out if any slaves have the same mapping as this skb. */ bond_for_each_slave_rcu(bond, slave, iter) { if (slave->queue_id == skb->queue_mapping) { - if (bond_slave_can_tx(slave)) { + if (bond_slave_is_up(slave) && + slave->link == BOND_LINK_UP) { bond_dev_queue_xmit(bond, skb, slave->dev); return 0; } diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig index 98d73aab52fe..58808f651452 100644 --- a/drivers/net/can/Kconfig +++ b/drivers/net/can/Kconfig @@ -131,7 +131,7 @@ config CAN_RCAR config CAN_XILINXCAN tristate "Xilinx CAN" - depends on ARCH_ZYNQ || MICROBLAZE || COMPILE_TEST + depends on ARCH_ZYNQ || ARM64 || MICROBLAZE || COMPILE_TEST depends on COMMON_CLK && HAS_IOMEM ---help--- Xilinx CAN driver. This driver supports both soft AXI CAN IP and diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 3c82e02e3dae..b0f69248cb71 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -579,6 +579,10 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; @@ -603,6 +607,10 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 80c46ad4cee4..ad0a7e8c2c2b 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -592,13 +592,12 @@ static int flexcan_poll_state(struct net_device *dev, u32 reg_esr) rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ? CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE; new_state = max(tx_state, rx_state); - } else if (unlikely(flt == FLEXCAN_ESR_FLT_CONF_PASSIVE)) { + } else { __flexcan_get_berr_counter(dev, &bec); - new_state = CAN_STATE_ERROR_PASSIVE; + new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ? + CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF; rx_state = bec.rxerr >= bec.txerr ? new_state : 0; tx_state = bec.rxerr <= bec.txerr ? new_state : 0; - } else { - new_state = CAN_STATE_BUS_OFF; } /* state hasn't changed */ @@ -1158,12 +1157,19 @@ static int flexcan_probe(struct platform_device *pdev) const struct flexcan_devtype_data *devtype_data; struct net_device *dev; struct flexcan_priv *priv; + struct regulator *reg_xceiver; struct resource *mem; struct clk *clk_ipg = NULL, *clk_per = NULL; void __iomem *base; int err, irq; u32 clock_freq = 0; + reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); + if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER) + return -EPROBE_DEFER; + else if (IS_ERR(reg_xceiver)) + reg_xceiver = NULL; + if (pdev->dev.of_node) of_property_read_u32(pdev->dev.of_node, "clock-frequency", &clock_freq); @@ -1224,9 +1230,7 @@ static int flexcan_probe(struct platform_device *pdev) priv->pdata = dev_get_platdata(&pdev->dev); priv->devtype_data = devtype_data; - priv->reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); - if (IS_ERR(priv->reg_xceiver)) - priv->reg_xceiver = NULL; + priv->reg_xceiver = reg_xceiver; netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT); diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 009acc8641fc..8b4d3e6875eb 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -901,6 +901,8 @@ static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id * } dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; init_usb_anchor(&dev->rx_submitted); atomic_set(&dev->active_channels, 0); diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 2928f7003041..57611fd91229 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -14,6 +14,8 @@ * Copyright (C) 2015 Valeo S.A. */ +#include <linux/spinlock.h> +#include <linux/kernel.h> #include <linux/completion.h> #include <linux/module.h> #include <linux/netdevice.h> @@ -23,7 +25,6 @@ #include <linux/can/dev.h> #include <linux/can/error.h> -#define MAX_TX_URBS 16 #define MAX_RX_URBS 4 #define START_TIMEOUT 1000 /* msecs */ #define STOP_TIMEOUT 1000 /* msecs */ @@ -441,6 +442,7 @@ struct kvaser_usb_error_summary { }; }; +/* Context for an outstanding, not yet ACKed, transmission */ struct kvaser_usb_tx_urb_context { struct kvaser_usb_net_priv *priv; u32 echo_index; @@ -454,8 +456,13 @@ struct kvaser_usb { struct usb_endpoint_descriptor *bulk_in, *bulk_out; struct usb_anchor rx_submitted; + /* @max_tx_urbs: Firmware-reported maximum number of oustanding, + * not yet ACKed, transmissions on this device. This value is + * also used as a sentinel for marking free tx contexts. + */ u32 fw_version; unsigned int nchannels; + unsigned int max_tx_urbs; enum kvaser_usb_family family; bool rxinitdone; @@ -465,18 +472,18 @@ struct kvaser_usb { struct kvaser_usb_net_priv { struct can_priv can; - - atomic_t active_tx_urbs; - struct usb_anchor tx_submitted; - struct kvaser_usb_tx_urb_context tx_contexts[MAX_TX_URBS]; - - struct completion start_comp, stop_comp; + struct can_berr_counter bec; struct kvaser_usb *dev; struct net_device *netdev; int channel; - struct can_berr_counter bec; + struct completion start_comp, stop_comp; + struct usb_anchor tx_submitted; + + spinlock_t tx_contexts_lock; + int active_tx_contexts; + struct kvaser_usb_tx_urb_context tx_contexts[]; }; static const struct usb_device_id kvaser_usb_table[] = { @@ -584,8 +591,15 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id, while (pos <= actual_len - MSG_HEADER_LEN) { tmp = buf + pos; - if (!tmp->len) - break; + /* Handle messages crossing the USB endpoint max packet + * size boundary. Check kvaser_usb_read_bulk_callback() + * for further details. + */ + if (tmp->len == 0) { + pos = round_up(pos, le16_to_cpu(dev->bulk_in-> + wMaxPacketSize)); + continue; + } if (pos + tmp->len > actual_len) { dev_err(dev->udev->dev.parent, @@ -647,9 +661,13 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev) switch (dev->family) { case KVASER_LEAF: dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version); + dev->max_tx_urbs = + le16_to_cpu(msg.u.leaf.softinfo.max_outstanding_tx); break; case KVASER_USBCAN: dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version); + dev->max_tx_urbs = + le16_to_cpu(msg.u.usbcan.softinfo.max_outstanding_tx); break; } @@ -686,6 +704,7 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev, struct kvaser_usb_net_priv *priv; struct sk_buff *skb; struct can_frame *cf; + unsigned long flags; u8 channel, tid; channel = msg->u.tx_acknowledge_header.channel; @@ -704,7 +723,7 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev, stats = &priv->netdev->stats; - context = &priv->tx_contexts[tid % MAX_TX_URBS]; + context = &priv->tx_contexts[tid % dev->max_tx_urbs]; /* Sometimes the state change doesn't come after a bus-off event */ if (priv->can.restart_ms && @@ -729,12 +748,15 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev, stats->tx_packets++; stats->tx_bytes += context->dlc; - can_get_echo_skb(priv->netdev, context->echo_index); - context->echo_index = MAX_TX_URBS; - atomic_dec(&priv->active_tx_urbs); + spin_lock_irqsave(&priv->tx_contexts_lock, flags); + can_get_echo_skb(priv->netdev, context->echo_index); + context->echo_index = dev->max_tx_urbs; + --priv->active_tx_contexts; netif_wake_queue(priv->netdev); + + spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); } static void kvaser_usb_simple_msg_callback(struct urb *urb) @@ -787,7 +809,6 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); usb_free_urb(urb); - kfree(buf); return err; } @@ -796,17 +817,6 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, return 0; } -static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) -{ - int i; - - usb_kill_anchored_urbs(&priv->tx_submitted); - atomic_set(&priv->active_tx_urbs, 0); - - for (i = 0; i < MAX_TX_URBS; i++) - priv->tx_contexts[i].echo_index = MAX_TX_URBS; -} - static void kvaser_usb_rx_error_update_can_state(struct kvaser_usb_net_priv *priv, const struct kvaser_usb_error_summary *es, struct can_frame *cf) @@ -1317,8 +1327,20 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) while (pos <= urb->actual_length - MSG_HEADER_LEN) { msg = urb->transfer_buffer + pos; - if (!msg->len) - break; + /* The Kvaser firmware can only read and write messages that + * does not cross the USB's endpoint wMaxPacketSize boundary. + * If a follow-up command crosses such boundary, firmware puts + * a placeholder zero-length command in its place then aligns + * the real command to the next max packet size. + * + * Handle such cases or we're going to miss a significant + * number of events in case of a heavy rx load on the bus. + */ + if (msg->len == 0) { + pos = round_up(pos, le16_to_cpu(dev->bulk_in-> + wMaxPacketSize)); + continue; + } if (pos + msg->len > urb->actual_length) { dev_err(dev->udev->dev.parent, "Format error\n"); @@ -1326,7 +1348,6 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb) } kvaser_usb_handle_message(dev, msg); - pos += msg->len; } @@ -1498,6 +1519,26 @@ error: return err; } +static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv) +{ + int i, max_tx_urbs; + + max_tx_urbs = priv->dev->max_tx_urbs; + + priv->active_tx_contexts = 0; + for (i = 0; i < max_tx_urbs; i++) + priv->tx_contexts[i].echo_index = max_tx_urbs; +} + +/* This method might sleep. Do not call it in the atomic context + * of URB completions. + */ +static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) +{ + usb_kill_anchored_urbs(&priv->tx_submitted); + kvaser_usb_reset_tx_urb_contexts(priv); +} + static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev) { int i; @@ -1615,9 +1656,9 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, struct urb *urb; void *buf; struct kvaser_msg *msg; - int i, err; - int ret = NETDEV_TX_OK; + int i, err, ret = NETDEV_TX_OK; u8 *msg_tx_can_flags = NULL; /* GCC */ + unsigned long flags; if (can_dropped_invalid_skb(netdev, skb)) return NETDEV_TX_OK; @@ -1634,7 +1675,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); - goto nobufmem; + goto freeurb; } msg = buf; @@ -1671,22 +1712,32 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, if (cf->can_id & CAN_RTR_FLAG) *msg_tx_can_flags |= MSG_FLAG_REMOTE_FRAME; - for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++) { - if (priv->tx_contexts[i].echo_index == MAX_TX_URBS) { + spin_lock_irqsave(&priv->tx_contexts_lock, flags); + for (i = 0; i < dev->max_tx_urbs; i++) { + if (priv->tx_contexts[i].echo_index == dev->max_tx_urbs) { context = &priv->tx_contexts[i]; + + context->echo_index = i; + can_put_echo_skb(skb, netdev, context->echo_index); + ++priv->active_tx_contexts; + if (priv->active_tx_contexts >= dev->max_tx_urbs) + netif_stop_queue(netdev); + break; } } + spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); /* This should never happen; it implies a flow control bug */ if (!context) { netdev_warn(netdev, "cannot find free context\n"); + + kfree(buf); ret = NETDEV_TX_BUSY; - goto releasebuf; + goto freeurb; } context->priv = priv; - context->echo_index = i; context->dlc = cf->can_dlc; msg->u.tx_can.tid = context->echo_index; @@ -1698,18 +1749,17 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, kvaser_usb_write_bulk_callback, context); usb_anchor_urb(urb, &priv->tx_submitted); - can_put_echo_skb(skb, netdev, context->echo_index); - - atomic_inc(&priv->active_tx_urbs); - - if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) - netif_stop_queue(netdev); - err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err)) { + spin_lock_irqsave(&priv->tx_contexts_lock, flags); + can_free_echo_skb(netdev, context->echo_index); + context->echo_index = dev->max_tx_urbs; + --priv->active_tx_contexts; + netif_wake_queue(netdev); + + spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); - atomic_dec(&priv->active_tx_urbs); usb_unanchor_urb(urb); stats->tx_dropped++; @@ -1719,16 +1769,12 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, else netdev_warn(netdev, "Failed tx_urb %d\n", err); - goto releasebuf; + goto freeurb; } - usb_free_urb(urb); - - return NETDEV_TX_OK; + ret = NETDEV_TX_OK; -releasebuf: - kfree(buf); -nobufmem: +freeurb: usb_free_urb(urb); return ret; } @@ -1840,13 +1886,15 @@ static int kvaser_usb_init_one(struct usb_interface *intf, struct kvaser_usb *dev = usb_get_intfdata(intf); struct net_device *netdev; struct kvaser_usb_net_priv *priv; - int i, err; + int err; err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel); if (err) return err; - netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS); + netdev = alloc_candev(sizeof(*priv) + + dev->max_tx_urbs * sizeof(*priv->tx_contexts), + dev->max_tx_urbs); if (!netdev) { dev_err(&intf->dev, "Cannot alloc candev\n"); return -ENOMEM; @@ -1854,19 +1902,17 @@ static int kvaser_usb_init_one(struct usb_interface *intf, priv = netdev_priv(netdev); + init_usb_anchor(&priv->tx_submitted); init_completion(&priv->start_comp); init_completion(&priv->stop_comp); - init_usb_anchor(&priv->tx_submitted); - atomic_set(&priv->active_tx_urbs, 0); - - for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++) - priv->tx_contexts[i].echo_index = MAX_TX_URBS; - priv->dev = dev; priv->netdev = netdev; priv->channel = channel; + spin_lock_init(&priv->tx_contexts_lock); + kvaser_usb_reset_tx_urb_contexts(priv); + priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = CAN_USB_CLOCK; priv->can.bittiming_const = &kvaser_usb_bittiming_const; @@ -1976,6 +2022,13 @@ static int kvaser_usb_probe(struct usb_interface *intf, return err; } + dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n", + ((dev->fw_version >> 24) & 0xff), + ((dev->fw_version >> 16) & 0xff), + (dev->fw_version & 0xffff)); + + dev_dbg(&intf->dev, "Max oustanding tx = %d URBs\n", dev->max_tx_urbs); + err = kvaser_usb_get_card_info(dev); if (err) { dev_err(&intf->dev, @@ -1983,11 +2036,6 @@ static int kvaser_usb_probe(struct usb_interface *intf, return err; } - dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n", - ((dev->fw_version >> 24) & 0xff), - ((dev->fw_version >> 16) & 0xff), - (dev->fw_version & 0xffff)); - for (i = 0; i < dev->nchannels; i++) { err = kvaser_usb_init_one(intf, id, i); if (err) { diff --git a/drivers/net/can/usb/peak_usb/pcan_ucan.h b/drivers/net/can/usb/peak_usb/pcan_ucan.h index 1ba7c25002e1..e8fc4952c6b0 100644 --- a/drivers/net/can/usb/peak_usb/pcan_ucan.h +++ b/drivers/net/can/usb/peak_usb/pcan_ucan.h @@ -26,8 +26,8 @@ #define PUCAN_CMD_FILTER_STD 0x008 #define PUCAN_CMD_TX_ABORT 0x009 #define PUCAN_CMD_WR_ERR_CNT 0x00a -#define PUCAN_CMD_RX_FRAME_ENABLE 0x00b -#define PUCAN_CMD_RX_FRAME_DISABLE 0x00c +#define PUCAN_CMD_SET_EN_OPTION 0x00b +#define PUCAN_CMD_CLR_DIS_OPTION 0x00c #define PUCAN_CMD_END_OF_COLLECTION 0x3ff /* uCAN received messages list */ @@ -101,14 +101,15 @@ struct __packed pucan_wr_err_cnt { u16 unused; }; -/* uCAN RX_FRAME_ENABLE command fields */ -#define PUCAN_FLTEXT_ERROR 0x0001 -#define PUCAN_FLTEXT_BUSLOAD 0x0002 +/* uCAN SET_EN/CLR_DIS _OPTION command fields */ +#define PUCAN_OPTION_ERROR 0x0001 +#define PUCAN_OPTION_BUSLOAD 0x0002 +#define PUCAN_OPTION_CANDFDISO 0x0004 -struct __packed pucan_filter_ext { +struct __packed pucan_options { __le16 opcode_channel; - __le16 ext_mask; + __le16 options; u32 unused; }; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index 962c3f027383..a9221ad9f1a0 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -110,13 +110,13 @@ struct __packed pcan_ufd_led { u8 unused[5]; }; -/* Extended usage of uCAN commands CMD_RX_FRAME_xxxABLE for PCAN-USB Pro FD */ +/* Extended usage of uCAN commands CMD_xxx_xx_OPTION for PCAN-USB Pro FD */ #define PCAN_UFD_FLTEXT_CALIBRATION 0x8000 -struct __packed pcan_ufd_filter_ext { +struct __packed pcan_ufd_options { __le16 opcode_channel; - __le16 ext_mask; + __le16 ucan_mask; u16 unused; __le16 usb_mask; }; @@ -251,6 +251,27 @@ static int pcan_usb_fd_build_restart_cmd(struct peak_usb_device *dev, u8 *buf) /* moves the pointer forward */ pc += sizeof(struct pucan_wr_err_cnt); + /* add command to switch from ISO to non-ISO mode, if fw allows it */ + if (dev->can.ctrlmode_supported & CAN_CTRLMODE_FD_NON_ISO) { + struct pucan_options *puo = (struct pucan_options *)pc; + + puo->opcode_channel = + (dev->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) ? + pucan_cmd_opcode_channel(dev, + PUCAN_CMD_CLR_DIS_OPTION) : + pucan_cmd_opcode_channel(dev, PUCAN_CMD_SET_EN_OPTION); + + puo->options = cpu_to_le16(PUCAN_OPTION_CANDFDISO); + + /* to be sure that no other extended bits will be taken into + * account + */ + puo->unused = 0; + + /* moves the pointer forward */ + pc += sizeof(struct pucan_options); + } + /* next, go back to operational mode */ cmd = (struct pucan_command *)pc; cmd->opcode_channel = pucan_cmd_opcode_channel(dev, @@ -321,21 +342,21 @@ static int pcan_usb_fd_set_filter_std(struct peak_usb_device *dev, int idx, return pcan_usb_fd_send_cmd(dev, cmd); } -/* set/unset notifications filter: +/* set/unset options * - * onoff sets(1)/unset(0) notifications - * mask each bit defines a kind of notification to set/unset + * onoff set(1)/unset(0) options + * mask each bit defines a kind of options to set/unset */ -static int pcan_usb_fd_set_filter_ext(struct peak_usb_device *dev, - bool onoff, u16 ext_mask, u16 usb_mask) +static int pcan_usb_fd_set_options(struct peak_usb_device *dev, + bool onoff, u16 ucan_mask, u16 usb_mask) { - struct pcan_ufd_filter_ext *cmd = pcan_usb_fd_cmd_buffer(dev); + struct pcan_ufd_options *cmd = pcan_usb_fd_cmd_buffer(dev); cmd->opcode_channel = pucan_cmd_opcode_channel(dev, - (onoff) ? PUCAN_CMD_RX_FRAME_ENABLE : - PUCAN_CMD_RX_FRAME_DISABLE); + (onoff) ? PUCAN_CMD_SET_EN_OPTION : + PUCAN_CMD_CLR_DIS_OPTION); - cmd->ext_mask = cpu_to_le16(ext_mask); + cmd->ucan_mask = cpu_to_le16(ucan_mask); cmd->usb_mask = cpu_to_le16(usb_mask); /* send the command */ @@ -770,9 +791,9 @@ static int pcan_usb_fd_start(struct peak_usb_device *dev) &pcan_usb_pro_fd); /* enable USB calibration messages */ - err = pcan_usb_fd_set_filter_ext(dev, 1, - PUCAN_FLTEXT_ERROR, - PCAN_UFD_FLTEXT_CALIBRATION); + err = pcan_usb_fd_set_options(dev, 1, + PUCAN_OPTION_ERROR, + PCAN_UFD_FLTEXT_CALIBRATION); } pdev->usb_if->dev_opened_count++; @@ -806,9 +827,9 @@ static int pcan_usb_fd_stop(struct peak_usb_device *dev) /* turn off special msgs for that interface if no other dev opened */ if (pdev->usb_if->dev_opened_count == 1) - pcan_usb_fd_set_filter_ext(dev, 0, - PUCAN_FLTEXT_ERROR, - PCAN_UFD_FLTEXT_CALIBRATION); + pcan_usb_fd_set_options(dev, 0, + PUCAN_OPTION_ERROR, + PCAN_UFD_FLTEXT_CALIBRATION); pdev->usb_if->dev_opened_count--; return 0; @@ -860,8 +881,14 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev) pdev->usb_if->fw_info.fw_version[2], dev->adapter->ctrl_count); - /* the currently supported hw is non-ISO */ - dev->can.ctrlmode = CAN_CTRLMODE_FD_NON_ISO; + /* check for ability to switch between ISO/non-ISO modes */ + if (pdev->usb_if->fw_info.fw_version[0] >= 2) { + /* firmware >= 2.x supports ISO/non-ISO switching */ + dev->can.ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO; + } else { + /* firmware < 2.x only supports fixed(!) non-ISO */ + dev->can.ctrlmode |= CAN_CTRLMODE_FD_NON_ISO; + } /* tell the hardware the can driver is running */ err = pcan_usb_fd_drv_loaded(dev, 1); @@ -879,6 +906,10 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev) pdev->usb_if = ppdev->usb_if; pdev->cmd_buffer_addr = ppdev->cmd_buffer_addr; + + /* do a copy of the ctrlmode[_supported] too */ + dev->can.ctrlmode = ppdev->dev.can.ctrlmode; + dev->can.ctrlmode_supported = ppdev->dev.can.ctrlmode_supported; } pdev->usb_if->dev[dev->ctrl_idx] = dev; @@ -933,9 +964,9 @@ static void pcan_usb_fd_exit(struct peak_usb_device *dev) if (dev->ctrl_idx == 0) { /* turn off calibration message if any device were opened */ if (pdev->usb_if->dev_opened_count > 0) - pcan_usb_fd_set_filter_ext(dev, 0, - PUCAN_FLTEXT_ERROR, - PCAN_UFD_FLTEXT_CALIBRATION); + pcan_usb_fd_set_options(dev, 0, + PUCAN_OPTION_ERROR, + PCAN_UFD_FLTEXT_CALIBRATION); /* tell USB adapter that the driver is being unloaded */ pcan_usb_fd_drv_loaded(dev, 0); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index ee9f650d5026..7b7053d3c5fa 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -105,8 +105,8 @@ static inline u64 name##_readq(struct bcm_sf2_priv *priv, u32 off) \ { \ u32 indir, dir; \ spin_lock(&priv->indir_lock); \ - indir = reg_readl(priv, REG_DIR_DATA_READ); \ dir = __raw_readl(priv->name + off); \ + indir = reg_readl(priv, REG_DIR_DATA_READ); \ spin_unlock(&priv->indir_lock); \ return (u64)indir << 32 | dir; \ } \ diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 7769c05543f1..ec6eac1f8c95 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -484,11 +484,8 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; - init_timer(&info->watchdog); - info->watchdog.function = ei_watchdog; - info->watchdog.data = (u_long)dev; - info->watchdog.expires = jiffies + HZ; - add_timer(&info->watchdog); + setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + mod_timer(&info->watchdog, jiffies + HZ); return ax_open(dev); } /* axnet_open */ diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 9fb7b9d4fd6c..2777289a26c0 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -918,11 +918,8 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; - init_timer(&info->watchdog); - info->watchdog.function = ei_watchdog; - info->watchdog.data = (u_long)dev; - info->watchdog.expires = jiffies + HZ; - add_timer(&info->watchdog); + setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + mod_timer(&info->watchdog, jiffies + HZ); return ei_open(dev); } /* pcnet_open */ diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 760c72c6e2ac..6725dc00750b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -376,7 +376,8 @@ static int tse_rx(struct altera_tse_private *priv, int limit) u16 pktlength; u16 pktstatus; - while ((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) { + while (((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) && + (count < limit)) { pktstatus = rxstatus >> 16; pktlength = rxstatus & 0xffff; @@ -491,28 +492,27 @@ static int tse_poll(struct napi_struct *napi, int budget) struct altera_tse_private *priv = container_of(napi, struct altera_tse_private, napi); int rxcomplete = 0; - int txcomplete = 0; unsigned long int flags; - txcomplete = tse_tx_complete(priv); + tse_tx_complete(priv); rxcomplete = tse_rx(priv, budget); - if (rxcomplete >= budget || txcomplete > 0) - return rxcomplete; + if (rxcomplete < budget) { - napi_gro_flush(napi, false); - __napi_complete(napi); + napi_gro_flush(napi, false); + __napi_complete(napi); - netdev_dbg(priv->dev, - "NAPI Complete, did %d packets with budget %d\n", - txcomplete+rxcomplete, budget); + netdev_dbg(priv->dev, + "NAPI Complete, did %d packets with budget %d\n", + rxcomplete, budget); - spin_lock_irqsave(&priv->rxdma_irq_lock, flags); - priv->dmaops->enable_rxirq(priv); - priv->dmaops->enable_txirq(priv); - spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); - return rxcomplete + txcomplete; + spin_lock_irqsave(&priv->rxdma_irq_lock, flags); + priv->dmaops->enable_rxirq(priv); + priv->dmaops->enable_txirq(priv); + spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); + } + return rxcomplete; } /* DMA TX & RX FIFO interrupt routing @@ -521,7 +521,6 @@ static irqreturn_t altera_isr(int irq, void *dev_id) { struct net_device *dev = dev_id; struct altera_tse_private *priv; - unsigned long int flags; if (unlikely(!dev)) { pr_err("%s: invalid dev pointer\n", __func__); @@ -529,20 +528,20 @@ static irqreturn_t altera_isr(int irq, void *dev_id) } priv = netdev_priv(dev); - /* turn off desc irqs and enable napi rx */ - spin_lock_irqsave(&priv->rxdma_irq_lock, flags); + spin_lock(&priv->rxdma_irq_lock); + /* reset IRQs */ + priv->dmaops->clear_rxirq(priv); + priv->dmaops->clear_txirq(priv); + spin_unlock(&priv->rxdma_irq_lock); if (likely(napi_schedule_prep(&priv->napi))) { + spin_lock(&priv->rxdma_irq_lock); priv->dmaops->disable_rxirq(priv); priv->dmaops->disable_txirq(priv); + spin_unlock(&priv->rxdma_irq_lock); __napi_schedule(&priv->napi); } - /* reset IRQs */ - priv->dmaops->clear_rxirq(priv); - priv->dmaops->clear_txirq(priv); - - spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); return IRQ_HANDLED; } @@ -1399,7 +1398,7 @@ static int altera_tse_probe(struct platform_device *pdev) } if (of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", - &priv->rx_fifo_depth)) { + &priv->tx_fifo_depth)) { dev_err(&pdev->dev, "cannot obtain tx-fifo-depth\n"); ret = -ENXIO; goto err_free_netdev; diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 11d6e6561df1..15a8190a6f75 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) { struct pcnet32_private *lp; int i, media; - int fdx, mii, fset, dxsuflo; + int fdx, mii, fset, dxsuflo, sram; int chip_version; char *chipname; struct net_device *dev; @@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* initialize variables */ - fdx = mii = fset = dxsuflo = 0; + fdx = mii = fset = dxsuflo = sram = 0; chip_version = (chip_version >> 12) & 0xffff; switch (chip_version) { @@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C973"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2626: chipname = "PCnet/Home 79C978"; /* PCI */ @@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C975"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2628: chipname = "PCnet/PRO 79C976"; @@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) dxsuflo = 1; } + /* + * The Am79C973/Am79C975 controllers come with 12K of SRAM + * which we can use for the Tx/Rx buffers but most importantly, + * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid + * Tx fifo underflows. + */ + if (sram) { + /* + * The SRAM is being configured in two steps. First we + * set the SRAM size in the BCR25:SRAM_SIZE bits. According + * to the datasheet, each bit corresponds to a 512-byte + * page so we can have at most 24 pages. The SRAM_SIZE + * holds the value of the upper 8 bits of the 16-bit SRAM size. + * The low 8-bits start at 0x00 and end at 0xff. So the + * address range is from 0x0000 up to 0x17ff. Therefore, + * the SRAM_SIZE is set to 0x17. The next step is to set + * the BCR26:SRAM_BND midway through so the Tx and Rx + * buffers can share the SRAM equally. + */ + a->write_bcr(ioaddr, 25, 0x17); + a->write_bcr(ioaddr, 26, 0xc); + /* And finally enable the NOUFLO bit */ + a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11)); + } + dev = alloc_etherdev(sizeof(*lp)); if (!dev) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index b93d4404d975..885b02b5be07 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -609,6 +609,68 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del) } } +static int xgbe_request_irqs(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; + struct net_device *netdev = pdata->netdev; + unsigned int i; + int ret; + + ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, + netdev->name, pdata); + if (ret) { + netdev_alert(netdev, "error requesting irq %d\n", + pdata->dev_irq); + return ret; + } + + if (!pdata->per_channel_irq) + return 0; + + channel = pdata->channel; + for (i = 0; i < pdata->channel_count; i++, channel++) { + snprintf(channel->dma_irq_name, + sizeof(channel->dma_irq_name) - 1, + "%s-TxRx-%u", netdev_name(netdev), + channel->queue_index); + + ret = devm_request_irq(pdata->dev, channel->dma_irq, + xgbe_dma_isr, 0, + channel->dma_irq_name, channel); + if (ret) { + netdev_alert(netdev, "error requesting irq %d\n", + channel->dma_irq); + goto err_irq; + } + } + + return 0; + +err_irq: + /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ + for (i--, channel--; i < pdata->channel_count; i--, channel--) + devm_free_irq(pdata->dev, channel->dma_irq, channel); + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + + return ret; +} + +static void xgbe_free_irqs(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; + unsigned int i; + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + + if (!pdata->per_channel_irq) + return; + + channel = pdata->channel; + for (i = 0; i < pdata->channel_count; i++, channel++) + devm_free_irq(pdata->dev, channel->dma_irq, channel); +} + void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; @@ -810,20 +872,20 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) return -EINVAL; } - phy_stop(pdata->phydev); - spin_lock_irqsave(&pdata->lock, flags); if (caller == XGMAC_DRIVER_CONTEXT) netif_device_detach(netdev); netif_tx_stop_all_queues(netdev); - xgbe_napi_disable(pdata, 0); - /* Powerdown Tx/Rx */ hw_if->powerdown_tx(pdata); hw_if->powerdown_rx(pdata); + xgbe_napi_disable(pdata, 0); + + phy_stop(pdata->phydev); + pdata->power_down = 1; spin_unlock_irqrestore(&pdata->lock, flags); @@ -854,14 +916,14 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) phy_start(pdata->phydev); - /* Enable Tx/Rx */ + xgbe_napi_enable(pdata, 0); + hw_if->powerup_tx(pdata); hw_if->powerup_rx(pdata); if (caller == XGMAC_DRIVER_CONTEXT) netif_device_attach(netdev); - xgbe_napi_enable(pdata, 0); netif_tx_start_all_queues(netdev); spin_unlock_irqrestore(&pdata->lock, flags); @@ -875,6 +937,7 @@ static int xgbe_start(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; struct net_device *netdev = pdata->netdev; + int ret; DBGPR("-->xgbe_start\n"); @@ -884,17 +947,31 @@ static int xgbe_start(struct xgbe_prv_data *pdata) phy_start(pdata->phydev); + xgbe_napi_enable(pdata, 1); + + ret = xgbe_request_irqs(pdata); + if (ret) + goto err_napi; + hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); xgbe_init_tx_timers(pdata); - xgbe_napi_enable(pdata, 1); netif_tx_start_all_queues(netdev); DBGPR("<--xgbe_start\n"); return 0; + +err_napi: + xgbe_napi_disable(pdata, 1); + + phy_stop(pdata->phydev); + + hw_if->exit(pdata); + + return ret; } static void xgbe_stop(struct xgbe_prv_data *pdata) @@ -907,16 +984,21 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_stop\n"); - phy_stop(pdata->phydev); - netif_tx_stop_all_queues(netdev); - xgbe_napi_disable(pdata, 1); xgbe_stop_tx_timers(pdata); hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); + xgbe_free_irqs(pdata); + + xgbe_napi_disable(pdata, 1); + + phy_stop(pdata->phydev); + + hw_if->exit(pdata); + channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { if (!channel->tx_ring) @@ -931,10 +1013,6 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) static void xgbe_restart_dev(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; - struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int i; - DBGPR("-->xgbe_restart_dev\n"); /* If not running, "restart" will happen on open */ @@ -942,19 +1020,10 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata) return; xgbe_stop(pdata); - synchronize_irq(pdata->dev_irq); - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - synchronize_irq(channel->dma_irq); - } xgbe_free_tx_data(pdata); xgbe_free_rx_data(pdata); - /* Issue software reset to device */ - hw_if->exit(pdata); - xgbe_start(pdata); DBGPR("<--xgbe_restart_dev\n"); @@ -1283,10 +1352,7 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata, static int xgbe_open(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel = NULL; - unsigned int i = 0; int ret; DBGPR("-->xgbe_open\n"); @@ -1329,55 +1395,14 @@ static int xgbe_open(struct net_device *netdev) INIT_WORK(&pdata->restart_work, xgbe_restart); INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); - /* Request interrupts */ - ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, - netdev->name, pdata); - if (ret) { - netdev_alert(netdev, "error requesting irq %d\n", - pdata->dev_irq); - goto err_rings; - } - - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - snprintf(channel->dma_irq_name, - sizeof(channel->dma_irq_name) - 1, - "%s-TxRx-%u", netdev_name(netdev), - channel->queue_index); - - ret = devm_request_irq(pdata->dev, channel->dma_irq, - xgbe_dma_isr, 0, - channel->dma_irq_name, channel); - if (ret) { - netdev_alert(netdev, - "error requesting irq %d\n", - channel->dma_irq); - goto err_irq; - } - } - } - ret = xgbe_start(pdata); if (ret) - goto err_start; + goto err_rings; DBGPR("<--xgbe_open\n"); return 0; -err_start: - hw_if->exit(pdata); - -err_irq: - if (pdata->per_channel_irq) { - /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ - for (i--, channel--; i < pdata->channel_count; i--, channel--) - devm_free_irq(pdata->dev, channel->dma_irq, channel); - } - - devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - err_rings: desc_if->free_ring_resources(pdata); @@ -1399,30 +1424,16 @@ err_phy_init: static int xgbe_close(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel; - unsigned int i; DBGPR("-->xgbe_close\n"); /* Stop the device */ xgbe_stop(pdata); - /* Issue software reset to device */ - hw_if->exit(pdata); - /* Free the ring descriptors and buffers */ desc_if->free_ring_resources(pdata); - /* Release the interrupts */ - devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - devm_free_irq(pdata->dev, channel->dma_irq, channel); - } - /* Free the channel and ring structures */ xgbe_free_channels(pdata); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 869d97fcf781..b927021c6c40 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -593,7 +593,7 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata) if (!xgene_ring_mgr_init(pdata)) return -ENODEV; - if (!efi_enabled(EFI_BOOT)) { + if (pdata->clk) { clk_prepare_enable(pdata->clk); clk_disable_unprepare(pdata->clk); clk_prepare_enable(pdata->clk); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 4de62b210c85..635a83be7e5e 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1025,6 +1025,8 @@ static int xgene_enet_remove(struct platform_device *pdev) #ifdef CONFIG_ACPI static const struct acpi_device_id xgene_enet_acpi_match[] = { { "APMC0D05", }, + { "APMC0D30", }, + { "APMC0D31", }, { } }; MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match); @@ -1033,6 +1035,8 @@ MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match); #ifdef CONFIG_OF static struct of_device_id xgene_enet_of_match[] = { {.compatible = "apm,xgene-enet",}, + {.compatible = "apm,xgene1-sgenet",}, + {.compatible = "apm,xgene1-xgenet",}, {}, }; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 21206d33b638..a7f2cc3e485e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -486,7 +486,7 @@ static int bcm_enet_poll(struct napi_struct *napi, int budget) { struct bcm_enet_priv *priv; struct net_device *dev; - int tx_work_done, rx_work_done; + int rx_work_done; priv = container_of(napi, struct bcm_enet_priv, napi); dev = priv->net_dev; @@ -498,14 +498,14 @@ static int bcm_enet_poll(struct napi_struct *napi, int budget) ENETDMAC_IR, priv->tx_chan); /* reclaim sent skb */ - tx_work_done = bcm_enet_tx_reclaim(dev, 0); + bcm_enet_tx_reclaim(dev, 0); spin_lock(&priv->rx_lock); rx_work_done = bcm_enet_receive_queue(dev, budget); spin_unlock(&priv->rx_lock); - if (rx_work_done >= budget || tx_work_done > 0) { - /* rx/tx queue is not yet empty/clean */ + if (rx_work_done >= budget) { + /* rx queue is not yet empty/clean */ return rx_work_done; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 5b308a4a4d0e..783543ad1fcf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -274,9 +274,9 @@ static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = { /* RBUF misc statistics */ STAT_RBUF("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, RBUF_OVFL_DISC_CNTR), STAT_RBUF("rbuf_err_cnt", mib.rbuf_err_cnt, RBUF_ERR_PKT_CNTR), - STAT_MIB_RX("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), - STAT_MIB_RX("rx_dma_failed", mib.rx_dma_failed), - STAT_MIB_TX("tx_dma_failed", mib.tx_dma_failed), + STAT_MIB_SOFT("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), + STAT_MIB_SOFT("rx_dma_failed", mib.rx_dma_failed), + STAT_MIB_SOFT("tx_dma_failed", mib.tx_dma_failed), }; #define BCM_SYSPORT_STATS_LEN ARRAY_SIZE(bcm_sysport_gstrings_stats) @@ -345,6 +345,7 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv) s = &bcm_sysport_gstrings_stats[i]; switch (s->type) { case BCM_SYSPORT_STAT_NETDEV: + case BCM_SYSPORT_STAT_SOFT: continue; case BCM_SYSPORT_STAT_MIB_RX: case BCM_SYSPORT_STAT_MIB_TX: diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index fc19417d82a5..7e3d87a88c76 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -570,6 +570,7 @@ enum bcm_sysport_stat_type { BCM_SYSPORT_STAT_RUNT, BCM_SYSPORT_STAT_RXCHK, BCM_SYSPORT_STAT_RBUF, + BCM_SYSPORT_STAT_SOFT, }; /* Macros to help define ethtool statistics */ @@ -590,6 +591,7 @@ enum bcm_sysport_stat_type { #define STAT_MIB_RX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_RX) #define STAT_MIB_TX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_TX) #define STAT_RUNT(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_RUNT) +#define STAT_MIB_SOFT(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_SOFT) #define STAT_RXCHK(str, m, ofs) { \ .stat_string = str, \ diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 676ffe093180..0469f72c6e7e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -302,9 +302,6 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, slot->skb = skb; slot->dma_addr = dma_addr; - if (slot->dma_addr & 0xC0000000) - bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); - return 0; } @@ -505,8 +502,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac) ring->mmio_base); goto err_dma_free; } - if (ring->dma_base & 0xC0000000) - bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); ring->unaligned = bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_TX); @@ -536,8 +531,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac) err = -ENOMEM; goto err_dma_free; } - if (ring->dma_base & 0xC0000000) - bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); ring->unaligned = bgmac_dma_unaligned(bgmac, ring, BGMAC_DMA_RING_RX); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 756053c028be..4085c4b31047 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1811,7 +1811,7 @@ struct bnx2x { int stats_state; /* used for synchronization of concurrent threads statistics handling */ - spinlock_t stats_lock; + struct mutex stats_lock; /* used by dmae command loader */ struct dmae_command stats_dmae; @@ -1935,8 +1935,6 @@ struct bnx2x { int fp_array_size; u32 dump_preset_idx; - bool stats_started; - struct semaphore stats_sema; u8 phys_port_id[ETH_ALEN]; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 7155e1d2c208..1ec635f54994 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -129,8 +129,8 @@ struct bnx2x_mac_vals { u32 xmac_val; u32 emac_addr; u32 emac_val; - u32 umac_addr; - u32 umac_val; + u32 umac_addr[2]; + u32 umac_val[2]; u32 bmac_addr; u32 bmac_val[2]; }; @@ -7866,6 +7866,20 @@ int bnx2x_init_hw_func_cnic(struct bnx2x *bp) return 0; } +/* previous driver DMAE transaction may have occurred when pre-boot stage ended + * and boot began, or when kdump kernel was loaded. Either case would invalidate + * the addresses of the transaction, resulting in was-error bit set in the pci + * causing all hw-to-host pcie transactions to timeout. If this happened we want + * to clear the interrupt which detected this from the pglueb and the was done + * bit + */ +static void bnx2x_clean_pglue_errors(struct bnx2x *bp) +{ + if (!CHIP_IS_E1x(bp)) + REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, + 1 << BP_ABS_FUNC(bp)); +} + static int bnx2x_init_hw_func(struct bnx2x *bp) { int port = BP_PORT(bp); @@ -7958,8 +7972,7 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) bnx2x_init_block(bp, BLOCK_PGLUE_B, init_phase); - if (!CHIP_IS_E1x(bp)) - REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func); + bnx2x_clean_pglue_errors(bp); bnx2x_init_block(bp, BLOCK_ATC, init_phase); bnx2x_init_block(bp, BLOCK_DMAE, init_phase); @@ -10141,6 +10154,25 @@ static u32 bnx2x_get_pretend_reg(struct bnx2x *bp) return base + (BP_ABS_FUNC(bp)) * stride; } +static bool bnx2x_prev_unload_close_umac(struct bnx2x *bp, + u8 port, u32 reset_reg, + struct bnx2x_mac_vals *vals) +{ + u32 mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; + u32 base_addr; + + if (!(mask & reset_reg)) + return false; + + BNX2X_DEV_INFO("Disable umac Rx %02x\n", port); + base_addr = port ? GRCBASE_UMAC1 : GRCBASE_UMAC0; + vals->umac_addr[port] = base_addr + UMAC_REG_COMMAND_CONFIG; + vals->umac_val[port] = REG_RD(bp, vals->umac_addr[port]); + REG_WR(bp, vals->umac_addr[port], 0); + + return true; +} + static void bnx2x_prev_unload_close_mac(struct bnx2x *bp, struct bnx2x_mac_vals *vals) { @@ -10149,10 +10181,7 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp, u8 port = BP_PORT(bp); /* reset addresses as they also mark which values were changed */ - vals->bmac_addr = 0; - vals->umac_addr = 0; - vals->xmac_addr = 0; - vals->emac_addr = 0; + memset(vals, 0, sizeof(*vals)); reset_reg = REG_RD(bp, MISC_REG_RESET_REG_2); @@ -10201,15 +10230,11 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp, REG_WR(bp, vals->xmac_addr, 0); mac_stopped = true; } - mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port; - if (mask & reset_reg) { - BNX2X_DEV_INFO("Disable umac Rx\n"); - base_addr = BP_PORT(bp) ? GRCBASE_UMAC1 : GRCBASE_UMAC0; - vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG; - vals->umac_val = REG_RD(bp, vals->umac_addr); - REG_WR(bp, vals->umac_addr, 0); - mac_stopped = true; - } + + mac_stopped |= bnx2x_prev_unload_close_umac(bp, 0, + reset_reg, vals); + mac_stopped |= bnx2x_prev_unload_close_umac(bp, 1, + reset_reg, vals); } if (mac_stopped) @@ -10505,8 +10530,11 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) /* Close the MAC Rx to prevent BRB from filling up */ bnx2x_prev_unload_close_mac(bp, &mac_vals); - /* close LLH filters towards the BRB */ + /* close LLH filters for both ports towards the BRB */ + bnx2x_set_rx_filter(&bp->link_params, 0); + bp->link_params.port ^= 1; bnx2x_set_rx_filter(&bp->link_params, 0); + bp->link_params.port ^= 1; /* Check if the UNDI driver was previously loaded */ if (bnx2x_prev_is_after_undi(bp)) { @@ -10553,8 +10581,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) if (mac_vals.xmac_addr) REG_WR(bp, mac_vals.xmac_addr, mac_vals.xmac_val); - if (mac_vals.umac_addr) - REG_WR(bp, mac_vals.umac_addr, mac_vals.umac_val); + if (mac_vals.umac_addr[0]) + REG_WR(bp, mac_vals.umac_addr[0], mac_vals.umac_val[0]); + if (mac_vals.umac_addr[1]) + REG_WR(bp, mac_vals.umac_addr[1], mac_vals.umac_val[1]); if (mac_vals.emac_addr) REG_WR(bp, mac_vals.emac_addr, mac_vals.emac_val); if (mac_vals.bmac_addr) { @@ -10571,26 +10601,6 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) return bnx2x_prev_mcp_done(bp); } -/* previous driver DMAE transaction may have occurred when pre-boot stage ended - * and boot began, or when kdump kernel was loaded. Either case would invalidate - * the addresses of the transaction, resulting in was-error bit set in the pci - * causing all hw-to-host pcie transactions to timeout. If this happened we want - * to clear the interrupt which detected this from the pglueb and the was done - * bit - */ -static void bnx2x_prev_interrupted_dmae(struct bnx2x *bp) -{ - if (!CHIP_IS_E1x(bp)) { - u32 val = REG_RD(bp, PGLUE_B_REG_PGLUE_B_INT_STS); - if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) { - DP(BNX2X_MSG_SP, - "'was error' bit was found to be set in pglueb upon startup. Clearing\n"); - REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, - 1 << BP_FUNC(bp)); - } - } -} - static int bnx2x_prev_unload(struct bnx2x *bp) { int time_counter = 10; @@ -10600,7 +10610,7 @@ static int bnx2x_prev_unload(struct bnx2x *bp) /* clear hw from errors which may have resulted from an interrupted * dmae transaction. */ - bnx2x_prev_interrupted_dmae(bp); + bnx2x_clean_pglue_errors(bp); /* Release previously held locks */ hw_lock_reg = (BP_FUNC(bp) <= 5) ? @@ -12037,9 +12047,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) mutex_init(&bp->port.phy_mutex); mutex_init(&bp->fw_mb_mutex); mutex_init(&bp->drv_info_mutex); + mutex_init(&bp->stats_lock); bp->drv_info_mng_owner = false; - spin_lock_init(&bp->stats_lock); - sema_init(&bp->stats_sema, 1); INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task); INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task); @@ -12722,6 +12731,9 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS, PCICFG_VENDOR_ID_OFFSET); + /* Set PCIe reset type to fundamental for EEH recovery */ + pdev->needs_freset = 1; + /* AER (Advanced Error reporting) configuration */ rc = pci_enable_pcie_error_reporting(pdev); if (!rc) @@ -12766,7 +12778,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO | NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; - if (!CHIP_IS_E1x(bp)) { + if (!chip_is_e1x) { dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT; dev->hw_enc_features = @@ -13665,9 +13677,9 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp) cancel_delayed_work_sync(&bp->sp_task); cancel_delayed_work_sync(&bp->period_task); - spin_lock_bh(&bp->stats_lock); + mutex_lock(&bp->stats_lock); bp->stats_state = STATS_STATE_DISABLED; - spin_unlock_bh(&bp->stats_lock); + mutex_unlock(&bp->stats_lock); bnx2x_save_statistics(bp); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index e5aca2de1871..cfe3c7695455 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2238,7 +2238,9 @@ int bnx2x_vf_close(struct bnx2x *bp, struct bnx2x_virtf *vf) cookie.vf = vf; cookie.state = VF_ACQUIRED; - bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie); + rc = bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie); + if (rc) + goto op_err; } DP(BNX2X_MSG_IOV, "set state to acquired\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index d1608297c773..800ab44a07ce 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -123,36 +123,28 @@ static void bnx2x_dp_stats(struct bnx2x *bp) */ static void bnx2x_storm_stats_post(struct bnx2x *bp) { - if (!bp->stats_pending) { - int rc; + int rc; - spin_lock_bh(&bp->stats_lock); - - if (bp->stats_pending) { - spin_unlock_bh(&bp->stats_lock); - return; - } - - bp->fw_stats_req->hdr.drv_stats_counter = - cpu_to_le16(bp->stats_counter++); + if (bp->stats_pending) + return; - DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n", - le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter)); + bp->fw_stats_req->hdr.drv_stats_counter = + cpu_to_le16(bp->stats_counter++); - /* adjust the ramrod to include VF queues statistics */ - bnx2x_iov_adjust_stats_req(bp); - bnx2x_dp_stats(bp); + DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n", + le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter)); - /* send FW stats ramrod */ - rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0, - U64_HI(bp->fw_stats_req_mapping), - U64_LO(bp->fw_stats_req_mapping), - NONE_CONNECTION_TYPE); - if (rc == 0) - bp->stats_pending = 1; + /* adjust the ramrod to include VF queues statistics */ + bnx2x_iov_adjust_stats_req(bp); + bnx2x_dp_stats(bp); - spin_unlock_bh(&bp->stats_lock); - } + /* send FW stats ramrod */ + rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0, + U64_HI(bp->fw_stats_req_mapping), + U64_LO(bp->fw_stats_req_mapping), + NONE_CONNECTION_TYPE); + if (rc == 0) + bp->stats_pending = 1; } static void bnx2x_hw_stats_post(struct bnx2x *bp) @@ -221,7 +213,7 @@ static void bnx2x_stats_comp(struct bnx2x *bp) */ /* should be called under stats_sema */ -static void __bnx2x_stats_pmf_update(struct bnx2x *bp) +static void bnx2x_stats_pmf_update(struct bnx2x *bp) { struct dmae_command *dmae; u32 opcode; @@ -519,7 +511,7 @@ static void bnx2x_func_stats_init(struct bnx2x *bp) } /* should be called under stats_sema */ -static void __bnx2x_stats_start(struct bnx2x *bp) +static void bnx2x_stats_start(struct bnx2x *bp) { if (IS_PF(bp)) { if (bp->port.pmf) @@ -531,34 +523,13 @@ static void __bnx2x_stats_start(struct bnx2x *bp) bnx2x_hw_stats_post(bp); bnx2x_storm_stats_post(bp); } - - bp->stats_started = true; -} - -static void bnx2x_stats_start(struct bnx2x *bp) -{ - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); - __bnx2x_stats_start(bp); - up(&bp->stats_sema); } static void bnx2x_stats_pmf_start(struct bnx2x *bp) { - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); bnx2x_stats_comp(bp); - __bnx2x_stats_pmf_update(bp); - __bnx2x_stats_start(bp); - up(&bp->stats_sema); -} - -static void bnx2x_stats_pmf_update(struct bnx2x *bp) -{ - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); - __bnx2x_stats_pmf_update(bp); - up(&bp->stats_sema); + bnx2x_stats_pmf_update(bp); + bnx2x_stats_start(bp); } static void bnx2x_stats_restart(struct bnx2x *bp) @@ -568,11 +539,9 @@ static void bnx2x_stats_restart(struct bnx2x *bp) */ if (IS_VF(bp)) return; - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); + bnx2x_stats_comp(bp); - __bnx2x_stats_start(bp); - up(&bp->stats_sema); + bnx2x_stats_start(bp); } static void bnx2x_bmac_stats_update(struct bnx2x *bp) @@ -1246,18 +1215,12 @@ static void bnx2x_stats_update(struct bnx2x *bp) { u32 *stats_comp = bnx2x_sp(bp, stats_comp); - /* we run update from timer context, so give up - * if somebody is in the middle of transition - */ - if (down_trylock(&bp->stats_sema)) + if (bnx2x_edebug_stats_stopped(bp)) return; - if (bnx2x_edebug_stats_stopped(bp) || !bp->stats_started) - goto out; - if (IS_PF(bp)) { if (*stats_comp != DMAE_COMP_VAL) - goto out; + return; if (bp->port.pmf) bnx2x_hw_stats_update(bp); @@ -1267,7 +1230,7 @@ static void bnx2x_stats_update(struct bnx2x *bp) BNX2X_ERR("storm stats were not updated for 3 times\n"); bnx2x_panic(); } - goto out; + return; } } else { /* vf doesn't collect HW statistics, and doesn't get completions @@ -1281,7 +1244,7 @@ static void bnx2x_stats_update(struct bnx2x *bp) /* vf is done */ if (IS_VF(bp)) - goto out; + return; if (netif_msg_timer(bp)) { struct bnx2x_eth_stats *estats = &bp->eth_stats; @@ -1292,9 +1255,6 @@ static void bnx2x_stats_update(struct bnx2x *bp) bnx2x_hw_stats_post(bp); bnx2x_storm_stats_post(bp); - -out: - up(&bp->stats_sema); } static void bnx2x_port_stats_stop(struct bnx2x *bp) @@ -1358,12 +1318,7 @@ static void bnx2x_port_stats_stop(struct bnx2x *bp) static void bnx2x_stats_stop(struct bnx2x *bp) { - int update = 0; - - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); - - bp->stats_started = false; + bool update = false; bnx2x_stats_comp(bp); @@ -1381,8 +1336,6 @@ static void bnx2x_stats_stop(struct bnx2x *bp) bnx2x_hw_stats_post(bp); bnx2x_stats_comp(bp); } - - up(&bp->stats_sema); } static void bnx2x_stats_do_nothing(struct bnx2x *bp) @@ -1410,18 +1363,28 @@ static const struct { void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event) { - enum bnx2x_stats_state state; - void (*action)(struct bnx2x *bp); + enum bnx2x_stats_state state = bp->stats_state; + if (unlikely(bp->panic)) return; - spin_lock_bh(&bp->stats_lock); - state = bp->stats_state; + /* Statistics update run from timer context, and we don't want to stop + * that context in case someone is in the middle of a transition. + * For other events, wait a bit until lock is taken. + */ + if (!mutex_trylock(&bp->stats_lock)) { + if (event == STATS_EVENT_UPDATE) + return; + + DP(BNX2X_MSG_STATS, + "Unlikely stats' lock contention [event %d]\n", event); + mutex_lock(&bp->stats_lock); + } + + bnx2x_stats_stm[state][event].action(bp); bp->stats_state = bnx2x_stats_stm[state][event].next_state; - action = bnx2x_stats_stm[state][event].action; - spin_unlock_bh(&bp->stats_lock); - action(bp); + mutex_unlock(&bp->stats_lock); if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp)) DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n", @@ -1998,13 +1961,34 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats, } } -void bnx2x_stats_safe_exec(struct bnx2x *bp, - void (func_to_exec)(void *cookie), - void *cookie){ - if (down_timeout(&bp->stats_sema, HZ/10)) - BNX2X_ERR("Unable to acquire stats lock\n"); +int bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie) +{ + int cnt = 10, rc = 0; + + /* Wait for statistics to end [while blocking further requests], + * then run supplied function 'safely'. + */ + mutex_lock(&bp->stats_lock); + bnx2x_stats_comp(bp); + while (bp->stats_pending && cnt--) + if (bnx2x_storm_stats_update(bp)) + usleep_range(1000, 2000); + if (bp->stats_pending) { + BNX2X_ERR("Failed to wait for stats pending to clear [possibly FW is stuck]\n"); + rc = -EBUSY; + goto out; + } + func_to_exec(cookie); - __bnx2x_stats_start(bp); - up(&bp->stats_sema); + +out: + /* No need to restart statistics - if they're enabled, the timer + * will restart the statistics. + */ + mutex_unlock(&bp->stats_lock); + + return rc; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index 2beceaefdeea..965539a9dabe 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -539,9 +539,9 @@ struct bnx2x; void bnx2x_memset_stats(struct bnx2x *bp); void bnx2x_stats_init(struct bnx2x *bp); void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event); -void bnx2x_stats_safe_exec(struct bnx2x *bp, - void (func_to_exec)(void *cookie), - void *cookie); +int bnx2x_stats_safe_exec(struct bnx2x *bp, + void (func_to_exec)(void *cookie), + void *cookie); /** * bnx2x_save_statistics - save statistics when unloading. diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ff83c46bc389..6befde61c203 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -487,6 +487,7 @@ enum bcmgenet_stat_type { BCMGENET_STAT_MIB_TX, BCMGENET_STAT_RUNT, BCMGENET_STAT_MISC, + BCMGENET_STAT_SOFT, }; struct bcmgenet_stats { @@ -515,6 +516,7 @@ struct bcmgenet_stats { #define STAT_GENET_MIB_RX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_RX) #define STAT_GENET_MIB_TX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_TX) #define STAT_GENET_RUNT(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_RUNT) +#define STAT_GENET_SOFT_MIB(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_SOFT) #define STAT_GENET_MISC(str, m, offset) { \ .stat_string = str, \ @@ -614,9 +616,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { UMAC_RBUF_OVFL_CNT), STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), - STAT_GENET_MIB_RX("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), - STAT_GENET_MIB_RX("rx_dma_failed", mib.rx_dma_failed), - STAT_GENET_MIB_TX("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), + STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), + STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), }; #define BCMGENET_STATS_LEN ARRAY_SIZE(bcmgenet_gstrings_stats) @@ -668,6 +670,7 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) s = &bcmgenet_gstrings_stats[i]; switch (s->type) { case BCMGENET_STAT_NETDEV: + case BCMGENET_STAT_SOFT: continue; case BCMGENET_STAT_MIB_RX: case BCMGENET_STAT_MIB_TX: @@ -971,13 +974,14 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv, } /* Unlocked version of the reclaim routine */ -static void __bcmgenet_tx_reclaim(struct net_device *dev, - struct bcmgenet_tx_ring *ring) +static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, + struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); int last_tx_cn, last_c_index, num_tx_bds; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; + unsigned int pkts_compl = 0; unsigned int bds_compl; unsigned int c_index; @@ -1005,6 +1009,7 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev, tx_cb_ptr = ring->cbs + last_c_index; bds_compl = 0; if (tx_cb_ptr->skb) { + pkts_compl++; bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1; dev->stats.tx_bytes += tx_cb_ptr->skb->len; dma_unmap_single(&dev->dev, @@ -1028,23 +1033,45 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev, last_c_index &= (num_tx_bds - 1); } - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) - ring->int_disable(priv, ring); - - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); + } ring->c_index = c_index; + + return pkts_compl; } -static void bcmgenet_tx_reclaim(struct net_device *dev, +static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { + unsigned int released; unsigned long flags; spin_lock_irqsave(&ring->lock, flags); - __bcmgenet_tx_reclaim(dev, ring); + released = __bcmgenet_tx_reclaim(dev, ring); spin_unlock_irqrestore(&ring->lock, flags); + + return released; +} + +static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) +{ + struct bcmgenet_tx_ring *ring = + container_of(napi, struct bcmgenet_tx_ring, napi); + unsigned int work_done = 0; + + work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); + + if (work_done == 0) { + napi_complete(napi); + ring->int_enable(ring->priv, ring); + + return 0; + } + + return budget; } static void bcmgenet_tx_reclaim_all(struct net_device *dev) @@ -1302,10 +1329,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); - if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) { + if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) netif_tx_stop_queue(txq); - ring->int_enable(priv, ring); - } out: spin_unlock_irqrestore(&ring->lock, flags); @@ -1621,6 +1646,7 @@ static int init_umac(struct bcmgenet_priv *priv) struct device *kdev = &priv->pdev->dev; int ret; u32 reg, cpu_mask_clear; + int index; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); @@ -1647,7 +1673,7 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intr_disable(priv); - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE; + cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE; dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); @@ -1674,6 +1700,10 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); + for (index = 0; index < priv->hw_params->tx_queues; index++) + bcmgenet_intrl2_1_writel(priv, (1 << index), + INTRL2_CPU_MASK_CLEAR); + /* Enable rx/tx engine.*/ dev_dbg(kdev, "done init umac\n"); @@ -1693,6 +1723,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, unsigned int first_bd; spin_lock_init(&ring->lock); + ring->priv = priv; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); ring->index = index; if (index == DESC_INDEX) { ring->queue = 0; @@ -1738,6 +1770,17 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); + + napi_enable(&ring->napi); +} + +static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv, + unsigned int index) +{ + struct bcmgenet_tx_ring *ring = &priv->tx_rings[index]; + + napi_disable(&ring->napi); + netif_napi_del(&ring->napi); } /* Initialize a RDMA ring */ @@ -1907,7 +1950,7 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv) return ret; } -static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv) { int i; @@ -1926,6 +1969,18 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) kfree(priv->tx_cbs); } +static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +{ + int i; + + bcmgenet_fini_tx_ring(priv, DESC_INDEX); + + for (i = 0; i < priv->hw_params->tx_queues; i++) + bcmgenet_fini_tx_ring(priv, i); + + __bcmgenet_fini_dma(priv); +} + /* init_edma: Initialize DMA control register */ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) { @@ -1952,7 +2007,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), GFP_KERNEL); if (!priv->tx_cbs) { - bcmgenet_fini_dma(priv); + __bcmgenet_fini_dma(priv); return -ENOMEM; } @@ -1975,9 +2030,6 @@ static int bcmgenet_poll(struct napi_struct *napi, int budget) struct bcmgenet_priv, napi); unsigned int work_done; - /* tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); - work_done = bcmgenet_desc_rx(priv, budget); /* Advancing our consumer index*/ @@ -2022,28 +2074,34 @@ static void bcmgenet_irq_task(struct work_struct *work) static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; + struct bcmgenet_tx_ring *ring; unsigned int index; /* Save irq status for bottom-half processing. */ priv->irq1_stat = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & - ~priv->int1_mask; + ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + /* Check the MBDONE interrupts. * packet is done, reclaim descriptors */ - if (priv->irq1_stat & 0x0000ffff) { - index = 0; - for (index = 0; index < 16; index++) { - if (priv->irq1_stat & (1 << index)) - bcmgenet_tx_reclaim(priv->dev, - &priv->tx_rings[index]); + for (index = 0; index < priv->hw_params->tx_queues; index++) { + if (!(priv->irq1_stat & BIT(index))) + continue; + + ring = &priv->tx_rings[index]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); } } + return IRQ_HANDLED; } @@ -2075,8 +2133,12 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { - /* Tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); + struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); + } } if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | UMAC_IRQ_PHY_DET_F | diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index b36ddec0cc0a..0d370d168aee 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -520,6 +520,7 @@ struct bcmgenet_hw_params { struct bcmgenet_tx_ring { spinlock_t lock; /* ring lock */ + struct napi_struct napi; /* NAPI per tx queue */ unsigned int index; /* ring index */ unsigned int queue; /* queue index */ struct enet_cb *cbs; /* tx ring buffer control block*/ @@ -534,6 +535,7 @@ struct bcmgenet_tx_ring { struct bcmgenet_tx_ring *); void (*int_disable)(struct bcmgenet_priv *priv, struct bcmgenet_tx_ring *); + struct bcmgenet_priv *priv; }; /* device context */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 149a0d70c108..b97122926d3a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -73,15 +73,17 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) if (wol->wolopts & ~(WAKE_MAGIC | WAKE_MAGICSECURE)) return -EINVAL; + reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); if (wol->wolopts & WAKE_MAGICSECURE) { bcmgenet_umac_writel(priv, get_unaligned_be16(&wol->sopass[0]), UMAC_MPD_PW_MS); bcmgenet_umac_writel(priv, get_unaligned_be32(&wol->sopass[2]), UMAC_MPD_PW_LS); - reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL); reg |= MPD_PW_EN; - bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL); + } else { + reg &= ~MPD_PW_EN; } + bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL); /* Flag the device and relevant IRQ as wakeup capable */ if (wol->wolopts) { diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index ad76b8e35a00..81d41539fcba 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2113,17 +2113,17 @@ static const struct net_device_ops macb_netdev_ops = { }; #if defined(CONFIG_OF) -static struct macb_config pc302gem_config = { +static const struct macb_config pc302gem_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, }; -static struct macb_config sama5d3_config = { +static const struct macb_config sama5d3_config = { .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE, .dma_burst_length = 16, }; -static struct macb_config sama5d4_config = { +static const struct macb_config sama5d4_config = { .caps = 0, .dma_burst_length = 4, }; @@ -2154,7 +2154,7 @@ static void macb_configure_caps(struct macb *bp) if (bp->pdev->dev.of_node) { match = of_match_node(macb_dt_ids, bp->pdev->dev.of_node); if (match && match->data) { - config = (const struct macb_config *)match->data; + config = match->data; bp->caps = config->caps; /* diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 31dc080f2437..ff85619a9732 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -351,7 +351,7 @@ /* Bitfields in MID */ #define MACB_IDNUM_OFFSET 16 -#define MACB_IDNUM_SIZE 16 +#define MACB_IDNUM_SIZE 12 #define MACB_REV_OFFSET 0 #define MACB_REV_SIZE 16 diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 9062a8434246..c308429dd9c7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -35,10 +35,10 @@ static inline unsigned int ipv6_clip_hash(struct clip_tbl *d, const u32 *key) } static unsigned int clip_addr_hash(struct clip_tbl *ctbl, const u32 *addr, - int addr_len) + u8 v6) { - return addr_len == 4 ? ipv4_clip_hash(ctbl, addr) : - ipv6_clip_hash(ctbl, addr); + return v6 ? ipv6_clip_hash(ctbl, addr) : + ipv4_clip_hash(ctbl, addr); } static int clip6_get_mbox(const struct net_device *dev, @@ -78,23 +78,22 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) struct clip_entry *ce, *cte; u32 *addr = (u32 *)lip; int hash; - int addr_len; - int ret = 0; + int ret = -1; if (!ctbl) return 0; - if (v6) - addr_len = 16; - else - addr_len = 4; - - hash = clip_addr_hash(ctbl, addr, addr_len); + hash = clip_addr_hash(ctbl, addr, v6); read_lock_bh(&ctbl->lock); list_for_each_entry(cte, &ctbl->hash_list[hash], list) { - if (addr_len == cte->addr_len && - memcmp(lip, cte->addr, cte->addr_len) == 0) { + if (cte->addr6.sin6_family == AF_INET6 && v6) + ret = memcmp(lip, cte->addr6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + else if (cte->addr.sin_family == AF_INET && !v6) + ret = memcmp(lip, (char *)(&cte->addr.sin_addr), + sizeof(struct in_addr)); + if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); goto found; @@ -111,15 +110,20 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) spin_lock_init(&ce->lock); atomic_set(&ce->refcnt, 0); atomic_dec(&ctbl->nfree); - ce->addr_len = addr_len; - memcpy(ce->addr, lip, addr_len); list_add_tail(&ce->list, &ctbl->hash_list[hash]); if (v6) { + ce->addr6.sin6_family = AF_INET6; + memcpy(ce->addr6.sin6_addr.s6_addr, + lip, sizeof(struct in6_addr)); ret = clip6_get_mbox(dev, (const struct in6_addr *)lip); if (ret) { write_unlock_bh(&ctbl->lock); return ret; } + } else { + ce->addr.sin_family = AF_INET; + memcpy((char *)(&ce->addr.sin_addr), lip, + sizeof(struct in_addr)); } } else { write_unlock_bh(&ctbl->lock); @@ -140,19 +144,19 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6) struct clip_entry *ce, *cte; u32 *addr = (u32 *)lip; int hash; - int addr_len; - - if (v6) - addr_len = 16; - else - addr_len = 4; + int ret = -1; - hash = clip_addr_hash(ctbl, addr, addr_len); + hash = clip_addr_hash(ctbl, addr, v6); read_lock_bh(&ctbl->lock); list_for_each_entry(cte, &ctbl->hash_list[hash], list) { - if (addr_len == cte->addr_len && - memcmp(lip, cte->addr, cte->addr_len) == 0) { + if (cte->addr6.sin6_family == AF_INET6 && v6) + ret = memcmp(lip, cte->addr6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + else if (cte->addr.sin_family == AF_INET && !v6) + ret = memcmp(lip, (char *)(&cte->addr.sin_addr), + sizeof(struct in_addr)); + if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); goto found; @@ -249,10 +253,7 @@ int clip_tbl_show(struct seq_file *seq, void *v) for (i = 0 ; i < ctbl->clipt_size; ++i) { list_for_each_entry(ce, &ctbl->hash_list[i], list) { ip[0] = '\0'; - if (ce->addr_len == 16) - sprintf(ip, "%pI6c", ce->addr); - else - sprintf(ip, "%pI4c", ce->addr); + sprintf(ip, "%pISc", &ce->addr); seq_printf(seq, "%-25s %u\n", ip, atomic_read(&ce->refcnt)); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h index 2eaba0161cf8..35eb43c6bcbb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h @@ -14,8 +14,10 @@ struct clip_entry { spinlock_t lock; /* Hold while modifying clip reference */ atomic_t refcnt; struct list_head list; - u32 addr[4]; - int addr_len; + union { + struct sockaddr_in addr; + struct sockaddr_in6 addr6; + }; }; struct clip_tbl { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d6cda17efe6e..c6ff4890d171 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -376,8 +376,6 @@ enum { enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_EGRQ = MAX_ETH_QSETS*2 + MAX_OFLD_QSETS*2 - + MAX_CTRL_QUEUES + MAX_RDMA_QUEUES + MAX_ISCSI_QUEUES, MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + MAX_RDMA_CIQS + MAX_ISCSI_QUEUES + INGQ_EXTRAS, }; @@ -616,11 +614,13 @@ struct sge { unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */ unsigned int egr_start; + unsigned int egr_sz; unsigned int ingr_start; - void *egr_map[MAX_EGRQ]; /* qid->queue egress queue map */ - struct sge_rspq *ingr_map[MAX_INGQ]; /* qid->queue ingress queue map */ - DECLARE_BITMAP(starving_fl, MAX_EGRQ); - DECLARE_BITMAP(txq_maperr, MAX_EGRQ); + unsigned int ingr_sz; + void **egr_map; /* qid->queue egress queue map */ + struct sge_rspq **ingr_map; /* qid->queue ingress queue map */ + unsigned long *starving_fl; + unsigned long *txq_maperr; struct timer_list rx_timer; /* refills starving FLs */ struct timer_list tx_timer; /* checks Tx queues */ }; @@ -1103,7 +1103,7 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); #define T4_MEMORY_WRITE 0 #define T4_MEMORY_READ 1 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, - __be32 *buf, int dir); + void *buf, int dir); static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, __be32 *buf) { @@ -1136,6 +1136,8 @@ int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter, unsigned int qtimer_val(const struct adapter *adap, const struct sge_rspq *q); + +int t4_init_devlog_params(struct adapter *adapter); int t4_init_sge_params(struct adapter *adapter); int t4_init_tp_params(struct adapter *adap); int t4_filter_field_shift(const struct adapter *adap, int filter_sel); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 78854ceb0870..dcb047945290 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -670,9 +670,13 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v) "0.9375" }; int i; - u16 incr[NMTUS][NCCTRL_WIN]; + u16 (*incr)[NCCTRL_WIN]; struct adapter *adap = seq->private; + incr = kmalloc(sizeof(*incr) * NMTUS, GFP_KERNEL); + if (!incr) + return -ENOMEM; + t4_read_cong_tbl(adap, incr); for (i = 0; i < NCCTRL_WIN; ++i) { @@ -685,6 +689,8 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v) adap->params.a_wnd[i], dec_fac[adap->params.b_wnd[i]]); } + + kfree(incr); return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index a22cf932ca35..d92995138f7e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -920,7 +920,7 @@ static void quiesce_rx(struct adapter *adap) { int i; - for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) { + for (i = 0; i < adap->sge.ingr_sz; i++) { struct sge_rspq *q = adap->sge.ingr_map[i]; if (q && q->handler) { @@ -934,6 +934,21 @@ static void quiesce_rx(struct adapter *adap) } } +/* Disable interrupt and napi handler */ +static void disable_interrupts(struct adapter *adap) +{ + if (adap->flags & FULL_INIT_DONE) { + t4_intr_disable(adap); + if (adap->flags & USING_MSIX) { + free_msix_queue_irqs(adap); + free_irq(adap->msix_info[0].vec, adap); + } else { + free_irq(adap->pdev->irq, adap); + } + quiesce_rx(adap); + } +} + /* * Enable NAPI scheduling and interrupt generation for all Rx queues. */ @@ -941,7 +956,7 @@ static void enable_rx(struct adapter *adap) { int i; - for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) { + for (i = 0; i < adap->sge.ingr_sz; i++) { struct sge_rspq *q = adap->sge.ingr_map[i]; if (!q) @@ -970,8 +985,8 @@ static int setup_sge_queues(struct adapter *adap) int err, msi_idx, i, j; struct sge *s = &adap->sge; - bitmap_zero(s->starving_fl, MAX_EGRQ); - bitmap_zero(s->txq_maperr, MAX_EGRQ); + bitmap_zero(s->starving_fl, s->egr_sz); + bitmap_zero(s->txq_maperr, s->egr_sz); if (adap->flags & USING_MSIX) msi_idx = 1; /* vector 0 is for non-queue interrupts */ @@ -983,6 +998,19 @@ static int setup_sge_queues(struct adapter *adap) msi_idx = -((int)s->intrq.abs_id + 1); } + /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, + * don't forget to update the following which need to be + * synchronized to and changes here. + * + * 1. The calculations of MAX_INGQ in cxgb4.h. + * + * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs + * to accommodate any new/deleted Ingress Queues + * which need MSI-X Vectors. + * + * 3. Update sge_qinfo_show() to include information on the + * new/deleted queues. + */ err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], msi_idx, NULL, fwevtq_handler); if (err) { @@ -4244,19 +4272,12 @@ static int cxgb_up(struct adapter *adap) static void cxgb_down(struct adapter *adapter) { - t4_intr_disable(adapter); cancel_work_sync(&adapter->tid_release_task); cancel_work_sync(&adapter->db_full_task); cancel_work_sync(&adapter->db_drop_task); adapter->tid_release_task_busy = false; adapter->tid_release_head = NULL; - if (adapter->flags & USING_MSIX) { - free_msix_queue_irqs(adapter); - free_irq(adapter->msix_info[0].vec, adapter); - } else - free_irq(adapter->pdev->irq, adapter); - quiesce_rx(adapter); t4_sge_stop(adapter); t4_free_sge_resources(adapter); adapter->flags &= ~FULL_INIT_DONE; @@ -4733,8 +4754,9 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) if (ret < 0) return ret; - ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ, - 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF); + ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, adap->sge.egr_sz, 64, + MAX_INGQ, 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, + FW_CMD_CAP_PF); if (ret < 0) return ret; @@ -5088,10 +5110,15 @@ static int adap_init0(struct adapter *adap) enum dev_state state; u32 params[7], val[7]; struct fw_caps_config_cmd caps_cmd; - struct fw_devlog_cmd devlog_cmd; - u32 devlog_meminfo; int reset = 1; + /* Grab Firmware Device Log parameters as early as possible so we have + * access to it for debugging, etc. + */ + ret = t4_init_devlog_params(adap); + if (ret < 0) + return ret; + /* Contact FW, advertising Master capability */ ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state); if (ret < 0) { @@ -5169,30 +5196,6 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; - /* Read firmware device log parameters. We really need to find a way - * to get these parameters initialized with some default values (which - * are likely to be correct) for the case where we either don't - * attache to the firmware or it's crashed when we probe the adapter. - * That way we'll still be able to perform early firmware startup - * debugging ... If the request to get the Firmware's Device Log - * parameters fails, we'll live so we don't make that a fatal error. - */ - memset(&devlog_cmd, 0, sizeof(devlog_cmd)); - devlog_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_DEVLOG_CMD) | - FW_CMD_REQUEST_F | FW_CMD_READ_F); - devlog_cmd.retval_len16 = htonl(FW_LEN16(devlog_cmd)); - ret = t4_wr_mbox(adap, adap->mbox, &devlog_cmd, sizeof(devlog_cmd), - &devlog_cmd); - if (ret == 0) { - devlog_meminfo = - ntohl(devlog_cmd.memtype_devlog_memaddr16_devlog); - adap->params.devlog.memtype = - FW_DEVLOG_CMD_MEMTYPE_DEVLOG_G(devlog_meminfo); - adap->params.devlog.start = - FW_DEVLOG_CMD_MEMADDR16_DEVLOG_G(devlog_meminfo) << 4; - adap->params.devlog.size = ntohl(devlog_cmd.memsize_devlog); - } - /* * Find out what ports are available to us. Note that we need to do * this before calling adap_init0_no_config() since it needs nports @@ -5293,6 +5296,51 @@ static int adap_init0(struct adapter *adap) adap->tids.nftids = val[4] - val[3] + 1; adap->sge.ingr_start = val[5]; + /* qids (ingress/egress) returned from firmware can be anywhere + * in the range from EQ(IQFLINT)_START to EQ(IQFLINT)_END. + * Hence driver needs to allocate memory for this range to + * store the queue info. Get the highest IQFLINT/EQ index returned + * in FW_EQ_*_CMD.alloc command. + */ + params[0] = FW_PARAM_PFVF(EQ_END); + params[1] = FW_PARAM_PFVF(IQFLINT_END); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val); + if (ret < 0) + goto bye; + adap->sge.egr_sz = val[0] - adap->sge.egr_start + 1; + adap->sge.ingr_sz = val[1] - adap->sge.ingr_start + 1; + + adap->sge.egr_map = kcalloc(adap->sge.egr_sz, + sizeof(*adap->sge.egr_map), GFP_KERNEL); + if (!adap->sge.egr_map) { + ret = -ENOMEM; + goto bye; + } + + adap->sge.ingr_map = kcalloc(adap->sge.ingr_sz, + sizeof(*adap->sge.ingr_map), GFP_KERNEL); + if (!adap->sge.ingr_map) { + ret = -ENOMEM; + goto bye; + } + + /* Allocate the memory for the vaious egress queue bitmaps + * ie starving_fl and txq_maperr. + */ + adap->sge.starving_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), + sizeof(long), GFP_KERNEL); + if (!adap->sge.starving_fl) { + ret = -ENOMEM; + goto bye; + } + + adap->sge.txq_maperr = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz), + sizeof(long), GFP_KERNEL); + if (!adap->sge.txq_maperr) { + ret = -ENOMEM; + goto bye; + } + params[0] = FW_PARAM_PFVF(CLIP_START); params[1] = FW_PARAM_PFVF(CLIP_END); ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val); @@ -5501,6 +5549,10 @@ static int adap_init0(struct adapter *adap) * happened to HW/FW, stop issuing commands. */ bye: + kfree(adap->sge.egr_map); + kfree(adap->sge.ingr_map); + kfree(adap->sge.starving_fl); + kfree(adap->sge.txq_maperr); if (ret != -ETIMEDOUT && ret != -EIO) t4_fw_bye(adap, adap->mbox); return ret; @@ -5528,6 +5580,7 @@ static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev, netif_carrier_off(dev); } spin_unlock(&adap->stats_lock); + disable_interrupts(adap); if (adap->flags & FULL_INIT_DONE) cxgb_down(adap); rtnl_unlock(); @@ -5912,6 +5965,10 @@ static void free_some_resources(struct adapter *adapter) t4_free_mem(adapter->l2t); t4_free_mem(adapter->tids.tid_tab); + kfree(adapter->sge.egr_map); + kfree(adapter->sge.ingr_map); + kfree(adapter->sge.starving_fl); + kfree(adapter->sge.txq_maperr); disable_msi(adapter); for_each_port(adapter, i) @@ -6237,6 +6294,8 @@ static void remove_one(struct pci_dev *pdev) if (is_offload(adapter)) detach_ulds(adapter); + disable_interrupts(adapter); + for_each_port(adapter, i) if (adapter->port[i]->reg_state == NETREG_REGISTERED) unregister_netdev(adapter->port[i]); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index b4b9f6048fe7..b688b32c21fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2171,7 +2171,7 @@ static void sge_rx_timer_cb(unsigned long data) struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; - for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++) + for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->starving_fl[i]; m; m &= m - 1) { struct sge_eth_rxq *rxq; unsigned int id = __ffs(m) + i * BITS_PER_LONG; @@ -2259,7 +2259,7 @@ static void sge_tx_timer_cb(unsigned long data) struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; - for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++) + for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++) for (m = s->txq_maperr[i]; m; m &= m - 1) { unsigned long id = __ffs(m) + i * BITS_PER_LONG; struct sge_ofld_txq *txq = s->egr_map[id]; @@ -2741,7 +2741,8 @@ void t4_free_sge_resources(struct adapter *adap) free_rspq_fl(adap, &adap->sge.intrq, NULL); /* clear the reverse egress queue map */ - memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map)); + memset(adap->sge.egr_map, 0, + adap->sge.egr_sz * sizeof(*adap->sge.egr_map)); } void t4_sge_start(struct adapter *adap) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 4d643b65265e..ee394dc68303 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -449,7 +449,7 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC * @addr: address within indicated memory type * @len: amount of memory to transfer - * @buf: host memory buffer + * @hbuf: host memory buffer * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) * * Reads/writes an [almost] arbitrary memory region in the firmware: the @@ -460,15 +460,17 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) * caller's responsibility to perform appropriate byte order conversions. */ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, - u32 len, __be32 *buf, int dir) + u32 len, void *hbuf, int dir) { u32 pos, offset, resid, memoffset; u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; + u32 *buf; /* Argument sanity checks ... */ - if (addr & 0x3) + if (addr & 0x3 || (uintptr_t)hbuf & 0x3) return -EINVAL; + buf = (u32 *)hbuf; /* It's convenient to be able to handle lengths which aren't a * multiple of 32-bits because we often end up transferring files to @@ -532,14 +534,45 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, /* Transfer data to/from the adapter as long as there's an integral * number of 32-bit transfers to complete. + * + * A note on Endianness issues: + * + * The "register" reads and writes below from/to the PCI-E Memory + * Window invoke the standard adapter Big-Endian to PCI-E Link + * Little-Endian "swizzel." As a result, if we have the following + * data in adapter memory: + * + * Memory: ... | b0 | b1 | b2 | b3 | ... + * Address: i+0 i+1 i+2 i+3 + * + * Then a read of the adapter memory via the PCI-E Memory Window + * will yield: + * + * x = readl(i) + * 31 0 + * [ b3 | b2 | b1 | b0 ] + * + * If this value is stored into local memory on a Little-Endian system + * it will show up correctly in local memory as: + * + * ( ..., b0, b1, b2, b3, ... ) + * + * But on a Big-Endian system, the store will show up in memory + * incorrectly swizzled as: + * + * ( ..., b3, b2, b1, b0, ... ) + * + * So we need to account for this in the reads and writes to the + * PCI-E Memory Window below by undoing the register read/write + * swizzels. */ while (len > 0) { if (dir == T4_MEMORY_READ) - *buf++ = (__force __be32) t4_read_reg(adap, - mem_base + offset); + *buf++ = le32_to_cpu((__force __le32)t4_read_reg(adap, + mem_base + offset)); else t4_write_reg(adap, mem_base + offset, - (__force u32) *buf++); + (__force u32)cpu_to_le32(*buf++)); offset += sizeof(__be32); len -= sizeof(__be32); @@ -568,15 +601,16 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, */ if (resid) { union { - __be32 word; + u32 word; char byte[4]; } last; unsigned char *bp; int i; if (dir == T4_MEMORY_READ) { - last.word = (__force __be32) t4_read_reg(adap, - mem_base + offset); + last.word = le32_to_cpu( + (__force __le32)t4_read_reg(adap, + mem_base + offset)); for (bp = (unsigned char *)buf, i = resid; i < 4; i++) bp[i] = last.byte[i]; } else { @@ -584,7 +618,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, for (i = resid; i < 4; i++) last.byte[i] = 0; t4_write_reg(adap, mem_base + offset, - (__force u32) last.word); + (__force u32)cpu_to_le32(last.word)); } } @@ -1086,7 +1120,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info, } /* Installed successfully, update the cached header too. */ - memcpy(card_fw, fs_fw, sizeof(*card_fw)); + *card_fw = *fs_fw; card_fw_usable = 1; *reset = 0; /* already reset as part of load_fw */ } @@ -4425,6 +4459,59 @@ int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter, } /** + * t4_init_devlog_params - initialize adapter->params.devlog + * @adap: the adapter + * + * Initialize various fields of the adapter's Firmware Device Log + * Parameters structure. + */ +int t4_init_devlog_params(struct adapter *adap) +{ + struct devlog_params *dparams = &adap->params.devlog; + u32 pf_dparams; + unsigned int devlog_meminfo; + struct fw_devlog_cmd devlog_cmd; + int ret; + + /* If we're dealing with newer firmware, the Device Log Paramerters + * are stored in a designated register which allows us to access the + * Device Log even if we can't talk to the firmware. + */ + pf_dparams = + t4_read_reg(adap, PCIE_FW_REG(PCIE_FW_PF_A, PCIE_FW_PF_DEVLOG)); + if (pf_dparams) { + unsigned int nentries, nentries128; + + dparams->memtype = PCIE_FW_PF_DEVLOG_MEMTYPE_G(pf_dparams); + dparams->start = PCIE_FW_PF_DEVLOG_ADDR16_G(pf_dparams) << 4; + + nentries128 = PCIE_FW_PF_DEVLOG_NENTRIES128_G(pf_dparams); + nentries = (nentries128 + 1) * 128; + dparams->size = nentries * sizeof(struct fw_devlog_e); + + return 0; + } + + /* Otherwise, ask the firmware for it's Device Log Parameters. + */ + memset(&devlog_cmd, 0, sizeof(devlog_cmd)); + devlog_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_DEVLOG_CMD) | + FW_CMD_REQUEST_F | FW_CMD_READ_F); + devlog_cmd.retval_len16 = htonl(FW_LEN16(devlog_cmd)); + ret = t4_wr_mbox(adap, adap->mbox, &devlog_cmd, sizeof(devlog_cmd), + &devlog_cmd); + if (ret) + return ret; + + devlog_meminfo = ntohl(devlog_cmd.memtype_devlog_memaddr16_devlog); + dparams->memtype = FW_DEVLOG_CMD_MEMTYPE_DEVLOG_G(devlog_meminfo); + dparams->start = FW_DEVLOG_CMD_MEMADDR16_DEVLOG_G(devlog_meminfo) << 4; + dparams->size = ntohl(devlog_cmd.memsize_devlog); + + return 0; +} + +/** * t4_init_sge_params - initialize adap->params.sge * @adapter: the adapter * diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 231a725f6d5d..326674b19983 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -63,6 +63,8 @@ #define MC_BIST_STATUS_REG(reg_addr, idx) ((reg_addr) + (idx) * 4) #define EDC_BIST_STATUS_REG(reg_addr, idx) ((reg_addr) + (idx) * 4) +#define PCIE_FW_REG(reg_addr, idx) ((reg_addr) + (idx) * 4) + #define SGE_PF_KDOORBELL_A 0x0 #define QID_S 15 @@ -707,6 +709,7 @@ #define PFNUM_V(x) ((x) << PFNUM_S) #define PCIE_FW_A 0x30b8 +#define PCIE_FW_PF_A 0x30bc #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS_A 0x5908 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 9b353a88cbda..a4a19e0ec7f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -101,7 +101,7 @@ enum fw_wr_opcodes { FW_RI_BIND_MW_WR = 0x18, FW_RI_FR_NSMR_WR = 0x19, FW_RI_INV_LSTAG_WR = 0x1a, - FW_LASTC2E_WR = 0x40 + FW_LASTC2E_WR = 0x70 }; struct fw_wr_hdr { @@ -993,6 +993,7 @@ enum fw_memtype_cf { FW_MEMTYPE_CF_EXTMEM = 0x2, FW_MEMTYPE_CF_FLASH = 0x4, FW_MEMTYPE_CF_INTERNAL = 0x5, + FW_MEMTYPE_CF_EXTMEM1 = 0x6, }; struct fw_caps_config_cmd { @@ -1035,6 +1036,7 @@ enum fw_params_mnem { FW_PARAMS_MNEM_PFVF = 2, /* function params */ FW_PARAMS_MNEM_REG = 3, /* limited register access */ FW_PARAMS_MNEM_DMAQ = 4, /* dma queue params */ + FW_PARAMS_MNEM_CHNET = 5, /* chnet params */ FW_PARAMS_MNEM_LAST }; @@ -3102,7 +3104,8 @@ enum fw_devlog_facility { FW_DEVLOG_FACILITY_FCOE = 0x2E, FW_DEVLOG_FACILITY_FOISCSI = 0x30, FW_DEVLOG_FACILITY_FOFCOE = 0x32, - FW_DEVLOG_FACILITY_MAX = 0x32, + FW_DEVLOG_FACILITY_CHNET = 0x34, + FW_DEVLOG_FACILITY_MAX = 0x34, }; /* log message format */ @@ -3139,4 +3142,36 @@ struct fw_devlog_cmd { (((x) >> FW_DEVLOG_CMD_MEMADDR16_DEVLOG_S) & \ FW_DEVLOG_CMD_MEMADDR16_DEVLOG_M) +/* P C I E F W P F 7 R E G I S T E R */ + +/* PF7 stores the Firmware Device Log parameters which allows Host Drivers to + * access the "devlog" which needing to contact firmware. The encoding is + * mostly the same as that returned by the DEVLOG command except for the size + * which is encoded as the number of entries in multiples-1 of 128 here rather + * than the memory size as is done in the DEVLOG command. Thus, 0 means 128 + * and 15 means 2048. This of course in turn constrains the allowed values + * for the devlog size ... + */ +#define PCIE_FW_PF_DEVLOG 7 + +#define PCIE_FW_PF_DEVLOG_NENTRIES128_S 28 +#define PCIE_FW_PF_DEVLOG_NENTRIES128_M 0xf +#define PCIE_FW_PF_DEVLOG_NENTRIES128_V(x) \ + ((x) << PCIE_FW_PF_DEVLOG_NENTRIES128_S) +#define PCIE_FW_PF_DEVLOG_NENTRIES128_G(x) \ + (((x) >> PCIE_FW_PF_DEVLOG_NENTRIES128_S) & \ + PCIE_FW_PF_DEVLOG_NENTRIES128_M) + +#define PCIE_FW_PF_DEVLOG_ADDR16_S 4 +#define PCIE_FW_PF_DEVLOG_ADDR16_M 0xffffff +#define PCIE_FW_PF_DEVLOG_ADDR16_V(x) ((x) << PCIE_FW_PF_DEVLOG_ADDR16_S) +#define PCIE_FW_PF_DEVLOG_ADDR16_G(x) \ + (((x) >> PCIE_FW_PF_DEVLOG_ADDR16_S) & PCIE_FW_PF_DEVLOG_ADDR16_M) + +#define PCIE_FW_PF_DEVLOG_MEMTYPE_S 0 +#define PCIE_FW_PF_DEVLOG_MEMTYPE_M 0xf +#define PCIE_FW_PF_DEVLOG_MEMTYPE_V(x) ((x) << PCIE_FW_PF_DEVLOG_MEMTYPE_S) +#define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \ + (((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M) + #endif /* _T4FW_INTERFACE_H_ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index e2bd3f747858..b9d1cbac0eee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,13 +36,13 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x0C -#define T4FW_VERSION_MICRO 0x19 +#define T4FW_VERSION_MINOR 0x0D +#define T4FW_VERSION_MICRO 0x20 #define T4FW_VERSION_BUILD 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x0C -#define T5FW_VERSION_MICRO 0x19 +#define T5FW_VERSION_MINOR 0x0D +#define T5FW_VERSION_MICRO 0x20 #define T5FW_VERSION_BUILD 0x00 #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 0545f0de1c52..e0d711071afb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1004,7 +1004,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq, ? (tq->pidx - 1) : (tq->size - 1)); __be64 *src = (__be64 *)&tq->desc[index]; - __be64 __iomem *dst = (__be64 *)(tq->bar2_addr + + __be64 __iomem *dst = (__be64 __iomem *)(tq->bar2_addr + SGE_UDB_WCDOORBELL); unsigned int count = EQ_UNIT / sizeof(__be64); @@ -1018,7 +1018,11 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq, * DMA. */ while (count) { - writeq(*src, dst); + /* the (__force u64) is because the compiler + * doesn't understand the endian swizzling + * going on + */ + writeq((__force u64)*src, dst); src++; dst++; count--; @@ -1252,8 +1256,8 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) BUG_ON(DIV_ROUND_UP(ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1); wr = (void *)&txq->q.desc[txq->q.pidx]; wr->equiq_to_len16 = cpu_to_be32(wr_mid); - wr->r3[0] = cpu_to_be64(0); - wr->r3[1] = cpu_to_be64(0); + wr->r3[0] = cpu_to_be32(0); + wr->r3[1] = cpu_to_be32(0); skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len); end = (u64 *)wr + flits; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 1b5506df35b1..280b4a215849 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -210,10 +210,10 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, if (rpl) { /* request bit in high-order BE word */ - WARN_ON((be32_to_cpu(*(const u32 *)cmd) + WARN_ON((be32_to_cpu(*(const __be32 *)cmd) & FW_CMD_REQUEST_F) == 0); get_mbox_rpl(adapter, rpl, size, mbox_data); - WARN_ON((be32_to_cpu(*(u32 *)rpl) + WARN_ON((be32_to_cpu(*(__be32 *)rpl) & FW_CMD_REQUEST_F) != 0); } t4_write_reg(adapter, mbox_ctl, @@ -484,7 +484,7 @@ int t4_bar2_sge_qregs(struct adapter *adapter, * o The BAR2 Queue ID. * o The BAR2 Queue ID Offset into the BAR2 page. */ - bar2_page_offset = ((qid >> qpp_shift) << page_shift); + bar2_page_offset = ((u64)(qid >> qpp_shift) << page_shift); bar2_qid = qid & qpp_mask; bar2_qid_offset = bar2_qid * SGE_UDB_SIZE; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 9cbe038a388e..a5179bfcdc2c 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -272,8 +272,8 @@ static irqreturn_t enic_isr_legacy(int irq, void *data) } if (ENIC_TEST_INTR(pba, notify_intr)) { - vnic_intr_return_all_credits(&enic->intr[notify_intr]); enic_notify_check(enic); + vnic_intr_return_all_credits(&enic->intr[notify_intr]); } if (ENIC_TEST_INTR(pba, err_intr)) { @@ -346,8 +346,8 @@ static irqreturn_t enic_isr_msix_notify(int irq, void *data) struct enic *enic = data; unsigned int intr = enic_msix_notify_intr(enic); - vnic_intr_return_all_credits(&enic->intr[intr]); enic_notify_check(enic); + vnic_intr_return_all_credits(&enic->intr[intr]); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c index 3b42556f7f8d..ed41559bae77 100644 --- a/drivers/net/ethernet/dec/tulip/tulip_core.c +++ b/drivers/net/ethernet/dec/tulip/tulip_core.c @@ -589,7 +589,7 @@ static void tulip_tx_timeout(struct net_device *dev) (unsigned int)tp->rx_ring[i].buffer1, (unsigned int)tp->rx_ring[i].buffer2, buf[0], buf[1], buf[2]); - for (j = 0; buf[j] != 0xee && j < 1600; j++) + for (j = 0; ((j < 1600) && buf[j] != 0xee); j++) if (j < 100) pr_cont(" %02x", buf[j]); pr_cont(" j=%d\n", j); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27de37aa90af..27b9fe99a9bd 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -354,6 +354,7 @@ struct be_vf_cfg { u16 vlan_tag; u32 tx_rate; u32 plink_tracking; + u32 privileges; }; enum vf_state { @@ -423,6 +424,7 @@ struct be_adapter { u8 __iomem *csr; /* CSR BAR used only for BE2/3 */ u8 __iomem *db; /* Door Bell */ + u8 __iomem *pcicfg; /* On SH,BEx only. Shadow of PCI config space */ struct mutex mbox_lock; /* For serializing mbox cmds to BE card */ struct be_dma_mem mbox_mem; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 36916cfa70f9..7f05f309e935 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, { int num_eqs, i = 0; - if (lancer_chip(adapter) && num > 8) { - while (num) { - num_eqs = min(num, 8); - __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); - i += num_eqs; - num -= num_eqs; - } - } else { - __be_cmd_modify_eqd(adapter, set_eqd, num); + while (num) { + num_eqs = min(num, 8); + __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs); + i += num_eqs; + num -= num_eqs; } return 0; @@ -1918,7 +1914,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd, /* Uses sycnhronous mcc */ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num) + u32 num, u32 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_vlan_config *req; @@ -1936,6 +1932,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req), wrb, NULL); + req->hdr.domain = domain; req->interface_id = if_id; req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index db761e8e42a3..a7634a3f052a 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, int be_cmd_get_fw_ver(struct be_adapter *adapter); int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num); int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, - u32 num); + u32 num, u32 domain); int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status); int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc); int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 0a816859aca5..e6b790f0d9dc 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter) for_each_set_bit(i, adapter->vids, VLAN_N_VID) vids[num++] = cpu_to_le16(i); - status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num); + status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0); if (status) { dev_err(dev, "Setting HW VLAN filtering failed\n"); /* Set to VLAN promisc mode as setting VLAN filter failed */ @@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf, return 0; } +static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + u16 vids[BE_NUM_VLANS_SUPPORTED]; + int vf_if_id = vf_cfg->if_handle; + int status; + + /* Enable Transparent VLAN Tagging */ + status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0); + if (status) + return status; + + /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */ + vids[0] = 0; + status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1); + if (!status) + dev_info(&adapter->pdev->dev, + "Cleared guest VLANs on VF%d", vf); + + /* After TVT is enabled, disallow VFs to program VLAN filters */ + if (vf_cfg->privileges & BE_PRIV_FILTMGMT) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges & + ~BE_PRIV_FILTMGMT, vf + 1); + if (!status) + vf_cfg->privileges &= ~BE_PRIV_FILTMGMT; + } + return 0; +} + +static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) +{ + struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; + struct device *dev = &adapter->pdev->dev; + int status; + + /* Reset Transparent VLAN Tagging. */ + status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1, + vf_cfg->if_handle, 0); + if (status) + return status; + + /* Allow VFs to program VLAN filtering */ + if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges | + BE_PRIV_FILTMGMT, vf + 1); + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; + dev_info(dev, "VF%d: FILTMGMT priv enabled", vf); + } + } + + dev_info(dev, + "Disable/re-enable i/f in VM to clear Transparent VLAN tag"); + return 0; +} + static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; - int status = 0; + int status; if (!sriov_enabled(adapter)) return -EPERM; @@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; - if (vf_cfg->vlan_tag != vlan) - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, - vf_cfg->if_handle, 0); + status = be_set_vf_tvt(adapter, vf, vlan); } else { - /* Reset Transparent Vlan Tagging. */ - status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, - vf + 1, vf_cfg->if_handle, 0); + status = be_clear_vf_tvt(adapter, vf); } if (status) { dev_err(&adapter->pdev->dev, - "VLAN %d config on VF %d failed : %#x\n", vlan, - vf, status); + "VLAN %d config on VF %d failed : %#x\n", vlan, vf, + status); return be_cmd_status(status); } vf_cfg->vlan_tag = vlan; - return 0; } @@ -2772,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter) } } } else { - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW, &ue_lo); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HIGH, &ue_hi); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask); - pci_read_config_dword(adapter->pdev, - PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask); + ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW); + ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH); + ue_lo_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_LOW_MASK); + ue_hi_mask = ioread32(adapter->pcicfg + + PCICFG_UE_STATUS_HI_MASK); ue_lo = (ue_lo & ~ue_lo_mask); ue_hi = (ue_hi & ~ue_hi_mask); @@ -3339,7 +3388,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, u32 cap_flags, u32 vf) { u32 en_flags; - int status; en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | @@ -3347,10 +3395,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags &= cap_flags; - status = be_cmd_if_create(adapter, cap_flags, en_flags, - if_handle, vf); - - return status; + return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf); } static int be_vfs_if_create(struct be_adapter *adapter) @@ -3368,8 +3413,13 @@ static int be_vfs_if_create(struct be_adapter *adapter) if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, vf + 1); - if (!status) + if (!status) { cap_flags = res.if_cap_flags; + /* Prevent VFs from enabling VLAN promiscuous + * mode + */ + cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS; + } } status = be_if_create(adapter, &vf_cfg->if_handle, @@ -3403,7 +3453,6 @@ static int be_vf_setup(struct be_adapter *adapter) struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; int status, old_vfs, vf; - u32 privileges; old_vfs = pci_num_vf(adapter->pdev); @@ -3433,15 +3482,18 @@ static int be_vf_setup(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { /* Allow VFs to programs MAC/VLAN filters */ - status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1); - if (!status && !(privileges & BE_PRIV_FILTMGMT)) { + status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges, + vf + 1); + if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) { status = be_cmd_set_fn_privileges(adapter, - privileges | + vf_cfg->privileges | BE_PRIV_FILTMGMT, vf + 1); - if (!status) + if (!status) { + vf_cfg->privileges |= BE_PRIV_FILTMGMT; dev_info(dev, "VF%d has FILTMGMT privilege\n", vf); + } } /* Allow full available bandwidth */ @@ -4820,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter) static int be_map_pci_bars(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; if (BEx_chip(adapter) && be_physfn(adapter)) { - adapter->csr = pci_iomap(adapter->pdev, 2, 0); + adapter->csr = pci_iomap(pdev, 2, 0); if (!adapter->csr) return -ENOMEM; } - addr = pci_iomap(adapter->pdev, db_bar(adapter), 0); + addr = pci_iomap(pdev, db_bar(adapter), 0); if (!addr) goto pci_map_err; adapter->db = addr; + if (skyhawk_chip(adapter) || BEx_chip(adapter)) { + if (be_physfn(adapter)) { + /* PCICFG is the 2nd BAR in BE2 */ + addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0); + if (!addr) + goto pci_map_err; + adapter->pcicfg = addr; + } else { + adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET; + } + } + be_roce_map_pci_bars(adapter); return 0; pci_map_err: - dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n"); + dev_err(&pdev->dev, "Error in mapping PCI BARs\n"); be_unmap_pci_bars(adapter); return -ENOMEM; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9bb6220663b2..f6a3a7abd468 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1189,13 +1189,12 @@ static void fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) { struct fec_enet_private *fep; - struct bufdesc *bdp, *bdp_t; + struct bufdesc *bdp; unsigned short status; struct sk_buff *skb; struct fec_enet_priv_tx_q *txq; struct netdev_queue *nq; int index = 0; - int i, bdnum; int entries_free; fep = netdev_priv(ndev); @@ -1216,29 +1215,18 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) if (bdp == txq->cur_tx) break; - bdp_t = bdp; - bdnum = 1; - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep); - skb = txq->tx_skbuff[index]; - while (!skb) { - bdp_t = fec_enet_get_nextdesc(bdp_t, fep, queue_id); - index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep); - skb = txq->tx_skbuff[index]; - bdnum++; - } - if (skb_shinfo(skb)->nr_frags && - (status = bdp_t->cbd_sc) & BD_ENET_TX_READY) - break; + index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep); - for (i = 0; i < bdnum; i++) { - if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr)) - dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, - bdp->cbd_datlen, DMA_TO_DEVICE); - bdp->cbd_bufaddr = 0; - if (i < bdnum - 1) - bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); - } + skb = txq->tx_skbuff[index]; txq->tx_skbuff[index] = NULL; + if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr)) + dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, + bdp->cbd_datlen, DMA_TO_DEVICE); + bdp->cbd_bufaddr = 0; + if (!skb) { + bdp = fec_enet_get_nextdesc(bdp, fep, queue_id); + continue; + } /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | @@ -1479,8 +1467,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) vlan_packet_rcvd = true; - skb_copy_to_linear_data_offset(skb, VLAN_HLEN, - data, (2 * ETH_ALEN)); + memmove(skb->data + VLAN_HLEN, data, ETH_ALEN * 2); skb_pull(skb, VLAN_HLEN); } @@ -1597,7 +1584,7 @@ fec_enet_interrupt(int irq, void *dev_id) writel(int_events, fep->hwp + FEC_IEVENT); fec_enet_collect_events(fep, int_events); - if (fep->work_tx || fep->work_rx) { + if ((fep->work_tx || fep->work_rx) && fep->link) { ret = IRQ_HANDLED; if (napi_schedule_prep(&fep->napi)) { @@ -1967,6 +1954,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct fec_enet_private *fep = netdev_priv(ndev); struct device_node *node; int err = -ENXIO, i; + u32 mii_speed, holdtime; /* * The i.MX28 dual fec interfaces are not equal. @@ -2004,10 +1992,33 @@ static int fec_enet_mii_init(struct platform_device *pdev) * Reference Manual has an error on this, and gets fixed on i.MX6Q * document. */ - fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000); + mii_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000); if (fep->quirks & FEC_QUIRK_ENET_MAC) - fep->phy_speed--; - fep->phy_speed <<= 1; + mii_speed--; + if (mii_speed > 63) { + dev_err(&pdev->dev, + "fec clock (%lu) to fast to get right mii speed\n", + clk_get_rate(fep->clk_ipg)); + err = -EINVAL; + goto err_out; + } + + /* + * The i.MX28 and i.MX6 types have another filed in the MSCR (aka + * MII_SPEED) register that defines the MDIO output hold time. Earlier + * versions are RAZ there, so just ignore the difference and write the + * register always. + * The minimal hold time according to IEE802.3 (clause 22) is 10 ns. + * HOLDTIME + 1 is the number of clk cycles the fec is holding the + * output. + * The HOLDTIME bitfield takes values between 0 and 7 (inclusive). + * Given that ceil(clkrate / 5000000) <= 64, the calculation for + * holdtime cannot result in a value greater than 3. + */ + holdtime = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 100000000) - 1; + + fep->phy_speed = mii_speed << 1 | holdtime << 8; + writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); fep->mii_bus = mdiobus_alloc(); @@ -3383,7 +3394,6 @@ fec_drv_remove(struct platform_device *pdev) regulator_disable(fep->reg_phy); if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); - fec_enet_clk_enable(ndev, false); of_node_put(fep->phy_node); free_netdev(ndev); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 43df78882e48..7bf3682cdf47 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -747,6 +747,18 @@ static int gfar_parse_group(struct device_node *np, return 0; } +static int gfar_of_group_count(struct device_node *np) +{ + struct device_node *child; + int num = 0; + + for_each_available_child_of_node(np, child) + if (!of_node_cmp(child->name, "queue-group")) + num++; + + return num; +} + static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) { const char *model; @@ -784,7 +796,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) num_rx_qs = 1; } else { /* MQ_MG_MODE */ /* get the actual number of supported groups */ - unsigned int num_grps = of_get_available_child_count(np); + unsigned int num_grps = gfar_of_group_count(np); if (num_grps == 0 || num_grps > MAXGROUPS) { dev_err(&ofdev->dev, "Invalid # of int groups(%d)\n", @@ -851,7 +863,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) /* Parse and initialize group specific information */ if (priv->mode == MQ_MG_MODE) { - for_each_child_of_node(np, child) { + for_each_available_child_of_node(np, child) { + if (of_node_cmp(child->name, "queue-group")) + continue; + err = gfar_parse_group(child, priv, model); if (err) goto err_grp_init; @@ -3162,8 +3177,8 @@ static void adjust_link(struct net_device *dev) struct phy_device *phydev = priv->phydev; if (unlikely(phydev->link != priv->oldlink || - phydev->duplex != priv->oldduplex || - phydev->speed != priv->oldspeed)) + (phydev->link && (phydev->duplex != priv->oldduplex || + phydev->speed != priv->oldspeed)))) gfar_update_link_state(priv); } diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 357e8b576905..56b774d3a13d 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3893,6 +3893,9 @@ static int ucc_geth_probe(struct platform_device* ofdev) ugeth->phy_interface = phy_interface; ugeth->max_speed = max_speed; + /* Carrier starts down, phylib will bring it up */ + netif_carrier_off(dev); + err = register_netdev(dev); if (err) { if (netif_msg_probe(ugeth)) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index e8a1adb7a962..c05e50759621 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3262,6 +3262,139 @@ static void ehea_remove_device_sysfs(struct platform_device *dev) device_remove_file(&dev->dev, &dev_attr_remove_port); } +static int ehea_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + pr_info("Reboot: freeing all eHEA resources\n"); + ibmebus_unregister_driver(&ehea_driver); + } + return NOTIFY_DONE; +} + +static struct notifier_block ehea_reboot_nb = { + .notifier_call = ehea_reboot_notifier, +}; + +static int ehea_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret = NOTIFY_BAD; + struct memory_notify *arg = data; + + mutex_lock(&dlpar_mem_lock); + + switch (action) { + case MEM_CANCEL_OFFLINE: + pr_info("memory offlining canceled"); + /* Fall through: re-add canceled memory block */ + + case MEM_ONLINE: + pr_info("memory is going online"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + case MEM_GOING_OFFLINE: + pr_info("memory is going offline"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + default: + break; + } + + ehea_update_firmware_handles(); + ret = NOTIFY_OK; + +out_unlock: + mutex_unlock(&dlpar_mem_lock); + return ret; +} + +static struct notifier_block ehea_mem_nb = { + .notifier_call = ehea_mem_notifier, +}; + +static void ehea_crash_handler(void) +{ + int i; + + if (ehea_fw_handles.arr) + for (i = 0; i < ehea_fw_handles.num_entries; i++) + ehea_h_free_resource(ehea_fw_handles.arr[i].adh, + ehea_fw_handles.arr[i].fwh, + FORCE_FREE); + + if (ehea_bcmc_regs.arr) + for (i = 0; i < ehea_bcmc_regs.num_entries; i++) + ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, + ehea_bcmc_regs.arr[i].port_id, + ehea_bcmc_regs.arr[i].reg_type, + ehea_bcmc_regs.arr[i].macaddr, + 0, H_DEREG_BCMC); +} + +static atomic_t ehea_memory_hooks_registered; + +/* Register memory hooks on probe of first adapter */ +static int ehea_register_memory_hooks(void) +{ + int ret = 0; + + if (atomic_inc_and_test(&ehea_memory_hooks_registered)) + return 0; + + ret = ehea_create_busmap(); + if (ret) { + pr_info("ehea_create_busmap failed\n"); + goto out; + } + + ret = register_reboot_notifier(&ehea_reboot_nb); + if (ret) { + pr_info("register_reboot_notifier failed\n"); + goto out; + } + + ret = register_memory_notifier(&ehea_mem_nb); + if (ret) { + pr_info("register_memory_notifier failed\n"); + goto out2; + } + + ret = crash_shutdown_register(ehea_crash_handler); + if (ret) { + pr_info("crash_shutdown_register failed\n"); + goto out3; + } + + return 0; + +out3: + unregister_memory_notifier(&ehea_mem_nb); +out2: + unregister_reboot_notifier(&ehea_reboot_nb); +out: + return ret; +} + +static void ehea_unregister_memory_hooks(void) +{ + if (atomic_read(&ehea_memory_hooks_registered)) + return; + + unregister_reboot_notifier(&ehea_reboot_nb); + if (crash_shutdown_unregister(ehea_crash_handler)) + pr_info("failed unregistering crash handler\n"); + unregister_memory_notifier(&ehea_mem_nb); +} + static int ehea_probe_adapter(struct platform_device *dev) { struct ehea_adapter *adapter; @@ -3269,6 +3402,10 @@ static int ehea_probe_adapter(struct platform_device *dev) int ret; int i; + ret = ehea_register_memory_hooks(); + if (ret) + return ret; + if (!dev || !dev->dev.of_node) { pr_err("Invalid ibmebus device probed\n"); return -EINVAL; @@ -3392,81 +3529,6 @@ static int ehea_remove(struct platform_device *dev) return 0; } -static void ehea_crash_handler(void) -{ - int i; - - if (ehea_fw_handles.arr) - for (i = 0; i < ehea_fw_handles.num_entries; i++) - ehea_h_free_resource(ehea_fw_handles.arr[i].adh, - ehea_fw_handles.arr[i].fwh, - FORCE_FREE); - - if (ehea_bcmc_regs.arr) - for (i = 0; i < ehea_bcmc_regs.num_entries; i++) - ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, - ehea_bcmc_regs.arr[i].port_id, - ehea_bcmc_regs.arr[i].reg_type, - ehea_bcmc_regs.arr[i].macaddr, - 0, H_DEREG_BCMC); -} - -static int ehea_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - int ret = NOTIFY_BAD; - struct memory_notify *arg = data; - - mutex_lock(&dlpar_mem_lock); - - switch (action) { - case MEM_CANCEL_OFFLINE: - pr_info("memory offlining canceled"); - /* Readd canceled memory block */ - case MEM_ONLINE: - pr_info("memory is going online"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - case MEM_GOING_OFFLINE: - pr_info("memory is going offline"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - default: - break; - } - - ehea_update_firmware_handles(); - ret = NOTIFY_OK; - -out_unlock: - mutex_unlock(&dlpar_mem_lock); - return ret; -} - -static struct notifier_block ehea_mem_nb = { - .notifier_call = ehea_mem_notifier, -}; - -static int ehea_reboot_notifier(struct notifier_block *nb, - unsigned long action, void *unused) -{ - if (action == SYS_RESTART) { - pr_info("Reboot: freeing all eHEA resources\n"); - ibmebus_unregister_driver(&ehea_driver); - } - return NOTIFY_DONE; -} - -static struct notifier_block ehea_reboot_nb = { - .notifier_call = ehea_reboot_notifier, -}; - static int check_module_parm(void) { int ret = 0; @@ -3520,26 +3582,10 @@ static int __init ehea_module_init(void) if (ret) goto out; - ret = ehea_create_busmap(); - if (ret) - goto out; - - ret = register_reboot_notifier(&ehea_reboot_nb); - if (ret) - pr_info("failed registering reboot notifier\n"); - - ret = register_memory_notifier(&ehea_mem_nb); - if (ret) - pr_info("failed registering memory remove notifier\n"); - - ret = crash_shutdown_register(ehea_crash_handler); - if (ret) - pr_info("failed registering crash handler\n"); - ret = ibmebus_register_driver(&ehea_driver); if (ret) { pr_err("failed registering eHEA device driver on ebus\n"); - goto out2; + goto out; } ret = driver_create_file(&ehea_driver.driver, @@ -3547,32 +3593,22 @@ static int __init ehea_module_init(void) if (ret) { pr_err("failed to register capabilities attribute, ret=%d\n", ret); - goto out3; + goto out2; } return ret; -out3: - ibmebus_unregister_driver(&ehea_driver); out2: - unregister_memory_notifier(&ehea_mem_nb); - unregister_reboot_notifier(&ehea_reboot_nb); - crash_shutdown_unregister(ehea_crash_handler); + ibmebus_unregister_driver(&ehea_driver); out: return ret; } static void __exit ehea_module_exit(void) { - int ret; - driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); ibmebus_unregister_driver(&ehea_driver); - unregister_reboot_notifier(&ehea_reboot_nb); - ret = crash_shutdown_unregister(ehea_crash_handler); - if (ret) - pr_info("failed unregistering crash handler\n"); - unregister_memory_notifier(&ehea_mem_nb); + ehea_unregister_memory_hooks(); kfree(ehea_fw_handles.arr); kfree(ehea_bcmc_regs.arr); ehea_destroy_busmap(); diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 21978cc019e7..cd7675ac5bf9 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1136,6 +1136,8 @@ restart_poll: ibmveth_replenish_task(adapter); if (frames_processed < budget) { + napi_complete(napi); + /* We think we are done - reenable interrupts, * then check once more to make sure we are done. */ @@ -1144,8 +1146,6 @@ restart_poll: BUG_ON(lpar_rc != H_SUCCESS); - napi_complete(napi); - if (ibmveth_rxq_pending_buffer(adapter) && napi_reschedule(napi)) { lpar_rc = h_vio_signal(adapter->vdev->unit_address, @@ -1327,6 +1327,28 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) return ret; } +static int ibmveth_set_mac_addr(struct net_device *dev, void *p) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct sockaddr *addr = p; + u64 mac_address; + int rc; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mac_address = ibmveth_encode_mac_addr(addr->sa_data); + rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); + if (rc) { + netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); + return rc; + } + + ether_addr_copy(dev->dev_addr, addr->sa_data); + + return 0; +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, @@ -1337,7 +1359,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ibmveth_set_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 11a9ffebf8d8..6aea65dae5ed 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -868,8 +868,9 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) * The grst delay value is in 100ms units, and we'll wait a * couple counts longer to be sure we don't just miss the end. */ - grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK - >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; + grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & + I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> + I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; for (cnt = 0; cnt < grst_del + 2; cnt++) { reg = rd32(hw, I40E_GLGEN_RSTAT); if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) @@ -2846,7 +2847,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw, status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - if (!status) + if (!status && filter_index) *filter_index = resp->index; return status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 183dcb63ce98..a11c70ca5a28 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -40,7 +40,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay) u32 val; val = rd32(hw, I40E_PRTDCB_GENC); - *delay = (u16)(val & I40E_PRTDCB_GENC_PFCLDA_MASK >> + *delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >> I40E_PRTDCB_GENC_PFCLDA_SHIFT); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 61236f983971..c17ee77100d3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -989,8 +989,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!cmd_buf) return count; bytes_not_copied = copy_from_user(cmd_buf, buffer, count); - if (bytes_not_copied < 0) + if (bytes_not_copied < 0) { + kfree(cmd_buf); return bytes_not_copied; + } if (bytes_not_copied > 0) count -= bytes_not_copied; cmd_buf[count] = '\0'; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index cbe281be1c9f..dadda3c5d658 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1512,7 +1512,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, vsi->tc_config.numtc = numtc; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; /* Number of queues per enabled TC */ - num_tc_qps = vsi->alloc_queue_pairs/numtc; + /* In MFP case we can have a much lower count of MSIx + * vectors available and so we need to lower the used + * q count. + */ + qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix); + num_tc_qps = qcount / numtc; num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC); /* Setup queue offset/count for all TCs for given VSI */ @@ -2684,8 +2689,15 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) u16 qoffset, qcount; int i, n; - if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) - return; + if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + /* Reset the TC information */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + rx_ring = vsi->rx_rings[i]; + tx_ring = vsi->tx_rings[i]; + rx_ring->dcb_tc = 0; + tx_ring->dcb_tc = 0; + } + } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { if (!(vsi->tc_config.enabled_tc & (1 << n))) @@ -3830,6 +3842,12 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; + i40e_stop_misc_vector(pf); + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + } + i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); for (i = 0; i < pf->num_alloc_vsi; i++) if (pf->vsi[i]) @@ -5254,8 +5272,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Wait for the PF's Tx queues to be disabled */ ret = i40e_pf_wait_txq_disabled(pf); - if (!ret) + if (ret) { + /* Schedule PF reset to recover */ + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + i40e_service_event_schedule(pf); + } else { i40e_pf_unquiesce_all_vsi(pf); + } + exit: return ret; } @@ -5587,7 +5611,8 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf) int i, v; /* If we're down or resetting, just bail */ - if (test_bit(__I40E_CONFIG_BUSY, &pf->state)) + if (test_bit(__I40E_DOWN, &pf->state) || + test_bit(__I40E_CONFIG_BUSY, &pf->state)) return; /* for each VSI/netdev @@ -9533,6 +9558,7 @@ static void i40e_remove(struct pci_dev *pdev) set_bit(__I40E_DOWN, &pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + i40e_fdir_teardown(pf); if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { i40e_free_vfs(pf); @@ -9559,12 +9585,6 @@ static void i40e_remove(struct pci_dev *pdev) if (pf->vsi[pf->lan_vsi]) i40e_vsi_release(pf->vsi[pf->lan_vsi]); - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } - /* shutdown and destroy the HMC */ if (pf->hw.hmc.hmc_obj) { ret_code = i40e_shutdown_lan_hmc(&pf->hw); @@ -9718,6 +9738,8 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + i40e_clear_interrupt_scheme(pf); + if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); pci_set_power_state(pdev, PCI_D3hot); diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 3e70f2e45a47..5defe0d63514 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -679,9 +679,11 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, { i40e_status status; enum i40e_nvmupd_cmd upd_cmd; + bool retry_attempt = false; upd_cmd = i40e_nvmupd_validate_command(hw, cmd, errno); +retry: switch (upd_cmd) { case I40E_NVMUPD_WRITE_CON: status = i40e_nvmupd_nvm_write(hw, cmd, bytes, errno); @@ -725,6 +727,39 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, *errno = -ESRCH; break; } + + /* In some circumstances, a multi-write transaction takes longer + * than the default 3 minute timeout on the write semaphore. If + * the write failed with an EBUSY status, this is likely the problem, + * so here we try to reacquire the semaphore then retry the write. + * We only do one retry, then give up. + */ + if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) && + !retry_attempt) { + i40e_status old_status = status; + u32 old_asq_status = hw->aq.asq_last_status; + u32 gtime; + + gtime = rd32(hw, I40E_GLVFGEN_TIMER); + if (gtime >= hw->nvm.hw_semaphore_timeout) { + i40e_debug(hw, I40E_DEBUG_ALL, + "NVMUPD: write semaphore expired (%d >= %lld), retrying\n", + gtime, hw->nvm.hw_semaphore_timeout); + i40e_release_nvm(hw); + status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); + if (status) { + i40e_debug(hw, I40E_DEBUG_ALL, + "NVMUPD: write semaphore reacquire failed aq_err = %d\n", + hw->aq.asq_last_status); + status = old_status; + hw->aq.asq_last_status = old_asq_status; + } else { + retry_attempt = true; + goto retry; + } + } + } + return status; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2206d2d36f0f..bbf1b1247ac4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -586,6 +586,20 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors * @@ -594,10 +608,16 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) **/ static u32 i40e_get_tx_pending(struct i40e_ring *ring) { - u32 ntu = ((ring->next_to_clean <= ring->next_to_use) - ? ring->next_to_use - : ring->next_to_use + ring->count); - return ntu - ring->next_to_clean; + u32 head, tail; + + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } /** @@ -606,6 +626,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) **/ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) { + u32 tx_done = tx_ring->stats.packets; + u32 tx_done_old = tx_ring->tx_stats.tx_done_old; u32 tx_pending = i40e_get_tx_pending(tx_ring); struct i40e_pf *pf = tx_ring->vsi->back; bool ret = false; @@ -623,41 +645,25 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending >= I40E_MIN_DESC_PENDING)) { + if ((tx_done_old == tx_done) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending < I40E_MIN_DESC_PENDING) && - (tx_pending > 0)) { + } else if (tx_done_old == tx_done && + (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { if (I40E_DEBUG_FLOW & pf->hw.debug_mask) dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", tx_pending, tx_ring->queue_index); pf->tx_sluggish_count++; } else { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; + tx_ring->tx_stats.tx_done_old = tx_done; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } return ret; } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - #define WB_STRIDE 0x3 /** @@ -2140,6 +2146,67 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @tx_flags: collected send information + * @hdr_len: size of the packet header + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, + const u8 hdr_len) +{ + struct skb_frag_struct *frag; + bool linearize = false; + unsigned int size = 0; + u16 num_frags; + u16 gso_segs; + + num_frags = skb_shinfo(skb)->nr_frags; + gso_segs = skb_shinfo(skb)->gso_segs; + + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { + u16 j = 1; + + if (num_frags < (I40E_MAX_BUFFER_TXD)) + goto linearize_chk_done; + /* try the simple math, if we have too many frags per segment */ + if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > + I40E_MAX_BUFFER_TXD) { + linearize = true; + goto linearize_chk_done; + } + frag = &skb_shinfo(skb)->frags[0]; + size = hdr_len; + /* we might still have more fragments per segment */ + do { + size += skb_frag_size(frag); + frag++; j++; + if (j == I40E_MAX_BUFFER_TXD) { + if (size < skb_shinfo(skb)->gso_size) { + linearize = true; + break; + } + j = 1; + size -= skb_shinfo(skb)->gso_size; + if (size) + j++; + size += hdr_len; + } + num_frags--; + } while (num_frags); + } else { + if (num_frags >= I40E_MAX_BUFFER_TXD) + linearize = true; + } + +linearize_chk_done: + return linearize; +} + +/** * i40e_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @skb: send buffer @@ -2396,6 +2463,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; + if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (skb_linearize(skb)) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 18b00231d2f1..dff0baeb1ecc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { #define i40e_rx_desc i40e_32byte_rx_desc +#define I40E_MAX_BUFFER_TXD 8 #define I40E_MIN_TX_LEN 17 #define I40E_MAX_DATA_PER_TXD 8192 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 29004382f462..708891571dae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -126,6 +126,20 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors * @@ -134,10 +148,16 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) **/ static u32 i40e_get_tx_pending(struct i40e_ring *ring) { - u32 ntu = ((ring->next_to_clean <= ring->next_to_use) - ? ring->next_to_use - : ring->next_to_use + ring->count); - return ntu - ring->next_to_clean; + u32 head, tail; + + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } /** @@ -146,6 +166,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) **/ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) { + u32 tx_done = tx_ring->stats.packets; + u32 tx_done_old = tx_ring->tx_stats.tx_done_old; u32 tx_pending = i40e_get_tx_pending(tx_ring); bool ret = false; @@ -162,36 +184,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending >= I40E_MIN_DESC_PENDING)) { + if ((tx_done_old == tx_done) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } else if (!(tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) || - !(tx_pending < I40E_MIN_DESC_PENDING) || - !(tx_pending > 0)) { + } else if (tx_done_old == tx_done && + (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; + tx_ring->tx_stats.tx_done_old = tx_done; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } return ret; } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - #define WB_STRIDE 0x3 /** @@ -1206,17 +1212,16 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - if (protocol == htons(ETH_P_IP)) { - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); iph->tot_len = 0; iph->check = 0; tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, IPPROTO_TCP, 0); - } else if (skb_is_gso_v6(skb)) { - - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) - : ipv6_hdr(skb); + } else if (ipv6h->version == 6) { tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); ipv6h->payload_len = 0; tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, @@ -1274,13 +1279,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; } } else if (tx_flags & I40E_TX_FLAGS_IPV6) { - if (tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + if (tx_flags & I40E_TX_FLAGS_TSO) ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } } /* Now set the ctx descriptor fields */ @@ -1290,6 +1291,11 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, ((skb_inner_network_offset(skb) - skb_transport_offset(skb)) >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; + if (this_ip_hdr->version == 6) { + tx_flags &= ~I40E_TX_FLAGS_IPV4; + tx_flags |= I40E_TX_FLAGS_IPV6; + } + } else { network_hdr_len = skb_network_header_len(skb); @@ -1380,6 +1386,67 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); } + /** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @tx_flags: collected send information + * @hdr_len: size of the packet header + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, + const u8 hdr_len) +{ + struct skb_frag_struct *frag; + bool linearize = false; + unsigned int size = 0; + u16 num_frags; + u16 gso_segs; + + num_frags = skb_shinfo(skb)->nr_frags; + gso_segs = skb_shinfo(skb)->gso_segs; + + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { + u16 j = 1; + + if (num_frags < (I40E_MAX_BUFFER_TXD)) + goto linearize_chk_done; + /* try the simple math, if we have too many frags per segment */ + if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > + I40E_MAX_BUFFER_TXD) { + linearize = true; + goto linearize_chk_done; + } + frag = &skb_shinfo(skb)->frags[0]; + size = hdr_len; + /* we might still have more fragments per segment */ + do { + size += skb_frag_size(frag); + frag++; j++; + if (j == I40E_MAX_BUFFER_TXD) { + if (size < skb_shinfo(skb)->gso_size) { + linearize = true; + break; + } + j = 1; + size -= skb_shinfo(skb)->gso_size; + if (size) + j++; + size += hdr_len; + } + num_frags--; + } while (num_frags); + } else { + if (num_frags >= I40E_MAX_BUFFER_TXD) + linearize = true; + } + +linearize_chk_done: + return linearize; +} + /** * i40e_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on @@ -1654,6 +1721,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (skb_linearize(skb)) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 4e15903b2b6d..c950a038237c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { #define i40e_rx_desc i40e_32byte_rx_desc +#define I40E_MAX_BUFFER_TXD 8 #define I40E_MIN_TX_LEN 17 #define I40E_MAX_DATA_PER_TXD 8192 diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 96208f17bb53..2db653225a0e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2658,16 +2658,11 @@ static int mvneta_stop(struct net_device *dev) static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mvneta_port *pp = netdev_priv(dev); - int ret; if (!pp->phy_dev) return -ENOTSUPP; - ret = phy_mii_ioctl(pp->phy_dev, ifr, cmd); - if (!ret) - mvneta_adjust_link(dev); - - return ret; + return phy_mii_ioctl(pp->phy_dev, ifr, cmd); } /* Ethtool methods */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index a681d7c0bb9f..546ca4226916 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -724,7 +724,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * on the host, we deprecate the error message for this * specific command/input_mod/opcode_mod/fw-status to be debug. */ - if (op == MLX4_CMD_SET_PORT && in_modifier == 1 && + if (op == MLX4_CMD_SET_PORT && + (in_modifier == 1 || in_modifier == 2) && op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE) mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); @@ -1993,7 +1994,6 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma = ((u64) param) << 48; priv->mfunc.master.slave_state[slave].cookie = 0; - mutex_init(&priv->mfunc.master.gen_eqe_mutex[slave]); break; case MLX4_COMM_CMD_VHCR1: if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0) @@ -2225,6 +2225,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) for (i = 0; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; __raw_writel((__force u32) 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 2a210c4efb89..3485acf03014 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1698,8 +1698,6 @@ int mlx4_en_start_port(struct net_device *dev) /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); - mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); - #ifdef CONFIG_MLX4_EN_VXLAN if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) vxlan_get_rx_port(dev); @@ -2807,13 +2805,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_carrier_off(dev); mlx4_en_set_default_moderation(priv); - err = register_netdev(dev); - if (err) { - en_err(priv, "Netdev registration failed for port %d\n", port); - goto out; - } - priv->registered = 1; - en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); @@ -2853,6 +2844,16 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); + mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); + + err = register_netdev(dev); + if (err) { + en_err(priv, "Netdev registration failed for port %d\n", port); + goto out; + } + + priv->registered = 1; + return 0; out: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 2d8ee66138e8..a61009f4b2df 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -81,12 +81,14 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; - + bool gro_enabled; priv->loopback_ok = 0; priv->validate_loopback = 1; + gro_enabled = priv->dev->features & NETIF_F_GRO; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + priv->dev->features &= ~NETIF_F_GRO; /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { @@ -108,6 +110,10 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) mlx4_en_test_loopback_exit: priv->validate_loopback = 0; + + if (gro_enabled) + priv->dev->features |= NETIF_F_GRO; + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 264bc15c1ff2..6e70ffee8e87 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -153,12 +153,10 @@ void mlx4_gen_slave_eqe(struct work_struct *work) /* All active slaves need to receive the event */ if (slave == ALL_SLAVES) { - for (i = 0; i < dev->num_slaves; i++) { - if (i != dev->caps.function && - master->slave_state[i].active) - if (mlx4_GEN_EQE(dev, i, eqe)) - mlx4_warn(dev, "Failed to generate event for slave %d\n", - i); + for (i = 0; i <= dev->persist->num_vfs; i++) { + if (mlx4_GEN_EQE(dev, i, eqe)) + mlx4_warn(dev, "Failed to generate event for slave %d\n", + i); } } else { if (mlx4_GEN_EQE(dev, slave, eqe)) @@ -203,13 +201,11 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_slave_state *s_slave = - &priv->mfunc.master.slave_state[slave]; - if (!s_slave->active) { - /*mlx4_warn(dev, "Trying to pass event to inactive slave\n");*/ + if (slave < 0 || slave > dev->persist->num_vfs || + slave == dev->caps.function || + !priv->mfunc.master.slave_state[slave].active) return; - } slave_event(dev, slave, eqe); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 2a8268e6be15..ebbe244e80dd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -453,7 +453,7 @@ struct mlx4_en_port_stats { unsigned long rx_chksum_none; unsigned long rx_chksum_complete; unsigned long tx_chksum_offload; -#define NUM_PORT_STATS 9 +#define NUM_PORT_STATS 10 }; struct mlx4_en_perf_stats { diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 2bb8553bd905..eda29dbbfcd2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -412,7 +412,6 @@ err_icm: EXPORT_SYMBOL_GPL(mlx4_qp_alloc); -#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 486e3d26cd4a..6e413ac4e940 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -713,7 +713,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; u32 qp_type; - int port; + int port, err = 0; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); @@ -738,7 +738,9 @@ static int update_vport_qp_param(struct mlx4_dev *dev, } else { struct mlx4_update_qp_params params = {.flags = 0}; - mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + if (err) + goto out; } } @@ -773,7 +775,8 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; } - return 0; +out: + return err; } static int mpt_mask(struct mlx4_dev *dev) @@ -3092,6 +3095,12 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) if (!priv->mfunc.master.slave_state) return -EINVAL; + /* check for slave valid, slave not PF, and slave active */ + if (slave < 0 || slave > dev->persist->num_vfs || + slave == dev->caps.function || + !priv->mfunc.master.slave_state[slave].active) + return 0; + event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; /* Create the event only if the slave is registered */ diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 44e8d7d25547..57a6e6cd74fc 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1239,11 +1239,9 @@ static int pasemi_mac_open(struct net_device *dev) if (mac->phydev) phy_start(mac->phydev); - init_timer(&mac->tx->clean_timer); - mac->tx->clean_timer.function = pasemi_mac_tx_timer; - mac->tx->clean_timer.data = (unsigned long)mac->tx; - mac->tx->clean_timer.expires = jiffies+HZ; - add_timer(&mac->tx->clean_timer); + setup_timer(&mac->tx->clean_timer, pasemi_mac_tx_timer, + (unsigned long)mac->tx); + mod_timer(&mac->tx->clean_timer, jiffies + HZ); return 0; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index 6e426ae94692..0a5e204a0179 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -354,7 +354,7 @@ struct cmd_desc_type0 { } __attribute__ ((aligned(64))); -/* Note: sizeof(rcv_desc) should always be a mutliple of 2 */ +/* Note: sizeof(rcv_desc) should always be a multiple of 2 */ struct rcv_desc { __le16 reference_handle; __le16 reserved; @@ -499,7 +499,7 @@ struct uni_data_desc{ #define NETXEN_IMAGE_START 0x43000 /* compressed image */ #define NETXEN_SECONDARY_START 0x200000 /* backup images */ #define NETXEN_PXE_START 0x3E0000 /* PXE boot rom */ -#define NETXEN_USER_START 0x3E8000 /* Firmare info */ +#define NETXEN_USER_START 0x3E8000 /* Firmware info */ #define NETXEN_FIXED_START 0x3F0000 /* backup of crbinit */ #define NETXEN_USER_START_OLD NETXEN_PXE_START /* very old flash */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index fa4317611fd6..f221126a5c4e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -314,7 +314,7 @@ struct qlcnic_fdt { #define QLCNIC_BRDCFG_START 0x4000 /* board config */ #define QLCNIC_BOOTLD_START 0x10000 /* bootld */ #define QLCNIC_IMAGE_START 0x43000 /* compressed image */ -#define QLCNIC_USER_START 0x3E8000 /* Firmare info */ +#define QLCNIC_USER_START 0x3E8000 /* Firmware info */ #define QLCNIC_FW_VERSION_OFFSET (QLCNIC_USER_START+0x408) #define QLCNIC_FW_SIZE_OFFSET (QLCNIC_USER_START+0x40c) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ad0020af2193..c70ab40d8698 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2561,7 +2561,7 @@ static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) int rc = -EINVAL; if (!rtl_fw_format_ok(tp, rtl_fw)) { - netif_err(tp, ifup, dev, "invalid firwmare\n"); + netif_err(tp, ifup, dev, "invalid firmware\n"); goto out; } @@ -5067,8 +5067,6 @@ static void rtl_hw_reset(struct rtl8169_private *tp) RTL_W8(ChipCmd, CmdReset); rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100); - - netdev_reset_queue(tp->dev); } static void rtl_request_uncached_firmware(struct rtl8169_private *tp) @@ -7049,7 +7047,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, u32 status, len; u32 opts[2]; int frags; - bool stop_queue; if (unlikely(!TX_FRAGS_READY_FOR(tp, skb_shinfo(skb)->nr_frags))) { netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n"); @@ -7090,8 +7087,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -7106,16 +7101,11 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->cur_tx += frags + 1; - stop_queue = !TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS); + RTL_W8(TxPoll, NPQ); - if (!skb->xmit_more || stop_queue || - netif_xmit_stopped(netdev_get_tx_queue(dev, 0))) { - RTL_W8(TxPoll, NPQ); - - mmiowb(); - } + mmiowb(); - if (stop_queue) { + if (!TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS)) { /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must * not miss a ring update when it notices a stopped queue. */ @@ -7198,7 +7188,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) { unsigned int dirty_tx, tx_left; - unsigned int bytes_compl = 0, pkts_compl = 0; dirty_tx = tp->dirty_tx; smp_rmb(); @@ -7222,8 +7211,10 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, tp->TxDescArray + entry); if (status & LastFrag) { - pkts_compl++; - bytes_compl += tx_skb->skb->len; + u64_stats_update_begin(&tp->tx_stats.syncp); + tp->tx_stats.packets++; + tp->tx_stats.bytes += tx_skb->skb->len; + u64_stats_update_end(&tp->tx_stats.syncp); dev_kfree_skb_any(tx_skb->skb); tx_skb->skb = NULL; } @@ -7232,13 +7223,6 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) } if (tp->dirty_tx != dirty_tx) { - netdev_completed_queue(tp->dev, pkts_compl, bytes_compl); - - u64_stats_update_begin(&tp->tx_stats.syncp); - tp->tx_stats.packets += pkts_compl; - tp->tx_stats.bytes += bytes_compl; - u64_stats_update_end(&tp->tx_stats.syncp); - tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: * - publish dirty_tx ring index (write barrier) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 4da8bd263997..736d5d1624a1 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -508,7 +508,6 @@ static struct sh_eth_cpu_data r8a779x_data = { .tpauser = 1, .hw_swap = 1, .rmiimode = 1, - .shift_rd0 = 1, }; static void sh_eth_set_rate_sh7724(struct net_device *ndev) @@ -1392,6 +1391,9 @@ static void sh_eth_dev_exit(struct net_device *ndev) msleep(2); /* max frame time at 10 Mbps < 1250 us */ sh_eth_get_stats(ndev); sh_eth_reset(ndev); + + /* Set MAC address again */ + update_mac_address(ndev); } /* free Tx skb function */ @@ -1407,6 +1409,8 @@ static int sh_eth_txfree(struct net_device *ndev) txdesc = &mdp->tx_ring[entry]; if (txdesc->status & cpu_to_edmac(mdp, TD_TACT)) break; + /* TACT bit must be checked before all the following reads */ + rmb(); /* Free the original skb. */ if (mdp->tx_skbuff[entry]) { dma_unmap_single(&ndev->dev, txdesc->addr, @@ -1444,6 +1448,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) limit = boguscnt; rxdesc = &mdp->rx_ring[entry]; while (!(rxdesc->status & cpu_to_edmac(mdp, RD_RACT))) { + /* RACT bit must be checked before all the following reads */ + rmb(); desc_status = edmac_to_cpu(mdp, rxdesc->status); pkt_len = rxdesc->frame_length; @@ -1455,8 +1461,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) /* In case of almost all GETHER/ETHERs, the Receive Frame State * (RFS) bits in the Receive Descriptor 0 are from bit 9 to - * bit 0. However, in case of the R8A7740, R8A779x, and - * R7S72100 the RFS bits are from bit 25 to bit 16. So, the + * bit 0. However, in case of the R8A7740 and R7S72100 + * the RFS bits are from bit 25 to bit 16. So, the * driver needs right shifting by 16. */ if (mdp->cd->shift_rd0) @@ -1523,6 +1529,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) skb_checksum_none_assert(skb); rxdesc->addr = dma_addr; } + wmb(); /* RACT bit must be set after all the above writes */ if (entry >= mdp->num_rx_ring - 1) rxdesc->status |= cpu_to_edmac(mdp, RD_RACT | RD_RFP | RD_RDEL); @@ -1535,7 +1542,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) /* If we don't need to check status, don't. -KDU */ if (!(sh_eth_read(ndev, EDRRR) & EDRRR_R)) { /* fix the values for the next receiving if RDE is set */ - if (intr_status & EESR_RDE) { + if (intr_status & EESR_RDE && mdp->reg_offset[RDFAR] != 0) { u32 count = (sh_eth_read(ndev, RDFAR) - sh_eth_read(ndev, RDLAR)) >> 4; @@ -2174,7 +2181,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) } spin_unlock_irqrestore(&mdp->lock, flags); - if (skb_padto(skb, ETH_ZLEN)) + if (skb_put_padto(skb, ETH_ZLEN)) return NETDEV_TX_OK; entry = mdp->cur_tx % mdp->num_tx_ring; @@ -2192,6 +2199,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) } txdesc->buffer_length = skb->len; + wmb(); /* TACT bit must be set after all the above writes */ if (entry >= mdp->num_tx_ring - 1) txdesc->status |= cpu_to_edmac(mdp, TD_TACT | TD_TDLE); else diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34389b6aa67c..5cecec282aba 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -1257,9 +1257,9 @@ static void rocker_port_set_enable(struct rocker_port *rocker_port, bool enable) u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE); if (enable) - val |= 1 << rocker_port->lport; + val |= 1ULL << rocker_port->lport; else - val &= ~(1 << rocker_port->lport); + val &= ~(1ULL << rocker_port->lport); rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val); } @@ -4201,6 +4201,8 @@ static int rocker_probe_ports(struct rocker *rocker) alloc_size = sizeof(struct rocker_port *) * rocker->port_count; rocker->ports = kmalloc(alloc_size, GFP_KERNEL); + if (!rocker->ports) + return -ENOMEM; for (i = 0; i < rocker->port_count; i++) { err = rocker_probe_port(rocker, i); if (err) @@ -4466,10 +4468,16 @@ static int rocker_port_master_changed(struct net_device *dev) struct net_device *master = netdev_master_upper_dev_get(dev); int err = 0; + /* There are currently three cases handled here: + * 1. Joining a bridge + * 2. Leaving a previously joined bridge + * 3. Other, e.g. being added to or removed from a bond or openvswitch, + * in which case nothing is done + */ if (master && master->rtnl_link_ops && !strcmp(master->rtnl_link_ops->kind, "bridge")) err = rocker_port_bridge_join(rocker_port, master); - else + else if (rocker_port_is_bridged(rocker_port)) err = rocker_port_bridge_leave(rocker_port); return err; diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 6b33127ab352..3449893aea8d 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -1070,11 +1070,8 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); - init_timer(&smc->media); - smc->media.function = media_check; - smc->media.data = (u_long) dev; - smc->media.expires = jiffies + HZ; - add_timer(&smc->media); + setup_timer(&smc->media, media_check, (u_long)dev); + mod_timer(&smc->media, jiffies + HZ); return 0; } /* smc_open */ diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 88a55f95fe09..8678e39aba08 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -91,6 +91,11 @@ static const char version[] = #include "smc91x.h" +#if defined(CONFIG_ASSABET_NEPONSET) +#include <mach/assabet.h> +#include <mach/neponset.h> +#endif + #ifndef SMC_NOWAIT # define SMC_NOWAIT 0 #endif @@ -2243,10 +2248,9 @@ static int smc_drv_probe(struct platform_device *pdev) const struct of_device_id *match = NULL; struct smc_local *lp; struct net_device *ndev; - struct resource *res; + struct resource *res, *ires; unsigned int __iomem *addr; unsigned long irq_flags = SMC_IRQ_FLAGS; - unsigned long irq_resflags; int ret; ndev = alloc_etherdev(sizeof(struct smc_local)); @@ -2338,25 +2342,23 @@ static int smc_drv_probe(struct platform_device *pdev) goto out_free_netdev; } - ndev->irq = platform_get_irq(pdev, 0); - if (ndev->irq <= 0) { + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { ret = -ENODEV; goto out_release_io; } - /* - * If this platform does not specify any special irqflags, or if - * the resource supplies a trigger, override the irqflags with - * the trigger flags from the resource. - */ - irq_resflags = irqd_get_trigger_type(irq_get_irq_data(ndev->irq)); - if (irq_flags == -1 || irq_resflags & IRQF_TRIGGER_MASK) - irq_flags = irq_resflags & IRQF_TRIGGER_MASK; + + ndev->irq = ires->start; + + if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK) + irq_flags = ires->flags & IRQF_TRIGGER_MASK; ret = smc_request_attrib(pdev, ndev); if (ret) goto out_release_io; -#if defined(CONFIG_SA1100_ASSABET) - neponset_ncr_set(NCR_ENET_OSC_EN); +#if defined(CONFIG_ASSABET_NEPONSET) + if (machine_is_assabet() && machine_has_neponset()) + neponset_ncr_set(NCR_ENET_OSC_EN); #endif platform_set_drvdata(pdev, ndev); ret = smc_enable_device(pdev); diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index be67baf5f677..3a18501d1068 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -39,14 +39,7 @@ * Define your architecture specific bus configuration parameters here. */ -#if defined(CONFIG_ARCH_LUBBOCK) ||\ - defined(CONFIG_MACH_MAINSTONE) ||\ - defined(CONFIG_MACH_ZYLONITE) ||\ - defined(CONFIG_MACH_LITTLETON) ||\ - defined(CONFIG_MACH_ZYLONITE2) ||\ - defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) ||\ - defined(CONFIG_ARCH_VERSATILE) +#if defined(CONFIG_ARM) #include <asm/mach-types.h> @@ -74,95 +67,8 @@ /* We actually can't write halfwords properly if not word aligned */ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) { - if ((machine_is_mainstone() || machine_is_stargate2()) && reg & 2) { - unsigned int v = val << 16; - v |= readl(ioaddr + (reg & ~2)) & 0xffff; - writel(v, ioaddr + (reg & ~2)); - } else { - writew(val, ioaddr + reg); - } -} - -#elif defined(CONFIG_SA1100_PLEB) -/* We can only do 16-bit reads and writes in the static memory space. */ -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_insb(a, r, p, l) readsb((a) + (r), p, (l)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, (l)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#define SMC_IRQ_FLAGS (-1) - -#elif defined(CONFIG_SA1100_ASSABET) - -#include <mach/neponset.h> - -/* We can only do 8-bit reads and writes in the static memory space. */ -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 0 -#define SMC_CAN_USE_32BIT 0 -#define SMC_NOWAIT 1 - -/* The first two address lines aren't connected... */ -#define SMC_IO_SHIFT 2 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_insb(a, r, p, l) readsb((a) + (r), p, (l)) -#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, (l)) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - -#elif defined(CONFIG_MACH_LOGICPD_PXA270) || \ - defined(CONFIG_MACH_NOMADIK_8815NHK) - -#define SMC_CAN_USE_8BIT 0 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 - -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#elif defined(CONFIG_ARCH_INNOKOM) || \ - defined(CONFIG_ARCH_PXA_IDP) || \ - defined(CONFIG_ARCH_RAMSES) || \ - defined(CONFIG_ARCH_PCM027) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 -#define SMC_USE_PXA_DMA 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - -/* We actually can't write halfwords properly if not word aligned */ -static inline void -SMC_outw(u16 val, void __iomem *ioaddr, int reg) -{ - if (reg & 2) { + if ((machine_is_mainstone() || machine_is_stargate2() || + machine_is_pxa_idp()) && reg & 2) { unsigned int v = val << 16; v |= readl(ioaddr + (reg & ~2)) & 0xffff; writel(v, ioaddr + (reg & ~2)); @@ -237,20 +143,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_100_10 #define RPC_LSB_DEFAULT RPC_LED_TX_RX -#elif defined(CONFIG_ARCH_MSM) - -#define SMC_CAN_USE_8BIT 0 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_NOWAIT 1 - -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#define SMC_IRQ_FLAGS IRQF_TRIGGER_HIGH - #elif defined(CONFIG_COLDFIRE) #define SMC_CAN_USE_8BIT 0 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 55e89b3838f1..a0ea84fe6519 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -310,11 +310,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv) spin_lock_irqsave(&priv->lock, flags); if (!priv->eee_active) { priv->eee_active = 1; - init_timer(&priv->eee_ctrl_timer); - priv->eee_ctrl_timer.function = stmmac_eee_ctrl_timer; - priv->eee_ctrl_timer.data = (unsigned long)priv; - priv->eee_ctrl_timer.expires = STMMAC_LPI_T(eee_timer); - add_timer(&priv->eee_ctrl_timer); + setup_timer(&priv->eee_ctrl_timer, + stmmac_eee_ctrl_timer, + (unsigned long)priv); + mod_timer(&priv->eee_ctrl_timer, + STMMAC_LPI_T(eee_timer)); priv->hw->mac->set_eee_timer(priv->hw, STMMAC_DEFAULT_LIT_LS, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index fb846ebba1d9..f9b42f11950f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -272,6 +272,37 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) struct stmmac_priv *priv = NULL; struct plat_stmmacenet_data *plat_dat = NULL; const char *mac = NULL; + int irq, wol_irq, lpi_irq; + + /* Get IRQ information early to have an ability to ask for deferred + * probe if needed before we went too far with resource allocation. + */ + irq = platform_get_irq_byname(pdev, "macirq"); + if (irq < 0) { + if (irq != -EPROBE_DEFER) { + dev_err(dev, + "MAC IRQ configuration information not found\n"); + } + return irq; + } + + /* On some platforms e.g. SPEAr the wake up irq differs from the mac irq + * The external wake up irq can be passed through the platform code + * named as "eth_wake_irq" + * + * In case the wake up interrupt is not passed from the platform + * so the driver will continue to use the mac irq (ndev->irq) + */ + wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq"); + if (wol_irq < 0) { + if (wol_irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + wol_irq = irq; + } + + lpi_irq = platform_get_irq_byname(pdev, "eth_lpi"); + if (lpi_irq == -EPROBE_DEFER) + return -EPROBE_DEFER; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); addr = devm_ioremap_resource(dev, res); @@ -323,39 +354,15 @@ static int stmmac_pltfr_probe(struct platform_device *pdev) return PTR_ERR(priv); } + /* Copy IRQ values to priv structure which is now avaialble */ + priv->dev->irq = irq; + priv->wol_irq = wol_irq; + priv->lpi_irq = lpi_irq; + /* Get MAC address if available (DT) */ if (mac) memcpy(priv->dev->dev_addr, mac, ETH_ALEN); - /* Get the MAC information */ - priv->dev->irq = platform_get_irq_byname(pdev, "macirq"); - if (priv->dev->irq < 0) { - if (priv->dev->irq != -EPROBE_DEFER) { - netdev_err(priv->dev, - "MAC IRQ configuration information not found\n"); - } - return priv->dev->irq; - } - - /* - * On some platforms e.g. SPEAr the wake up irq differs from the mac irq - * The external wake up irq can be passed through the platform code - * named as "eth_wake_irq" - * - * In case the wake up interrupt is not passed from the platform - * so the driver will continue to use the mac irq (ndev->irq) - */ - priv->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq"); - if (priv->wol_irq < 0) { - if (priv->wol_irq == -EPROBE_DEFER) - return -EPROBE_DEFER; - priv->wol_irq = priv->dev->irq; - } - - priv->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi"); - if (priv->lpi_irq == -EPROBE_DEFER) - return -EPROBE_DEFER; - platform_set_drvdata(pdev, priv->dev); pr_debug("STMMAC platform driver registration completed"); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 4b51f903fb73..0c5842aeb807 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6989,10 +6989,10 @@ static int niu_class_to_ethflow(u64 class, int *flow_type) *flow_type = IP_USER_FLOW; break; default: - return 0; + return -EINVAL; } - return 1; + return 0; } static int niu_ethflow_to_class(int flow_type, u64 *class) @@ -7198,11 +7198,9 @@ static int niu_get_ethtool_tcam_entry(struct niu *np, class = (tp->key[0] & TCAM_V4KEY0_CLASS_CODE) >> TCAM_V4KEY0_CLASS_CODE_SHIFT; ret = niu_class_to_ethflow(class, &fsp->flow_type); - if (ret < 0) { netdev_info(np->dev, "niu%d: niu_class_to_ethflow failed\n", parent->index); - ret = -EINVAL; goto out; } diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 7d8dd0d2182e..a1bbaf6352ba 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1103,7 +1103,7 @@ static inline void cpsw_add_dual_emac_def_ale_entries( cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, slave->port_vlan, 0); cpsw_ale_add_ucast(priv->ale, priv->mac_addr, - priv->host_port, ALE_VLAN, slave->port_vlan); + priv->host_port, ALE_VLAN | ALE_SECURE, slave->port_vlan); } static void soft_reset_slave(struct cpsw_slave *slave) @@ -2466,6 +2466,7 @@ static int cpsw_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -2518,11 +2519,9 @@ static int cpsw_resume(struct device *dev) } return 0; } +#endif -static const struct dev_pm_ops cpsw_pm_ops = { - .suspend = cpsw_suspend, - .resume = cpsw_resume, -}; +static SIMPLE_DEV_PM_OPS(cpsw_pm_ops, cpsw_suspend, cpsw_resume); static const struct of_device_id cpsw_of_mtable[] = { { .compatible = "ti,cpsw", }, diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 98655b44b97e..c00084d689f3 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -423,6 +423,7 @@ static int davinci_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int davinci_mdio_suspend(struct device *dev) { struct davinci_mdio_data *data = dev_get_drvdata(dev); @@ -464,10 +465,10 @@ static int davinci_mdio_resume(struct device *dev) return 0; } +#endif static const struct dev_pm_ops davinci_mdio_pm_ops = { - .suspend_late = davinci_mdio_suspend, - .resume_early = davinci_mdio_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(davinci_mdio_suspend, davinci_mdio_resume) }; #if IS_ENABLED(CONFIG_OF) diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index a495931a66a1..0e0fbb5842b3 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -498,9 +498,9 @@ static int w5100_napi_poll(struct napi_struct *napi, int budget) } if (rx_count < budget) { + napi_complete(napi); w5100_write(priv, W5100_IMR, IR_S0); mmiowb(); - napi_complete(napi); } return rx_count; diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index 09322d9db578..4b310002258d 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -418,9 +418,9 @@ static int w5300_napi_poll(struct napi_struct *napi, int budget) } if (rx_count < budget) { + napi_complete(napi); w5300_write(priv, W5300_IMR, IR_S0); mmiowb(); - napi_complete(napi); } return rx_count; diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index f7e0f0f7c2e2..9e16a2819d48 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -938,7 +938,7 @@ static void eth_set_mcast_list(struct net_device *dev) int i; static const u8 allmulti[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; - if (dev->flags & IFF_ALLMULTI) { + if ((dev->flags & IFF_ALLMULTI) && !(dev->flags & IFF_PROMISC)) { for (i = 0; i < ETH_ALEN; i++) { __raw_writel(allmulti[i], &port->regs->mcast_addr[i]); __raw_writel(allmulti[i], &port->regs->mcast_mask[i]); diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 924ea98bd531..54549a6223dd 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -114,7 +114,9 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); -bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6); +struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, + const void *iaddr, bool is_v6); +bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port, const void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 2a175006028b..b7877a194cfe 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -81,19 +81,20 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr) hash = (addr->atype == IPVL_IPV6) ? ipvlan_get_v6_hash(&addr->ip6addr) : ipvlan_get_v4_hash(&addr->ip4addr); - hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); + if (hlist_unhashed(&addr->hlnode)) + hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]); } void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync) { - hlist_del_rcu(&addr->hlnode); + hlist_del_init_rcu(&addr->hlnode); if (sync) synchronize_rcu(); } -bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) +struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, + const void *iaddr, bool is_v6) { - struct ipvl_port *port = ipvlan->port; struct ipvl_addr *addr; list_for_each_entry(addr, &ipvlan->addrs, anode) { @@ -101,12 +102,21 @@ bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) ipv6_addr_equal(&addr->ip6addr, iaddr)) || (!is_v6 && addr->atype == IPVL_IPV4 && addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr)) - return true; + return addr; } + return NULL; +} + +bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) +{ + struct ipvl_dev *ipvlan; - if (ipvlan_ht_addr_lookup(port, iaddr, is_v6)) - return true; + ASSERT_RTNL(); + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) + return true; + } return false; } @@ -192,7 +202,8 @@ static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_PAUSE)) return; - list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + rcu_read_lock(); + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { if (local && (ipvlan == in_dev)) continue; @@ -219,6 +230,7 @@ static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb, mcast_acct: ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true); } + rcu_read_unlock(); /* Locally generated? ...Forward a copy to the main-device as * well. On the RX side we'll ignore it (wont give it to any diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 4f4099d5603d..4fa14208d799 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -505,7 +505,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head) if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) { list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr, !dev->dismantle); - list_del_rcu(&addr->anode); + list_del(&addr->anode); } } list_del_rcu(&ipvlan->pnode); @@ -607,7 +607,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { struct ipvl_addr *addr; - if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) { + if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) { netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); @@ -620,9 +620,13 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) addr->master = ipvlan; memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr)); addr->atype = IPVL_IPV6; - list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + list_add_tail(&addr->anode, &ipvlan->addrs); ipvlan->ipv6cnt++; - ipvlan_ht_addr_add(ipvlan, addr); + /* If the interface is not up, the address will be added to the hash + * list by ipvlan_open. + */ + if (netif_running(ipvlan->dev)) + ipvlan_ht_addr_add(ipvlan, addr); return 0; } @@ -631,12 +635,12 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { struct ipvl_addr *addr; - addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true); + addr = ipvlan_find_addr(ipvlan, ip6_addr, true); if (!addr) return; ipvlan_ht_addr_del(addr, true); - list_del_rcu(&addr->anode); + list_del(&addr->anode); ipvlan->ipv6cnt--; WARN_ON(ipvlan->ipv6cnt < 0); kfree_rcu(addr, rcu); @@ -675,7 +679,7 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { struct ipvl_addr *addr; - if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) { + if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) { netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); @@ -688,9 +692,13 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) addr->master = ipvlan; memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr)); addr->atype = IPVL_IPV4; - list_add_tail_rcu(&addr->anode, &ipvlan->addrs); + list_add_tail(&addr->anode, &ipvlan->addrs); ipvlan->ipv4cnt++; - ipvlan_ht_addr_add(ipvlan, addr); + /* If the interface is not up, the address will be added to the hash + * list by ipvlan_open. + */ + if (netif_running(ipvlan->dev)) + ipvlan_ht_addr_add(ipvlan, addr); ipvlan_set_broadcast_mac_filter(ipvlan, true); return 0; @@ -700,12 +708,12 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { struct ipvl_addr *addr; - addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false); + addr = ipvlan_find_addr(ipvlan, ip4_addr, false); if (!addr) return; ipvlan_ht_addr_del(addr, true); - list_del_rcu(&addr->anode); + list_del(&addr->anode); ipvlan->ipv4cnt--; WARN_ON(ipvlan->ipv4cnt < 0); if (!ipvlan->ipv4cnt) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e40fdfccc9c1..27ecc5c4fa26 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -654,11 +654,14 @@ static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, } /* else everything is zero */ } +/* Neighbour code has some assumptions on HH_DATA_MOD alignment */ +#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN) + /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, struct iov_iter *from, int noblock) { - int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); + int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long total_len = iov_iter_count(from); @@ -722,7 +725,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); } - skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, + skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, linear, noblock, &err); if (!skb) goto err; diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c index 9e3af54c9010..32efbd48f326 100644 --- a/drivers/net/phy/amd-xgbe-phy.c +++ b/drivers/net/phy/amd-xgbe-phy.c @@ -92,6 +92,8 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define XGBE_PHY_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" #define XGBE_PHY_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" #define XGBE_PHY_TX_AMP_PROPERTY "amd,serdes-tx-amp" +#define XGBE_PHY_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" +#define XGBE_PHY_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" #define XGBE_PHY_SPEEDS 3 #define XGBE_PHY_SPEED_1000 0 @@ -177,10 +179,12 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_10000_BLWC 0 #define SPEED_10000_CDR 0x7 #define SPEED_10000_PLL 0x1 -#define SPEED_10000_PQ 0x1e +#define SPEED_10000_PQ 0x12 #define SPEED_10000_RATE 0x0 #define SPEED_10000_TXAMP 0xa #define SPEED_10000_WORD 0x7 +#define SPEED_10000_DFE_TAP_CONFIG 0x1 +#define SPEED_10000_DFE_TAP_ENABLE 0x7f #define SPEED_2500_BLWC 1 #define SPEED_2500_CDR 0x2 @@ -189,6 +193,8 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_2500_RATE 0x1 #define SPEED_2500_TXAMP 0xf #define SPEED_2500_WORD 0x1 +#define SPEED_2500_DFE_TAP_CONFIG 0x3 +#define SPEED_2500_DFE_TAP_ENABLE 0x0 #define SPEED_1000_BLWC 1 #define SPEED_1000_CDR 0x2 @@ -197,16 +203,25 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_1000_RATE 0x3 #define SPEED_1000_TXAMP 0xf #define SPEED_1000_WORD 0x1 +#define SPEED_1000_DFE_TAP_CONFIG 0x3 +#define SPEED_1000_DFE_TAP_ENABLE 0x0 /* SerDes RxTx register offsets */ +#define RXTX_REG6 0x0018 #define RXTX_REG20 0x0050 +#define RXTX_REG22 0x0058 #define RXTX_REG114 0x01c8 +#define RXTX_REG129 0x0204 /* SerDes RxTx register entry bit positions and sizes */ +#define RXTX_REG6_RESETB_RXD_INDEX 8 +#define RXTX_REG6_RESETB_RXD_WIDTH 1 #define RXTX_REG20_BLWC_ENA_INDEX 2 #define RXTX_REG20_BLWC_ENA_WIDTH 1 #define RXTX_REG114_PQ_REG_INDEX 9 #define RXTX_REG114_PQ_REG_WIDTH 7 +#define RXTX_REG129_RXDFE_CONFIG_INDEX 14 +#define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 /* Bit setting and getting macros * The get macro will extract the current bit field value from within @@ -333,6 +348,18 @@ static const u32 amd_xgbe_phy_serdes_tx_amp[] = { SPEED_10000_TXAMP, }; +static const u32 amd_xgbe_phy_serdes_dfe_tap_cfg[] = { + SPEED_1000_DFE_TAP_CONFIG, + SPEED_2500_DFE_TAP_CONFIG, + SPEED_10000_DFE_TAP_CONFIG, +}; + +static const u32 amd_xgbe_phy_serdes_dfe_tap_ena[] = { + SPEED_1000_DFE_TAP_ENABLE, + SPEED_2500_DFE_TAP_ENABLE, + SPEED_10000_DFE_TAP_ENABLE, +}; + enum amd_xgbe_phy_an { AMD_XGBE_AN_READY = 0, AMD_XGBE_AN_PAGE_RECEIVED, @@ -393,6 +420,8 @@ struct amd_xgbe_phy_priv { u32 serdes_cdr_rate[XGBE_PHY_SPEEDS]; u32 serdes_pq_skew[XGBE_PHY_SPEEDS]; u32 serdes_tx_amp[XGBE_PHY_SPEEDS]; + u32 serdes_dfe_tap_cfg[XGBE_PHY_SPEEDS]; + u32 serdes_dfe_tap_ena[XGBE_PHY_SPEEDS]; /* Auto-negotiation state machine support */ struct mutex an_mutex; @@ -481,11 +510,16 @@ static void amd_xgbe_phy_serdes_complete_ratechange(struct phy_device *phydev) status = XSIR0_IOREAD(priv, SIR0_STATUS); if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) - return; + goto rx_reset; } netdev_dbg(phydev->attached_dev, "SerDes rx/tx not ready (%#hx)\n", status); + +rx_reset: + /* Perform Rx reset for the DFE changes */ + XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 0); + XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 1); } static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev) @@ -534,6 +568,10 @@ static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_10000]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_10000]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_10000]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_10000]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -586,6 +624,10 @@ static int amd_xgbe_phy_gmii_2500_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_2500]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_2500]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_2500]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_2500]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -638,6 +680,10 @@ static int amd_xgbe_phy_gmii_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_1000]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_1000]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_1000]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_1000]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -1668,6 +1714,38 @@ static int amd_xgbe_phy_probe(struct phy_device *phydev) sizeof(priv->serdes_tx_amp)); } + if (device_property_present(phy_dev, XGBE_PHY_DFE_CFG_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_PHY_DFE_CFG_PROPERTY, + priv->serdes_dfe_tap_cfg, + XGBE_PHY_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_PHY_DFE_CFG_PROPERTY); + goto err_sir1; + } + } else { + memcpy(priv->serdes_dfe_tap_cfg, + amd_xgbe_phy_serdes_dfe_tap_cfg, + sizeof(priv->serdes_dfe_tap_cfg)); + } + + if (device_property_present(phy_dev, XGBE_PHY_DFE_ENA_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_PHY_DFE_ENA_PROPERTY, + priv->serdes_dfe_tap_ena, + XGBE_PHY_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_PHY_DFE_ENA_PROPERTY); + goto err_sir1; + } + } else { + memcpy(priv->serdes_dfe_tap_ena, + amd_xgbe_phy_serdes_dfe_tap_ena, + sizeof(priv->serdes_dfe_tap_ena)); + } + phydev->priv = priv; if (!priv->adev || acpi_disabled) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index cdcac6aa4260..52cd8db2c57d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -236,6 +236,25 @@ static inline unsigned int phy_find_valid(unsigned int idx, u32 features) } /** + * phy_check_valid - check if there is a valid PHY setting which matches + * speed, duplex, and feature mask + * @speed: speed to match + * @duplex: duplex to match + * @features: A mask of the valid settings + * + * Description: Returns true if there is a valid setting, false otherwise. + */ +static inline bool phy_check_valid(int speed, int duplex, u32 features) +{ + unsigned int idx; + + idx = phy_find_valid(phy_find_setting(speed, duplex), features); + + return settings[idx].speed == speed && settings[idx].duplex == duplex && + (settings[idx].setting & features); +} + +/** * phy_sanitize_settings - make sure the PHY is set to supported speed and duplex * @phydev: the target phy_device struct * @@ -1045,7 +1064,6 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; int status; - unsigned int idx; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); @@ -1077,8 +1095,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv); lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp); - idx = phy_find_setting(phydev->speed, phydev->duplex); - if (!(lp & adv & settings[idx].setting)) + if (!phy_check_valid(phydev->speed, phydev->duplex, lp & adv)) goto eee_exit_err; if (clk_stop_enable) { diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 0e62274e884a..7d394846afc2 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -43,9 +43,7 @@ static struct team_port *team_port_get_rcu(const struct net_device *dev) { - struct team_port *port = rcu_dereference(dev->rx_handler_data); - - return team_port_exists(dev) ? port : NULL; + return rcu_dereference(dev->rx_handler_data); } static struct team_port *team_port_get_rtnl(const struct net_device *dev) @@ -1732,11 +1730,11 @@ static int team_set_mac_address(struct net_device *dev, void *p) if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - rcu_read_lock(); - list_for_each_entry_rcu(port, &team->port_list, list) + mutex_lock(&team->lock); + list_for_each_entry(port, &team->port_list, list) if (team->ops.port_change_dev_addr) team->ops.port_change_dev_addr(team, port); - rcu_read_unlock(); + mutex_unlock(&team->lock); return 0; } diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 3bd9678315ad..7ba8d0885f12 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -161,6 +161,7 @@ config USB_NET_AX8817X * Linksys USB200M * Netgear FA120 * Sitecom LN-029 + * Sitecom LN-028 * Intellinet USB 2.0 Ethernet * ST Lab USB 2.0 Ethernet * TrendNet TU2-ET100 diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 5c55f11572ba..75d6f26729a3 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes)); skb_put(skb, sizeof(padbytes)); } + + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index bf49792062a2..1173a24feda3 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -979,6 +979,10 @@ static const struct usb_device_id products [] = { USB_DEVICE (0x0df6, 0x0056), .driver_info = (unsigned long) &ax88178_info, }, { + // Sitecom LN-028 "USB 2.0 10/100/1000 Ethernet adapter" + USB_DEVICE (0x0df6, 0x061c), + .driver_info = (unsigned long) &ax88178_info, +}, { // corega FEther USB2-TX USB_DEVICE (0x07aa, 0x0017), .driver_info = (unsigned long) &ax8817x_info, diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 9311a08565be..4545e78840b0 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -522,6 +522,7 @@ static const struct driver_info wwan_info = { #define DELL_VENDOR_ID 0x413C #define REALTEK_VENDOR_ID 0x0bda #define SAMSUNG_VENDOR_ID 0x04e8 +#define LENOVO_VENDOR_ID 0x17ef static const struct usb_device_id products[] = { /* BLACKLIST !! @@ -702,6 +703,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 80a844e0ae03..c3e4da9e79ca 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1172,17 +1172,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* return skb */ ctx->tx_curr_skb = NULL; - dev->net->stats.tx_packets += ctx->tx_curr_frame_num; /* keep private stats: framing overhead and number of NTBs */ ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; ctx->tx_ntbs++; - /* usbnet has already counted all the framing overhead. + /* usbnet will count all the framing overhead by default. * Adjust the stats so that the tx_bytes counter show real * payload data instead. */ - dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload; + usbnet_set_skb_tx_stats(skb_out, n, + ctx->tx_curr_frame_payload - skb_out->len); return skb_out; diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index 3eed708a6182..1762ad3910b2 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -46,8 +46,7 @@ enum cx82310_status { }; #define CMD_PACKET_SIZE 64 -/* first command after power on can take around 8 seconds */ -#define CMD_TIMEOUT 15000 +#define CMD_TIMEOUT 100 #define CMD_REPLY_RETRY 5 #define CX82310_MTU 1514 @@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, "send command %#x: error %d\n", - cmd, ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, "send command %#x: error %d\n", + cmd, ret); goto end; } @@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply, buf, CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT); if (ret < 0) { - dev_err(&dev->udev->dev, - "reply receive error %d\n", ret); + if (cmd != CMD_GET_LINK_STATUS) + dev_err(&dev->udev->dev, + "reply receive error %d\n", + ret); goto end; } if (actual_len > 0) @@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) int ret; char buf[15]; struct usb_device *udev = dev->udev; + u8 link[3]; + int timeout = 50; /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */ if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0 @@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf) if (!dev->partial_data) return -ENOMEM; + /* wait for firmware to become ready (indicated by the link being up) */ + while (--timeout) { + ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0, + link, sizeof(link)); + /* the command can time out during boot - it's not an error */ + if (!ret && link[0] == 1 && link[2] == 1) + break; + msleep(500); + }; + if (!timeout) { + dev_err(&udev->dev, "firmware not ready in time\n"); + return -ETIMEDOUT; + } + /* enable ethernet mode (?) */ ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0); if (ret) { @@ -300,9 +318,18 @@ static const struct driver_info cx82310_info = { .tx_fixup = cx82310_tx_fixup, }; +#define USB_DEVICE_CLASS(vend, prod, cl, sc, pr) \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_DEV_INFO, \ + .idVendor = (vend), \ + .idProduct = (prod), \ + .bDeviceClass = (cl), \ + .bDeviceSubClass = (sc), \ + .bDeviceProtocol = (pr) + static const struct usb_device_id products[] = { { - USB_DEVICE_AND_INTERFACE_INFO(0x0572, 0xcb01, 0xff, 0, 0), + USB_DEVICE_CLASS(0x0572, 0xcb01, 0xff, 0, 0), .driver_info = (unsigned long) &cx82310_info }, { }, diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 9cdfb3fe9c15..778e91531fac 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1594,7 +1594,7 @@ hso_wait_modem_status(struct hso_serial *serial, unsigned long arg) } cprev = cnow; } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&tiocmget->waitq, &wait); return ret; diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 3d18bb0eee85..1bfe0fcaccf5 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -134,6 +134,11 @@ static const struct usb_device_id products [] = { }, { USB_DEVICE(0x050d, 0x258a), /* Belkin F5U258/F5U279 (PL-25A1) */ .driver_info = (unsigned long) &prolific_info, +}, { + USB_DEVICE(0x3923, 0x7825), /* National Instruments USB + * Host-to-Host Cable + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 438fc6bcaef1..9f7c0ab3b349 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -492,6 +492,7 @@ enum rtl8152_flags { /* Define these values to match your device */ #define VENDOR_ID_REALTEK 0x0bda #define VENDOR_ID_SAMSUNG 0x04e8 +#define VENDOR_ID_LENOVO 0x17ef #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 @@ -4037,6 +4038,7 @@ static struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)}, {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)}, {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {} }; diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index b94a0fbb8b3b..953de13267df 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb, skb_put(skb, sizeof(padbytes)); } + usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 449835f4331e..777757ae1973 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - if (!(dev->driver_info->flags & FLAG_MULTI_PACKET)) - dev->net->stats.tx_packets++; + dev->net->stats.tx_packets += entry->packets; dev->net->stats.tx_bytes += entry->length; } else { dev->net->stats.tx_errors++; @@ -1347,7 +1346,19 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, } else urb->transfer_flags |= URB_ZERO_PACKET; } - entry->length = urb->transfer_buffer_length = length; + urb->transfer_buffer_length = length; + + if (info->flags & FLAG_MULTI_PACKET) { + /* Driver has set number of packets and a length delta. + * Calculate the complete length and ensure that it's + * positive. + */ + entry->length += length; + if (WARN_ON_ONCE(entry->length <= 0)) + entry->length = length; + } else { + usbnet_set_skb_tx_stats(skb, 1, length); + } spin_lock_irqsave(&dev->txq.lock, flags); retval = usb_autopm_get_interface_async(dev->intf); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f1ff3666f090..59b0e9754ae3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1448,8 +1448,10 @@ static void virtnet_free_queues(struct virtnet_info *vi) { int i; - for (i = 0; i < vi->max_queue_pairs; i++) + for (i = 0; i < vi->max_queue_pairs; i++) { + napi_hash_del(&vi->rq[i].napi); netif_napi_del(&vi->rq[i].napi); + } kfree(vi->rq); kfree(vi->sq); @@ -1948,11 +1950,8 @@ static int virtnet_freeze(struct virtio_device *vdev) cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { + for (i = 0; i < vi->max_queue_pairs; i++) napi_disable(&vi->rq[i].napi); - napi_hash_del(&vi->rq[i].napi); - netif_napi_del(&vi->rq[i].napi); - } } remove_vq_common(vi); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1e0a775ea882..f8528a4cf54f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1218,7 +1218,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; flags &= ~VXLAN_HF_RCO; - vni &= VXLAN_VID_MASK; + vni &= VXLAN_VNI_MASK; } /* For backwards compatibility, only allow reserved fields to be @@ -1239,7 +1239,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) flags &= ~VXLAN_GBP_USED_BITS; } - if (flags || (vni & ~VXLAN_VID_MASK)) { + if (flags || vni & ~VXLAN_VNI_MASK) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 83c39e2858bf..88d121d43c08 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -806,21 +806,21 @@ static ssize_t cosa_read(struct file *file, spin_lock_irqsave(&cosa->lock, flags); add_wait_queue(&chan->rxwaitq, &wait); while (!chan->rx_status) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&cosa->lock, flags); schedule(); spin_lock_irqsave(&cosa->lock, flags); if (signal_pending(current) && chan->rx_status == 0) { chan->rx_status = 1; remove_wait_queue(&chan->rxwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); spin_unlock_irqrestore(&cosa->lock, flags); mutex_unlock(&chan->rlock); return -ERESTARTSYS; } } remove_wait_queue(&chan->rxwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); kbuf = chan->rxdata; count = chan->rxsize; spin_unlock_irqrestore(&cosa->lock, flags); @@ -890,14 +890,14 @@ static ssize_t cosa_write(struct file *file, spin_lock_irqsave(&cosa->lock, flags); add_wait_queue(&chan->txwaitq, &wait); while (!chan->tx_status) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&cosa->lock, flags); schedule(); spin_lock_irqsave(&cosa->lock, flags); if (signal_pending(current) && chan->tx_status == 0) { chan->tx_status = 1; remove_wait_queue(&chan->txwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); up(&chan->wsem); @@ -905,7 +905,7 @@ static ssize_t cosa_write(struct file *file, } } remove_wait_queue(&chan->txwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); up(&chan->wsem); spin_unlock_irqrestore(&cosa->lock, flags); kfree(kbuf); diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index cb366adc820b..f50a6bc5d06e 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -219,12 +219,15 @@ void ath9k_beacon_remove_slot(struct ath_softc *sc, struct ieee80211_vif *vif) struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ath_vif *avp = (void *)vif->drv_priv; struct ath_buf *bf = avp->av_bcbuf; + struct ath_beacon_config *cur_conf = &sc->cur_chan->beacon; ath_dbg(common, CONFIG, "Removing interface at beacon slot: %d\n", avp->av_bslot); tasklet_disable(&sc->bcon_tasklet); + cur_conf->enable_beacon &= ~BIT(avp->av_bslot); + if (bf && bf->bf_mpdu) { struct sk_buff *skb = bf->bf_mpdu; dma_unmap_single(sc->dev, bf->bf_buf_addr, @@ -521,8 +524,7 @@ static bool ath9k_allow_beacon_config(struct ath_softc *sc, } if (sc->sc_ah->opmode == NL80211_IFTYPE_AP) { - if ((vif->type != NL80211_IFTYPE_AP) || - (sc->nbcnvifs > 1)) { + if (vif->type != NL80211_IFTYPE_AP) { ath_dbg(common, CONFIG, "An AP interface is already present !\n"); return false; @@ -616,12 +618,14 @@ void ath9k_beacon_config(struct ath_softc *sc, struct ieee80211_vif *vif, * enabling/disabling SWBA. */ if (changed & BSS_CHANGED_BEACON_ENABLED) { - if (!bss_conf->enable_beacon && - (sc->nbcnvifs <= 1)) { - cur_conf->enable_beacon = false; - } else if (bss_conf->enable_beacon) { - cur_conf->enable_beacon = true; - ath9k_cache_beacon_config(sc, ctx, bss_conf); + bool enabled = cur_conf->enable_beacon; + + if (!bss_conf->enable_beacon) { + cur_conf->enable_beacon &= ~BIT(avp->av_bslot); + } else { + cur_conf->enable_beacon |= BIT(avp->av_bslot); + if (!enabled) + ath9k_cache_beacon_config(sc, ctx, bss_conf); } } diff --git a/drivers/net/wireless/ath/ath9k/common.h b/drivers/net/wireless/ath/ath9k/common.h index 2b79a568e803..d23737342f4f 100644 --- a/drivers/net/wireless/ath/ath9k/common.h +++ b/drivers/net/wireless/ath/ath9k/common.h @@ -54,7 +54,7 @@ struct ath_beacon_config { u16 dtim_period; u16 bmiss_timeout; u8 dtim_count; - bool enable_beacon; + u8 enable_beacon; bool ibss_creator; u32 nexttbtt; u32 intval; diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 60aa8d71e753..8529014e1a5e 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -424,7 +424,7 @@ static void ath9k_hw_init_defaults(struct ath_hw *ah) ah->power_mode = ATH9K_PM_UNDEFINED; ah->htc_reset_init = true; - ah->tpc_enabled = true; + ah->tpc_enabled = false; ah->ani_function = ATH9K_ANI_ALL; if (!AR_SREV_9300_20_OR_LATER(ah)) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index ccbdb05b28cd..75345c1e8c34 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5370,6 +5370,7 @@ static void b43_supported_bands(struct b43_wldev *dev, bool *have_2ghz_phy, case 0x432a: /* BCM4321 */ case 0x432d: /* BCM4322 */ case 0x4352: /* BCM43222 */ + case 0x435a: /* BCM43228 */ case 0x4333: /* BCM4331 */ case 0x43a2: /* BCM4360 */ case 0x43b3: /* BCM4352 */ diff --git a/drivers/net/wireless/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/brcm80211/brcmfmac/feature.c index defb7a44e0bc..7748a1ccf14f 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/feature.c @@ -126,7 +126,8 @@ void brcmf_feat_attach(struct brcmf_pub *drvr) brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_MCHAN, "mchan"); if (drvr->bus_if->wowl_supported) brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_WOWL, "wowl"); - brcmf_feat_iovar_int_set(ifp, BRCMF_FEAT_MBSS, "mbss", 0); + if (drvr->bus_if->chip != BRCM_CC_43362_CHIP_ID) + brcmf_feat_iovar_int_set(ifp, BRCMF_FEAT_MBSS, "mbss", 0); /* set chip related quirks */ switch (drvr->bus_if->chip) { diff --git a/drivers/net/wireless/brcm80211/brcmfmac/vendor.c b/drivers/net/wireless/brcm80211/brcmfmac/vendor.c index 50cdf7090198..8eff2753abad 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/vendor.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/vendor.c @@ -39,13 +39,22 @@ static int brcmf_cfg80211_vndr_cmds_dcmd_handler(struct wiphy *wiphy, void *dcmd_buf = NULL, *wr_pointer; u16 msglen, maxmsglen = PAGE_SIZE - 0x100; - brcmf_dbg(TRACE, "cmd %x set %d len %d\n", cmdhdr->cmd, cmdhdr->set, - cmdhdr->len); + if (len < sizeof(*cmdhdr)) { + brcmf_err("vendor command too short: %d\n", len); + return -EINVAL; + } vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev); ifp = vif->ifp; - len -= sizeof(struct brcmf_vndr_dcmd_hdr); + brcmf_dbg(TRACE, "ifidx=%d, cmd=%d\n", ifp->ifidx, cmdhdr->cmd); + + if (cmdhdr->offset > len) { + brcmf_err("bad buffer offset %d > %d\n", cmdhdr->offset, len); + return -EINVAL; + } + + len -= cmdhdr->offset; ret_len = cmdhdr->len; if (ret_len > 0 || len > 0) { if (len > BRCMF_DCMD_MAXLEN) { diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index a6f22c32a279..3811878ab9cd 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -708,7 +708,6 @@ struct iwl_priv { unsigned long reload_jiffies; int reload_count; bool ucode_loaded; - bool init_ucode_run; /* Don't run init uCode again */ u8 plcp_delta_threshold; diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index 47e64e8b9517..cceb026e0793 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1114,16 +1114,17 @@ static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif, scd_queues &= ~(BIT(IWL_IPAN_CMD_QUEUE_NUM) | BIT(IWL_DEFAULT_CMD_QUEUE_NUM)); - if (vif) - scd_queues &= ~BIT(vif->hw_queue[IEEE80211_AC_VO]); - - IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n", scd_queues); - if (iwlagn_txfifo_flush(priv, scd_queues)) { - IWL_ERR(priv, "flush request fail\n"); - goto done; + if (drop) { + IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n", + scd_queues); + if (iwlagn_txfifo_flush(priv, scd_queues)) { + IWL_ERR(priv, "flush request fail\n"); + goto done; + } } + IWL_DEBUG_TX_QUEUES(priv, "wait transmit/flush all frames\n"); - iwl_trans_wait_tx_queue_empty(priv->trans, 0xffffffff); + iwl_trans_wait_tx_queue_empty(priv->trans, scd_queues); done: mutex_unlock(&priv->mutex); IWL_DEBUG_MAC80211(priv, "leave\n"); diff --git a/drivers/net/wireless/iwlwifi/dvm/ucode.c b/drivers/net/wireless/iwlwifi/dvm/ucode.c index 4dbef7e58c2e..5244e43bfafb 100644 --- a/drivers/net/wireless/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c @@ -418,9 +418,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len) return 0; - if (priv->init_ucode_run) - return 0; - iwl_init_notification_wait(&priv->notif_wait, &calib_wait, calib_complete, ARRAY_SIZE(calib_complete), iwlagn_wait_calib, priv); @@ -440,8 +437,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) */ ret = iwl_wait_notification(&priv->notif_wait, &calib_wait, UCODE_CALIB_TIMEOUT); - if (!ret) - priv->init_ucode_run = true; goto out; diff --git a/drivers/net/wireless/iwlwifi/iwl-1000.c b/drivers/net/wireless/iwlwifi/iwl-1000.c index c3817fae16c0..06f6cc08f451 100644 --- a/drivers/net/wireless/iwlwifi/iwl-1000.c +++ b/drivers/net/wireless/iwlwifi/iwl-1000.c @@ -95,7 +95,8 @@ static const struct iwl_eeprom_params iwl1000_eeprom_params = { .nvm_calib_ver = EEPROM_1000_TX_POWER_VERSION, \ .base_params = &iwl1000_base_params, \ .eeprom_params = &iwl1000_eeprom_params, \ - .led_mode = IWL_LED_BLINK + .led_mode = IWL_LED_BLINK, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl1000_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 1000 BGN", @@ -121,7 +122,8 @@ const struct iwl_cfg iwl1000_bg_cfg = { .base_params = &iwl1000_base_params, \ .eeprom_params = &iwl1000_eeprom_params, \ .led_mode = IWL_LED_RF_STATE, \ - .rx_with_siso_diversity = true + .rx_with_siso_diversity = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl100_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 100 BGN", diff --git a/drivers/net/wireless/iwlwifi/iwl-2000.c b/drivers/net/wireless/iwlwifi/iwl-2000.c index 21e5d0843a62..890b95f497d6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-2000.c +++ b/drivers/net/wireless/iwlwifi/iwl-2000.c @@ -123,7 +123,9 @@ static const struct iwl_eeprom_params iwl20x0_eeprom_params = { .nvm_calib_ver = EEPROM_2000_TX_POWER_VERSION, \ .base_params = &iwl2000_base_params, \ .eeprom_params = &iwl20x0_eeprom_params, \ - .led_mode = IWL_LED_RF_STATE + .led_mode = IWL_LED_RF_STATE, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K + const struct iwl_cfg iwl2000_2bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 2200 BGN", @@ -149,7 +151,8 @@ const struct iwl_cfg iwl2000_2bgn_d_cfg = { .nvm_calib_ver = EEPROM_2000_TX_POWER_VERSION, \ .base_params = &iwl2030_base_params, \ .eeprom_params = &iwl20x0_eeprom_params, \ - .led_mode = IWL_LED_RF_STATE + .led_mode = IWL_LED_RF_STATE, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl2030_2bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 2230 BGN", @@ -170,7 +173,8 @@ const struct iwl_cfg iwl2030_2bgn_cfg = { .base_params = &iwl2000_base_params, \ .eeprom_params = &iwl20x0_eeprom_params, \ .led_mode = IWL_LED_RF_STATE, \ - .rx_with_siso_diversity = true + .rx_with_siso_diversity = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl105_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 105 BGN", @@ -197,7 +201,8 @@ const struct iwl_cfg iwl105_bgn_d_cfg = { .base_params = &iwl2030_base_params, \ .eeprom_params = &iwl20x0_eeprom_params, \ .led_mode = IWL_LED_RF_STATE, \ - .rx_with_siso_diversity = true + .rx_with_siso_diversity = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl135_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N 135 BGN", diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index 332bbede39e5..724194e23414 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -93,7 +93,8 @@ static const struct iwl_eeprom_params iwl5000_eeprom_params = { .nvm_calib_ver = EEPROM_5000_TX_POWER_VERSION, \ .base_params = &iwl5000_base_params, \ .eeprom_params = &iwl5000_eeprom_params, \ - .led_mode = IWL_LED_BLINK + .led_mode = IWL_LED_BLINK, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl5300_agn_cfg = { .name = "Intel(R) Ultimate N WiFi Link 5300 AGN", @@ -158,7 +159,8 @@ const struct iwl_cfg iwl5350_agn_cfg = { .base_params = &iwl5000_base_params, \ .eeprom_params = &iwl5000_eeprom_params, \ .led_mode = IWL_LED_BLINK, \ - .internal_wimax_coex = true + .internal_wimax_coex = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl5150_agn_cfg = { .name = "Intel(R) WiMAX/WiFi Link 5150 AGN", diff --git a/drivers/net/wireless/iwlwifi/iwl-6000.c b/drivers/net/wireless/iwlwifi/iwl-6000.c index 8f2c3c8c6b84..21b2630763dc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-6000.c +++ b/drivers/net/wireless/iwlwifi/iwl-6000.c @@ -145,7 +145,8 @@ static const struct iwl_eeprom_params iwl6000_eeprom_params = { .nvm_calib_ver = EEPROM_6005_TX_POWER_VERSION, \ .base_params = &iwl6000_g2_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ - .led_mode = IWL_LED_RF_STATE + .led_mode = IWL_LED_RF_STATE, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6005_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6205 AGN", @@ -199,7 +200,8 @@ const struct iwl_cfg iwl6005_2agn_mow2_cfg = { .nvm_calib_ver = EEPROM_6030_TX_POWER_VERSION, \ .base_params = &iwl6000_g2_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ - .led_mode = IWL_LED_RF_STATE + .led_mode = IWL_LED_RF_STATE, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6030_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6230 AGN", @@ -235,7 +237,8 @@ const struct iwl_cfg iwl6030_2bg_cfg = { .nvm_calib_ver = EEPROM_6030_TX_POWER_VERSION, \ .base_params = &iwl6000_g2_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ - .led_mode = IWL_LED_RF_STATE + .led_mode = IWL_LED_RF_STATE, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6035_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN", @@ -290,7 +293,8 @@ const struct iwl_cfg iwl130_bg_cfg = { .nvm_calib_ver = EEPROM_6000_TX_POWER_VERSION, \ .base_params = &iwl6000_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ - .led_mode = IWL_LED_BLINK + .led_mode = IWL_LED_BLINK, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6000i_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N 6200 AGN", @@ -322,7 +326,8 @@ const struct iwl_cfg iwl6000i_2bg_cfg = { .base_params = &iwl6050_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ .led_mode = IWL_LED_BLINK, \ - .internal_wimax_coex = true + .internal_wimax_coex = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6050_2agn_cfg = { .name = "Intel(R) Centrino(R) Advanced-N + WiMAX 6250 AGN", @@ -347,7 +352,8 @@ const struct iwl_cfg iwl6050_2abg_cfg = { .base_params = &iwl6050_base_params, \ .eeprom_params = &iwl6000_eeprom_params, \ .led_mode = IWL_LED_BLINK, \ - .internal_wimax_coex = true + .internal_wimax_coex = true, \ + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K const struct iwl_cfg iwl6150_bgn_cfg = { .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BGN", diff --git a/drivers/net/wireless/iwlwifi/iwl-drv.c b/drivers/net/wireless/iwlwifi/iwl-drv.c index 996e7f16adf9..c7154ac42c8c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/iwlwifi/iwl-drv.c @@ -1257,6 +1257,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) op->name, err); #endif } + kfree(pieces); return; try_again: diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c index 1ec4d55155f7..7810c41cf9a7 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex.c @@ -793,7 +793,8 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac, if (!vif->bss_conf.assoc) smps_mode = IEEE80211_SMPS_AUTOMATIC; - if (IWL_COEX_IS_RRC_ON(mvm->last_bt_notif.ttc_rrc_status, + if (mvmvif->phy_ctxt && + IWL_COEX_IS_RRC_ON(mvm->last_bt_notif.ttc_rrc_status, mvmvif->phy_ctxt->id)) smps_mode = IEEE80211_SMPS_AUTOMATIC; diff --git a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c index d530ef3da107..542ee74f290a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex_legacy.c @@ -832,7 +832,8 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac, if (!vif->bss_conf.assoc) smps_mode = IEEE80211_SMPS_AUTOMATIC; - if (data->notif->rrc_enabled & BIT(mvmvif->phy_ctxt->id)) + if (mvmvif->phy_ctxt && + data->notif->rrc_enabled & BIT(mvmvif->phy_ctxt->id)) smps_mode = IEEE80211_SMPS_AUTOMATIC; IWL_DEBUG_COEX(data->mvm, diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 1ff7ec08532d..09654e73a533 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -405,7 +405,10 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->bands[IEEE80211_BAND_5GHZ] = &mvm->nvm_data->bands[IEEE80211_BAND_5GHZ]; - if (mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_BEAMFORMER) + if ((mvm->fw->ucode_capa.capa[0] & + IWL_UCODE_TLV_CAPA_BEAMFORMER) && + (mvm->fw->ucode_capa.api[0] & + IWL_UCODE_TLV_API_LQ_SS_PARAMS)) hw->wiphy->bands[IEEE80211_BAND_5GHZ]->vht_cap.cap |= IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE; } @@ -2215,7 +2218,19 @@ static void iwl_mvm_mac_cancel_hw_scan(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); - iwl_mvm_cancel_scan(mvm); + /* Due to a race condition, it's possible that mac80211 asks + * us to stop a hw_scan when it's already stopped. This can + * happen, for instance, if we stopped the scan ourselves, + * called ieee80211_scan_completed() and the userspace called + * cancel scan scan before ieee80211_scan_work() could run. + * To handle that, simply return if the scan is not running. + */ + /* FIXME: for now, we ignore this race for UMAC scans, since + * they don't set the scan_status. + */ + if ((mvm->scan_status == IWL_MVM_SCAN_OS) || + (mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN)) + iwl_mvm_cancel_scan(mvm); mutex_unlock(&mvm->mutex); } @@ -2559,12 +2574,29 @@ static int iwl_mvm_mac_sched_scan_stop(struct ieee80211_hw *hw, int ret; mutex_lock(&mvm->mutex); + + /* Due to a race condition, it's possible that mac80211 asks + * us to stop a sched_scan when it's already stopped. This + * can happen, for instance, if we stopped the scan ourselves, + * called ieee80211_sched_scan_stopped() and the userspace called + * stop sched scan scan before ieee80211_sched_scan_stopped_work() + * could run. To handle this, simply return if the scan is + * not running. + */ + /* FIXME: for now, we ignore this race for UMAC scans, since + * they don't set the scan_status. + */ + if (mvm->scan_status != IWL_MVM_SCAN_SCHED && + !(mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { + mutex_unlock(&mvm->mutex); + return 0; + } + ret = iwl_mvm_scan_offload_stop(mvm, false); mutex_unlock(&mvm->mutex); iwl_mvm_wait_for_async_handlers(mvm); return ret; - } static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c index 194bd1f939ca..078f24cf4af3 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/iwlwifi/mvm/rs.c @@ -134,9 +134,12 @@ enum rs_column_mode { #define MAX_NEXT_COLUMNS 7 #define MAX_COLUMN_CHECKS 3 +struct rs_tx_column; + typedef bool (*allow_column_func_t) (struct iwl_mvm *mvm, struct ieee80211_sta *sta, - struct iwl_scale_tbl_info *tbl); + struct iwl_scale_tbl_info *tbl, + const struct rs_tx_column *next_col); struct rs_tx_column { enum rs_column_mode mode; @@ -147,13 +150,15 @@ struct rs_tx_column { }; static bool rs_ant_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - struct iwl_scale_tbl_info *tbl) + struct iwl_scale_tbl_info *tbl, + const struct rs_tx_column *next_col) { - return iwl_mvm_bt_coex_is_ant_avail(mvm, tbl->rate.ant); + return iwl_mvm_bt_coex_is_ant_avail(mvm, next_col->ant); } static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - struct iwl_scale_tbl_info *tbl) + struct iwl_scale_tbl_info *tbl, + const struct rs_tx_column *next_col) { if (!sta->ht_cap.ht_supported) return false; @@ -171,7 +176,8 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } static bool rs_siso_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - struct iwl_scale_tbl_info *tbl) + struct iwl_scale_tbl_info *tbl, + const struct rs_tx_column *next_col) { if (!sta->ht_cap.ht_supported) return false; @@ -180,7 +186,8 @@ static bool rs_siso_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, } static bool rs_sgi_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta, - struct iwl_scale_tbl_info *tbl) + struct iwl_scale_tbl_info *tbl, + const struct rs_tx_column *next_col) { struct rs_rate *rate = &tbl->rate; struct ieee80211_sta_ht_cap *ht_cap = &sta->ht_cap; @@ -1271,6 +1278,9 @@ static void rs_mac80211_tx_status(void *mvm_r, struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + if (!iwl_mvm_sta_from_mac80211(sta)->vif) + return; + if (!ieee80211_is_data(hdr->frame_control) || info->flags & IEEE80211_TX_CTL_NO_ACK) return; @@ -1590,7 +1600,7 @@ static enum rs_column rs_get_next_column(struct iwl_mvm *mvm, for (j = 0; j < MAX_COLUMN_CHECKS; j++) { allow_func = next_col->checks[j]; - if (allow_func && !allow_func(mvm, sta, tbl)) + if (allow_func && !allow_func(mvm, sta, tbl, next_col)) break; } @@ -2504,6 +2514,14 @@ static void rs_get_rate(void *mvm_r, struct ieee80211_sta *sta, void *mvm_sta, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct iwl_lq_sta *lq_sta = mvm_sta; + if (sta && !iwl_mvm_sta_from_mac80211(sta)->vif) { + /* if vif isn't initialized mvm doesn't know about + * this station, so don't do anything with the it + */ + sta = NULL; + mvm_sta = NULL; + } + /* TODO: handle rate_idx_mask and rate_idx_mcs_mask */ /* Treat uninitialized rate scaling data same as non-existing. */ @@ -2820,6 +2838,9 @@ static void rs_rate_update(void *mvm_r, (struct iwl_op_mode *)mvm_r; struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + if (!iwl_mvm_sta_from_mac80211(sta)->vif) + return; + /* Stop any ongoing aggregations as rs starts off assuming no agg */ for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++) ieee80211_stop_tx_ba_session(sta, tid); @@ -3580,9 +3601,15 @@ static ssize_t iwl_dbgfs_ss_force_write(struct iwl_lq_sta *lq_sta, char *buf, MVM_DEBUGFS_READ_WRITE_FILE_OPS(ss_force, 32); -static void rs_add_debugfs(void *mvm, void *mvm_sta, struct dentry *dir) +static void rs_add_debugfs(void *mvm, void *priv_sta, struct dentry *dir) { - struct iwl_lq_sta *lq_sta = mvm_sta; + struct iwl_lq_sta *lq_sta = priv_sta; + struct iwl_mvm_sta *mvmsta; + + mvmsta = container_of(lq_sta, struct iwl_mvm_sta, lq_sta); + + if (!mvmsta->vif) + return; debugfs_create_file("rate_scale_table", S_IRUSR | S_IWUSR, dir, lq_sta, &rs_sta_dbgfs_scale_table_ops); diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 7e9aa3cb3254..c47c8051da77 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -1128,8 +1128,10 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify) if (mvm->scan_status == IWL_MVM_SCAN_NONE) return 0; - if (iwl_mvm_is_radio_killed(mvm)) + if (iwl_mvm_is_radio_killed(mvm)) { + ret = 0; goto out; + } if (mvm->scan_status != IWL_MVM_SCAN_SCHED && (!(mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN) || @@ -1148,16 +1150,14 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify) IWL_DEBUG_SCAN(mvm, "Send stop %sscan failed %d\n", sched ? "offloaded " : "", ret); iwl_remove_notification(&mvm->notif_wait, &wait_scan_done); - return ret; + goto out; } IWL_DEBUG_SCAN(mvm, "Successfully sent stop %sscan\n", sched ? "offloaded " : ""); ret = iwl_wait_notification(&mvm->notif_wait, &wait_scan_done, 1 * HZ); - if (ret) - return ret; - +out: /* * Clear the scan status so the next scan requests will succeed. This * also ensures the Rx handler doesn't do anything, as the scan was @@ -1167,7 +1167,6 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify) if (mvm->scan_status == IWL_MVM_SCAN_OS) iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); -out: mvm->scan_status = IWL_MVM_SCAN_NONE; if (notify) { @@ -1177,7 +1176,7 @@ out: ieee80211_scan_completed(mvm->hw, true); } - return 0; + return ret; } static void iwl_mvm_unified_scan_fill_tx_cmd(struct iwl_mvm *mvm, diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c index 54fafbf9a711..4b81c0bf63b0 100644 --- a/drivers/net/wireless/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c @@ -197,6 +197,8 @@ iwl_mvm_te_handle_notify_csa(struct iwl_mvm *mvm, struct iwl_time_event_notif *notif) { if (!le32_to_cpu(notif->status)) { + if (te_data->vif->type == NL80211_IFTYPE_STATION) + ieee80211_connection_loss(te_data->vif); IWL_DEBUG_TE(mvm, "CSA time event failed to start\n"); iwl_mvm_te_clear_data(mvm, te_data); return; @@ -750,8 +752,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm) * request */ list_for_each_entry(te_data, &mvm->time_event_list, list) { - if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE && - te_data->running) { + if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) { mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); is_p2p = true; goto remove_te; @@ -766,10 +767,8 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm) * request */ list_for_each_entry(te_data, &mvm->aux_roc_te_list, list) { - if (te_data->running) { - mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); - goto remove_te; - } + mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); + goto remove_te; } remove_te: diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 07304e1fd64a..96a05406babf 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -949,8 +949,10 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, mvmsta = iwl_mvm_sta_from_mac80211(sta); tid_data = &mvmsta->tid_data[tid]; - if (WARN_ONCE(tid_data->txq_id != scd_flow, "Q %d, tid %d, flow %d", - tid_data->txq_id, tid, scd_flow)) { + if (tid_data->txq_id != scd_flow) { + IWL_ERR(mvm, + "invalid BA notification: Q %d, tid %d, flow %d\n", + tid_data->txq_id, tid, scd_flow); rcu_read_unlock(); return 0; } diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index dbd6bcf52205..686dd301cd53 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -368,10 +368,12 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 3165 Series */ {IWL_PCI_DEVICE(0x3165, 0x4010, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4012, iwl3165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3165, 0x4110, iwl3165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x3165, 0x4210, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4410, iwl3165_2ac_cfg)}, {IWL_PCI_DEVICE(0x3165, 0x4510, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3165, 0x4110, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3166, 0x4310, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3166, 0x4210, iwl3165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x3165, 0x8010, iwl3165_2ac_cfg)}, /* 7265 Series */ {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)}, diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4a4c6586a8d2..8908be6dbc48 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -946,7 +946,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - genlmsg_unicast(&init_net, skb, dst_portid); + if (genlmsg_unicast(&init_net, skb, dst_portid)) + goto err_free_txskb; /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -955,6 +956,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, return; nla_put_failure: + nlmsg_free(skb); +err_free_txskb: printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); ieee80211_free_txskb(hw, my_skb); data->tx_failed++; diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c index 1d4677460711..074f716020aa 100644 --- a/drivers/net/wireless/rtlwifi/base.c +++ b/drivers/net/wireless/rtlwifi/base.c @@ -1386,8 +1386,11 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx) } return true; - } else if (0x86DD == ether_type) { - return true; + } else if (ETH_P_IPV6 == ether_type) { + /* TODO: Handle any IPv6 cases that need special handling. + * For now, always return false + */ + goto end; } end: diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c index a62170ea0481..8c45cf44ce24 100644 --- a/drivers/net/wireless/rtlwifi/pci.c +++ b/drivers/net/wireless/rtlwifi/pci.c @@ -1124,12 +1124,22 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) /*This is for new trx flow*/ struct rtl_tx_buffer_desc *pbuffer_desc = NULL; u8 temp_one = 1; + u8 *entry; memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); ring = &rtlpci->tx_ring[BEACON_QUEUE]; pskb = __skb_dequeue(&ring->queue); - if (pskb) + if (rtlpriv->use_new_trx_flow) + entry = (u8 *)(&ring->buffer_desc[ring->idx]); + else + entry = (u8 *)(&ring->desc[ring->idx]); + if (pskb) { + pci_unmap_single(rtlpci->pdev, + rtlpriv->cfg->ops->get_desc( + (u8 *)entry, true, HW_DESC_TXBUFF_ADDR), + pskb->len, PCI_DMA_TODEVICE); kfree_skb(pskb); + } /*NB: the beacon data buffer must be 32-bit aligned. */ pskb = ieee80211_beacon_get(hw, mac->vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index f38227afe099..3aa8648080c8 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -340,12 +340,11 @@ static void xenvif_get_ethtool_stats(struct net_device *dev, unsigned int num_queues = vif->num_queues; int i; unsigned int queue_index; - struct xenvif_stats *vif_stats; for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) { unsigned long accum = 0; for (queue_index = 0; queue_index < num_queues; ++queue_index) { - vif_stats = &vif->queues[queue_index].stats; + void *vif_stats = &vif->queues[queue_index].stats; accum += *(unsigned long *)(vif_stats + xenvif_stats[i].offset); } data[i] = accum; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f7a31d2cb3f1..997cf0901ac2 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -96,6 +96,7 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, static void make_tx_response(struct xenvif_queue *queue, struct xen_netif_tx_request *txp, s8 st); +static void push_tx_responses(struct xenvif_queue *queue); static inline int tx_work_todo(struct xenvif_queue *queue); @@ -657,6 +658,7 @@ static void xenvif_tx_err(struct xenvif_queue *queue, do { spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR); + push_tx_responses(queue); spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; @@ -1343,7 +1345,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s { unsigned int offset = skb_headlen(skb); skb_frag_t frags[MAX_SKB_FRAGS]; - int i; + int i, f; struct ubuf_info *uarg; struct sk_buff *nskb = skb_shinfo(skb)->frag_list; @@ -1383,23 +1385,25 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s frags[i].page_offset = 0; skb_frag_size_set(&frags[i], len); } - /* swap out with old one */ - memcpy(skb_shinfo(skb)->frags, - frags, - i * sizeof(skb_frag_t)); - skb_shinfo(skb)->nr_frags = i; - skb->truesize += i * PAGE_SIZE; - /* remove traces of mapped pages and frag_list */ + /* Copied all the bits from the frag list -- free it. */ skb_frag_list_init(skb); + xenvif_skb_zerocopy_prepare(queue, nskb); + kfree_skb(nskb); + + /* Release all the original (foreign) frags. */ + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + skb_frag_unref(skb, f); uarg = skb_shinfo(skb)->destructor_arg; /* increase inflight counter to offset decrement in callback */ atomic_inc(&queue->inflight_packets); uarg->callback(uarg, true); skb_shinfo(skb)->destructor_arg = NULL; - xenvif_skb_zerocopy_prepare(queue, nskb); - kfree_skb(nskb); + /* Fill the skb with the new (local) frags. */ + memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t)); + skb_shinfo(skb)->nr_frags = i; + skb->truesize += i * PAGE_SIZE; return 0; } @@ -1652,13 +1656,20 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, unsigned long flags; pending_tx_info = &queue->pending_tx_info[pending_idx]; + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, &pending_tx_info->req, status); - index = pending_index(queue->pending_prod); + + /* Release the pending index before pusing the Tx response so + * its available before a new Tx request is pushed by the + * frontend. + */ + index = pending_index(queue->pending_prod++); queue->pending_ring[index] = pending_idx; - /* TX shouldn't use the index before we give it back here */ - mb(); - queue->pending_prod++; + + push_tx_responses(queue); + spin_unlock_irqrestore(&queue->response_lock, flags); } @@ -1669,7 +1680,6 @@ static void make_tx_response(struct xenvif_queue *queue, { RING_IDX i = queue->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; - int notify; resp = RING_GET_RESPONSE(&queue->tx, i); resp->id = txp->id; @@ -1679,6 +1689,12 @@ static void make_tx_response(struct xenvif_queue *queue, RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); if (notify) notify_remote_via_irq(queue->tx_irq); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e9b960f0ff32..720aaf6313d2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1008,8 +1008,7 @@ err: static int xennet_change_mtu(struct net_device *dev, int mtu) { - int max = xennet_can_sg(dev) ? - XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN; + int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN; if (mtu > max) return -EINVAL; @@ -1279,8 +1278,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev) netdev->ethtool_ops = &xennet_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER); - np->netdev = netdev; netif_carrier_off(netdev); diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 38d1c51f58b1..7bcaeec876c0 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -84,8 +84,7 @@ config OF_RESOLVE bool config OF_OVERLAY - bool - depends on OF + bool "Device Tree overlays" select OF_DYNAMIC select OF_RESOLVE diff --git a/drivers/of/address.c b/drivers/of/address.c index ad2906919d45..78a7dcbec7d8 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -450,12 +450,17 @@ static struct of_bus *of_match_bus(struct device_node *np) return NULL; } -static int of_empty_ranges_quirk(void) +static int of_empty_ranges_quirk(struct device_node *np) { if (IS_ENABLED(CONFIG_PPC)) { - /* To save cycles, we cache the result */ + /* To save cycles, we cache the result for global "Mac" setting */ static int quirk_state = -1; + /* PA-SEMI sdc DT bug */ + if (of_device_is_compatible(np, "1682m-sdc")) + return true; + + /* Make quirk cached */ if (quirk_state < 0) quirk_state = of_machine_is_compatible("Power Macintosh") || @@ -490,7 +495,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus, * This code is only enabled on powerpc. --gcl */ ranges = of_get_property(parent, rprop, &rlen); - if (ranges == NULL && !of_empty_ranges_quirk()) { + if (ranges == NULL && !of_empty_ranges_quirk(parent)) { pr_debug("OF: no ranges; cannot translate\n"); return 1; } diff --git a/drivers/of/base.c b/drivers/of/base.c index 0a8aeb8523fe..8f165b112e03 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -714,16 +714,12 @@ static struct device_node *__of_find_node_by_path(struct device_node *parent, const char *path) { struct device_node *child; - int len = strchrnul(path, '/') - path; - int term; + int len; + len = strcspn(path, "/:"); if (!len) return NULL; - term = strchrnul(path, ':') - path; - if (term < len) - len = term; - __for_each_child_of_node(parent, child) { const char *name = strrchr(child->full_name, '/'); if (WARN(!name, "malformed device_node %s\n", child->full_name)) @@ -768,8 +764,12 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt /* The path could begin with an alias */ if (*path != '/') { - char *p = strchrnul(path, '/'); - int len = separator ? separator - path : p - path; + int len; + const char *p = separator; + + if (!p) + p = strchrnul(path, '/'); + len = p - path; /* of_aliases must not be NULL */ if (!of_aliases) @@ -794,6 +794,8 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt path++; /* Increment past '/' delimiter */ np = __of_find_node_by_path(np, path); path = strchrnul(path, '/'); + if (separator && separator < path) + break; } raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 0d7765807f49..1a7980692f25 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -290,7 +290,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *intspec, *tmp, *addr; u32 intsize, intlen; - int i, res = -EINVAL; + int i, res; pr_debug("of_irq_parse_one: dev=%s, index=%d\n", of_node_full_name(device), index); @@ -323,15 +323,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Get size of interrupt specifier */ tmp = of_get_property(p, "#interrupt-cells", NULL); - if (tmp == NULL) + if (tmp == NULL) { + res = -EINVAL; goto out; + } intsize = be32_to_cpu(*tmp); pr_debug(" intsize=%d intlen=%d\n", intsize, intlen); /* Check index */ - if ((index + 1) * intsize > intlen) + if ((index + 1) * intsize > intlen) { + res = -EINVAL; goto out; + } /* Copy intspec into irq structure */ intspec += index * intsize; diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index 352b4f28f82c..dee9270ba547 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -19,6 +19,7 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/idr.h> #include "of_private.h" @@ -85,7 +86,7 @@ static int of_overlay_apply_single_device_node(struct of_overlay *ov, struct device_node *target, struct device_node *child) { const char *cname; - struct device_node *tchild, *grandchild; + struct device_node *tchild; int ret = 0; cname = kbasename(child->full_name); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 0cf9a236d438..52c45c7df07f 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -92,6 +92,16 @@ static void __init of_selftest_find_node_by_name(void) "option path test failed\n"); of_node_put(np); + np = of_find_node_opts_by_path("/testcase-data:test/option", &options); + selftest(np && !strcmp("test/option", options), + "option path test, subcase #1 failed\n"); + of_node_put(np); + + np = of_find_node_opts_by_path("/testcase-data/testcase-device1:test/option", &options); + selftest(np && !strcmp("test/option", options), + "option path test, subcase #2 failed\n"); + of_node_put(np); + np = of_find_node_opts_by_path("/testcase-data:testoption", NULL); selftest(np, "NULL option path test failed\n"); of_node_put(np); @@ -102,6 +112,12 @@ static void __init of_selftest_find_node_by_name(void) "option alias path test failed\n"); of_node_put(np); + np = of_find_node_opts_by_path("testcase-alias:test/alias/option", + &options); + selftest(np && !strcmp("test/alias/option", options), + "option alias path test, subcase #1 failed\n"); + of_node_put(np); + np = of_find_node_opts_by_path("testcase-alias:testaliasoption", NULL); selftest(np, "NULL option alias path test failed\n"); of_node_put(np); @@ -378,9 +394,9 @@ static void __init of_selftest_property_string(void) rc = of_property_match_string(np, "phandle-list-names", "first"); selftest(rc == 0, "first expected:0 got:%i\n", rc); rc = of_property_match_string(np, "phandle-list-names", "second"); - selftest(rc == 1, "second expected:0 got:%i\n", rc); + selftest(rc == 1, "second expected:1 got:%i\n", rc); rc = of_property_match_string(np, "phandle-list-names", "third"); - selftest(rc == 2, "third expected:0 got:%i\n", rc); + selftest(rc == 2, "third expected:2 got:%i\n", rc); rc = of_property_match_string(np, "phandle-list-names", "fourth"); selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc); rc = of_property_match_string(np, "missing-property", "blah"); @@ -478,7 +494,6 @@ static void __init of_selftest_changeset(void) struct device_node *n1, *n2, *n21, *nremove, *parent, *np; struct of_changeset chgset; - of_changeset_init(&chgset); n1 = __of_node_dup(NULL, "/testcase-data/changeset/n1"); selftest(n1, "testcase setup failure\n"); n2 = __of_node_dup(NULL, "/testcase-data/changeset/n2"); @@ -979,7 +994,7 @@ static int of_path_platform_device_exists(const char *path) return pdev != NULL; } -#if IS_ENABLED(CONFIG_I2C) +#if IS_BUILTIN(CONFIG_I2C) /* get the i2c client device instantiated at the path */ static struct i2c_client *of_path_to_i2c_client(const char *path) @@ -1445,7 +1460,7 @@ static void of_selftest_overlay_11(void) return; } -#if IS_ENABLED(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY) +#if IS_BUILTIN(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY) struct selftest_i2c_bus_data { struct platform_device *pdev; @@ -1584,7 +1599,7 @@ static struct i2c_driver selftest_i2c_dev_driver = { .id_table = selftest_i2c_dev_id, }; -#if IS_ENABLED(CONFIG_I2C_MUX) +#if IS_BUILTIN(CONFIG_I2C_MUX) struct selftest_i2c_mux_data { int nchans; @@ -1695,7 +1710,7 @@ static int of_selftest_overlay_i2c_init(void) "could not register selftest i2c bus driver\n")) return ret; -#if IS_ENABLED(CONFIG_I2C_MUX) +#if IS_BUILTIN(CONFIG_I2C_MUX) ret = i2c_add_driver(&selftest_i2c_mux_driver); if (selftest(ret == 0, "could not register selftest i2c mux driver\n")) @@ -1707,7 +1722,7 @@ static int of_selftest_overlay_i2c_init(void) static void of_selftest_overlay_i2c_cleanup(void) { -#if IS_ENABLED(CONFIG_I2C_MUX) +#if IS_BUILTIN(CONFIG_I2C_MUX) i2c_del_driver(&selftest_i2c_mux_driver); #endif platform_driver_unregister(&selftest_i2c_bus_driver); @@ -1814,7 +1829,7 @@ static void __init of_selftest_overlay(void) of_selftest_overlay_10(); of_selftest_overlay_11(); -#if IS_ENABLED(CONFIG_I2C) +#if IS_BUILTIN(CONFIG_I2C) if (selftest(of_selftest_overlay_i2c_init() == 0, "i2c init failed\n")) goto out; diff --git a/drivers/pci/host/pci-versatile.c b/drivers/pci/host/pci-versatile.c index 1ec694a52379..464bf492ee2a 100644 --- a/drivers/pci/host/pci-versatile.c +++ b/drivers/pci/host/pci-versatile.c @@ -80,7 +80,7 @@ static int versatile_pci_parse_request_of_pci_ranges(struct device *dev, if (err) return err; - resource_list_for_each_entry(win, res, list) { + resource_list_for_each_entry(win, res) { struct resource *parent, *res = win->res; switch (resource_type(res)) { diff --git a/drivers/pci/host/pci-xgene.c b/drivers/pci/host/pci-xgene.c index aab55474dd0d..ee082c0366ec 100644 --- a/drivers/pci/host/pci-xgene.c +++ b/drivers/pci/host/pci-xgene.c @@ -127,7 +127,7 @@ static bool xgene_pcie_hide_rc_bars(struct pci_bus *bus, int offset) return false; } -static int xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn, +static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn, int offset) { struct xgene_pcie_port *port = bus->sysdata; @@ -137,7 +137,7 @@ static int xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn, return NULL; xgene_pcie_set_rtdid_reg(bus, devfn); - return xgene_pcie_get_cfg_base(bus); + return xgene_pcie_get_cfg_base(bus) + offset; } static struct pci_ops xgene_pcie_ops = { diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 1f4ea6f2d910..2e9f84fdd9ce 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -342,7 +342,7 @@ static const struct irq_domain_ops msi_domain_ops = { .map = dw_pcie_msi_map, }; -int __init dw_pcie_host_init(struct pcie_port *pp) +int dw_pcie_host_init(struct pcie_port *pp) { struct device_node *np = pp->dev->of_node; struct platform_device *pdev = to_platform_device(pp->dev); diff --git a/drivers/pci/host/pcie-spear13xx.c b/drivers/pci/host/pcie-spear13xx.c index 866465fd3dbf..020d78890719 100644 --- a/drivers/pci/host/pcie-spear13xx.c +++ b/drivers/pci/host/pcie-spear13xx.c @@ -269,7 +269,7 @@ static struct pcie_host_ops spear13xx_pcie_host_ops = { .host_init = spear13xx_pcie_host_init, }; -static int __init spear13xx_add_pcie_port(struct pcie_port *pp, +static int spear13xx_add_pcie_port(struct pcie_port *pp, struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -299,7 +299,7 @@ static int __init spear13xx_add_pcie_port(struct pcie_port *pp, return 0; } -static int __init spear13xx_pcie_probe(struct platform_device *pdev) +static int spear13xx_pcie_probe(struct platform_device *pdev) { struct spear13xx_pcie *spear13xx_pcie; struct pcie_port *pp; @@ -370,7 +370,7 @@ static const struct of_device_id spear13xx_pcie_of_match[] = { }; MODULE_DEVICE_TABLE(of, spear13xx_pcie_of_match); -static struct platform_driver spear13xx_pcie_driver __initdata = { +static struct platform_driver spear13xx_pcie_driver = { .probe = spear13xx_pcie_probe, .driver = { .name = "spear-pcie", diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index 7d48ecae6695..788db48dbbad 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -286,11 +286,12 @@ int cpci_configure_slot(struct slot *slot) } parent = slot->dev->bus; - list_for_each_entry(dev, &parent->devices, bus_list) + list_for_each_entry(dev, &parent->devices, bus_list) { if (PCI_SLOT(dev->devfn) != PCI_SLOT(slot->devfn)) continue; if (pci_is_bridge(dev)) pci_hp_add_bridge(dev); + } pci_assign_unassigned_bridge_resources(parent->self); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 489063987325..c93fbe76d281 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -248,6 +248,9 @@ int pci_get_hp_params(struct pci_dev *dev, struct hotplug_params *hpp) acpi_handle handle, phandle; struct pci_bus *pbus; + if (acpi_pci_disabled) + return -ENODEV; + handle = NULL; for (pbus = dev->bus; pbus; pbus = pbus->parent) { handle = acpi_pci_get_bridge_handle(pbus); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index aa012fb3834b..312f23a8429c 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -521,7 +521,8 @@ static ssize_t driver_override_store(struct device *dev, struct pci_dev *pdev = to_pci_dev(dev); char *driver_override, *old = pdev->driver_override, *cp; - if (count > PATH_MAX) + /* We need to keep extra room for a newline */ + if (count >= (PAGE_SIZE - 1)) return -EINVAL; driver_override = kstrndup(buf, count, GFP_KERNEL); @@ -549,7 +550,7 @@ static ssize_t driver_override_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%s\n", pdev->driver_override); + return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); } static DEVICE_ATTR_RW(driver_override); diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index c6849d9e86ce..167fe411ce2e 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -132,16 +132,8 @@ static const char *aer_agent_string[] = { static void __print_tlp_header(struct pci_dev *dev, struct aer_header_log_regs *t) { - unsigned char *tlp = (unsigned char *)&t; - - dev_err(&dev->dev, " TLP Header:" - " %02x%02x%02x%02x %02x%02x%02x%02x" - " %02x%02x%02x%02x %02x%02x%02x%02x\n", - *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, - *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), - *(tlp + 11), *(tlp + 10), *(tlp + 9), - *(tlp + 8), *(tlp + 15), *(tlp + 14), - *(tlp + 13), *(tlp + 12)); + dev_err(&dev->dev, " TLP Header: %08x %08x %08x %08x\n", + t->dw0, t->dw1, t->dw2, t->dw3); } static void __aer_print_error(struct pci_dev *dev, diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 3bb49252a098..45f67c63d385 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -69,8 +69,7 @@ config YENTA tristate "CardBus yenta-compatible bridge support" depends on PCI select CARDBUS if !EXPERT - select PCCARD_NONSTATIC if PCMCIA != n && ISA - select PCCARD_PCI if PCMCIA !=n && !ISA + select PCCARD_NONSTATIC if PCMCIA != n ---help--- This option enables support for CardBus host bridges. Virtually all modern PCMCIA bridges are CardBus compatible. A "bridge" is @@ -110,8 +109,7 @@ config YENTA_TOSHIBA config PD6729 tristate "Cirrus PD6729 compatible bridge support" depends on PCMCIA && PCI - select PCCARD_NONSTATIC if PCMCIA != n && ISA - select PCCARD_PCI if PCMCIA !=n && !ISA + select PCCARD_NONSTATIC help This provides support for the Cirrus PD6729 PCI-to-PCMCIA bridge device, found in some older laptops and PCMCIA card readers. @@ -119,8 +117,7 @@ config PD6729 config I82092 tristate "i82092 compatible bridge support" depends on PCMCIA && PCI - select PCCARD_NONSTATIC if PCMCIA != n && ISA - select PCCARD_PCI if PCMCIA !=n && !ISA + select PCCARD_NONSTATIC help This provides support for the Intel I82092AA PCI-to-PCMCIA bridge device, found in some older laptops and more commonly in evaluation boards for the @@ -291,9 +288,6 @@ config ELECTRA_CF Say Y here to support the CompactFlash controller on the PA Semi Electra eval board. -config PCCARD_PCI - bool - config PCCARD_NONSTATIC bool diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index f1a7ca04d89e..27e94b30cf96 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -12,7 +12,6 @@ obj-$(CONFIG_PCMCIA) += pcmcia.o pcmcia_rsrc-y += rsrc_mgr.o pcmcia_rsrc-$(CONFIG_PCCARD_NONSTATIC) += rsrc_nonstatic.o pcmcia_rsrc-$(CONFIG_PCCARD_IODYN) += rsrc_iodyn.o -pcmcia_rsrc-$(CONFIG_PCCARD_PCI) += rsrc_pci.o obj-$(CONFIG_PCCARD) += pcmcia_rsrc.o diff --git a/drivers/pcmcia/rsrc_pci.c b/drivers/pcmcia/rsrc_pci.c deleted file mode 100644 index 1f67b3ba70fb..000000000000 --- a/drivers/pcmcia/rsrc_pci.c +++ /dev/null @@ -1,173 +0,0 @@ -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/pci.h> - -#include <pcmcia/ss.h> -#include <pcmcia/cistpl.h> -#include "cs_internal.h" - - -struct pcmcia_align_data { - unsigned long mask; - unsigned long offset; -}; - -static resource_size_t pcmcia_align(void *align_data, - const struct resource *res, - resource_size_t size, resource_size_t align) -{ - struct pcmcia_align_data *data = align_data; - resource_size_t start; - - start = (res->start & ~data->mask) + data->offset; - if (start < res->start) - start += data->mask + 1; - return start; -} - -static struct resource *find_io_region(struct pcmcia_socket *s, - unsigned long base, int num, - unsigned long align) -{ - struct resource *res = pcmcia_make_resource(0, num, IORESOURCE_IO, - dev_name(&s->dev)); - struct pcmcia_align_data data; - int ret; - - data.mask = align - 1; - data.offset = base & data.mask; - - ret = pci_bus_alloc_resource(s->cb_dev->bus, res, num, 1, - base, 0, pcmcia_align, &data); - if (ret != 0) { - kfree(res); - res = NULL; - } - return res; -} - -static int res_pci_find_io(struct pcmcia_socket *s, unsigned int attr, - unsigned int *base, unsigned int num, - unsigned int align, struct resource **parent) -{ - int i, ret = 0; - - /* Check for an already-allocated window that must conflict with - * what was asked for. It is a hack because it does not catch all - * potential conflicts, just the most obvious ones. - */ - for (i = 0; i < MAX_IO_WIN; i++) { - if (!s->io[i].res) - continue; - - if (!*base) - continue; - - if ((s->io[i].res->start & (align-1)) == *base) - return -EBUSY; - } - - for (i = 0; i < MAX_IO_WIN; i++) { - struct resource *res = s->io[i].res; - unsigned int try; - - if (res && (res->flags & IORESOURCE_BITS) != - (attr & IORESOURCE_BITS)) - continue; - - if (!res) { - if (align == 0) - align = 0x10000; - - res = s->io[i].res = find_io_region(s, *base, num, - align); - if (!res) - return -EINVAL; - - *base = res->start; - s->io[i].res->flags = - ((res->flags & ~IORESOURCE_BITS) | - (attr & IORESOURCE_BITS)); - s->io[i].InUse = num; - *parent = res; - return 0; - } - - /* Try to extend top of window */ - try = res->end + 1; - if ((*base == 0) || (*base == try)) { - ret = adjust_resource(s->io[i].res, res->start, - resource_size(res) + num); - if (ret) - continue; - *base = try; - s->io[i].InUse += num; - *parent = res; - return 0; - } - - /* Try to extend bottom of window */ - try = res->start - num; - if ((*base == 0) || (*base == try)) { - ret = adjust_resource(s->io[i].res, - res->start - num, - resource_size(res) + num); - if (ret) - continue; - *base = try; - s->io[i].InUse += num; - *parent = res; - return 0; - } - } - return -EINVAL; -} - -static struct resource *res_pci_find_mem(u_long base, u_long num, - u_long align, int low, struct pcmcia_socket *s) -{ - struct resource *res = pcmcia_make_resource(0, num, IORESOURCE_MEM, - dev_name(&s->dev)); - struct pcmcia_align_data data; - unsigned long min; - int ret; - - if (align < 0x20000) - align = 0x20000; - data.mask = align - 1; - data.offset = base & data.mask; - - min = 0; - if (!low) - min = 0x100000UL; - - ret = pci_bus_alloc_resource(s->cb_dev->bus, - res, num, 1, min, 0, - pcmcia_align, &data); - - if (ret != 0) { - kfree(res); - res = NULL; - } - return res; -} - - -static int res_pci_init(struct pcmcia_socket *s) -{ - if (!s->cb_dev || !(s->features & SS_CAP_PAGE_REGS)) { - dev_err(&s->dev, "not supported by res_pci\n"); - return -EOPNOTSUPP; - } - return 0; -} - -struct pccard_resource_ops pccard_nonstatic_ops = { - .validate_mem = NULL, - .find_io = res_pci_find_io, - .find_mem = res_pci_find_mem, - .init = res_pci_init, - .exit = NULL, -}; -EXPORT_SYMBOL(pccard_nonstatic_ops); diff --git a/drivers/phy/phy-armada375-usb2.c b/drivers/phy/phy-armada375-usb2.c index 7c99ca256f05..8ccc3952c13d 100644 --- a/drivers/phy/phy-armada375-usb2.c +++ b/drivers/phy/phy-armada375-usb2.c @@ -37,7 +37,7 @@ static int armada375_usb_phy_init(struct phy *phy) struct armada375_cluster_phy *cluster_phy; u32 reg; - cluster_phy = dev_get_drvdata(phy->dev.parent); + cluster_phy = phy_get_drvdata(phy); if (!cluster_phy) return -ENODEV; @@ -131,6 +131,7 @@ static int armada375_usb_phy_probe(struct platform_device *pdev) cluster_phy->reg = usb_cluster_base; dev_set_drvdata(dev, cluster_phy); + phy_set_drvdata(phy, cluster_phy); phy_provider = devm_of_phy_provider_register(&pdev->dev, armada375_usb_phy_xlate); diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index a12d35338313..3791838f4bd4 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -52,7 +52,9 @@ static void devm_phy_consume(struct device *dev, void *res) static int devm_phy_match(struct device *dev, void *res, void *match_data) { - return res == match_data; + struct phy **phy = res; + + return *phy == match_data; } /** @@ -223,6 +225,7 @@ int phy_init(struct phy *phy) ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) return ret; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); if (phy->init_count == 0 && phy->ops->init) { @@ -231,8 +234,6 @@ int phy_init(struct phy *phy) dev_err(&phy->dev, "phy init failed --> %d\n", ret); goto out; } - } else { - ret = 0; /* Override possible ret == -ENOTSUPP */ } ++phy->init_count; @@ -253,6 +254,7 @@ int phy_exit(struct phy *phy) ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) return ret; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); if (phy->init_count == 1 && phy->ops->exit) { @@ -287,6 +289,7 @@ int phy_power_on(struct phy *phy) ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) return ret; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); if (phy->power_count == 0 && phy->ops->power_on) { @@ -295,8 +298,6 @@ int phy_power_on(struct phy *phy) dev_err(&phy->dev, "phy poweron failed --> %d\n", ret); goto out; } - } else { - ret = 0; /* Override possible ret == -ENOTSUPP */ } ++phy->power_count; mutex_unlock(&phy->mutex); diff --git a/drivers/phy/phy-exynos-dp-video.c b/drivers/phy/phy-exynos-dp-video.c index f86cbe68ddaf..179cbf9451aa 100644 --- a/drivers/phy/phy-exynos-dp-video.c +++ b/drivers/phy/phy-exynos-dp-video.c @@ -30,28 +30,13 @@ struct exynos_dp_video_phy { const struct exynos_dp_video_phy_drvdata *drvdata; }; -static void exynos_dp_video_phy_pwr_isol(struct exynos_dp_video_phy *state, - unsigned int on) -{ - unsigned int val; - - if (IS_ERR(state->regs)) - return; - - val = on ? 0 : EXYNOS5_PHY_ENABLE; - - regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset, - EXYNOS5_PHY_ENABLE, val); -} - static int exynos_dp_video_phy_power_on(struct phy *phy) { struct exynos_dp_video_phy *state = phy_get_drvdata(phy); /* Disable power isolation on DP-PHY */ - exynos_dp_video_phy_pwr_isol(state, 0); - - return 0; + return regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset, + EXYNOS5_PHY_ENABLE, EXYNOS5_PHY_ENABLE); } static int exynos_dp_video_phy_power_off(struct phy *phy) @@ -59,9 +44,8 @@ static int exynos_dp_video_phy_power_off(struct phy *phy) struct exynos_dp_video_phy *state = phy_get_drvdata(phy); /* Enable power isolation on DP-PHY */ - exynos_dp_video_phy_pwr_isol(state, 1); - - return 0; + return regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset, + EXYNOS5_PHY_ENABLE, 0); } static struct phy_ops exynos_dp_video_phy_ops = { diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c index f017b2f2a54e..df7519a39ba0 100644 --- a/drivers/phy/phy-exynos-mipi-video.c +++ b/drivers/phy/phy-exynos-mipi-video.c @@ -43,7 +43,6 @@ struct exynos_mipi_video_phy { } phys[EXYNOS_MIPI_PHYS_NUM]; spinlock_t slock; void __iomem *regs; - struct mutex mutex; struct regmap *regmap; }; @@ -59,8 +58,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state, else reset = EXYNOS4_MIPI_PHY_SRESETN; - if (state->regmap) { - mutex_lock(&state->mutex); + spin_lock(&state->slock); + + if (!IS_ERR(state->regmap)) { regmap_read(state->regmap, offset, &val); if (on) val |= reset; @@ -72,11 +72,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state, else if (!(val & EXYNOS4_MIPI_PHY_RESET_MASK)) val &= ~EXYNOS4_MIPI_PHY_ENABLE; regmap_write(state->regmap, offset, val); - mutex_unlock(&state->mutex); } else { addr = state->regs + EXYNOS_MIPI_PHY_CONTROL(id / 2); - spin_lock(&state->slock); val = readl(addr); if (on) val |= reset; @@ -90,9 +88,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state, val &= ~EXYNOS4_MIPI_PHY_ENABLE; writel(val, addr); - spin_unlock(&state->slock); } + spin_unlock(&state->slock); return 0; } @@ -158,7 +156,6 @@ static int exynos_mipi_video_phy_probe(struct platform_device *pdev) dev_set_drvdata(dev, state); spin_lock_init(&state->slock); - mutex_init(&state->mutex); for (i = 0; i < EXYNOS_MIPI_PHYS_NUM; i++) { struct phy *phy = devm_phy_create(dev, NULL, diff --git a/drivers/phy/phy-exynos4210-usb2.c b/drivers/phy/phy-exynos4210-usb2.c index 236a52ad94eb..f30bbb0fb3b2 100644 --- a/drivers/phy/phy-exynos4210-usb2.c +++ b/drivers/phy/phy-exynos4210-usb2.c @@ -250,7 +250,6 @@ static const struct samsung_usb2_common_phy exynos4210_phys[] = { .power_on = exynos4210_power_on, .power_off = exynos4210_power_off, }, - {}, }; const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = { diff --git a/drivers/phy/phy-exynos4x12-usb2.c b/drivers/phy/phy-exynos4x12-usb2.c index 0b9de88579b1..765da90a536f 100644 --- a/drivers/phy/phy-exynos4x12-usb2.c +++ b/drivers/phy/phy-exynos4x12-usb2.c @@ -361,7 +361,6 @@ static const struct samsung_usb2_common_phy exynos4x12_phys[] = { .power_on = exynos4x12_power_on, .power_off = exynos4x12_power_off, }, - {}, }; const struct samsung_usb2_phy_config exynos3250_usb2_phy_config = { diff --git a/drivers/phy/phy-exynos5-usbdrd.c b/drivers/phy/phy-exynos5-usbdrd.c index 04374018425f..e2a0be750ad9 100644 --- a/drivers/phy/phy-exynos5-usbdrd.c +++ b/drivers/phy/phy-exynos5-usbdrd.c @@ -531,7 +531,7 @@ static struct phy *exynos5_usbdrd_phy_xlate(struct device *dev, { struct exynos5_usbdrd_phy *phy_drd = dev_get_drvdata(dev); - if (WARN_ON(args->args[0] > EXYNOS5_DRDPHYS_NUM)) + if (WARN_ON(args->args[0] >= EXYNOS5_DRDPHYS_NUM)) return ERR_PTR(-ENODEV); return phy_drd->phys[args->args[0]].phy; diff --git a/drivers/phy/phy-exynos5250-usb2.c b/drivers/phy/phy-exynos5250-usb2.c index 1c139aa0d074..2ed1735a076a 100644 --- a/drivers/phy/phy-exynos5250-usb2.c +++ b/drivers/phy/phy-exynos5250-usb2.c @@ -391,7 +391,6 @@ static const struct samsung_usb2_common_phy exynos5250_phys[] = { .power_on = exynos5250_power_on, .power_off = exynos5250_power_off, }, - {}, }; const struct samsung_usb2_phy_config exynos5250_usb2_phy_config = { diff --git a/drivers/phy/phy-hix5hd2-sata.c b/drivers/phy/phy-hix5hd2-sata.c index 34915b4202f1..d6b22659cac1 100644 --- a/drivers/phy/phy-hix5hd2-sata.c +++ b/drivers/phy/phy-hix5hd2-sata.c @@ -147,6 +147,9 @@ static int hix5hd2_sata_phy_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + priv->base = devm_ioremap(dev, res->start, resource_size(res)); if (!priv->base) return -ENOMEM; diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c index 9b2848e6115d..933435214acc 100644 --- a/drivers/phy/phy-miphy28lp.c +++ b/drivers/phy/phy-miphy28lp.c @@ -228,6 +228,7 @@ struct miphy28lp_dev { struct regmap *regmap; struct mutex miphy_mutex; struct miphy28lp_phy **phys; + int nphys; }; struct miphy_initval { @@ -1116,7 +1117,7 @@ static struct phy *miphy28lp_xlate(struct device *dev, return ERR_PTR(-EINVAL); } - for (index = 0; index < of_get_child_count(dev->of_node); index++) + for (index = 0; index < miphy_dev->nphys; index++) if (phynode == miphy_dev->phys[index]->phy->dev.of_node) { miphy_phy = miphy_dev->phys[index]; break; @@ -1138,6 +1139,7 @@ static struct phy *miphy28lp_xlate(struct device *dev, static struct phy_ops miphy28lp_ops = { .init = miphy28lp_init, + .owner = THIS_MODULE, }; static int miphy28lp_probe_resets(struct device_node *node, @@ -1200,16 +1202,15 @@ static int miphy28lp_probe(struct platform_device *pdev) struct miphy28lp_dev *miphy_dev; struct phy_provider *provider; struct phy *phy; - int chancount, port = 0; - int ret; + int ret, port = 0; miphy_dev = devm_kzalloc(&pdev->dev, sizeof(*miphy_dev), GFP_KERNEL); if (!miphy_dev) return -ENOMEM; - chancount = of_get_child_count(np); - miphy_dev->phys = devm_kzalloc(&pdev->dev, sizeof(phy) * chancount, - GFP_KERNEL); + miphy_dev->nphys = of_get_child_count(np); + miphy_dev->phys = devm_kcalloc(&pdev->dev, miphy_dev->nphys, + sizeof(*miphy_dev->phys), GFP_KERNEL); if (!miphy_dev->phys) return -ENOMEM; diff --git a/drivers/phy/phy-miphy365x.c b/drivers/phy/phy-miphy365x.c index 6c80154e8bff..51b459db9137 100644 --- a/drivers/phy/phy-miphy365x.c +++ b/drivers/phy/phy-miphy365x.c @@ -150,6 +150,7 @@ struct miphy365x_dev { struct regmap *regmap; struct mutex miphy_mutex; struct miphy365x_phy **phys; + int nphys; }; /* @@ -485,7 +486,7 @@ static struct phy *miphy365x_xlate(struct device *dev, return ERR_PTR(-EINVAL); } - for (index = 0; index < of_get_child_count(dev->of_node); index++) + for (index = 0; index < miphy_dev->nphys; index++) if (phynode == miphy_dev->phys[index]->phy->dev.of_node) { miphy_phy = miphy_dev->phys[index]; break; @@ -541,16 +542,15 @@ static int miphy365x_probe(struct platform_device *pdev) struct miphy365x_dev *miphy_dev; struct phy_provider *provider; struct phy *phy; - int chancount, port = 0; - int ret; + int ret, port = 0; miphy_dev = devm_kzalloc(&pdev->dev, sizeof(*miphy_dev), GFP_KERNEL); if (!miphy_dev) return -ENOMEM; - chancount = of_get_child_count(np); - miphy_dev->phys = devm_kzalloc(&pdev->dev, sizeof(phy) * chancount, - GFP_KERNEL); + miphy_dev->nphys = of_get_child_count(np); + miphy_dev->phys = devm_kcalloc(&pdev->dev, miphy_dev->nphys, + sizeof(*miphy_dev->phys), GFP_KERNEL); if (!miphy_dev->phys) return -ENOMEM; diff --git a/drivers/phy/phy-omap-control.c b/drivers/phy/phy-omap-control.c index efe724f97e02..93252e053a31 100644 --- a/drivers/phy/phy-omap-control.c +++ b/drivers/phy/phy-omap-control.c @@ -360,7 +360,7 @@ static void __exit omap_control_phy_exit(void) } module_exit(omap_control_phy_exit); -MODULE_ALIAS("platform: omap_control_phy"); +MODULE_ALIAS("platform:omap_control_phy"); MODULE_AUTHOR("Texas Instruments Inc."); MODULE_DESCRIPTION("OMAP Control Module PHY Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c index 6f4aef3db248..4757e765696a 100644 --- a/drivers/phy/phy-omap-usb2.c +++ b/drivers/phy/phy-omap-usb2.c @@ -296,10 +296,11 @@ static int omap_usb2_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "found usb_otg_ss_refclk960m, please fix DTS\n"); } - } else { - clk_prepare(phy->optclk); } + if (!IS_ERR(phy->optclk)) + clk_prepare(phy->optclk); + usb_add_phy_dev(&phy->phy); return 0; @@ -383,7 +384,7 @@ static struct platform_driver omap_usb2_driver = { module_platform_driver(omap_usb2_driver); -MODULE_ALIAS("platform: omap_usb2"); +MODULE_ALIAS("platform:omap_usb2"); MODULE_AUTHOR("Texas Instruments Inc."); MODULE_DESCRIPTION("OMAP USB2 phy driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/phy-rockchip-usb.c b/drivers/phy/phy-rockchip-usb.c index 22011c3b6a4b..7d4c33643768 100644 --- a/drivers/phy/phy-rockchip-usb.c +++ b/drivers/phy/phy-rockchip-usb.c @@ -61,8 +61,6 @@ static int rockchip_usb_phy_power_off(struct phy *_phy) return ret; clk_disable_unprepare(phy->clk); - if (ret) - return ret; return 0; } @@ -78,8 +76,10 @@ static int rockchip_usb_phy_power_on(struct phy *_phy) /* Power up usb phy analog blocks by set siddq 0 */ ret = rockchip_usb_phy_power(phy, 0); - if (ret) + if (ret) { + clk_disable_unprepare(phy->clk); return ret; + } return 0; } diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c index 95c88f929f27..2ba610b72ca2 100644 --- a/drivers/phy/phy-ti-pipe3.c +++ b/drivers/phy/phy-ti-pipe3.c @@ -165,15 +165,11 @@ static int ti_pipe3_dpll_wait_lock(struct ti_pipe3 *phy) cpu_relax(); val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS); if (val & PLL_LOCK) - break; + return 0; } while (!time_after(jiffies, timeout)); - if (!(val & PLL_LOCK)) { - dev_err(phy->dev, "DPLL failed to lock\n"); - return -EBUSY; - } - - return 0; + dev_err(phy->dev, "DPLL failed to lock\n"); + return -EBUSY; } static int ti_pipe3_dpll_program(struct ti_pipe3 *phy) @@ -608,7 +604,7 @@ static struct platform_driver ti_pipe3_driver = { module_platform_driver(ti_pipe3_driver); -MODULE_ALIAS("platform: ti_pipe3"); +MODULE_ALIAS("platform:ti_pipe3"); MODULE_AUTHOR("Texas Instruments Inc."); MODULE_DESCRIPTION("TI PIPE3 phy driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c index 8e87f54671f3..bc42d6a8939f 100644 --- a/drivers/phy/phy-twl4030-usb.c +++ b/drivers/phy/phy-twl4030-usb.c @@ -666,7 +666,6 @@ static int twl4030_usb_probe(struct platform_device *pdev) twl->dev = &pdev->dev; twl->irq = platform_get_irq(pdev, 0); twl->vbus_supplied = false; - twl->linkstat = -EINVAL; twl->linkstat = OMAP_MUSB_UNKNOWN; twl->phy.dev = twl->dev; diff --git a/drivers/phy/phy-xgene.c b/drivers/phy/phy-xgene.c index 29214a36ea28..2263cd010032 100644 --- a/drivers/phy/phy-xgene.c +++ b/drivers/phy/phy-xgene.c @@ -1704,7 +1704,6 @@ static int xgene_phy_probe(struct platform_device *pdev) for (i = 0; i < MAX_LANE; i++) ctx->sata_param.speed[i] = 2; /* Default to Gen3 */ - ctx->dev = &pdev->dev; platform_set_drvdata(pdev, ctx); ctx->phy = devm_phy_create(ctx->dev, NULL, &xgene_phy_ops); diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 5afe03e28b91..2062c224e32f 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -66,6 +66,10 @@ #define BYT_DIR_MASK (BIT(1) | BIT(2)) #define BYT_TRIG_MASK (BIT(26) | BIT(25) | BIT(24)) +#define BYT_CONF0_RESTORE_MASK (BYT_DIRECT_IRQ_EN | BYT_TRIG_MASK | \ + BYT_PIN_MUX) +#define BYT_VAL_RESTORE_MASK (BYT_DIR_MASK | BYT_LEVEL) + #define BYT_NGPIO_SCORE 102 #define BYT_NGPIO_NCORE 28 #define BYT_NGPIO_SUS 44 @@ -134,12 +138,18 @@ static struct pinctrl_gpio_range byt_ranges[] = { }, }; +struct byt_gpio_pin_context { + u32 conf0; + u32 val; +}; + struct byt_gpio { struct gpio_chip chip; struct platform_device *pdev; spinlock_t lock; void __iomem *reg_base; struct pinctrl_gpio_range *range; + struct byt_gpio_pin_context *saved_context; }; #define to_byt_gpio(c) container_of(c, struct byt_gpio, chip) @@ -158,40 +168,62 @@ static void __iomem *byt_gpio_reg(struct gpio_chip *chip, unsigned offset, return vg->reg_base + reg_offset + reg; } -static bool is_special_pin(struct byt_gpio *vg, unsigned offset) +static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned offset) +{ + void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG); + unsigned long flags; + u32 value; + + spin_lock_irqsave(&vg->lock, flags); + value = readl(reg); + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); + spin_unlock_irqrestore(&vg->lock, flags); +} + +static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned offset) { /* SCORE pin 92-93 */ if (!strcmp(vg->range->name, BYT_SCORE_ACPI_UID) && offset >= 92 && offset <= 93) - return true; + return 1; /* SUS pin 11-21 */ if (!strcmp(vg->range->name, BYT_SUS_ACPI_UID) && offset >= 11 && offset <= 21) - return true; + return 1; - return false; + return 0; } static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) { struct byt_gpio *vg = to_byt_gpio(chip); void __iomem *reg = byt_gpio_reg(chip, offset, BYT_CONF0_REG); - u32 value; - bool special; + u32 value, gpio_mux; /* * In most cases, func pin mux 000 means GPIO function. * But, some pins may have func pin mux 001 represents - * GPIO function. Only allow user to export pin with - * func pin mux preset as GPIO function by BIOS/FW. + * GPIO function. + * + * Because there are devices out there where some pins were not + * configured correctly we allow changing the mux value from + * request (but print out warning about that). */ value = readl(reg) & BYT_PIN_MUX; - special = is_special_pin(vg, offset); - if ((special && value != 1) || (!special && value)) { - dev_err(&vg->pdev->dev, - "pin %u cannot be used as GPIO.\n", offset); - return -EINVAL; + gpio_mux = byt_get_gpio_mux(vg, offset); + if (WARN_ON(gpio_mux != value)) { + unsigned long flags; + + spin_lock_irqsave(&vg->lock, flags); + value = readl(reg) & ~BYT_PIN_MUX; + value |= gpio_mux; + writel(value, reg); + spin_unlock_irqrestore(&vg->lock, flags); + + dev_warn(&vg->pdev->dev, + "pin %u forcibly re-configured as GPIO\n", offset); } pm_runtime_get(&vg->pdev->dev); @@ -202,14 +234,8 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset) static void byt_gpio_free(struct gpio_chip *chip, unsigned offset) { struct byt_gpio *vg = to_byt_gpio(chip); - void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG); - u32 value; - - /* clear interrupt triggering */ - value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); - writel(value, reg); + byt_gpio_clear_triggering(vg, offset); pm_runtime_put(&vg->pdev->dev); } @@ -236,23 +262,13 @@ static int byt_irq_type(struct irq_data *d, unsigned type) value &= ~(BYT_DIRECT_IRQ_EN | BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); - switch (type) { - case IRQ_TYPE_LEVEL_HIGH: - value |= BYT_TRIG_LVL; - case IRQ_TYPE_EDGE_RISING: - value |= BYT_TRIG_POS; - break; - case IRQ_TYPE_LEVEL_LOW: - value |= BYT_TRIG_LVL; - case IRQ_TYPE_EDGE_FALLING: - value |= BYT_TRIG_NEG; - break; - case IRQ_TYPE_EDGE_BOTH: - value |= (BYT_TRIG_NEG | BYT_TRIG_POS); - break; - } writel(value, reg); + if (type & IRQ_TYPE_EDGE_BOTH) + __irq_set_handler_locked(d->irq, handle_edge_irq); + else if (type & IRQ_TYPE_LEVEL_MASK) + __irq_set_handler_locked(d->irq, handle_level_irq); + spin_unlock_irqrestore(&vg->lock, flags); return 0; @@ -410,58 +426,80 @@ static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc) struct irq_data *data = irq_desc_get_irq_data(desc); struct byt_gpio *vg = to_byt_gpio(irq_desc_get_handler_data(desc)); struct irq_chip *chip = irq_data_get_irq_chip(data); - u32 base, pin, mask; + u32 base, pin; void __iomem *reg; - u32 pending; + unsigned long pending; unsigned virq; - int looplimit = 0; /* check from GPIO controller which pin triggered the interrupt */ for (base = 0; base < vg->chip.ngpio; base += 32) { - reg = byt_gpio_reg(&vg->chip, base, BYT_INT_STAT_REG); - - while ((pending = readl(reg))) { - pin = __ffs(pending); - mask = BIT(pin); - /* Clear before handling so we can't lose an edge */ - writel(mask, reg); - + pending = readl(reg); + for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); - - /* In case bios or user sets triggering incorretly a pin - * might remain in "interrupt triggered" state. - */ - if (looplimit++ > 32) { - dev_err(&vg->pdev->dev, - "Gpio %d interrupt flood, disabling\n", - base + pin); - - reg = byt_gpio_reg(&vg->chip, base + pin, - BYT_CONF0_REG); - mask = readl(reg); - mask &= ~(BYT_TRIG_NEG | BYT_TRIG_POS | - BYT_TRIG_LVL); - writel(mask, reg); - mask = readl(reg); /* flush */ - break; - } } } chip->irq_eoi(data); } +static void byt_irq_ack(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct byt_gpio *vg = to_byt_gpio(gc); + unsigned offset = irqd_to_hwirq(d); + void __iomem *reg; + + reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG); + writel(BIT(offset % 32), reg); +} + static void byt_irq_unmask(struct irq_data *d) { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct byt_gpio *vg = to_byt_gpio(gc); + unsigned offset = irqd_to_hwirq(d); + unsigned long flags; + void __iomem *reg; + u32 value; + + spin_lock_irqsave(&vg->lock, flags); + + reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG); + value = readl(reg); + + switch (irqd_get_trigger_type(d)) { + case IRQ_TYPE_LEVEL_HIGH: + value |= BYT_TRIG_LVL; + case IRQ_TYPE_EDGE_RISING: + value |= BYT_TRIG_POS; + break; + case IRQ_TYPE_LEVEL_LOW: + value |= BYT_TRIG_LVL; + case IRQ_TYPE_EDGE_FALLING: + value |= BYT_TRIG_NEG; + break; + case IRQ_TYPE_EDGE_BOTH: + value |= (BYT_TRIG_NEG | BYT_TRIG_POS); + break; + } + + writel(value, reg); + + spin_unlock_irqrestore(&vg->lock, flags); } static void byt_irq_mask(struct irq_data *d) { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct byt_gpio *vg = to_byt_gpio(gc); + + byt_gpio_clear_triggering(vg, irqd_to_hwirq(d)); } static struct irq_chip byt_irqchip = { .name = "BYT-GPIO", + .irq_ack = byt_irq_ack, .irq_mask = byt_irq_mask, .irq_unmask = byt_irq_unmask, .irq_set_type = byt_irq_type, @@ -472,6 +510,21 @@ static void byt_gpio_irq_init_hw(struct byt_gpio *vg) { void __iomem *reg; u32 base, value; + int i; + + /* + * Clear interrupt triggers for all pins that are GPIOs and + * do not use direct IRQ mode. This will prevent spurious + * interrupts from misconfigured pins. + */ + for (i = 0; i < vg->chip.ngpio; i++) { + value = readl(byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG)); + if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i) && + !(value & BYT_DIRECT_IRQ_EN)) { + byt_gpio_clear_triggering(vg, i); + dev_dbg(&vg->pdev->dev, "disabling GPIO %d\n", i); + } + } /* clear interrupt status trigger registers */ for (base = 0; base < vg->chip.ngpio; base += 32) { @@ -541,6 +594,11 @@ static int byt_gpio_probe(struct platform_device *pdev) gc->can_sleep = false; gc->dev = dev; +#ifdef CONFIG_PM_SLEEP + vg->saved_context = devm_kcalloc(&pdev->dev, gc->ngpio, + sizeof(*vg->saved_context), GFP_KERNEL); +#endif + ret = gpiochip_add(gc); if (ret) { dev_err(&pdev->dev, "failed adding byt-gpio chip\n"); @@ -569,6 +627,69 @@ static int byt_gpio_probe(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int byt_gpio_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct byt_gpio *vg = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < vg->chip.ngpio; i++) { + void __iomem *reg; + u32 value; + + reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG); + value = readl(reg) & BYT_CONF0_RESTORE_MASK; + vg->saved_context[i].conf0 = value; + + reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG); + value = readl(reg) & BYT_VAL_RESTORE_MASK; + vg->saved_context[i].val = value; + } + + return 0; +} + +static int byt_gpio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct byt_gpio *vg = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < vg->chip.ngpio; i++) { + void __iomem *reg; + u32 value; + + reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG); + value = readl(reg); + if ((value & BYT_CONF0_RESTORE_MASK) != + vg->saved_context[i].conf0) { + value &= ~BYT_CONF0_RESTORE_MASK; + value |= vg->saved_context[i].conf0; + writel(value, reg); + dev_info(dev, "restored pin %d conf0 %#08x", i, value); + } + + reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG); + value = readl(reg); + if ((value & BYT_VAL_RESTORE_MASK) != + vg->saved_context[i].val) { + u32 v; + + v = value & ~BYT_VAL_RESTORE_MASK; + v |= vg->saved_context[i].val; + if (v != value) { + writel(v, reg); + dev_dbg(dev, "restored pin %d val %#08x\n", + i, v); + } + } + } + + return 0; +} +#endif + static int byt_gpio_runtime_suspend(struct device *dev) { return 0; @@ -580,8 +701,9 @@ static int byt_gpio_runtime_resume(struct device *dev) } static const struct dev_pm_ops byt_gpio_pm_ops = { - .runtime_suspend = byt_gpio_runtime_suspend, - .runtime_resume = byt_gpio_runtime_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(byt_gpio_suspend, byt_gpio_resume) + SET_RUNTIME_PM_OPS(byt_gpio_runtime_suspend, byt_gpio_runtime_resume, + NULL) }; static const struct acpi_device_id byt_gpio_acpi_match[] = { diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 3034fd03bced..82f691eeeec4 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1226,6 +1226,7 @@ static int chv_gpio_direction_input(struct gpio_chip *chip, unsigned offset) static int chv_gpio_direction_output(struct gpio_chip *chip, unsigned offset, int value) { + chv_gpio_set(chip, offset, value); return pinctrl_gpio_direction_output(chip->base + offset); } diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index f4cd0b9b2438..a4814066ea08 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1477,28 +1477,25 @@ static void gpio_irq_ack(struct irq_data *d) /* the interrupt is already cleared before by reading ISR */ } -static unsigned int gpio_irq_startup(struct irq_data *d) +static int gpio_irq_request_res(struct irq_data *d) { struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d); unsigned pin = d->hwirq; int ret; ret = gpiochip_lock_as_irq(&at91_gpio->chip, pin); - if (ret) { + if (ret) dev_err(at91_gpio->chip.dev, "unable to lock pind %lu IRQ\n", d->hwirq); - return ret; - } - gpio_irq_unmask(d); - return 0; + + return ret; } -static void gpio_irq_shutdown(struct irq_data *d) +static void gpio_irq_release_res(struct irq_data *d) { struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d); unsigned pin = d->hwirq; - gpio_irq_mask(d); gpiochip_unlock_as_irq(&at91_gpio->chip, pin); } @@ -1577,8 +1574,8 @@ void at91_pinctrl_gpio_resume(void) static struct irq_chip gpio_irqchip = { .name = "GPIO", .irq_ack = gpio_irq_ack, - .irq_startup = gpio_irq_startup, - .irq_shutdown = gpio_irq_shutdown, + .irq_request_resources = gpio_irq_request_res, + .irq_release_resources = gpio_irq_release_res, .irq_disable = gpio_irq_mask, .irq_mask = gpio_irq_mask, .irq_unmask = gpio_irq_unmask, diff --git a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c index 24c5d88f943f..3c68a8e5e0dd 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c @@ -1011,6 +1011,7 @@ static const struct sunxi_pinctrl_desc sun4i_a10_pinctrl_data = { .pins = sun4i_a10_pins, .npins = ARRAY_SIZE(sun4i_a10_pins), .irq_banks = 1, + .irq_read_needs_mux = true, }; static int sun4i_a10_pinctrl_probe(struct platform_device *pdev) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 3d0744337736..f8e171b76693 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -29,6 +29,7 @@ #include <linux/slab.h> #include "../core.h" +#include "../../gpio/gpiolib.h" #include "pinctrl-sunxi.h" static struct irq_chip sunxi_pinctrl_edge_irq_chip; @@ -464,10 +465,19 @@ static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip, static int sunxi_pinctrl_gpio_get(struct gpio_chip *chip, unsigned offset) { struct sunxi_pinctrl *pctl = dev_get_drvdata(chip->dev); - u32 reg = sunxi_data_reg(offset); u8 index = sunxi_data_offset(offset); - u32 val = (readl(pctl->membase + reg) >> index) & DATA_PINS_MASK; + u32 set_mux = pctl->desc->irq_read_needs_mux && + test_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags); + u32 val; + + if (set_mux) + sunxi_pmx_set(pctl->pctl_dev, offset, SUN4I_FUNC_INPUT); + + val = (readl(pctl->membase + reg) >> index) & DATA_PINS_MASK; + + if (set_mux) + sunxi_pmx_set(pctl->pctl_dev, offset, SUN4I_FUNC_IRQ); return val; } diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.h b/drivers/pinctrl/sunxi/pinctrl-sunxi.h index 5a51523a3459..e248e81a0f9e 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.h +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.h @@ -77,6 +77,9 @@ #define IRQ_LEVEL_LOW 0x03 #define IRQ_EDGE_BOTH 0x04 +#define SUN4I_FUNC_INPUT 0 +#define SUN4I_FUNC_IRQ 6 + struct sunxi_desc_function { const char *name; u8 muxval; @@ -94,6 +97,7 @@ struct sunxi_pinctrl_desc { int npins; unsigned pin_base; unsigned irq_banks; + bool irq_read_needs_mux; }; struct sunxi_pinctrl_function { diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 97b5e4ee1ca4..63d4033eb683 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -73,7 +73,7 @@ #define TIME_WINDOW_MAX_MSEC 40000 #define TIME_WINDOW_MIN_MSEC 250 - +#define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */ enum unit_type { ARBITRARY_UNIT, /* no translation */ POWER_UNIT, @@ -158,6 +158,7 @@ struct rapl_domain { struct rapl_power_limit rpl[NR_POWER_LIMITS]; u64 attr_map; /* track capabilities */ unsigned int state; + unsigned int domain_energy_unit; int package_id; }; #define power_zone_to_rapl_domain(_zone) \ @@ -190,6 +191,7 @@ struct rapl_defaults { void (*set_floor_freq)(struct rapl_domain *rd, bool mode); u64 (*compute_time_window)(struct rapl_package *rp, u64 val, bool to_raw); + unsigned int dram_domain_energy_unit; }; static struct rapl_defaults *rapl_defaults; @@ -227,7 +229,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd, static int rapl_write_data_raw(struct rapl_domain *rd, enum rapl_primitives prim, unsigned long long value); -static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, +static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, + enum unit_type type, u64 value, int to_raw); static void package_power_limit_irq_save(int package_id); @@ -305,7 +308,9 @@ static int get_energy_counter(struct powercap_zone *power_zone, u64 *energy_raw) static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy) { - *energy = rapl_unit_xlate(0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); + struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev); + + *energy = rapl_unit_xlate(rd, 0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0); return 0; } @@ -639,6 +644,11 @@ static void rapl_init_domains(struct rapl_package *rp) rd->msrs[4] = MSR_DRAM_POWER_INFO; rd->rpl[0].prim_id = PL1_ENABLE; rd->rpl[0].name = pl1_name; + rd->domain_energy_unit = + rapl_defaults->dram_domain_energy_unit; + if (rd->domain_energy_unit) + pr_info("DRAM domain energy unit %dpj\n", + rd->domain_energy_unit); break; } if (mask) { @@ -648,11 +658,13 @@ static void rapl_init_domains(struct rapl_package *rp) } } -static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, +static u64 rapl_unit_xlate(struct rapl_domain *rd, int package, + enum unit_type type, u64 value, int to_raw) { u64 units = 1; struct rapl_package *rp; + u64 scale = 1; rp = find_package_by_id(package); if (!rp) @@ -663,7 +675,12 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, units = rp->power_unit; break; case ENERGY_UNIT: - units = rp->energy_unit; + scale = ENERGY_UNIT_SCALE; + /* per domain unit takes precedence */ + if (rd && rd->domain_energy_unit) + units = rd->domain_energy_unit; + else + units = rp->energy_unit; break; case TIME_UNIT: return rapl_defaults->compute_time_window(rp, value, to_raw); @@ -673,11 +690,11 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, }; if (to_raw) - return div64_u64(value, units); + return div64_u64(value, units) * scale; value *= units; - return value; + return div64_u64(value, scale); } /* in the order of enum rapl_primitives */ @@ -773,7 +790,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd, final = value & rp->mask; final = final >> rp->shift; if (xlate) - *data = rapl_unit_xlate(rd->package_id, rp->unit, final, 0); + *data = rapl_unit_xlate(rd, rd->package_id, rp->unit, final, 0); else *data = final; @@ -799,7 +816,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd, "failed to read msr 0x%x on cpu %d\n", msr, cpu); return -EIO; } - value = rapl_unit_xlate(rd->package_id, rp->unit, value, 1); + value = rapl_unit_xlate(rd, rd->package_id, rp->unit, value, 1); msr_val &= ~rp->mask; msr_val |= value << rp->shift; if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) { @@ -818,7 +835,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd, * calculate units differ on different CPUs. * We convert the units to below format based on CPUs. * i.e. - * energy unit: microJoules : Represented in microJoules by default + * energy unit: picoJoules : Represented in picoJoules by default * power unit : microWatts : Represented in milliWatts by default * time unit : microseconds: Represented in seconds by default */ @@ -834,7 +851,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) } value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; - rp->energy_unit = 1000000 / (1 << value); + rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value); value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; rp->power_unit = 1000000 / (1 << value); @@ -842,7 +859,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu) value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; rp->time_unit = 1000000 / (1 << value); - pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n", + pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n", rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); return 0; @@ -859,7 +876,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) return -ENODEV; } value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; - rp->energy_unit = 1 << value; + rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value; value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; rp->power_unit = (1 << value) * 1000; @@ -867,7 +884,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; rp->time_unit = 1000000 / (1 << value); - pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n", + pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n", rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); return 0; @@ -1017,6 +1034,13 @@ static const struct rapl_defaults rapl_defaults_core = { .compute_time_window = rapl_compute_time_window_core, }; +static const struct rapl_defaults rapl_defaults_hsw_server = { + .check_unit = rapl_check_unit_core, + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, + .dram_domain_energy_unit = 15300, +}; + static const struct rapl_defaults rapl_defaults_atom = { .check_unit = rapl_check_unit_atom, .set_floor_freq = set_floor_freq_atom, @@ -1037,7 +1061,7 @@ static const struct x86_cpu_id rapl_ids[] = { RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */ RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */ RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */ - RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */ + RAPL_CPU(0x3f, rapl_defaults_hsw_server),/* Haswell servers */ RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */ RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */ RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */ diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b899947d839d..a4a8a6dc60c4 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1839,10 +1839,12 @@ static int _regulator_do_enable(struct regulator_dev *rdev) } if (rdev->ena_pin) { - ret = regulator_ena_gpio_ctrl(rdev, true); - if (ret < 0) - return ret; - rdev->ena_gpio_state = 1; + if (!rdev->ena_gpio_state) { + ret = regulator_ena_gpio_ctrl(rdev, true); + if (ret < 0) + return ret; + rdev->ena_gpio_state = 1; + } } else if (rdev->desc->ops->enable) { ret = rdev->desc->ops->enable(rdev); if (ret < 0) @@ -1939,10 +1941,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev) trace_regulator_disable(rdev_get_name(rdev)); if (rdev->ena_pin) { - ret = regulator_ena_gpio_ctrl(rdev, false); - if (ret < 0) - return ret; - rdev->ena_gpio_state = 0; + if (rdev->ena_gpio_state) { + ret = regulator_ena_gpio_ctrl(rdev, false); + if (ret < 0) + return ret; + rdev->ena_gpio_state = 0; + } } else if (rdev->desc->ops->disable) { ret = rdev->desc->ops->disable(rdev); @@ -3444,13 +3448,6 @@ static umode_t regulator_attr_is_visible(struct kobject *kobj, if (attr == &dev_attr_requested_microamps.attr) return rdev->desc->type == REGULATOR_CURRENT ? mode : 0; - /* all the other attributes exist to support constraints; - * don't show them if there are no constraints, or if the - * relevant supporting methods are missing. - */ - if (!rdev->constraints) - return 0; - /* constraints need specific supporting methods */ if (attr == &dev_attr_min_microvolts.attr || attr == &dev_attr_max_microvolts.attr) @@ -3633,12 +3630,6 @@ regulator_register(const struct regulator_desc *regulator_desc, config->ena_gpio, ret); goto wash; } - - if (config->ena_gpio_flags & GPIOF_OUT_INIT_HIGH) - rdev->ena_gpio_state = 1; - - if (config->ena_gpio_invert) - rdev->ena_gpio_state = !rdev->ena_gpio_state; } /* set regulator constraints */ @@ -3807,9 +3798,11 @@ int regulator_suspend_finish(void) list_for_each_entry(rdev, ®ulator_list, list) { mutex_lock(&rdev->mutex); if (rdev->use_count > 0 || rdev->constraints->always_on) { - error = _regulator_do_enable(rdev); - if (error) - ret = error; + if (!_regulator_is_enabled(rdev)) { + error = _regulator_do_enable(rdev); + if (error) + ret = error; + } } else { if (!have_full_constraints()) goto unlock; diff --git a/drivers/regulator/da9210-regulator.c b/drivers/regulator/da9210-regulator.c index bc6100103f7f..f0489cb9018b 100644 --- a/drivers/regulator/da9210-regulator.c +++ b/drivers/regulator/da9210-regulator.c @@ -152,6 +152,15 @@ static int da9210_i2c_probe(struct i2c_client *i2c, config.regmap = chip->regmap; config.of_node = dev->of_node; + /* Mask all interrupt sources to deassert interrupt line */ + error = regmap_write(chip->regmap, DA9210_REG_MASK_A, ~0); + if (!error) + error = regmap_write(chip->regmap, DA9210_REG_MASK_B, ~0); + if (error) { + dev_err(&i2c->dev, "Failed to write to mask reg: %d\n", error); + return error; + } + rdev = devm_regulator_register(&i2c->dev, &da9210_reg, &config); if (IS_ERR(rdev)) { dev_err(&i2c->dev, "Failed to register DA9210 regulator\n"); diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index 9205f433573c..18198316b6cf 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev) if (!pmic) return -ENOMEM; + if (of_device_is_compatible(node, "ti,tps659038-pmic")) + palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr = + TPS659038_REGEN2_CTRL; + pmic->dev = &pdev->dev; pmic->palmas = palmas; palmas->pmic = pmic; diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 1f93b752a81c..3fd44353cc80 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -235,6 +235,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(0), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG2", @@ -249,6 +250,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(1), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG3", @@ -263,6 +265,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_BUCK4_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(2), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG4", @@ -277,6 +280,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(3), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG5", @@ -291,6 +295,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(4), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG6", @@ -305,6 +310,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(5), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG7", @@ -319,6 +325,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(6), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "LDO_REG8", @@ -333,6 +340,7 @@ static const struct regulator_desc rk808_reg[] = { .vsel_mask = RK808_LDO_VSEL_MASK, .enable_reg = RK808_LDO_EN_REG, .enable_mask = BIT(7), + .enable_time = 400, .owner = THIS_MODULE, }, { .name = "SWITCH_REG1", diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index e2cffe01b807..fb991ec76423 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/err.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/regulator/driver.h> #include <linux/regulator/machine.h> diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 92f6af6da699..73354ee27877 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -951,6 +951,7 @@ static int rpmsg_probe(struct virtio_device *vdev) void *bufs_va; int err = 0, i; size_t total_buf_space; + bool notify; vrp = kzalloc(sizeof(*vrp), GFP_KERNEL); if (!vrp) @@ -1030,8 +1031,22 @@ static int rpmsg_probe(struct virtio_device *vdev) } } + /* + * Prepare to kick but don't notify yet - we can't do this before + * device is ready. + */ + notify = virtqueue_kick_prepare(vrp->rvq); + + /* From this point on, we can notify and get callbacks. */ + virtio_device_ready(vdev); + /* tell the remote processor it can start sending messages */ - virtqueue_kick(vrp->rvq); + /* + * this might be concurrent with callbacks, but we are only + * doing notify, not a full kick here, so that's ok. + */ + if (notify) + virtqueue_notify(vrp->rvq); dev_info(&vdev->dev, "rpmsg host is online\n"); diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 472a5adc4642..c29ba7e14304 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -55,7 +55,7 @@ static int rtc_suspend(struct device *dev) struct timespec64 delta, delta_delta; int err; - if (has_persistent_clock()) + if (timekeeping_rtc_skipsuspend()) return 0; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) @@ -102,7 +102,7 @@ static int rtc_resume(struct device *dev) struct timespec64 sleep_time; int err; - if (has_persistent_clock()) + if (timekeeping_rtc_skipresume()) return 0; rtc_hctosys_ret = -ENODEV; @@ -117,10 +117,6 @@ static int rtc_resume(struct device *dev) return 0; } - if (rtc_valid_tm(&tm) != 0) { - pr_debug("%s: bogus resume time\n", dev_name(&rtc->dev)); - return 0; - } new_rtc.tv_sec = rtc_tm_to_time64(&tm); new_rtc.tv_nsec = 0; diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 37215cf983e9..d43ee409a5f2 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -72,7 +72,11 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) err = -ENODEV; else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); - else if (rtc->ops->set_mmss) { + else if (rtc->ops->set_mmss64) { + time64_t secs64 = rtc_tm_to_time64(tm); + + err = rtc->ops->set_mmss64(rtc->dev.parent, secs64); + } else if (rtc->ops->set_mmss) { time64_t secs64 = rtc_tm_to_time64(tm); err = rtc->ops->set_mmss(rtc->dev.parent, secs64); } else @@ -96,6 +100,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs) if (!rtc->ops) err = -ENODEV; + else if (rtc->ops->set_mmss64) + err = rtc->ops->set_mmss64(rtc->dev.parent, secs); else if (rtc->ops->set_mmss) err = rtc->ops->set_mmss(rtc->dev.parent, secs); else if (rtc->ops->read_time && rtc->ops->set_time) { diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index 1d0340fdb820..9b725c553058 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -43,21 +43,21 @@ /* * RTC clock functions and device struct declaration */ -static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs) +static int ab3100_rtc_set_mmss(struct device *dev, time64_t secs) { u8 regs[] = {AB3100_TI0, AB3100_TI1, AB3100_TI2, AB3100_TI3, AB3100_TI4, AB3100_TI5}; unsigned char buf[6]; - u64 fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2; + u64 hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2; int err = 0; int i; - buf[0] = (fat_time) & 0xFF; - buf[1] = (fat_time >> 8) & 0xFF; - buf[2] = (fat_time >> 16) & 0xFF; - buf[3] = (fat_time >> 24) & 0xFF; - buf[4] = (fat_time >> 32) & 0xFF; - buf[5] = (fat_time >> 40) & 0xFF; + buf[0] = (hw_counter) & 0xFF; + buf[1] = (hw_counter >> 8) & 0xFF; + buf[2] = (hw_counter >> 16) & 0xFF; + buf[3] = (hw_counter >> 24) & 0xFF; + buf[4] = (hw_counter >> 32) & 0xFF; + buf[5] = (hw_counter >> 40) & 0xFF; for (i = 0; i < 6; i++) { err = abx500_set_register_interruptible(dev, 0, @@ -75,7 +75,7 @@ static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs) static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) { - unsigned long time; + time64_t time; u8 rtcval; int err; @@ -88,7 +88,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) dev_info(dev, "clock not set (lost power)"); return -EINVAL; } else { - u64 fat_time; + u64 hw_counter; u8 buf[6]; /* Read out time registers */ @@ -98,22 +98,21 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm) if (err != 0) return err; - fat_time = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) | + hw_counter = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) | ((u64) buf[3] << 24) | ((u64) buf[2] << 16) | ((u64) buf[1] << 8) | (u64) buf[0]; - time = (unsigned long) (fat_time / - (u64) (AB3100_RTC_CLOCK_RATE * 2)); + time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2); } - rtc_time_to_tm(time, tm); + rtc_time64_to_tm(time, tm); return rtc_valid_tm(tm); } static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { - unsigned long time; - u64 fat_time; + time64_t time; + u64 hw_counter; u8 buf[6]; u8 rtcval; int err; @@ -134,11 +133,11 @@ static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) AB3100_AL0, buf, 4); if (err) return err; - fat_time = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) | + hw_counter = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) | ((u64) buf[1] << 24) | ((u64) buf[0] << 16); - time = (unsigned long) (fat_time / (u64) (AB3100_RTC_CLOCK_RATE * 2)); + time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2); - rtc_time_to_tm(time, &alarm->time); + rtc_time64_to_tm(time, &alarm->time); return rtc_valid_tm(&alarm->time); } @@ -147,17 +146,17 @@ static int ab3100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { u8 regs[] = {AB3100_AL0, AB3100_AL1, AB3100_AL2, AB3100_AL3}; unsigned char buf[4]; - unsigned long secs; - u64 fat_time; + time64_t secs; + u64 hw_counter; int err; int i; - rtc_tm_to_time(&alarm->time, &secs); - fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2; - buf[0] = (fat_time >> 16) & 0xFF; - buf[1] = (fat_time >> 24) & 0xFF; - buf[2] = (fat_time >> 32) & 0xFF; - buf[3] = (fat_time >> 40) & 0xFF; + secs = rtc_tm_to_time64(&alarm->time); + hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2; + buf[0] = (hw_counter >> 16) & 0xFF; + buf[1] = (hw_counter >> 24) & 0xFF; + buf[2] = (hw_counter >> 32) & 0xFF; + buf[3] = (hw_counter >> 40) & 0xFF; /* Set the alarm */ for (i = 0; i < 4; i++) { @@ -193,7 +192,7 @@ static int ab3100_rtc_irq_enable(struct device *dev, unsigned int enabled) static const struct rtc_class_ops ab3100_rtc_ops = { .read_time = ab3100_rtc_read_time, - .set_mmss = ab3100_rtc_set_mmss, + .set_mmss64 = ab3100_rtc_set_mmss, .read_alarm = ab3100_rtc_read_alarm, .set_alarm = ab3100_rtc_set_alarm, .alarm_irq_enable = ab3100_rtc_irq_enable, diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 70a5d94cc766..b283a1a573b3 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/of.h> #include <linux/of_device.h> +#include <linux/suspend.h> #include <linux/uaccess.h> #include "rtc-at91rm9200.h" @@ -54,6 +55,10 @@ static void __iomem *at91_rtc_regs; static int irq; static DEFINE_SPINLOCK(at91_rtc_lock); static u32 at91_rtc_shadow_imr; +static bool suspended; +static DEFINE_SPINLOCK(suspended_lock); +static unsigned long cached_events; +static u32 at91_rtc_imr; static void at91_rtc_write_ier(u32 mask) { @@ -290,7 +295,9 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) struct rtc_device *rtc = platform_get_drvdata(pdev); unsigned int rtsr; unsigned long events = 0; + int ret = IRQ_NONE; + spin_lock(&suspended_lock); rtsr = at91_rtc_read(AT91_RTC_SR) & at91_rtc_read_imr(); if (rtsr) { /* this interrupt is shared! Is it ours? */ if (rtsr & AT91_RTC_ALARM) @@ -304,14 +311,22 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id) at91_rtc_write(AT91_RTC_SCCR, rtsr); /* clear status reg */ - rtc_update_irq(rtc, 1, events); + if (!suspended) { + rtc_update_irq(rtc, 1, events); - dev_dbg(&pdev->dev, "%s(): num=%ld, events=0x%02lx\n", __func__, - events >> 8, events & 0x000000FF); + dev_dbg(&pdev->dev, "%s(): num=%ld, events=0x%02lx\n", + __func__, events >> 8, events & 0x000000FF); + } else { + cached_events |= events; + at91_rtc_write_idr(at91_rtc_imr); + pm_system_wakeup(); + } - return IRQ_HANDLED; + ret = IRQ_HANDLED; } - return IRQ_NONE; /* not handled */ + spin_unlock(&suspended_lock); + + return ret; } static const struct at91_rtc_config at91rm9200_config = { @@ -401,8 +416,8 @@ static int __init at91_rtc_probe(struct platform_device *pdev) AT91_RTC_CALEV); ret = devm_request_irq(&pdev->dev, irq, at91_rtc_interrupt, - IRQF_SHARED, - "at91_rtc", pdev); + IRQF_SHARED | IRQF_COND_SUSPEND, + "at91_rtc", pdev); if (ret) { dev_err(&pdev->dev, "IRQ %d already in use.\n", irq); return ret; @@ -454,8 +469,6 @@ static void at91_rtc_shutdown(struct platform_device *pdev) /* AT91RM9200 RTC Power management control */ -static u32 at91_rtc_imr; - static int at91_rtc_suspend(struct device *dev) { /* this IRQ is shared with DBGU and other hardware which isn't @@ -464,21 +477,42 @@ static int at91_rtc_suspend(struct device *dev) at91_rtc_imr = at91_rtc_read_imr() & (AT91_RTC_ALARM|AT91_RTC_SECEV); if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + if (device_may_wakeup(dev)) { + unsigned long flags; + enable_irq_wake(irq); - else + + spin_lock_irqsave(&suspended_lock, flags); + suspended = true; + spin_unlock_irqrestore(&suspended_lock, flags); + } else { at91_rtc_write_idr(at91_rtc_imr); + } } return 0; } static int at91_rtc_resume(struct device *dev) { + struct rtc_device *rtc = dev_get_drvdata(dev); + if (at91_rtc_imr) { - if (device_may_wakeup(dev)) + if (device_may_wakeup(dev)) { + unsigned long flags; + + spin_lock_irqsave(&suspended_lock, flags); + + if (cached_events) { + rtc_update_irq(rtc, 1, cached_events); + cached_events = 0; + } + + suspended = false; + spin_unlock_irqrestore(&suspended_lock, flags); + disable_irq_wake(irq); - else - at91_rtc_write_ier(at91_rtc_imr); + } + at91_rtc_write_ier(at91_rtc_imr); } return 0; } diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 2183fd2750ab..5ccaee32df72 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -23,6 +23,7 @@ #include <linux/io.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> +#include <linux/suspend.h> #include <linux/clk.h> /* @@ -77,6 +78,9 @@ struct sam9_rtc { unsigned int gpbr_offset; int irq; struct clk *sclk; + bool suspended; + unsigned long events; + spinlock_t lock; }; #define rtt_readl(rtc, field) \ @@ -271,14 +275,9 @@ static int at91_rtc_proc(struct device *dev, struct seq_file *seq) return 0; } -/* - * IRQ handler for the RTC - */ -static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc) +static irqreturn_t at91_rtc_cache_events(struct sam9_rtc *rtc) { - struct sam9_rtc *rtc = _rtc; u32 sr, mr; - unsigned long events = 0; /* Shared interrupt may be for another device. Note: reading * SR clears it, so we must only read it in this irq handler! @@ -290,18 +289,54 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc) /* alarm status */ if (sr & AT91_RTT_ALMS) - events |= (RTC_AF | RTC_IRQF); + rtc->events |= (RTC_AF | RTC_IRQF); /* timer update/increment */ if (sr & AT91_RTT_RTTINC) - events |= (RTC_UF | RTC_IRQF); + rtc->events |= (RTC_UF | RTC_IRQF); + + return IRQ_HANDLED; +} + +static void at91_rtc_flush_events(struct sam9_rtc *rtc) +{ + if (!rtc->events) + return; - rtc_update_irq(rtc->rtcdev, 1, events); + rtc_update_irq(rtc->rtcdev, 1, rtc->events); + rtc->events = 0; pr_debug("%s: num=%ld, events=0x%02lx\n", __func__, - events >> 8, events & 0x000000FF); + rtc->events >> 8, rtc->events & 0x000000FF); +} - return IRQ_HANDLED; +/* + * IRQ handler for the RTC + */ +static irqreturn_t at91_rtc_interrupt(int irq, void *_rtc) +{ + struct sam9_rtc *rtc = _rtc; + int ret; + + spin_lock(&rtc->lock); + + ret = at91_rtc_cache_events(rtc); + + /* We're called in suspended state */ + if (rtc->suspended) { + /* Mask irqs coming from this peripheral */ + rtt_writel(rtc, MR, + rtt_readl(rtc, MR) & + ~(AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN)); + /* Trigger a system wakeup */ + pm_system_wakeup(); + } else { + at91_rtc_flush_events(rtc); + } + + spin_unlock(&rtc->lock); + + return ret; } static const struct rtc_class_ops at91_rtc_ops = { @@ -421,7 +456,8 @@ static int at91_rtc_probe(struct platform_device *pdev) /* register irq handler after we know what name we'll use */ ret = devm_request_irq(&pdev->dev, rtc->irq, at91_rtc_interrupt, - IRQF_SHARED, dev_name(&rtc->rtcdev->dev), rtc); + IRQF_SHARED | IRQF_COND_SUSPEND, + dev_name(&rtc->rtcdev->dev), rtc); if (ret) { dev_dbg(&pdev->dev, "can't share IRQ %d?\n", rtc->irq); return ret; @@ -482,7 +518,12 @@ static int at91_rtc_suspend(struct device *dev) rtc->imr = mr & (AT91_RTT_ALMIEN | AT91_RTT_RTTINCIEN); if (rtc->imr) { if (device_may_wakeup(dev) && (mr & AT91_RTT_ALMIEN)) { + unsigned long flags; + enable_irq_wake(rtc->irq); + spin_lock_irqsave(&rtc->lock, flags); + rtc->suspended = true; + spin_unlock_irqrestore(&rtc->lock, flags); /* don't let RTTINC cause wakeups */ if (mr & AT91_RTT_RTTINCIEN) rtt_writel(rtc, MR, mr & ~AT91_RTT_RTTINCIEN); @@ -499,10 +540,18 @@ static int at91_rtc_resume(struct device *dev) u32 mr; if (rtc->imr) { + unsigned long flags; + if (device_may_wakeup(dev)) disable_irq_wake(rtc->irq); mr = rtt_readl(rtc, MR); rtt_writel(rtc, MR, mr | rtc->imr); + + spin_lock_irqsave(&rtc->lock, flags); + rtc->suspended = false; + at91_rtc_cache_events(rtc); + at91_rtc_flush_events(rtc); + spin_unlock_irqrestore(&rtc->lock, flags); } return 0; diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c index 5bce904b7ee6..32df1d812367 100644 --- a/drivers/rtc/rtc-mc13xxx.c +++ b/drivers/rtc/rtc-mc13xxx.c @@ -83,20 +83,19 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm) return ret; } while (days1 != days2); - rtc_time_to_tm(days1 * SEC_PER_DAY + seconds, tm); + rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm); return rtc_valid_tm(tm); } -static int mc13xxx_rtc_set_mmss(struct device *dev, unsigned long secs) +static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); unsigned int seconds, days; unsigned int alarmseconds; int ret; - seconds = secs % SEC_PER_DAY; - days = secs / SEC_PER_DAY; + days = div_s64_rem(secs, SEC_PER_DAY, &seconds); mc13xxx_lock(priv->mc13xxx); @@ -159,7 +158,7 @@ static int mc13xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); unsigned seconds, days; - unsigned long s1970; + time64_t s1970; int enabled, pending; int ret; @@ -189,10 +188,10 @@ out: alarm->enabled = enabled; alarm->pending = pending; - s1970 = days * SEC_PER_DAY + seconds; + s1970 = (time64_t)days * SEC_PER_DAY + seconds; - rtc_time_to_tm(s1970, &alarm->time); - dev_dbg(dev, "%s: %lu\n", __func__, s1970); + rtc_time64_to_tm(s1970, &alarm->time); + dev_dbg(dev, "%s: %lld\n", __func__, (long long)s1970); return 0; } @@ -200,8 +199,8 @@ out: static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct mc13xxx_rtc *priv = dev_get_drvdata(dev); - unsigned long s1970; - unsigned seconds, days; + time64_t s1970; + u32 seconds, days; int ret; mc13xxx_lock(priv->mc13xxx); @@ -215,20 +214,17 @@ static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) if (unlikely(ret)) goto out; - ret = rtc_tm_to_time(&alarm->time, &s1970); - if (unlikely(ret)) - goto out; + s1970 = rtc_tm_to_time64(&alarm->time); - dev_dbg(dev, "%s: o%2.s %lu\n", __func__, alarm->enabled ? "n" : "ff", - s1970); + dev_dbg(dev, "%s: o%2.s %lld\n", __func__, alarm->enabled ? "n" : "ff", + (long long)s1970); ret = mc13xxx_rtc_irq_enable_unlocked(dev, alarm->enabled, MC13XXX_IRQ_TODA); if (unlikely(ret)) goto out; - seconds = s1970 % SEC_PER_DAY; - days = s1970 / SEC_PER_DAY; + days = div_s64_rem(s1970, SEC_PER_DAY, &seconds); ret = mc13xxx_reg_write(priv->mc13xxx, MC13XXX_RTCDAYA, days); if (unlikely(ret)) @@ -268,7 +264,7 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev) static const struct rtc_class_ops mc13xxx_rtc_ops = { .read_time = mc13xxx_rtc_read_time, - .set_mmss = mc13xxx_rtc_set_mmss, + .set_mmss64 = mc13xxx_rtc_set_mmss, .read_alarm = mc13xxx_rtc_read_alarm, .set_alarm = mc13xxx_rtc_set_alarm, .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable, diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index e2436d140175..3a6fd3a8a2ec 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev) mrst->dev = NULL; } -#ifdef CONFIG_PM -static int mrst_suspend(struct device *dev, pm_message_t mesg) +#ifdef CONFIG_PM_SLEEP +static int mrst_suspend(struct device *dev) { struct mrst_rtc *mrst = dev_get_drvdata(dev); unsigned char tmp; @@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg) */ static inline int mrst_poweroff(struct device *dev) { - return mrst_suspend(dev, PMSG_HIBERNATE); + return mrst_suspend(dev); } static int mrst_resume(struct device *dev) @@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev) return 0; } +static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume); +#define MRST_PM_OPS (&mrst_pm_ops) + #else -#define mrst_suspend NULL -#define mrst_resume NULL +#define MRST_PM_OPS NULL static inline int mrst_poweroff(struct device *dev) { @@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = { .remove = vrtc_mrst_platform_remove, .shutdown = vrtc_mrst_platform_shutdown, .driver = { - .name = (char *) driver_name, - .suspend = mrst_suspend, - .resume = mrst_resume, + .name = driver_name, + .pm = MRST_PM_OPS, } }; diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c index 3c3f8d10ab43..09d422b9f7f7 100644 --- a/drivers/rtc/rtc-mxc.c +++ b/drivers/rtc/rtc-mxc.c @@ -106,7 +106,7 @@ static inline int is_imx1_rtc(struct rtc_plat_data *data) * This function is used to obtain the RTC time or the alarm value in * second. */ -static u32 get_alarm_or_time(struct device *dev, int time_alarm) +static time64_t get_alarm_or_time(struct device *dev, int time_alarm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); @@ -129,29 +129,28 @@ static u32 get_alarm_or_time(struct device *dev, int time_alarm) hr = hr_min >> 8; min = hr_min & 0xff; - return (((day * 24 + hr) * 60) + min) * 60 + sec; + return ((((time64_t)day * 24 + hr) * 60) + min) * 60 + sec; } /* * This function sets the RTC alarm value or the time value. */ -static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time) +static void set_alarm_or_time(struct device *dev, int time_alarm, time64_t time) { - u32 day, hr, min, sec, temp; + u32 tod, day, hr, min, sec, temp; struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - day = time / 86400; - time -= day * 86400; + day = div_s64_rem(time, 86400, &tod); /* time is within a day now */ - hr = time / 3600; - time -= hr * 3600; + hr = tod / 3600; + tod -= hr * 3600; /* time is within an hour now */ - min = time / 60; - sec = time - min * 60; + min = tod / 60; + sec = tod - min * 60; temp = (hr << 8) + min; @@ -173,29 +172,18 @@ static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time) * This function updates the RTC alarm registers and then clears all the * interrupt status bits. */ -static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm) +static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm) { - struct rtc_time alarm_tm, now_tm; - unsigned long now, time; + time64_t time; struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - now = get_alarm_or_time(dev, MXC_RTC_TIME); - rtc_time_to_tm(now, &now_tm); - alarm_tm.tm_year = now_tm.tm_year; - alarm_tm.tm_mon = now_tm.tm_mon; - alarm_tm.tm_mday = now_tm.tm_mday; - alarm_tm.tm_hour = alrm->tm_hour; - alarm_tm.tm_min = alrm->tm_min; - alarm_tm.tm_sec = alrm->tm_sec; - rtc_tm_to_time(&alarm_tm, &time); + time = rtc_tm_to_time64(alrm); /* clear all the interrupt status bits */ writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR); set_alarm_or_time(dev, MXC_RTC_ALARM, time); - - return 0; } static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit, @@ -283,14 +271,14 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled) */ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm) { - u32 val; + time64_t val; /* Avoid roll-over from reading the different registers */ do { val = get_alarm_or_time(dev, MXC_RTC_TIME); } while (val != get_alarm_or_time(dev, MXC_RTC_TIME)); - rtc_time_to_tm(val, tm); + rtc_time64_to_tm(val, tm); return 0; } @@ -298,7 +286,7 @@ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm) /* * This function sets the internal RTC time based on tm in Gregorian date. */ -static int mxc_rtc_set_mmss(struct device *dev, unsigned long time) +static int mxc_rtc_set_mmss(struct device *dev, time64_t time) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); @@ -309,9 +297,9 @@ static int mxc_rtc_set_mmss(struct device *dev, unsigned long time) if (is_imx1_rtc(pdata)) { struct rtc_time tm; - rtc_time_to_tm(time, &tm); + rtc_time64_to_tm(time, &tm); tm.tm_year = 70; - rtc_tm_to_time(&tm, &time); + time = rtc_tm_to_time64(&tm); } /* Avoid roll-over from reading the different registers */ @@ -333,7 +321,7 @@ static int mxc_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) struct rtc_plat_data *pdata = platform_get_drvdata(pdev); void __iomem *ioaddr = pdata->ioaddr; - rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time); + rtc_time64_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time); alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0; return 0; @@ -346,11 +334,8 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) { struct platform_device *pdev = to_platform_device(dev); struct rtc_plat_data *pdata = platform_get_drvdata(pdev); - int ret; - ret = rtc_update_alarm(dev, &alrm->time); - if (ret) - return ret; + rtc_update_alarm(dev, &alrm->time); memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time)); mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled); @@ -362,7 +347,7 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) static struct rtc_class_ops mxc_rtc_ops = { .release = mxc_rtc_release, .read_time = mxc_rtc_read_time, - .set_mmss = mxc_rtc_set_mmss, + .set_mmss64 = mxc_rtc_set_mmss, .read_alarm = mxc_rtc_read_alarm, .set_alarm = mxc_rtc_set_alarm, .alarm_irq_enable = mxc_rtc_alarm_irq_enable, diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 4241eeab3386..f4cf6851fae9 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -849,6 +849,7 @@ static struct s3c_rtc_data const s3c2443_rtc_data = { static struct s3c_rtc_data const s3c6410_rtc_data = { .max_user_freq = 32768, + .needs_src_clk = true, .irq_handler = s3c6410_rtc_irq, .set_freq = s3c6410_rtc_setfreq, .enable_tick = s3c6410_rtc_enable_tick, diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c index 8f86fa91de1a..3a2da4c892d6 100644 --- a/drivers/rtc/rtc-test.c +++ b/drivers/rtc/rtc-test.c @@ -13,6 +13,10 @@ #include <linux/rtc.h> #include <linux/platform_device.h> +static int test_mmss64; +module_param(test_mmss64, int, 0644); +MODULE_PARM_DESC(test_mmss64, "Test struct rtc_class_ops.set_mmss64()."); + static struct platform_device *test0 = NULL, *test1 = NULL; static int test_rtc_read_alarm(struct device *dev, @@ -30,7 +34,13 @@ static int test_rtc_set_alarm(struct device *dev, static int test_rtc_read_time(struct device *dev, struct rtc_time *tm) { - rtc_time_to_tm(get_seconds(), tm); + rtc_time64_to_tm(ktime_get_real_seconds(), tm); + return 0; +} + +static int test_rtc_set_mmss64(struct device *dev, time64_t secs) +{ + dev_info(dev, "%s, secs = %lld\n", __func__, (long long)secs); return 0; } @@ -55,7 +65,7 @@ static int test_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) return 0; } -static const struct rtc_class_ops test_rtc_ops = { +static struct rtc_class_ops test_rtc_ops = { .proc = test_rtc_proc, .read_time = test_rtc_read_time, .read_alarm = test_rtc_read_alarm, @@ -101,6 +111,11 @@ static int test_probe(struct platform_device *plat_dev) int err; struct rtc_device *rtc; + if (test_mmss64) { + test_rtc_ops.set_mmss64 = test_rtc_set_mmss64; + test_rtc_ops.set_mmss = NULL; + } + rtc = devm_rtc_device_register(&plat_dev->dev, "test", &test_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c index eb71872d0361..7728d5e32bf4 100644 --- a/drivers/rtc/systohc.c +++ b/drivers/rtc/systohc.c @@ -11,7 +11,7 @@ * rtc_set_ntp_time - Save NTP synchronized time to the RTC * @now: Current time of day * - * Replacement for the NTP platform function update_persistent_clock + * Replacement for the NTP platform function update_persistent_clock64 * that stores time for later retrieval by rtc_hctosys. * * Returns 0 on successful RTC update, -ENODEV if a RTC update is not @@ -35,7 +35,10 @@ int rtc_set_ntp_time(struct timespec64 now) if (rtc) { /* rtc_hctosys exclusively uses UTC, so we call set_time here, * not set_mmss. */ - if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss)) + if (rtc->ops && + (rtc->ops->set_time || + rtc->ops->set_mmss64 || + rtc->ops->set_mmss)) err = rtc_set_time(rtc, &tm); rtc_class_close(rtc); } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 96128cb009f3..da212813f2d5 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -547,7 +547,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char * parse input */ num_of_segments = 0; - for (i = 0; ((buf[i] != '\0') && (buf[i] != '\n') && i < count); i++) { + for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) { for (j = i; (buf[j] != ':') && (buf[j] != '\0') && (buf[j] != '\n') && diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c index 09db45296eed..7497ddde2dd6 100644 --- a/drivers/s390/block/scm_blk_cluster.c +++ b/drivers/s390/block/scm_blk_cluster.c @@ -92,7 +92,7 @@ bool scm_reserve_cluster(struct scm_request *scmrq) add = 0; continue; } - for (pos = 0; pos <= iter->aob->request.msb_count; pos++) { + for (pos = 0; pos < iter->aob->request.msb_count; pos++) { if (clusters_intersect(req, iter->request[pos]) && (rq_data_dir(req) == WRITE || rq_data_dir(iter->request[pos]) == WRITE)) { diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index a7cc61837818..923a2b5a2439 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5734,9 +5734,9 @@ free_port: hba_free: if (phba->msix_enabled) pci_disable_msix(phba->pcidev); - iscsi_host_remove(phba->shost); pci_dev_put(phba->pcidev); iscsi_host_free(phba->shost); + pci_set_drvdata(pcidev, NULL); disable_pci: pci_disable_device(pcidev); return ret; diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 9219953ee949..d9afc51af7d3 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4_ONLY, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 932d9cc98d2f..9c706d8c1441 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = { }; static struct ata_port_info sata_port_info = { - .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ, + .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ | + ATA_FLAG_SAS_HOST, .pio_mask = ATA_PIO4, .mwdma_mask = ATA_MWDMA2, .udma_mask = ATA_UDMA6, diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 62b58d38ce2e..60de66252fa2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -500,6 +500,7 @@ static void sas_revalidate_domain(struct work_struct *work) struct sas_discovery_event *ev = to_sas_discovery_event(work); struct asd_sas_port *port = ev->port; struct sas_ha_struct *ha = port->ha; + struct domain_device *ddev = port->port_dev; /* prevent revalidation from finding sata links in recovery */ mutex_lock(&ha->disco_mutex); @@ -514,8 +515,9 @@ static void sas_revalidate_domain(struct work_struct *work) SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, task_pid_nr(current)); - if (port->port_dev) - res = sas_ex_revalidate_domain(port->port_dev); + if (ddev && (ddev->dev_type == SAS_FANOUT_EXPANDER_DEVICE || + ddev->dev_type == SAS_EDGE_EXPANDER_DEVICE)) + res = sas_ex_revalidate_domain(ddev); SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", port->id, task_pid_nr(current), res); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 99f43b7fc9ab..ab4879e12ea7 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1596,7 +1596,7 @@ static int tcm_qla2xxx_check_initiator_node_acl( /* * Finally register the new FC Nexus with TCM */ - __transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess); + transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 54d7a6cbb98a..b1a263137a23 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1311,9 +1311,11 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req) "rejecting I/O to dead device\n"); ret = BLKPREP_KILL; break; - case SDEV_QUIESCE: case SDEV_BLOCK: case SDEV_CREATED_BLOCK: + ret = BLKPREP_DEFER; + break; + case SDEV_QUIESCE: /* * If the devices is blocked we defer normal commands. */ diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 9af7841f2e8c..06de34001c66 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -764,17 +764,17 @@ static void atmel_spi_pdc_next_xfer(struct spi_master *master, (unsigned long long)xfer->rx_dma); } - /* REVISIT: We're waiting for ENDRX before we start the next + /* REVISIT: We're waiting for RXBUFF before we start the next * transfer because we need to handle some difficult timing - * issues otherwise. If we wait for ENDTX in one transfer and - * then starts waiting for ENDRX in the next, it's difficult - * to tell the difference between the ENDRX interrupt we're - * actually waiting for and the ENDRX interrupt of the + * issues otherwise. If we wait for TXBUFE in one transfer and + * then starts waiting for RXBUFF in the next, it's difficult + * to tell the difference between the RXBUFF interrupt we're + * actually waiting for and the RXBUFF interrupt of the * previous transfer. * * It should be doable, though. Just not now... */ - spi_writel(as, IER, SPI_BIT(ENDRX) | SPI_BIT(OVRES)); + spi_writel(as, IER, SPI_BIT(RXBUFF) | SPI_BIT(OVRES)); spi_writel(as, PTCR, SPI_BIT(TXTEN) | SPI_BIT(RXTEN)); } diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index a0197fd4e95c..4f8c798e0633 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY)) + clear_bit(TX_BUSY, &dws->dma_chan_busy); + if (test_bit(RX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } @@ -139,6 +140,9 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws) 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!txdesc) + return NULL; + txdesc->callback = dw_spi_dma_tx_done; txdesc->callback_param = dws; @@ -153,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg) { struct dw_spi *dws = arg; - if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY)) + clear_bit(RX_BUSY, &dws->dma_chan_busy); + if (test_bit(TX_BUSY, &dws->dma_chan_busy)) return; dw_spi_xfer_done(dws); } @@ -184,6 +189,9 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws) 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!rxdesc) + return NULL; + rxdesc->callback = dw_spi_dma_rx_done; rxdesc->callback_param = dws; diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c index 5ba331047cbe..6d331e0db331 100644 --- a/drivers/spi/spi-dw-pci.c +++ b/drivers/spi/spi-dw-pci.c @@ -36,13 +36,13 @@ struct spi_pci_desc { static struct spi_pci_desc spi_pci_mid_desc_1 = { .setup = dw_spi_mid_init, - .num_cs = 32, + .num_cs = 5, .bus_num = 0, }; static struct spi_pci_desc spi_pci_mid_desc_2 = { .setup = dw_spi_mid_init, - .num_cs = 4, + .num_cs = 2, .bus_num = 1, }; diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index 5a97a62b298a..4847afba89f4 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -621,14 +621,14 @@ static void spi_hw_init(struct device *dev, struct dw_spi *dws) if (!dws->fifo_len) { u32 fifo; - for (fifo = 2; fifo <= 256; fifo++) { + for (fifo = 1; fifo < 256; fifo++) { dw_writew(dws, DW_SPI_TXFLTR, fifo); if (fifo != dw_readw(dws, DW_SPI_TXFLTR)) break; } dw_writew(dws, DW_SPI_TXFLTR, 0); - dws->fifo_len = (fifo == 2) ? 0 : fifo - 1; + dws->fifo_len = (fifo == 1) ? 0 : fifo; dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len); } } diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index c01567d53581..e649bc7d4c08 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -459,6 +459,13 @@ static int img_spfi_transfer_one(struct spi_master *master, unsigned long flags; int ret; + if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) { + dev_err(spfi->dev, + "Transfer length (%d) is greater than the max supported (%d)", + xfer->len, SPFI_TRANSACTION_TSIZE_MASK); + return -EINVAL; + } + /* * Stop all DMA and reset the controller if the previous transaction * timed-out and never completed it's DMA. diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 89ca162801da..ee513a85296b 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -534,12 +534,12 @@ static void giveback(struct pl022 *pl022) pl022->cur_msg = NULL; pl022->cur_transfer = NULL; pl022->cur_chip = NULL; - spi_finalize_current_message(pl022->master); /* disable the SPI/SSP operation */ writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase)); + spi_finalize_current_message(pl022->master); } /** diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index ff9cdbdb6672..2b2c359f5a50 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev) struct resource *res; struct device *dev; void __iomem *base; - u32 max_freq, iomode; + u32 max_freq, iomode, num_cs; int ret, irq, size; dev = &pdev->dev; @@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev) } /* use num-cs unless not present or out of range */ - if (of_property_read_u16(dev->of_node, "num-cs", - &master->num_chipselect) || - (master->num_chipselect > SPI_NUM_CHIPSELECTS)) + if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) || + num_cs > SPI_NUM_CHIPSELECTS) master->num_chipselect = SPI_NUM_CHIPSELECTS; + else + master->num_chipselect = num_cs; master->bus_num = pdev->id; master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP; diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 884a716e50cb..5c0616870358 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -101,6 +101,7 @@ struct ti_qspi { #define QSPI_FLEN(n) ((n - 1) << 0) /* STATUS REGISTER */ +#define BUSY 0x01 #define WC 0x02 /* INTERRUPT REGISTER */ @@ -199,6 +200,21 @@ static void ti_qspi_restore_ctx(struct ti_qspi *qspi) ti_qspi_write(qspi, ctx_reg->clkctrl, QSPI_SPI_CLOCK_CNTRL_REG); } +static inline u32 qspi_is_busy(struct ti_qspi *qspi) +{ + u32 stat; + unsigned long timeout = jiffies + QSPI_COMPLETION_TIMEOUT; + + stat = ti_qspi_read(qspi, QSPI_SPI_STATUS_REG); + while ((stat & BUSY) && time_after(timeout, jiffies)) { + cpu_relax(); + stat = ti_qspi_read(qspi, QSPI_SPI_STATUS_REG); + } + + WARN(stat & BUSY, "qspi busy\n"); + return stat & BUSY; +} + static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) { int wlen, count; @@ -211,6 +227,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t) wlen = t->bits_per_word >> 3; /* in bytes */ while (count) { + if (qspi_is_busy(qspi)) + return -EBUSY; + switch (wlen) { case 1: dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n", @@ -266,6 +285,9 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t) while (count) { dev_dbg(qspi->dev, "rx cmd %08x dc %08x\n", cmd, qspi->dc); + if (qspi_is_busy(qspi)) + return -EBUSY; + ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG); if (!wait_for_completion_timeout(&qspi->transfer_complete, QSPI_COMPLETION_TIMEOUT)) { diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c64a3e59fce3..57a195041dc7 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master) "failed to unprepare message: %d\n", ret); } } + + trace_spi_message_done(mesg); + master->cur_msg_prepared = false; mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); - - trace_spi_message_done(mesg); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index 9800c01e6fb9..3f72451d2de0 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -426,7 +426,6 @@ static int pci171x_ai_insn_read(struct comedi_device *dev, unsigned int *data) { struct pci1710_private *devpriv = dev->private; - unsigned int chan = CR_CHAN(insn->chanspec); int ret = 0; int i; @@ -447,7 +446,7 @@ static int pci171x_ai_insn_read(struct comedi_device *dev, if (ret) break; - ret = pci171x_ai_read_sample(dev, s, chan, &val); + ret = pci171x_ai_read_sample(dev, s, 0, &val); if (ret) break; diff --git a/drivers/staging/comedi/drivers/comedi_isadma.c b/drivers/staging/comedi/drivers/comedi_isadma.c index dbdea71d6b95..e856f01ca077 100644 --- a/drivers/staging/comedi/drivers/comedi_isadma.c +++ b/drivers/staging/comedi/drivers/comedi_isadma.c @@ -91,9 +91,10 @@ unsigned int comedi_isadma_disable_on_sample(unsigned int dma_chan, stalled++; if (stalled > 10) break; + } else { + residue = new_residue; + stalled = 0; } - residue = new_residue; - stalled = 0; } return residue; } diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index e37118321a27..a0906685e27f 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -103,11 +103,6 @@ enum vmk80xx_model { VMK8061_MODEL }; -struct firmware_version { - unsigned char ic3_vers[32]; /* USB-Controller */ - unsigned char ic6_vers[32]; /* CPU */ -}; - static const struct comedi_lrange vmk8061_range = { 2, { UNI_RANGE(5), @@ -156,68 +151,12 @@ static const struct vmk80xx_board vmk80xx_boardinfo[] = { struct vmk80xx_private { struct usb_endpoint_descriptor *ep_rx; struct usb_endpoint_descriptor *ep_tx; - struct firmware_version fw; struct semaphore limit_sem; unsigned char *usb_rx_buf; unsigned char *usb_tx_buf; enum vmk80xx_model model; }; -static int vmk80xx_check_data_link(struct comedi_device *dev) -{ - struct vmk80xx_private *devpriv = dev->private; - struct usb_device *usb = comedi_to_usb_dev(dev); - unsigned int tx_pipe; - unsigned int rx_pipe; - unsigned char tx[1]; - unsigned char rx[2]; - - tx_pipe = usb_sndbulkpipe(usb, 0x01); - rx_pipe = usb_rcvbulkpipe(usb, 0x81); - - tx[0] = VMK8061_CMD_RD_PWR_STAT; - - /* - * Check that IC6 (PIC16F871) is powered and - * running and the data link between IC3 and - * IC6 is working properly - */ - usb_bulk_msg(usb, tx_pipe, tx, 1, NULL, devpriv->ep_tx->bInterval); - usb_bulk_msg(usb, rx_pipe, rx, 2, NULL, HZ * 10); - - return (int)rx[1]; -} - -static void vmk80xx_read_eeprom(struct comedi_device *dev, int flag) -{ - struct vmk80xx_private *devpriv = dev->private; - struct usb_device *usb = comedi_to_usb_dev(dev); - unsigned int tx_pipe; - unsigned int rx_pipe; - unsigned char tx[1]; - unsigned char rx[64]; - int cnt; - - tx_pipe = usb_sndbulkpipe(usb, 0x01); - rx_pipe = usb_rcvbulkpipe(usb, 0x81); - - tx[0] = VMK8061_CMD_RD_VERSION; - - /* - * Read the firmware version info of IC3 and - * IC6 from the internal EEPROM of the IC - */ - usb_bulk_msg(usb, tx_pipe, tx, 1, NULL, devpriv->ep_tx->bInterval); - usb_bulk_msg(usb, rx_pipe, rx, 64, &cnt, HZ * 10); - - rx[cnt] = '\0'; - - if (flag & IC3_VERSION) - strncpy(devpriv->fw.ic3_vers, rx + 1, 24); - else /* IC6_VERSION */ - strncpy(devpriv->fw.ic6_vers, rx + 25, 24); -} - static void vmk80xx_do_bulk_msg(struct comedi_device *dev) { struct vmk80xx_private *devpriv = dev->private; @@ -878,16 +817,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev, usb_set_intfdata(intf, devpriv); - if (devpriv->model == VMK8061_MODEL) { - vmk80xx_read_eeprom(dev, IC3_VERSION); - dev_info(&intf->dev, "%s\n", devpriv->fw.ic3_vers); - - if (vmk80xx_check_data_link(dev)) { - vmk80xx_read_eeprom(dev, IC6_VERSION); - dev_info(&intf->dev, "%s\n", devpriv->fw.ic6_vers); - } - } - if (devpriv->model == VMK8055_MODEL) vmk80xx_reset_device(dev); diff --git a/drivers/staging/iio/Kconfig b/drivers/staging/iio/Kconfig index 24183028bd71..6d5b38d69578 100644 --- a/drivers/staging/iio/Kconfig +++ b/drivers/staging/iio/Kconfig @@ -38,6 +38,7 @@ config IIO_SIMPLE_DUMMY_EVENTS config IIO_SIMPLE_DUMMY_BUFFER bool "Buffered capture support" select IIO_BUFFER + select IIO_TRIGGER select IIO_KFIFO_BUF help Add buffered data capture to the simple dummy driver. diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index d9d6fad7cb00..816174388f13 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -214,11 +214,17 @@ struct mxs_lradc { unsigned long is_divided; /* - * Touchscreen LRADC channels receives a private slot in the CTRL4 - * register, the slot #7. Therefore only 7 slots instead of 8 in the - * CTRL4 register can be mapped to LRADC channels when using the - * touchscreen. - * + * When the touchscreen is enabled, we give it two private virtual + * channels: #6 and #7. This means that only 6 virtual channels (instead + * of 8) will be available for buffered capture. + */ +#define TOUCHSCREEN_VCHANNEL1 7 +#define TOUCHSCREEN_VCHANNEL2 6 +#define BUFFER_VCHANS_LIMITED 0x3f +#define BUFFER_VCHANS_ALL 0xff + u8 buffer_vchans; + + /* * Furthermore, certain LRADC channels are shared between touchscreen * and/or touch-buttons and generic LRADC block. Therefore when using * either of these, these channels are not available for the regular @@ -342,6 +348,9 @@ struct mxs_lradc { #define LRADC_CTRL4 0x140 #define LRADC_CTRL4_LRADCSELECT_MASK(n) (0xf << ((n) * 4)) #define LRADC_CTRL4_LRADCSELECT_OFFSET(n) ((n) * 4) +#define LRADC_CTRL4_LRADCSELECT(n, x) \ + (((x) << LRADC_CTRL4_LRADCSELECT_OFFSET(n)) & \ + LRADC_CTRL4_LRADCSELECT_MASK(n)) #define LRADC_RESOLUTION 12 #define LRADC_SINGLE_SAMPLE_MASK ((1 << LRADC_RESOLUTION) - 1) @@ -416,6 +425,14 @@ static bool mxs_lradc_check_touch_event(struct mxs_lradc *lradc) LRADC_STATUS_TOUCH_DETECT_RAW); } +static void mxs_lradc_map_channel(struct mxs_lradc *lradc, unsigned vch, + unsigned ch) +{ + mxs_lradc_reg_clear(lradc, LRADC_CTRL4_LRADCSELECT_MASK(vch), + LRADC_CTRL4); + mxs_lradc_reg_set(lradc, LRADC_CTRL4_LRADCSELECT(vch, ch), LRADC_CTRL4); +} + static void mxs_lradc_setup_ts_channel(struct mxs_lradc *lradc, unsigned ch) { /* @@ -450,12 +467,8 @@ static void mxs_lradc_setup_ts_channel(struct mxs_lradc *lradc, unsigned ch) LRADC_DELAY_DELAY(lradc->over_sample_delay - 1), LRADC_DELAY(3)); - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(2) | - LRADC_CTRL1_LRADC_IRQ(3) | LRADC_CTRL1_LRADC_IRQ(4) | - LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(ch), LRADC_CTRL1); - /* wake us again, when the complete conversion is done */ - mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(ch), LRADC_CTRL1); /* * after changing the touchscreen plates setting * the signals need some initial time to settle. Start the @@ -509,12 +522,8 @@ static void mxs_lradc_setup_ts_pressure(struct mxs_lradc *lradc, unsigned ch1, LRADC_DELAY_DELAY(lradc->over_sample_delay - 1), LRADC_DELAY(3)); - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(2) | - LRADC_CTRL1_LRADC_IRQ(3) | LRADC_CTRL1_LRADC_IRQ(4) | - LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(ch2), LRADC_CTRL1); - /* wake us again, when the conversions are done */ - mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(ch2), LRADC_CTRL1); /* * after changing the touchscreen plates setting * the signals need some initial time to settle. Start the @@ -580,36 +589,6 @@ static unsigned mxs_lradc_read_ts_pressure(struct mxs_lradc *lradc, #define TS_CH_XM 4 #define TS_CH_YM 5 -static int mxs_lradc_read_ts_channel(struct mxs_lradc *lradc) -{ - u32 reg; - int val; - - reg = readl(lradc->base + LRADC_CTRL1); - - /* only channels 3 to 5 are of interest here */ - if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_YP)) { - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_YP) | - LRADC_CTRL1_LRADC_IRQ(TS_CH_YP), LRADC_CTRL1); - val = mxs_lradc_read_raw_channel(lradc, TS_CH_YP); - } else if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_XM)) { - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_XM) | - LRADC_CTRL1_LRADC_IRQ(TS_CH_XM), LRADC_CTRL1); - val = mxs_lradc_read_raw_channel(lradc, TS_CH_XM); - } else if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_YM)) { - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_YM) | - LRADC_CTRL1_LRADC_IRQ(TS_CH_YM), LRADC_CTRL1); - val = mxs_lradc_read_raw_channel(lradc, TS_CH_YM); - } else { - return -EIO; - } - - mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(2)); - mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(3)); - - return val; -} - /* * YP(open)--+-------------+ * | |--+ @@ -653,7 +632,8 @@ static void mxs_lradc_prepare_x_pos(struct mxs_lradc *lradc) mxs_lradc_reg_set(lradc, mxs_lradc_drive_x_plate(lradc), LRADC_CTRL0); lradc->cur_plate = LRADC_SAMPLE_X; - mxs_lradc_setup_ts_channel(lradc, TS_CH_YP); + mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_YP); + mxs_lradc_setup_ts_channel(lradc, TOUCHSCREEN_VCHANNEL1); } /* @@ -674,7 +654,8 @@ static void mxs_lradc_prepare_y_pos(struct mxs_lradc *lradc) mxs_lradc_reg_set(lradc, mxs_lradc_drive_y_plate(lradc), LRADC_CTRL0); lradc->cur_plate = LRADC_SAMPLE_Y; - mxs_lradc_setup_ts_channel(lradc, TS_CH_XM); + mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_XM); + mxs_lradc_setup_ts_channel(lradc, TOUCHSCREEN_VCHANNEL1); } /* @@ -695,7 +676,10 @@ static void mxs_lradc_prepare_pressure(struct mxs_lradc *lradc) mxs_lradc_reg_set(lradc, mxs_lradc_drive_pressure(lradc), LRADC_CTRL0); lradc->cur_plate = LRADC_SAMPLE_PRESSURE; - mxs_lradc_setup_ts_pressure(lradc, TS_CH_XP, TS_CH_YM); + mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_YM); + mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL2, TS_CH_XP); + mxs_lradc_setup_ts_pressure(lradc, TOUCHSCREEN_VCHANNEL2, + TOUCHSCREEN_VCHANNEL1); } static void mxs_lradc_enable_touch_detection(struct mxs_lradc *lradc) @@ -708,6 +692,19 @@ static void mxs_lradc_enable_touch_detection(struct mxs_lradc *lradc) mxs_lradc_reg_set(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, LRADC_CTRL1); } +static void mxs_lradc_start_touch_event(struct mxs_lradc *lradc) +{ + mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, + LRADC_CTRL1); + mxs_lradc_reg_set(lradc, + LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1), LRADC_CTRL1); + /* + * start with the Y-pos, because it uses nearly the same plate + * settings like the touch detection + */ + mxs_lradc_prepare_y_pos(lradc); +} + static void mxs_lradc_report_ts_event(struct mxs_lradc *lradc) { input_report_abs(lradc->ts_input, ABS_X, lradc->ts_x_pos); @@ -725,10 +722,12 @@ static void mxs_lradc_complete_touch_event(struct mxs_lradc *lradc) * start a dummy conversion to burn time to settle the signals * note: we are not interested in the conversion's value */ - mxs_lradc_reg_wrt(lradc, 0, LRADC_CH(5)); - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1); - mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(5), LRADC_CTRL1); - mxs_lradc_reg_wrt(lradc, LRADC_DELAY_TRIGGER(1 << 5) | + mxs_lradc_reg_wrt(lradc, 0, LRADC_CH(TOUCHSCREEN_VCHANNEL1)); + mxs_lradc_reg_clear(lradc, + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) | + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2), LRADC_CTRL1); + mxs_lradc_reg_wrt(lradc, + LRADC_DELAY_TRIGGER(1 << TOUCHSCREEN_VCHANNEL1) | LRADC_DELAY_KICK | LRADC_DELAY_DELAY(10), /* waste 5 ms */ LRADC_DELAY(2)); } @@ -760,59 +759,45 @@ static void mxs_lradc_finish_touch_event(struct mxs_lradc *lradc, bool valid) /* if it is released, wait for the next touch via IRQ */ lradc->cur_plate = LRADC_TOUCH; - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ, LRADC_CTRL1); + mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(2)); + mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(3)); + mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ | + LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1) | + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1), LRADC_CTRL1); mxs_lradc_reg_set(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, LRADC_CTRL1); } /* touchscreen's state machine */ static void mxs_lradc_handle_touch(struct mxs_lradc *lradc) { - int val; - switch (lradc->cur_plate) { case LRADC_TOUCH: - /* - * start with the Y-pos, because it uses nearly the same plate - * settings like the touch detection - */ - if (mxs_lradc_check_touch_event(lradc)) { - mxs_lradc_reg_clear(lradc, - LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, - LRADC_CTRL1); - mxs_lradc_prepare_y_pos(lradc); - } + if (mxs_lradc_check_touch_event(lradc)) + mxs_lradc_start_touch_event(lradc); mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ, LRADC_CTRL1); return; case LRADC_SAMPLE_Y: - val = mxs_lradc_read_ts_channel(lradc); - if (val < 0) { - mxs_lradc_enable_touch_detection(lradc); /* re-start */ - return; - } - lradc->ts_y_pos = val; + lradc->ts_y_pos = mxs_lradc_read_raw_channel(lradc, + TOUCHSCREEN_VCHANNEL1); mxs_lradc_prepare_x_pos(lradc); return; case LRADC_SAMPLE_X: - val = mxs_lradc_read_ts_channel(lradc); - if (val < 0) { - mxs_lradc_enable_touch_detection(lradc); /* re-start */ - return; - } - lradc->ts_x_pos = val; + lradc->ts_x_pos = mxs_lradc_read_raw_channel(lradc, + TOUCHSCREEN_VCHANNEL1); mxs_lradc_prepare_pressure(lradc); return; case LRADC_SAMPLE_PRESSURE: - lradc->ts_pressure = - mxs_lradc_read_ts_pressure(lradc, TS_CH_XP, TS_CH_YM); + lradc->ts_pressure = mxs_lradc_read_ts_pressure(lradc, + TOUCHSCREEN_VCHANNEL2, + TOUCHSCREEN_VCHANNEL1); mxs_lradc_complete_touch_event(lradc); return; case LRADC_SAMPLE_VALID: - val = mxs_lradc_read_ts_channel(lradc); /* ignore the value */ mxs_lradc_finish_touch_event(lradc, 1); break; } @@ -844,9 +829,9 @@ static int mxs_lradc_read_single(struct iio_dev *iio_dev, int chan, int *val) * used if doing raw sampling. */ if (lradc->soc == IMX28_LRADC) - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK, + mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(0), LRADC_CTRL1); - mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0); + mxs_lradc_reg_clear(lradc, 0x1, LRADC_CTRL0); /* Enable / disable the divider per requirement */ if (test_bit(chan, &lradc->is_divided)) @@ -1090,9 +1075,8 @@ static void mxs_lradc_disable_ts(struct mxs_lradc *lradc) { /* stop all interrupts from firing */ mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN | - LRADC_CTRL1_LRADC_IRQ_EN(2) | LRADC_CTRL1_LRADC_IRQ_EN(3) | - LRADC_CTRL1_LRADC_IRQ_EN(4) | LRADC_CTRL1_LRADC_IRQ_EN(5), - LRADC_CTRL1); + LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1) | + LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL2), LRADC_CTRL1); /* Power-down touchscreen touch-detect circuitry. */ mxs_lradc_reg_clear(lradc, mxs_lradc_plate_mask(lradc), LRADC_CTRL0); @@ -1158,26 +1142,31 @@ static irqreturn_t mxs_lradc_handle_irq(int irq, void *data) struct iio_dev *iio = data; struct mxs_lradc *lradc = iio_priv(iio); unsigned long reg = readl(lradc->base + LRADC_CTRL1); + uint32_t clr_irq = mxs_lradc_irq_mask(lradc); const uint32_t ts_irq_mask = LRADC_CTRL1_TOUCH_DETECT_IRQ | - LRADC_CTRL1_LRADC_IRQ(2) | - LRADC_CTRL1_LRADC_IRQ(3) | - LRADC_CTRL1_LRADC_IRQ(4) | - LRADC_CTRL1_LRADC_IRQ(5); + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) | + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2); if (!(reg & mxs_lradc_irq_mask(lradc))) return IRQ_NONE; - if (lradc->use_touchscreen && (reg & ts_irq_mask)) + if (lradc->use_touchscreen && (reg & ts_irq_mask)) { mxs_lradc_handle_touch(lradc); - if (iio_buffer_enabled(iio)) - iio_trigger_poll(iio->trig); - else if (reg & LRADC_CTRL1_LRADC_IRQ(0)) + /* Make sure we don't clear the next conversion's interrupt. */ + clr_irq &= ~(LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) | + LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2)); + } + + if (iio_buffer_enabled(iio)) { + if (reg & lradc->buffer_vchans) + iio_trigger_poll(iio->trig); + } else if (reg & LRADC_CTRL1_LRADC_IRQ(0)) { complete(&lradc->completion); + } - mxs_lradc_reg_clear(lradc, reg & mxs_lradc_irq_mask(lradc), - LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, reg & clr_irq, LRADC_CTRL1); return IRQ_HANDLED; } @@ -1289,9 +1278,10 @@ static int mxs_lradc_buffer_preenable(struct iio_dev *iio) } if (lradc->soc == IMX28_LRADC) - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK, - LRADC_CTRL1); - mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0); + mxs_lradc_reg_clear(lradc, + lradc->buffer_vchans << LRADC_CTRL1_LRADC_IRQ_EN_OFFSET, + LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, lradc->buffer_vchans, LRADC_CTRL0); for_each_set_bit(chan, iio->active_scan_mask, LRADC_MAX_TOTAL_CHANS) { ctrl4_set |= chan << LRADC_CTRL4_LRADCSELECT_OFFSET(ofs); @@ -1324,10 +1314,11 @@ static int mxs_lradc_buffer_postdisable(struct iio_dev *iio) mxs_lradc_reg_clear(lradc, LRADC_DELAY_TRIGGER_LRADCS_MASK | LRADC_DELAY_KICK, LRADC_DELAY(0)); - mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0); + mxs_lradc_reg_clear(lradc, lradc->buffer_vchans, LRADC_CTRL0); if (lradc->soc == IMX28_LRADC) - mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK, - LRADC_CTRL1); + mxs_lradc_reg_clear(lradc, + lradc->buffer_vchans << LRADC_CTRL1_LRADC_IRQ_EN_OFFSET, + LRADC_CTRL1); kfree(lradc->buffer); mutex_unlock(&lradc->lock); @@ -1353,7 +1344,7 @@ static bool mxs_lradc_validate_scan_mask(struct iio_dev *iio, if (lradc->use_touchbutton) rsvd_chans++; if (lradc->use_touchscreen) - rsvd_chans++; + rsvd_chans += 2; /* Test for attempts to map channels with special mode of operation. */ if (bitmap_intersects(mask, &rsvd_mask, LRADC_MAX_TOTAL_CHANS)) @@ -1413,6 +1404,13 @@ static const struct iio_chan_spec mxs_lradc_chan_spec[] = { .channel = 8, .scan_type = {.sign = 'u', .realbits = 18, .storagebits = 32,}, }, + /* Hidden channel to keep indexes */ + { + .type = IIO_TEMP, + .indexed = 1, + .scan_index = -1, + .channel = 9, + }, MXS_ADC_CHAN(10, IIO_VOLTAGE), /* VDDIO */ MXS_ADC_CHAN(11, IIO_VOLTAGE), /* VTH */ MXS_ADC_CHAN(12, IIO_VOLTAGE), /* VDDA */ @@ -1583,6 +1581,11 @@ static int mxs_lradc_probe(struct platform_device *pdev) touch_ret = mxs_lradc_probe_touchscreen(lradc, node); + if (touch_ret == 0) + lradc->buffer_vchans = BUFFER_VCHANS_LIMITED; + else + lradc->buffer_vchans = BUFFER_VCHANS_ALL; + /* Grab all IRQ sources */ for (i = 0; i < of_cfg->irq_count; i++) { lradc->irq[i] = platform_get_irq(pdev, i); diff --git a/drivers/staging/iio/magnetometer/hmc5843_core.c b/drivers/staging/iio/magnetometer/hmc5843_core.c index fd171d8b38fb..90cc18b703cf 100644 --- a/drivers/staging/iio/magnetometer/hmc5843_core.c +++ b/drivers/staging/iio/magnetometer/hmc5843_core.c @@ -592,6 +592,7 @@ int hmc5843_common_probe(struct device *dev, struct regmap *regmap, mutex_init(&data->lock); indio_dev->dev.parent = dev; + indio_dev->name = dev->driver->name; indio_dev->info = &hmc5843_info; indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->channels = data->variant->channels; diff --git a/drivers/staging/iio/resolver/ad2s1200.c b/drivers/staging/iio/resolver/ad2s1200.c index 017d2f8379b7..c17893b4918c 100644 --- a/drivers/staging/iio/resolver/ad2s1200.c +++ b/drivers/staging/iio/resolver/ad2s1200.c @@ -18,6 +18,7 @@ #include <linux/delay.h> #include <linux/gpio.h> #include <linux/module.h> +#include <linux/bitops.h> #include <linux/iio/iio.h> #include <linux/iio/sysfs.h> @@ -68,7 +69,7 @@ static int ad2s1200_read_raw(struct iio_dev *indio_dev, break; case IIO_ANGL_VEL: vel = (((s16)(st->rx[0])) << 4) | ((st->rx[1] & 0xF0) >> 4); - vel = (vel << 4) >> 4; + vel = sign_extend32(vel, 11); *val = vel; break; default: diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 4324282afe49..03b2a90b9ac0 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -330,16 +330,6 @@ static void device_init_registers(struct vnt_private *pDevice) /* zonetype initial */ pDevice->byOriginalZonetype = pDevice->abyEEPROM[EEP_OFS_ZONETYPE]; - /* Get RFType */ - pDevice->byRFType = SROMbyReadEmbedded(pDevice->PortOffset, EEP_OFS_RFTYPE); - - /* force change RevID for VT3253 emu */ - if ((pDevice->byRFType & RF_EMU) != 0) - pDevice->byRevId = 0x80; - - pDevice->byRFType &= RF_MASK; - pr_debug("pDevice->byRFType = %x\n", pDevice->byRFType); - if (!pDevice->bZoneRegExist) pDevice->byZoneType = pDevice->abyEEPROM[EEP_OFS_ZONETYPE]; @@ -1187,12 +1177,14 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; PSTxDesc head_td; - u32 dma_idx = TYPE_AC0DMA; + u32 dma_idx; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); - if (!ieee80211_is_data(hdr->frame_control)) + if (ieee80211_is_data(hdr->frame_control)) + dma_idx = TYPE_AC0DMA; + else dma_idx = TYPE_TXDMA0; if (AVAIL_TD(priv, dma_idx) < 1) { @@ -1206,6 +1198,9 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) head_td->pTDInfo->skb = skb; + if (dma_idx == TYPE_AC0DMA) + head_td->pTDInfo->byFlags = TD_FLAGS_NETIF_SKB; + priv->iTDUsed[dma_idx]++; /* Take ownership */ @@ -1234,13 +1229,10 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) head_td->buff_addr = cpu_to_le32(head_td->pTDInfo->skb_dma); - if (dma_idx == TYPE_AC0DMA) { - head_td->pTDInfo->byFlags = TD_FLAGS_NETIF_SKB; - + if (head_td->pTDInfo->byFlags & TD_FLAGS_NETIF_SKB) MACvTransmitAC0(priv->PortOffset); - } else { + else MACvTransmit0(priv->PortOffset); - } spin_unlock_irqrestore(&priv->lock, flags); @@ -1778,6 +1770,12 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent) MACvInitialize(priv->PortOffset); MACvReadEtherAddress(priv->PortOffset, priv->abyCurrentNetAddr); + /* Get RFType */ + priv->byRFType = SROMbyReadEmbedded(priv->PortOffset, EEP_OFS_RFTYPE); + priv->byRFType &= RF_MASK; + + dev_dbg(&pcid->dev, "RF Type = %x\n", priv->byRFType); + device_get_options(priv); device_set_options(priv); /* Mask out the options cannot be set to the chip */ diff --git a/drivers/staging/vt6655/rf.c b/drivers/staging/vt6655/rf.c index 941b2adca95a..7626f635f160 100644 --- a/drivers/staging/vt6655/rf.c +++ b/drivers/staging/vt6655/rf.c @@ -794,6 +794,7 @@ bool RFbSetPower( break; case RATE_6M: case RATE_9M: + case RATE_12M: case RATE_18M: byPwr = priv->abyOFDMPwrTbl[uCH]; if (priv->byRFType == RF_UW2452) diff --git a/drivers/staging/vt6656/rf.c b/drivers/staging/vt6656/rf.c index c42cde59f598..c4286ccac320 100644 --- a/drivers/staging/vt6656/rf.c +++ b/drivers/staging/vt6656/rf.c @@ -640,6 +640,7 @@ int vnt_rf_setpower(struct vnt_private *priv, u32 rate, u32 channel) break; case RATE_6M: case RATE_9M: + case RATE_12M: case RATE_18M: case RATE_24M: case RATE_36M: diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 50bad55a0c42..77d64251af40 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1181,7 +1181,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * traditional iSCSI block I/O. */ if (iscsit_allocate_iovecs(cmd) < 0) { - return iscsit_add_reject_cmd(cmd, + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } immed_data = cmd->immediate_data; @@ -3468,6 +3468,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; bool inaddr_any = iscsit_check_inaddr_any(np); + char *fmt_str; if (np->np_network_transport != network_transport) continue; @@ -3495,8 +3496,12 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - len = sprintf(buf, "TargetAddress=" - "%s:%hu,%hu", + if (np->np_sockaddr.ss_family == AF_INET6) + fmt_str = "TargetAddress=[%s]:%hu,%hu"; + else + fmt_str = "TargetAddress=%s:%hu,%hu"; + + len = sprintf(buf, fmt_str, inaddr_any ? conn->local_ip : np->np_ip, np->np_port, tpg->tpgt); @@ -4256,11 +4261,17 @@ int iscsit_close_connection( pr_debug("Closing iSCSI connection CID %hu on SID:" " %u\n", conn->cid, sess->sid); /* - * Always up conn_logout_comp just in case the RX Thread is sleeping - * and the logout response never got sent because the connection - * failed. + * Always up conn_logout_comp for the traditional TCP case just in case + * the RX Thread in iscsi_target_rx_opcode() is sleeping and the logout + * response never got sent because the connection failed. + * + * However for iser-target, isert_wait4logout() is using conn_logout_comp + * to signal logout response TX interrupt completion. Go ahead and skip + * this for iser since isert_rx_opcode() does not wait on logout failure, + * and to avoid iscsi_conn pointer dereference in iser-target code. */ - complete(&conn->conn_logout_comp); + if (conn->conn_transport->transport_type == ISCSI_TCP) + complete(&conn->conn_logout_comp); iscsi_release_thread_set(conn); diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 1c197bad6132..bdd8731a4daa 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -22,7 +22,6 @@ #include <target/target_core_fabric.h> #include <target/iscsi/iscsi_target_core.h> -#include <target/iscsi/iscsi_transport.h> #include "iscsi_target_seq_pdu_list.h" #include "iscsi_target_tq.h" #include "iscsi_target_erl0.h" @@ -940,8 +939,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { spin_unlock_bh(&conn->state_lock); - if (conn->conn_transport->transport_type == ISCSI_TCP) - iscsit_close_connection(conn); + iscsit_close_connection(conn); return; } diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 6b3c32954689..c36bd7c29136 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -953,11 +953,8 @@ static int tcm_loop_make_nexus( transport_free_session(tl_nexus->se_sess); goto out; } - /* - * Now, register the SAS I_T Nexus as active with the call to - * transport_register_session() - */ - __transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl, + /* Now, register the SAS I_T Nexus as active. */ + transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl, tl_nexus->se_sess, tl_nexus); tl_tpg->tl_nexus = tl_nexus; pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated" diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 58f49ff69b14..7faa6aef9a4d 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -650,6 +650,18 @@ static u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) return aligned_max_sectors; } +bool se_dev_check_wce(struct se_device *dev) +{ + bool wce = false; + + if (dev->transport->get_write_cache) + wce = dev->transport->get_write_cache(dev); + else if (dev->dev_attrib.emulate_write_cache > 0) + wce = true; + + return wce; +} + int se_dev_set_max_unmap_lba_count( struct se_device *dev, u32 max_unmap_lba_count) @@ -767,6 +779,16 @@ int se_dev_set_emulate_fua_write(struct se_device *dev, int flag) pr_err("Illegal value %d\n", flag); return -EINVAL; } + if (flag && + dev->transport->get_write_cache) { + pr_warn("emulate_fua_write not supported for this device, ignoring\n"); + return 0; + } + if (dev->export_count) { + pr_err("emulate_fua_write cannot be changed with active" + " exports: %d\n", dev->export_count); + return -EINVAL; + } dev->dev_attrib.emulate_fua_write = flag; pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n", dev, dev->dev_attrib.emulate_fua_write); @@ -801,7 +823,11 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag) pr_err("emulate_write_cache not supported for this device\n"); return -EINVAL; } - + if (dev->export_count) { + pr_err("emulate_write_cache cannot be changed with active" + " exports: %d\n", dev->export_count); + return -EINVAL; + } dev->dev_attrib.emulate_write_cache = flag; pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n", dev, dev->dev_attrib.emulate_write_cache); @@ -1534,8 +1560,6 @@ int target_configure_device(struct se_device *dev) ret = dev->transport->configure_device(dev); if (ret) goto out; - dev->dev_flags |= DF_CONFIGURED; - /* * XXX: there is not much point to have two different values here.. */ @@ -1597,6 +1621,8 @@ int target_configure_device(struct se_device *dev) list_add_tail(&dev->g_dev_node, &g_device_list); mutex_unlock(&g_device_mutex); + dev->dev_flags |= DF_CONFIGURED; + return 0; out_free_alua: diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 1045dcd7bf65..f6c954c4635f 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1121,7 +1121,7 @@ static u32 pscsi_get_device_type(struct se_device *dev) struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); struct scsi_device *sd = pdv->pdv_sd; - return sd->type; + return (sd) ? sd->type : TYPE_NO_LUN; } static sector_t pscsi_get_blocks(struct se_device *dev) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 9a2f9d3a6e70..3e7297411110 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -708,8 +708,7 @@ sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb) } } if (cdb[1] & 0x8) { - if (!dev->dev_attrib.emulate_fua_write || - !dev->dev_attrib.emulate_write_cache) { + if (!dev->dev_attrib.emulate_fua_write || !se_dev_check_wce(dev)) { pr_err("Got CDB: 0x%02x with FUA bit set, but device" " does not advertise support for FUA write\n", cdb[0]); diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c index 460e93109473..6c8bd6bc175c 100644 --- a/drivers/target/target_core_spc.c +++ b/drivers/target/target_core_spc.c @@ -454,19 +454,6 @@ check_scsi_name: } EXPORT_SYMBOL(spc_emulate_evpd_83); -static bool -spc_check_dev_wce(struct se_device *dev) -{ - bool wce = false; - - if (dev->transport->get_write_cache) - wce = dev->transport->get_write_cache(dev); - else if (dev->dev_attrib.emulate_write_cache > 0) - wce = true; - - return wce; -} - /* Extended INQUIRY Data VPD Page */ static sense_reason_t spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf) @@ -490,7 +477,7 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf) buf[5] = 0x07; /* If WriteCache emulation is enabled, set V_SUP */ - if (spc_check_dev_wce(dev)) + if (se_dev_check_wce(dev)) buf[6] = 0x01; /* If an LBA map is present set R_SUP */ spin_lock(&cmd->se_dev->t10_alua.lba_map_lock); @@ -897,7 +884,7 @@ static int spc_modesense_caching(struct se_cmd *cmd, u8 pc, u8 *p) if (pc == 1) goto out; - if (spc_check_dev_wce(dev)) + if (se_dev_check_wce(dev)) p[2] = 0x04; /* Write Cache Enable */ p[12] = 0x20; /* Disabled Read Ahead */ @@ -1009,7 +996,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd) (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY))) spc_modesense_write_protect(&buf[length], type); - if ((spc_check_dev_wce(dev)) && + if ((se_dev_check_wce(dev)) && (dev->dev_attrib.emulate_fua_write > 0)) spc_modesense_dpofua(&buf[length], type); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 0adc0f650213..ac3cbabdbdf0 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2389,6 +2389,10 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + + if (ret && ack_kref) + target_put_sess_cmd(se_sess, se_cmd); + return ret; } EXPORT_SYMBOL(target_get_sess_cmd); diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index 97b486c3dda1..583e755d8091 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -359,7 +359,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd) ep = fc_seq_exch(seq); if (ep) { lport = ep->lp; - if (lport && (ep->xid <= lport->lro_xid)) + if (lport && (ep->xid <= lport->lro_xid)) { /* * "ddp_done" trigger invalidation of HW * specific DDP context @@ -374,6 +374,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd) * identified using ep->xid) */ cmd->was_ddp_setup = 0; + } } } } diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c index f88b08877025..1e25133d35e2 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c @@ -208,7 +208,7 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, trip_cnt, GFP_KERNEL); if (!int34x_thermal_zone->aux_trips) { ret = -ENOMEM; - goto free_mem; + goto err_trip_alloc; } trip_mask = BIT(trip_cnt) - 1; int34x_thermal_zone->aux_trip_nr = trip_cnt; @@ -248,14 +248,15 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev, 0, 0); if (IS_ERR(int34x_thermal_zone->zone)) { ret = PTR_ERR(int34x_thermal_zone->zone); - goto free_lpat; + goto err_thermal_zone; } return int34x_thermal_zone; -free_lpat: +err_thermal_zone: acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); -free_mem: + kfree(int34x_thermal_zone->aux_trips); +err_trip_alloc: kfree(int34x_thermal_zone); return ERR_PTR(ret); } @@ -266,6 +267,7 @@ void int340x_thermal_zone_remove(struct int34x_thermal_zone { thermal_zone_device_unregister(int34x_thermal_zone->zone); acpi_lpat_free_conversion_table(int34x_thermal_zone->lpat_table); + kfree(int34x_thermal_zone->aux_trips); kfree(int34x_thermal_zone); } EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove); diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1fc54ab911d2..1d30b0975651 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -682,6 +682,7 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) if (on) { con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + con |= (1 << EXYNOS7_PD_DET_EN_SHIFT); interrupt_en = (of_thermal_is_trip_valid(tz, 7) << EXYNOS7_TMU_INTEN_RISE7_SHIFT) | @@ -704,9 +705,9 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) interrupt_en << EXYNOS_TMU_INTEN_FALL0_SHIFT; } else { con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + con &= ~(1 << EXYNOS7_PD_DET_EN_SHIFT); interrupt_en = 0; /* Disable all interrupts */ } - con |= 1 << EXYNOS7_PD_DET_EN_SHIFT; writel(interrupt_en, data->base + EXYNOS7_TMU_REG_INTEN); writel(con, data->base + EXYNOS_TMU_REG_CONTROL); diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c index d1ec5804c0bb..76c515dd802b 100644 --- a/drivers/thermal/st/st_thermal.c +++ b/drivers/thermal/st/st_thermal.c @@ -25,7 +25,7 @@ * Function to allocate regfields which are common * between syscfg and memory mapped based sensors */ -int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor) +static int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor) { struct device *dev = sensor->dev; struct regmap *regmap = sensor->regmap; diff --git a/drivers/thermal/st/st_thermal_memmap.c b/drivers/thermal/st/st_thermal_memmap.c index 067bfcdb91d6..fc0c9e198710 100644 --- a/drivers/thermal/st/st_thermal_memmap.c +++ b/drivers/thermal/st/st_thermal_memmap.c @@ -157,7 +157,7 @@ static const struct st_thermal_sensor_ops st_mmap_sensor_ops = { }; /* Compatible device data stih416 mpe thermal sensor */ -const struct st_thermal_compat_data st_416mpe_cdata = { +static const struct st_thermal_compat_data st_416mpe_cdata = { .reg_fields = st_mmap_thermal_regfields, .ops = &st_mmap_sensor_ops, .calibration_val = 14, @@ -166,7 +166,7 @@ const struct st_thermal_compat_data st_416mpe_cdata = { }; /* Compatible device data stih407 thermal sensor */ -const struct st_thermal_compat_data st_407_cdata = { +static const struct st_thermal_compat_data st_407_cdata = { .reg_fields = st_mmap_thermal_regfields, .ops = &st_mmap_sensor_ops, .calibration_val = 16, @@ -174,19 +174,19 @@ const struct st_thermal_compat_data st_407_cdata = { .crit_temp = 120, }; -static struct of_device_id st_mmap_thermal_of_match[] = { +static const struct of_device_id st_mmap_thermal_of_match[] = { { .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata }, { .compatible = "st,stih407-thermal", .data = &st_407_cdata }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match); -int st_mmap_probe(struct platform_device *pdev) +static int st_mmap_probe(struct platform_device *pdev) { return st_thermal_register(pdev, st_mmap_thermal_of_match); } -int st_mmap_remove(struct platform_device *pdev) +static int st_mmap_remove(struct platform_device *pdev) { return st_thermal_unregister(pdev); } diff --git a/drivers/thermal/st/st_thermal_syscfg.c b/drivers/thermal/st/st_thermal_syscfg.c index 26d36a242bb8..3df5b7890703 100644 --- a/drivers/thermal/st/st_thermal_syscfg.c +++ b/drivers/thermal/st/st_thermal_syscfg.c @@ -104,7 +104,7 @@ static const struct st_thermal_sensor_ops st_syscfg_sensor_ops = { }; /* Compatible device data for stih415 sas thermal sensor */ -const struct st_thermal_compat_data st_415sas_cdata = { +static const struct st_thermal_compat_data st_415sas_cdata = { .sys_compat = "st,stih415-front-syscfg", .reg_fields = st_415sas_regfields, .ops = &st_syscfg_sensor_ops, @@ -114,7 +114,7 @@ const struct st_thermal_compat_data st_415sas_cdata = { }; /* Compatible device data for stih415 mpe thermal sensor */ -const struct st_thermal_compat_data st_415mpe_cdata = { +static const struct st_thermal_compat_data st_415mpe_cdata = { .sys_compat = "st,stih415-system-syscfg", .reg_fields = st_415mpe_regfields, .ops = &st_syscfg_sensor_ops, @@ -124,7 +124,7 @@ const struct st_thermal_compat_data st_415mpe_cdata = { }; /* Compatible device data for stih416 sas thermal sensor */ -const struct st_thermal_compat_data st_416sas_cdata = { +static const struct st_thermal_compat_data st_416sas_cdata = { .sys_compat = "st,stih416-front-syscfg", .reg_fields = st_416sas_regfields, .ops = &st_syscfg_sensor_ops, @@ -134,7 +134,7 @@ const struct st_thermal_compat_data st_416sas_cdata = { }; /* Compatible device data for stid127 thermal sensor */ -const struct st_thermal_compat_data st_127_cdata = { +static const struct st_thermal_compat_data st_127_cdata = { .sys_compat = "st,stid127-cpu-syscfg", .reg_fields = st_127_regfields, .ops = &st_syscfg_sensor_ops, @@ -143,7 +143,7 @@ const struct st_thermal_compat_data st_127_cdata = { .crit_temp = 120, }; -static struct of_device_id st_syscfg_thermal_of_match[] = { +static const struct of_device_id st_syscfg_thermal_of_match[] = { { .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata }, { .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata }, { .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata }, @@ -152,12 +152,12 @@ static struct of_device_id st_syscfg_thermal_of_match[] = { }; MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match); -int st_syscfg_probe(struct platform_device *pdev) +static int st_syscfg_probe(struct platform_device *pdev) { return st_thermal_register(pdev, st_syscfg_thermal_of_match); } -int st_syscfg_remove(struct platform_device *pdev) +static int st_syscfg_remove(struct platform_device *pdev) { return st_thermal_unregister(pdev); } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 48491d1a81d6..4108db7e10c1 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -458,8 +458,10 @@ static void update_temperature(struct thermal_zone_device *tz) ret = thermal_zone_get_temp(tz, &temp); if (ret) { - dev_warn(&tz->device, "failed to read out thermal zone %d\n", - tz->id); + if (ret != -EAGAIN) + dev_warn(&tz->device, + "failed to read out thermal zone (%d)\n", + ret); return; } @@ -899,6 +901,22 @@ thermal_cooling_device_trip_point_show(struct device *dev, return sprintf(buf, "%d\n", instance->trip); } +static struct attribute *cooling_device_attrs[] = { + &dev_attr_cdev_type.attr, + &dev_attr_max_state.attr, + &dev_attr_cur_state.attr, + NULL, +}; + +static const struct attribute_group cooling_device_attr_group = { + .attrs = cooling_device_attrs, +}; + +static const struct attribute_group *cooling_device_attr_groups[] = { + &cooling_device_attr_group, + NULL, +}; + /* Device management */ /** @@ -1130,6 +1148,7 @@ __thermal_cooling_device_register(struct device_node *np, cdev->ops = ops; cdev->updated = false; cdev->device.class = &thermal_class; + cdev->device.groups = cooling_device_attr_groups; cdev->devdata = devdata; dev_set_name(&cdev->device, "cooling_device%d", cdev->id); result = device_register(&cdev->device); @@ -1139,21 +1158,6 @@ __thermal_cooling_device_register(struct device_node *np, return ERR_PTR(result); } - /* sys I/F */ - if (type) { - result = device_create_file(&cdev->device, &dev_attr_cdev_type); - if (result) - goto unregister; - } - - result = device_create_file(&cdev->device, &dev_attr_max_state); - if (result) - goto unregister; - - result = device_create_file(&cdev->device, &dev_attr_cur_state); - if (result) - goto unregister; - /* Add 'this' new cdev to the global cdev list */ mutex_lock(&thermal_list_lock); list_add(&cdev->node, &thermal_cdev_list); @@ -1163,11 +1167,6 @@ __thermal_cooling_device_register(struct device_node *np, bind_cdev(cdev); return cdev; - -unregister: - release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id); - device_unregister(&cdev->device); - return ERR_PTR(result); } /** diff --git a/drivers/tty/bfin_jtag_comm.c b/drivers/tty/bfin_jtag_comm.c index d7b198c400c7..ce24182f8514 100644 --- a/drivers/tty/bfin_jtag_comm.c +++ b/drivers/tty/bfin_jtag_comm.c @@ -210,18 +210,6 @@ bfin_jc_chars_in_buffer(struct tty_struct *tty) return circ_cnt(&bfin_jc_write_buf); } -static void -bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout) -{ - unsigned long expire = jiffies + timeout; - while (!circ_empty(&bfin_jc_write_buf)) { - if (signal_pending(current)) - break; - if (time_after(jiffies, expire)) - break; - } -} - static const struct tty_operations bfin_jc_ops = { .open = bfin_jc_open, .close = bfin_jc_close, @@ -230,7 +218,6 @@ static const struct tty_operations bfin_jc_ops = { .flush_chars = bfin_jc_flush_chars, .write_room = bfin_jc_write_room, .chars_in_buffer = bfin_jc_chars_in_buffer, - .wait_until_sent = bfin_jc_wait_until_sent, }; static int __init bfin_jc_init(void) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index e3b9570a1eff..deae122c9c4b 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -2138,8 +2138,8 @@ int serial8250_do_startup(struct uart_port *port) /* * Clear the interrupt registers. */ - if (serial_port_in(port, UART_LSR) & UART_LSR_DR) - serial_port_in(port, UART_RX); + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); @@ -2300,8 +2300,8 @@ dont_test_tx_en: * saved flags to avoid getting false values from polling * routines or the previous session. */ - if (serial_port_in(port, UART_LSR) & UART_LSR_DR) - serial_port_in(port, UART_RX); + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); serial_port_in(port, UART_IIR); serial_port_in(port, UART_MSR); up->lsr_saved_flags = 0; @@ -2394,8 +2394,7 @@ void serial8250_do_shutdown(struct uart_port *port) * Read data port to reset things, and then unlink from * the IRQ chain. */ - if (serial_port_in(port, UART_LSR) & UART_LSR_DR) - serial_port_in(port, UART_RX); + serial_port_in(port, UART_RX); serial8250_rpm_put(up); del_timer_sync(&up->timer); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index e60116235836..6ae5b8560e4d 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -59,6 +59,8 @@ struct dw8250_data { u8 usr_reg; int last_mcr; int line; + int msr_mask_on; + int msr_mask_off; struct clk *clk; struct clk *pclk; struct reset_control *rst; @@ -81,6 +83,12 @@ static inline int dw8250_modify_msr(struct uart_port *p, int offset, int value) value &= ~UART_MSR_DCTS; } + /* Override any modem control signals if needed */ + if (offset == UART_MSR) { + value |= d->msr_mask_on; + value &= ~d->msr_mask_off; + } + return value; } @@ -111,7 +119,10 @@ static void dw8250_serial_out(struct uart_port *p, int offset, int value) dw8250_force_idle(p); writeb(value, p->membase + (UART_LCR << p->regshift)); } - dev_err(p->dev, "Couldn't set LCR to %d\n", value); + /* + * FIXME: this deadlocks if port->lock is already held + * dev_err(p->dev, "Couldn't set LCR to %d\n", value); + */ } } @@ -155,7 +166,10 @@ static void dw8250_serial_outq(struct uart_port *p, int offset, int value) __raw_writeq(value & 0xff, p->membase + (UART_LCR << p->regshift)); } - dev_err(p->dev, "Couldn't set LCR to %d\n", value); + /* + * FIXME: this deadlocks if port->lock is already held + * dev_err(p->dev, "Couldn't set LCR to %d\n", value); + */ } } #endif /* CONFIG_64BIT */ @@ -179,7 +193,10 @@ static void dw8250_serial_out32(struct uart_port *p, int offset, int value) dw8250_force_idle(p); writel(value, p->membase + (UART_LCR << p->regshift)); } - dev_err(p->dev, "Couldn't set LCR to %d\n", value); + /* + * FIXME: this deadlocks if port->lock is already held + * dev_err(p->dev, "Couldn't set LCR to %d\n", value); + */ } } @@ -334,6 +351,30 @@ static int dw8250_probe_of(struct uart_port *p, if (id >= 0) p->line = id; + if (of_property_read_bool(np, "dcd-override")) { + /* Always report DCD as active */ + data->msr_mask_on |= UART_MSR_DCD; + data->msr_mask_off |= UART_MSR_DDCD; + } + + if (of_property_read_bool(np, "dsr-override")) { + /* Always report DSR as active */ + data->msr_mask_on |= UART_MSR_DSR; + data->msr_mask_off |= UART_MSR_DDSR; + } + + if (of_property_read_bool(np, "cts-override")) { + /* Always report DSR as active */ + data->msr_mask_on |= UART_MSR_DSR; + data->msr_mask_off |= UART_MSR_DDSR; + } + + if (of_property_read_bool(np, "ri-override")) { + /* Always report Ring indicator as inactive */ + data->msr_mask_off |= UART_MSR_RI; + data->msr_mask_off |= UART_MSR_TERI; + } + /* clock got configured through clk api, all done */ if (p->uartclk) return 0; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index daf2c82984e9..892eb32cdef4 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -69,7 +69,7 @@ static void moan_device(const char *str, struct pci_dev *dev) "Please send the output of lspci -vv, this\n" "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n" "manufacturer and name of serial board or\n" - "modem board to rmk+serial@arm.linux.org.uk.\n", + "modem board to <linux-serial@vger.kernel.org>.\n", pci_name(dev), str, dev->vendor, dev->device, dev->subsystem_vendor, dev->subsystem_device); } @@ -1989,13 +1989,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { }, { .vendor = PCI_VENDOR_ID_INTEL, - .device = PCI_DEVICE_ID_INTEL_QRK_UART, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_BSW_UART1, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, @@ -2201,13 +2194,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { */ { .vendor = PCI_VENDOR_ID_PLX, - .device = PCI_DEVICE_ID_PLX_9030, - .subvendor = PCI_SUBVENDOR_ID_PERLE, - .subdevice = PCI_ANY_ID, - .setup = pci_default_setup, - }, - { - .vendor = PCI_VENDOR_ID_PLX, .device = PCI_DEVICE_ID_PLX_9050, .subvendor = PCI_SUBVENDOR_ID_EXSYS, .subdevice = PCI_SUBDEVICE_ID_EXSYS_4055, @@ -5415,10 +5401,6 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_bt_2_115200 }, - { PCI_VENDOR_ID_WCH, PCI_DEVICE_ID_WCH_CH352_2S, - PCI_ANY_ID, PCI_ANY_ID, - 0, 0, pbn_b0_bt_2_115200 }, - { PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_wch384_4 }, diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 846552bff67d..4e959c43f680 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -47,6 +47,7 @@ #include <linux/gpio/consumer.h> #include <linux/err.h> #include <linux/irq.h> +#include <linux/suspend.h> #include <asm/io.h> #include <asm/ioctls.h> @@ -173,6 +174,12 @@ struct atmel_uart_port { bool ms_irq_enabled; bool is_usart; /* usart or uart */ struct timer_list uart_timer; /* uart timer */ + + bool suspended; + unsigned int pending; + unsigned int pending_status; + spinlock_t lock_suspended; + int (*prepare_rx)(struct uart_port *port); int (*prepare_tx)(struct uart_port *port); void (*schedule_rx)(struct uart_port *port); @@ -1179,12 +1186,15 @@ static irqreturn_t atmel_interrupt(int irq, void *dev_id) { struct uart_port *port = dev_id; struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); - unsigned int status, pending, pass_counter = 0; + unsigned int status, pending, mask, pass_counter = 0; bool gpio_handled = false; + spin_lock(&atmel_port->lock_suspended); + do { status = atmel_get_lines_status(port); - pending = status & UART_GET_IMR(port); + mask = UART_GET_IMR(port); + pending = status & mask; if (!gpio_handled) { /* * Dealing with GPIO interrupt @@ -1206,11 +1216,21 @@ static irqreturn_t atmel_interrupt(int irq, void *dev_id) if (!pending) break; + if (atmel_port->suspended) { + atmel_port->pending |= pending; + atmel_port->pending_status = status; + UART_PUT_IDR(port, mask); + pm_system_wakeup(); + break; + } + atmel_handle_receive(port, pending); atmel_handle_status(port, pending, status); atmel_handle_transmit(port, pending); } while (pass_counter++ < ATMEL_ISR_PASS_LIMIT); + spin_unlock(&atmel_port->lock_suspended); + return pass_counter ? IRQ_HANDLED : IRQ_NONE; } @@ -1742,7 +1762,8 @@ static int atmel_startup(struct uart_port *port) /* * Allocate the IRQ */ - retval = request_irq(port->irq, atmel_interrupt, IRQF_SHARED, + retval = request_irq(port->irq, atmel_interrupt, + IRQF_SHARED | IRQF_COND_SUSPEND, tty ? tty->name : "atmel_serial", port); if (retval) { dev_err(port->dev, "atmel_startup - Can't get irq\n"); @@ -2513,8 +2534,14 @@ static int atmel_serial_suspend(struct platform_device *pdev, /* we can not wake up if we're running on slow clock */ atmel_port->may_wakeup = device_may_wakeup(&pdev->dev); - if (atmel_serial_clk_will_stop()) + if (atmel_serial_clk_will_stop()) { + unsigned long flags; + + spin_lock_irqsave(&atmel_port->lock_suspended, flags); + atmel_port->suspended = true; + spin_unlock_irqrestore(&atmel_port->lock_suspended, flags); device_set_wakeup_enable(&pdev->dev, 0); + } uart_suspend_port(&atmel_uart, port); @@ -2525,6 +2552,18 @@ static int atmel_serial_resume(struct platform_device *pdev) { struct uart_port *port = platform_get_drvdata(pdev); struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); + unsigned long flags; + + spin_lock_irqsave(&atmel_port->lock_suspended, flags); + if (atmel_port->pending) { + atmel_handle_receive(port, atmel_port->pending); + atmel_handle_status(port, atmel_port->pending, + atmel_port->pending_status); + atmel_handle_transmit(port, atmel_port->pending); + atmel_port->pending = 0; + } + atmel_port->suspended = false; + spin_unlock_irqrestore(&atmel_port->lock_suspended, flags); uart_resume_port(&atmel_uart, port); device_set_wakeup_enable(&pdev->dev, atmel_port->may_wakeup); @@ -2593,6 +2632,8 @@ static int atmel_serial_probe(struct platform_device *pdev) port->backup_imr = 0; port->uart.line = ret; + spin_lock_init(&port->lock_suspended); + ret = atmel_init_gpios(port, &pdev->dev); if (ret < 0) dev_err(&pdev->dev, "%s", diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index b1893f3f88f1..3ad1458bfeb0 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -921,6 +921,9 @@ static void lpuart_setup_watermark(struct lpuart_port *sport) writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE, sport->port.membase + UARTPFIFO); + /* explicitly clear RDRF */ + readb(sport->port.membase + UARTSR1); + /* flush Tx and Rx FIFO */ writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH, sport->port.membase + UARTCFIFO); @@ -1076,6 +1079,8 @@ static int lpuart_startup(struct uart_port *port) sport->txfifo_size = 0x1 << (((temp >> UARTPFIFO_TXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK) + 1); + sport->port.fifosize = sport->txfifo_size; + sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) & UARTPFIFO_FIFOSIZE_MASK) + 1); diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 7ff61e24a195..33fb94f78967 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -133,10 +133,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev, if (of_find_property(np, "no-loopback-test", NULL)) port->flags |= UPF_SKIP_TEST; - ret = of_alias_get_id(np, "serial"); - if (ret >= 0) - port->line = ret; - port->dev = &ofdev->dev; switch (type) { diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index af821a908720..cf08876922f1 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -963,6 +963,7 @@ static void s3c24xx_serial_shutdown(struct uart_port *port) free_irq(ourport->tx_irq, ourport); tx_enabled(port) = 0; ourport->tx_claimed = 0; + ourport->tx_mode = 0; } if (ourport->rx_claimed) { diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c index 594b63331ef4..bca975f5093b 100644 --- a/drivers/tty/serial/sprd_serial.c +++ b/drivers/tty/serial/sprd_serial.c @@ -293,8 +293,10 @@ static irqreturn_t sprd_handle_irq(int irq, void *dev_id) ims = serial_in(port, SPRD_IMSR); - if (!ims) + if (!ims) { + spin_unlock(&port->lock); return IRQ_NONE; + } serial_out(port, SPRD_ICLR, ~0); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 51f066aa375e..2bb4dfc02873 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1028,8 +1028,8 @@ EXPORT_SYMBOL(start_tty); /* We limit tty time update visibility to every 8 seconds or so. */ static void tty_update_time(struct timespec *time) { - unsigned long sec = get_seconds() & ~7; - if ((long)(sec - time->tv_sec) > 0) + unsigned long sec = get_seconds(); + if (abs(sec - time->tv_sec) & ~7) time->tv_sec = sec; } diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index a5cf253b2544..632fc8152061 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -217,11 +217,17 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout) #endif if (!timeout) timeout = MAX_SCHEDULE_TIMEOUT; - if (wait_event_interruptible_timeout(tty->write_wait, - !tty_chars_in_buffer(tty), timeout) >= 0) { - if (tty->ops->wait_until_sent) - tty->ops->wait_until_sent(tty, timeout); - } + + timeout = wait_event_interruptible_timeout(tty->write_wait, + !tty_chars_in_buffer(tty), timeout); + if (timeout <= 0) + return; + + if (timeout == MAX_SCHEDULE_TIMEOUT) + timeout = 0; + + if (tty->ops->wait_until_sent) + tty->ops->wait_until_sent(tty, timeout); } EXPORT_SYMBOL(tty_wait_until_sent); diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index ff451048c1ac..4bfb7ac0239f 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -929,6 +929,13 @@ __acquires(hwep->lock) return retval; } +static int otg_a_alt_hnp_support(struct ci_hdrc *ci) +{ + dev_warn(&ci->gadget.dev, + "connect the device to an alternate port if you want HNP\n"); + return isr_setup_status_phase(ci); +} + /** * isr_setup_packet_handler: setup packet handler * @ci: UDC descriptor @@ -1061,6 +1068,10 @@ __acquires(ci->lock) ci); } break; + case USB_DEVICE_A_ALT_HNP_SUPPORT: + if (ci_otg_is_fsm_mode(ci)) + err = otg_a_alt_hnp_support(ci); + break; default: goto delegate; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e78720b59d67..683617714e7c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1650,6 +1650,8 @@ static int acm_reset_resume(struct usb_interface *intf) static const struct usb_device_id acm_ids[] = { /* quirky and broken devices */ + { USB_DEVICE(0x076d, 0x0006), /* Denso Cradle CU-321 */ + .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x17ef, 0x7000), /* Lenovo USB modem */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index c6b35b77dab7..61d538aa2346 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -150,9 +150,9 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) break; case OTG_STATE_B_PERIPHERAL: otg_chrg_vbus(fsm, 0); - otg_loc_conn(fsm, 1); otg_loc_sof(fsm, 0); otg_set_protocol(fsm, PROTO_GADGET); + otg_loc_conn(fsm, 1); break; case OTG_STATE_B_WAIT_ACON: otg_chrg_vbus(fsm, 0); @@ -213,10 +213,10 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state) break; case OTG_STATE_A_PERIPHERAL: - otg_loc_conn(fsm, 1); otg_loc_sof(fsm, 0); otg_set_protocol(fsm, PROTO_GADGET); otg_drv_vbus(fsm, 1); + otg_loc_conn(fsm, 1); otg_add_timer(fsm, A_BIDL_ADIS); break; case OTG_STATE_A_WAIT_VFALL: diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 66abdbcfbfa5..11635537c052 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -501,6 +501,7 @@ static void async_completed(struct urb *urb) as->status = urb->status; signr = as->signr; if (signr) { + memset(&sinfo, 0, sizeof(sinfo)); sinfo.si_signo = as->signr; sinfo.si_errno = as->status; sinfo.si_code = SI_ASYNCIO; @@ -2382,6 +2383,7 @@ static void usbdev_remove(struct usb_device *udev) wake_up_all(&ps->wait); list_del_init(&ps->list); if (ps->discsignr) { + memset(&sinfo, 0, sizeof(sinfo)); sinfo.si_signo = ps->discsignr; sinfo.si_errno = EPIPE; sinfo.si_code = SI_ASYNCIO; diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index 02e3e2d4ea56..6cf047878dba 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -377,6 +377,9 @@ static void dwc2_handle_disconnect_intr(struct dwc2_hsotg *hsotg) dwc2_is_host_mode(hsotg) ? "Host" : "Device", dwc2_op_state_str(hsotg)); + if (hsotg->op_state == OTG_STATE_A_HOST) + dwc2_hcd_disconnect(hsotg); + /* Change to L3 (OFF) state */ hsotg->lx_state = DWC2_L3; diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 172d64e585b6..52e0c4e5e48e 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -205,6 +205,18 @@ static void dwc3_omap_write_irq0_set(struct dwc3_omap *omap, u32 value) omap->irq0_offset, value); } +static void dwc3_omap_write_irqmisc_clr(struct dwc3_omap *omap, u32 value) +{ + dwc3_omap_writel(omap->base, USBOTGSS_IRQENABLE_CLR_MISC + + omap->irqmisc_offset, value); +} + +static void dwc3_omap_write_irq0_clr(struct dwc3_omap *omap, u32 value) +{ + dwc3_omap_writel(omap->base, USBOTGSS_IRQENABLE_CLR_0 - + omap->irq0_offset, value); +} + static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, enum omap_dwc3_vbus_id_status status) { @@ -345,9 +357,23 @@ static void dwc3_omap_enable_irqs(struct dwc3_omap *omap) static void dwc3_omap_disable_irqs(struct dwc3_omap *omap) { + u32 reg; + /* disable all IRQs */ - dwc3_omap_write_irqmisc_set(omap, 0x00); - dwc3_omap_write_irq0_set(omap, 0x00); + reg = USBOTGSS_IRQO_COREIRQ_ST; + dwc3_omap_write_irq0_clr(omap, reg); + + reg = (USBOTGSS_IRQMISC_OEVT | + USBOTGSS_IRQMISC_DRVVBUS_RISE | + USBOTGSS_IRQMISC_CHRGVBUS_RISE | + USBOTGSS_IRQMISC_DISCHRGVBUS_RISE | + USBOTGSS_IRQMISC_IDPULLUP_RISE | + USBOTGSS_IRQMISC_DRVVBUS_FALL | + USBOTGSS_IRQMISC_CHRGVBUS_FALL | + USBOTGSS_IRQMISC_DISCHRGVBUS_FALL | + USBOTGSS_IRQMISC_IDPULLUP_FALL); + + dwc3_omap_write_irqmisc_clr(omap, reg); } static u64 dwc3_omap_dma_mask = DMA_BIT_MASK(32); diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 75648145dc1b..c42765b3a060 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1161,7 +1161,6 @@ static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc, if (desc->opts_mutex) mutex_lock(desc->opts_mutex); memcpy(desc->ext_compat_id, page, l); - desc->ext_compat_id[l] = '\0'; if (desc->opts_mutex) mutex_unlock(desc->opts_mutex); @@ -1192,7 +1191,6 @@ static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc, if (desc->opts_mutex) mutex_lock(desc->opts_mutex); memcpy(desc->ext_compat_id + 8, page, l); - desc->ext_compat_id[l + 8] = '\0'; if (desc->opts_mutex) mutex_unlock(desc->opts_mutex); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index af98b096af2f..175c9956cbe3 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -144,10 +144,9 @@ struct ffs_io_data { bool read; struct kiocb *kiocb; - const struct iovec *iovec; - unsigned long nr_segs; - char __user *buf; - size_t len; + struct iov_iter data; + const void *to_free; + char *buf; struct mm_struct *mm; struct work_struct work; @@ -649,29 +648,10 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->req->actual; if (io_data->read && ret > 0) { - int i; - size_t pos = 0; - - /* - * Since req->length may be bigger than io_data->len (after - * being rounded up to maxpacketsize), we may end up with more - * data then user space has space for. - */ - ret = min_t(int, ret, io_data->len); - use_mm(io_data->mm); - for (i = 0; i < io_data->nr_segs; i++) { - size_t len = min_t(size_t, ret - pos, - io_data->iovec[i].iov_len); - if (!len) - break; - if (unlikely(copy_to_user(io_data->iovec[i].iov_base, - &io_data->buf[pos], len))) { - ret = -EFAULT; - break; - } - pos += len; - } + ret = copy_to_iter(io_data->buf, ret, &io_data->data); + if (iov_iter_count(&io_data->data)) + ret = -EFAULT; unuse_mm(io_data->mm); } @@ -684,7 +664,7 @@ static void ffs_user_copy_worker(struct work_struct *work) io_data->kiocb->private = NULL; if (io_data->read) - kfree(io_data->iovec); + kfree(io_data->to_free); kfree(io_data->buf); kfree(io_data); } @@ -743,6 +723,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) * before the waiting completes, so do not assign to 'gadget' earlier */ struct usb_gadget *gadget = epfile->ffs->gadget; + size_t copied; spin_lock_irq(&epfile->ffs->eps_lock); /* In the meantime, endpoint got disabled or changed. */ @@ -750,34 +731,21 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) spin_unlock_irq(&epfile->ffs->eps_lock); return -ESHUTDOWN; } + data_len = iov_iter_count(&io_data->data); /* * Controller may require buffer size to be aligned to * maxpacketsize of an out endpoint. */ - data_len = io_data->read ? - usb_ep_align_maybe(gadget, ep->ep, io_data->len) : - io_data->len; + if (io_data->read) + data_len = usb_ep_align_maybe(gadget, ep->ep, data_len); spin_unlock_irq(&epfile->ffs->eps_lock); data = kmalloc(data_len, GFP_KERNEL); if (unlikely(!data)) return -ENOMEM; - if (io_data->aio && !io_data->read) { - int i; - size_t pos = 0; - for (i = 0; i < io_data->nr_segs; i++) { - if (unlikely(copy_from_user(&data[pos], - io_data->iovec[i].iov_base, - io_data->iovec[i].iov_len))) { - ret = -EFAULT; - goto error; - } - pos += io_data->iovec[i].iov_len; - } - } else { - if (!io_data->read && - unlikely(__copy_from_user(data, io_data->buf, - io_data->len))) { + if (!io_data->read) { + copied = copy_from_iter(data, data_len, &io_data->data); + if (copied != data_len) { ret = -EFAULT; goto error; } @@ -876,10 +844,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) */ ret = ep->status; if (io_data->read && ret > 0) { - ret = min_t(size_t, ret, io_data->len); - - if (unlikely(copy_to_user(io_data->buf, - data, ret))) + ret = copy_to_iter(data, ret, &io_data->data); + if (unlikely(iov_iter_count(&io_data->data))) ret = -EFAULT; } } @@ -898,37 +864,6 @@ error: return ret; } -static ssize_t -ffs_epfile_write(struct file *file, const char __user *buf, size_t len, - loff_t *ptr) -{ - struct ffs_io_data io_data; - - ENTER(); - - io_data.aio = false; - io_data.read = false; - io_data.buf = (char * __user)buf; - io_data.len = len; - - return ffs_epfile_io(file, &io_data); -} - -static ssize_t -ffs_epfile_read(struct file *file, char __user *buf, size_t len, loff_t *ptr) -{ - struct ffs_io_data io_data; - - ENTER(); - - io_data.aio = false; - io_data.read = true; - io_data.buf = buf; - io_data.len = len; - - return ffs_epfile_io(file, &io_data); -} - static int ffs_epfile_open(struct inode *inode, struct file *file) { @@ -965,67 +900,86 @@ static int ffs_aio_cancel(struct kiocb *kiocb) return value; } -static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb, - const struct iovec *iovec, - unsigned long nr_segs, loff_t loff) +static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from) { - struct ffs_io_data *io_data; + struct ffs_io_data io_data, *p = &io_data; + ssize_t res; ENTER(); - io_data = kmalloc(sizeof(*io_data), GFP_KERNEL); - if (unlikely(!io_data)) - return -ENOMEM; + if (!is_sync_kiocb(kiocb)) { + p = kmalloc(sizeof(io_data), GFP_KERNEL); + if (unlikely(!p)) + return -ENOMEM; + p->aio = true; + } else { + p->aio = false; + } - io_data->aio = true; - io_data->read = false; - io_data->kiocb = kiocb; - io_data->iovec = iovec; - io_data->nr_segs = nr_segs; - io_data->len = kiocb->ki_nbytes; - io_data->mm = current->mm; + p->read = false; + p->kiocb = kiocb; + p->data = *from; + p->mm = current->mm; - kiocb->private = io_data; + kiocb->private = p; kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); - return ffs_epfile_io(kiocb->ki_filp, io_data); + res = ffs_epfile_io(kiocb->ki_filp, p); + if (res == -EIOCBQUEUED) + return res; + if (p->aio) + kfree(p); + else + *from = p->data; + return res; } -static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb, - const struct iovec *iovec, - unsigned long nr_segs, loff_t loff) +static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to) { - struct ffs_io_data *io_data; - struct iovec *iovec_copy; + struct ffs_io_data io_data, *p = &io_data; + ssize_t res; ENTER(); - iovec_copy = kmalloc_array(nr_segs, sizeof(*iovec_copy), GFP_KERNEL); - if (unlikely(!iovec_copy)) - return -ENOMEM; - - memcpy(iovec_copy, iovec, sizeof(struct iovec)*nr_segs); - - io_data = kmalloc(sizeof(*io_data), GFP_KERNEL); - if (unlikely(!io_data)) { - kfree(iovec_copy); - return -ENOMEM; + if (!is_sync_kiocb(kiocb)) { + p = kmalloc(sizeof(io_data), GFP_KERNEL); + if (unlikely(!p)) + return -ENOMEM; + p->aio = true; + } else { + p->aio = false; } - io_data->aio = true; - io_data->read = true; - io_data->kiocb = kiocb; - io_data->iovec = iovec_copy; - io_data->nr_segs = nr_segs; - io_data->len = kiocb->ki_nbytes; - io_data->mm = current->mm; + p->read = true; + p->kiocb = kiocb; + if (p->aio) { + p->to_free = dup_iter(&p->data, to, GFP_KERNEL); + if (!p->to_free) { + kfree(p); + return -ENOMEM; + } + } else { + p->data = *to; + p->to_free = NULL; + } + p->mm = current->mm; - kiocb->private = io_data; + kiocb->private = p; kiocb_set_cancel_fn(kiocb, ffs_aio_cancel); - return ffs_epfile_io(kiocb->ki_filp, io_data); + res = ffs_epfile_io(kiocb->ki_filp, p); + if (res == -EIOCBQUEUED) + return res; + + if (p->aio) { + kfree(p->to_free); + kfree(p); + } else { + *to = p->data; + } + return res; } static int @@ -1105,10 +1059,10 @@ static const struct file_operations ffs_epfile_operations = { .llseek = no_llseek, .open = ffs_epfile_open, - .write = ffs_epfile_write, - .read = ffs_epfile_read, - .aio_write = ffs_epfile_aio_write, - .aio_read = ffs_epfile_aio_read, + .write = new_sync_write, + .read = new_sync_read, + .write_iter = ffs_epfile_write_iter, + .read_iter = ffs_epfile_read_iter, .release = ffs_epfile_release, .unlocked_ioctl = ffs_epfile_ioctl, }; diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 426d69a9c018..a2612fb79eff 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -569,7 +569,7 @@ fail: return status; } -const struct file_operations f_hidg_fops = { +static const struct file_operations f_hidg_fops = { .owner = THIS_MODULE, .open = f_hidg_open, .release = f_hidg_release, diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c index 298b46112b1a..39f49f1ad22f 100644 --- a/drivers/usb/gadget/function/f_loopback.c +++ b/drivers/usb/gadget/function/f_loopback.c @@ -289,8 +289,7 @@ static void disable_loopback(struct f_loopback *loop) struct usb_composite_dev *cdev; cdev = loop->function.config->cdev; - disable_endpoints(cdev, loop->in_ep, loop->out_ep, NULL, NULL, NULL, - NULL); + disable_endpoints(cdev, loop->in_ep, loop->out_ep, NULL, NULL); VDBG(cdev, "%s disabled\n", loop->function.name); } diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c index c89e96cfa3e4..c0c3ef272714 100644 --- a/drivers/usb/gadget/function/f_phonet.c +++ b/drivers/usb/gadget/function/f_phonet.c @@ -417,7 +417,10 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt) return -EINVAL; spin_lock(&port->lock); - __pn_reset(f); + + if (fp->in_ep->driver_data) + __pn_reset(f); + if (alt == 1) { int i; diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index e07c50ced64d..3a5ae9900b1e 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -23,15 +23,6 @@ #include "gadget_chips.h" #include "u_f.h" -#define USB_MS_TO_SS_INTERVAL(x) USB_MS_TO_HS_INTERVAL(x) - -enum eptype { - EP_CONTROL = 0, - EP_BULK, - EP_ISOC, - EP_INTERRUPT, -}; - /* * SOURCE/SINK FUNCTION ... a primary testing vehicle for USB peripheral * controller drivers. @@ -64,8 +55,6 @@ struct f_sourcesink { struct usb_ep *out_ep; struct usb_ep *iso_in_ep; struct usb_ep *iso_out_ep; - struct usb_ep *int_in_ep; - struct usb_ep *int_out_ep; int cur_alt; }; @@ -79,10 +68,6 @@ static unsigned isoc_interval; static unsigned isoc_maxpacket; static unsigned isoc_mult; static unsigned isoc_maxburst; -static unsigned int_interval; /* In ms */ -static unsigned int_maxpacket; -static unsigned int_mult; -static unsigned int_maxburst; static unsigned buflen; /*-------------------------------------------------------------------------*/ @@ -107,16 +92,6 @@ static struct usb_interface_descriptor source_sink_intf_alt1 = { /* .iInterface = DYNAMIC */ }; -static struct usb_interface_descriptor source_sink_intf_alt2 = { - .bLength = USB_DT_INTERFACE_SIZE, - .bDescriptorType = USB_DT_INTERFACE, - - .bAlternateSetting = 2, - .bNumEndpoints = 2, - .bInterfaceClass = USB_CLASS_VENDOR_SPEC, - /* .iInterface = DYNAMIC */ -}; - /* full speed support: */ static struct usb_endpoint_descriptor fs_source_desc = { @@ -155,26 +130,6 @@ static struct usb_endpoint_descriptor fs_iso_sink_desc = { .bInterval = 4, }; -static struct usb_endpoint_descriptor fs_int_source_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bEndpointAddress = USB_DIR_IN, - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(64), - .bInterval = GZERO_INT_INTERVAL, -}; - -static struct usb_endpoint_descriptor fs_int_sink_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bEndpointAddress = USB_DIR_OUT, - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(64), - .bInterval = GZERO_INT_INTERVAL, -}; - static struct usb_descriptor_header *fs_source_sink_descs[] = { (struct usb_descriptor_header *) &source_sink_intf_alt0, (struct usb_descriptor_header *) &fs_sink_desc, @@ -185,10 +140,6 @@ static struct usb_descriptor_header *fs_source_sink_descs[] = { (struct usb_descriptor_header *) &fs_source_desc, (struct usb_descriptor_header *) &fs_iso_sink_desc, (struct usb_descriptor_header *) &fs_iso_source_desc, - (struct usb_descriptor_header *) &source_sink_intf_alt2, -#define FS_ALT_IFC_2_OFFSET 8 - (struct usb_descriptor_header *) &fs_int_sink_desc, - (struct usb_descriptor_header *) &fs_int_source_desc, NULL, }; @@ -228,24 +179,6 @@ static struct usb_endpoint_descriptor hs_iso_sink_desc = { .bInterval = 4, }; -static struct usb_endpoint_descriptor hs_int_source_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(1024), - .bInterval = USB_MS_TO_HS_INTERVAL(GZERO_INT_INTERVAL), -}; - -static struct usb_endpoint_descriptor hs_int_sink_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(1024), - .bInterval = USB_MS_TO_HS_INTERVAL(GZERO_INT_INTERVAL), -}; - static struct usb_descriptor_header *hs_source_sink_descs[] = { (struct usb_descriptor_header *) &source_sink_intf_alt0, (struct usb_descriptor_header *) &hs_source_desc, @@ -256,10 +189,6 @@ static struct usb_descriptor_header *hs_source_sink_descs[] = { (struct usb_descriptor_header *) &hs_sink_desc, (struct usb_descriptor_header *) &hs_iso_source_desc, (struct usb_descriptor_header *) &hs_iso_sink_desc, - (struct usb_descriptor_header *) &source_sink_intf_alt2, -#define HS_ALT_IFC_2_OFFSET 8 - (struct usb_descriptor_header *) &hs_int_source_desc, - (struct usb_descriptor_header *) &hs_int_sink_desc, NULL, }; @@ -335,42 +264,6 @@ static struct usb_ss_ep_comp_descriptor ss_iso_sink_comp_desc = { .wBytesPerInterval = cpu_to_le16(1024), }; -static struct usb_endpoint_descriptor ss_int_source_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(1024), - .bInterval = USB_MS_TO_SS_INTERVAL(GZERO_INT_INTERVAL), -}; - -struct usb_ss_ep_comp_descriptor ss_int_source_comp_desc = { - .bLength = USB_DT_SS_EP_COMP_SIZE, - .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, - - .bMaxBurst = 0, - .bmAttributes = 0, - .wBytesPerInterval = cpu_to_le16(1024), -}; - -static struct usb_endpoint_descriptor ss_int_sink_desc = { - .bLength = USB_DT_ENDPOINT_SIZE, - .bDescriptorType = USB_DT_ENDPOINT, - - .bmAttributes = USB_ENDPOINT_XFER_INT, - .wMaxPacketSize = cpu_to_le16(1024), - .bInterval = USB_MS_TO_SS_INTERVAL(GZERO_INT_INTERVAL), -}; - -struct usb_ss_ep_comp_descriptor ss_int_sink_comp_desc = { - .bLength = USB_DT_SS_EP_COMP_SIZE, - .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, - - .bMaxBurst = 0, - .bmAttributes = 0, - .wBytesPerInterval = cpu_to_le16(1024), -}; - static struct usb_descriptor_header *ss_source_sink_descs[] = { (struct usb_descriptor_header *) &source_sink_intf_alt0, (struct usb_descriptor_header *) &ss_source_desc, @@ -387,12 +280,6 @@ static struct usb_descriptor_header *ss_source_sink_descs[] = { (struct usb_descriptor_header *) &ss_iso_source_comp_desc, (struct usb_descriptor_header *) &ss_iso_sink_desc, (struct usb_descriptor_header *) &ss_iso_sink_comp_desc, - (struct usb_descriptor_header *) &source_sink_intf_alt2, -#define SS_ALT_IFC_2_OFFSET 14 - (struct usb_descriptor_header *) &ss_int_source_desc, - (struct usb_descriptor_header *) &ss_int_source_comp_desc, - (struct usb_descriptor_header *) &ss_int_sink_desc, - (struct usb_descriptor_header *) &ss_int_sink_comp_desc, NULL, }; @@ -414,21 +301,6 @@ static struct usb_gadget_strings *sourcesink_strings[] = { }; /*-------------------------------------------------------------------------*/ -static const char *get_ep_string(enum eptype ep_type) -{ - switch (ep_type) { - case EP_ISOC: - return "ISOC-"; - case EP_INTERRUPT: - return "INTERRUPT-"; - case EP_CONTROL: - return "CTRL-"; - case EP_BULK: - return "BULK-"; - default: - return "UNKNOWN-"; - } -} static inline struct usb_request *ss_alloc_ep_req(struct usb_ep *ep, int len) { @@ -456,8 +328,7 @@ static void disable_ep(struct usb_composite_dev *cdev, struct usb_ep *ep) void disable_endpoints(struct usb_composite_dev *cdev, struct usb_ep *in, struct usb_ep *out, - struct usb_ep *iso_in, struct usb_ep *iso_out, - struct usb_ep *int_in, struct usb_ep *int_out) + struct usb_ep *iso_in, struct usb_ep *iso_out) { disable_ep(cdev, in); disable_ep(cdev, out); @@ -465,10 +336,6 @@ void disable_endpoints(struct usb_composite_dev *cdev, disable_ep(cdev, iso_in); if (iso_out) disable_ep(cdev, iso_out); - if (int_in) - disable_ep(cdev, int_in); - if (int_out) - disable_ep(cdev, int_out); } static int @@ -485,7 +352,6 @@ sourcesink_bind(struct usb_configuration *c, struct usb_function *f) return id; source_sink_intf_alt0.bInterfaceNumber = id; source_sink_intf_alt1.bInterfaceNumber = id; - source_sink_intf_alt2.bInterfaceNumber = id; /* allocate bulk endpoints */ ss->in_ep = usb_ep_autoconfig(cdev->gadget, &fs_source_desc); @@ -546,55 +412,14 @@ no_iso: if (isoc_maxpacket > 1024) isoc_maxpacket = 1024; - /* sanity check the interrupt module parameters */ - if (int_interval < 1) - int_interval = 1; - if (int_interval > 4096) - int_interval = 4096; - if (int_mult > 2) - int_mult = 2; - if (int_maxburst > 15) - int_maxburst = 15; - - /* fill in the FS interrupt descriptors from the module parameters */ - fs_int_source_desc.wMaxPacketSize = int_maxpacket > 64 ? - 64 : int_maxpacket; - fs_int_source_desc.bInterval = int_interval > 255 ? - 255 : int_interval; - fs_int_sink_desc.wMaxPacketSize = int_maxpacket > 64 ? - 64 : int_maxpacket; - fs_int_sink_desc.bInterval = int_interval > 255 ? - 255 : int_interval; - - /* allocate int endpoints */ - ss->int_in_ep = usb_ep_autoconfig(cdev->gadget, &fs_int_source_desc); - if (!ss->int_in_ep) - goto no_int; - ss->int_in_ep->driver_data = cdev; /* claim */ - - ss->int_out_ep = usb_ep_autoconfig(cdev->gadget, &fs_int_sink_desc); - if (ss->int_out_ep) { - ss->int_out_ep->driver_data = cdev; /* claim */ - } else { - ss->int_in_ep->driver_data = NULL; - ss->int_in_ep = NULL; -no_int: - fs_source_sink_descs[FS_ALT_IFC_2_OFFSET] = NULL; - hs_source_sink_descs[HS_ALT_IFC_2_OFFSET] = NULL; - ss_source_sink_descs[SS_ALT_IFC_2_OFFSET] = NULL; - } - - if (int_maxpacket > 1024) - int_maxpacket = 1024; - /* support high speed hardware */ hs_source_desc.bEndpointAddress = fs_source_desc.bEndpointAddress; hs_sink_desc.bEndpointAddress = fs_sink_desc.bEndpointAddress; /* - * Fill in the HS isoc and interrupt descriptors from the module - * parameters. We assume that the user knows what they are doing and - * won't give parameters that their UDC doesn't support. + * Fill in the HS isoc descriptors from the module parameters. + * We assume that the user knows what they are doing and won't + * give parameters that their UDC doesn't support. */ hs_iso_source_desc.wMaxPacketSize = isoc_maxpacket; hs_iso_source_desc.wMaxPacketSize |= isoc_mult << 11; @@ -607,17 +432,6 @@ no_int: hs_iso_sink_desc.bInterval = isoc_interval; hs_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress; - hs_int_source_desc.wMaxPacketSize = int_maxpacket; - hs_int_source_desc.wMaxPacketSize |= int_mult << 11; - hs_int_source_desc.bInterval = USB_MS_TO_HS_INTERVAL(int_interval); - hs_int_source_desc.bEndpointAddress = - fs_int_source_desc.bEndpointAddress; - - hs_int_sink_desc.wMaxPacketSize = int_maxpacket; - hs_int_sink_desc.wMaxPacketSize |= int_mult << 11; - hs_int_sink_desc.bInterval = USB_MS_TO_HS_INTERVAL(int_interval); - hs_int_sink_desc.bEndpointAddress = fs_int_sink_desc.bEndpointAddress; - /* support super speed hardware */ ss_source_desc.bEndpointAddress = fs_source_desc.bEndpointAddress; @@ -625,9 +439,9 @@ no_int: fs_sink_desc.bEndpointAddress; /* - * Fill in the SS isoc and interrupt descriptors from the module - * parameters. We assume that the user knows what they are doing and - * won't give parameters that their UDC doesn't support. + * Fill in the SS isoc descriptors from the module parameters. + * We assume that the user knows what they are doing and won't + * give parameters that their UDC doesn't support. */ ss_iso_source_desc.wMaxPacketSize = isoc_maxpacket; ss_iso_source_desc.bInterval = isoc_interval; @@ -646,37 +460,17 @@ no_int: isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1); ss_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress; - ss_int_source_desc.wMaxPacketSize = int_maxpacket; - ss_int_source_desc.bInterval = USB_MS_TO_SS_INTERVAL(int_interval); - ss_int_source_comp_desc.bmAttributes = int_mult; - ss_int_source_comp_desc.bMaxBurst = int_maxburst; - ss_int_source_comp_desc.wBytesPerInterval = - int_maxpacket * (int_mult + 1) * (int_maxburst + 1); - ss_int_source_desc.bEndpointAddress = - fs_int_source_desc.bEndpointAddress; - - ss_int_sink_desc.wMaxPacketSize = int_maxpacket; - ss_int_sink_desc.bInterval = USB_MS_TO_SS_INTERVAL(int_interval); - ss_int_sink_comp_desc.bmAttributes = int_mult; - ss_int_sink_comp_desc.bMaxBurst = int_maxburst; - ss_int_sink_comp_desc.wBytesPerInterval = - int_maxpacket * (int_mult + 1) * (int_maxburst + 1); - ss_int_sink_desc.bEndpointAddress = fs_int_sink_desc.bEndpointAddress; - ret = usb_assign_descriptors(f, fs_source_sink_descs, hs_source_sink_descs, ss_source_sink_descs); if (ret) return ret; - DBG(cdev, "%s speed %s: IN/%s, OUT/%s, ISO-IN/%s, ISO-OUT/%s, " - "INT-IN/%s, INT-OUT/%s\n", + DBG(cdev, "%s speed %s: IN/%s, OUT/%s, ISO-IN/%s, ISO-OUT/%s\n", (gadget_is_superspeed(c->cdev->gadget) ? "super" : (gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full")), f->name, ss->in_ep->name, ss->out_ep->name, ss->iso_in_ep ? ss->iso_in_ep->name : "<none>", - ss->iso_out_ep ? ss->iso_out_ep->name : "<none>", - ss->int_in_ep ? ss->int_in_ep->name : "<none>", - ss->int_out_ep ? ss->int_out_ep->name : "<none>"); + ss->iso_out_ep ? ss->iso_out_ep->name : "<none>"); return 0; } @@ -807,15 +601,14 @@ static void source_sink_complete(struct usb_ep *ep, struct usb_request *req) } static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, - enum eptype ep_type, int speed) + bool is_iso, int speed) { struct usb_ep *ep; struct usb_request *req; int i, size, status; for (i = 0; i < 8; i++) { - switch (ep_type) { - case EP_ISOC: + if (is_iso) { switch (speed) { case USB_SPEED_SUPER: size = isoc_maxpacket * (isoc_mult + 1) * @@ -831,28 +624,9 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, } ep = is_in ? ss->iso_in_ep : ss->iso_out_ep; req = ss_alloc_ep_req(ep, size); - break; - case EP_INTERRUPT: - switch (speed) { - case USB_SPEED_SUPER: - size = int_maxpacket * (int_mult + 1) * - (int_maxburst + 1); - break; - case USB_SPEED_HIGH: - size = int_maxpacket * (int_mult + 1); - break; - default: - size = int_maxpacket > 1023 ? - 1023 : int_maxpacket; - break; - } - ep = is_in ? ss->int_in_ep : ss->int_out_ep; - req = ss_alloc_ep_req(ep, size); - break; - default: + } else { ep = is_in ? ss->in_ep : ss->out_ep; req = ss_alloc_ep_req(ep, 0); - break; } if (!req) @@ -870,12 +644,12 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, cdev = ss->function.config->cdev; ERROR(cdev, "start %s%s %s --> %d\n", - get_ep_string(ep_type), is_in ? "IN" : "OUT", - ep->name, status); + is_iso ? "ISO-" : "", is_in ? "IN" : "OUT", + ep->name, status); free_ep_req(ep, req); } - if (!(ep_type == EP_ISOC)) + if (!is_iso) break; } @@ -888,7 +662,7 @@ static void disable_source_sink(struct f_sourcesink *ss) cdev = ss->function.config->cdev; disable_endpoints(cdev, ss->in_ep, ss->out_ep, ss->iso_in_ep, - ss->iso_out_ep, ss->int_in_ep, ss->int_out_ep); + ss->iso_out_ep); VDBG(cdev, "%s disabled\n", ss->function.name); } @@ -900,62 +674,6 @@ enable_source_sink(struct usb_composite_dev *cdev, struct f_sourcesink *ss, int speed = cdev->gadget->speed; struct usb_ep *ep; - if (alt == 2) { - /* Configure for periodic interrupt endpoint */ - ep = ss->int_in_ep; - if (ep) { - result = config_ep_by_speed(cdev->gadget, - &(ss->function), ep); - if (result) - return result; - - result = usb_ep_enable(ep); - if (result < 0) - return result; - - ep->driver_data = ss; - result = source_sink_start_ep(ss, true, EP_INTERRUPT, - speed); - if (result < 0) { -fail1: - ep = ss->int_in_ep; - if (ep) { - usb_ep_disable(ep); - ep->driver_data = NULL; - } - return result; - } - } - - /* - * one interrupt endpoint reads (sinks) anything OUT (from the - * host) - */ - ep = ss->int_out_ep; - if (ep) { - result = config_ep_by_speed(cdev->gadget, - &(ss->function), ep); - if (result) - goto fail1; - - result = usb_ep_enable(ep); - if (result < 0) - goto fail1; - - ep->driver_data = ss; - result = source_sink_start_ep(ss, false, EP_INTERRUPT, - speed); - if (result < 0) { - ep = ss->int_out_ep; - usb_ep_disable(ep); - ep->driver_data = NULL; - goto fail1; - } - } - - goto out; - } - /* one bulk endpoint writes (sources) zeroes IN (to the host) */ ep = ss->in_ep; result = config_ep_by_speed(cdev->gadget, &(ss->function), ep); @@ -966,7 +684,7 @@ fail1: return result; ep->driver_data = ss; - result = source_sink_start_ep(ss, true, EP_BULK, speed); + result = source_sink_start_ep(ss, true, false, speed); if (result < 0) { fail: ep = ss->in_ep; @@ -985,7 +703,7 @@ fail: goto fail; ep->driver_data = ss; - result = source_sink_start_ep(ss, false, EP_BULK, speed); + result = source_sink_start_ep(ss, false, false, speed); if (result < 0) { fail2: ep = ss->out_ep; @@ -1008,7 +726,7 @@ fail2: goto fail2; ep->driver_data = ss; - result = source_sink_start_ep(ss, true, EP_ISOC, speed); + result = source_sink_start_ep(ss, true, true, speed); if (result < 0) { fail3: ep = ss->iso_in_ep; @@ -1031,14 +749,13 @@ fail3: goto fail3; ep->driver_data = ss; - result = source_sink_start_ep(ss, false, EP_ISOC, speed); + result = source_sink_start_ep(ss, false, true, speed); if (result < 0) { usb_ep_disable(ep); ep->driver_data = NULL; goto fail3; } } - out: ss->cur_alt = alt; @@ -1054,8 +771,6 @@ static int sourcesink_set_alt(struct usb_function *f, if (ss->in_ep->driver_data) disable_source_sink(ss); - else if (alt == 2 && ss->int_in_ep->driver_data) - disable_source_sink(ss); return enable_source_sink(cdev, ss, alt); } @@ -1168,10 +883,6 @@ static struct usb_function *source_sink_alloc_func( isoc_maxpacket = ss_opts->isoc_maxpacket; isoc_mult = ss_opts->isoc_mult; isoc_maxburst = ss_opts->isoc_maxburst; - int_interval = ss_opts->int_interval; - int_maxpacket = ss_opts->int_maxpacket; - int_mult = ss_opts->int_mult; - int_maxburst = ss_opts->int_maxburst; buflen = ss_opts->bulk_buflen; ss->function.name = "source/sink"; @@ -1468,182 +1179,6 @@ static struct f_ss_opts_attribute f_ss_opts_bulk_buflen = f_ss_opts_bulk_buflen_show, f_ss_opts_bulk_buflen_store); -static ssize_t f_ss_opts_int_interval_show(struct f_ss_opts *opts, char *page) -{ - int result; - - mutex_lock(&opts->lock); - result = sprintf(page, "%u", opts->int_interval); - mutex_unlock(&opts->lock); - - return result; -} - -static ssize_t f_ss_opts_int_interval_store(struct f_ss_opts *opts, - const char *page, size_t len) -{ - int ret; - u32 num; - - mutex_lock(&opts->lock); - if (opts->refcnt) { - ret = -EBUSY; - goto end; - } - - ret = kstrtou32(page, 0, &num); - if (ret) - goto end; - - if (num > 4096) { - ret = -EINVAL; - goto end; - } - - opts->int_interval = num; - ret = len; -end: - mutex_unlock(&opts->lock); - return ret; -} - -static struct f_ss_opts_attribute f_ss_opts_int_interval = - __CONFIGFS_ATTR(int_interval, S_IRUGO | S_IWUSR, - f_ss_opts_int_interval_show, - f_ss_opts_int_interval_store); - -static ssize_t f_ss_opts_int_maxpacket_show(struct f_ss_opts *opts, char *page) -{ - int result; - - mutex_lock(&opts->lock); - result = sprintf(page, "%u", opts->int_maxpacket); - mutex_unlock(&opts->lock); - - return result; -} - -static ssize_t f_ss_opts_int_maxpacket_store(struct f_ss_opts *opts, - const char *page, size_t len) -{ - int ret; - u16 num; - - mutex_lock(&opts->lock); - if (opts->refcnt) { - ret = -EBUSY; - goto end; - } - - ret = kstrtou16(page, 0, &num); - if (ret) - goto end; - - if (num > 1024) { - ret = -EINVAL; - goto end; - } - - opts->int_maxpacket = num; - ret = len; -end: - mutex_unlock(&opts->lock); - return ret; -} - -static struct f_ss_opts_attribute f_ss_opts_int_maxpacket = - __CONFIGFS_ATTR(int_maxpacket, S_IRUGO | S_IWUSR, - f_ss_opts_int_maxpacket_show, - f_ss_opts_int_maxpacket_store); - -static ssize_t f_ss_opts_int_mult_show(struct f_ss_opts *opts, char *page) -{ - int result; - - mutex_lock(&opts->lock); - result = sprintf(page, "%u", opts->int_mult); - mutex_unlock(&opts->lock); - - return result; -} - -static ssize_t f_ss_opts_int_mult_store(struct f_ss_opts *opts, - const char *page, size_t len) -{ - int ret; - u8 num; - - mutex_lock(&opts->lock); - if (opts->refcnt) { - ret = -EBUSY; - goto end; - } - - ret = kstrtou8(page, 0, &num); - if (ret) - goto end; - - if (num > 2) { - ret = -EINVAL; - goto end; - } - - opts->int_mult = num; - ret = len; -end: - mutex_unlock(&opts->lock); - return ret; -} - -static struct f_ss_opts_attribute f_ss_opts_int_mult = - __CONFIGFS_ATTR(int_mult, S_IRUGO | S_IWUSR, - f_ss_opts_int_mult_show, - f_ss_opts_int_mult_store); - -static ssize_t f_ss_opts_int_maxburst_show(struct f_ss_opts *opts, char *page) -{ - int result; - - mutex_lock(&opts->lock); - result = sprintf(page, "%u", opts->int_maxburst); - mutex_unlock(&opts->lock); - - return result; -} - -static ssize_t f_ss_opts_int_maxburst_store(struct f_ss_opts *opts, - const char *page, size_t len) -{ - int ret; - u8 num; - - mutex_lock(&opts->lock); - if (opts->refcnt) { - ret = -EBUSY; - goto end; - } - - ret = kstrtou8(page, 0, &num); - if (ret) - goto end; - - if (num > 15) { - ret = -EINVAL; - goto end; - } - - opts->int_maxburst = num; - ret = len; -end: - mutex_unlock(&opts->lock); - return ret; -} - -static struct f_ss_opts_attribute f_ss_opts_int_maxburst = - __CONFIGFS_ATTR(int_maxburst, S_IRUGO | S_IWUSR, - f_ss_opts_int_maxburst_show, - f_ss_opts_int_maxburst_store); - static struct configfs_attribute *ss_attrs[] = { &f_ss_opts_pattern.attr, &f_ss_opts_isoc_interval.attr, @@ -1651,10 +1186,6 @@ static struct configfs_attribute *ss_attrs[] = { &f_ss_opts_isoc_mult.attr, &f_ss_opts_isoc_maxburst.attr, &f_ss_opts_bulk_buflen.attr, - &f_ss_opts_int_interval.attr, - &f_ss_opts_int_maxpacket.attr, - &f_ss_opts_int_mult.attr, - &f_ss_opts_int_maxburst.attr, NULL, }; @@ -1684,8 +1215,6 @@ static struct usb_function_instance *source_sink_alloc_inst(void) ss_opts->isoc_interval = GZERO_ISOC_INTERVAL; ss_opts->isoc_maxpacket = GZERO_ISOC_MAXPACKET; ss_opts->bulk_buflen = GZERO_BULK_BUFLEN; - ss_opts->int_interval = GZERO_INT_INTERVAL; - ss_opts->int_maxpacket = GZERO_INT_MAXPACKET; config_group_init_type_name(&ss_opts->func_inst.group, "", &ss_func_type); diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 33e16658e5cf..6d3eb8b00a48 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -54,7 +54,7 @@ #define UNFLW_CTRL 8 #define OVFLW_CTRL 10 -const char *uac2_name = "snd_uac2"; +static const char *uac2_name = "snd_uac2"; struct uac2_req { struct uac2_rtd_params *pp; /* parent param */ @@ -634,7 +634,7 @@ static struct usb_interface_descriptor std_ac_if_desc = { }; /* Clock source for IN traffic */ -struct uac_clock_source_descriptor in_clk_src_desc = { +static struct uac_clock_source_descriptor in_clk_src_desc = { .bLength = sizeof in_clk_src_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -646,7 +646,7 @@ struct uac_clock_source_descriptor in_clk_src_desc = { }; /* Clock source for OUT traffic */ -struct uac_clock_source_descriptor out_clk_src_desc = { +static struct uac_clock_source_descriptor out_clk_src_desc = { .bLength = sizeof out_clk_src_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -658,7 +658,7 @@ struct uac_clock_source_descriptor out_clk_src_desc = { }; /* Input Terminal for USB_OUT */ -struct uac2_input_terminal_descriptor usb_out_it_desc = { +static struct uac2_input_terminal_descriptor usb_out_it_desc = { .bLength = sizeof usb_out_it_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -672,7 +672,7 @@ struct uac2_input_terminal_descriptor usb_out_it_desc = { }; /* Input Terminal for I/O-In */ -struct uac2_input_terminal_descriptor io_in_it_desc = { +static struct uac2_input_terminal_descriptor io_in_it_desc = { .bLength = sizeof io_in_it_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -686,7 +686,7 @@ struct uac2_input_terminal_descriptor io_in_it_desc = { }; /* Ouput Terminal for USB_IN */ -struct uac2_output_terminal_descriptor usb_in_ot_desc = { +static struct uac2_output_terminal_descriptor usb_in_ot_desc = { .bLength = sizeof usb_in_ot_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -700,7 +700,7 @@ struct uac2_output_terminal_descriptor usb_in_ot_desc = { }; /* Ouput Terminal for I/O-Out */ -struct uac2_output_terminal_descriptor io_out_ot_desc = { +static struct uac2_output_terminal_descriptor io_out_ot_desc = { .bLength = sizeof io_out_ot_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -713,7 +713,7 @@ struct uac2_output_terminal_descriptor io_out_ot_desc = { .bmControls = (CONTROL_RDWR << COPY_CTRL), }; -struct uac2_ac_header_descriptor ac_hdr_desc = { +static struct uac2_ac_header_descriptor ac_hdr_desc = { .bLength = sizeof ac_hdr_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -751,7 +751,7 @@ static struct usb_interface_descriptor std_as_out_if1_desc = { }; /* Audio Stream OUT Intface Desc */ -struct uac2_as_header_descriptor as_out_hdr_desc = { +static struct uac2_as_header_descriptor as_out_hdr_desc = { .bLength = sizeof as_out_hdr_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -764,7 +764,7 @@ struct uac2_as_header_descriptor as_out_hdr_desc = { }; /* Audio USB_OUT Format */ -struct uac2_format_type_i_descriptor as_out_fmt1_desc = { +static struct uac2_format_type_i_descriptor as_out_fmt1_desc = { .bLength = sizeof as_out_fmt1_desc, .bDescriptorType = USB_DT_CS_INTERFACE, .bDescriptorSubtype = UAC_FORMAT_TYPE, @@ -772,7 +772,7 @@ struct uac2_format_type_i_descriptor as_out_fmt1_desc = { }; /* STD AS ISO OUT Endpoint */ -struct usb_endpoint_descriptor fs_epout_desc = { +static struct usb_endpoint_descriptor fs_epout_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -782,7 +782,7 @@ struct usb_endpoint_descriptor fs_epout_desc = { .bInterval = 1, }; -struct usb_endpoint_descriptor hs_epout_desc = { +static struct usb_endpoint_descriptor hs_epout_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -828,7 +828,7 @@ static struct usb_interface_descriptor std_as_in_if1_desc = { }; /* Audio Stream IN Intface Desc */ -struct uac2_as_header_descriptor as_in_hdr_desc = { +static struct uac2_as_header_descriptor as_in_hdr_desc = { .bLength = sizeof as_in_hdr_desc, .bDescriptorType = USB_DT_CS_INTERFACE, @@ -841,7 +841,7 @@ struct uac2_as_header_descriptor as_in_hdr_desc = { }; /* Audio USB_IN Format */ -struct uac2_format_type_i_descriptor as_in_fmt1_desc = { +static struct uac2_format_type_i_descriptor as_in_fmt1_desc = { .bLength = sizeof as_in_fmt1_desc, .bDescriptorType = USB_DT_CS_INTERFACE, .bDescriptorSubtype = UAC_FORMAT_TYPE, @@ -849,7 +849,7 @@ struct uac2_format_type_i_descriptor as_in_fmt1_desc = { }; /* STD AS ISO IN Endpoint */ -struct usb_endpoint_descriptor fs_epin_desc = { +static struct usb_endpoint_descriptor fs_epin_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -859,7 +859,7 @@ struct usb_endpoint_descriptor fs_epin_desc = { .bInterval = 1, }; -struct usb_endpoint_descriptor hs_epin_desc = { +static struct usb_endpoint_descriptor hs_epin_desc = { .bLength = USB_DT_ENDPOINT_SIZE, .bDescriptorType = USB_DT_ENDPOINT, @@ -1563,7 +1563,7 @@ static void afunc_unbind(struct usb_configuration *c, struct usb_function *f) agdev->out_ep->driver_data = NULL; } -struct usb_function *afunc_alloc(struct usb_function_instance *fi) +static struct usb_function *afunc_alloc(struct usb_function_instance *fi) { struct audio_dev *agdev; struct f_uac2_opts *opts; diff --git a/drivers/usb/gadget/function/g_zero.h b/drivers/usb/gadget/function/g_zero.h index 2ce28b9d97cc..15f180904f8a 100644 --- a/drivers/usb/gadget/function/g_zero.h +++ b/drivers/usb/gadget/function/g_zero.h @@ -10,8 +10,6 @@ #define GZERO_QLEN 32 #define GZERO_ISOC_INTERVAL 4 #define GZERO_ISOC_MAXPACKET 1024 -#define GZERO_INT_INTERVAL 1 /* Default interrupt interval = 1 ms */ -#define GZERO_INT_MAXPACKET 1024 struct usb_zero_options { unsigned pattern; @@ -19,10 +17,6 @@ struct usb_zero_options { unsigned isoc_maxpacket; unsigned isoc_mult; unsigned isoc_maxburst; - unsigned int_interval; /* In ms */ - unsigned int_maxpacket; - unsigned int_mult; - unsigned int_maxburst; unsigned bulk_buflen; unsigned qlen; }; @@ -34,10 +28,6 @@ struct f_ss_opts { unsigned isoc_maxpacket; unsigned isoc_mult; unsigned isoc_maxburst; - unsigned int_interval; /* In ms */ - unsigned int_maxpacket; - unsigned int_mult; - unsigned int_maxburst; unsigned bulk_buflen; /* @@ -72,7 +62,6 @@ int lb_modinit(void); void free_ep_req(struct usb_ep *ep, struct usb_request *req); void disable_endpoints(struct usb_composite_dev *cdev, struct usb_ep *in, struct usb_ep *out, - struct usb_ep *iso_in, struct usb_ep *iso_out, - struct usb_ep *int_in, struct usb_ep *int_out); + struct usb_ep *iso_in, struct usb_ep *iso_out); #endif /* __G_ZERO_H */ diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index 5aad7fededa5..8b818fd027b3 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -27,6 +27,7 @@ #include "uvc.h" #include "uvc_queue.h" #include "uvc_video.h" +#include "uvc_v4l2.h" /* -------------------------------------------------------------------------- * Requests handling diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 9cb86bc1a9a5..50a5e637ca35 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -21,6 +21,7 @@ #include "uvc.h" #include "uvc_queue.h" +#include "uvc_video.h" /* -------------------------------------------------------------------------- * Video codecs diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c index 06acfa55864a..b01b88e1b716 100644 --- a/drivers/usb/gadget/legacy/g_ffs.c +++ b/drivers/usb/gadget/legacy/g_ffs.c @@ -133,7 +133,9 @@ struct gfs_configuration { struct usb_configuration c; int (*eth)(struct usb_configuration *c); int num; -} gfs_configurations[] = { +}; + +static struct gfs_configuration gfs_configurations[] = { #ifdef CONFIG_USB_FUNCTIONFS_RNDIS { .eth = bind_rndis_config, @@ -278,7 +280,7 @@ static void *functionfs_acquire_dev(struct ffs_dev *dev) if (!try_module_get(THIS_MODULE)) return ERR_PTR(-ENOENT); - return 0; + return NULL; } static void functionfs_release_dev(struct ffs_dev *dev) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index db49ec4c748e..200f9a584064 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -74,6 +74,8 @@ MODULE_DESCRIPTION (DRIVER_DESC); MODULE_AUTHOR ("David Brownell"); MODULE_LICENSE ("GPL"); +static int ep_open(struct inode *, struct file *); + /*----------------------------------------------------------------------*/ @@ -283,14 +285,15 @@ static void epio_complete (struct usb_ep *ep, struct usb_request *req) * still need dev->lock to use epdata->ep. */ static int -get_ready_ep (unsigned f_flags, struct ep_data *epdata) +get_ready_ep (unsigned f_flags, struct ep_data *epdata, bool is_write) { int val; if (f_flags & O_NONBLOCK) { if (!mutex_trylock(&epdata->lock)) goto nonblock; - if (epdata->state != STATE_EP_ENABLED) { + if (epdata->state != STATE_EP_ENABLED && + (!is_write || epdata->state != STATE_EP_READY)) { mutex_unlock(&epdata->lock); nonblock: val = -EAGAIN; @@ -305,18 +308,20 @@ nonblock: switch (epdata->state) { case STATE_EP_ENABLED: + return 0; + case STATE_EP_READY: /* not configured yet */ + if (is_write) + return 0; + // FALLTHRU + case STATE_EP_UNBOUND: /* clean disconnect */ break; // case STATE_EP_DISABLED: /* "can't happen" */ - // case STATE_EP_READY: /* "can't happen" */ default: /* error! */ pr_debug ("%s: ep %p not available, state %d\n", shortname, epdata, epdata->state); - // FALLTHROUGH - case STATE_EP_UNBOUND: /* clean disconnect */ - val = -ENODEV; - mutex_unlock(&epdata->lock); } - return val; + mutex_unlock(&epdata->lock); + return -ENODEV; } static ssize_t @@ -363,97 +368,6 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) return value; } - -/* handle a synchronous OUT bulk/intr/iso transfer */ -static ssize_t -ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) -{ - struct ep_data *data = fd->private_data; - void *kbuf; - ssize_t value; - - if ((value = get_ready_ep (fd->f_flags, data)) < 0) - return value; - - /* halt any endpoint by doing a "wrong direction" i/o call */ - if (usb_endpoint_dir_in(&data->desc)) { - if (usb_endpoint_xfer_isoc(&data->desc)) { - mutex_unlock(&data->lock); - return -EINVAL; - } - DBG (data->dev, "%s halt\n", data->name); - spin_lock_irq (&data->dev->lock); - if (likely (data->ep != NULL)) - usb_ep_set_halt (data->ep); - spin_unlock_irq (&data->dev->lock); - mutex_unlock(&data->lock); - return -EBADMSG; - } - - /* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */ - - value = -ENOMEM; - kbuf = kmalloc (len, GFP_KERNEL); - if (unlikely (!kbuf)) - goto free1; - - value = ep_io (data, kbuf, len); - VDEBUG (data->dev, "%s read %zu OUT, status %d\n", - data->name, len, (int) value); - if (value >= 0 && copy_to_user (buf, kbuf, value)) - value = -EFAULT; - -free1: - mutex_unlock(&data->lock); - kfree (kbuf); - return value; -} - -/* handle a synchronous IN bulk/intr/iso transfer */ -static ssize_t -ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) -{ - struct ep_data *data = fd->private_data; - void *kbuf; - ssize_t value; - - if ((value = get_ready_ep (fd->f_flags, data)) < 0) - return value; - - /* halt any endpoint by doing a "wrong direction" i/o call */ - if (!usb_endpoint_dir_in(&data->desc)) { - if (usb_endpoint_xfer_isoc(&data->desc)) { - mutex_unlock(&data->lock); - return -EINVAL; - } - DBG (data->dev, "%s halt\n", data->name); - spin_lock_irq (&data->dev->lock); - if (likely (data->ep != NULL)) - usb_ep_set_halt (data->ep); - spin_unlock_irq (&data->dev->lock); - mutex_unlock(&data->lock); - return -EBADMSG; - } - - /* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */ - - value = -ENOMEM; - kbuf = memdup_user(buf, len); - if (IS_ERR(kbuf)) { - value = PTR_ERR(kbuf); - kbuf = NULL; - goto free1; - } - - value = ep_io (data, kbuf, len); - VDEBUG (data->dev, "%s write %zu IN, status %d\n", - data->name, len, (int) value); -free1: - mutex_unlock(&data->lock); - kfree (kbuf); - return value; -} - static int ep_release (struct inode *inode, struct file *fd) { @@ -481,7 +395,7 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value) struct ep_data *data = fd->private_data; int status; - if ((status = get_ready_ep (fd->f_flags, data)) < 0) + if ((status = get_ready_ep (fd->f_flags, data, false)) < 0) return status; spin_lock_irq (&data->dev->lock); @@ -517,8 +431,8 @@ struct kiocb_priv { struct mm_struct *mm; struct work_struct work; void *buf; - const struct iovec *iv; - unsigned long nr_segs; + struct iov_iter to; + const void *to_free; unsigned actual; }; @@ -541,35 +455,6 @@ static int ep_aio_cancel(struct kiocb *iocb) return value; } -static ssize_t ep_copy_to_user(struct kiocb_priv *priv) -{ - ssize_t len, total; - void *to_copy; - int i; - - /* copy stuff into user buffers */ - total = priv->actual; - len = 0; - to_copy = priv->buf; - for (i=0; i < priv->nr_segs; i++) { - ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total); - - if (copy_to_user(priv->iv[i].iov_base, to_copy, this)) { - if (len == 0) - len = -EFAULT; - break; - } - - total -= this; - len += this; - to_copy += this; - if (total == 0) - break; - } - - return len; -} - static void ep_user_copy_worker(struct work_struct *work) { struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work); @@ -578,13 +463,16 @@ static void ep_user_copy_worker(struct work_struct *work) size_t ret; use_mm(mm); - ret = ep_copy_to_user(priv); + ret = copy_to_iter(priv->buf, priv->actual, &priv->to); unuse_mm(mm); + if (!ret) + ret = -EFAULT; /* completing the iocb can drop the ctx and mm, don't touch mm after */ aio_complete(iocb, ret, ret); kfree(priv->buf); + kfree(priv->to_free); kfree(priv); } @@ -603,8 +491,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) * don't need to copy anything to userspace, so we can * complete the aio request immediately. */ - if (priv->iv == NULL || unlikely(req->actual == 0)) { + if (priv->to_free == NULL || unlikely(req->actual == 0)) { kfree(req->buf); + kfree(priv->to_free); kfree(priv); iocb->private = NULL; /* aio_complete() reports bytes-transferred _and_ faults */ @@ -618,6 +507,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) priv->buf = req->buf; priv->actual = req->actual; + INIT_WORK(&priv->work, ep_user_copy_worker); schedule_work(&priv->work); } spin_unlock(&epdata->dev->lock); @@ -626,38 +516,17 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) put_ep(epdata); } -static ssize_t -ep_aio_rwtail( - struct kiocb *iocb, - char *buf, - size_t len, - struct ep_data *epdata, - const struct iovec *iv, - unsigned long nr_segs -) +static ssize_t ep_aio(struct kiocb *iocb, + struct kiocb_priv *priv, + struct ep_data *epdata, + char *buf, + size_t len) { - struct kiocb_priv *priv; - struct usb_request *req; - ssize_t value; + struct usb_request *req; + ssize_t value; - priv = kmalloc(sizeof *priv, GFP_KERNEL); - if (!priv) { - value = -ENOMEM; -fail: - kfree(buf); - return value; - } iocb->private = priv; priv->iocb = iocb; - priv->iv = iv; - priv->nr_segs = nr_segs; - INIT_WORK(&priv->work, ep_user_copy_worker); - - value = get_ready_ep(iocb->ki_filp->f_flags, epdata); - if (unlikely(value < 0)) { - kfree(priv); - goto fail; - } kiocb_set_cancel_fn(iocb, ep_aio_cancel); get_ep(epdata); @@ -669,75 +538,154 @@ fail: * allocate or submit those if the host disconnected. */ spin_lock_irq(&epdata->dev->lock); - if (likely(epdata->ep)) { - req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); - if (likely(req)) { - priv->req = req; - req->buf = buf; - req->length = len; - req->complete = ep_aio_complete; - req->context = iocb; - value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC); - if (unlikely(0 != value)) - usb_ep_free_request(epdata->ep, req); - } else - value = -EAGAIN; - } else - value = -ENODEV; - spin_unlock_irq(&epdata->dev->lock); + value = -ENODEV; + if (unlikely(epdata->ep)) + goto fail; - mutex_unlock(&epdata->lock); + req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); + value = -ENOMEM; + if (unlikely(!req)) + goto fail; - if (unlikely(value)) { - kfree(priv); - put_ep(epdata); - } else - value = -EIOCBQUEUED; + priv->req = req; + req->buf = buf; + req->length = len; + req->complete = ep_aio_complete; + req->context = iocb; + value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC); + if (unlikely(0 != value)) { + usb_ep_free_request(epdata->ep, req); + goto fail; + } + spin_unlock_irq(&epdata->dev->lock); + return -EIOCBQUEUED; + +fail: + spin_unlock_irq(&epdata->dev->lock); + kfree(priv->to_free); + kfree(priv); + put_ep(epdata); return value; } static ssize_t -ep_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t o) +ep_read_iter(struct kiocb *iocb, struct iov_iter *to) { - struct ep_data *epdata = iocb->ki_filp->private_data; - char *buf; + struct file *file = iocb->ki_filp; + struct ep_data *epdata = file->private_data; + size_t len = iov_iter_count(to); + ssize_t value; + char *buf; - if (unlikely(usb_endpoint_dir_in(&epdata->desc))) - return -EINVAL; + if ((value = get_ready_ep(file->f_flags, epdata, false)) < 0) + return value; - buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL); - if (unlikely(!buf)) - return -ENOMEM; + /* halt any endpoint by doing a "wrong direction" i/o call */ + if (usb_endpoint_dir_in(&epdata->desc)) { + if (usb_endpoint_xfer_isoc(&epdata->desc) || + !is_sync_kiocb(iocb)) { + mutex_unlock(&epdata->lock); + return -EINVAL; + } + DBG (epdata->dev, "%s halt\n", epdata->name); + spin_lock_irq(&epdata->dev->lock); + if (likely(epdata->ep != NULL)) + usb_ep_set_halt(epdata->ep); + spin_unlock_irq(&epdata->dev->lock); + mutex_unlock(&epdata->lock); + return -EBADMSG; + } - return ep_aio_rwtail(iocb, buf, iocb->ki_nbytes, epdata, iov, nr_segs); + buf = kmalloc(len, GFP_KERNEL); + if (unlikely(!buf)) { + mutex_unlock(&epdata->lock); + return -ENOMEM; + } + if (is_sync_kiocb(iocb)) { + value = ep_io(epdata, buf, len); + if (value >= 0 && copy_to_iter(buf, value, to)) + value = -EFAULT; + } else { + struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); + value = -ENOMEM; + if (!priv) + goto fail; + priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL); + if (!priv->to_free) { + kfree(priv); + goto fail; + } + value = ep_aio(iocb, priv, epdata, buf, len); + if (value == -EIOCBQUEUED) + buf = NULL; + } +fail: + kfree(buf); + mutex_unlock(&epdata->lock); + return value; } +static ssize_t ep_config(struct ep_data *, const char *, size_t); + static ssize_t -ep_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t o) +ep_write_iter(struct kiocb *iocb, struct iov_iter *from) { - struct ep_data *epdata = iocb->ki_filp->private_data; - char *buf; - size_t len = 0; - int i = 0; + struct file *file = iocb->ki_filp; + struct ep_data *epdata = file->private_data; + size_t len = iov_iter_count(from); + bool configured; + ssize_t value; + char *buf; + + if ((value = get_ready_ep(file->f_flags, epdata, true)) < 0) + return value; - if (unlikely(!usb_endpoint_dir_in(&epdata->desc))) - return -EINVAL; + configured = epdata->state == STATE_EP_ENABLED; - buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL); - if (unlikely(!buf)) + /* halt any endpoint by doing a "wrong direction" i/o call */ + if (configured && !usb_endpoint_dir_in(&epdata->desc)) { + if (usb_endpoint_xfer_isoc(&epdata->desc) || + !is_sync_kiocb(iocb)) { + mutex_unlock(&epdata->lock); + return -EINVAL; + } + DBG (epdata->dev, "%s halt\n", epdata->name); + spin_lock_irq(&epdata->dev->lock); + if (likely(epdata->ep != NULL)) + usb_ep_set_halt(epdata->ep); + spin_unlock_irq(&epdata->dev->lock); + mutex_unlock(&epdata->lock); + return -EBADMSG; + } + + buf = kmalloc(len, GFP_KERNEL); + if (unlikely(!buf)) { + mutex_unlock(&epdata->lock); return -ENOMEM; + } - for (i=0; i < nr_segs; i++) { - if (unlikely(copy_from_user(&buf[len], iov[i].iov_base, - iov[i].iov_len) != 0)) { - kfree(buf); - return -EFAULT; + if (unlikely(copy_from_iter(buf, len, from) != len)) { + value = -EFAULT; + goto out; + } + + if (unlikely(!configured)) { + value = ep_config(epdata, buf, len); + } else if (is_sync_kiocb(iocb)) { + value = ep_io(epdata, buf, len); + } else { + struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); + value = -ENOMEM; + if (priv) { + value = ep_aio(iocb, priv, epdata, buf, len); + if (value == -EIOCBQUEUED) + buf = NULL; } - len += iov[i].iov_len; } - return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0); +out: + kfree(buf); + mutex_unlock(&epdata->lock); + return value; } /*----------------------------------------------------------------------*/ @@ -745,15 +693,15 @@ ep_aio_write(struct kiocb *iocb, const struct iovec *iov, /* used after endpoint configuration */ static const struct file_operations ep_io_operations = { .owner = THIS_MODULE, - .llseek = no_llseek, - .read = ep_read, - .write = ep_write, - .unlocked_ioctl = ep_ioctl, + .open = ep_open, .release = ep_release, - - .aio_read = ep_aio_read, - .aio_write = ep_aio_write, + .llseek = no_llseek, + .read = new_sync_read, + .write = new_sync_write, + .unlocked_ioctl = ep_ioctl, + .read_iter = ep_read_iter, + .write_iter = ep_write_iter, }; /* ENDPOINT INITIALIZATION @@ -770,17 +718,12 @@ static const struct file_operations ep_io_operations = { * speed descriptor, then optional high speed descriptor. */ static ssize_t -ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) +ep_config (struct ep_data *data, const char *buf, size_t len) { - struct ep_data *data = fd->private_data; struct usb_ep *ep; u32 tag; int value, length = len; - value = mutex_lock_interruptible(&data->lock); - if (value < 0) - return value; - if (data->state != STATE_EP_READY) { value = -EL2HLT; goto fail; @@ -791,9 +734,7 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) goto fail0; /* we might need to change message format someday */ - if (copy_from_user (&tag, buf, 4)) { - goto fail1; - } + memcpy(&tag, buf, 4); if (tag != 1) { DBG(data->dev, "config %s, bad tag %d\n", data->name, tag); goto fail0; @@ -806,19 +747,15 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) */ /* full/low speed descriptor, then high speed */ - if (copy_from_user (&data->desc, buf, USB_DT_ENDPOINT_SIZE)) { - goto fail1; - } + memcpy(&data->desc, buf, USB_DT_ENDPOINT_SIZE); if (data->desc.bLength != USB_DT_ENDPOINT_SIZE || data->desc.bDescriptorType != USB_DT_ENDPOINT) goto fail0; if (len != USB_DT_ENDPOINT_SIZE) { if (len != 2 * USB_DT_ENDPOINT_SIZE) goto fail0; - if (copy_from_user (&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE, - USB_DT_ENDPOINT_SIZE)) { - goto fail1; - } + memcpy(&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE, + USB_DT_ENDPOINT_SIZE); if (data->hs_desc.bLength != USB_DT_ENDPOINT_SIZE || data->hs_desc.bDescriptorType != USB_DT_ENDPOINT) { @@ -840,24 +777,20 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) case USB_SPEED_LOW: case USB_SPEED_FULL: ep->desc = &data->desc; - value = usb_ep_enable(ep); - if (value == 0) - data->state = STATE_EP_ENABLED; break; case USB_SPEED_HIGH: /* fails if caller didn't provide that descriptor... */ ep->desc = &data->hs_desc; - value = usb_ep_enable(ep); - if (value == 0) - data->state = STATE_EP_ENABLED; break; default: DBG(data->dev, "unconnected, %s init abandoned\n", data->name); value = -EINVAL; + goto gone; } + value = usb_ep_enable(ep); if (value == 0) { - fd->f_op = &ep_io_operations; + data->state = STATE_EP_ENABLED; value = length; } gone: @@ -867,14 +800,10 @@ fail: data->desc.bDescriptorType = 0; data->hs_desc.bDescriptorType = 0; } - mutex_unlock(&data->lock); return value; fail0: value = -EINVAL; goto fail; -fail1: - value = -EFAULT; - goto fail; } static int @@ -902,15 +831,6 @@ ep_open (struct inode *inode, struct file *fd) return value; } -/* used before endpoint configuration */ -static const struct file_operations ep_config_operations = { - .llseek = no_llseek, - - .open = ep_open, - .write = ep_config, - .release = ep_release, -}; - /*----------------------------------------------------------------------*/ /* EP0 IMPLEMENTATION can be partly in userspace. @@ -989,6 +909,10 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr) enum ep0_state state; spin_lock_irq (&dev->lock); + if (dev->state <= STATE_DEV_OPENED) { + retval = -EINVAL; + goto done; + } /* report fd mode change before acting on it */ if (dev->setup_abort) { @@ -1187,8 +1111,6 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) struct dev_data *dev = fd->private_data; ssize_t retval = -ESRCH; - spin_lock_irq (&dev->lock); - /* report fd mode change before acting on it */ if (dev->setup_abort) { dev->setup_abort = 0; @@ -1234,7 +1156,6 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } else DBG (dev, "fail %s, state %d\n", __func__, dev->state); - spin_unlock_irq (&dev->lock); return retval; } @@ -1281,6 +1202,9 @@ ep0_poll (struct file *fd, poll_table *wait) struct dev_data *dev = fd->private_data; int mask = 0; + if (dev->state <= STATE_DEV_OPENED) + return DEFAULT_POLLMASK; + poll_wait(fd, &dev->wait, wait); spin_lock_irq (&dev->lock); @@ -1316,19 +1240,6 @@ static long dev_ioctl (struct file *fd, unsigned code, unsigned long value) return ret; } -/* used after device configuration */ -static const struct file_operations ep0_io_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - - .read = ep0_read, - .write = ep0_write, - .fasync = ep0_fasync, - .poll = ep0_poll, - .unlocked_ioctl = dev_ioctl, - .release = dev_release, -}; - /*----------------------------------------------------------------------*/ /* The in-kernel gadget driver handles most ep0 issues, in particular @@ -1650,7 +1561,7 @@ static int activate_ep_files (struct dev_data *dev) goto enomem1; data->dentry = gadgetfs_create_file (dev->sb, data->name, - data, &ep_config_operations); + data, &ep_io_operations); if (!data->dentry) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); @@ -1852,6 +1763,14 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) u32 tag; char *kbuf; + spin_lock_irq(&dev->lock); + if (dev->state > STATE_DEV_OPENED) { + value = ep0_write(fd, buf, len, ptr); + spin_unlock_irq(&dev->lock); + return value; + } + spin_unlock_irq(&dev->lock); + if (len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4)) return -EINVAL; @@ -1925,7 +1844,6 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) * on, they can work ... except in cleanup paths that * kick in after the ep0 descriptor is closed. */ - fd->f_op = &ep0_io_operations; value = len; } return value; @@ -1956,12 +1874,14 @@ dev_open (struct inode *inode, struct file *fd) return value; } -static const struct file_operations dev_init_operations = { +static const struct file_operations ep0_operations = { .llseek = no_llseek, .open = dev_open, + .read = ep0_read, .write = dev_config, .fasync = ep0_fasync, + .poll = ep0_poll, .unlocked_ioctl = dev_ioctl, .release = dev_release, }; @@ -2077,7 +1997,7 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent) goto Enomem; dev->sb = sb; - dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &dev_init_operations); + dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations); if (!dev->dentry) { put_dev(dev); goto Enomem; diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c index 3a494168661e..6e0a019aad54 100644 --- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c +++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c @@ -1740,10 +1740,9 @@ static int tcm_usbg_make_nexus(struct usbg_tpg *tpg, char *name) goto err_session; } /* - * Now register the TCM vHost virtual I_T Nexus as active with the - * call to __transport_register_session() + * Now register the TCM vHost virtual I_T Nexus as active. */ - __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, + transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, tv_nexus->tvn_se_sess, tv_nexus); tpg->tpg_nexus = tv_nexus; mutex_unlock(&tpg->tpg_mutex); diff --git a/drivers/usb/gadget/legacy/zero.c b/drivers/usb/gadget/legacy/zero.c index ff97ac93ac03..5ee95152493c 100644 --- a/drivers/usb/gadget/legacy/zero.c +++ b/drivers/usb/gadget/legacy/zero.c @@ -68,8 +68,6 @@ static struct usb_zero_options gzero_options = { .isoc_maxpacket = GZERO_ISOC_MAXPACKET, .bulk_buflen = GZERO_BULK_BUFLEN, .qlen = GZERO_QLEN, - .int_interval = GZERO_INT_INTERVAL, - .int_maxpacket = GZERO_INT_MAXPACKET, }; /*-------------------------------------------------------------------------*/ @@ -268,21 +266,6 @@ module_param_named(isoc_maxburst, gzero_options.isoc_maxburst, uint, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(isoc_maxburst, "0 - 15 (ss only)"); -module_param_named(int_interval, gzero_options.int_interval, uint, - S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(int_interval, "1 - 16"); - -module_param_named(int_maxpacket, gzero_options.int_maxpacket, uint, - S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(int_maxpacket, "0 - 1023 (fs), 0 - 1024 (hs/ss)"); - -module_param_named(int_mult, gzero_options.int_mult, uint, S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(int_mult, "0 - 2 (hs/ss only)"); - -module_param_named(int_maxburst, gzero_options.int_maxburst, uint, - S_IRUGO|S_IWUSR); -MODULE_PARM_DESC(int_maxburst, "0 - 15 (ss only)"); - static struct usb_function *func_lb; static struct usb_function_instance *func_inst_lb; @@ -318,10 +301,6 @@ static int __init zero_bind(struct usb_composite_dev *cdev) ss_opts->isoc_maxpacket = gzero_options.isoc_maxpacket; ss_opts->isoc_mult = gzero_options.isoc_mult; ss_opts->isoc_maxburst = gzero_options.isoc_maxburst; - ss_opts->int_interval = gzero_options.int_interval; - ss_opts->int_maxpacket = gzero_options.int_maxpacket; - ss_opts->int_mult = gzero_options.int_mult; - ss_opts->int_maxburst = gzero_options.int_maxburst; ss_opts->bulk_buflen = gzero_options.bulk_buflen; func_ss = usb_get_function(func_inst_ss); diff --git a/drivers/usb/host/ehci-atmel.c b/drivers/usb/host/ehci-atmel.c index 663f7908b15c..be0964a801e8 100644 --- a/drivers/usb/host/ehci-atmel.c +++ b/drivers/usb/host/ehci-atmel.c @@ -34,7 +34,6 @@ static const char hcd_name[] = "ehci-atmel"; struct atmel_ehci_priv { struct clk *iclk; - struct clk *fclk; struct clk *uclk; bool clocked; }; @@ -51,12 +50,9 @@ static void atmel_start_clock(struct atmel_ehci_priv *atmel_ehci) { if (atmel_ehci->clocked) return; - if (IS_ENABLED(CONFIG_COMMON_CLK)) { - clk_set_rate(atmel_ehci->uclk, 48000000); - clk_prepare_enable(atmel_ehci->uclk); - } + + clk_prepare_enable(atmel_ehci->uclk); clk_prepare_enable(atmel_ehci->iclk); - clk_prepare_enable(atmel_ehci->fclk); atmel_ehci->clocked = true; } @@ -64,10 +60,9 @@ static void atmel_stop_clock(struct atmel_ehci_priv *atmel_ehci) { if (!atmel_ehci->clocked) return; - clk_disable_unprepare(atmel_ehci->fclk); + clk_disable_unprepare(atmel_ehci->iclk); - if (IS_ENABLED(CONFIG_COMMON_CLK)) - clk_disable_unprepare(atmel_ehci->uclk); + clk_disable_unprepare(atmel_ehci->uclk); atmel_ehci->clocked = false; } @@ -146,20 +141,13 @@ static int ehci_atmel_drv_probe(struct platform_device *pdev) retval = -ENOENT; goto fail_request_resource; } - atmel_ehci->fclk = devm_clk_get(&pdev->dev, "uhpck"); - if (IS_ERR(atmel_ehci->fclk)) { - dev_err(&pdev->dev, "Error getting function clock\n"); - retval = -ENOENT; + + atmel_ehci->uclk = devm_clk_get(&pdev->dev, "usb_clk"); + if (IS_ERR(atmel_ehci->uclk)) { + dev_err(&pdev->dev, "failed to get uclk\n"); + retval = PTR_ERR(atmel_ehci->uclk); goto fail_request_resource; } - if (IS_ENABLED(CONFIG_COMMON_CLK)) { - atmel_ehci->uclk = devm_clk_get(&pdev->dev, "usb_clk"); - if (IS_ERR(atmel_ehci->uclk)) { - dev_err(&pdev->dev, "failed to get uclk\n"); - retval = PTR_ERR(atmel_ehci->uclk); - goto fail_request_resource; - } - } ehci = hcd_to_ehci(hcd); /* registers start at offset 0x0 */ diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index a7865c4b0498..0827d7c96527 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -387,6 +387,10 @@ static void xhci_clear_port_change_bit(struct xhci_hcd *xhci, u16 wValue, status = PORT_PLC; port_change_bit = "link state"; break; + case USB_PORT_FEAT_C_PORT_CONFIG_ERROR: + status = PORT_CEC; + port_change_bit = "config error"; + break; default: /* Should never happen */ return; @@ -588,6 +592,8 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_LINK_STATE << 16; if ((raw_port_status & PORT_WRC)) status |= USB_PORT_STAT_C_BH_RESET << 16; + if ((raw_port_status & PORT_CEC)) + status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; } if (hcd->speed != HCD_USB3) { @@ -1005,6 +1011,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_C_OVER_CURRENT: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_PORT_LINK_STATE: + case USB_PORT_FEAT_C_PORT_CONFIG_ERROR: xhci_clear_port_change_bit(xhci, wValue, wIndex, port_array[wIndex], temp); break; @@ -1069,7 +1076,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) */ status = bus_state->resuming_ports; - mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC; + mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. */ diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 7f76c8a12f89..2af32e26fafc 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -37,6 +37,9 @@ #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +#define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI 0x22b5 +#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f +#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f static const char hcd_name[] = "xhci_hcd"; @@ -112,6 +115,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; + xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { @@ -127,12 +131,17 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) * PPT chipsets. */ xhci->quirks |= XHCI_SPURIOUS_REBOOT; - xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) { xhci->quirks |= XHCI_SPURIOUS_REBOOT; } + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) { + xhci->quirks |= XHCI_PME_STUCK_QUIRK; + } if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ168) { xhci->quirks |= XHCI_RESET_ON_RESUME; @@ -159,6 +168,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) "QUIRK: Resetting on resume"); } +/* + * Make sure PME works on some Intel xHCI controllers by writing 1 to clear + * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4 + */ +static void xhci_pme_quirk(struct xhci_hcd *xhci) +{ + u32 val; + void __iomem *reg; + + reg = (void __iomem *) xhci->cap_regs + 0x80a4; + val = readl(reg); + writel(val | BIT(28), reg); + readl(reg); +} + /* called during probe() after chip reset completes */ static int xhci_pci_setup(struct usb_hcd *hcd) { @@ -283,6 +307,9 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) if (xhci->quirks & XHCI_COMP_MODE_QUIRK) pdev->no_d3cold = true; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_quirk(xhci); + return xhci_suspend(xhci, do_wakeup); } @@ -313,6 +340,9 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) if (pdev->vendor == PCI_VENDOR_ID_INTEL) usb_enable_intel_xhci_ports(pdev); + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_quirk(xhci); + retval = xhci_resume(xhci, hibernated); return retval; } diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 08d402b15482..0e11d61408ff 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -83,16 +83,6 @@ static int xhci_plat_probe(struct platform_device *pdev) if (irq < 0) return -ENODEV; - - if (of_device_is_compatible(pdev->dev.of_node, - "marvell,armada-375-xhci") || - of_device_is_compatible(pdev->dev.of_node, - "marvell,armada-380-xhci")) { - ret = xhci_mvebu_mbus_init_quirk(pdev); - if (ret) - return ret; - } - /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) @@ -127,6 +117,15 @@ static int xhci_plat_probe(struct platform_device *pdev) goto put_hcd; } + if (of_device_is_compatible(pdev->dev.of_node, + "marvell,armada-375-xhci") || + of_device_is_compatible(pdev->dev.of_node, + "marvell,armada-380-xhci")) { + ret = xhci_mvebu_mbus_init_quirk(pdev); + if (ret) + goto disable_clk; + } + ret = usb_add_hcd(hcd, irq, IRQF_SHARED); if (ret) goto disable_clk; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 88da8d629820..73485fa4372f 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1946,7 +1946,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, if (event_trb != ep_ring->dequeue) { /* The event was for the status stage */ if (event_trb == td->last_trb) { - if (td->urb->actual_length != 0) { + if (td->urb_length_set) { /* Don't overwrite a previously set error code */ if ((*status == -EINPROGRESS || *status == 0) && @@ -1960,7 +1960,13 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->transfer_buffer_length; } } else { - /* Maybe the event was for the data stage? */ + /* + * Maybe the event was for the data stage? If so, update + * already the actual_length of the URB and flag it as + * set, so that it is not overwritten in the event for + * the last TRB. + */ + td->urb_length_set = true; td->urb->actual_length = td->urb->transfer_buffer_length - EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 974514762a14..8e421b89632d 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1,3 +1,4 @@ + /* * xHCI host controller driver * @@ -88,9 +89,10 @@ struct xhci_cap_regs { #define HCS_IST(p) (((p) >> 0) & 0xf) /* bits 4:7, max number of Event Ring segments */ #define HCS_ERST_MAX(p) (((p) >> 4) & 0xf) +/* bits 21:25 Hi 5 bits of Scratchpad buffers SW must allocate for the HW */ /* bit 26 Scratchpad restore - for save/restore HW state - not used yet */ -/* bits 27:31 number of Scratchpad buffers SW must allocate for the HW */ -#define HCS_MAX_SCRATCHPAD(p) (((p) >> 27) & 0x1f) +/* bits 27:31 Lo 5 bits of Scratchpad buffers SW must allocate for the HW */ +#define HCS_MAX_SCRATCHPAD(p) ((((p) >> 16) & 0x3e0) | (((p) >> 27) & 0x1f)) /* HCSPARAMS3 - hcs_params3 - bitmasks */ /* bits 0:7, Max U1 to U0 latency for the roothub ports */ @@ -1288,6 +1290,8 @@ struct xhci_td { struct xhci_segment *start_seg; union xhci_trb *first_trb; union xhci_trb *last_trb; + /* actual_length of the URB has already been set */ + bool urb_length_set; }; /* xHCI command default timeout value */ @@ -1560,6 +1564,7 @@ struct xhci_hcd { #define XHCI_SPURIOUS_WAKEUP (1 << 18) /* For controllers with a broken beyond repair streams implementation */ #define XHCI_BROKEN_STREAMS (1 << 19) +#define XHCI_PME_STUCK_QUIRK (1 << 20) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ diff --git a/drivers/usb/isp1760/isp1760-core.c b/drivers/usb/isp1760/isp1760-core.c index b9827556455f..bfa402cf3a27 100644 --- a/drivers/usb/isp1760/isp1760-core.c +++ b/drivers/usb/isp1760/isp1760-core.c @@ -151,8 +151,7 @@ int isp1760_register(struct resource *mem, int irq, unsigned long irqflags, } if (IS_ENABLED(CONFIG_USB_ISP1761_UDC) && !udc_disabled) { - ret = isp1760_udc_register(isp, irq, irqflags | IRQF_SHARED | - IRQF_DISABLED); + ret = isp1760_udc_register(isp, irq, irqflags); if (ret < 0) { isp1760_hcd_unregister(&isp->hcd); return ret; diff --git a/drivers/usb/isp1760/isp1760-hcd.c b/drivers/usb/isp1760/isp1760-hcd.c index eba9b82e2d70..3cb98b1d5d29 100644 --- a/drivers/usb/isp1760/isp1760-hcd.c +++ b/drivers/usb/isp1760/isp1760-hcd.c @@ -1274,7 +1274,7 @@ static void errata2_function(unsigned long data) for (slot = 0; slot < 32; slot++) if (priv->atl_slots[slot].qh && time_after(jiffies, priv->atl_slots[slot].timestamp + - SLOT_TIMEOUT * HZ / 1000)) { + msecs_to_jiffies(SLOT_TIMEOUT))) { ptd_read(hcd->regs, ATL_PTD_OFFSET, slot, &ptd); if (!FROM_DW0_VALID(ptd.dw0) && !FROM_DW3_ACTIVE(ptd.dw3)) @@ -1286,7 +1286,7 @@ static void errata2_function(unsigned long data) spin_unlock_irqrestore(&priv->lock, spinflags); - errata2_timer.expires = jiffies + SLOT_CHECK_PERIOD * HZ / 1000; + errata2_timer.expires = jiffies + msecs_to_jiffies(SLOT_CHECK_PERIOD); add_timer(&errata2_timer); } @@ -1336,7 +1336,7 @@ static int isp1760_run(struct usb_hcd *hcd) return retval; setup_timer(&errata2_timer, errata2_function, (unsigned long)hcd); - errata2_timer.expires = jiffies + SLOT_CHECK_PERIOD * HZ / 1000; + errata2_timer.expires = jiffies + msecs_to_jiffies(SLOT_CHECK_PERIOD); add_timer(&errata2_timer); chipid = reg_read32(hcd->regs, HC_CHIP_ID_REG); diff --git a/drivers/usb/isp1760/isp1760-udc.c b/drivers/usb/isp1760/isp1760-udc.c index 9612d7990565..3fc4fe770253 100644 --- a/drivers/usb/isp1760/isp1760-udc.c +++ b/drivers/usb/isp1760/isp1760-udc.c @@ -1191,6 +1191,7 @@ static int isp1760_udc_start(struct usb_gadget *gadget, struct usb_gadget_driver *driver) { struct isp1760_udc *udc = gadget_to_udc(gadget); + unsigned long flags; /* The hardware doesn't support low speed. */ if (driver->max_speed < USB_SPEED_FULL) { @@ -1198,17 +1199,17 @@ static int isp1760_udc_start(struct usb_gadget *gadget, return -EINVAL; } - spin_lock(&udc->lock); + spin_lock_irqsave(&udc->lock, flags); if (udc->driver) { dev_err(udc->isp->dev, "UDC already has a gadget driver\n"); - spin_unlock(&udc->lock); + spin_unlock_irqrestore(&udc->lock, flags); return -EBUSY; } udc->driver = driver; - spin_unlock(&udc->lock); + spin_unlock_irqrestore(&udc->lock, flags); dev_dbg(udc->isp->dev, "starting UDC with driver %s\n", driver->function); @@ -1232,6 +1233,7 @@ static int isp1760_udc_start(struct usb_gadget *gadget, static int isp1760_udc_stop(struct usb_gadget *gadget) { struct isp1760_udc *udc = gadget_to_udc(gadget); + unsigned long flags; dev_dbg(udc->isp->dev, "%s\n", __func__); @@ -1239,9 +1241,9 @@ static int isp1760_udc_stop(struct usb_gadget *gadget) isp1760_udc_write(udc, DC_MODE, 0); - spin_lock(&udc->lock); + spin_lock_irqsave(&udc->lock, flags); udc->driver = NULL; - spin_unlock(&udc->lock); + spin_unlock_irqrestore(&udc->lock, flags); return 0; } @@ -1411,7 +1413,7 @@ static int isp1760_udc_init(struct isp1760_udc *udc) return -ENODEV; } - if (chipid != 0x00011582) { + if (chipid != 0x00011582 && chipid != 0x00158210) { dev_err(udc->isp->dev, "udc: invalid chip ID 0x%08x\n", chipid); return -ENODEV; } @@ -1451,8 +1453,8 @@ int isp1760_udc_register(struct isp1760_device *isp, int irq, sprintf(udc->irqname, "%s (udc)", devname); - ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | IRQF_DISABLED | - irqflags, udc->irqname, udc); + ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | irqflags, + udc->irqname, udc); if (ret < 0) goto error; diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 14e1628483d9..39db8b603627 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -79,7 +79,8 @@ config USB_MUSB_TUSB6010 config USB_MUSB_OMAP2PLUS tristate "OMAP2430 and onwards" - depends on ARCH_OMAP2PLUS && USB && OMAP_CONTROL_PHY + depends on ARCH_OMAP2PLUS && USB + depends on OMAP_CONTROL_PHY || !OMAP_CONTROL_PHY select GENERIC_PHY config USB_MUSB_AM35X diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index e6f4cbfeed97..067920f2d570 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1969,10 +1969,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) goto fail0; } - pm_runtime_use_autosuspend(musb->controller); - pm_runtime_set_autosuspend_delay(musb->controller, 200); - pm_runtime_enable(musb->controller); - spin_lock_init(&musb->lock); musb->board_set_power = plat->set_power; musb->min_power = plat->min_power; @@ -1991,6 +1987,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_readl = musb_default_readl; musb_writel = musb_default_writel; + /* We need musb_read/write functions initialized for PM */ + pm_runtime_use_autosuspend(musb->controller); + pm_runtime_set_autosuspend_delay(musb->controller, 200); + pm_runtime_irq_safe(musb->controller); + pm_runtime_enable(musb->controller); + /* The musb_platform_init() call: * - adjusts musb->mregs * - sets the musb->isr diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 53bd0e71d19f..a900c9877195 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -457,12 +457,27 @@ static int dsps_musb_init(struct musb *musb) if (IS_ERR(musb->xceiv)) return PTR_ERR(musb->xceiv); + musb->phy = devm_phy_get(dev->parent, "usb2-phy"); + /* Returns zero if e.g. not clocked */ rev = dsps_readl(reg_base, wrp->revision); if (!rev) return -ENODEV; usb_phy_init(musb->xceiv); + if (IS_ERR(musb->phy)) { + musb->phy = NULL; + } else { + ret = phy_init(musb->phy); + if (ret < 0) + return ret; + ret = phy_power_on(musb->phy); + if (ret) { + phy_exit(musb->phy); + return ret; + } + } + setup_timer(&glue->timer, otg_timer, (unsigned long) musb); /* Reset the musb */ @@ -502,6 +517,8 @@ static int dsps_musb_exit(struct musb *musb) del_timer_sync(&glue->timer); usb_phy_shutdown(musb->xceiv); + phy_power_off(musb->phy); + phy_exit(musb->phy); debugfs_remove_recursive(glue->dbgfs_root); return 0; @@ -610,7 +627,7 @@ static int dsps_musb_reset(struct musb *musb) struct device *dev = musb->controller; struct dsps_glue *glue = dev_get_drvdata(dev->parent); const struct dsps_musb_wrapper *wrp = glue->wrp; - int session_restart = 0; + int session_restart = 0, error; if (glue->sw_babble_enabled) session_restart = sw_babble_control(musb); @@ -624,8 +641,14 @@ static int dsps_musb_reset(struct musb *musb) dsps_writel(musb->ctrl_base, wrp->control, (1 << wrp->reset)); usleep_range(100, 200); usb_phy_shutdown(musb->xceiv); + error = phy_power_off(musb->phy); + if (error) + dev_err(dev, "phy shutdown failed: %i\n", error); usleep_range(100, 200); usb_phy_init(musb->xceiv); + error = phy_power_on(musb->phy); + if (error) + dev_err(dev, "phy powerup failed: %i\n", error); session_restart = 1; } @@ -687,7 +710,7 @@ static int dsps_create_musb_pdev(struct dsps_glue *glue, struct musb_hdrc_config *config; struct platform_device *musb; struct device_node *dn = parent->dev.of_node; - int ret; + int ret, val; memset(resources, 0, sizeof(resources)); res = platform_get_resource_byname(parent, IORESOURCE_MEM, "mc"); @@ -739,7 +762,10 @@ static int dsps_create_musb_pdev(struct dsps_glue *glue, pdata.mode = get_musb_port_mode(dev); /* DT keeps this entry in mA, musb expects it as per USB spec */ pdata.power = get_int_prop(dn, "mentor,power") / 2; - config->multipoint = of_property_read_bool(dn, "mentor,multipoint"); + + ret = of_property_read_u32(dn, "mentor,multipoint", &val); + if (!ret && val) + config->multipoint = true; ret = platform_device_add_data(musb, &pdata, sizeof(pdata)); if (ret) { diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 883a9adfdfff..c3d5fc9dfb5b 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2613,7 +2613,7 @@ static const struct hc_driver musb_hc_driver = { .description = "musb-hcd", .product_desc = "MUSB HDRC host driver", .hcd_priv_size = sizeof(struct musb *), - .flags = HCD_USB2 | HCD_MEMORY, + .flags = HCD_USB2 | HCD_MEMORY | HCD_BH, /* not using irq handler or reset hooks from usbcore, since * those must be shared with peripheral code for OTG configs diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 763649eb4987..cc752d8c7773 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -516,7 +516,7 @@ static int omap2430_probe(struct platform_device *pdev) struct omap2430_glue *glue; struct device_node *np = pdev->dev.of_node; struct musb_hdrc_config *config; - int ret = -ENOMEM; + int ret = -ENOMEM, val; glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL); if (!glue) @@ -559,7 +559,10 @@ static int omap2430_probe(struct platform_device *pdev) of_property_read_u32(np, "num-eps", (u32 *)&config->num_eps); of_property_read_u32(np, "ram-bits", (u32 *)&config->ram_bits); of_property_read_u32(np, "power", (u32 *)&pdata->power); - config->multipoint = of_property_read_bool(np, "multipoint"); + + ret = of_property_read_u32(np, "multipoint", &val); + if (!ret && val) + config->multipoint = true; pdata->board_data = data; pdata->config = config; diff --git a/drivers/usb/phy/phy-am335x-control.c b/drivers/usb/phy/phy-am335x-control.c index 403fab772724..7b3035ff9434 100644 --- a/drivers/usb/phy/phy-am335x-control.c +++ b/drivers/usb/phy/phy-am335x-control.c @@ -126,6 +126,9 @@ struct phy_control *am335x_get_phy_control(struct device *dev) return NULL; dev = bus_find_device(&platform_bus_type, NULL, node, match); + if (!dev) + return NULL; + ctrl_usb = dev_get_drvdata(dev); if (!ctrl_usb) return NULL; diff --git a/drivers/usb/renesas_usbhs/Kconfig b/drivers/usb/renesas_usbhs/Kconfig index de83b9d0cd5c..ebc99ee076ce 100644 --- a/drivers/usb/renesas_usbhs/Kconfig +++ b/drivers/usb/renesas_usbhs/Kconfig @@ -6,6 +6,7 @@ config USB_RENESAS_USBHS tristate 'Renesas USBHS controller' depends on USB_GADGET depends on ARCH_SHMOBILE || SUPERH || COMPILE_TEST + depends on EXTCON || !EXTCON # if EXTCON=m, USBHS cannot be built-in default n help Renesas USBHS is a discrete USB host and peripheral controller chip diff --git a/drivers/usb/serial/bus.c b/drivers/usb/serial/bus.c index 9374bd2aba20..8936a83c96cd 100644 --- a/drivers/usb/serial/bus.c +++ b/drivers/usb/serial/bus.c @@ -38,56 +38,51 @@ static int usb_serial_device_match(struct device *dev, return 0; } -static ssize_t port_number_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct usb_serial_port *port = to_usb_serial_port(dev); - - return sprintf(buf, "%d\n", port->port_number); -} -static DEVICE_ATTR_RO(port_number); - static int usb_serial_device_probe(struct device *dev) { struct usb_serial_driver *driver; struct usb_serial_port *port; + struct device *tty_dev; int retval = 0; int minor; port = to_usb_serial_port(dev); - if (!port) { - retval = -ENODEV; - goto exit; - } + if (!port) + return -ENODEV; /* make sure suspend/resume doesn't race against port_probe */ retval = usb_autopm_get_interface(port->serial->interface); if (retval) - goto exit; + return retval; driver = port->serial->type; if (driver->port_probe) { retval = driver->port_probe(port); if (retval) - goto exit_with_autopm; + goto err_autopm_put; } - retval = device_create_file(dev, &dev_attr_port_number); - if (retval) { - if (driver->port_remove) - retval = driver->port_remove(port); - goto exit_with_autopm; + minor = port->minor; + tty_dev = tty_register_device(usb_serial_tty_driver, minor, dev); + if (IS_ERR(tty_dev)) { + retval = PTR_ERR(tty_dev); + goto err_port_remove; } - minor = port->minor; - tty_register_device(usb_serial_tty_driver, minor, dev); + usb_autopm_put_interface(port->serial->interface); + dev_info(&port->serial->dev->dev, "%s converter now attached to ttyUSB%d\n", driver->description, minor); -exit_with_autopm: + return 0; + +err_port_remove: + if (driver->port_remove) + driver->port_remove(port); +err_autopm_put: usb_autopm_put_interface(port->serial->interface); -exit: + return retval; } @@ -114,8 +109,6 @@ static int usb_serial_device_remove(struct device *dev) minor = port->minor; tty_unregister_device(usb_serial_tty_driver, minor); - device_remove_file(&port->dev, &dev_attr_port_number); - driver = port->serial->type; if (driver->port_remove) retval = driver->port_remove(port); diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 2d72aa3564a3..ede4f5fcfadd 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -84,6 +84,10 @@ struct ch341_private { u8 line_status; /* active status of modem control inputs */ }; +static void ch341_set_termios(struct tty_struct *tty, + struct usb_serial_port *port, + struct ktermios *old_termios); + static int ch341_control_out(struct usb_device *dev, u8 request, u16 value, u16 index) { @@ -309,19 +313,12 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port) struct ch341_private *priv = usb_get_serial_port_data(port); int r; - priv->baud_rate = DEFAULT_BAUD_RATE; - r = ch341_configure(serial->dev, priv); if (r) goto out; - r = ch341_set_handshake(serial->dev, priv->line_control); - if (r) - goto out; - - r = ch341_set_baudrate(serial->dev, priv); - if (r) - goto out; + if (tty) + ch341_set_termios(tty, port, NULL); dev_dbg(&port->dev, "%s - submitting interrupt urb\n", __func__); r = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 29fa1c3d0089..3806e7014199 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> +#include <linux/module.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/console.h> @@ -144,6 +145,7 @@ static int usb_console_setup(struct console *co, char *options) init_ldsem(&tty->ldisc_sem); INIT_LIST_HEAD(&tty->tty_files); kref_get(&tty->driver->kref); + __module_get(tty->driver->owner); tty->ops = &usb_console_fake_tty_ops; if (tty_init_termios(tty)) { retval = -ENOMEM; diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f40c856ff758..84ce2d74894c 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -147,6 +147,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */ { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */ { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */ + { USB_DEVICE(0x16C0, 0x09B0) }, /* Lunatico Seletek */ + { USB_DEVICE(0x16C0, 0x09B1) }, /* Lunatico Seletek */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 1ebb351b9e9a..8eb68a31cab6 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -604,6 +604,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, /* * ELV devices: */ @@ -799,6 +800,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) }, { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) }, + { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID), + .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID), @@ -978,6 +981,23 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) }, /* GE Healthcare devices */ { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, + /* Active Research (Actisense) devices */ + { USB_DEVICE(FTDI_VID, ACTISENSE_NDC_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_D9AC_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_D9AD_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_D9AE_PID) }, + { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_NMEA2000_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ETHERNET_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_WIFI_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) }, + { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) }, { } /* Terminating entry */ }; @@ -1864,8 +1884,12 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial) { struct usb_device *udev = serial->dev; - if ((udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) || - (udev->product && !strcmp(udev->product, "BeagleBone/XDS100V2"))) + if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) + return ftdi_jtag_probe(serial); + + if (udev->product && + (!strcmp(udev->product, "BeagleBone/XDS100V2") || + !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); return 0; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index e52409c9be99..4e4f46f3c89c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -38,6 +38,9 @@ #define FTDI_LUMEL_PD12_PID 0x6002 +/* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */ +#define CYBER_CORTEX_AV_PID 0x8698 + /* * Marvell OpenRD Base, Client * http://www.open-rd.org @@ -558,6 +561,12 @@ */ #define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +/* + * Synapse Wireless product ids (FTDI_VID) + * http://www.synapse-wireless.com + */ +#define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */ + /********************************/ /** third-party VID/PID combos **/ @@ -1438,3 +1447,23 @@ */ #define GE_HEALTHCARE_VID 0x1901 #define GE_HEALTHCARE_NEMO_TRACKER_PID 0x0015 + +/* + * Active Research (Actisense) devices + */ +#define ACTISENSE_NDC_PID 0xD9A8 /* NDC USB Serial Adapter */ +#define ACTISENSE_USG_PID 0xD9A9 /* USG USB Serial Adapter */ +#define ACTISENSE_NGT_PID 0xD9AA /* NGT NMEA2000 Interface */ +#define ACTISENSE_NGW_PID 0xD9AB /* NGW NMEA2000 Gateway */ +#define ACTISENSE_D9AC_PID 0xD9AC /* Actisense Reserved */ +#define ACTISENSE_D9AD_PID 0xD9AD /* Actisense Reserved */ +#define ACTISENSE_D9AE_PID 0xD9AE /* Actisense Reserved */ +#define ACTISENSE_D9AF_PID 0xD9AF /* Actisense Reserved */ +#define CHETCO_SEAGAUGE_PID 0xA548 /* SeaGauge USB Adapter */ +#define CHETCO_SEASWITCH_PID 0xA549 /* SeaSwitch USB Adapter */ +#define CHETCO_SEASMART_NMEA2000_PID 0xA54A /* SeaSmart NMEA2000 Gateway */ +#define CHETCO_SEASMART_ETHERNET_PID 0xA54B /* SeaSmart Ethernet Gateway */ +#define CHETCO_SEASMART_WIFI_PID 0xA5AC /* SeaSmart Wifi Gateway */ +#define CHETCO_SEASMART_DISPLAY_PID 0xA5AD /* SeaSmart NMEA2000 Display */ +#define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */ +#define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */ diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index ccf1df7c4b80..54e170dd3dad 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -258,7 +258,8 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) * character or at least one jiffy. */ period = max_t(unsigned long, (10 * HZ / bps), 1); - period = min_t(unsigned long, period, timeout); + if (timeout) + period = min_t(unsigned long, period, timeout); dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n", __func__, jiffies_to_msecs(timeout), @@ -268,7 +269,7 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout) schedule_timeout_interruptible(period); if (signal_pending(current)) break; - if (time_after(jiffies, expire)) + if (timeout && time_after(jiffies, expire)) break; } } diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index dd97d8b572c3..4f7e072e4e00 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -61,6 +61,7 @@ struct keyspan_pda_private { /* For Xircom PGSDB9 and older Entrega version of the same device */ #define XIRCOM_VENDOR_ID 0x085a #define XIRCOM_FAKE_ID 0x8027 +#define XIRCOM_FAKE_ID_2 0x8025 /* "PGMFHUB" serial */ #define ENTREGA_VENDOR_ID 0x1645 #define ENTREGA_FAKE_ID 0x8093 @@ -70,6 +71,7 @@ static const struct usb_device_id id_table_combined[] = { #endif #ifdef XIRCOM { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID) }, + { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID_2) }, { USB_DEVICE(ENTREGA_VENDOR_ID, ENTREGA_FAKE_ID) }, #endif { USB_DEVICE(KEYSPAN_VENDOR_ID, KEYSPAN_PDA_ID) }, @@ -93,6 +95,7 @@ static const struct usb_device_id id_table_fake[] = { #ifdef XIRCOM static const struct usb_device_id id_table_fake_xircom[] = { { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID) }, + { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID_2) }, { USB_DEVICE(ENTREGA_VENDOR_ID, ENTREGA_FAKE_ID) }, { } }; diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index ab1d690274ae..460a40669967 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -1284,7 +1284,8 @@ static int mxuport_open(struct tty_struct *tty, struct usb_serial_port *port) } /* Initial port termios */ - mxuport_set_termios(tty, port, NULL); + if (tty) + mxuport_set_termios(tty, port, NULL); /* * TODO: use RQ_VENDOR_GET_MSR, once we know what it diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 0f872e6b2c87..829604d11f3f 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -132,6 +132,7 @@ MODULE_DEVICE_TABLE(usb, id_table); #define UART_OVERRUN_ERROR 0x40 #define UART_CTS 0x80 +static void pl2303_set_break(struct usb_serial_port *port, bool enable); enum pl2303_type { TYPE_01, /* Type 0 and 1 (difference unknown) */ @@ -615,6 +616,7 @@ static void pl2303_close(struct usb_serial_port *port) { usb_serial_generic_close(port); usb_kill_urb(port->interrupt_in_urb); + pl2303_set_break(port, false); } static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port) @@ -741,17 +743,16 @@ static int pl2303_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } -static void pl2303_break_ctl(struct tty_struct *tty, int break_state) +static void pl2303_set_break(struct usb_serial_port *port, bool enable) { - struct usb_serial_port *port = tty->driver_data; struct usb_serial *serial = port->serial; u16 state; int result; - if (break_state == 0) - state = BREAK_OFF; - else + if (enable) state = BREAK_ON; + else + state = BREAK_OFF; dev_dbg(&port->dev, "%s - turning break %s\n", __func__, state == BREAK_OFF ? "off" : "on"); @@ -763,6 +764,13 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state) dev_err(&port->dev, "error sending break = %d\n", result); } +static void pl2303_break_ctl(struct tty_struct *tty, int state) +{ + struct usb_serial_port *port = tty->driver_data; + + pl2303_set_break(port, state); +} + static void pl2303_update_line_status(struct usb_serial_port *port, unsigned char *data, unsigned int actual_length) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 475723c006f9..529066bbc7e8 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -687,6 +687,21 @@ static void serial_port_dtr_rts(struct tty_port *port, int on) drv->dtr_rts(p, on); } +static ssize_t port_number_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct usb_serial_port *port = to_usb_serial_port(dev); + + return sprintf(buf, "%u\n", port->port_number); +} +static DEVICE_ATTR_RO(port_number); + +static struct attribute *usb_serial_port_attrs[] = { + &dev_attr_port_number.attr, + NULL +}; +ATTRIBUTE_GROUPS(usb_serial_port); + static const struct tty_port_operations serial_port_ops = { .carrier_raised = serial_port_carrier_raised, .dtr_rts = serial_port_dtr_rts, @@ -902,6 +917,7 @@ static int usb_serial_probe(struct usb_interface *interface, port->dev.driver = NULL; port->dev.bus = &usb_serial_bus_type; port->dev.release = &usb_serial_port_release; + port->dev.groups = usb_serial_port_groups; device_initialize(&port->dev); } @@ -940,8 +956,9 @@ static int usb_serial_probe(struct usb_interface *interface, port = serial->port[i]; if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL)) goto probe_error; - buffer_size = max_t(int, serial->type->bulk_out_size, - usb_endpoint_maxp(endpoint)); + buffer_size = serial->type->bulk_out_size; + if (!buffer_size) + buffer_size = usb_endpoint_maxp(endpoint); port->bulk_out_size = buffer_size; port->bulk_out_endpointAddress = endpoint->bEndpointAddress; diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index dbc00e56c7f5..c85ea530085f 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -113,6 +113,20 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_ATA_1X), +/* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */ +UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, + "Initio Corporation", + "", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_ATA_1X), + +/* Reported-by: Tom Arild Naess <tanaess@gmail.com> */ +UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999, + "JMicron", + "JMS539", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_NO_REPORT_OPCODES), + /* Reported-by: Claudio Bizzarri <claudio.bizzarri@gmail.com> */ UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999, "JMicron", diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index d468d02179f4..5600c33fcadb 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -889,6 +889,12 @@ static void usb_stor_scan_dwork(struct work_struct *work) !(us->fflags & US_FL_SCM_MULT_TARG)) { mutex_lock(&us->dev_mutex); us->max_lun = usb_stor_Bulk_max_lun(us); + /* + * Allow proper scanning of devices that present more than 8 LUNs + * While not affecting other devices that may need the previous behavior + */ + if (us->max_lun >= 8) + us_to_host(us)->max_lun = us->max_lun+1; mutex_unlock(&us->dev_mutex); } scsi_scan_host(us_to_host(us)); diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index f88bfdf5b6a0..2027a27546ef 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -868,12 +868,14 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, func = vfio_pci_set_err_trigger; break; } + break; case VFIO_PCI_REQ_IRQ_INDEX: switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_TRIGGER: func = vfio_pci_set_req_trigger; break; } + break; } if (!func) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index afa06d28725d..2bbfc25e582c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -591,11 +591,6 @@ static void handle_rx(struct vhost_net *net) * TODO: support TSO. */ iov_iter_advance(&msg.msg_iter, vhost_hlen); - } else { - /* It'll come from socket; we'll need to patch - * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF - */ - iov_iter_advance(&fixup, sizeof(hdr)); } err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); @@ -609,17 +604,25 @@ static void handle_rx(struct vhost_net *net) continue; } /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ - if (unlikely(vhost_hlen) && - copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { - vq_err(vq, "Unable to write vnet_hdr at addr %p\n", - vq->iov->iov_base); - break; + if (unlikely(vhost_hlen)) { + if (copy_to_iter(&hdr, sizeof(hdr), + &fixup) != sizeof(hdr)) { + vq_err(vq, "Unable to write vnet_hdr " + "at addr %p\n", vq->iov->iov_base); + break; + } + } else { + /* Header came from socket; we'll need to patch + * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF + */ + iov_iter_advance(&fixup, sizeof(hdr)); } /* TODO: Should check and handle checksum. */ num_buffers = cpu_to_vhost16(vq, headcount); if (likely(mergeable) && - copy_to_iter(&num_buffers, 2, &fixup) != 2) { + copy_to_iter(&num_buffers, sizeof num_buffers, + &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); break; diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8d4f3f1ff799..71df240a467a 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1956,10 +1956,9 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg, goto out; } /* - * Now register the TCM vhost virtual I_T Nexus as active with the - * call to __transport_register_session() + * Now register the TCM vhost virtual I_T Nexus as active. */ - __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, + transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, tv_nexus->tvn_se_sess, tv_nexus); tpg->tpg_nexus = tv_nexus; diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 32c0b6b28097..9362424c2340 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -599,6 +599,9 @@ static int clcdfb_of_get_mode(struct device *dev, struct device_node *endpoint, len = clcdfb_snprintf_mode(NULL, 0, mode); name = devm_kzalloc(dev, len + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + clcdfb_snprintf_mode(name, len + 1, mode); mode->name = name; diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 95338593ebf4..868facdec638 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -624,9 +624,6 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize, int num = 0, i, first = 1; int ver, rev; - ver = edid[EDID_STRUCT_VERSION]; - rev = edid[EDID_STRUCT_REVISION]; - mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL); if (mode == NULL) return NULL; @@ -637,6 +634,9 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize, return NULL; } + ver = edid[EDID_STRUCT_VERSION]; + rev = edid[EDID_STRUCT_REVISION]; + *dbsize = 0; DPRINTK(" Detailed Timings\n"); diff --git a/drivers/video/fbdev/omap2/dss/display-sysfs.c b/drivers/video/fbdev/omap2/dss/display-sysfs.c index 5a2095a98ed8..12186557a9d4 100644 --- a/drivers/video/fbdev/omap2/dss/display-sysfs.c +++ b/drivers/video/fbdev/omap2/dss/display-sysfs.c @@ -28,44 +28,22 @@ #include <video/omapdss.h> #include "dss.h" -static struct omap_dss_device *to_dss_device_sysfs(struct device *dev) +static ssize_t display_name_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = NULL; - - for_each_dss_dev(dssdev) { - if (dssdev->dev == dev) { - omap_dss_put_device(dssdev); - return dssdev; - } - } - - return NULL; -} - -static ssize_t display_name_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); - return snprintf(buf, PAGE_SIZE, "%s\n", dssdev->name ? dssdev->name : ""); } -static ssize_t display_enabled_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_enabled_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", omapdss_device_is_enabled(dssdev)); } -static ssize_t display_enabled_store(struct device *dev, - struct device_attribute *attr, +static ssize_t display_enabled_store(struct omap_dss_device *dssdev, const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int r; bool enable; @@ -90,19 +68,16 @@ static ssize_t display_enabled_store(struct device *dev, return size; } -static ssize_t display_tear_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_tear_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); return snprintf(buf, PAGE_SIZE, "%d\n", dssdev->driver->get_te ? dssdev->driver->get_te(dssdev) : 0); } -static ssize_t display_tear_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) +static ssize_t display_tear_store(struct omap_dss_device *dssdev, + const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int r; bool te; @@ -120,10 +95,8 @@ static ssize_t display_tear_store(struct device *dev, return size; } -static ssize_t display_timings_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_timings_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); struct omap_video_timings t; if (!dssdev->driver->get_timings) @@ -137,10 +110,9 @@ static ssize_t display_timings_show(struct device *dev, t.y_res, t.vfp, t.vbp, t.vsw); } -static ssize_t display_timings_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) +static ssize_t display_timings_store(struct omap_dss_device *dssdev, + const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); struct omap_video_timings t = dssdev->panel.timings; int r, found; @@ -176,10 +148,8 @@ static ssize_t display_timings_store(struct device *dev, return size; } -static ssize_t display_rotate_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_rotate_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int rotate; if (!dssdev->driver->get_rotate) return -ENOENT; @@ -187,10 +157,9 @@ static ssize_t display_rotate_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", rotate); } -static ssize_t display_rotate_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) +static ssize_t display_rotate_store(struct omap_dss_device *dssdev, + const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int rot, r; if (!dssdev->driver->set_rotate || !dssdev->driver->get_rotate) @@ -207,10 +176,8 @@ static ssize_t display_rotate_store(struct device *dev, return size; } -static ssize_t display_mirror_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_mirror_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int mirror; if (!dssdev->driver->get_mirror) return -ENOENT; @@ -218,10 +185,9 @@ static ssize_t display_mirror_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "%u\n", mirror); } -static ssize_t display_mirror_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) +static ssize_t display_mirror_store(struct omap_dss_device *dssdev, + const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); int r; bool mirror; @@ -239,10 +205,8 @@ static ssize_t display_mirror_store(struct device *dev, return size; } -static ssize_t display_wss_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t display_wss_show(struct omap_dss_device *dssdev, char *buf) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); unsigned int wss; if (!dssdev->driver->get_wss) @@ -253,10 +217,9 @@ static ssize_t display_wss_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "0x%05x\n", wss); } -static ssize_t display_wss_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t size) +static ssize_t display_wss_store(struct omap_dss_device *dssdev, + const char *buf, size_t size) { - struct omap_dss_device *dssdev = to_dss_device_sysfs(dev); u32 wss; int r; @@ -277,50 +240,94 @@ static ssize_t display_wss_store(struct device *dev, return size; } -static DEVICE_ATTR(display_name, S_IRUGO, display_name_show, NULL); -static DEVICE_ATTR(enabled, S_IRUGO|S_IWUSR, +struct display_attribute { + struct attribute attr; + ssize_t (*show)(struct omap_dss_device *, char *); + ssize_t (*store)(struct omap_dss_device *, const char *, size_t); +}; + +#define DISPLAY_ATTR(_name, _mode, _show, _store) \ + struct display_attribute display_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static DISPLAY_ATTR(name, S_IRUGO, display_name_show, NULL); +static DISPLAY_ATTR(display_name, S_IRUGO, display_name_show, NULL); +static DISPLAY_ATTR(enabled, S_IRUGO|S_IWUSR, display_enabled_show, display_enabled_store); -static DEVICE_ATTR(tear_elim, S_IRUGO|S_IWUSR, +static DISPLAY_ATTR(tear_elim, S_IRUGO|S_IWUSR, display_tear_show, display_tear_store); -static DEVICE_ATTR(timings, S_IRUGO|S_IWUSR, +static DISPLAY_ATTR(timings, S_IRUGO|S_IWUSR, display_timings_show, display_timings_store); -static DEVICE_ATTR(rotate, S_IRUGO|S_IWUSR, +static DISPLAY_ATTR(rotate, S_IRUGO|S_IWUSR, display_rotate_show, display_rotate_store); -static DEVICE_ATTR(mirror, S_IRUGO|S_IWUSR, +static DISPLAY_ATTR(mirror, S_IRUGO|S_IWUSR, display_mirror_show, display_mirror_store); -static DEVICE_ATTR(wss, S_IRUGO|S_IWUSR, +static DISPLAY_ATTR(wss, S_IRUGO|S_IWUSR, display_wss_show, display_wss_store); -static const struct attribute *display_sysfs_attrs[] = { - &dev_attr_display_name.attr, - &dev_attr_enabled.attr, - &dev_attr_tear_elim.attr, - &dev_attr_timings.attr, - &dev_attr_rotate.attr, - &dev_attr_mirror.attr, - &dev_attr_wss.attr, +static struct attribute *display_sysfs_attrs[] = { + &display_attr_name.attr, + &display_attr_display_name.attr, + &display_attr_enabled.attr, + &display_attr_tear_elim.attr, + &display_attr_timings.attr, + &display_attr_rotate.attr, + &display_attr_mirror.attr, + &display_attr_wss.attr, NULL }; +static ssize_t display_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct omap_dss_device *dssdev; + struct display_attribute *display_attr; + + dssdev = container_of(kobj, struct omap_dss_device, kobj); + display_attr = container_of(attr, struct display_attribute, attr); + + if (!display_attr->show) + return -ENOENT; + + return display_attr->show(dssdev, buf); +} + +static ssize_t display_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + struct omap_dss_device *dssdev; + struct display_attribute *display_attr; + + dssdev = container_of(kobj, struct omap_dss_device, kobj); + display_attr = container_of(attr, struct display_attribute, attr); + + if (!display_attr->store) + return -ENOENT; + + return display_attr->store(dssdev, buf, size); +} + +static const struct sysfs_ops display_sysfs_ops = { + .show = display_attr_show, + .store = display_attr_store, +}; + +static struct kobj_type display_ktype = { + .sysfs_ops = &display_sysfs_ops, + .default_attrs = display_sysfs_attrs, +}; + int display_init_sysfs(struct platform_device *pdev) { struct omap_dss_device *dssdev = NULL; int r; for_each_dss_dev(dssdev) { - struct kobject *kobj = &dssdev->dev->kobj; - - r = sysfs_create_files(kobj, display_sysfs_attrs); + r = kobject_init_and_add(&dssdev->kobj, &display_ktype, + &pdev->dev.kobj, dssdev->alias); if (r) { DSSERR("failed to create sysfs files\n"); - goto err; - } - - r = sysfs_create_link(&pdev->dev.kobj, kobj, dssdev->alias); - if (r) { - sysfs_remove_files(kobj, display_sysfs_attrs); - - DSSERR("failed to create sysfs display link\n"); + omap_dss_put_device(dssdev); goto err; } } @@ -338,8 +345,12 @@ void display_uninit_sysfs(struct platform_device *pdev) struct omap_dss_device *dssdev = NULL; for_each_dss_dev(dssdev) { - sysfs_remove_link(&pdev->dev.kobj, dssdev->alias); - sysfs_remove_files(&dssdev->dev->kobj, - display_sysfs_attrs); + if (kobject_name(&dssdev->kobj) == NULL) + continue; + + kobject_del(&dssdev->kobj); + kobject_put(&dssdev->kobj); + + memset(&dssdev->kobj, 0, sizeof(dssdev->kobj)); } } diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0413157f3b49..6a356e344f82 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/balloon_compaction.h> #include <linux/oom.h> +#include <linux/wait.h> /* * Balloon device works in 4K page units. So each page is pointed to by @@ -334,17 +335,25 @@ static int virtballoon_oom_notify(struct notifier_block *self, static int balloon(void *_vballoon) { struct virtio_balloon *vb = _vballoon; + DEFINE_WAIT_FUNC(wait, woken_wake_function); set_freezable(); while (!kthread_should_stop()) { s64 diff; try_to_freeze(); - wait_event_interruptible(vb->config_change, - (diff = towards_target(vb)) != 0 - || vb->need_stats_update - || kthread_should_stop() - || freezing(current)); + + add_wait_queue(&vb->config_change, &wait); + for (;;) { + if ((diff = towards_target(vb)) != 0 || + vb->need_stats_update || + kthread_should_stop() || + freezing(current)) + break; + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + remove_wait_queue(&vb->config_change, &wait); + if (vb->need_stats_update) stats_handle_request(vb); if (diff > 0) @@ -499,6 +508,8 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err < 0) goto out_oom_notify; + virtio_device_ready(vdev); + vb->thread = kthread_run(balloon, vb, "vballoon"); if (IS_ERR(vb->thread)) { err = PTR_ERR(vb->thread); diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index cad569890908..6010d7ec0a0f 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -156,22 +156,95 @@ static void vm_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - u8 *ptr = buf; - int i; + void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; + u8 b; + __le16 w; + __le32 l; - for (i = 0; i < len; i++) - ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); + if (vm_dev->version == 1) { + u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + ptr[i] = readb(base + offset + i); + return; + } + + switch (len) { + case 1: + b = readb(base + offset); + memcpy(buf, &b, sizeof b); + break; + case 2: + w = cpu_to_le16(readw(base + offset)); + memcpy(buf, &w, sizeof w); + break; + case 4: + l = cpu_to_le32(readl(base + offset)); + memcpy(buf, &l, sizeof l); + break; + case 8: + l = cpu_to_le32(readl(base + offset)); + memcpy(buf, &l, sizeof l); + l = cpu_to_le32(ioread32(base + offset + sizeof l)); + memcpy(buf + sizeof l, &l, sizeof l); + break; + default: + BUG(); + } } static void vm_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); - const u8 *ptr = buf; - int i; + void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG; + u8 b; + __le16 w; + __le32 l; - for (i = 0; i < len; i++) - writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i); + if (vm_dev->version == 1) { + const u8 *ptr = buf; + int i; + + for (i = 0; i < len; i++) + writeb(ptr[i], base + offset + i); + + return; + } + + switch (len) { + case 1: + memcpy(&b, buf, sizeof b); + writeb(b, base + offset); + break; + case 2: + memcpy(&w, buf, sizeof w); + writew(le16_to_cpu(w), base + offset); + break; + case 4: + memcpy(&l, buf, sizeof l); + writel(le32_to_cpu(l), base + offset); + break; + case 8: + memcpy(&l, buf, sizeof l); + writel(le32_to_cpu(l), base + offset); + memcpy(&l, buf + sizeof l, sizeof l); + writel(le32_to_cpu(l), base + offset + sizeof l); + break; + default: + BUG(); + } +} + +static u32 vm_generation(struct virtio_device *vdev) +{ + struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); + + if (vm_dev->version == 1) + return 0; + else + return readl(vm_dev->base + VIRTIO_MMIO_CONFIG_GENERATION); } static u8 vm_get_status(struct virtio_device *vdev) @@ -440,6 +513,7 @@ static const char *vm_bus_name(struct virtio_device *vdev) static const struct virtio_config_ops virtio_mmio_config_ops = { .get = vm_get, .set = vm_set, + .generation = vm_generation, .get_status = vm_get_status, .set_status = vm_set_status, .reset = vm_reset, diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index 6df940528fd2..1443b3c391de 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -208,7 +208,8 @@ static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt) if ((tmp & AT91_WDT_WDFIEN) && wdt->irq) { err = request_irq(wdt->irq, wdt_interrupt, - IRQF_SHARED | IRQF_IRQPOLL, + IRQF_SHARED | IRQF_IRQPOLL | + IRQF_NO_SUSPEND, pdev->name, wdt); if (err) return err; diff --git a/drivers/watchdog/imgpdc_wdt.c b/drivers/watchdog/imgpdc_wdt.c index c8def68d9e4c..0deaa4f971f5 100644 --- a/drivers/watchdog/imgpdc_wdt.c +++ b/drivers/watchdog/imgpdc_wdt.c @@ -42,10 +42,10 @@ #define PDC_WDT_MIN_TIMEOUT 1 #define PDC_WDT_DEF_TIMEOUT 64 -static int heartbeat; +static int heartbeat = PDC_WDT_DEF_TIMEOUT; module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " - "(default = " __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds " + "(default=" __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")"); static bool nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, bool, 0); @@ -191,6 +191,7 @@ static int pdc_wdt_probe(struct platform_device *pdev) pdc_wdt->wdt_dev.ops = &pdc_wdt_ops; pdc_wdt->wdt_dev.max_timeout = 1 << PDC_WDT_CONFIG_DELAY_MASK; pdc_wdt->wdt_dev.parent = &pdev->dev; + watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_init_timeout(&pdc_wdt->wdt_dev, heartbeat, &pdev->dev); if (ret < 0) { @@ -232,7 +233,6 @@ static int pdc_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(&pdc_wdt->wdt_dev, nowayout); platform_set_drvdata(pdev, pdc_wdt); - watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt); ret = watchdog_register_device(&pdc_wdt->wdt_dev); if (ret) diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c index a87f6df6e85f..938b987de551 100644 --- a/drivers/watchdog/mtk_wdt.c +++ b/drivers/watchdog/mtk_wdt.c @@ -133,7 +133,7 @@ static int mtk_wdt_start(struct watchdog_device *wdt_dev) u32 reg; struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev); void __iomem *wdt_base = mtk_wdt->wdt_base; - u32 ret; + int ret; ret = mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout); if (ret < 0) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index b812462083fc..94d96809e686 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -55,6 +55,23 @@ config XEN_BALLOON_MEMORY_HOTPLUG In that case step 3 should be omitted. +config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT + int "Hotplugged memory limit (in GiB) for a PV guest" + default 512 if X86_64 + default 4 if X86_32 + range 0 64 if X86_32 + depends on XEN_HAVE_PVMMU + depends on XEN_BALLOON_MEMORY_HOTPLUG + help + Maxmium amount of memory (in GiB) that a PV guest can be + expanded to when using memory hotplug. + + A PV guest can have more memory than this limit if is + started with a larger maximum. + + This value is used to allocate enough space in internal + tables needed for physical memory administration. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 0b52d92cb2e5..fd933695f232 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -229,6 +229,29 @@ static enum bp_state reserve_additional_memory(long credit) balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); nid = memory_add_physaddr_to_nid(hotplug_start_paddr); +#ifdef CONFIG_XEN_HAVE_PVMMU + /* + * add_memory() will build page tables for the new memory so + * the p2m must contain invalid entries so the correct + * non-present PTEs will be written. + * + * If a failure occurs, the original (identity) p2m entries + * are not restored since this region is now known not to + * conflict with any devices. + */ + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long pfn, i; + + pfn = PFN_DOWN(hotplug_start_paddr); + for (i = 0; i < balloon_hotplug; i++) { + if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { + pr_warn("set_phys_to_machine() failed, no memory added\n"); + return BP_ECANCELED; + } + } + } +#endif + rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); if (rc) { diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index b4bca2d4a7e5..70fba973a107 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -526,20 +526,26 @@ static unsigned int __startup_pirq(unsigned int irq) pirq_query_unmask(irq); rc = set_evtchn_to_irq(evtchn, irq); - if (rc != 0) { - pr_err("irq%d: Failed to set port to irq mapping (%d)\n", - irq, rc); - xen_evtchn_close(evtchn); - return 0; - } + if (rc) + goto err; + bind_evtchn_to_cpu(evtchn, 0); info->evtchn = evtchn; + rc = xen_evtchn_port_setup(info); + if (rc) + goto err; + out: unmask_evtchn(evtchn); eoi_pirq(irq_get_irq_data(irq)); return 0; + +err: + pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc); + xen_evtchn_close(evtchn); + return 0; } static unsigned int startup_pirq(struct irq_data *data) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 46ae0f9f02ad..75fe3d466515 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -16,7 +16,7 @@ #include "conf_space.h" #include "conf_space_quirks.h" -static bool permissive; +bool permissive; module_param(permissive, bool, 0644); /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h index e56c934ad137..2e1d73d1d5d0 100644 --- a/drivers/xen/xen-pciback/conf_space.h +++ b/drivers/xen/xen-pciback/conf_space.h @@ -64,6 +64,8 @@ struct config_field_entry { void *data; }; +extern bool permissive; + #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) /* Add fields to a device - the add_fields macro expects to get a pointer to diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index c5ee82587e8c..2d7369391472 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -11,6 +11,10 @@ #include "pciback.h" #include "conf_space.h" +struct pci_cmd_info { + u16 val; +}; + struct pci_bar_info { u32 val; u32 len_val; @@ -20,22 +24,36 @@ struct pci_bar_info { #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) -static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +/* Bits guests are allowed to control in permissive mode. */ +#define PCI_COMMAND_GUEST (PCI_COMMAND_MASTER|PCI_COMMAND_SPECIAL| \ + PCI_COMMAND_INVALIDATE|PCI_COMMAND_VGA_PALETTE| \ + PCI_COMMAND_WAIT|PCI_COMMAND_FAST_BACK) + +static void *command_init(struct pci_dev *dev, int offset) { - int i; - int ret; - - ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!pci_is_enabled(dev)) - return ret; - - for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (dev->resource[i].flags & IORESOURCE_IO) - *value |= PCI_COMMAND_IO; - if (dev->resource[i].flags & IORESOURCE_MEM) - *value |= PCI_COMMAND_MEMORY; + struct pci_cmd_info *cmd = kmalloc(sizeof(*cmd), GFP_KERNEL); + int err; + + if (!cmd) + return ERR_PTR(-ENOMEM); + + err = pci_read_config_word(dev, PCI_COMMAND, &cmd->val); + if (err) { + kfree(cmd); + return ERR_PTR(err); } + return cmd; +} + +static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +{ + int ret = pci_read_config_word(dev, offset, value); + const struct pci_cmd_info *cmd = data; + + *value &= PCI_COMMAND_GUEST; + *value |= cmd->val & ~PCI_COMMAND_GUEST; + return ret; } @@ -43,6 +61,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) { struct xen_pcibk_dev_data *dev_data; int err; + u16 val; + struct pci_cmd_info *cmd = data; dev_data = pci_get_drvdata(dev); if (!pci_is_enabled(dev) && is_enable_cmd(value)) { @@ -83,6 +103,19 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) } } + cmd->val = value; + + if (!permissive && (!dev_data || !dev_data->permissive)) + return 0; + + /* Only allow the guest to control certain bits. */ + err = pci_read_config_word(dev, offset, &val); + if (err || val == value) + return err; + + value &= PCI_COMMAND_GUEST; + value |= val & ~PCI_COMMAND_GUEST; + return pci_write_config_word(dev, offset, value); } @@ -282,6 +315,8 @@ static const struct config_field header_common[] = { { .offset = PCI_COMMAND, .size = 2, + .init = command_init, + .release = bar_release, .u.w.read = command_read, .u.w.write = command_write, }, diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 9faca6a60bb0..42bd55a6c237 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1659,11 +1659,8 @@ static int scsiback_make_nexus(struct scsiback_tpg *tpg, name); goto out; } - /* - * Now register the TCM pvscsi virtual I_T Nexus as active with the - * call to __transport_register_session() - */ - __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, + /* Now register the TCM pvscsi virtual I_T Nexus as active. */ + transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl, tv_nexus->tvn_se_sess, tv_nexus); tpg->tpg_nexus = tv_nexus; diff --git a/fs/affs/file.c b/fs/affs/file.c index d2468bf95669..a91795e01a7f 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, boff = tmp % bsize; if (boff) { bh = affs_bread_ino(inode, bidx, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } tmp = min(bsize - boff, to - from); BUG_ON(boff + tmp > bsize || tmp > bsize); memcpy(AFFS_DATA(bh) + boff, data + from, tmp); @@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, bidx++; } else if (bidx) { bh = affs_bread_ino(inode, bidx - 1, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); + if (IS_ERR(bh)) { + written = PTR_ERR(bh); + goto err_first_bh; + } } while (from + bsize <= to) { prev_bh = bh; bh = affs_getemptyblk_ino(inode, bidx); if (IS_ERR(bh)) - goto out; + goto err_bh; memcpy(AFFS_DATA(bh), data + from, bsize); if (buffer_new(bh)) { AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA); @@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping, prev_bh = bh; bh = affs_bread_ino(inode, bidx, 1); if (IS_ERR(bh)) - goto out; + goto err_bh; tmp = min(bsize, to - from); BUG_ON(tmp > bsize); memcpy(AFFS_DATA(bh), data + from, tmp); @@ -790,12 +794,13 @@ done: if (tmp > inode->i_size) inode->i_size = AFFS_I(inode)->mmu_private = tmp; +err_first_bh: unlock_page(page); page_cache_release(page); return written; -out: +err_bh: bh = prev_bh; if (!written) written = PTR_ERR(bh); @@ -278,11 +278,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) +static int aio_ring_remap(struct file *file, struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; struct kioctx_table *table; - int i; + int i, res = -EINVAL; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -292,13 +292,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma) ctx = table->table[i]; if (ctx && ctx->aio_ring_file == file) { - ctx->user_id = ctx->mmap_base = vma->vm_start; + if (!atomic_read(&ctx->dead)) { + ctx->user_id = ctx->mmap_base = vma->vm_start; + res = 0; + } break; } } rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + return res; } static const struct file_operations aio_ring_fops = { @@ -727,6 +731,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); err_ctx: + atomic_set(&ctx->dead, 1); + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); aio_free_ring(ctx); err: mutex_unlock(&ctx->ring_lock); @@ -748,11 +755,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, { struct kioctx_table *table; - if (atomic_xchg(&ctx->dead, 1)) + spin_lock(&mm->ioctx_lock); + if (atomic_xchg(&ctx->dead, 1)) { + spin_unlock(&mm->ioctx_lock); return -EINVAL; + } - - spin_lock(&mm->ioctx_lock); table = rcu_dereference_raw(mm->ioctx_table); WARN_ON(ctx != table->table[ctx->id]); table->table[ctx->id] = NULL; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 993642199326..6d67f32e648d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, parent_nritems = btrfs_header_nritems(parent); blocksize = root->nodesize; - end_slot = parent_nritems; + end_slot = parent_nritems - 1; - if (parent_nritems == 1) + if (parent_nritems <= 1) return 0; btrfs_set_lock_blocking(parent); - for (i = start_slot; i < end_slot; i++) { + for (i = start_slot; i <= end_slot; i++) { int close = 1; btrfs_node_key(parent, &disk_key, i); @@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, other = btrfs_node_blockptr(parent, i - 1); close = close_blocks(blocknr, other, blocksize); } - if (!close && i < end_slot - 2) { + if (!close && i < end_slot) { other = btrfs_node_blockptr(parent, i + 1); close = close_blocks(blocknr, other, blocksize); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 84c3b00f3de8..f9c89cae39ee 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_read_block_groups(struct btrfs_root *root); @@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode, loff_t actual_len, u64 *alloc_hint); int btrfs_inode_check_errors(struct inode *inode); extern const struct dentry_operations btrfs_dentry_operations; +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode); +#endif /* ioctl.c */ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f79f38542a73..639f2663ed3f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { - printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", + printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n", btrfs_super_sys_array_size(sb), sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 571f402d3fc4..8b353ad02f03 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, return 0; } + if (trans->aborted) + return 0; again: inode = lookup_free_space_inode(root, block_group, path); if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { @@ -3243,6 +3245,20 @@ again: */ BTRFS_I(inode)->generation = 0; ret = btrfs_update_inode(trans, root, inode); + if (ret) { + /* + * So theoretically we could recover from this, simply set the + * super cache generation to 0 so we know to invalidate the + * cache, but then we'd have to keep track of the block groups + * that fail this way so we know we _have_ to reset this cache + * before the next commit or risk reading stale cache. So to + * limit our exposure to horrible edge cases lets just abort the + * transaction, this only happens in really bad situations + * anyway. + */ + btrfs_abort_transaction(trans, root, ret); + goto out_put; + } WARN_ON(ret); if (i_size_read(inode) > 0) { @@ -3309,6 +3325,32 @@ out: return ret; } +int btrfs_setup_space_cache(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_block_group_cache *cache, *tmp; + struct btrfs_transaction *cur_trans = trans->transaction; + struct btrfs_path *path; + + if (list_empty(&cur_trans->dirty_bgs) || + !btrfs_test_opt(root, SPACE_CACHE)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* Could add new block groups, use _safe just in case */ + list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs, + dirty_list) { + if (cache->disk_cache_state == BTRFS_DC_CLEAR) + cache_save_setup(cache, trans, path); + } + + btrfs_free_path(path); + return 0; +} + int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents++; + nr_extents = (unsigned)div64_u64(num_bytes + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + BTRFS_I(inode)->outstanding_extents += nr_extents; + nr_extents = 0; if (BTRFS_I(inode)->outstanding_extents > BTRFS_I(inode)->reserved_extents) @@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) if (dropped > 0) to_free += btrfs_calc_trans_metadata_size(root, dropped); + if (btrfs_test_is_dummy_root(root)) + return; + trace_btrfs_space_reservation(root->fs_info, "delalloc", btrfs_ino(inode), to_free, 0); if (root->fs_info->quota_enabled) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c7233ff1d533..d688cfe5d496 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb) /* Should be safe to release our pages at this point */ btrfs_release_extent_buffer_page(eb); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) { + __free_extent_buffer(eb); + return 1; + } +#endif call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); return 1; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b78bbbac900d..30982bbd31c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_unlock(&inode->i_mutex); /* - * we want to make sure fsync finds this change - * but we haven't joined a transaction running right now. - * - * Later on, someone is sure to update the inode and get the - * real transid recorded. - * - * We set last_trans now to the fs_info generation + 1, - * this will either be one more than the running transaction - * or the generation used for the next transaction if there isn't - * one running right now. - * * We also have to set last_sub_trans to the current log transid, * otherwise subsequent syncs to a file that's been synced in this * transaction will appear to have already occured. */ - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; BTRFS_I(inode)->last_sub_trans = root->log_transid; if (num_written > 0) { err = generic_write_sync(file, pos, num_written); @@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); /* - * check the transaction that last modified this inode - * and see if its already been committed - */ - if (!BTRFS_I(inode)->last_trans) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - /* - * if the last transaction that changed this file was before - * the current transaction, we can bail out now without any - * syncing + * If the last transaction that changed this file was before the current + * transaction and we have the full sync flag set in our inode, we can + * bail out now without any syncing. + * + * Note that we can't bail out if the full sync flag isn't set. This is + * because when the full sync flag is set we start all ordered extents + * and wait for them to fully complete - when they complete they update + * the inode's last_trans field through: + * + * btrfs_finish_ordered_io() -> + * btrfs_update_inode_fallback() -> + * btrfs_update_inode() -> + * btrfs_set_inode_last_trans() + * + * So we are sure that last_trans is up to date and can do this check to + * bail out safely. For the fast path, when the full sync flag is not + * set in our inode, we can not do it because we start only our ordered + * extents and don't wait for them to complete (that is when + * btrfs_finish_ordered_io runs), so here at this point their last_trans + * value might be less than or equals to fs_info->last_trans_committed, + * and setting a speculative last_trans for an inode when a buffered + * write is made (such as fs_info->generation + 1 for example) would not + * be reliable since after setting the value and before fsync is called + * any number of transactions can start and commit (transaction kthread + * commits the current transaction periodically), and a transaction + * commit does not start nor waits for ordered extents to complete. */ smp_mb(); if (btrfs_inode_in_log(inode, root->fs_info->generation) || - BTRFS_I(inode)->last_trans <= - root->fs_info->last_trans_committed) { - BTRFS_I(inode)->last_trans = 0; - + (full_sync && BTRFS_I(inode)->last_trans <= + root->fs_info->last_trans_committed)) { /* * We'v had everything committed since the last time we were * modified so clear this flag in case it was set for whatever @@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) bool same_page; bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); u64 ino_size; + bool truncated_page = false; + bool updated_inode = false; ret = btrfs_wait_ordered_range(inode, offset, len); if (ret) @@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) * entire page. */ if (same_page && len < PAGE_CACHE_SIZE) { - if (offset < ino_size) + if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, len, 0); + } else { + ret = 0; + } goto out_only_mutex; } /* zero back part of the first page */ if (offset < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, offset, 0, 0); if (ret) { mutex_unlock(&inode->i_mutex); @@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (!ret) { /* zero the front end of the last page */ if (tail_start + tail_len < ino_size) { + truncated_page = true; ret = btrfs_truncate_page(inode, tail_start + tail_len, 0, 1); if (ret) @@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) } if (lockend < lockstart) { - mutex_unlock(&inode->i_mutex); - return 0; + ret = 0; + goto out_only_mutex; } while (1) { @@ -2506,6 +2514,7 @@ out_trans: trans->block_rsv = &root->fs_info->trans_block_rsv; ret = btrfs_update_inode(trans, root, inode); + updated_inode = true; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out_free: @@ -2515,6 +2524,22 @@ out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); out_only_mutex: + if (!updated_inode && truncated_page && !ret && !err) { + /* + * If we only end up zeroing part of a page, we still need to + * update the inode item, so that all the time fields are + * updated as well as the necessary btrfs inode in memory fields + * for detecting, at fsync time, if the inode isn't yet in the + * log tree or it's there but not up to date. + */ + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + } else { + err = btrfs_update_inode(trans, root, inode); + ret = btrfs_end_transaction(trans, root); + } + } mutex_unlock(&inode->i_mutex); if (ret && !err) err = ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a85c23dfcddb..d2e732d7af52 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, static int btrfs_dirty_inode(struct inode *inode); +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_inode_set_ops(struct inode *inode) +{ + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; +} +#endif + static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode, u64 new_size; /* - * We need the largest size of the remaining extent to see if we - * need to add a new outstanding extent. Think of the following - * case - * - * [MEAX_EXTENT_SIZEx2 - 4k][4k] - * - * The new_size would just be 4k and we'd think we had enough - * outstanding extents for this if we only took one side of the - * split, same goes for the other direction. We need to see if - * the larger size still is the same amount of extents as the - * original size, because if it is we need to add a new - * outstanding extent. But if we split up and the larger size - * is less than the original then we are good to go since we've - * already accounted for the extra extent in our original - * accounting. + * See the explanation in btrfs_merge_extent_hook, the same + * applies here, just in reverse. */ new_size = orig->end - split + 1; - if ((split - orig->start) > new_size) - new_size = split - orig->start; - - num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); - if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) < num_extents) + new_size = split - orig->start; + num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; } @@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode, if (!(other->state & EXTENT_DELALLOC)) return; - old_size = other->end - other->start + 1; - new_size = old_size + (new->end - new->start + 1); + if (new->start > other->start) + new_size = new->end - other->start + 1; + else + new_size = other->end - new->start + 1; /* we're not bigger than the max, unreserve the space and go */ if (new_size <= BTRFS_MAX_EXTENT_SIZE) { @@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode, } /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. + * We have to add up either side to figure out how many extents were + * accounted for before we merged into one big extent. If the number of + * extents we accounted for is <= the amount we need for the new range + * then we can return, otherwise drop. Think of it like this + * + * [ 4k][MAX_SIZE] + * + * So we've grown the extent by a MAX_SIZE extent, this would mean we + * need 2 outstanding extents, on one side we have 1 and the other side + * we have 1 so they are == and we can return. But in this case + * + * [MAX_SIZE+4k][MAX_SIZE+4k] + * + * Each range on their own accounts for 2 extents, but merged together + * they are only 3 extents worth of accounting, so we need to drop in + * this case. */ + old_size = other->end - other->start + 1; num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); + old_size = new->end - new->start + 1; + num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, - BTRFS_MAX_EXTENT_SIZE) > num_extents) + BTRFS_MAX_EXTENT_SIZE) >= num_extents) return; spin_lock(&BTRFS_I(inode)->lock); @@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode, spin_unlock(&BTRFS_I(inode)->lock); } + /* For sanity tests */ + if (btrfs_test_is_dummy_root(root)) + return; + __percpu_counter_add(&root->fs_info->delalloc_bytes, len, root->fs_info->delalloc_batch); spin_lock(&BTRFS_I(inode)->lock); @@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode, root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, len); + /* For sanity tests. */ + if (btrfs_test_is_dummy_root(root)) + return; + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && do_list && !(state->state & EXTENT_NORESERVE)) btrfs_free_reserved_data_space(inode, len); @@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 orig_len = len; + u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, lockstart = start; lockend = start + len - 1; + if (current->journal_info) { + /* + * Need to pull our outstanding extents and set journal_info to NULL so + * that anything that needs to check if there's a transction doesn't get + * confused. + */ + outstanding_extents = current->journal_info; + current->journal_info = NULL; + } + /* * If this errors out it's because we couldn't invalidate pagecache for * this range and we need to fallback to buffered. @@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && em->block_start != EXTENT_MAP_HOLE)) { int type; - int ret; u64 block_start, orig_start, orig_block_len, ram_bytes; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) @@ -7349,11 +7381,20 @@ unlock: if (start + len > i_size_read(inode)) i_size_write(inode, start + len); - if (len < orig_len) { + /* + * If we have an outstanding_extents count still set then we're + * within our reservation, otherwise we need to adjust our inode + * counter appropriately. + */ + if (*outstanding_extents) { + (*outstanding_extents)--; + } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } + + current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); } @@ -7377,6 +7418,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); + if (outstanding_extents) + current->journal_info = outstanding_extents; return ret; } @@ -8076,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + u64 outstanding_extents = 0; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; + outstanding_extents = div64_u64(count + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + + /* + * We need to know how many extents we reserved so that we can + * do the accounting properly if we go over the number we + * originally calculated. Abuse current->journal_info for this. + */ + current->journal_info = &outstanding_extents; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_done(inode); @@ -8125,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { + current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); else if (ret >= 0 && (size_t)ret < count) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 534544e08f76..157cc54fc634 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode, continue; if (entry_end(ordered) <= start) break; - if (!list_empty(&ordered->log_list)) - continue; - if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) + if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) continue; list_add(&ordered->log_list, logged_list); atomic_inc(&ordered->refs); @@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)); - if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) - list_add_tail(&ordered->trans_list, &trans->ordered); + list_add_tail(&ordered->trans_list, &trans->ordered); spin_lock_irq(&log->log_extents_lock[index]); } spin_unlock_irq(&log->log_extents_lock[index]); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 97159a8e91d4..058c79eecbfb 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1, if (oper1->seq < oper2->seq) return -1; if (oper1->seq > oper2->seq) - return -1; + return 1; if (oper1->ref_root < oper2->ref_root) return -1; if (oper1->ref_root > oper2->ref_root) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fe5857223515..d6033f540cc7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -230,6 +230,7 @@ struct pending_dir_move { u64 parent_ino; u64 ino; u64 gen; + bool is_orphan; struct list_head update_refs; }; @@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 ino_gen, u64 parent_ino, struct list_head *new_refs, - struct list_head *deleted_refs) + struct list_head *deleted_refs, + const bool is_orphan) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; struct rb_node *parent = NULL; @@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, pm->parent_ino = parent_ino; pm->ino = ino; pm->gen = ino_gen; + pm->is_orphan = is_orphan; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) rmdir_ino = dm->rmdir_ino; free_waiting_dir_move(sctx, dm); - ret = get_first_ref(sctx->parent_root, pm->ino, - &parent_ino, &parent_gen, name); - if (ret < 0) - goto out; - - ret = get_cur_path(sctx, parent_ino, parent_gen, - from_path); - if (ret < 0) - goto out; - ret = fs_path_add_path(from_path, name); + if (pm->is_orphan) { + ret = gen_unique_name(sctx, pm->ino, + pm->gen, from_path); + } else { + ret = get_first_ref(sctx->parent_root, pm->ino, + &parent_ino, &parent_gen, name); + if (ret < 0) + goto out; + ret = get_cur_path(sctx, parent_ino, parent_gen, + from_path); + if (ret < 0) + goto out; + ret = fs_path_add_path(from_path, name); + } if (ret < 0) goto out; @@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) LIST_HEAD(deleted_refs); ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, - &pm->update_refs, &deleted_refs); + &pm->update_refs, &deleted_refs, + pm->is_orphan); if (ret < 0) goto out; if (rmdir_ino) { @@ -3283,6 +3291,127 @@ out: return ret; } +/* + * We might need to delay a directory rename even when no ancestor directory + * (in the send root) with a higher inode number than ours (sctx->cur_ino) was + * renamed. This happens when we rename a directory to the old name (the name + * in the parent root) of some other unrelated directory that got its rename + * delayed due to some ancestor with higher number that got renamed. + * + * Example: + * + * Parent snapshot: + * . (ino 256) + * |---- a/ (ino 257) + * | |---- file (ino 260) + * | + * |---- b/ (ino 258) + * |---- c/ (ino 259) + * + * Send snapshot: + * . (ino 256) + * |---- a/ (ino 258) + * |---- x/ (ino 259) + * |---- y/ (ino 257) + * |----- file (ino 260) + * + * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 + * from 'a' to 'x/y' happening first, which in turn depends on the rename of + * inode 259 from 'c' to 'x'. So the order of rename commands the send stream + * must issue is: + * + * 1 - rename 259 from 'c' to 'x' + * 2 - rename 257 from 'a' to 'x/y' + * 3 - rename 258 from 'b' to 'a' + * + * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can + * be done right away and < 0 on error. + */ +static int wait_for_dest_dir_move(struct send_ctx *sctx, + struct recorded_ref *parent_ref, + const bool is_orphan) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct btrfs_key di_key; + struct btrfs_dir_item *di; + u64 left_gen; + u64 right_gen; + int ret = 0; + + if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) + return 0; + + path = alloc_path_for_send(); + if (!path) + return -ENOMEM; + + key.objectid = parent_ref->dir; + key.type = BTRFS_DIR_ITEM_KEY; + key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); + + ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + goto out; + } + + di = btrfs_match_dir_item_name(sctx->parent_root, path, + parent_ref->name, parent_ref->name_len); + if (!di) { + ret = 0; + goto out; + } + /* + * di_key.objectid has the number of the inode that has a dentry in the + * parent directory with the same name that sctx->cur_ino is being + * renamed to. We need to check if that inode is in the send root as + * well and if it is currently marked as an inode with a pending rename, + * if it is, we need to delay the rename of sctx->cur_ino as well, so + * that it happens after that other inode is renamed. + */ + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); + if (di_key.type != BTRFS_INODE_ITEM_KEY) { + ret = 0; + goto out; + } + + ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, + &left_gen, NULL, NULL, NULL, NULL); + if (ret < 0) + goto out; + ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, + &right_gen, NULL, NULL, NULL, NULL); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + goto out; + } + + /* Different inode, no need to delay the rename of sctx->cur_ino */ + if (right_gen != left_gen) { + ret = 0; + goto out; + } + + if (is_waiting_for_move(sctx, di_key.objectid)) { + ret = add_pending_dir_move(sctx, + sctx->cur_ino, + sctx->cur_inode_gen, + di_key.objectid, + &sctx->new_refs, + &sctx->deleted_refs, + is_orphan); + if (!ret) + ret = 1; + } +out: + btrfs_free_path(path); + return ret; +} + static int wait_for_parent_move(struct send_ctx *sctx, struct recorded_ref *parent_ref) { @@ -3349,7 +3478,8 @@ out: sctx->cur_inode_gen, ino, &sctx->new_refs, - &sctx->deleted_refs); + &sctx->deleted_refs, + false); if (!ret) ret = 1; } @@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) int did_overwrite = 0; int is_orphan = 0; u64 last_dir_ino_rm = 0; + bool can_rename = true; verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); @@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); } } + if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { + ret = wait_for_dest_dir_move(sctx, cur, is_orphan); + if (ret < 0) + goto out; + if (ret == 1) { + can_rename = false; + *pending_move = 1; + } + } + /* * link/move the ref to the new place. If we have an orphan * inode, move it and update valid_path. If not, link or move * it depending on the inode mode. */ - if (is_orphan) { + if (is_orphan && can_rename) { ret = send_rename(sctx, valid_path, cur->full_path); if (ret < 0) goto out; @@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = fs_path_copy(valid_path, cur->full_path); if (ret < 0) goto out; - } else { + } else if (can_rename) { if (S_ISDIR(sctx->cur_inode_mode)) { /* * Dirs can't be linked, so move it. For moved diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index a116b55ce788..054fc0d97131 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -911,6 +911,197 @@ out: return ret; } +static int test_extent_accounting(void) +{ + struct inode *inode = NULL; + struct btrfs_root *root = NULL; + int ret = -ENOMEM; + + inode = btrfs_new_test_inode(); + if (!inode) { + test_msg("Couldn't allocate inode\n"); + return ret; + } + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate root\n"); + goto out; + } + + root->fs_info = btrfs_alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + goto out; + } + + BTRFS_I(inode)->root = root; + btrfs_test_inode_set_ops(inode); + + /* [BTRFS_MAX_EXTENT_SIZE] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 1) { + ret = -EINVAL; + test_msg("Miscount, wanted 1, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, + BTRFS_MAX_EXTENT_SIZE + 4095, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + EXTENT_DELALLOC | EXTENT_DIRTY | + EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE][4K] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, + (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 2) { + ret = -EINVAL; + test_msg("Miscount, wanted 2, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K] + * + * I'm artificially adding 2 to outstanding_extents because in the + * buffered IO case we'd add things up as we go, but I don't feel like + * doing that here, this isn't the interesting case we want to test. + */ + BTRFS_I(inode)->outstanding_extents += 2; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192, + (BTRFS_MAX_EXTENT_SIZE << 1) + 12287, + NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, + BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 4) { + ret = -EINVAL; + test_msg("Miscount, wanted 4, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* + * Refill the hole again just for good measure, because I thought it + * might fail and I'd rather satisfy my paranoia at this point. + */ + BTRFS_I(inode)->outstanding_extents++; + ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096, + BTRFS_MAX_EXTENT_SIZE+8191, NULL); + if (ret) { + test_msg("btrfs_set_extent_delalloc returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents != 3) { + ret = -EINVAL; + test_msg("Miscount, wanted 3, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + + /* Empty */ + ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + if (ret) { + test_msg("clear_extent_bit returned %d\n", ret); + goto out; + } + if (BTRFS_I(inode)->outstanding_extents) { + ret = -EINVAL; + test_msg("Miscount, wanted 0, got %u\n", + BTRFS_I(inode)->outstanding_extents); + goto out; + } + ret = 0; +out: + if (ret) + clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, + NULL, GFP_NOFS); + iput(inode); + btrfs_free_dummy_root(root); + return ret; +} + int btrfs_test_inodes(void) { int ret; @@ -924,5 +1115,9 @@ int btrfs_test_inodes(void) if (ret) return ret; test_msg("Running hole first btrfs_get_extent test\n"); - return test_hole_first(); + ret = test_hole_first(); + if (ret) + return ret; + test_msg("Running outstanding_extents tests\n"); + return test_extent_accounting(); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7e80f32550a6..8be4278e25e8 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, u64 old_root_bytenr; u64 old_root_used; struct btrfs_root *tree_root = root->fs_info->tree_root; - bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID); old_root_used = btrfs_root_used(&root->root_item); - btrfs_write_dirty_block_groups(trans, root); while (1) { old_root_bytenr = btrfs_root_bytenr(&root->root_item); if (old_root_bytenr == root->node->start && - old_root_used == btrfs_root_used(&root->root_item) && - (!extent_root || - list_empty(&trans->transaction->dirty_bgs))) + old_root_used == btrfs_root_used(&root->root_item)) break; btrfs_set_root_node(&root->root_item, root->node); @@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, return ret; old_root_used = btrfs_root_used(&root->root_item); - if (extent_root) { - ret = btrfs_write_dirty_block_groups(trans, root); - if (ret) - return ret; - } - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; - ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); - if (ret) - return ret; } return 0; @@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; + struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; struct list_head *next; struct extent_buffer *eb; int ret; @@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, if (ret) return ret; + ret = btrfs_setup_space_cache(trans, root); + if (ret) + return ret; + /* run_qgroups might have added some more refs */ ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); if (ret) return ret; - +again: while (!list_empty(&fs_info->dirty_cowonly_roots)) { next = fs_info->dirty_cowonly_roots.next; list_del_init(next); @@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ret = update_cowonly_root(trans, root); if (ret) return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; } + while (!list_empty(dirty_bgs)) { + ret = btrfs_write_dirty_block_groups(trans, root); + if (ret) + return ret; + ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); + if (ret) + return ret; + } + + if (!list_empty(&fs_info->dirty_cowonly_roots)) + goto again; + list_add_tail(&fs_info->extent_root->dirty_list, &trans->transaction->switch_commits); btrfs_after_dev_replace_commit(fs_info); @@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, wait_for_commit(root, cur_trans); + if (unlikely(cur_trans->aborted)) + ret = cur_trans->aborted; + btrfs_put_transaction(cur_trans); return ret; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9a37f8b39bae..c5b8ba37f88e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1012,7 +1012,7 @@ again: base = btrfs_item_ptr_offset(leaf, path->slots[0]); while (cur_offset < item_size) { - extref = (struct btrfs_inode_extref *)base + cur_offset; + extref = (struct btrfs_inode_extref *)(base + cur_offset); victim_name_len = btrfs_inode_extref_name_len(leaf, extref); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 47b19465f0dc..883b93623bc5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, name, name_len, -1); if (!di && (flags & XATTR_REPLACE)) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); else if (di) ret = btrfs_delete_one_dir_name(trans, root, path, di); goto out; @@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, ASSERT(mutex_is_locked(&inode->i_mutex)); di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), name, name_len, 0); - if (!di) { + if (!di) ret = -ENODATA; + else if (IS_ERR(di)) + ret = PTR_ERR(di); + if (ret) goto out; - } btrfs_release_path(path); di = NULL; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4ac7445e6ec7..aa0dc2573374 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,6 +1,9 @@ /* * fs/cifs/cifsencrypt.c * + * Encryption and hashing operations relating to NTLM, NTLMv2. See MS-NLMP + * for more detailed information + * * Copyright (C) International Business Machines Corp., 2005,2013 * Author(s): Steve French (sfrench@us.ibm.com) * @@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, __func__); return rc; } - } else if (ses->serverName) { + } else { + /* We use ses->serverName if no domain name available */ len = strlen(ses->serverName); server = kmalloc(2 + (len * 2), GFP_KERNEL); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d3aa999ab785..480cf9c81d50 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1599,6 +1599,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, pr_warn("CIFS: username too long\n"); goto cifs_parse_mount_err; } + + kfree(vol->username); vol->username = kstrdup(string, GFP_KERNEL); if (!vol->username) goto cifs_parse_mount_err; @@ -1700,6 +1702,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } + kfree(vol->domainname); vol->domainname = kstrdup(string, GFP_KERNEL); if (!vol->domainname) { pr_warn("CIFS: no memory for domainname\n"); @@ -1731,6 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } if (strncasecmp(string, "default", 7) != 0) { + kfree(vol->iocharset); vol->iocharset = kstrdup(string, GFP_KERNEL); if (!vol->iocharset) { @@ -2913,8 +2917,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server) * calling name ends in null (byte 16) from old smb * convention. */ - if (server->workstation_RFC1001_name && - server->workstation_RFC1001_name[0] != 0) + if (server->workstation_RFC1001_name[0] != 0) rfc1002mangle(ses_init_buf->trailer. session_req.calling_name, server->workstation_RFC1001_name, @@ -3692,6 +3695,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, #endif /* CIFS_WEAK_PW_HASH */ rc = SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr, nls_codepage); + if (rc) { + cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n", + __func__, rc); + cifs_buf_release(smb_buffer); + return rc; + } bcc_ptr += CIFS_AUTH_RESP_SIZE; if (ses->capabilities & CAP_UNICODE) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index a94b3e673182..ca30c391a894 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1823,6 +1823,7 @@ refind_writable: cifsFileInfo_put(inv_file); spin_lock(&cifs_file_list_lock); ++refind; + inv_file = NULL; goto refind_writable; } } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2d4f37235ed0..3e126d7bb2ea 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, cifs_buf_release(srchinf->ntwrk_buf_start); } kfree(srchinf); + if (rc) + goto cgii_exit; } else goto cgii_exit; diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 689f035915cf..22dfdf17d065 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr) /* return pointer to beginning of data area, ie offset from SMB start */ if ((*off != 0) && (*len != 0)) - return hdr->ProtocolId + *off; + return (char *)(&hdr->ProtocolId[0]) + *off; else return NULL; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 96b5d40a2ece..eab05e1aa587 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid, /* No need to change MaxChunks since already set to 1 */ chunk_sizes_updated = true; - } + } else + goto cchunk_out; } cchunk_out: diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3417340bf89e..65cd7a84c8bc 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp; struct TCP_Server_Info *server; - struct cifs_ses *ses = tcon->ses; + struct cifs_ses *ses; struct kvec iov[2]; int resp_buftype; int num_iovecs; @@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, if (plen) *plen = 0; + if (tcon) + ses = tcon->ses; + else + return -EIO; + if (ses && (ses->server)) server = ses->server; else @@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; if ((rc != 0) && (rc != -EINVAL)) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; } else if (rc == -EINVAL) { if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && (opcode != FSCTL_SRV_COPYCHUNK)) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); + cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); goto ioctl_exit; } } @@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0); - if ((rc != 0) && tcon) + if (rc != 0) cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE); free_rsp_buf(resp_buftype, iov[0].iov_base); @@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon, struct kvec iov[2]; int rc = 0; int len; - int resp_buftype; + int resp_buftype = CIFS_NO_BUFFER; unsigned char *bufptr; struct TCP_Server_Info *server; struct cifs_ses *ses = tcon->ses; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 90d1882b306f..5ba029e627cc 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -124,7 +124,7 @@ ecryptfs_get_key_payload_data(struct key *key) } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 -#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32 +#define ECRYPTFS_MAX_CIPHER_NAME_SIZE 31 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64 #define ECRYPTFS_MAX_IV_BYTES 16 /* 128 bits */ #define ECRYPTFS_SALT_BYTES 2 @@ -237,7 +237,7 @@ struct ecryptfs_crypt_stat { struct crypto_ablkcipher *tfm; struct crypto_hash *hash_tfm; /* Crypto context for generating * the initialization vectors */ - unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; + unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; unsigned char key[ECRYPTFS_MAX_KEY_BYTES]; unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES]; struct list_head keysig_list; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b07731e68c0b..fd39bad6f1bd 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -303,9 +303,22 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOTTY; - if (lower_file->f_op->unlocked_ioctl) + if (!lower_file->f_op->unlocked_ioctl) + return rc; + + switch (cmd) { + case FITRIM: + case FS_IOC_GETFLAGS: + case FS_IOC_SETFLAGS: + case FS_IOC_GETVERSION: + case FS_IOC_SETVERSION: rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); - return rc; + fsstack_copy_attr_all(file_inode(file), file_inode(lower_file)); + + return rc; + default: + return rc; + } } #ifdef CONFIG_COMPAT @@ -315,9 +328,22 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct file *lower_file = ecryptfs_file_to_lower(file); long rc = -ENOIOCTLCMD; - if (lower_file->f_op->compat_ioctl) + if (!lower_file->f_op->compat_ioctl) + return rc; + + switch (cmd) { + case FITRIM: + case FS_IOC32_GETFLAGS: + case FS_IOC32_SETFLAGS: + case FS_IOC32_GETVERSION: + case FS_IOC32_SETVERSION: rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); - return rc; + fsstack_copy_attr_all(file_inode(file), file_inode(lower_file)); + + return rc; + default: + return rc; + } } #endif diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 917bd5c9776a..6bd67e2011f0 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -891,7 +891,7 @@ struct ecryptfs_parse_tag_70_packet_silly_stack { struct blkcipher_desc desc; char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1]; char iv[ECRYPTFS_MAX_IV_BYTES]; - char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE]; + char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1]; }; /** diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 1895d60f4122..c095d3264259 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -407,7 +407,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options, if (!cipher_name_set) { int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); - BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE); + BUG_ON(cipher_name_len > ECRYPTFS_MAX_CIPHER_NAME_SIZE); strcpy(mount_crypt_stat->global_default_cipher_name, ECRYPTFS_DEFAULT_CIPHER); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e907052eeadb..32a8bbd7a9ad 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,6 +53,18 @@ struct wb_writeback_work { struct completion *done; /* set if the caller waits */ }; +/* + * If an inode is constantly having its pages dirtied, but then the + * updates stop dirtytime_expire_interval seconds in the past, it's + * possible for the worst case time between when an inode has its + * timestamps updated and when they finally get written out to be two + * dirtytime_expire_intervals. We set the default to 12 hours (in + * seconds), which means most of the time inodes will have their + * timestamps written to disk after 12 hours, but in the worst case a + * few inodes might not their timestamps updated for 24 hours. + */ +unsigned int dirtytime_expire_interval = 12 * 60 * 60; + /** * writeback_in_progress - determine whether there is writeback in progress * @bdi: the device's backing_dev_info structure. @@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, if ((flags & EXPIRE_DIRTY_ATIME) == 0) older_than_this = work->older_than_this; - else if ((work->reason == WB_REASON_SYNC) == 0) { - expire_time = jiffies - (HZ * 86400); + else if (!work->for_sync) { + expire_time = jiffies - (dirtytime_expire_interval * HZ); older_than_this = &expire_time; } while (!list_empty(delaying_queue)) { @@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, */ redirty_tail(inode, wb); } else if (inode->i_state & I_DIRTY_TIME) { + inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &wb->b_dirty_time); } else { /* The inode is clean. Remove from writeback lists. */ @@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; - if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && - (inode->i_state & I_DIRTY_TIME)) || - (inode->i_state & I_DIRTY_TIME_EXPIRED)) { - dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; - trace_writeback_lazytime(inode); - } + if (inode->i_state & I_DIRTY_TIME) { + if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || + unlikely(time_after(jiffies, + (inode->dirtied_time_when + + dirtytime_expire_interval * HZ)))) { + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; + trace_writeback_lazytime(inode); + } + } else + inode->i_state &= ~I_DIRTY_TIME_EXPIRED; inode->i_state &= ~dirty; /* @@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) rcu_read_unlock(); } +/* + * Wake up bdi's periodically to make sure dirtytime inodes gets + * written back periodically. We deliberately do *not* check the + * b_dirtytime list in wb_has_dirty_io(), since this would cause the + * kernel to be constantly waking up once there are any dirtytime + * inodes on the system. So instead we define a separate delayed work + * function which gets called much more rarely. (By default, only + * once every 12 hours.) + * + * If there is any other write activity going on in the file system, + * this function won't be necessary. But if the only thing that has + * happened on the file system is a dirtytime inode caused by an atime + * update, we need this infrastructure below to make sure that inode + * eventually gets pushed out to disk. + */ +static void wakeup_dirtytime_writeback(struct work_struct *w); +static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); + +static void wakeup_dirtytime_writeback(struct work_struct *w) +{ + struct backing_dev_info *bdi; + + rcu_read_lock(); + list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { + if (list_empty(&bdi->wb.b_dirty_time)) + continue; + bdi_wakeup_thread(bdi); + } + rcu_read_unlock(); + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); +} + +static int __init start_dirtytime_writeback(void) +{ + schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); + return 0; +} +__initcall(start_dirtytime_writeback); + +int dirtytime_interval_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (ret == 0 && write) + mod_delayed_work(system_wq, &dirtytime_work, 0); + return ret; +} + static noinline void block_dump___mark_inode_dirty(struct inode *inode) { if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { @@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; - list_move(&inode->i_wb_list, dirtytime ? - &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); + if (dirtytime) + inode->dirtied_time_when = jiffies; + if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) + list_move(&inode->i_wb_list, &bdi->wb.b_dirty); + else + list_move(&inode->i_wb_list, + &bdi->wb.b_dirty_time); spin_unlock(&bdi->wb.list_lock); trace_writeback_dirty_inode_enqueue(inode); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ed19a7d622fa..39706c57ad3c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -890,8 +890,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) newpage = buf->page; - if (WARN_ON(!PageUptodate(newpage))) - return -EIO; + if (!PageUptodate(newpage)) + SetPageUptodate(newpage); ClearPageMappedToDisk(newpage); @@ -1353,6 +1353,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file, return err; } +static int fuse_dev_open(struct inode *inode, struct file *file) +{ + /* + * The fuse device's file's private_data is used to hold + * the fuse_conn(ection) when it is mounted, and is used to + * keep track of whether the file has been mounted already. + */ + file->private_data = NULL; + return 0; +} + static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -1797,6 +1808,9 @@ copy_finish: static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { + /* Don't try to move pages (yet) */ + cs->move_pages = 0; + switch (code) { case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); @@ -2217,6 +2231,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) const struct file_operations fuse_dev_operations = { .owner = THIS_MODULE, + .open = fuse_dev_open, .llseek = no_llseek, .read = do_sync_read, .aio_read = fuse_dev_read, diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -168,9 +171,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -370,6 +370,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd, hfs_find_rec_by_key); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index b684e8a132e6..2bacb9988566 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, goto out_free; } + of->event = atomic_read(&of->kn->attr.open->event); ops = kernfs_ops(of->kn); if (ops->read) len = ops->read(of, buf, len, *ppos); diff --git a/fs/locks.c b/fs/locks.c index 365c82e1b3a9..40bc384728c0 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker) int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; - struct file_lock *new_fl; struct file_lock_context *ctx = inode->i_flctx; - struct file_lock *fl; + struct file_lock *new_fl, *fl, *tmp; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); @@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) break_time++; /* so that 0 means no break time */ } - list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) { if (!leases_conflict(fl, new_fl)) continue; if (want_write) { @@ -1665,7 +1664,8 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr } if (my_fl != NULL) { - error = lease->fl_lmops->lm_change(my_fl, arg, &dispose); + lease = my_fl; + error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup; @@ -1727,7 +1727,7 @@ static int generic_delete_lease(struct file *filp, void *owner) break; } } - trace_generic_delete_lease(inode, fl); + trace_generic_delete_lease(inode, victim); if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f9f4845db989..19874151e95c 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -433,7 +433,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat static bool nfs_client_init_is_complete(const struct nfs_client *clp) { - return clp->cl_cons_state != NFS_CS_INITING; + return clp->cl_cons_state <= NFS_CS_READY; } int nfs_wait_client_init_complete(const struct nfs_client *clp) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index a1f0685b42ff..a6ad68865880 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -181,8 +181,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); spin_unlock(&delegation->lock); - put_rpccred(oldcred); rcu_read_unlock(); + put_rpccred(oldcred); trace_nfs4_reclaim_delegation(inode, res->delegation_type); } else { /* We appear to have raced with a delegation return. */ @@ -370,7 +370,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = NULL; goto out; } - freeme = nfs_detach_delegation_locked(nfsi, + if (test_and_set_bit(NFS_DELEGATION_RETURNING, + &old_delegation->flags)) + goto out; + freeme = nfs_detach_delegation_locked(nfsi, old_delegation, clp); if (freeme == NULL) goto out; @@ -433,6 +436,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) { bool ret = false; + if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) + goto out; if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { @@ -444,6 +449,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } +out: return ret; } @@ -471,14 +477,20 @@ restart: super_list) { if (!nfs_delegation_need_return(delegation)) continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) + if (!nfs_sb_active(server->super)) continue; + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); err = nfs_end_delegation_return(inode, delegation, 0); iput(inode); + nfs_sb_deactive(server->super); if (!err) goto restart; set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); @@ -809,19 +821,30 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { + if (test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags)) + continue; if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) + if (!nfs_sb_active(server->super)) continue; - delegation = nfs_detach_delegation(NFS_I(inode), - delegation, server); + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) { + rcu_read_unlock(); + nfs_sb_deactive(server->super); + goto restart; + } + delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); - - if (delegation != NULL) - nfs_free_delegation(delegation); + if (delegation != NULL) { + delegation = nfs_detach_delegation(NFS_I(inode), + delegation, server); + if (delegation != NULL) + nfs_free_delegation(delegation); + } iput(inode); + nfs_sb_deactive(server->super); goto restart; } } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9b0c55cb2a2e..c19e16f0b2d0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -408,14 +408,22 @@ static int xdr_decode(nfs_readdir_descriptor_t *desc, return 0; } +/* Match file and dirent using either filehandle or fileid + * Note: caller is responsible for checking the fsid + */ static int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) { + struct nfs_inode *nfsi; + if (dentry->d_inode == NULL) goto different; - if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0) - goto different; - return 1; + + nfsi = NFS_I(dentry->d_inode); + if (entry->fattr->fileid == nfsi->fileid) + return 1; + if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0) + return 1; different: return 0; } @@ -469,6 +477,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) struct inode *inode; int status; + if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID)) + return; + if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID)) + return; if (filename.name[0] == '.') { if (filename.len == 1) return; @@ -479,6 +491,10 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) dentry = d_lookup(parent, &filename); if (dentry != NULL) { + /* Is there a mountpoint here? If so, just exit */ + if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid, + &entry->fattr->fsid)) + goto out; if (nfs_same_file(dentry, entry)) { nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(dentry->d_inode, entry->fattr); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 94712fc781fa..e679d24c39d3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -178,7 +178,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) iocb->ki_filp, iov_iter_count(to), (unsigned long) iocb->ki_pos); - result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); + result = nfs_revalidate_mapping_protected(inode, iocb->ki_filp->f_mapping); if (!result) { result = generic_file_read_iter(iocb, to); if (result > 0) @@ -199,7 +199,7 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos, dprintk("NFS: splice_read(%pD2, %lu@%Lu)\n", filp, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_mapping(inode, filp->f_mapping); + res = nfs_revalidate_mapping_protected(inode, filp->f_mapping); if (!res) { res = generic_file_splice_read(filp, ppos, pipe, count, flags); if (res > 0) @@ -372,6 +372,10 @@ start: nfs_wait_bit_killable, TASK_KILLABLE); if (ret) return ret; + /* + * Wait for O_DIRECT to complete + */ + nfs_inode_dio_wait(mapping->host); page = grab_cache_page_write_begin(mapping, index, flags); if (!page) @@ -619,6 +623,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* make sure the cache has finished storing the page */ nfs_fscache_wait_on_page_write(NFS_I(inode), page); + wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING, + nfs_wait_bit_killable, TASK_KILLABLE); + lock_page(page); mapping = page_file_mapping(page); if (mapping != inode->i_mapping) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 83107be3dd01..d42dff6d5e98 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -556,6 +556,7 @@ EXPORT_SYMBOL_GPL(nfs_setattr); * This is a copy of the common vmtruncate, but with the locking * corrected to take into account the fact that NFS requires * inode->i_size to be updated under the inode->i_lock. + * Note: must be called with inode->i_lock held! */ static int nfs_vmtruncate(struct inode * inode, loff_t offset) { @@ -565,14 +566,14 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset) if (err) goto out; - spin_lock(&inode->i_lock); i_size_write(inode, offset); /* Optimisation */ if (offset == 0) NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); + spin_unlock(&inode->i_lock); truncate_pagecache(inode, offset); + spin_lock(&inode->i_lock); out: return err; } @@ -585,10 +586,15 @@ out: * Note: we do this in the *proc.c in order to ensure that * it works for things like exclusive creates too. */ -void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, + struct nfs_fattr *fattr) { + /* Barrier: bump the attribute generation count. */ + nfs_fattr_set_barrier(fattr); + + spin_lock(&inode->i_lock); + NFS_I(inode)->attr_gencount = fattr->gencount; if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { - spin_lock(&inode->i_lock); if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -600,12 +606,13 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_gid = attr->ia_gid; nfs_set_cache_invalid(inode, NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL); - spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } + nfs_update_inode(inode, fattr); + spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1028,6 +1035,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (mapping->nrpages != 0) { if (S_ISREG(inode->i_mode)) { + unmap_mapping_range(mapping, 0, 0, 0); ret = nfs_sync_mapping(mapping); if (ret < 0) return ret; @@ -1060,11 +1068,14 @@ static bool nfs_mapping_need_revalidate_inode(struct inode *inode) } /** - * nfs_revalidate_mapping - Revalidate the pagecache + * __nfs_revalidate_mapping - Revalidate the pagecache * @inode - pointer to host inode * @mapping - pointer to mapping + * @may_lock - take inode->i_mutex? */ -int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +static int __nfs_revalidate_mapping(struct inode *inode, + struct address_space *mapping, + bool may_lock) { struct nfs_inode *nfsi = NFS_I(inode); unsigned long *bitlock = &nfsi->flags; @@ -1113,7 +1124,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); trace_nfs_invalidate_mapping_enter(inode); - ret = nfs_invalidate_mapping(inode, mapping); + if (may_lock) { + mutex_lock(&inode->i_mutex); + ret = nfs_invalidate_mapping(inode, mapping); + mutex_unlock(&inode->i_mutex); + } else + ret = nfs_invalidate_mapping(inode, mapping); trace_nfs_invalidate_mapping_exit(inode, ret); clear_bit_unlock(NFS_INO_INVALIDATING, bitlock); @@ -1123,6 +1139,29 @@ out: return ret; } +/** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, false); +} + +/** + * nfs_revalidate_mapping_protected - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + * + * Differs from nfs_revalidate_mapping() in that it grabs the inode->i_mutex + * while invalidating the mapping. + */ +int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping) +{ + return __nfs_revalidate_mapping(inode, mapping, true); +} + static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); @@ -1231,13 +1270,6 @@ static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fat return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; } -static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) - return 0; - return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); -} - static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) @@ -1249,6 +1281,7 @@ unsigned long nfs_inc_attr_generation_counter(void) { return atomic_long_inc_return(&nfs_attr_generation_counter); } +EXPORT_SYMBOL_GPL(nfs_inc_attr_generation_counter); void nfs_fattr_init(struct nfs_fattr *fattr) { @@ -1260,6 +1293,22 @@ void nfs_fattr_init(struct nfs_fattr *fattr) } EXPORT_SYMBOL_GPL(nfs_fattr_init); +/** + * nfs_fattr_set_barrier + * @fattr: attributes + * + * Used to set a barrier after an attribute was updated. This + * barrier ensures that older attributes from RPC calls that may + * have raced with our update cannot clobber these new values. + * Note that you are still responsible for ensuring that other + * operations which change the attribute on the server do not + * collide. + */ +void nfs_fattr_set_barrier(struct nfs_fattr *fattr) +{ + fattr->gencount = nfs_inc_attr_generation_counter(); +} + struct nfs_fattr *nfs_alloc_fattr(void) { struct nfs_fattr *fattr; @@ -1370,7 +1419,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || nfs_ctime_need_update(inode, fattr) || - nfs_size_need_update(inode, fattr) || ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } @@ -1460,6 +1508,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) int status; spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); @@ -1468,7 +1517,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); /** - * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * nfs_post_op_update_inode_force_wcc_locked - update the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes * @@ -1478,11 +1527,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode); * * This function is mainly designed to be used by the ->write_done() functions. */ -int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr) { int status; - spin_lock(&inode->i_lock); /* Don't do a WCC update if these attributes are already stale */ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !nfs_inode_attrs_need_update(inode, fattr)) { @@ -1514,6 +1562,27 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa } out_noforce: status = nfs_post_op_update_inode_locked(inode, fattr); + return status; +} + +/** + * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes + * + * After an operation that has changed the inode metadata, mark the + * attribute cache as being invalid, then try to update it. Fake up + * weak cache consistency data, if none exist. + * + * This function is mainly designed to be used by the ->write_done() functions. + */ +int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) +{ + int status; + + spin_lock(&inode->i_lock); + nfs_fattr_set_barrier(fattr); + status = nfs_post_op_update_inode_force_wcc_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } @@ -1715,6 +1784,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; + /* Set barrier to be more recent than all outstanding updates */ nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (!time_in_range_open(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { @@ -1722,6 +1792,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; } + /* Set the barrier to be more recent than this fattr */ + if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) + nfsi->attr_gencount = fattr->gencount; } invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b802fb3a2d99..9e6475bc5ba2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -459,6 +459,7 @@ void nfs_mark_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 78e557c3ab87..1f11d2533ee4 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -138,7 +138,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -834,7 +834,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 2a932fdc57cb..53852a4bd88b 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1987,6 +1987,11 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (entry->fattr->valid & NFS_ATTR_FATTR_V3) entry->d_type = nfs_umode_to_dtype(entry->fattr->mode); + if (entry->fattr->fileid != entry->ino) { + entry->fattr->mounted_on_fileid = entry->ino; + entry->fattr->valid |= NFS_ATTR_FATTR_MOUNTED_ON_FILEID; + } + /* In fact, a post_op_fh3: */ p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 8646af9b11d2..86d6214ea022 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -621,6 +621,9 @@ int nfs41_walk_client_list(struct nfs_client *new, spin_lock(&nn->nfs_client_lock); list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { + if (pos == new) + goto found; + if (pos->rpc_ops != new->rpc_ops) continue; @@ -639,10 +642,6 @@ int nfs41_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - if (pos->cl_cons_state == NFS_CS_SESSION_INITING) { - nfs4_schedule_lease_recovery(pos); - status = nfs4_wait_clnt_recover(pos); - } spin_lock(&nn->nfs_client_lock); if (status < 0) break; @@ -668,7 +667,7 @@ int nfs41_walk_client_list(struct nfs_client *new, */ if (!nfs4_match_client_owner_id(pos, new)) continue; - +found: atomic_inc(&pos->cl_count); *result = pos; status = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 88180ac5ea0e..627f37c44456 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -901,6 +901,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) if (!cinfo->atomic || cinfo->before != dir->i_version) nfs_force_lookup_revalidate(dir); dir->i_version = cinfo->after; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); spin_unlock(&dir->i_lock); } @@ -1552,6 +1553,9 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; + opendata->o_arg.share_access = nfs4_map_atomic_open_share( + NFS_SB(opendata->dentry->d_sb), + fmode, 0); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -2413,8 +2417,8 @@ static int _nfs4_do_open(struct inode *dir, opendata->o_res.f_attr, sattr, state, label, olabel); if (status == 0) { - nfs_setattr_update_inode(state->inode, sattr); - nfs_post_op_update_inode(state->inode, opendata->o_res.f_attr); + nfs_setattr_update_inode(state->inode, sattr, + opendata->o_res.f_attr); nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel); } } @@ -2651,7 +2655,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_BAD_STATEID: case -NFS4ERR_EXPIRED: if (!nfs4_stateid_match(&calldata->arg.stateid, - &state->stateid)) { + &state->open_stateid)) { rpc_restart_call_prepare(task); goto out_release; } @@ -2687,7 +2691,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); - nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid); + nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -3288,7 +3292,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); if (status == 0) { - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); nfs_setsecurity(inode, fattr, label); } nfs4_label_free(label); @@ -4234,7 +4238,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), hdr->timestamp); - nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr); + nfs_writeback_update_inode(hdr); } return 0; } @@ -6893,9 +6897,13 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status == 0) { clp->cl_clientid = res.clientid; - clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R); - if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) + clp->cl_exchange_flags = res.flags; + /* Client ID is not confirmed */ + if (!(res.flags & EXCHGID4_FLAG_CONFIRMED_R)) { + clear_bit(NFS4_SESSION_ESTABLISHED, + &clp->cl_session->session_state); clp->cl_seqid = res.seqid; + } kfree(clp->cl_serverowner); clp->cl_serverowner = res.server_owner; @@ -7227,6 +7235,9 @@ static void nfs4_update_session(struct nfs4_session *session, struct nfs41_create_session_res *res) { nfs4_copy_sessionid(&session->sess_id, &res->sessionid); + /* Mark client id and session as being confirmed */ + session->clp->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; + set_bit(NFS4_SESSION_ESTABLISHED, &session->session_state); session->flags = res->flags; memcpy(&session->fc_attrs, &res->fc_attrs, sizeof(session->fc_attrs)); if (res->flags & SESSION4_BACK_CHAN) @@ -7322,8 +7333,8 @@ int nfs4_proc_destroy_session(struct nfs4_session *session, dprintk("--> nfs4_proc_destroy_session\n"); /* session is still being setup */ - if (session->clp->cl_cons_state != NFS_CS_READY) - return status; + if (!test_and_clear_bit(NFS4_SESSION_ESTABLISHED, &session->session_state)) + return 0; status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_destroy_session(session->clp, status); diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index fc46c7455898..e3ea2c5324d6 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -70,6 +70,7 @@ struct nfs4_session { enum nfs4_session_state { NFS4_SESSION_INITING, + NFS4_SESSION_ESTABLISHED, }; extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5ad908e9ce9c..f95e3b58bbc3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -346,9 +346,23 @@ int nfs41_discover_server_trunking(struct nfs_client *clp, status = nfs4_proc_exchange_id(clp, cred); if (status != NFS4_OK) return status; - set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); - return nfs41_walk_client_list(clp, result, cred); + status = nfs41_walk_client_list(clp, result, cred); + if (status < 0) + return status; + if (clp != *result) + return 0; + + /* Purge state if the client id was established in a prior instance */ + if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) + set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state); + else + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + nfs4_schedule_state_manager(clp); + status = nfs_wait_client_init_complete(clp); + if (status < 0) + nfs_put_client(clp); + return status; } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b09cc23d6f43..c63189acd052 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -139,7 +139,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) - nfs_setattr_update_inode(inode, sattr); + nfs_setattr_update_inode(inode, sattr, fattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -609,10 +609,8 @@ static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - struct inode *inode = hdr->inode; - if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr); + nfs_writeback_update_inode(hdr); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 595d81e354d1..849ed784d6ac 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1377,6 +1377,36 @@ static int nfs_should_remove_suid(const struct inode *inode) return 0; } +static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, + struct nfs_fattr *fattr) +{ + struct nfs_pgio_args *argp = &hdr->args; + struct nfs_pgio_res *resp = &hdr->res; + + if (!(fattr->valid & NFS_ATTR_FATTR_SIZE)) + return; + if (argp->offset + resp->count != fattr->size) + return; + if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) + return; + /* Set attribute barrier */ + nfs_fattr_set_barrier(fattr); +} + +void nfs_writeback_update_inode(struct nfs_pgio_header *hdr) +{ + struct nfs_fattr *fattr = hdr->res.fattr; + struct inode *inode = hdr->inode; + + if (fattr == NULL) + return; + spin_lock(&inode->i_lock); + nfs_writeback_check_extend(hdr, fattr); + nfs_post_op_update_inode_force_wcc_locked(inode, fattr); + spin_unlock(&inode->i_lock); +} +EXPORT_SYMBOL_GPL(nfs_writeback_update_inode); + /* * This function is called when the WRITE call is complete. */ diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index cdbc78c72542..03d647bf195d 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, seg->offset = iomap.offset; seg->length = iomap.length; - dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es); + dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es); return 0; out_error: diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c index 9da89fddab33..9aa2796da90d 100644 --- a/fs/nfsd/blocklayoutxdr.c +++ b/fs/nfsd/blocklayoutxdr.c @@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp, p = xdr_decode_hyper(p, &bex.foff); if (bex.foff & (block_size - 1)) { - dprintk("%s: unaligned offset %lld\n", + dprintk("%s: unaligned offset 0x%llx\n", __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.len); if (bex.len & (block_size - 1)) { - dprintk("%s: unaligned length %lld\n", + dprintk("%s: unaligned length 0x%llx\n", __func__, bex.foff); goto fail; } p = xdr_decode_hyper(p, &bex.soff); if (bex.soff & (block_size - 1)) { - dprintk("%s: unaligned disk offset %lld\n", + dprintk("%s: unaligned disk offset 0x%llx\n", __func__, bex.soff); goto fail; } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 3c1bfa155571..6904213a4363 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp) { struct super_block *sb = exp->ex_path.mnt->mnt_sb; - if (exp->ex_flags & NFSEXP_NOPNFS) + if (!(exp->ex_flags & NFSEXP_PNFS)) return; if (sb->s_export_op->get_uuid && @@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg, list_move_tail(&lp->lo_perstate, reaplist); return; } - end = seg->offset; + lo->offset = layout_end(seg); } else { /* retain the whole layout segment on a split. */ if (layout_end(seg) < end) { dprintk("%s: split not supported\n", __func__); return; } - - lo->offset = layout_end(seg); + end = seg->offset; } layout_update_len(lo, end); @@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp, spin_lock(&clp->cl_lock); list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) { + if (ls->ls_layout_type != lrp->lr_layout_type) + continue; + if (lrp->lr_return_type == RETURN_FSID && !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle, &cstate->current_fh.fh_handle)) @@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls) rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str)); - nfsd4_cb_layout_fail(ls); + trace_layout_recall_fail(&ls->ls_stid.sc_stateid); printk(KERN_WARNING "nfsd: client %s failed to respond to layout recall. " diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d30bea8d0277..92b9d97aff4f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp, nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp); gdp->gd_notify_types &= ops->notify_types; - exp_put(exp); out: + exp_put(exp); return nfserr; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f6b2a09f793f..8ba1d888f1e6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1638,7 +1638,7 @@ __destroy_client(struct nfs4_client *clp) nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_revoked)) { - dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + dp = list_entry(clp->cl_revoked.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); nfs4_put_stid(&dp->dl_stid); } @@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, } else nfs4_free_openowner(&oo->oo_owner); spin_unlock(&clp->cl_lock); - return oo; + return ret; } static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { @@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, } else nfs4_free_lockowner(&lo->lo_owner); spin_unlock(&clp->cl_lock); - return lo; + return ret; } static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index df5e66caf100..5fb7e78169a6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp, p = xdr_decode_hyper(p, &lgp->lg_seg.offset); p = xdr_decode_hyper(p, &lgp->lg_seg.length); p = xdr_decode_hyper(p, &lgp->lg_minlength); - nfsd4_decode_stateid(argp, &lgp->lg_sid); + + status = nfsd4_decode_stateid(argp, &lgp->lg_sid); + if (status) + return status; + READ_BUF(4); lgp->lg_maxcount = be32_to_cpup(p++); @@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp, p = xdr_decode_hyper(p, &lcp->lc_seg.offset); p = xdr_decode_hyper(p, &lcp->lc_seg.length); lcp->lc_reclaim = be32_to_cpup(p++); - nfsd4_decode_stateid(argp, &lcp->lc_sid); + + status = nfsd4_decode_stateid(argp, &lcp->lc_sid); + if (status) + return status; + READ_BUF(4); lcp->lc_newoffset = be32_to_cpup(p++); if (lcp->lc_newoffset) { @@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp, READ_BUF(16); p = xdr_decode_hyper(p, &lrp->lr_seg.offset); p = xdr_decode_hyper(p, &lrp->lr_seg.length); - nfsd4_decode_stateid(argp, &lrp->lr_sid); + + status = nfsd4_decode_stateid(argp, &lrp->lr_sid); + if (status) + return status; + READ_BUF(4); lrp->lrf_body_len = be32_to_cpup(p++); if (lrp->lrf_body_len > 0) { @@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr_resource; *p++ = cpu_to_be32(lrp->lrs_present); if (lrp->lrs_present) - nfsd4_encode_stateid(xdr, &lrp->lr_sid); + return nfsd4_encode_stateid(xdr, &lrp->lr_sid); return nfs_ok; } #endif /* CONFIG_NFSD_PNFS */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 83a9694ec485..46ec934f5dee 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void) { unsigned int hashsize; unsigned int i; + int status = 0; max_drc_entries = nfsd_cache_size_limit(); atomic_set(&num_drc_entries, 0); hashsize = nfsd_hashsize(max_drc_entries); maskbits = ilog2(hashsize); - register_shrinker(&nfsd_reply_cache_shrinker); + status = register_shrinker(&nfsd_reply_cache_shrinker); + if (status) + return status; + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), 0, 0, NULL); if (!drc_slab) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 469086b9f99b..0c3f303baf32 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; + int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); @@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, brelse(ii->i_bh); ii->i_bh = NULL; list_del_init(&ii->i_dirty); - if (!ii->vfs_inode.i_nlink) { + if (!ii->vfs_inode.i_nlink || during_mount) { /* - * Defer calling iput() to avoid a deadlock - * over I_SYNC flag for inodes with i_nlink == 0 + * Defer calling iput() to avoid deadlocks if + * i_nlink == 0 or mount is not yet finished. */ list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); defer_iput = true; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 9a66ff79ff27..d2f97ecca6a5 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !(marks_mask & FS_ISDIR & ~marks_ignored_mask)) return false; - if (event_mask & marks_mask & ~marks_ignored_mask) + if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask & + ~marks_ignored_mask) return true; return false; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 46e0d4e857c7..ba1790e52ff2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2394,7 +2394,6 @@ relock: /* * for completing the rest of the request. */ - *ppos += written; count -= written; written_buffered = generic_perform_write(file, from, *ppos); /* @@ -2409,7 +2408,6 @@ relock: goto out_dio; } - iocb->ki_pos = *ppos + written_buffered; /* We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. @@ -2418,6 +2416,7 @@ relock: ret = filemap_write_and_wait_range(file->f_mapping, *ppos, endbyte); if (ret == 0) { + iocb->ki_pos = *ppos + written_buffered; written += written_buffered; invalidate_mapping_pages(mapping, *ppos >> PAGE_CACHE_SHIFT, @@ -2440,10 +2439,14 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2454,10 +2457,12 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8490c64d34fe..460c6c37e683 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb) { - if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) + if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO) return 1; return 0; } diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 20e37a3ed26f..db64ce2d4667 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -102,11 +102,11 @@ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ - | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) + | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \ + | OCFS2_FEATURE_INCOMPAT_APPEND_DIO) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ - | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \ - | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO) + | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) /* * Heartbeat-only devices are missing journals and other files. The @@ -179,6 +179,11 @@ #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 /* + * Append Direct IO support + */ +#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO 0x8000 + +/* * backup superblock flag is used to indicate that this volume * has backup superblocks. */ @@ -200,10 +205,6 @@ #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 -/* - * Append Direct IO support - */ -#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO 0x0008 /* The byte offset of the first backup block will be 1G. * The following will be 4G, 16G, 64G, 256G and 1T. diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b90952f528b1..5f0d1993e6e3 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && - (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))) + if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) return -EROFS; return 0; @@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config) break; default: + pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); return -EINVAL; } } + + /* Workdir is useless in non-upper mount */ + if (!config->upperdir && config->workdir) { + pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + config->workdir); + kfree(config->workdir); + config->workdir = NULL; + } + return 0; } @@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; if (ufs->config.upperdir) { - /* FIXME: workdir is not needed for a R/O mount */ if (!ufs->config.workdir) { pr_err("overlayfs: missing 'workdir'\n"); goto out_free_config; @@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_free_config; + /* Upper fs should not be r/o */ + if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { + pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + err = -EINVAL; + goto out_put_upperpath; + } + err = ovl_mount_dir(ufs->config.workdir, &workpath); if (err) goto out_put_upperpath; @@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) err = -EINVAL; stacklen = ovl_split_lowerdirs(lowertmp); - if (stacklen > OVL_MAX_STACK) + if (stacklen > OVL_MAX_STACK) { + pr_err("overlayfs: too many lower directries, limit is %d\n", + OVL_MAX_STACK); goto out_free_lowertmp; + } else if (!ufs->config.upperdir && stacklen == 1) { + pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); + goto out_free_lowertmp; + } stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); if (!stack) @@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->numlower++; } - /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */ - if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)) + /* If the upper fs is nonexistent, we mark overlayfs r/o too */ + if (!ufs->upper_mnt) sb->s_flags |= MS_RDONLY; sb->s_d_op = &ovl_dentry_operations; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 956b75d61809..6dee68d013ff 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1325,6 +1325,9 @@ out: static int pagemap_open(struct inode *inode, struct file *file) { + /* do not disclose physical addresses: attack vector */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " "to stop being page-shift some time soon. See the " "linux/Documentation/vm/pagemap.txt for details.\n"); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index a24addfdfcec..0de6290df4da 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -68,8 +68,8 @@ struct drm_mm_node { unsigned scanned_preceeds_hole : 1; unsigned allocated : 1; unsigned long color; - unsigned long start; - unsigned long size; + u64 start; + u64 size; struct drm_mm *mm; }; @@ -82,16 +82,16 @@ struct drm_mm { unsigned int scan_check_range : 1; unsigned scan_alignment; unsigned long scan_color; - unsigned long scan_size; - unsigned long scan_hit_start; - unsigned long scan_hit_end; + u64 scan_size; + u64 scan_hit_start; + u64 scan_hit_end; unsigned scanned_blocks; - unsigned long scan_start; - unsigned long scan_end; + u64 scan_start; + u64 scan_end; struct drm_mm_node *prev_scanned_node; void (*color_adjust)(struct drm_mm_node *node, unsigned long color, - unsigned long *start, unsigned long *end); + u64 *start, u64 *end); }; /** @@ -124,7 +124,7 @@ static inline bool drm_mm_initialized(struct drm_mm *mm) return mm->hole_stack.next; } -static inline unsigned long __drm_mm_hole_node_start(struct drm_mm_node *hole_node) +static inline u64 __drm_mm_hole_node_start(struct drm_mm_node *hole_node) { return hole_node->start + hole_node->size; } @@ -140,13 +140,13 @@ static inline unsigned long __drm_mm_hole_node_start(struct drm_mm_node *hole_no * Returns: * Start of the subsequent hole. */ -static inline unsigned long drm_mm_hole_node_start(struct drm_mm_node *hole_node) +static inline u64 drm_mm_hole_node_start(struct drm_mm_node *hole_node) { BUG_ON(!hole_node->hole_follows); return __drm_mm_hole_node_start(hole_node); } -static inline unsigned long __drm_mm_hole_node_end(struct drm_mm_node *hole_node) +static inline u64 __drm_mm_hole_node_end(struct drm_mm_node *hole_node) { return list_entry(hole_node->node_list.next, struct drm_mm_node, node_list)->start; @@ -163,7 +163,7 @@ static inline unsigned long __drm_mm_hole_node_end(struct drm_mm_node *hole_node * Returns: * End of the subsequent hole. */ -static inline unsigned long drm_mm_hole_node_end(struct drm_mm_node *hole_node) +static inline u64 drm_mm_hole_node_end(struct drm_mm_node *hole_node) { return __drm_mm_hole_node_end(hole_node); } @@ -222,7 +222,7 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node); int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, enum drm_mm_search_flags sflags, @@ -245,7 +245,7 @@ int drm_mm_insert_node_generic(struct drm_mm *mm, */ static inline int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, + u64 size, unsigned alignment, enum drm_mm_search_flags flags) { @@ -255,11 +255,11 @@ static inline int drm_mm_insert_node(struct drm_mm *mm, int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, - unsigned long end, + u64 start, + u64 end, enum drm_mm_search_flags sflags, enum drm_mm_allocator_flags aflags); /** @@ -282,10 +282,10 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, */ static inline int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, + u64 size, unsigned alignment, - unsigned long start, - unsigned long end, + u64 start, + u64 end, enum drm_mm_search_flags flags) { return drm_mm_insert_node_in_range_generic(mm, node, size, alignment, @@ -296,21 +296,21 @@ static inline int drm_mm_insert_node_in_range(struct drm_mm *mm, void drm_mm_remove_node(struct drm_mm_node *node); void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); void drm_mm_init(struct drm_mm *mm, - unsigned long start, - unsigned long size); + u64 start, + u64 size); void drm_mm_takedown(struct drm_mm *mm); bool drm_mm_clean(struct drm_mm *mm); void drm_mm_init_scan(struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color); void drm_mm_init_scan_with_range(struct drm_mm *mm, - unsigned long size, + u64 size, unsigned alignment, unsigned long color, - unsigned long start, - unsigned long end); + u64 start, + u64 end); bool drm_mm_scan_add_block(struct drm_mm_node *node); bool drm_mm_scan_remove_block(struct drm_mm_node *node); diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 0ccf7f267ff9..c768ddfbe53c 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -249,7 +249,7 @@ struct ttm_buffer_object { * either of these locks held. */ - unsigned long offset; + uint64_t offset; /* GPU address space is independent of CPU word size */ uint32_t cur_placement; struct sg_table *sg; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 142d752fc450..813042cede57 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -277,7 +277,7 @@ struct ttm_mem_type_manager { bool has_type; bool use_type; uint32_t flags; - unsigned long gpu_offset; + uint64_t gpu_offset; /* GPU address space is independent of CPU word size */ uint64_t size; uint32_t available_caching; uint32_t default_caching; diff --git a/include/dt-bindings/pinctrl/am33xx.h b/include/dt-bindings/pinctrl/am33xx.h index 2fbc804e1a45..226f77246a70 100644 --- a/include/dt-bindings/pinctrl/am33xx.h +++ b/include/dt-bindings/pinctrl/am33xx.h @@ -13,7 +13,8 @@ #define PULL_DISABLE (1 << 3) #define INPUT_EN (1 << 5) -#define SLEWCTRL_FAST (1 << 6) +#define SLEWCTRL_SLOW (1 << 6) +#define SLEWCTRL_FAST 0 /* update macro depending on INPUT_EN and PULL_ENA */ #undef PIN_OUTPUT diff --git a/include/dt-bindings/pinctrl/am43xx.h b/include/dt-bindings/pinctrl/am43xx.h index 9c2e4f82381e..5f4d01898c9c 100644 --- a/include/dt-bindings/pinctrl/am43xx.h +++ b/include/dt-bindings/pinctrl/am43xx.h @@ -18,7 +18,8 @@ #define PULL_DISABLE (1 << 16) #define PULL_UP (1 << 17) #define INPUT_EN (1 << 18) -#define SLEWCTRL_FAST (1 << 19) +#define SLEWCTRL_SLOW (1 << 19) +#define SLEWCTRL_FAST 0 #define DS0_PULL_UP_DOWN_EN (1 << 27) #define PIN_OUTPUT (PULL_DISABLE) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index b3f45a578344..e5966758c093 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -24,17 +24,14 @@ #include <linux/workqueue.h> struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER /* Is the timer enabled */ bool enabled; /* Virtual offset */ cycle_t cntvoff; -#endif }; struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER /* Registers: control register, timer value */ u32 cntv_ctl; /* Saved/restored */ cycle_t cntv_cval; /* Saved/restored */ @@ -55,10 +52,8 @@ struct arch_timer_cpu { /* Timer IRQ */ const struct kvm_irq_level *irq; -#endif }; -#ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); void kvm_timer_enable(struct kvm *kvm); void kvm_timer_init(struct kvm *kvm); @@ -72,30 +67,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline void kvm_timer_enable(struct kvm *kvm) {} -static inline void kvm_timer_init(struct kvm *kvm) {} -static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) {} -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} -#endif +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7c55dd5dd2c9..133ea00aa83b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,6 +24,7 @@ #include <linux/irqreturn.h> #include <linux/spinlock.h> #include <linux/types.h> +#include <kvm/iodev.h> #define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 @@ -114,6 +115,7 @@ struct vgic_ops { void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); + void (*clear_eisr)(struct kvm_vcpu *vcpu); u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); void (*enable_underflow)(struct kvm_vcpu *vcpu); void (*disable_underflow)(struct kvm_vcpu *vcpu); @@ -139,16 +141,21 @@ struct vgic_params { }; struct vgic_vm_ops { - bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, - struct kvm_exit_mmio *); bool (*queue_sgi)(struct kvm_vcpu *, int irq); void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); int (*init_model)(struct kvm *); int (*map_resources)(struct kvm *, const struct vgic_params *); }; +struct vgic_io_device { + gpa_t addr; + int len; + const struct vgic_io_range *reg_ranges; + struct kvm_vcpu *redist_vcpu; + struct kvm_io_device dev; +}; + struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; bool in_kernel; bool ready; @@ -196,6 +203,9 @@ struct vgic_dist { /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; + /* Interrupt was active when unqueue from VCPU interface */ + struct vgic_bitmap irq_active; + /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; @@ -236,8 +246,12 @@ struct vgic_dist { /* Bitmap indicating which CPU has something pending */ unsigned long *irq_pending_on_cpu; + /* Bitmap indicating which CPU has active IRQs */ + unsigned long *irq_active_on_cpu; + struct vgic_vm_ops vm_ops; -#endif + struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; }; struct vgic_v2_cpu_if { @@ -265,13 +279,18 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ u8 *vgic_irq_lr_map; - /* Pending interrupts on this VCPU */ + /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); + + /* Pending/active/both shared interrupts, dynamically sized */ unsigned long *pending_shared; + unsigned long *active_shared; + unsigned long *pend_act_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -284,7 +303,6 @@ struct vgic_cpu { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; -#endif }; #define LR_EMPTY 0xff @@ -294,10 +312,7 @@ struct vgic_cpu { struct kvm; struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; -#ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); @@ -311,8 +326,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) @@ -334,84 +348,4 @@ static inline int vgic_v3_probe(struct device_node *vgic_node, } #endif -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - return -ENXIO; -} - -static inline int kvm_vgic_map_resources(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - return 0; -} - -static inline void kvm_vgic_destroy(struct kvm *kvm) -{ -} - -static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} - -static inline bool vgic_ready(struct kvm *kvm) -{ - return true; -} - -static inline int kvm_vgic_get_max_vcpus(void) -{ - return KVM_MAX_VCPUS; -} -#endif - #endif diff --git a/virt/kvm/iodev.h b/include/kvm/iodev.h index 12fd3caffd2b..a6d208b916f5 100644 --- a/virt/kvm/iodev.h +++ b/include/kvm/iodev.h @@ -9,17 +9,17 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __KVM_IODEV_H__ #define __KVM_IODEV_H__ #include <linux/kvm_types.h> -#include <asm/errno.h> +#include <linux/errno.h> struct kvm_io_device; +struct kvm_vcpu; /** * kvm_io_device_ops are called under kvm slots_lock. @@ -27,11 +27,13 @@ struct kvm_io_device; * or non-zero to have it passed to the next device. **/ struct kvm_io_device_ops { - int (*read)(struct kvm_io_device *this, + int (*read)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *val); - int (*write)(struct kvm_io_device *this, + int (*write)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *val); @@ -49,16 +51,20 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev, dev->ops = ops; } -static inline int kvm_iodevice_read(struct kvm_io_device *dev, - gpa_t addr, int l, void *v) +static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, void *v) { - return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } -static inline int kvm_iodevice_write(struct kvm_io_device *dev, - gpa_t addr, int l, const void *v) +static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, const void *v) { - return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c294e3e25e37..a1b25e35ea5f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -181,7 +181,9 @@ enum rq_flag_bits { __REQ_ELVPRIV, /* elevator private data attached */ __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_PREEMPT, /* set for "ide_preempt" requests and also + for requests for which the SCSI "quiesce" + state must be ignored. */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_FLUSH_SEQ, /* request for flush sequence */ diff --git a/include/linux/clk.h b/include/linux/clk.h index 8381bbfbc308..68c16a6bedb3 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -125,6 +125,19 @@ int clk_set_phase(struct clk *clk, int degrees); */ int clk_get_phase(struct clk *clk); +/** + * clk_is_match - check if two clk's point to the same hardware clock + * @p: clk compared against q + * @q: clk compared against p + * + * Returns true if the two struct clk pointers both point to the same hardware + * clock node. Put differently, returns true if struct clk *p and struct clk *q + * share the same struct clk_core object. + * + * Returns false otherwise. Note that two NULL clks are treated as matching. + */ +bool clk_is_match(const struct clk *p, const struct clk *q); + #else static inline long clk_get_accuracy(struct clk *clk) @@ -142,6 +155,11 @@ static inline long clk_get_phase(struct clk *clk) return -ENOTSUPP; } +static inline bool clk_is_match(const struct clk *p, const struct clk *q) +{ + return p == q; +} + #endif /** diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 2e4cb67f6e56..96c280b2c263 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -8,33 +8,19 @@ #ifndef _LINUX_CLOCKCHIPS_H #define _LINUX_CLOCKCHIPS_H -/* Clock event notification values */ -enum clock_event_nofitiers { - CLOCK_EVT_NOTIFY_ADD, - CLOCK_EVT_NOTIFY_BROADCAST_ON, - CLOCK_EVT_NOTIFY_BROADCAST_OFF, - CLOCK_EVT_NOTIFY_BROADCAST_FORCE, - CLOCK_EVT_NOTIFY_BROADCAST_ENTER, - CLOCK_EVT_NOTIFY_BROADCAST_EXIT, - CLOCK_EVT_NOTIFY_SUSPEND, - CLOCK_EVT_NOTIFY_RESUME, - CLOCK_EVT_NOTIFY_CPU_DYING, - CLOCK_EVT_NOTIFY_CPU_DEAD, -}; - -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS -#include <linux/clocksource.h> -#include <linux/cpumask.h> -#include <linux/ktime.h> -#include <linux/notifier.h> +# include <linux/clocksource.h> +# include <linux/cpumask.h> +# include <linux/ktime.h> +# include <linux/notifier.h> struct clock_event_device; struct module; -/* Clock event mode commands */ +/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED = 0, + CLOCK_EVT_MODE_UNUSED, CLOCK_EVT_MODE_SHUTDOWN, CLOCK_EVT_MODE_PERIODIC, CLOCK_EVT_MODE_ONESHOT, @@ -42,30 +28,49 @@ enum clock_event_mode { }; /* + * Possible states of a clock event device. + * + * DETACHED: Device is not used by clockevents core. Initial state or can be + * reached from SHUTDOWN. + * SHUTDOWN: Device is powered-off. Can be reached from PERIODIC or ONESHOT. + * PERIODIC: Device is programmed to generate events periodically. Can be + * reached from DETACHED or SHUTDOWN. + * ONESHOT: Device is programmed to generate event only once. Can be reached + * from DETACHED or SHUTDOWN. + */ +enum clock_event_state { + CLOCK_EVT_STATE_DETACHED, + CLOCK_EVT_STATE_SHUTDOWN, + CLOCK_EVT_STATE_PERIODIC, + CLOCK_EVT_STATE_ONESHOT, +}; + +/* * Clock event features */ -#define CLOCK_EVT_FEAT_PERIODIC 0x000001 -#define CLOCK_EVT_FEAT_ONESHOT 0x000002 -#define CLOCK_EVT_FEAT_KTIME 0x000004 +# define CLOCK_EVT_FEAT_PERIODIC 0x000001 +# define CLOCK_EVT_FEAT_ONESHOT 0x000002 +# define CLOCK_EVT_FEAT_KTIME 0x000004 + /* - * x86(64) specific misfeatures: + * x86(64) specific (mis)features: * * - Clockevent source stops in C3 State and needs broadcast support. * - Local APIC timer is used as a dummy device. */ -#define CLOCK_EVT_FEAT_C3STOP 0x000008 -#define CLOCK_EVT_FEAT_DUMMY 0x000010 +# define CLOCK_EVT_FEAT_C3STOP 0x000008 +# define CLOCK_EVT_FEAT_DUMMY 0x000010 /* * Core shall set the interrupt affinity dynamically in broadcast mode */ -#define CLOCK_EVT_FEAT_DYNIRQ 0x000020 -#define CLOCK_EVT_FEAT_PERCPU 0x000040 +# define CLOCK_EVT_FEAT_DYNIRQ 0x000020 +# define CLOCK_EVT_FEAT_PERCPU 0x000040 /* * Clockevent device is based on a hrtimer for broadcast */ -#define CLOCK_EVT_FEAT_HRTIMER 0x000080 +# define CLOCK_EVT_FEAT_HRTIMER 0x000080 /** * struct clock_event_device - clock event device descriptor @@ -78,10 +83,15 @@ enum clock_event_mode { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode assigned by the management code + * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE + * @state: current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: set mode function + * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. + * @set_state_periodic: switch state to periodic, if !set_mode + * @set_state_oneshot: switch state to oneshot, if !set_mode + * @set_state_shutdown: switch state to shutdown, if !set_mode + * @tick_resume: resume clkevt device, if !set_mode * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -95,22 +105,31 @@ enum clock_event_mode { */ struct clock_event_device { void (*event_handler)(struct clock_event_device *); - int (*set_next_event)(unsigned long evt, - struct clock_event_device *); - int (*set_next_ktime)(ktime_t expires, - struct clock_event_device *); + int (*set_next_event)(unsigned long evt, struct clock_event_device *); + int (*set_next_ktime)(ktime_t expires, struct clock_event_device *); ktime_t next_event; u64 max_delta_ns; u64 min_delta_ns; u32 mult; u32 shift; enum clock_event_mode mode; + enum clock_event_state state; unsigned int features; unsigned long retries; + /* + * State transition callback(s): Only one of the two groups should be + * defined: + * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. + * - set_state_{shutdown|periodic|oneshot}(), tick_resume(). + */ + void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); + int (*set_state_periodic)(struct clock_event_device *); + int (*set_state_oneshot)(struct clock_event_device *); + int (*set_state_shutdown)(struct clock_event_device *); + int (*tick_resume)(struct clock_event_device *); + void (*broadcast)(const struct cpumask *mask); - void (*set_mode)(enum clock_event_mode mode, - struct clock_event_device *); void (*suspend)(struct clock_event_device *); void (*resume)(struct clock_event_device *); unsigned long min_delta_ticks; @@ -136,18 +155,18 @@ struct clock_event_device { * * factor = (clock_ticks << shift) / nanoseconds */ -static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec, - int shift) +static inline unsigned long +div_sc(unsigned long ticks, unsigned long nsec, int shift) { - uint64_t tmp = ((uint64_t)ticks) << shift; + u64 tmp = ((u64)ticks) << shift; do_div(tmp, nsec); + return (unsigned long) tmp; } /* Clock event layer functions */ -extern u64 clockevent_delta2ns(unsigned long latch, - struct clock_event_device *evt); +extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); @@ -158,57 +177,42 @@ extern void clockevents_config_and_register(struct clock_event_device *dev, extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq); -extern void clockevents_exchange_device(struct clock_event_device *old, - struct clock_event_device *new); -extern void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode); -extern int clockevents_program_event(struct clock_event_device *dev, - ktime_t expires, bool force); - -extern void clockevents_handle_noop(struct clock_event_device *dev); - static inline void clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) { - return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, - freq, minsec); + return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec); } extern void clockevents_suspend(void); extern void clockevents_resume(void); -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_ARCH_HAS_TICK_BROADCAST extern void tick_broadcast(const struct cpumask *mask); -#else -#define tick_broadcast NULL -#endif +# else +# define tick_broadcast NULL +# endif extern int tick_receive_broadcast(void); -#endif +# endif -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); -#else +# else static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; -#endif +static inline void tick_setup_hrtimer_broadcast(void) { } +# endif -#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int clockevents_notify(unsigned long reason, void *arg); -#else -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } -#endif - -#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ -static inline void clockevents_suspend(void) {} -static inline void clockevents_resume(void) {} +#else /* !CONFIG_GENERIC_CLOCKEVENTS: */ +static inline void clockevents_suspend(void) { } +static inline void clockevents_resume(void) { } static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } -static inline void tick_setup_hrtimer_broadcast(void) {}; +static inline void tick_setup_hrtimer_broadcast(void) { } -#endif +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -#endif +#endif /* _LINUX_CLOCKCHIPS_H */ diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9c78d15d33e4..135509821c39 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -56,6 +56,7 @@ struct module; * @shift: cycle to nanosecond divisor (power of two) * @max_idle_ns: max idle time permitted by the clocksource (nsecs) * @maxadj: maximum adjustment value to mult (~11%) + * @max_cycles: maximum safe cycle value which won't overflow on multiplication * @flags: flags describing special properties * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary @@ -76,7 +77,7 @@ struct clocksource { #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif - + u64 max_cycles; const char *name; struct list_head list; int rating; @@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) } -extern int clocksource_register(struct clocksource*); extern int clocksource_unregister(struct clocksource*); extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); @@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void); extern void clocksource_mark_unstable(struct clocksource *cs); extern u64 -clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); +clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles); extern void clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); @@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); extern int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); extern void -__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); +__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq); + +/* + * Don't call this unless you are a default clocksource + * (AKA: jiffies) and absolutely have to. + */ +static inline int __clocksource_register(struct clocksource *cs) +{ + return __clocksource_register_scale(cs, 1, 0); +} static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) { @@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz) return __clocksource_register_scale(cs, 1000, khz); } -static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) +static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz) { - __clocksource_updatefreq_scale(cs, 1, hz); + __clocksource_update_freq_scale(cs, 1, hz); } -static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) +static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz) { - __clocksource_updatefreq_scale(cs, 1000, khz); + __clocksource_update_freq_scale(cs, 1000, khz); } diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1b45e4a0519b..0e41ca0e5927 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #include <uapi/linux/types.h> -static __always_inline void data_access_exceeds_word_size(void) -#ifdef __compiletime_warning -__compiletime_warning("data access exceeds word size and won't be atomic") -#endif -; - -static __always_inline void data_access_exceeds_word_size(void) -{ -} - static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { switch (size) { case 1: *(__u8 *)res = *(volatile __u8 *)p; break; case 2: *(__u16 *)res = *(volatile __u16 *)p; break; case 4: *(__u32 *)res = *(volatile __u32 *)p; break; -#ifdef CONFIG_64BIT case 8: *(__u64 *)res = *(volatile __u64 *)p; break; -#endif default: barrier(); __builtin_memcpy((void *)res, (const void *)p, size); - data_access_exceeds_word_size(); barrier(); } } @@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s case 1: *(volatile __u8 *)p = *(__u8 *)res; break; case 2: *(volatile __u16 *)p = *(__u16 *)res; break; case 4: *(volatile __u32 *)p = *(__u32 *)res; break; -#ifdef CONFIG_64BIT case 8: *(volatile __u64 *)p = *(__u64 *)res; break; -#endif default: barrier(); __builtin_memcpy((void *)p, (const void *)res, size); - data_access_exceeds_word_size(); barrier(); } } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index f551a9299ac9..9c5e89254796 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -77,7 +77,6 @@ struct cpuidle_device { unsigned int cpu; int last_residency; - int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -126,6 +125,8 @@ struct cpuidle_driver { #ifdef CONFIG_CPU_IDLE extern void disable_cpuidle(void); +extern bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev); @@ -150,11 +151,17 @@ extern void cpuidle_resume(void); extern int cpuidle_enable_device(struct cpuidle_device *dev); extern void cpuidle_disable_device(struct cpuidle_device *dev); extern int cpuidle_play_dead(void); -extern void cpuidle_enter_freeze(void); +extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev); +extern int cpuidle_enter_freeze(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); #else static inline void disable_cpuidle(void) { } +static inline bool cpuidle_not_available(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return true; } static inline int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return -ENODEV; } @@ -183,7 +190,12 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev) {return -ENODEV; } static inline void cpuidle_disable_device(struct cpuidle_device *dev) { } static inline int cpuidle_play_dead(void) {return -ENODEV; } -static inline void cpuidle_enter_freeze(void) { } +static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } +static inline int cpuidle_enter_freeze(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_cpu_driver( struct cpuidle_device *dev) {return NULL; } #endif diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 2646aed1d3fe..fd23978d93fe 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -375,6 +375,7 @@ int dm_create(int minor, struct mapped_device **md); */ struct mapped_device *dm_get_md(dev_t dev); void dm_get(struct mapped_device *md); +int dm_hold(struct mapped_device *md); void dm_put(struct mapped_device *md); /* diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index 022e34fcbd1b..52456aa566a0 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -14,6 +14,8 @@ #include <asm/io.h> #include <asm/scatterlist.h> +struct device; + struct dma_pool *dma_pool_create(const char *name, struct device *dev, size_t size, size_t align, size_t allocation); diff --git a/include/linux/efi.h b/include/linux/efi.h index cf7e431cbc73..af5be0368dec 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -942,6 +942,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_64BIT 5 /* Is the firmware 64-bit? */ #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ +#define EFI_DBG 8 /* Print additional debug info at runtime */ #ifdef CONFIG_EFI /* diff --git a/include/linux/fs.h b/include/linux/fs.h index b4d71b5e1ff2..52cc4492cb3a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -604,6 +604,7 @@ struct inode { struct mutex i_mutex; unsigned long dirtied_when; /* jiffies of first dirtying */ + unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_wb_list; /* backing dev IO list */ @@ -1548,7 +1549,7 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); - void (*mremap)(struct file *, struct vm_area_struct *); + int (*mremap)(struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index d9b05b5bf8c7..2e88580194f0 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -52,11 +52,17 @@ * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. * Used by threaded interrupts which need to keep the * irq line disabled until the threaded handler has been run. - * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend + * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend. Does not guarantee + * that this interrupt will wake the system from a suspended + * state. See Documentation/power/suspend-and-interrupts.txt * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set * IRQF_NO_THREAD - Interrupt cannot be threaded * IRQF_EARLY_RESUME - Resume IRQ early during syscore instead of at device * resume time. + * IRQF_COND_SUSPEND - If the IRQ is shared with a NO_SUSPEND user, execute this + * interrupt handler after suspending interrupts. For system + * wakeup devices users need to implement wakeup detection in + * their interrupt handlers. */ #define IRQF_DISABLED 0x00000020 #define IRQF_SHARED 0x00000080 @@ -70,6 +76,7 @@ #define IRQF_FORCE_RESUME 0x00008000 #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 +#define IRQF_COND_SUSPEND 0x00040000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf3fe719c7ce..47b9ebd4a74f 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -38,16 +38,17 @@ bool irq_work_queue(struct irq_work *work); bool irq_work_queue_on(struct irq_work *work, int cpu); #endif -void irq_work_run(void); void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK #include <asm/irq_work.h> +void irq_work_run(void); bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } +static inline void irq_work_run(void) { } #endif #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 800544bc7bfd..ffbc034c8810 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -126,8 +126,23 @@ #define GICR_PROPBASER_WaWb (5U << 7) #define GICR_PROPBASER_RaWaWt (6U << 7) #define GICR_PROPBASER_RaWaWb (7U << 7) +#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) #define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PENDBASER_NonShareable (0U << 10) +#define GICR_PENDBASER_InnerShareable (1U << 10) +#define GICR_PENDBASER_OuterShareable (2U << 10) +#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10) +#define GICR_PENDBASER_nCnB (0U << 7) +#define GICR_PENDBASER_nC (1U << 7) +#define GICR_PENDBASER_RaWt (2U << 7) +#define GICR_PENDBASER_RaWb (3U << 7) +#define GICR_PENDBASER_WaWt (4U << 7) +#define GICR_PENDBASER_WaWb (5U << 7) +#define GICR_PENDBASER_RaWaWt (6U << 7) +#define GICR_PENDBASER_RaWaWb (7U << 7) +#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) + /* * Re-Distributor registers, offsets from SGI_base */ @@ -166,6 +181,11 @@ #define GITS_TRANSLATER 0x10040 +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) #define GITS_CBASER_VALID (1UL << 63) @@ -177,6 +197,7 @@ #define GITS_CBASER_WaWb (5UL << 59) #define GITS_CBASER_RaWaWt (6UL << 59) #define GITS_CBASER_RaWaWb (7UL << 59) +#define GITS_CBASER_CACHEABILITY_MASK (7UL << 59) #define GITS_CBASER_NonShareable (0UL << 10) #define GITS_CBASER_InnerShareable (1UL << 10) #define GITS_CBASER_OuterShareable (2UL << 10) @@ -193,6 +214,7 @@ #define GITS_BASER_WaWb (5UL << 59) #define GITS_BASER_RaWaWt (6UL << 59) #define GITS_BASER_RaWaWb (7UL << 59) +#define GITS_BASER_CACHEABILITY_MASK (7UL << 59) #define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index faf433af425e..dd1109fb241e 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -78,6 +78,7 @@ struct irq_desc { #ifdef CONFIG_PM_SLEEP unsigned int nr_actions; unsigned int no_suspend_depth; + unsigned int cond_suspend_depth; unsigned int force_resume_depth; #endif #ifdef CONFIG_PROC_FS diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 98f923b6a0ea..f4de473f226b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -45,6 +45,12 @@ * same as using STATIC_KEY_INIT_FALSE. */ +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +# define HAVE_JUMP_LABEL +#endif + +#ifndef __ASSEMBLY__ + #include <linux/types.h> #include <linux/compiler.h> #include <linux/bug.h> @@ -55,7 +61,7 @@ extern bool static_key_initialized; "%s used before call to jump_label_init", \ __func__) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key { atomic_t enabled; @@ -66,13 +72,18 @@ struct static_key { #endif }; -# include <asm/jump_label.h> -# define HAVE_JUMP_LABEL #else struct static_key { atomic_t enabled; }; -#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ +#endif /* HAVE_JUMP_LABEL */ +#endif /* __ASSEMBLY__ */ + +#ifdef HAVE_JUMP_LABEL +#include <asm/jump_label.h> +#endif + +#ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_DISABLE = 0, @@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key) } #endif /* _LINUX_JUMP_LABEL_H */ + +#endif /* __ASSEMBLY__ */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 72ba725ddf9c..5bb074431eb0 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -5,6 +5,7 @@ struct kmem_cache; struct page; +struct vm_struct; #ifdef CONFIG_KASAN @@ -49,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); -#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) - int kasan_module_alloc(void *addr, size_t size); -void kasan_module_free(void *addr); +void kasan_free_shadow(const struct vm_struct *vm); #else /* CONFIG_KASAN */ -#define MODULE_ALIGN 1 - static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} @@ -82,7 +79,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_module_free(void *addr) {} +static inline void kasan_free_shadow(const struct vm_struct *vm) {} #endif /* CONFIG_KASAN */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b2104d19b..82af5d0b996e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -165,12 +165,12 @@ enum kvm_bus { KVM_NR_BUSES }; -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie); -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, - void *val); +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie); +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, @@ -658,7 +658,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); void *kvm_kvzalloc(unsigned long size); -void kvm_kvfree(const void *addr); #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) @@ -700,6 +699,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) #endif } +#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED +/* + * returns true if the virtual interrupt controller is initialized and + * ready to accept virtual IRQ. On some architectures the virtual interrupt + * controller is dynamically instantiated and this is not always true. + */ +bool kvm_arch_intc_initialized(struct kvm *kvm); +#else +static inline bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return true; +} +#endif + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); @@ -969,11 +982,16 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ #ifdef CONFIG_KVM_APIC_ARCHITECTURE -static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } +static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); #else diff --git a/include/linux/lcm.h b/include/linux/lcm.h index 7bf01d779b45..1ce79a7f1daa 100644 --- a/include/linux/lcm.h +++ b/include/linux/lcm.h @@ -4,5 +4,6 @@ #include <linux/compiler.h> unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__; +unsigned long lcm_not_zero(unsigned long a, unsigned long b) __attribute_const__; #endif /* _LCM_H */ diff --git a/include/linux/libata.h b/include/linux/libata.h index fc03efa64ffe..6b08cc106c21 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -232,6 +232,7 @@ enum { * led */ ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ ATA_FLAG_LOWTAG = (1 << 24), /* host wants lowest available tag */ + ATA_FLAG_SAS_HOST = (1 << 25), /* SAS host */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index fb0390a1a498..ee7b1ce7a6f8 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -2999,6 +2999,9 @@ enum usb_irq_events { #define PALMAS_GPADC_TRIM15 0x0E #define PALMAS_GPADC_TRIM16 0x0F +/* TPS659038 regen2_ctrl offset iss different from palmas */ +#define TPS659038_REGEN2_CTRL 0x12 + /* TPS65917 Interrupt registers */ /* Registers for function INTERRUPT */ diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 2bbc62aa818a..551f85456c11 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -427,7 +427,7 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, - MLX4_UPDATE_QP_VSD = 1 << 2, + MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 }; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f279d9c158cd..2782df47101e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -474,16 +474,15 @@ struct zone { unsigned long wait_table_bits; ZONE_PADDING(_pad1_) - - /* Write-intensive fields used from the page allocator */ - spinlock_t lock; - /* free areas of different sizes */ struct free_area free_area[MAX_ORDER]; /* zone flags, see below */ unsigned long flags; + /* Write-intensive fields used from the page allocator */ + spinlock_t lock; + ZONE_PADDING(_pad2_) /* Write-intensive fields used by page reclaim */ diff --git a/include/linux/module.h b/include/linux/module.h index 42999fe2dbd0..b03485bcb82a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -344,6 +344,10 @@ struct module { unsigned long *ftrace_callsites; #endif +#ifdef CONFIG_LIVEPATCH + bool klp_alive; +#endif + #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ struct list_head source_list; diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index f7556261fe3c..4d0cb9bba93e 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -84,4 +84,12 @@ void module_arch_cleanup(struct module *mod); /* Any cleanup before freeing mod->module_init */ void module_arch_freeing_init(struct module *mod); + +#ifdef CONFIG_KASAN +#include <linux/kasan.h> +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) +#else +#define MODULE_ALIGN PAGE_SIZE +#endif + #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5897b4ea5a3f..278738873703 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -965,9 +965,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * - * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh) + * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev, u32 filter_mask) + * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, + * u16 flags); * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) @@ -2182,6 +2185,12 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); +DECLARE_PER_CPU(int, xmit_recursion); +static inline int dev_recursion_level(void) +{ + return this_cpu_read(xmit_recursion); +} + struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); @@ -2342,6 +2351,7 @@ struct gro_remcsum { static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) { + grc->offset = 0; grc->delta = 0; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2f77e0c651c8..b01ccf371fdc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -343,6 +343,7 @@ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); +extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); @@ -355,8 +356,9 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); +extern int nfs_revalidate_mapping_protected(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); -extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); +extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *); extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); @@ -369,6 +371,7 @@ extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ct extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); extern u64 nfs_compat_user_ino64(u64 fileid); extern void nfs_fattr_init(struct nfs_fattr *fattr); +extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr); extern unsigned long nfs_inc_attr_generation_counter(void); extern struct nfs_fattr *nfs_alloc_fattr(void); diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 8a860f096c35..611a691145c4 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -84,7 +84,7 @@ static inline int of_platform_populate(struct device_node *root, static inline void of_platform_depopulate(struct device *parent) { } #endif -#ifdef CONFIG_OF_DYNAMIC +#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS) extern void of_platform_register_reconfig_notifier(void); #else static inline void of_platform_register_reconfig_notifier(void) { } diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index 72c0415d6c21..18eccefea06e 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -82,7 +82,7 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio) static inline struct pinctrl * __must_check pinctrl_get(struct device *dev) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void pinctrl_put(struct pinctrl *p) @@ -93,7 +93,7 @@ static inline struct pinctrl_state * __must_check pinctrl_lookup_state( struct pinctrl *p, const char *name) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline int pinctrl_select_state(struct pinctrl *p, @@ -104,7 +104,7 @@ static inline int pinctrl_select_state(struct pinctrl *p, static inline struct pinctrl * __must_check devm_pinctrl_get(struct device *dev) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void devm_pinctrl_put(struct pinctrl *p) diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index d4ad5b5a02bb..045f709cb89b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -316,7 +316,7 @@ struct regulator_desc { * @driver_data: private regulator data * @of_node: OpenFirmware node to parse for device tree bindings (may be * NULL). - * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is + * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is * insufficient. * @ena_gpio_initialized: GPIO controlling regulator enable was properly * initialized, meaning that >= 0 is a valid gpio diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 58851275fed9..d438eeb08bff 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -54,10 +54,11 @@ struct rhash_head { * @buckets: size * hash buckets */ struct bucket_table { - size_t size; - unsigned int locks_mask; - spinlock_t *locks; - struct rhash_head __rcu *buckets[]; + size_t size; + unsigned int locks_mask; + spinlock_t *locks; + + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); @@ -78,12 +79,6 @@ struct rhashtable; * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object - * @grow_decision: If defined, may return true if table should expand - * @shrink_decision: If defined, may return true if table should shrink - * - * Note: when implementing the grow and shrink decision function, min/max - * shift must be enforced, otherwise, resizing watermarks they set may be - * useless. */ struct rhashtable_params { size_t nelem_hint; @@ -97,10 +92,6 @@ struct rhashtable_params { size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; - bool (*grow_decision)(const struct rhashtable *ht, - size_t new_size); - bool (*shrink_decision)(const struct rhashtable *ht, - size_t new_size); }; /** @@ -192,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); - int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index dcad7ee0d746..8dcf6825fa88 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -77,6 +77,7 @@ struct rtc_class_ops { int (*read_alarm)(struct device *, struct rtc_wkalrm *); int (*set_alarm)(struct device *, struct rtc_wkalrm *); int (*proc)(struct device *, struct seq_file *); + int (*set_mmss64)(struct device *, time64_t secs); int (*set_mmss)(struct device *, unsigned long secs); int (*read_callback)(struct device *, int data); int (*alarm_irq_enable)(struct device *, unsigned int enabled); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432e14ff..3f3308824fa4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); +/* Notifier for when a task gets migrated to a new CPU */ +struct task_migration_notifier { + struct task_struct *task; + int from_cpu; + int to_cpu; +}; +extern void register_task_migration_notifier(struct notifier_block *n); + extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); @@ -1115,15 +1123,28 @@ struct load_weight { }; struct sched_avg { + u64 last_runnable_update; + s64 decay_count; + /* + * utilization_avg_contrib describes the amount of time that a + * sched_entity is running on a CPU. It is based on running_avg_sum + * and is scaled in the range [0..SCHED_LOAD_SCALE]. + * load_avg_contrib described the amount of time that a sched_entity + * is runnable on a rq. It is based on both runnable_avg_sum and the + * weight of the task. + */ + unsigned long load_avg_contrib, utilization_avg_contrib; /* * These sums represent an infinite geometric series and so are bound * above by 1024/(1-y). Thus we only need a u32 to store them for all * choices of y < 1-2^(-32)*1024. + * running_avg_sum reflects the time that the sched_entity is + * effectively running on the CPU. + * runnable_avg_sum represents the amount of time a sched_entity is on + * a runqueue which includes the running time that is monitored by + * running_avg_sum. */ - u32 runnable_avg_sum, runnable_avg_period; - u64 last_runnable_update; - s64 decay_count; - unsigned long load_avg_contrib; + u32 runnable_avg_sum, avg_period, running_avg_sum; }; #ifdef CONFIG_SCHEDSTATS @@ -1625,11 +1646,11 @@ struct task_struct { /* * numa_faults_locality tracks if faults recorded during the last - * scan window were remote/local. The task scan period is adapted - * based on the locality of the faults with different weights - * depending on whether they were shared or private faults + * scan window were remote/local or failed to migrate. The task scan + * period is adapted based on the locality of the faults with different + * weights depending on whether they were shared or private faults */ - unsigned long numa_faults_locality[2]; + unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ @@ -1719,6 +1740,7 @@ struct task_struct { #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 #define TNF_FAULT_LOCAL 0x08 +#define TNF_MIGRATE_FAIL 0x10 #ifdef CONFIG_NUMA_BALANCING extern void task_numa_fault(int last_node, int node, int pages, int flags); diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f5df8f687b4d..5f68d0a391ce 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->sequence); + ret = READ_ONCE(s->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; @@ -127,7 +127,7 @@ repeat: */ static inline unsigned raw_read_seqcount(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret; } @@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret & ~1; } diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index baf3e1d08416..d10965f0d8a4 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -143,13 +143,13 @@ struct uart_port { unsigned char iotype; /* io access style */ unsigned char unused1; -#define UPIO_PORT (0) /* 8b I/O port access */ -#define UPIO_HUB6 (1) /* Hub6 ISA card */ -#define UPIO_MEM (2) /* 8b MMIO access */ -#define UPIO_MEM32 (3) /* 32b little endian */ -#define UPIO_MEM32BE (4) /* 32b big endian */ -#define UPIO_AU (5) /* Au1x00 and RT288x type IO */ -#define UPIO_TSI (6) /* Tsi108/109 type IO */ +#define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ +#define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ +#define UPIO_MEM (SERIAL_IO_MEM) /* 8b MMIO access */ +#define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ +#define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ +#define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ +#define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30007afe70b3..f54d6659713a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) to->l4_hash = from->l4_hash; }; +static inline void skb_sender_cpu_clear(struct sk_buff *skb) +{ +#ifdef CONFIG_XPS + skb->sender_cpu = 0; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ed9489d893a4..856d34dde79b 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -649,7 +649,7 @@ struct spi_transfer { * sequence completes. On some systems, many such sequences can execute as * as single programmed DMA transfer. On all systems, these messages are * queued, and might complete after transactions to other devices. Messages - * sent to a given spi_device are alway executed in FIFO order. + * sent to a given spi_device are always executed in FIFO order. * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. diff --git a/include/linux/stddef.h b/include/linux/stddef.h index f4aec0e75c3a..076af437284d 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -19,3 +19,12 @@ enum { #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif #endif + +/** + * offsetofend(TYPE, MEMBER) + * + * @TYPE: The type of the structure + * @MEMBER: The member within the structure to get the end offset of + */ +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index c57d8ea0716c..59a7889e15db 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -60,17 +60,17 @@ struct rpc_xprt; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) void rpc_register_sysctl(void); void rpc_unregister_sysctl(void); -int sunrpc_debugfs_init(void); +void sunrpc_debugfs_init(void); void sunrpc_debugfs_exit(void); -int rpc_clnt_debugfs_register(struct rpc_clnt *); +void rpc_clnt_debugfs_register(struct rpc_clnt *); void rpc_clnt_debugfs_unregister(struct rpc_clnt *); -int rpc_xprt_debugfs_register(struct rpc_xprt *); +void rpc_xprt_debugfs_register(struct rpc_xprt *); void rpc_xprt_debugfs_unregister(struct rpc_xprt *); #else -static inline int +static inline void sunrpc_debugfs_init(void) { - return 0; + return; } static inline void @@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void) return; } -static inline int +static inline void rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - return 0; + return; } static inline void @@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt) return; } -static inline int +static inline void rpc_xprt_debugfs_register(struct rpc_xprt *xprt) { - return 0; + return; } static inline void diff --git a/include/linux/tick.h b/include/linux/tick.h index 9c085dc12ae9..f8492da57ad3 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -1,7 +1,5 @@ -/* linux/include/linux/tick.h - * - * This file contains the structure definitions for tick related functions - * +/* + * Tick related global functions */ #ifndef _LINUX_TICK_H #define _LINUX_TICK_H @@ -9,149 +7,99 @@ #include <linux/clockchips.h> #include <linux/irqflags.h> #include <linux/percpu.h> -#include <linux/hrtimer.h> #include <linux/context_tracking_state.h> #include <linux/cpumask.h> #include <linux/sched.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS - -enum tick_device_mode { - TICKDEV_MODE_PERIODIC, - TICKDEV_MODE_ONESHOT, -}; - -struct tick_device { - struct clock_event_device *evtdev; - enum tick_device_mode mode; -}; - -enum tick_nohz_mode { - NOHZ_MODE_INACTIVE, - NOHZ_MODE_LOWRES, - NOHZ_MODE_HIGHRES, -}; - -/** - * struct tick_sched - sched tick emulation and no idle tick control/stats - * @sched_timer: hrtimer to schedule the periodic tick in high - * resolution mode - * @last_tick: Store the last tick expiry time when the tick - * timer is modified for nohz sleeps. This is necessary - * to resume the tick timer operation in the timeline - * when the CPU returns from nohz sleep. - * @tick_stopped: Indicator that the idle tick has been stopped - * @idle_jiffies: jiffies at the entry to idle for idle time accounting - * @idle_calls: Total number of idle calls - * @idle_sleeps: Number of idle calls, where the sched tick was stopped - * @idle_entrytime: Time when the idle call was entered - * @idle_waketime: Time when the idle was interrupted - * @idle_exittime: Time when the idle state was left - * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped - * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding - * @sleep_length: Duration of the current idle sleep - * @do_timer_lst: CPU was the last one doing do_timer before going idle - */ -struct tick_sched { - struct hrtimer sched_timer; - unsigned long check_clocks; - enum tick_nohz_mode nohz_mode; - ktime_t last_tick; - int inidle; - int tick_stopped; - unsigned long idle_jiffies; - unsigned long idle_calls; - unsigned long idle_sleeps; - int idle_active; - ktime_t idle_entrytime; - ktime_t idle_waketime; - ktime_t idle_exittime; - ktime_t idle_sleeptime; - ktime_t iowait_sleeptime; - ktime_t sleep_length; - unsigned long last_jiffies; - unsigned long next_jiffies; - ktime_t idle_expires; - int do_timer_last; -}; - extern void __init tick_init(void); -extern int tick_is_oneshot_available(void); -extern struct tick_device *tick_get_device(int cpu); - extern void tick_freeze(void); extern void tick_unfreeze(void); +/* Should be core only, but ARM BL switcher requires it */ +extern void tick_suspend_local(void); +/* Should be core only, but XEN resume magic and ARM BL switcher require it */ +extern void tick_resume_local(void); +extern void tick_handover_do_timer(void); +extern void tick_cleanup_dead_cpu(int cpu); +#else /* CONFIG_GENERIC_CLOCKEVENTS */ +static inline void tick_init(void) { } +static inline void tick_freeze(void) { } +static inline void tick_unfreeze(void) { } +static inline void tick_suspend_local(void) { } +static inline void tick_resume_local(void) { } +static inline void tick_handover_do_timer(void) { } +static inline void tick_cleanup_dead_cpu(int cpu) { } +#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ -# ifdef CONFIG_HIGH_RES_TIMERS -extern int tick_init_highres(void); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_setup_sched_timer(void); -# endif - -# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS -extern void tick_cancel_sched_timer(int cpu); -# else -static inline void tick_cancel_sched_timer(int cpu) { } -# endif - -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern struct tick_device *tick_get_broadcast_device(void); -extern struct cpumask *tick_get_broadcast_mask(void); - -# ifdef CONFIG_TICK_ONESHOT -extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -# endif - -# endif /* BROADCAST */ - -# ifdef CONFIG_TICK_ONESHOT -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern struct tick_sched *tick_get_tick_sched(int cpu); +#ifdef CONFIG_TICK_ONESHOT extern void tick_irq_enter(void); -extern int tick_oneshot_mode_active(void); # ifndef arch_needs_cpu # define arch_needs_cpu() (0) # endif # else -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -# endif +#endif -#else /* CONFIG_GENERIC_CLOCKEVENTS */ -static inline void tick_init(void) { } -static inline void tick_freeze(void) { } -static inline void tick_unfreeze(void) { } -static inline void tick_cancel_sched_timer(int cpu) { } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -static inline void tick_irq_enter(void) { } -static inline int tick_oneshot_mode_active(void) { return 0; } -#endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu); +#else +static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { } +#endif -# ifdef CONFIG_NO_HZ_COMMON -DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); +enum tick_broadcast_mode { + TICK_BROADCAST_OFF, + TICK_BROADCAST_ON, + TICK_BROADCAST_FORCE, +}; + +enum tick_broadcast_state { + TICK_BROADCAST_EXIT, + TICK_BROADCAST_ENTER, +}; + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern void tick_broadcast_control(enum tick_broadcast_mode mode); +#else +static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } +#endif /* BROADCAST */ + +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } +#endif -static inline int tick_nohz_tick_stopped(void) +static inline void tick_broadcast_enable(void) +{ + tick_broadcast_control(TICK_BROADCAST_ON); +} +static inline void tick_broadcast_disable(void) +{ + tick_broadcast_control(TICK_BROADCAST_OFF); +} +static inline void tick_broadcast_force(void) +{ + tick_broadcast_control(TICK_BROADCAST_FORCE); +} +static inline int tick_broadcast_enter(void) { - return __this_cpu_read(tick_cpu_sched.tick_stopped); + return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER); +} +static inline void tick_broadcast_exit(void) +{ + tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); } +#ifdef CONFIG_NO_HZ_COMMON +extern int tick_nohz_tick_stopped(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); - -# else /* !CONFIG_NO_HZ_COMMON */ -static inline int tick_nohz_tick_stopped(void) -{ - return 0; -} - +#else /* !CONFIG_NO_HZ_COMMON */ +static inline int tick_nohz_tick_stopped(void) { return 0; } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } @@ -163,7 +111,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -# endif /* !CONFIG_NO_HZ_COMMON */ +#endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 05af9a334893..fb86963859c7 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -16,16 +16,16 @@ * @read: Read function of @clock * @mask: Bitmask for two's complement subtraction of non 64bit clocks * @cycle_last: @clock cycle value at last update - * @mult: NTP adjusted multiplier for scaled math conversion + * @mult: (NTP adjusted) multiplier for scaled math conversion * @shift: Shift value for scaled math conversion * @xtime_nsec: Shifted (fractional) nano seconds offset for readout - * @base_mono: ktime_t (nanoseconds) base time for readout + * @base: ktime_t (nanoseconds) base time for readout * * This struct has size 56 byte on 64 bit. Together with a seqcount it * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used - * for a fast NMI safe accessor to clock monotonic. + * for a fast NMI safe accessors. */ struct tk_read_base { struct clocksource *clock; @@ -35,12 +35,13 @@ struct tk_read_base { u32 mult; u32 shift; u64 xtime_nsec; - ktime_t base_mono; + ktime_t base; }; /** * struct timekeeper - Structure holding internal timekeeping values. - * @tkr: The readout base structure + * @tkr_mono: The readout base structure for CLOCK_MONOTONIC + * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW * @xtime_sec: Current CLOCK_REALTIME time in seconds * @ktime_sec: Current CLOCK_MONOTONIC time in seconds * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset @@ -48,7 +49,6 @@ struct tk_read_base { * @offs_boot: Offset clock monotonic -> clock boottime * @offs_tai: Offset clock monotonic -> clock tai * @tai_offset: The current UTC to TAI offset in seconds - * @base_raw: Monotonic raw base time in ktime_t format * @raw_time: Monotonic raw base time in timespec64 format * @cycle_interval: Number of clock cycles in one NTP interval * @xtime_interval: Number of clock shifted nano seconds in one NTP @@ -76,7 +76,8 @@ struct tk_read_base { * used instead. */ struct timekeeper { - struct tk_read_base tkr; + struct tk_read_base tkr_mono; + struct tk_read_base tkr_raw; u64 xtime_sec; unsigned long ktime_sec; struct timespec64 wall_to_monotonic; @@ -84,7 +85,6 @@ struct timekeeper { ktime_t offs_boot; ktime_t offs_tai; s32 tai_offset; - ktime_t base_raw; struct timespec64 raw_time; /* The following members are for timekeeping internal use */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3eaae4754275..99176af216af 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void) return ktime_to_ns(ktime_get_boottime()); } +static inline u64 ktime_get_tai_ns(void) +{ + return ktime_to_ns(ktime_get_clocktai()); +} + static inline u64 ktime_get_raw_ns(void) { return ktime_to_ns(ktime_get_raw()); } extern u64 ktime_get_mono_fast_ns(void); +extern u64 ktime_get_raw_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones @@ -242,6 +248,9 @@ static inline void timekeeping_clocktai(struct timespec *ts) /* * RTC specific */ +extern bool timekeeping_rtc_skipsuspend(void); +extern bool timekeeping_rtc_skipresume(void); + extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* @@ -253,17 +262,14 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw, /* * Persistent clock related interfaces */ -extern bool persistent_clock_exist; extern int persistent_clock_is_local; -static inline bool has_persistent_clock(void) -{ - return persistent_clock_exist; -} - extern void read_persistent_clock(struct timespec *ts); +extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock(struct timespec *ts); +extern void read_boot_clock64(struct timespec64 *ts); extern int update_persistent_clock(struct timespec now); +extern int update_persistent_clock64(struct timespec64 now); #endif diff --git a/include/linux/uio.h b/include/linux/uio.h index 07a022641996..71880299ed48 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags); + static inline size_t iov_iter_count(struct iov_iter *i) { return i->count; diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 9bb547c7bce7..704a1ab8240c 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -190,8 +190,7 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) * @num_ports: the number of different ports this device will have. * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer * (0 = end-point size) - * @bulk_out_size: minimum number of bytes to allocate for bulk-out buffer - * (0 = end-point size) + * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) * @calc_num_ports: pointer to a function to determine how many ports this * device has dynamically. It will be called after the probe() * callback is called, but before attach() diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index d9a4905e01d0..6e0ce8c7b8cb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -227,9 +227,23 @@ struct skb_data { /* skb->cb is one of these */ struct urb *urb; struct usbnet *dev; enum skb_state state; - size_t length; + long length; + unsigned long packets; }; +/* Drivers that set FLAG_MULTI_PACKET must call this in their + * tx_fixup method before returning an skb. + */ +static inline void +usbnet_set_skb_tx_stats(struct sk_buff *skb, + unsigned long packets, long bytes_delta) +{ + struct skb_data *entry = (struct skb_data *) skb->cb; + + entry->packets = packets; + entry->length = bytes_delta; +} + extern int usbnet_open(struct net_device *net); extern int usbnet_stop(struct net_device *net); extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb, diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 2d67b8998fd8..049b2f497bc7 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -78,19 +78,6 @@ extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); extern void vfio_unregister_iommu_driver( const struct vfio_iommu_driver_ops *ops); -/** - * offsetofend(TYPE, MEMBER) - * - * @TYPE: The type of the structure - * @MEMBER: The member within the structure to get the end offset of - * - * Simple helper macro for dealing with variable sized structures passed - * from user space. This allows us to easily determine if the provided - * structure is sized to include various fields. - */ -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) - /* * External user API */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 7d7acb35603d..0ec598381f97 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -17,6 +17,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 74db135f9957..f597846ff605 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -70,7 +70,8 @@ enum { /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, + WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 00048339c23e..b2dd371ec0ca 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -130,6 +130,7 @@ extern int vm_dirty_ratio; extern unsigned long vm_dirty_bytes; extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; +extern unsigned int dirtytime_expire_interval; extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; @@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write, extern int dirty_bytes_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +int dirtytime_interval_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); struct ctl_table; int dirty_writeback_centisecs_handler(struct ctl_table *, int, diff --git a/include/media/atmel-isi.h b/include/media/atmel-isi.h index c2e570336269..6008b0985b7b 100644 --- a/include/media/atmel-isi.h +++ b/include/media/atmel-isi.h @@ -59,6 +59,10 @@ #define ISI_CFG1_FRATE_DIV_MASK (7 << 8) #define ISI_CFG1_DISCR (1 << 11) #define ISI_CFG1_FULL_MODE (1 << 12) +/* Definition for THMASK(ISI_V2) */ +#define ISI_CFG1_THMASK_BEATS_4 (0 << 13) +#define ISI_CFG1_THMASK_BEATS_8 (1 << 13) +#define ISI_CFG1_THMASK_BEATS_16 (2 << 13) /* Bitfields in CFG2 */ #define ISI_CFG2_GRAYSCALE (1 << 13) diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 1c1ad46250d5..fe328c52c46b 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -171,7 +171,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos); * @return Checksum of buffer. */ -u16 cfpkt_iterate(struct cfpkt *pkt, +int cfpkt_iterate(struct cfpkt *pkt, u16 (*iter_func)(u16 chks, void *buf, u16 len), u16 data); diff --git a/include/net/dst.h b/include/net/dst.h index a8ae4e760778..0fb99a26e973 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -481,6 +481,7 @@ void dst_init(void); enum { XFRM_LOOKUP_ICMP = 1 << 0, XFRM_LOOKUP_QUEUE = 1 << 1, + XFRM_LOOKUP_KEEP_DST_REF = 1 << 2, }; struct flowi; diff --git a/include/net/ip.h b/include/net/ip.h index 025c61c0dffb..6cc1eafb153a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif -static inline int sk_mc_loop(struct sock *sk) -{ - if (!sk) - return 1; - switch (sk->sk_family) { - case AF_INET: - return inet_sk(sk)->mc_loop; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - return inet6_sk(sk)->mc_loop; -#endif - } - WARN_ON(1); - return 1; -} - bool ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 1d09b46c1e48..eda131d179d9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); static inline int ip6_skb_dst_mtu(struct sk_buff *skb) { - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ? skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 534e1f2ac4fc..57639fca223a 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -79,6 +79,16 @@ void nf_log_packet(struct net *net, const struct nf_loginfo *li, const char *fmt, ...); +__printf(8, 9) +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li, + const char *fmt, ...); + struct nf_log_buf; struct nf_log_buf *nf_log_buf_open(void); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 9eaaa7884586..decb9a095ae7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -119,6 +119,22 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, const struct nft_data *data, enum nft_data_types type); + +/** + * struct nft_userdata - user defined data associated with an object + * + * @len: length of the data + * @data: content + * + * The presence of user data is indicated in an object specific fashion, + * so a length of zero can't occur and the value "len" indicates data + * of length len + 1. + */ +struct nft_userdata { + u8 len; + unsigned char data[0]; +}; + /** * struct nft_set_elem - generic representation of set elements * @@ -380,7 +396,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * @handle: rule handle * @genmask: generation mask * @dlen: length of expression data - * @ulen: length of user data (used for comments) + * @udata: user data is appended to the rule * @data: expression data */ struct nft_rule { @@ -388,7 +404,7 @@ struct nft_rule { u64 handle:42, genmask:2, dlen:12, - ulen:8; + udata:1; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; @@ -476,7 +492,7 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) return (struct nft_expr *)&rule->data[rule->dlen]; } -static inline void *nft_userdata(const struct nft_rule *rule) +static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) { return (void *)&rule->data[rule->dlen]; } diff --git a/include/net/sock.h b/include/net/sock.h index ab186b1d31ff..e4079c28e6b8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie); struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie); +bool sk_mc_loop(struct sock *sk); + static inline bool sk_can_gso(const struct sock *sk) { return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index eabd3a038674..c73e7abbbaa5 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -91,6 +91,7 @@ struct vxlanhdr { #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) +#define VXLAN_VNI_MASK (VXLAN_VID_MASK << 8) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) struct vxlan_metadata { diff --git a/include/soc/at91/at91sam9_ddrsdr.h b/include/soc/at91/at91sam9_ddrsdr.h index 0210797abf2e..dc10c52e0e91 100644 --- a/include/soc/at91/at91sam9_ddrsdr.h +++ b/include/soc/at91/at91sam9_ddrsdr.h @@ -92,7 +92,7 @@ #define AT91_DDRSDRC_UPD_MR (3 << 20) /* Update load mode register and extended mode register */ #define AT91_DDRSDRC_MDR 0x20 /* Memory Device Register */ -#define AT91_DDRSDRC_MD (3 << 0) /* Memory Device Type */ +#define AT91_DDRSDRC_MD (7 << 0) /* Memory Device Type */ #define AT91_DDRSDRC_MD_SDR 0 #define AT91_DDRSDRC_MD_LOW_POWER_SDR 1 #define AT91_DDRSDRC_MD_LOW_POWER_DDR 3 diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index db81c65b8f48..d61be7297b2c 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -111,6 +111,7 @@ void array_free(void *array, int n); void target_core_setup_sub_cits(struct se_subsystem_api *); /* attribute helpers from target_core_device.c for backend drivers */ +bool se_dev_check_wce(struct se_device *); int se_dev_set_max_unmap_lba_count(struct se_device *, u32); int se_dev_set_max_unmap_block_desc_count(struct se_device *, u32); int se_dev_set_unmap_granularity(struct se_device *, u32); diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h index 23d561512f64..22317d2b52ab 100644 --- a/include/trace/events/regmap.h +++ b/include/trace/events/regmap.h @@ -7,27 +7,26 @@ #include <linux/ktime.h> #include <linux/tracepoint.h> -struct device; -struct regmap; +#include "../../../drivers/base/regmap/internal.h" /* * Log register events */ DECLARE_EVENT_CLASS(regmap_reg, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val), + TP_ARGS(map, reg, val), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( unsigned int, val ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( unsigned int, val ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->val = val; ), @@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg, DEFINE_EVENT(regmap_reg, regmap_reg_write, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DEFINE_EVENT(regmap_reg, regmap_reg_read_cache, - TP_PROTO(struct device *dev, unsigned int reg, + TP_PROTO(struct regmap *map, unsigned int reg, unsigned int val), - TP_ARGS(dev, reg, val) + TP_ARGS(map, reg, val) ); DECLARE_EVENT_CLASS(regmap_block, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count), + TP_ARGS(map, reg, count), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, reg ) - __field( int, count ) + __string( name, regmap_name(map) ) + __field( unsigned int, reg ) + __field( int, count ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->reg = reg; __entry->count = count; ), @@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block, DEFINE_EVENT(regmap_block, regmap_hw_read_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_read_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_block, regmap_hw_write_done, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); TRACE_EVENT(regcache_sync, - TP_PROTO(struct device *dev, const char *type, + TP_PROTO(struct regmap *map, const char *type, const char *status), - TP_ARGS(dev, type, status), + TP_ARGS(map, type, status), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __string( status, status ) - __string( type, type ) - __field( int, type ) + __string( name, regmap_name(map) ) + __string( status, status ) + __string( type, type ) + __field( int, type ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __assign_str(status, status); __assign_str(type, type); ), @@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync, DECLARE_EVENT_CLASS(regmap_bool, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag), + TP_ARGS(map, flag), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( int, flag ) + __string( name, regmap_name(map) ) + __field( int, flag ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->flag = flag; ), @@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool, DEFINE_EVENT(regmap_bool, regmap_cache_only, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DEFINE_EVENT(regmap_bool, regmap_cache_bypass, - TP_PROTO(struct device *dev, bool flag), + TP_PROTO(struct regmap *map, bool flag), - TP_ARGS(dev, flag) + TP_ARGS(map, flag) ); DECLARE_EVENT_CLASS(regmap_async, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev), + TP_ARGS(map), TP_STRUCT__entry( - __string( name, dev_name(dev) ) + __string( name, regmap_name(map) ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); ), TP_printk("%s", __get_str(name)) @@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async, DEFINE_EVENT(regmap_block, regmap_async_write_start, - TP_PROTO(struct device *dev, unsigned int reg, int count), + TP_PROTO(struct regmap *map, unsigned int reg, int count), - TP_ARGS(dev, reg, count) + TP_ARGS(map, reg, count) ); DEFINE_EVENT(regmap_async, regmap_async_io_complete, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_start, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); DEFINE_EVENT(regmap_async, regmap_async_complete_done, - TP_PROTO(struct device *dev), + TP_PROTO(struct regmap *map), - TP_ARGS(dev) + TP_ARGS(map) ); TRACE_EVENT(regcache_drop_region, - TP_PROTO(struct device *dev, unsigned int from, + TP_PROTO(struct regmap *map, unsigned int from, unsigned int to), - TP_ARGS(dev, from, to), + TP_ARGS(map, from, to), TP_STRUCT__entry( - __string( name, dev_name(dev) ) - __field( unsigned int, from ) - __field( unsigned int, to ) + __string( name, regmap_name(map) ) + __field( unsigned int, from ) + __field( unsigned int, to ) ), TP_fast_assign( - __assign_str(name, dev_name(dev)); + __assign_str(name, regmap_name(map)); __entry->from = from; __entry->to = to; ), diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index b0a813079852..2f62ab2d7bf9 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -973,7 +973,8 @@ struct input_keymap_entry { */ #define MT_TOOL_FINGER 0 #define MT_TOOL_PEN 1 -#define MT_TOOL_MAX 1 +#define MT_TOOL_PALM 2 +#define MT_TOOL_MAX 2 /* * Values describing the status of a force-feedback effect diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 805570650062..f574d7be7631 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -147,6 +147,16 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; +#define KVM_S390_GET_SKEYS_NONE 1 +#define KVM_S390_SKEYS_MAX 1048576 + #define KVM_EXIT_UNKNOWN 0 #define KVM_EXIT_EXCEPTION 1 #define KVM_EXIT_IO 2 @@ -172,6 +182,7 @@ struct kvm_pit_config { #define KVM_EXIT_S390_TSCH 22 #define KVM_EXIT_EPR 23 #define KVM_EXIT_SYSTEM_EVENT 24 +#define KVM_EXIT_S390_STSI 25 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -309,6 +320,15 @@ struct kvm_run { __u32 type; __u64 flags; } system_event; + /* KVM_EXIT_S390_STSI */ + struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; + } s390_stsi; /* Fix the size of the union. */ char padding[256]; }; @@ -324,7 +344,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[1024]; + char padding[2048]; } s; }; @@ -365,6 +385,24 @@ struct kvm_translation { __u8 pad[5]; }; +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar; /* the access register number */ + __u8 reserved[31]; /* should be set to 0 */ +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) + /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -520,6 +558,13 @@ struct kvm_s390_irq { } u; }; +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -760,6 +805,14 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_ENABLE_HCALL 104 #define KVM_CAP_CHECK_EXTENSION_VM 105 #define KVM_CAP_S390_USER_SIGP 106 +#define KVM_CAP_S390_VECTOR_REGISTERS 107 +#define KVM_CAP_S390_MEM_OP 108 +#define KVM_CAP_S390_USER_STSI 109 +#define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_MIPS_FPU 111 +#define KVM_CAP_MIPS_MSA 112 +#define KVM_CAP_S390_INJECT_IRQ 113 +#define KVM_CAP_S390_IRQ_STATE 114 #ifdef KVM_CAP_IRQ_ROUTING @@ -1135,6 +1188,16 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) #define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) +/* Available with KVM_CAP_S390_MEM_OP */ +#define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op) +/* Available with KVM_CAP_S390_SKEYS */ +#define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) +#define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) +/* Available with KVM_CAP_S390_INJECT_IRQ */ +#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) +/* Available with KVM_CAP_S390_IRQ_STATE */ +#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) +#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index 4742f2cb42f2..d3bd6ffec041 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -47,7 +47,7 @@ * exported filesystem. */ #define NFSEXP_V4ROOT 0x10000 -#define NFSEXP_NOPNFS 0x20000 +#define NFSEXP_PNFS 0x20000 /* All flags that we claim to support. (Note we don't support NOACL.) */ #define NFSEXP_ALLFLAGS 0x3FE7F diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h index 5e0d0ed61cf3..25331f9faa76 100644 --- a/include/uapi/linux/serial.h +++ b/include/uapi/linux/serial.h @@ -65,6 +65,10 @@ struct serial_struct { #define SERIAL_IO_PORT 0 #define SERIAL_IO_HUB6 1 #define SERIAL_IO_MEM 2 +#define SERIAL_IO_MEM32 3 +#define SERIAL_IO_AU 4 +#define SERIAL_IO_TSI 5 +#define SERIAL_IO_MEM32BE 6 #define UART_CLEAR_FIFO 0x01 #define UART_USE_FIFO 0x02 diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 19d5219b0b99..242cf0c6e33d 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -9,3 +9,4 @@ header-y += tc_pedit.h header-y += tc_skbedit.h header-y += tc_vlan.h header-y += tc_bpf.h +header-y += tc_connmark.h diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h index 3c53eec4ae22..19c66fcbab8a 100644 --- a/include/uapi/linux/virtio_blk.h +++ b/include/uapi/linux/virtio_blk.h @@ -60,7 +60,7 @@ struct virtio_blk_config { __u32 size_max; /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ __u32 seg_max; - /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */ + /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */ struct virtio_blk_geometry { __u16 cylinders; __u8 heads; @@ -119,7 +119,11 @@ struct virtio_blk_config { #define VIRTIO_BLK_T_BARRIER 0x80000000 #endif /* !VIRTIO_BLK_NO_LEGACY */ -/* This is the first element of the read scatter-gather list. */ +/* + * This comes first in the read scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, + * this is the first element of the read scatter-gather list. + */ struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ __virtio32 type; diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h index 42b9370771b0..cc18ef8825c0 100644 --- a/include/uapi/linux/virtio_scsi.h +++ b/include/uapi/linux/virtio_scsi.h @@ -29,8 +29,16 @@ #include <linux/virtio_types.h> -#define VIRTIO_SCSI_CDB_SIZE 32 -#define VIRTIO_SCSI_SENSE_SIZE 96 +/* Default values of the CDB and sense data size configuration fields */ +#define VIRTIO_SCSI_CDB_DEFAULT_SIZE 32 +#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96 + +#ifndef VIRTIO_SCSI_CDB_SIZE +#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE +#endif +#ifndef VIRTIO_SCSI_SENSE_SIZE +#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE +#endif /* SCSI command request, followed by data-out */ struct virtio_scsi_cmd_req { diff --git a/include/video/omapdss.h b/include/video/omapdss.h index 60de61fea8e3..c8ed15daad02 100644 --- a/include/video/omapdss.h +++ b/include/video/omapdss.h @@ -689,6 +689,7 @@ struct omapdss_dsi_ops { }; struct omap_dss_device { + struct kobject kobj; struct device *dev; struct module *owner; diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index b78f21caf55a..b0f1c9e5d687 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -114,9 +114,9 @@ int __must_check __xenbus_register_backend(struct xenbus_driver *drv, const char *mod_name); #define xenbus_register_frontend(drv) \ - __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); + __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME) #define xenbus_register_backend(drv) \ - __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); + __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME) void xenbus_unregister_driver(struct xenbus_driver *drv); diff --git a/kernel/cpu.c b/kernel/cpu.c index 1972b161c61e..82eea9c5af61 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -20,6 +20,7 @@ #include <linux/gfp.h> #include <linux/suspend.h> #include <linux/lockdep.h> +#include <linux/tick.h> #include <trace/events/power.h> #include "smpboot.h" @@ -338,6 +339,8 @@ static int __ref take_cpu_down(void *_param) return err; cpu_notify(CPU_DYING | param->mod, param->hcpu); + /* Give up timekeeping duties */ + tick_handover_do_timer(); /* Park the stopper thread */ kthread_park(current); return 0; @@ -411,10 +414,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) while (!idle_cpu(cpu)) cpu_relax(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. */ + tick_cleanup_dead_cpu(cpu); cpu_notify_nofail(CPU_DEAD | mod, hcpu); check_for_tasks(cpu); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1d1fe9361d29..fc7f4748d34a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -548,9 +548,6 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) - continue; - /* skip the whole subtree if @cp doesn't have any CPU */ if (cpumask_empty(cp->cpus_allowed)) { pos_css = css_rightmost_descendant(pos_css); @@ -873,7 +870,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some CPUs. */ - if (cpumask_empty(new_cpus)) + if (cgroup_on_dfl(cp->css.cgroup) && cpumask_empty(new_cpus)) cpumask_copy(new_cpus, parent->effective_cpus); /* Skip the whole subtree if the cpumask remains the same. */ @@ -1129,7 +1126,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) * If it becomes empty, inherit the effective mask of the * parent, which is guaranteed to have some MEMs. */ - if (nodes_empty(*new_mems)) + if (cgroup_on_dfl(cp->css.cgroup) && nodes_empty(*new_mems)) *new_mems = parent->effective_mems; /* Skip the whole subtree if the nodemask remains the same. */ @@ -1979,7 +1976,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; + cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); + cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: mutex_unlock(&cpuset_mutex); diff --git a/kernel/events/core.c b/kernel/events/core.c index f04daabfd1cf..2fabc0627165 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3591,7 +3591,7 @@ static void put_event(struct perf_event *event) ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); WARN_ON_ONCE(ctx->parent_ctx); perf_remove_from_context(event, true); - mutex_unlock(&ctx->mutex); + perf_event_ctx_unlock(event, ctx); _free_event(event); } @@ -4574,6 +4574,13 @@ static void perf_pending_event(struct irq_work *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending); + int rctx; + + rctx = perf_swevent_get_recursion_context(); + /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ if (event->pending_disable) { event->pending_disable = 0; @@ -4584,6 +4591,9 @@ static void perf_pending_event(struct irq_work *entry) event->pending_wakeup = 0; perf_event_wakeup(event); } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); } /* diff --git a/kernel/futex.c b/kernel/futex.c index 2a5e3830e953..2579e407ff67 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -900,7 +900,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, if (!p) return -ESRCH; - if (!p->mm) { + if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); return -EPERM; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 196a06fbc122..886d09e691d5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1474,8 +1474,13 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * otherwise we'll have trouble later trying to figure out * which interrupt is which (messes up the interrupt freeing * logic etc). + * + * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and + * it cannot be set along with IRQF_NO_SUSPEND. */ - if ((irqflags & IRQF_SHARED) && !dev_id) + if (((irqflags & IRQF_SHARED) && !dev_id) || + (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || + ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; desc = irq_to_desc(irq); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 3ca532592704..5204a6d1b985 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -43,9 +43,12 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth++; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && - desc->no_suspend_depth != desc->nr_actions); + (desc->no_suspend_depth + + desc->cond_suspend_depth) != desc->nr_actions); } /* @@ -61,6 +64,8 @@ void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) if (action->flags & IRQF_NO_SUSPEND) desc->no_suspend_depth--; + else if (action->flags & IRQF_COND_SUSPEND) + desc->cond_suspend_depth--; } static bool suspend_device_irq(struct irq_desc *desc, int irq) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 782172f073c5..3f9f1d6b4c2e 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -89,16 +89,28 @@ static bool klp_is_object_loaded(struct klp_object *obj) /* sets obj->mod if object is not vmlinux and module is found */ static void klp_find_object_module(struct klp_object *obj) { + struct module *mod; + if (!klp_is_module(obj)) return; mutex_lock(&module_mutex); /* - * We don't need to take a reference on the module here because we have - * the klp_mutex, which is also taken by the module notifier. This - * prevents any module from unloading until we release the klp_mutex. + * We do not want to block removal of patched modules and therefore + * we do not take a reference here. The patches are removed by + * a going module handler instead. + */ + mod = find_module(obj->name); + /* + * Do not mess work of the module coming and going notifiers. + * Note that the patch might still be needed before the going handler + * is called. Module functions can be called even in the GOING state + * until mod->exit() finishes. This is especially important for + * patches that modify semantic of the functions. */ - obj->mod = find_module(obj->name); + if (mod && mod->klp_alive) + obj->mod = mod; + mutex_unlock(&module_mutex); } @@ -248,11 +260,12 @@ static int klp_find_external_symbol(struct module *pmod, const char *name, /* first, check if it's an exported symbol */ preempt_disable(); sym = find_symbol(name, NULL, NULL, true, true); - preempt_enable(); if (sym) { *addr = sym->value; + preempt_enable(); return 0; } + preempt_enable(); /* otherwise check if it's in another .o within the patch module */ return klp_find_object_symbol(pmod->name, name, addr); @@ -766,6 +779,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) return -EINVAL; obj->state = KLP_DISABLED; + obj->mod = NULL; klp_find_object_module(obj); @@ -960,6 +974,15 @@ static int klp_module_notify(struct notifier_block *nb, unsigned long action, mutex_lock(&klp_mutex); + /* + * Each module has to know that the notifier has been called. + * We never know what module will get patched by a new patch. + */ + if (action == MODULE_STATE_COMING) + mod->klp_alive = true; + else /* MODULE_STATE_GOING */ + mod->klp_alive = false; + list_for_each_entry(patch, &klp_patches, list) { for (obj = patch->objs; obj->funcs; obj++) { if (!klp_is_module(obj) || strcmp(obj->name, mod->name)) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..ba77ab5f64dd 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class) if (!new_class->name) return 0; - list_for_each_entry(class, &all_lock_classes, lock_entry) { + list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) { if (new_class->key - new_class->subclass == class->key) return class->name_version; if (class->name && !strcmp(class->name, new_class->name)) @@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) hash_head = classhashentry(key); /* - * We can walk the hash lockfree, because the hash only - * grows, and we are careful when adding entries to the end: + * We do an RCU walk of the hash, see lockdep_free_key_range(). */ - list_for_each_entry(class, hash_head, hash_entry) { + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return NULL; + + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) struct lockdep_subclass_key *key; struct list_head *hash_head; struct lock_class *class; - unsigned long flags; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); class = look_up_lock_class(lock, subclass); if (likely(class)) @@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) key = lock->key->subkeys + subclass; hash_head = classhashentry(key); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } /* * We have to do the hash-walk again, to avoid races * with another CPU: */ - list_for_each_entry(class, hash_head, hash_entry) + list_for_each_entry_rcu(class, hash_head, hash_entry) { if (class->key == key) goto out_unlock_set; + } + /* * Allocate a new key from the static array, and add it to * the hash: */ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { if (!debug_locks_off_graph_unlock()) { - raw_local_irq_restore(flags); return NULL; } - raw_local_irq_restore(flags); print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!"); dump_stack(); @@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) if (verbose(class)) { graph_unlock(); - raw_local_irq_restore(flags); printk("\nnew class %p: %s", class->key, class->name); if (class->name_version > 1) @@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) printk("\n"); dump_stack(); - raw_local_irq_save(flags); if (!graph_lock()) { - raw_local_irq_restore(flags); return NULL; } } out_unlock_set: graph_unlock(); - raw_local_irq_restore(flags); out_set_class_cache: if (!subclass || force) @@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, entry->distance = distance; entry->trace = *trace; /* - * Since we never remove from the dependency list, the list can - * be walked lockless by other CPUs, it's only allocation - * that must be protected by the spinlock. But this also means - * we must make new entries visible only once writes to the - * entry become visible - hence the RCU op: + * Both allocation and removal are done under the graph lock; but + * iteration is under RCU-sched; see look_up_lock_class() and + * lockdep_free_key_range(). */ list_add_tail_rcu(&entry->entry, head); @@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry, else head = &lock->class->locks_before; - list_for_each_entry(entry, head, entry) { + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + + list_for_each_entry_rcu(entry, head, entry) { if (!lock_accessed(entry)) { unsigned int cq_depth; mark_lock_accessed(entry, lock); @@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr, * We can walk it lock-free, because entries only get added * to the hash: */ - list_for_each_entry(chain, hash_head, entry) { + list_for_each_entry_rcu(chain, hash_head, entry) { if (chain->chain_key == chain_key) { cache_hit: debug_atomic_inc(chain_lookup_hits); @@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (unlikely(!debug_locks)) return; - if (subclass) + if (subclass) { + unsigned long flags; + + if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + current->lockdep_recursion = 1; register_lock_class(lock, subclass, 1); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + } } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size) return addr >= start && addr < start + size; } +/* + * Used in module.c to remove lock classes from memory that is going to be + * freed; and possibly re-used by other modules. + * + * We will have had one sync_sched() before getting here, so we're guaranteed + * nobody will look up these exact classes -- they're properly dead but still + * allocated. + */ void lockdep_free_key_range(void *start, unsigned long size) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i; @@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { if (within(class->key, start, size)) zap_class(class); else if (within(class->name, start, size)) @@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size) if (locked) graph_unlock(); raw_local_irq_restore(flags); + + /* + * Wait for any possible iterators from look_up_lock_class() to pass + * before continuing to free the memory they refer to. + * + * sync_sched() is sufficient because the read-side is IRQ disable. + */ + synchronize_sched(); + + /* + * XXX at this point we could return the resources to the pool; + * instead we leak them. We would need to change to bitmap allocators + * instead of the linear allocators we have now. + */ } void lockdep_reset_lock(struct lockdep_map *lock) { - struct lock_class *class, *next; + struct lock_class *class; struct list_head *head; unsigned long flags; int i, j; @@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) head = classhash_table + i; if (list_empty(head)) continue; - list_for_each_entry_safe(class, next, head, hash_entry) { + list_for_each_entry_rcu(class, head, hash_entry) { int match = 0; for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++) diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index d1fe2ba5bac9..75e114bdf3f2 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) */ return; } - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* Wait until the lock holder passes the lock down. */ arch_mcs_spin_lock_contended(&node->locked); @@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { - struct mcs_spinlock *next = ACCESS_ONCE(node->next); + struct mcs_spinlock *next = READ_ONCE(node->next); if (likely(!next)) { /* @@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) + while (!(next = READ_ONCE(node->next))) cpu_relax_lowlatency(); } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 94674e5919cb..4cccea6b8934 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,7 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> -#include "mcs_spinlock.h" +#include <linux/osq_lock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock, } #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -static inline bool owner_running(struct mutex *lock, struct task_struct *owner) -{ - if (lock->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * lock->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); - - return owner->on_cpu; -} - /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. */ static noinline -int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) { + bool ret = true; + rcu_read_lock(); - while (owner_running(lock, owner)) { - if (need_resched()) + while (lock->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking lock->owner still matches owner. If that fails, + * owner might point to freed memory. If it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + if (!owner->on_cpu || need_resched()) { + ret = false; break; + } cpu_relax_lowlatency(); } rcu_read_unlock(); - /* - * We break out the loop above on need_resched() and when the - * owner changed, which is a sign for heavy contention. Return - * success only when lock->owner is NULL. - */ - return lock->owner == NULL; + return ret; } /* @@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner) retval = owner->on_cpu; rcu_read_unlock(); @@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * As such, when deadlock detection needs to be * performed the optimistic spinning cannot be done. */ - if (ACCESS_ONCE(ww->ctx)) + if (READ_ONCE(ww->ctx)) break; } @@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) break; @@ -490,7 +481,7 @@ static inline int __sched __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); if (!hold_ctx) return 0; diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index c112d00341b0..dc85ee23a26f 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) prev = decode_cpu(old); node->prev = prev; - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* * Normally @prev is untouchable after the above store; because at that @@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */ - while (!ACCESS_ONCE(node->locked)) { + while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. */ @@ -148,7 +148,7 @@ unqueue: * Or we race against a concurrent unqueue()'s step-B, in which * case its step-C will write us a new @node->prev pointer. */ - prev = ACCESS_ONCE(node->prev); + prev = READ_ONCE(node->prev); } /* @@ -170,8 +170,8 @@ unqueue: * it will wait in Step-A. */ - ACCESS_ONCE(next->prev) = prev; - ACCESS_ONCE(prev->next) = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); return false; } @@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock) node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); return; } next = osq_wait_next(lock, node, NULL); if (next) - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6357265a31ad..b73279367087 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) * * @task: the task owning the mutex (owner) for which a chain walk is * probably needed - * @deadlock_detect: do we have to carry out deadlock detection? + * @chwalk: do we have to carry out deadlock detection? * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck * things for a task that has just got its priority adjusted, and * is waiting on a mutex) diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 2555ae15ec14..3a5048572065 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) list_del(&waiter->list); tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2f7cc4076f50..3417d0172a5d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -14,8 +14,9 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/sched/rt.h> +#include <linux/osq_lock.h> -#include "mcs_spinlock.h" +#include "rwsem.h" /* * Guide to the rw_semaphore's count field for common values. @@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); @@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { if (!list_is_singular(&sem->wait_list)) rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + rwsem_set_owner(sem); return true; } @@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = ACCESS_ONCE(sem->count); + long old, count = READ_ONCE(sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); - if (old == count) + if (old == count) { + rwsem_set_owner(sem); return true; + } count = old; } @@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = false; + bool ret = true; if (need_resched()) return false; rcu_read_lock(); - owner = ACCESS_ONCE(sem->owner); - if (owner) - on_cpu = owner->on_cpu; - rcu_read_unlock(); - - /* - * If sem->owner is not set, yet we have just recently entered the - * slowpath, then there is a possibility reader(s) may have the lock. - * To be safe, avoid spinning in these situations. - */ - return on_cpu; -} - -static inline bool owner_running(struct rw_semaphore *sem, - struct task_struct *owner) -{ - if (sem->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * sem->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); + owner = READ_ONCE(sem->owner); + if (!owner) { + long count = READ_ONCE(sem->count); + /* + * If sem->owner is not set, yet we have just recently entered the + * slowpath with the lock being active, then there is a possibility + * reader(s) may have the lock. To be safe, bail spinning in these + * situations. + */ + if (count & RWSEM_ACTIVE_MASK) + ret = false; + goto done; + } - return owner->on_cpu; + ret = owner->on_cpu; +done: + rcu_read_unlock(); + return ret; } static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) { + long count; + rcu_read_lock(); - while (owner_running(sem, owner)) { - if (need_resched()) - break; + while (sem->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking sem->owner still matches owner, if that fails, + * owner might point to free()d memory, if it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + /* abort spinning when need_resched or owner is not running */ + if (!owner->on_cpu || need_resched()) { + rcu_read_unlock(); + return false; + } cpu_relax_lowlatency(); } rcu_read_unlock(); + if (READ_ONCE(sem->owner)) + return true; /* new owner, continue spinning */ + /* - * We break out the loop above on need_resched() or when the - * owner changed, which is a sign for heavy contention. Return - * success only when sem->owner is NULL. + * When the owner is not set, the lock could be free or + * held by readers. Check the counter to verify the + * state. */ - return sem->owner == NULL; + count = READ_ONCE(sem->count); + return (count == 0 || count == RWSEM_WAITING_BIAS); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) @@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) goto done; while (true) { - owner = ACCESS_ONCE(sem->owner); + owner = READ_ONCE(sem->owner); if (owner && !rwsem_spin_on_owner(sem, owner)) break; @@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = ACCESS_ONCE(sem->count); + count = READ_ONCE(sem->count); /* * If there were already threads queued before us and there are diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e2d3bc7f03b4..205be0ce34de 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -9,29 +9,9 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/rwsem.h> - #include <linux/atomic.h> -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ - sem->owner = current; -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ - sem->owner = NULL; -} - -#else -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ -} -#endif +#include "rwsem.h" /* * lock for reading diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h new file mode 100644 index 000000000000..870ed9a5b426 --- /dev/null +++ b/kernel/locking/rwsem.h @@ -0,0 +1,20 @@ +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ + sem->owner = current; +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ + sem->owner = NULL; +} + +#else +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ +} +#endif diff --git a/kernel/module.c b/kernel/module.c index b34813f725e9..ec53f594e9c9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,7 +56,6 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> -#include <linux/kasan.h> #include <linux/jump_label.h> #include <linux/pfn.h> #include <linux/bsearch.h> @@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { } void __weak module_memfree(void *module_region) { vfree(module_region); - kasan_module_free(module_region); } void __weak module_arch_cleanup(struct module *mod) @@ -1867,7 +1865,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - /* Free lock-classes: */ + /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod->module_core, mod->core_size); /* Finally, free the core (containing the module structure) */ @@ -2313,11 +2311,13 @@ static void layout_symtab(struct module *mod, struct load_info *info) info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); mod->core_size += strtab_size; + mod->core_size = debug_align(mod->core_size); /* Put string table section at end of init part of module. */ strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, info->index.str) | INIT_OFFSET_MASK; + mod->init_size = debug_align(mod->init_size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); } @@ -2479,6 +2479,23 @@ static int elf_header_check(struct load_info *info) return 0; } +#define COPY_CHUNK_SIZE (16*PAGE_SIZE) + +static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len) +{ + do { + unsigned long n = min(len, COPY_CHUNK_SIZE); + + if (copy_from_user(dst, usrc, n) != 0) + return -EFAULT; + cond_resched(); + dst += n; + usrc += n; + len -= n; + } while (len); + return 0; +} + /* Sets info->hdr and info->len. */ static int copy_module_from_user(const void __user *umod, unsigned long len, struct load_info *info) @@ -2498,7 +2515,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len, if (!info->hdr) return -ENOMEM; - if (copy_from_user(info->hdr, umod, info->len) != 0) { + if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) { vfree(info->hdr); return -EFAULT; } @@ -3349,9 +3366,6 @@ static int load_module(struct load_info *info, const char __user *uargs, module_bug_cleanup(mod); mutex_unlock(&module_mutex); - /* Free lock-classes: */ - lockdep_free_key_range(mod->module_core, mod->core_size); - /* we can't deallocate the module until we clear memory protection */ unset_module_init_ro_nx(mod); unset_module_core_ro_nx(mod); @@ -3375,6 +3389,9 @@ static int load_module(struct load_info *info, const char __user *uargs, synchronize_rcu(); mutex_unlock(&module_mutex); free_module: + /* Free lock-classes; relies on the preceding sync_rcu() */ + lockdep_free_key_range(mod->module_core, mod->core_size); + module_deallocate(mod, info); free_copy: free_copy(info); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c24d5a23bf93..5235dd4e1e2f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -955,25 +955,6 @@ static void mark_nosave_pages(struct memory_bitmap *bm) } } -static bool is_nosave_page(unsigned long pfn) -{ - struct nosave_region *region; - - list_for_each_entry(region, &nosave_regions, list) { - if (pfn >= region->start_pfn && pfn < region->end_pfn) { - pr_err("PM: %#010llx in e820 nosave region: " - "[mem %#010llx-%#010llx]\n", - (unsigned long long) pfn << PAGE_SHIFT, - (unsigned long long) region->start_pfn << PAGE_SHIFT, - ((unsigned long long) region->end_pfn << PAGE_SHIFT) - - 1); - return true; - } - } - - return false; -} - /** * create_basic_memory_bitmaps - create bitmaps needed for marking page * frames that should not be saved and free page frames. The pointers @@ -2042,7 +2023,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm) do { pfn = memory_bm_next_pfn(bm); if (likely(pfn != BM_END_OF_MAP)) { - if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn)) + if (likely(pfn_valid(pfn))) swsusp_set_page_free(pfn_to_page(pfn)); else return -EFAULT; diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index cbd69d842341..2ca4a8b5fe57 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -3,7 +3,7 @@ struct console_cmdline { - char name[8]; /* Name of the driver */ + char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 01cfd69c54c6..bb0635bd74f2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2464,6 +2464,7 @@ void register_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f0f831e8a345..261af7bfcb67 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -690,6 +690,23 @@ static inline bool got_nohz_idle_kick(void) bool sched_can_stop_tick(void) { /* + * FIFO realtime policy runs the highest priority task. Other runnable + * tasks are of a lower priority. The scheduler tick does nothing. + */ + if (current->policy == SCHED_FIFO) + return true; + + /* + * Round-robin realtime tasks time slice with other tasks at the same + * realtime priority. Is this task the only one at this priority? + */ + if (current->policy == SCHED_RR) { + struct sched_rt_entity *rt_se = ¤t->rt; + + return rt_se->run_list.prev == rt_se->run_list.next; + } + + /* * More than one running task need preemption. * nr_running update is assumed to be visible * after IPI is sent from wakers. @@ -996,6 +1013,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } +static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); + +void register_task_migration_notifier(struct notifier_block *n) +{ + atomic_notifier_chain_register(&task_migration_notifier, n); +} + #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1026,10 +1050,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { + struct task_migration_notifier tmn; + if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); + + tmn.task = p; + tmn.from_cpu = task_cpu(p); + tmn.to_cpu = new_cpu; + + atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); @@ -3034,6 +3066,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } else { if (dl_prio(oldprio)) p->dl.dl_boosted = 0; + if (rt_prio(oldprio)) + p->rt.timeout = 0; p->sched_class = &fair_sched_class; } @@ -5318,36 +5352,13 @@ static int sched_cpu_active(struct notifier_block *nfb, static int sched_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned long flags; - long cpu = (long)hcpu; - struct dl_bw *dl_b; - switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: - set_cpu_active(cpu, false); - - /* explicitly allow suspend */ - if (!(action & CPU_TASKS_FROZEN)) { - bool overflow; - int cpus; - - rcu_read_lock_sched(); - dl_b = dl_bw_of(cpu); - - raw_spin_lock_irqsave(&dl_b->lock, flags); - cpus = dl_bw_cpus(cpu); - overflow = __dl_overflow(dl_b, cpus, 0, 0); - raw_spin_unlock_irqrestore(&dl_b->lock, flags); - - rcu_read_unlock_sched(); - - if (overflow) - return notifier_from_errno(-EBUSY); - } + set_cpu_active((long)hcpu, false); return NOTIFY_OK; + default: + return NOTIFY_DONE; } - - return NOTIFY_DONE; } static int __init migration_init(void) @@ -5428,17 +5439,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - /* - * Even though we initialize ->capacity to something semi-sane, - * we leave capacity_orig unset. This allows us to detect if - * domain iteration is still funny without causing /0 traps. - */ - if (!group->sgc->capacity_orig) { - printk(KERN_CONT "\n"); - printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n"); - break; - } - if (!cpumask_weight(sched_group_cpus(group))) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: empty group\n"); @@ -5922,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) * die on a /0 trap. */ sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span); - sg->sgc->capacity_orig = sg->sgc->capacity; /* * Make sure the first group of this domain contains the @@ -6233,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu) */ if (sd->flags & SD_SHARE_CPUCAPACITY) { + sd->flags |= SD_PREFER_SIBLING; sd->imbalance_pct = 110; sd->smt_gain = 1178; /* ~15% */ @@ -6998,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, */ case CPU_ONLINE: - case CPU_DOWN_FAILED: cpuset_update_active_cpus(true); break; default: @@ -7010,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, void *hcpu) { - switch (action) { + unsigned long flags; + long cpu = (long)hcpu; + struct dl_bw *dl_b; + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: + /* explicitly allow suspend */ + if (!(action & CPU_TASKS_FROZEN)) { + bool overflow; + int cpus; + + rcu_read_lock_sched(); + dl_b = dl_bw_of(cpu); + + raw_spin_lock_irqsave(&dl_b->lock, flags); + cpus = dl_bw_cpus(cpu); + overflow = __dl_overflow(dl_b, cpus, 0, 0); + raw_spin_unlock_irqrestore(&dl_b->lock, flags); + + rcu_read_unlock_sched(); + + if (overflow) + return notifier_from_errno(-EBUSY); + } cpuset_update_active_cpus(false); break; case CPU_DOWN_PREPARE_FROZEN: @@ -7156,8 +7177,8 @@ void __init sched_init(void) rq->calc_load_active = 0; rq->calc_load_update = jiffies + LOAD_FREQ; init_cfs_rq(&rq->cfs); - init_rt_rq(&rq->rt, rq); - init_dl_rq(&rq->dl, rq); + init_rt_rq(&rq->rt); + init_dl_rq(&rq->dl); #ifdef CONFIG_FAIR_GROUP_SCHED root_task_group.shares = ROOT_TASK_GROUP_LOAD; INIT_LIST_HEAD(&rq->leaf_cfs_rq_list); @@ -7197,7 +7218,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; - rq->cpu_capacity = SCHED_CAPACITY_SCALE; + rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE; rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; @@ -7796,7 +7817,7 @@ static int sched_rt_global_constraints(void) } #endif /* CONFIG_RT_GROUP_SCHED */ -static int sched_dl_global_constraints(void) +static int sched_dl_global_validate(void) { u64 runtime = global_rt_runtime(); u64 period = global_rt_period(); @@ -7897,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write, if (ret) goto undo; - ret = sched_rt_global_constraints(); + ret = sched_dl_global_validate(); if (ret) goto undo; - ret = sched_dl_global_constraints(); + ret = sched_rt_global_constraints(); if (ret) goto undo; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 3fa8fa6d9403..5e95145088fd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -69,7 +69,7 @@ void init_dl_bw(struct dl_bw *dl_b) dl_b->total_bw = 0; } -void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq) +void init_dl_rq(struct dl_rq *dl_rq) { dl_rq->rb_root = RB_ROOT; @@ -218,6 +218,52 @@ static inline void set_post_schedule(struct rq *rq) rq->post_schedule = has_pushable_dl_tasks(rq); } +static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq); + +static void dl_task_offline_migration(struct rq *rq, struct task_struct *p) +{ + struct rq *later_rq = NULL; + bool fallback = false; + + later_rq = find_lock_later_rq(p, rq); + + if (!later_rq) { + int cpu; + + /* + * If we cannot preempt any rq, fall back to pick any + * online cpu. + */ + fallback = true; + cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p)); + if (cpu >= nr_cpu_ids) { + /* + * Fail to find any suitable cpu. + * The task will never come back! + */ + BUG_ON(dl_bandwidth_enabled()); + + /* + * If admission control is disabled we + * try a little harder to let the task + * run. + */ + cpu = cpumask_any(cpu_active_mask); + } + later_rq = cpu_rq(cpu); + double_lock_balance(rq, later_rq); + } + + deactivate_task(rq, p, 0); + set_task_cpu(p, later_rq->cpu); + activate_task(later_rq, p, ENQUEUE_REPLENISH); + + if (!fallback) + resched_curr(later_rq); + + double_unlock_balance(rq, later_rq); +} + #else static inline @@ -514,7 +560,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) unsigned long flags; struct rq *rq; - rq = task_rq_lock(current, &flags); + rq = task_rq_lock(p, &flags); /* * We need to take care of several possible races here: @@ -536,6 +582,17 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) sched_clock_tick(); update_rq_clock(rq); +#ifdef CONFIG_SMP + /* + * If we find that the rq the task was on is no longer + * available, we need to select a new rq. + */ + if (unlikely(!rq->online)) { + dl_task_offline_migration(rq, p); + goto unlock; + } +#endif + /* * If the throttle happened during sched-out; like: * @@ -569,7 +626,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) push_dl_task(rq); #endif unlock: - task_rq_unlock(rq, current, &flags); + task_rq_unlock(rq, p, &flags); return HRTIMER_NORESTART; } @@ -914,6 +971,12 @@ static void yield_task_dl(struct rq *rq) } update_rq_clock(rq); update_curr_dl(rq); + /* + * Tell update_rq_clock() that we've just updated, + * so we don't do microscopic update in schedule() + * and double the fastpath cost. + */ + rq_clock_skip_update(rq, true); } #ifdef CONFIG_SMP @@ -1659,14 +1722,6 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) { int check_resched = 1; - /* - * If p is throttled, don't consider the possibility - * of preempting rq->curr, the check will be done right - * after its runtime will get replenished. - */ - if (unlikely(p->dl.dl_throttled)) - return; - if (task_on_rq_queued(p) && rq->curr != p) { #ifdef CONFIG_SMP if (p->nr_cpus_allowed > 1 && rq->dl.overloaded && diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 8baaf858d25c..a245c1fc6f0a 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -71,7 +71,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group if (!se) { struct sched_avg *avg = &cpu_rq(cpu)->avg; P(avg->runnable_avg_sum); - P(avg->runnable_avg_period); + P(avg->avg_period); return; } @@ -94,8 +94,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group P(se->load.weight); #ifdef CONFIG_SMP P(se->avg.runnable_avg_sum); - P(se->avg.runnable_avg_period); + P(se->avg.running_avg_sum); + P(se->avg.avg_period); P(se->avg.load_avg_contrib); + P(se->avg.utilization_avg_contrib); P(se->avg.decay_count); #endif #undef PN @@ -214,6 +216,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %ld\n", "blocked_load_avg", cfs_rq->blocked_load_avg); + SEQ_printf(m, " .%-30s: %ld\n", "utilization_load_avg", + cfs_rq->utilization_load_avg); #ifdef CONFIG_FAIR_GROUP_SCHED SEQ_printf(m, " .%-30s: %ld\n", "tg_load_contrib", cfs_rq->tg_load_contrib); @@ -636,8 +640,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.load.weight); #ifdef CONFIG_SMP P(se.avg.runnable_avg_sum); - P(se.avg.runnable_avg_period); + P(se.avg.running_avg_sum); + P(se.avg.avg_period); P(se.avg.load_avg_contrib); + P(se.avg.utilization_avg_contrib); P(se.avg.decay_count); #endif P(policy); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7ce18f3c097a..ffeaa4105e48 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -670,6 +670,7 @@ static int select_idle_sibling(struct task_struct *p, int cpu); static unsigned long task_h_load(struct task_struct *p); static inline void __update_task_entity_contrib(struct sched_entity *se); +static inline void __update_task_entity_utilization(struct sched_entity *se); /* Give new task start runnable values to heavy its load in infant time */ void init_task_runnable_average(struct task_struct *p) @@ -677,9 +678,10 @@ void init_task_runnable_average(struct task_struct *p) u32 slice; slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; - p->se.avg.runnable_avg_sum = slice; - p->se.avg.runnable_avg_period = slice; + p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice; + p->se.avg.avg_period = slice; __update_task_entity_contrib(&p->se); + __update_task_entity_utilization(&p->se); } #else void init_task_runnable_average(struct task_struct *p) @@ -1196,9 +1198,11 @@ static void task_numa_assign(struct task_numa_env *env, static bool load_too_imbalanced(long src_load, long dst_load, struct task_numa_env *env) { - long imb, old_imb; - long orig_src_load, orig_dst_load; long src_capacity, dst_capacity; + long orig_src_load; + long load_a, load_b; + long moved_load; + long imb; /* * The load is corrected for the CPU capacity available on each node. @@ -1211,30 +1215,39 @@ static bool load_too_imbalanced(long src_load, long dst_load, dst_capacity = env->dst_stats.compute_capacity; /* We care about the slope of the imbalance, not the direction. */ - if (dst_load < src_load) - swap(dst_load, src_load); + load_a = dst_load; + load_b = src_load; + if (load_a < load_b) + swap(load_a, load_b); /* Is the difference below the threshold? */ - imb = dst_load * src_capacity * 100 - - src_load * dst_capacity * env->imbalance_pct; + imb = load_a * src_capacity * 100 - + load_b * dst_capacity * env->imbalance_pct; if (imb <= 0) return false; /* * The imbalance is above the allowed threshold. - * Compare it with the old imbalance. + * Allow a move that brings us closer to a balanced situation, + * without moving things past the point of balance. */ orig_src_load = env->src_stats.load; - orig_dst_load = env->dst_stats.load; - if (orig_dst_load < orig_src_load) - swap(orig_dst_load, orig_src_load); - - old_imb = orig_dst_load * src_capacity * 100 - - orig_src_load * dst_capacity * env->imbalance_pct; + /* + * In a task swap, there will be one load moving from src to dst, + * and another moving back. This is the net sum of both moves. + * A simple task move will always have a positive value. + * Allow the move if it brings the system closer to a balanced + * situation, without crossing over the balance point. + */ + moved_load = orig_src_load - src_load; - /* Would this change make things worse? */ - return (imb > old_imb); + if (moved_load > 0) + /* Moving src -> dst. Did we overshoot balance? */ + return src_load * dst_capacity < dst_load * src_capacity; + else + /* Moving dst -> src. Did we overshoot balance? */ + return dst_load * src_capacity < src_load * dst_capacity; } /* @@ -1609,9 +1622,11 @@ static void update_task_scan_period(struct task_struct *p, /* * If there were no record hinting faults then either the task is * completely idle or all activity is areas that are not of interest - * to automatic numa balancing. Scan slower + * to automatic numa balancing. Related to that, if there were failed + * migration then it implies we are migrating too quickly or the local + * node is overloaded. In either case, scan slower */ - if (local + shared == 0) { + if (local + shared == 0 || p->numa_faults_locality[2]) { p->numa_scan_period = min(p->numa_scan_period_max, p->numa_scan_period << 1); @@ -1673,7 +1688,7 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) *period = now - p->last_task_numa_placement; } else { delta = p->se.avg.runnable_avg_sum; - *period = p->se.avg.runnable_avg_period; + *period = p->se.avg.avg_period; } p->last_sum_exec_runtime = runtime; @@ -1763,6 +1778,8 @@ static int preferred_group_nid(struct task_struct *p, int nid) } } /* Next round, evaluate the nodes within max_group. */ + if (!max_faults) + break; nodes = max_group; } return nid; @@ -2080,6 +2097,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; + if (flags & TNF_MIGRATE_FAIL) + p->numa_faults_locality[2] += pages; p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; @@ -2161,8 +2180,10 @@ void task_numa_work(struct callback_head *work) vma = mm->mmap; } for (; vma; vma = vma->vm_next) { - if (!vma_migratable(vma) || !vma_policy_mof(vma)) + if (!vma_migratable(vma) || !vma_policy_mof(vma) || + is_vm_hugetlb_page(vma)) { continue; + } /* * Shared library pages mapped by multiple processes are not @@ -2497,13 +2518,15 @@ static u32 __compute_runnable_contrib(u64 n) * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... ) * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ -static __always_inline int __update_entity_runnable_avg(u64 now, +static __always_inline int __update_entity_runnable_avg(u64 now, int cpu, struct sched_avg *sa, - int runnable) + int runnable, + int running) { u64 delta, periods; u32 runnable_contrib; int delta_w, decayed = 0; + unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu); delta = now - sa->last_runnable_update; /* @@ -2525,7 +2548,7 @@ static __always_inline int __update_entity_runnable_avg(u64 now, sa->last_runnable_update = now; /* delta_w is the amount already accumulated against our next period */ - delta_w = sa->runnable_avg_period % 1024; + delta_w = sa->avg_period % 1024; if (delta + delta_w >= 1024) { /* period roll-over */ decayed = 1; @@ -2538,7 +2561,10 @@ static __always_inline int __update_entity_runnable_avg(u64 now, delta_w = 1024 - delta_w; if (runnable) sa->runnable_avg_sum += delta_w; - sa->runnable_avg_period += delta_w; + if (running) + sa->running_avg_sum += delta_w * scale_freq + >> SCHED_CAPACITY_SHIFT; + sa->avg_period += delta_w; delta -= delta_w; @@ -2548,20 +2574,28 @@ static __always_inline int __update_entity_runnable_avg(u64 now, sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum, periods + 1); - sa->runnable_avg_period = decay_load(sa->runnable_avg_period, + sa->running_avg_sum = decay_load(sa->running_avg_sum, + periods + 1); + sa->avg_period = decay_load(sa->avg_period, periods + 1); /* Efficiently calculate \sum (1..n_period) 1024*y^i */ runnable_contrib = __compute_runnable_contrib(periods); if (runnable) sa->runnable_avg_sum += runnable_contrib; - sa->runnable_avg_period += runnable_contrib; + if (running) + sa->running_avg_sum += runnable_contrib * scale_freq + >> SCHED_CAPACITY_SHIFT; + sa->avg_period += runnable_contrib; } /* Remainder of delta accrued against u_0` */ if (runnable) sa->runnable_avg_sum += delta; - sa->runnable_avg_period += delta; + if (running) + sa->running_avg_sum += delta * scale_freq + >> SCHED_CAPACITY_SHIFT; + sa->avg_period += delta; return decayed; } @@ -2578,6 +2612,8 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se) return 0; se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); + se->avg.utilization_avg_contrib = + decay_load(se->avg.utilization_avg_contrib, decays); return decays; } @@ -2613,7 +2649,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa, /* The fraction of a cpu used by this cfs_rq */ contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT, - sa->runnable_avg_period + 1); + sa->avg_period + 1); contrib -= cfs_rq->tg_runnable_contrib; if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) { @@ -2666,7 +2702,8 @@ static inline void __update_group_entity_contrib(struct sched_entity *se) static inline void update_rq_runnable_avg(struct rq *rq, int runnable) { - __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); + __update_entity_runnable_avg(rq_clock_task(rq), cpu_of(rq), &rq->avg, + runnable, runnable); __update_tg_runnable_avg(&rq->avg, &rq->cfs); } #else /* CONFIG_FAIR_GROUP_SCHED */ @@ -2684,7 +2721,7 @@ static inline void __update_task_entity_contrib(struct sched_entity *se) /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */ contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight); - contrib /= (se->avg.runnable_avg_period + 1); + contrib /= (se->avg.avg_period + 1); se->avg.load_avg_contrib = scale_load(contrib); } @@ -2703,6 +2740,30 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se) return se->avg.load_avg_contrib - old_contrib; } + +static inline void __update_task_entity_utilization(struct sched_entity *se) +{ + u32 contrib; + + /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */ + contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE); + contrib /= (se->avg.avg_period + 1); + se->avg.utilization_avg_contrib = scale_load(contrib); +} + +static long __update_entity_utilization_avg_contrib(struct sched_entity *se) +{ + long old_contrib = se->avg.utilization_avg_contrib; + + if (entity_is_task(se)) + __update_task_entity_utilization(se); + else + se->avg.utilization_avg_contrib = + group_cfs_rq(se)->utilization_load_avg; + + return se->avg.utilization_avg_contrib - old_contrib; +} + static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq, long load_contrib) { @@ -2719,7 +2780,8 @@ static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - long contrib_delta; + long contrib_delta, utilization_delta; + int cpu = cpu_of(rq_of(cfs_rq)); u64 now; /* @@ -2731,18 +2793,22 @@ static inline void update_entity_load_avg(struct sched_entity *se, else now = cfs_rq_clock_task(group_cfs_rq(se)); - if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq)) + if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq, + cfs_rq->curr == se)) return; contrib_delta = __update_entity_load_avg_contrib(se); + utilization_delta = __update_entity_utilization_avg_contrib(se); if (!update_cfs_rq) return; - if (se->on_rq) + if (se->on_rq) { cfs_rq->runnable_load_avg += contrib_delta; - else + cfs_rq->utilization_load_avg += utilization_delta; + } else { subtract_blocked_load_contrib(cfs_rq, -contrib_delta); + } } /* @@ -2817,6 +2883,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, } cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; + cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib; /* we force update consideration on load-balancer moves */ update_cfs_rq_blocked_load(cfs_rq, !wakeup); } @@ -2835,6 +2902,7 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, update_cfs_rq_blocked_load(cfs_rq, !sleep); cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; + cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib; if (sleep) { cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); @@ -3172,6 +3240,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) */ update_stats_wait_end(cfs_rq, se); __dequeue_entity(cfs_rq, se); + update_entity_load_avg(se, 1); } update_stats_curr_start(cfs_rq, se); @@ -4298,6 +4367,11 @@ static unsigned long capacity_of(int cpu) return cpu_rq(cpu)->cpu_capacity; } +static unsigned long capacity_orig_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_orig; +} + static unsigned long cpu_avg_load_per_task(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -4711,6 +4785,33 @@ next: done: return target; } +/* + * get_cpu_usage returns the amount of capacity of a CPU that is used by CFS + * tasks. The unit of the return value must be the one of capacity so we can + * compare the usage with the capacity of the CPU that is available for CFS + * task (ie cpu_capacity). + * cfs.utilization_load_avg is the sum of running time of runnable tasks on a + * CPU. It represents the amount of utilization of a CPU in the range + * [0..SCHED_LOAD_SCALE]. The usage of a CPU can't be higher than the full + * capacity of the CPU because it's about the running time on this CPU. + * Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE + * because of unfortunate rounding in avg_period and running_load_avg or just + * after migrating tasks until the average stabilizes with the new running + * time. So we need to check that the usage stays into the range + * [0..cpu_capacity_orig] and cap if necessary. + * Without capping the usage, a group could be seen as overloaded (CPU0 usage + * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity + */ +static int get_cpu_usage(int cpu) +{ + unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg; + unsigned long capacity = capacity_orig_of(cpu); + + if (usage >= SCHED_LOAD_SCALE) + return capacity; + + return (usage * capacity) >> SCHED_LOAD_SHIFT; +} /* * select_task_rq_fair: Select target runqueue for the waking task in domains @@ -5837,12 +5938,12 @@ struct sg_lb_stats { unsigned long sum_weighted_load; /* Weighted load of group's tasks */ unsigned long load_per_task; unsigned long group_capacity; + unsigned long group_usage; /* Total usage of the group */ unsigned int sum_nr_running; /* Nr tasks running in the group */ - unsigned int group_capacity_factor; unsigned int idle_cpus; unsigned int group_weight; enum group_type group_type; - int group_has_free_capacity; + int group_no_capacity; #ifdef CONFIG_NUMA_BALANCING unsigned int nr_numa_running; unsigned int nr_preferred_running; @@ -5913,16 +6014,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd, return load_idx; } -static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu) -{ - return SCHED_CAPACITY_SCALE; -} - -unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu) -{ - return default_scale_capacity(sd, cpu); -} - static unsigned long default_scale_cpu_capacity(struct sched_domain *sd, int cpu) { if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1)) @@ -5939,7 +6030,7 @@ unsigned long __weak arch_scale_cpu_capacity(struct sched_domain *sd, int cpu) static unsigned long scale_rt_capacity(int cpu) { struct rq *rq = cpu_rq(cpu); - u64 total, available, age_stamp, avg; + u64 total, used, age_stamp, avg; s64 delta; /* @@ -5955,19 +6046,12 @@ static unsigned long scale_rt_capacity(int cpu) total = sched_avg_period() + delta; - if (unlikely(total < avg)) { - /* Ensures that capacity won't end up being negative */ - available = 0; - } else { - available = total - avg; - } - - if (unlikely((s64)total < SCHED_CAPACITY_SCALE)) - total = SCHED_CAPACITY_SCALE; + used = div_u64(avg, total); - total >>= SCHED_CAPACITY_SHIFT; + if (likely(used < SCHED_CAPACITY_SCALE)) + return SCHED_CAPACITY_SCALE - used; - return div_u64(available, total); + return 1; } static void update_cpu_capacity(struct sched_domain *sd, int cpu) @@ -5982,14 +6066,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) capacity >>= SCHED_CAPACITY_SHIFT; - sdg->sgc->capacity_orig = capacity; - - if (sched_feat(ARCH_CAPACITY)) - capacity *= arch_scale_freq_capacity(sd, cpu); - else - capacity *= default_scale_capacity(sd, cpu); - - capacity >>= SCHED_CAPACITY_SHIFT; + cpu_rq(cpu)->cpu_capacity_orig = capacity; capacity *= scale_rt_capacity(cpu); capacity >>= SCHED_CAPACITY_SHIFT; @@ -6005,7 +6082,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu) { struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; - unsigned long capacity, capacity_orig; + unsigned long capacity; unsigned long interval; interval = msecs_to_jiffies(sd->balance_interval); @@ -6017,7 +6094,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu) return; } - capacity_orig = capacity = 0; + capacity = 0; if (child->flags & SD_OVERLAP) { /* @@ -6037,19 +6114,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu) * Use capacity_of(), which is set irrespective of domains * in update_cpu_capacity(). * - * This avoids capacity/capacity_orig from being 0 and + * This avoids capacity from being 0 and * causing divide-by-zero issues on boot. - * - * Runtime updates will correct capacity_orig. */ if (unlikely(!rq->sd)) { - capacity_orig += capacity_of(cpu); capacity += capacity_of(cpu); continue; } sgc = rq->sd->groups->sgc; - capacity_orig += sgc->capacity_orig; capacity += sgc->capacity; } } else { @@ -6060,39 +6133,24 @@ void update_group_capacity(struct sched_domain *sd, int cpu) group = child->groups; do { - capacity_orig += group->sgc->capacity_orig; capacity += group->sgc->capacity; group = group->next; } while (group != child->groups); } - sdg->sgc->capacity_orig = capacity_orig; sdg->sgc->capacity = capacity; } /* - * Try and fix up capacity for tiny siblings, this is needed when - * things like SD_ASYM_PACKING need f_b_g to select another sibling - * which on its own isn't powerful enough. - * - * See update_sd_pick_busiest() and check_asym_packing(). + * Check whether the capacity of the rq has been noticeably reduced by side + * activity. The imbalance_pct is used for the threshold. + * Return true is the capacity is reduced */ static inline int -fix_small_capacity(struct sched_domain *sd, struct sched_group *group) +check_cpu_capacity(struct rq *rq, struct sched_domain *sd) { - /* - * Only siblings can have significantly less than SCHED_CAPACITY_SCALE - */ - if (!(sd->flags & SD_SHARE_CPUCAPACITY)) - return 0; - - /* - * If ~90% of the cpu_capacity is still there, we're good. - */ - if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29) - return 1; - - return 0; + return ((rq->cpu_capacity * sd->imbalance_pct) < + (rq->cpu_capacity_orig * 100)); } /* @@ -6130,37 +6188,56 @@ static inline int sg_imbalanced(struct sched_group *group) } /* - * Compute the group capacity factor. - * - * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by - * first dividing out the smt factor and computing the actual number of cores - * and limit unit capacity with that. + * group_has_capacity returns true if the group has spare capacity that could + * be used by some tasks. + * We consider that a group has spare capacity if the * number of task is + * smaller than the number of CPUs or if the usage is lower than the available + * capacity for CFS tasks. + * For the latter, we use a threshold to stabilize the state, to take into + * account the variance of the tasks' load and to return true if the available + * capacity in meaningful for the load balancer. + * As an example, an available capacity of 1% can appear but it doesn't make + * any benefit for the load balance. */ -static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group) +static inline bool +group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs) { - unsigned int capacity_factor, smt, cpus; - unsigned int capacity, capacity_orig; + if (sgs->sum_nr_running < sgs->group_weight) + return true; - capacity = group->sgc->capacity; - capacity_orig = group->sgc->capacity_orig; - cpus = group->group_weight; + if ((sgs->group_capacity * 100) > + (sgs->group_usage * env->sd->imbalance_pct)) + return true; + + return false; +} - /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */ - smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig); - capacity_factor = cpus / smt; /* cores */ +/* + * group_is_overloaded returns true if the group has more tasks than it can + * handle. + * group_is_overloaded is not equals to !group_has_capacity because a group + * with the exact right number of tasks, has no more spare capacity but is not + * overloaded so both group_has_capacity and group_is_overloaded return + * false. + */ +static inline bool +group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) +{ + if (sgs->sum_nr_running <= sgs->group_weight) + return false; - capacity_factor = min_t(unsigned, - capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE)); - if (!capacity_factor) - capacity_factor = fix_small_capacity(env->sd, group); + if ((sgs->group_capacity * 100) < + (sgs->group_usage * env->sd->imbalance_pct)) + return true; - return capacity_factor; + return false; } -static enum group_type -group_classify(struct sched_group *group, struct sg_lb_stats *sgs) +static enum group_type group_classify(struct lb_env *env, + struct sched_group *group, + struct sg_lb_stats *sgs) { - if (sgs->sum_nr_running > sgs->group_capacity_factor) + if (sgs->group_no_capacity) return group_overloaded; if (sg_imbalanced(group)) @@ -6198,6 +6275,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, load = source_load(i, load_idx); sgs->group_load += load; + sgs->group_usage += get_cpu_usage(i); sgs->sum_nr_running += rq->cfs.h_nr_running; if (rq->nr_running > 1) @@ -6220,11 +6298,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; sgs->group_weight = group->group_weight; - sgs->group_capacity_factor = sg_capacity_factor(env, group); - sgs->group_type = group_classify(group, sgs); - if (sgs->group_capacity_factor > sgs->sum_nr_running) - sgs->group_has_free_capacity = 1; + sgs->group_no_capacity = group_is_overloaded(env, sgs); + sgs->group_type = group_classify(env, group, sgs); } /** @@ -6346,18 +6422,19 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd /* * In case the child domain prefers tasks go to siblings - * first, lower the sg capacity factor to one so that we'll try + * first, lower the sg capacity so that we'll try * and move all the excess tasks away. We lower the capacity * of a group only if the local group has the capacity to fit - * these excess tasks, i.e. nr_running < group_capacity_factor. The - * extra check prevents the case where you always pull from the - * heaviest group when it is already under-utilized (possible - * with a large weight task outweighs the tasks on the system). + * these excess tasks. The extra check prevents the case where + * you always pull from the heaviest group when it is already + * under-utilized (possible with a large weight task outweighs + * the tasks on the system). */ if (prefer_sibling && sds->local && - sds->local_stat.group_has_free_capacity) { - sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U); - sgs->group_type = group_classify(sg, sgs); + group_has_capacity(env, &sds->local_stat) && + (sgs->sum_nr_running > 1)) { + sgs->group_no_capacity = 1; + sgs->group_type = group_overloaded; } if (update_sd_pick_busiest(env, sds, sg, sgs)) { @@ -6537,11 +6614,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s */ if (busiest->group_type == group_overloaded && local->group_type == group_overloaded) { - load_above_capacity = - (busiest->sum_nr_running - busiest->group_capacity_factor); - - load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE); - load_above_capacity /= busiest->group_capacity; + load_above_capacity = busiest->sum_nr_running * + SCHED_LOAD_SCALE; + if (load_above_capacity > busiest->group_capacity) + load_above_capacity -= busiest->group_capacity; + else + load_above_capacity = ~0UL; } /* @@ -6604,6 +6682,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) local = &sds.local_stat; busiest = &sds.busiest_stat; + /* ASYM feature bypasses nice load balance check */ if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) && check_asym_packing(env, &sds)) return sds.busiest; @@ -6624,8 +6703,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env) goto force_balance; /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ - if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity && - !busiest->group_has_free_capacity) + if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) && + busiest->group_no_capacity) goto force_balance; /* @@ -6684,7 +6763,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, int i; for_each_cpu_and(i, sched_group_cpus(group), env->cpus) { - unsigned long capacity, capacity_factor, wl; + unsigned long capacity, wl; enum fbq_type rt; rq = cpu_rq(i); @@ -6713,9 +6792,6 @@ static struct rq *find_busiest_queue(struct lb_env *env, continue; capacity = capacity_of(i); - capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE); - if (!capacity_factor) - capacity_factor = fix_small_capacity(env->sd, group); wl = weighted_cpuload(i); @@ -6723,7 +6799,9 @@ static struct rq *find_busiest_queue(struct lb_env *env, * When comparing with imbalance, use weighted_cpuload() * which is not scaled with the cpu capacity. */ - if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance) + + if (rq->nr_running == 1 && wl > env->imbalance && + !check_cpu_capacity(rq, env->sd)) continue; /* @@ -6771,6 +6849,19 @@ static int need_active_balance(struct lb_env *env) return 1; } + /* + * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task. + * It's worth migrating the task if the src_cpu's capacity is reduced + * because of other sched_class or IRQs if more capacity stays + * available on dst_cpu. + */ + if ((env->idle != CPU_NOT_IDLE) && + (env->src_rq->cfs.h_nr_running == 1)) { + if ((check_cpu_capacity(env->src_rq, sd)) && + (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100)) + return 1; + } + return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); } @@ -6870,6 +6961,9 @@ redo: schedstat_add(sd, lb_imbalance[idle], env.imbalance); + env.src_cpu = busiest->cpu; + env.src_rq = busiest; + ld_moved = 0; if (busiest->nr_running > 1) { /* @@ -6879,8 +6973,6 @@ redo: * correctly treated as an imbalance. */ env.flags |= LBF_ALL_PINNED; - env.src_cpu = busiest->cpu; - env.src_rq = busiest; env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running); more_balance: @@ -7580,22 +7672,25 @@ end: /* * Current heuristic for kicking the idle load balancer in the presence - * of an idle cpu is the system. + * of an idle cpu in the system. * - This rq has more than one task. - * - At any scheduler domain level, this cpu's scheduler group has multiple - * busy cpu's exceeding the group's capacity. + * - This rq has at least one CFS task and the capacity of the CPU is + * significantly reduced because of RT tasks or IRQs. + * - At parent of LLC scheduler domain level, this cpu's scheduler group has + * multiple busy cpu. * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * domain span are idle. */ -static inline int nohz_kick_needed(struct rq *rq) +static inline bool nohz_kick_needed(struct rq *rq) { unsigned long now = jiffies; struct sched_domain *sd; struct sched_group_capacity *sgc; int nr_busy, cpu = rq->cpu; + bool kick = false; if (unlikely(rq->idle_balance)) - return 0; + return false; /* * We may be recently in ticked or tickless idle mode. At the first @@ -7609,38 +7704,46 @@ static inline int nohz_kick_needed(struct rq *rq) * balancing. */ if (likely(!atomic_read(&nohz.nr_cpus))) - return 0; + return false; if (time_before(now, nohz.next_balance)) - return 0; + return false; if (rq->nr_running >= 2) - goto need_kick; + return true; rcu_read_lock(); sd = rcu_dereference(per_cpu(sd_busy, cpu)); - if (sd) { sgc = sd->groups->sgc; nr_busy = atomic_read(&sgc->nr_busy_cpus); - if (nr_busy > 1) - goto need_kick_unlock; + if (nr_busy > 1) { + kick = true; + goto unlock; + } + } - sd = rcu_dereference(per_cpu(sd_asym, cpu)); + sd = rcu_dereference(rq->sd); + if (sd) { + if ((rq->cfs.h_nr_running >= 1) && + check_cpu_capacity(rq, sd)) { + kick = true; + goto unlock; + } + } + sd = rcu_dereference(per_cpu(sd_asym, cpu)); if (sd && (cpumask_first_and(nohz.idle_cpus_mask, - sched_domain_span(sd)) < cpu)) - goto need_kick_unlock; - - rcu_read_unlock(); - return 0; + sched_domain_span(sd)) < cpu)) { + kick = true; + goto unlock; + } -need_kick_unlock: +unlock: rcu_read_unlock(); -need_kick: - return 1; + return kick; } #else static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { } @@ -7656,14 +7759,16 @@ static void run_rebalance_domains(struct softirq_action *h) enum cpu_idle_type idle = this_rq->idle_balance ? CPU_IDLE : CPU_NOT_IDLE; - rebalance_domains(this_rq, idle); - /* * If this cpu has a pending nohz_balance_kick, then do the * balancing on behalf of the other idle cpus whose ticks are - * stopped. + * stopped. Do nohz_idle_balance *before* rebalance_domains to + * give the idle cpus a chance to load balance. Else we may + * load balance only within the local sched_domain hierarchy + * and abort nohz_idle_balance altogether if we pull some load. */ nohz_idle_balance(this_rq, idle); + rebalance_domains(this_rq, idle); } /* diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 90284d117fe6..91e33cd485f6 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -56,6 +56,19 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +#ifdef HAVE_RT_PUSH_IPI +/* + * In order to avoid a thundering herd attack of CPUs that are + * lowering their priorities at the same time, and there being + * a single CPU that has an RT task that can migrate and is waiting + * to run, where the other CPUs will try to take that CPUs + * rq lock and possibly create a large contention, sending an + * IPI to that CPU and let that CPU push the RT task to where + * it should go may be a better scenario. + */ +SCHED_FEAT(RT_PUSH_IPI, true) +#endif + SCHED_FEAT(FORCE_SD_OVERLAP, false) SCHED_FEAT(RT_RUNTIME_SHARE, true) SCHED_FEAT(LB_MIN, false) diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 94b2d7b88a27..4d207d2abcbd 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -82,6 +82,7 @@ static void cpuidle_idle_call(void) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int next_state, entered_state; unsigned int broadcast; + bool reflect; /* * Check if the idle task must be rescheduled. If it is the @@ -105,6 +106,9 @@ static void cpuidle_idle_call(void) */ rcu_idle_enter(); + if (cpuidle_not_available(drv, dev)) + goto use_default; + /* * Suspend-to-idle ("freeze") is a system state in which all user space * has been frozen, all I/O devices have been suspended and the only @@ -115,30 +119,24 @@ static void cpuidle_idle_call(void) * until a proper wakeup interrupt happens. */ if (idle_should_freeze()) { - cpuidle_enter_freeze(); - local_irq_enable(); - goto exit_idle; - } + entered_state = cpuidle_enter_freeze(drv, dev); + if (entered_state >= 0) { + local_irq_enable(); + goto exit_idle; + } - /* - * Ask the cpuidle framework to choose a convenient idle state. - * Fall back to the default arch idle method on errors. - */ - next_state = cpuidle_select(drv, dev); - if (next_state < 0) { -use_default: + reflect = false; + next_state = cpuidle_find_deepest_state(drv, dev); + } else { + reflect = true; /* - * We can't use the cpuidle framework, let's use the default - * idle routine. + * Ask the cpuidle framework to choose a convenient idle state. */ - if (current_clr_polling_and_test()) - local_irq_enable(); - else - arch_cpu_idle(); - - goto exit_idle; + next_state = cpuidle_select(drv, dev); } - + /* Fall back to the default arch idle method on errors. */ + if (next_state < 0) + goto use_default; /* * The idle task must be scheduled, it is pointless to @@ -160,8 +158,7 @@ use_default: * is used from another cpu as a broadcast timer, this call may * fail if it is not available */ - if (broadcast && - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) + if (broadcast && tick_broadcast_enter()) goto use_default; /* Take note of the planned idle state. */ @@ -178,12 +175,13 @@ use_default: idle_set_state(this_rq(), NULL); if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); + tick_broadcast_exit(); /* * Give the governor an opportunity to reflect on the outcome */ - cpuidle_reflect(dev, entered_state); + if (reflect) + cpuidle_reflect(dev, entered_state); exit_idle: __current_set_polling(); @@ -196,6 +194,19 @@ exit_idle: rcu_idle_exit(); start_critical_timings(); + return; + +use_default: + /* + * We can't use the cpuidle framework, let's use the default + * idle routine. + */ + if (current_clr_polling_and_test()) + local_irq_enable(); + else + arch_cpu_idle(); + + goto exit_idle; } /* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f4d4b077eba0..575da76a3874 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -6,6 +6,7 @@ #include "sched.h" #include <linux/slab.h> +#include <linux/irq_work.h> int sched_rr_timeslice = RR_TIMESLICE; @@ -59,7 +60,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } -void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) +#ifdef CONFIG_SMP +static void push_irq_work_func(struct irq_work *work); +#endif + +void init_rt_rq(struct rt_rq *rt_rq) { struct rt_prio_array *array; int i; @@ -78,7 +83,14 @@ void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) rt_rq->rt_nr_migratory = 0; rt_rq->overloaded = 0; plist_head_init(&rt_rq->pushable_tasks); + +#ifdef HAVE_RT_PUSH_IPI + rt_rq->push_flags = 0; + rt_rq->push_cpu = nr_cpu_ids; + raw_spin_lock_init(&rt_rq->push_lock); + init_irq_work(&rt_rq->push_work, push_irq_work_func); #endif +#endif /* CONFIG_SMP */ /* We start is dequeued state, because no RT tasks are queued */ rt_rq->rt_queued = 0; @@ -193,7 +205,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) if (!rt_se) goto err_free_rq; - init_rt_rq(rt_rq, cpu_rq(i)); + init_rt_rq(rt_rq); rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); } @@ -1778,6 +1790,164 @@ static void push_rt_tasks(struct rq *rq) ; } +#ifdef HAVE_RT_PUSH_IPI +/* + * The search for the next cpu always starts at rq->cpu and ends + * when we reach rq->cpu again. It will never return rq->cpu. + * This returns the next cpu to check, or nr_cpu_ids if the loop + * is complete. + * + * rq->rt.push_cpu holds the last cpu returned by this function, + * or if this is the first instance, it must hold rq->cpu. + */ +static int rto_next_cpu(struct rq *rq) +{ + int prev_cpu = rq->rt.push_cpu; + int cpu; + + cpu = cpumask_next(prev_cpu, rq->rd->rto_mask); + + /* + * If the previous cpu is less than the rq's CPU, then it already + * passed the end of the mask, and has started from the beginning. + * We end if the next CPU is greater or equal to rq's CPU. + */ + if (prev_cpu < rq->cpu) { + if (cpu >= rq->cpu) + return nr_cpu_ids; + + } else if (cpu >= nr_cpu_ids) { + /* + * We passed the end of the mask, start at the beginning. + * If the result is greater or equal to the rq's CPU, then + * the loop is finished. + */ + cpu = cpumask_first(rq->rd->rto_mask); + if (cpu >= rq->cpu) + return nr_cpu_ids; + } + rq->rt.push_cpu = cpu; + + /* Return cpu to let the caller know if the loop is finished or not */ + return cpu; +} + +static int find_next_push_cpu(struct rq *rq) +{ + struct rq *next_rq; + int cpu; + + while (1) { + cpu = rto_next_cpu(rq); + if (cpu >= nr_cpu_ids) + break; + next_rq = cpu_rq(cpu); + + /* Make sure the next rq can push to this rq */ + if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr) + break; + } + + return cpu; +} + +#define RT_PUSH_IPI_EXECUTING 1 +#define RT_PUSH_IPI_RESTART 2 + +static void tell_cpu_to_push(struct rq *rq) +{ + int cpu; + + if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { + raw_spin_lock(&rq->rt.push_lock); + /* Make sure it's still executing */ + if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { + /* + * Tell the IPI to restart the loop as things have + * changed since it started. + */ + rq->rt.push_flags |= RT_PUSH_IPI_RESTART; + raw_spin_unlock(&rq->rt.push_lock); + return; + } + raw_spin_unlock(&rq->rt.push_lock); + } + + /* When here, there's no IPI going around */ + + rq->rt.push_cpu = rq->cpu; + cpu = find_next_push_cpu(rq); + if (cpu >= nr_cpu_ids) + return; + + rq->rt.push_flags = RT_PUSH_IPI_EXECUTING; + + irq_work_queue_on(&rq->rt.push_work, cpu); +} + +/* Called from hardirq context */ +static void try_to_push_tasks(void *arg) +{ + struct rt_rq *rt_rq = arg; + struct rq *rq, *src_rq; + int this_cpu; + int cpu; + + this_cpu = rt_rq->push_cpu; + + /* Paranoid check */ + BUG_ON(this_cpu != smp_processor_id()); + + rq = cpu_rq(this_cpu); + src_rq = rq_of_rt_rq(rt_rq); + +again: + if (has_pushable_tasks(rq)) { + raw_spin_lock(&rq->lock); + push_rt_task(rq); + raw_spin_unlock(&rq->lock); + } + + /* Pass the IPI to the next rt overloaded queue */ + raw_spin_lock(&rt_rq->push_lock); + /* + * If the source queue changed since the IPI went out, + * we need to restart the search from that CPU again. + */ + if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) { + rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART; + rt_rq->push_cpu = src_rq->cpu; + } + + cpu = find_next_push_cpu(src_rq); + + if (cpu >= nr_cpu_ids) + rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING; + raw_spin_unlock(&rt_rq->push_lock); + + if (cpu >= nr_cpu_ids) + return; + + /* + * It is possible that a restart caused this CPU to be + * chosen again. Don't bother with an IPI, just see if we + * have more to push. + */ + if (unlikely(cpu == rq->cpu)) + goto again; + + /* Try the next RT overloaded CPU */ + irq_work_queue_on(&rt_rq->push_work, cpu); +} + +static void push_irq_work_func(struct irq_work *work) +{ + struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work); + + try_to_push_tasks(rt_rq); +} +#endif /* HAVE_RT_PUSH_IPI */ + static int pull_rt_task(struct rq *this_rq) { int this_cpu = this_rq->cpu, ret = 0, cpu; @@ -1793,6 +1963,13 @@ static int pull_rt_task(struct rq *this_rq) */ smp_rmb(); +#ifdef HAVE_RT_PUSH_IPI + if (sched_feat(RT_PUSH_IPI)) { + tell_cpu_to_push(this_rq); + return 0; + } +#endif + for_each_cpu(cpu, this_rq->rd->rto_mask) { if (this_cpu == cpu) continue; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index dc0f435a2779..e0e129993958 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -6,6 +6,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/stop_machine.h> +#include <linux/irq_work.h> #include <linux/tick.h> #include <linux/slab.h> @@ -362,8 +363,14 @@ struct cfs_rq { * Under CFS, load is tracked on a per-entity basis and aggregated up. * This allows for the description of both thread and group usage (in * the FAIR_GROUP_SCHED case). + * runnable_load_avg is the sum of the load_avg_contrib of the + * sched_entities on the rq. + * blocked_load_avg is similar to runnable_load_avg except that its + * the blocked sched_entities on the rq. + * utilization_load_avg is the sum of the average running time of the + * sched_entities on the rq. */ - unsigned long runnable_load_avg, blocked_load_avg; + unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg; atomic64_t decay_counter; u64 last_decay; atomic_long_t removed_load; @@ -418,6 +425,11 @@ static inline int rt_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } +/* RT IPI pull logic requires IRQ_WORK */ +#ifdef CONFIG_IRQ_WORK +# define HAVE_RT_PUSH_IPI +#endif + /* Real-Time classes' related field in a runqueue: */ struct rt_rq { struct rt_prio_array active; @@ -435,7 +447,13 @@ struct rt_rq { unsigned long rt_nr_total; int overloaded; struct plist_head pushable_tasks; +#ifdef HAVE_RT_PUSH_IPI + int push_flags; + int push_cpu; + struct irq_work push_work; + raw_spinlock_t push_lock; #endif +#endif /* CONFIG_SMP */ int rt_queued; int rt_throttled; @@ -597,6 +615,7 @@ struct rq { struct sched_domain *sd; unsigned long cpu_capacity; + unsigned long cpu_capacity_orig; unsigned char idle_balance; /* For active balancing */ @@ -807,7 +826,7 @@ struct sched_group_capacity { * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity * for a single CPU. */ - unsigned int capacity, capacity_orig; + unsigned int capacity; unsigned long next_update; int imbalance; /* XXX unrelated to capacity but shared group state */ /* @@ -1368,9 +1387,18 @@ static inline int hrtick_enabled(struct rq *rq) #ifdef CONFIG_SMP extern void sched_avg_update(struct rq *rq); + +#ifndef arch_scale_freq_capacity +static __always_inline +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) +{ + return SCHED_CAPACITY_SCALE; +} +#endif + static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { - rq->rt_avg += rt_delta; + rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq)); sched_avg_update(rq); } #else @@ -1643,8 +1671,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void init_cfs_rq(struct cfs_rq *cfs_rq); -extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); -extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq); +extern void init_rt_rq(struct rt_rq *rt_rq); +extern void init_dl_rq(struct dl_rq *dl_rq); extern void cfs_bandwidth_usage_inc(void); extern void cfs_bandwidth_usage_dec(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 88ea2d6e0031..ce410bb9f2e1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1228,6 +1228,14 @@ static struct ctl_table vm_table[] = { .extra1 = &zero, }, { + .procname = "dirtytime_expire_seconds", + .data = &dirtytime_expire_interval, + .maxlen = sizeof(dirty_expire_interval), + .mode = 0644, + .proc_handler = dirtytime_interval_handler, + .extra1 = &zero, + }, + { .procname = "nr_pdflush_threads", .mode = 0444 /* read-only */, .proc_handler = pdflush_proc_obsolete, diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index d626dc98e8df..579ce1b929af 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET config GENERIC_CLOCKEVENTS bool -# Migration helper. Builds, but does not invoke -config GENERIC_CLOCKEVENTS_BUILD - bool - default y - depends on GENERIC_CLOCKEVENTS - # Architecture can handle broadcast in a driver-agnostic way config ARCH_HAS_TICK_BROADCAST bool diff --git a/kernel/time/Makefile b/kernel/time/Makefile index c09c07817d7a..01f0312419b3 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o tick-common.o ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) obj-y += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o -obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o -obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o +obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 55449909f114..25d942d1da27 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,25 +94,76 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); +static int __clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) +{ + /* Transition with legacy set_mode() callback */ + if (dev->set_mode) { + /* Legacy callback doesn't support new modes */ + if (state > CLOCK_EVT_STATE_ONESHOT) + return -ENOSYS; + /* + * 'clock_event_state' and 'clock_event_mode' have 1-to-1 + * mapping until *_ONESHOT, and so a simple cast will work. + */ + dev->set_mode((enum clock_event_mode)state, dev); + dev->mode = (enum clock_event_mode)state; + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* Transition with new state-specific callbacks */ + switch (state) { + case CLOCK_EVT_STATE_DETACHED: + /* + * This is an internal state, which is guaranteed to go from + * SHUTDOWN to DETACHED. No driver interaction required. + */ + return 0; + + case CLOCK_EVT_STATE_SHUTDOWN: + return dev->set_state_shutdown(dev); + + case CLOCK_EVT_STATE_PERIODIC: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) + return -ENOSYS; + return dev->set_state_periodic(dev); + + case CLOCK_EVT_STATE_ONESHOT: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return -ENOSYS; + return dev->set_state_oneshot(dev); + + default: + return -ENOSYS; + } +} + /** - * clockevents_set_mode - set the operating mode of a clock event device + * clockevents_set_state - set the operating state of a clock event device * @dev: device to modify - * @mode: new mode + * @state: new state * * Must be called with interrupts disabled ! */ -void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { - if (dev->mode != mode) { - dev->set_mode(mode, dev); - dev->mode = mode; + if (dev->state != state) { + if (__clockevents_set_state(dev, state)) + return; + + dev->state = state; /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: */ - if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (state == CLOCK_EVT_STATE_ONESHOT) { if (unlikely(!dev->mult)) { dev->mult = 1; WARN_ON(1); @@ -127,10 +178,28 @@ void clockevents_set_mode(struct clock_event_device *dev, */ void clockevents_shutdown(struct clock_event_device *dev) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event.tv64 = KTIME_MAX; } +/** + * clockevents_tick_resume - Resume the tick device before using it again + * @dev: device to resume + */ +int clockevents_tick_resume(struct clock_event_device *dev) +{ + int ret = 0; + + if (dev->set_mode) { + dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); + dev->mode = CLOCK_EVT_MODE_RESUME; + } else if (dev->tick_resume) { + ret = dev->tick_resume(dev); + } + + return ret; +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST /* Limit min_delta to a jiffie */ @@ -183,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -220,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -252,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, dev->next_event = expires; - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; /* Shortcut for clockevent devices that can deal with ktime. */ @@ -297,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced) struct clock_event_device *dev, *newdev = NULL; list_for_each_entry(dev, &clockevent_devices, list) { - if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) continue; if (!tick_check_replacement(newdev, dev)) @@ -323,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced) static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { /* Fast track. Device is unused */ - if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + if (ced->state == CLOCK_EVT_STATE_DETACHED) { list_del_init(&ced->list); return 0; } @@ -373,6 +442,37 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); +/* Sanity check of state transition callbacks */ +static int clockevents_sanity_check(struct clock_event_device *dev) +{ + /* Legacy set_mode() callback */ + if (dev->set_mode) { + /* We shouldn't be supporting new modes now */ + WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || + dev->set_state_shutdown || dev->tick_resume); + + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* New state-specific callbacks */ + if (!dev->set_state_shutdown) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && + !dev->set_state_periodic) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && + !dev->set_state_oneshot) + return -EINVAL; + + return 0; +} + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -381,7 +481,11 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(clockevents_sanity_check(dev)); + + /* Initialize state to DETACHED */ + dev->state = CLOCK_EVT_STATE_DETACHED; + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -445,11 +549,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); - if (dev->mode == CLOCK_EVT_MODE_ONESHOT) + if (dev->state == CLOCK_EVT_STATE_ONESHOT) return clockevents_program_event(dev, dev->next_event, false); - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); + if (dev->state == CLOCK_EVT_STATE_PERIODIC) + return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); return 0; } @@ -491,30 +595,27 @@ void clockevents_handle_noop(struct clock_event_device *dev) * @old: device to release (can be NULL) * @new: device to request (can be NULL) * - * Called from the notifier chain. clockevents_lock is held already + * Called from various tick functions with clockevents_lock held and + * interrupts disabled. */ void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { - unsigned long flags; - - local_irq_save(flags); /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. */ if (old) { module_put(old->owner); - clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); + clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); list_del(&old->list); list_add(&old->list, &clockevents_released); } if (new) { - BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } - local_irq_restore(flags); } /** @@ -541,74 +642,40 @@ void clockevents_resume(void) dev->resume(dev); } -#ifdef CONFIG_GENERIC_CLOCKEVENTS +#ifdef CONFIG_HOTPLUG_CPU /** - * clockevents_notify - notification about relevant events - * Returns 0 on success, any other value on error + * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ -int clockevents_notify(unsigned long reason, void *arg) +void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); - switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, arg); - break; - - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - ret = tick_broadcast_oneshot_control(reason); - break; - - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(arg); - break; - - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(arg); - tick_shutdown_broadcast(arg); - tick_shutdown(arg); - /* - * Unregister the clock event devices which were - * released from the users in the notify chain. - */ - list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + tick_shutdown_broadcast_oneshot(cpu); + tick_shutdown_broadcast(cpu); + tick_shutdown(cpu); + /* + * Unregister the clock event devices which were + * released from the users in the notify chain. + */ + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); - /* - * Now check whether the CPU has left unused per cpu devices - */ - cpu = *((int *)arg); - list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { - if (cpumask_test_cpu(cpu, dev->cpumask) && - cpumask_weight(dev->cpumask) == 1 && - !tick_is_broadcast_device(dev)) { - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - list_del(&dev->list); - } } - break; - default: - break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); - return ret; } -EXPORT_SYMBOL_GPL(clockevents_notify); +#endif #ifdef CONFIG_SYSFS struct bus_type clockevents_subsys = { @@ -727,5 +794,3 @@ static int __init clockevents_init_sysfs(void) } device_initcall(clockevents_init_sysfs); #endif /* SYSFS */ - -#endif /* GENERIC_CLOCK_EVENTS */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4892352f0e49..15facb1b9c60 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs) schedule_work(&watchdog_work); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) -{ - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); - __clocksource_unstable(cs); -} - /** * clocksource_mark_unstable - mark clocksource unstable via watchdog * @cs: clocksource to be marked unstable @@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs) static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; - cycle_t csnow, wdnow, delta; + cycle_t csnow, wdnow, cslast, wdlast, delta; int64_t wd_nsec, cs_nsec; int next_cpu, reset_pending; @@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data) delta = clocksource_delta(csnow, cs->cs_last, cs->mask); cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); + wdlast = cs->wd_last; /* save these in case we print them */ + cslast = cs->cs_last; cs->cs_last = csnow; cs->wd_last = wdnow; @@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data) /* Check the deviation from the watchdog clocksource. */ if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { - clocksource_unstable(cs, cs_nsec - wd_nsec); + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", + watchdog->name, wdnow, wdlast, watchdog->mask); + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", + cs->name, csnow, cslast, cs->mask); + __clocksource_unstable(cs); continue; } @@ -469,26 +469,25 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * @max_cyc: maximum cycle value before potential overflow (does not include + * any safety margin) + * + * NOTE: This function includes a safety margin of 50%, in other words, we + * return half the number of nanoseconds the hardware counter can technically + * cover. This is done so that we can potentially detect problems caused by + * delayed timers or bad hardware, which might result in time intervals that + * are larger then what the math used can handle without overflows. */ -u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) +u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) { u64 max_nsecs, max_cycles; /* * Calculate the maximum number of cycles that we can pass to the - * cyc2ns function without overflowing a 64-bit signed result. The - * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj) - * which is equivalent to the below. - * max_cycles < (2^63)/(mult + maxadj) - * max_cycles < 2^(log2((2^63)/(mult + maxadj))) - * max_cycles < 2^(log2(2^63) - log2(mult + maxadj)) - * max_cycles < 2^(63 - log2(mult + maxadj)) - * max_cycles < 1 << (63 - log2(mult + maxadj)) - * Please note that we add 1 to the result of the log2 to account for - * any rounding errors, ensure the above inequality is satisfied and - * no overflow will occur. + * cyc2ns() function without overflowing a 64-bit result. */ - max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); + max_cycles = ULLONG_MAX; + do_div(max_cycles, mult+maxadj); /* * The actual maximum number of cycles we can defer the clocksource is @@ -499,27 +498,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) max_cycles = min(max_cycles, mask); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + /* return the max_cycles value as well if requested */ + if (max_cyc) + *max_cyc = max_cycles; + + /* Return 50% of the actual maximum, so we can detect bad values */ + max_nsecs >>= 1; + return max_nsecs; } /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred - * @cs: Pointer to clocksource + * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles + * @cs: Pointer to clocksource to be updated * */ -static u64 clocksource_max_deferment(struct clocksource *cs) +static inline void clocksource_update_max_deferment(struct clocksource *cs) { - u64 max_nsecs; - - max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, - cs->mask); - /* - * To ensure that the clocksource does not wrap whilst we are idle, - * limit the time the clocksource can be deferred by 12.5%. Please - * note a margin of 12.5% is used because this can be computed with - * a shift, versus say 10% which would require division. - */ - return max_nsecs - (max_nsecs >> 3); + cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift, + cs->maxadj, cs->mask, + &cs->max_cycles); } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -648,7 +646,7 @@ static void clocksource_enqueue(struct clocksource *cs) } /** - * __clocksource_updatefreq_scale - Used update clocksource with new freq + * __clocksource_update_freq_scale - Used update clocksource with new freq * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale @@ -656,48 +654,64 @@ static void clocksource_enqueue(struct clocksource *cs) * This should only be called from the clocksource->enable() method. * * This *SHOULD NOT* be called directly! Please use the - * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. + * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper + * functions. */ -void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) +void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; + /* - * Calc the maximum number of seconds which we can run before - * wrapping around. For clocksources which have a mask > 32bit - * we need to limit the max sleep time to have a good - * conversion precision. 10 minutes is still a reasonable - * amount. That results in a shift value of 24 for a - * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. We apply the same 12.5% - * margin as we do in clocksource_max_deferment() + * Default clocksources are *special* and self-define their mult/shift. + * But, you're not special, so you should specify a freq value. */ - sec = (cs->mask - (cs->mask >> 3)); - do_div(sec, freq); - do_div(sec, scale); - if (!sec) - sec = 1; - else if (sec > 600 && cs->mask > UINT_MAX) - sec = 600; - - clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, - NSEC_PER_SEC / scale, sec * scale); - + if (freq) { + /* + * Calc the maximum number of seconds which we can run before + * wrapping around. For clocksources which have a mask > 32-bit + * we need to limit the max sleep time to have a good + * conversion precision. 10 minutes is still a reasonable + * amount. That results in a shift value of 24 for a + * clocksource with mask >= 40-bit and f >= 4GHz. That maps to + * ~ 0.06ppm granularity for NTP. + */ + sec = cs->mask; + do_div(sec, freq); + do_div(sec, scale); + if (!sec) + sec = 1; + else if (sec > 600 && cs->mask > UINT_MAX) + sec = 600; + + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC / scale, sec * scale); + } /* - * for clocksources that have large mults, to avoid overflow. - * Since mult may be adjusted by ntp, add an safety extra margin - * + * Ensure clocksources that have large 'mult' values don't overflow + * when adjusted. */ cs->maxadj = clocksource_max_adjustment(cs); - while ((cs->mult + cs->maxadj < cs->mult) - || (cs->mult - cs->maxadj > cs->mult)) { + while (freq && ((cs->mult + cs->maxadj < cs->mult) + || (cs->mult - cs->maxadj > cs->mult))) { cs->mult >>= 1; cs->shift--; cs->maxadj = clocksource_max_adjustment(cs); } - cs->max_idle_ns = clocksource_max_deferment(cs); + /* + * Only warn for *special* clocksources that self-define + * their mult/shift values and don't specify a freq. + */ + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, + "timekeeping: Clocksource %s might overflow on 11%% adjustment\n", + cs->name); + + clocksource_update_max_deferment(cs); + + pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); } -EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); +EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale); /** * __clocksource_register_scale - Used to install new clocksources @@ -714,7 +728,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) { /* Initialize mult/shift and max_idle_ns */ - __clocksource_updatefreq_scale(cs, scale, freq); + __clocksource_update_freq_scale(cs, scale, freq); /* Add clocksource to the clocksource list */ mutex_lock(&clocksource_mutex); @@ -726,33 +740,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) } EXPORT_SYMBOL_GPL(__clocksource_register_scale); - -/** - * clocksource_register - Used to install new clocksources - * @cs: clocksource to be registered - * - * Returns -EBUSY if registration fails, zero otherwise. - */ -int clocksource_register(struct clocksource *cs) -{ - /* calculate max adjustment for given mult/shift */ - cs->maxadj = clocksource_max_adjustment(cs); - WARN_ONCE(cs->mult + cs->maxadj < cs->mult, - "Clocksource %s might overflow on 11%% adjustment\n", - cs->name); - - /* calculate max idle time permitted for this clocksource */ - cs->max_idle_ns = clocksource_max_deferment(cs); - - mutex_lock(&clocksource_mutex); - clocksource_enqueue(cs); - clocksource_enqueue_watchdog(cs); - clocksource_select(); - mutex_unlock(&clocksource_mutex); - return 0; -} -EXPORT_SYMBOL(clocksource_register); - static void __clocksource_change_rating(struct clocksource *cs, int rating) { list_del(&cs->list); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bee0c1f78091..76d4bd962b19 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -54,7 +54,7 @@ #include <trace/events/timer.h> -#include "timekeeping.h" +#include "tick-internal.h" /* * The timer bases: @@ -1707,17 +1707,10 @@ static int hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - case CPU_DYING_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); - break; case CPU_DEAD: case CPU_DEAD_FROZEN: - { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); migrate_hrtimers(scpu); break; - } #endif default: diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a6a5bf53e86d..347fecf86a3f 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -25,7 +25,7 @@ #include <linux/module.h> #include <linux/init.h> -#include "tick-internal.h" +#include "timekeeping.h" /* The Jiffies based clocksource is the lowest common * denominator clock source which should function on @@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = { .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ .shift = JIFFIES_SHIFT, + .max_cycles = 10, }; __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); @@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies); static int __init init_jiffies_clocksource(void) { - return clocksource_register(&clocksource_jiffies); + return __clocksource_register(&clocksource_jiffies); } core_initcall(init_jiffies_clocksource); @@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second) refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; - clocksource_register(&refined_jiffies); + __clocksource_register(&refined_jiffies); return 0; } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 0f60b08a4f07..7a681003001c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <linux/rtc.h> -#include "tick-internal.h" #include "ntp_internal.h" /* @@ -459,6 +458,16 @@ out: return leap; } +#ifdef CONFIG_GENERIC_CMOS_UPDATE +int __weak update_persistent_clock64(struct timespec64 now64) +{ + struct timespec now; + + now = timespec64_to_timespec(now64); + return update_persistent_clock(now); +} +#endif + #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); @@ -494,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work) if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(timespec64_to_timespec(adjust)); + fail = update_persistent_clock64(adjust); #endif + #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) fail = rtc_set_ntp_time(adjust); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 01d2d15aa662..a26036d37a38 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,5 +1,6 @@ /* - * sched_clock.c: support for extending counters to full 64-bit ns counter + * sched_clock.c: Generic sched_clock() support, to extend low level + * hardware time counters to full 64-bit ns values. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -18,15 +19,53 @@ #include <linux/seqlock.h> #include <linux/bitops.h> -struct clock_data { - ktime_t wrap_kt; +/** + * struct clock_read_data - data required to read from sched_clock() + * + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=40 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { u64 epoch_ns; u64 epoch_cyc; - seqcount_t seq; - unsigned long rate; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); u32 mult; u32 shift; - bool suspended; +}; + +/** + * struct clock_data - all data needed for sched_clock() (including + * registration of a new clock source) + * + * @seq: Sequence counter for protecting updates. The lowest + * bit is the index for @read_data. + * @read_data: Data required to read from sched_clock. + * @wrap_kt: Duration for which clock can run before wrapping. + * @rate: Tick rate of the registered clock. + * @actual_read_sched_clock: Registered hardware level clock read function. + * + * The ordering of this structure has been chosen to optimize cache + * performance. In particular 'seq' and 'read_data[0]' (combined) should fit + * into a single 64-byte cache line. + */ +struct clock_data { + seqcount_t seq; + struct clock_read_data read_data[2]; + ktime_t wrap_kt; + unsigned long rate; + + u64 (*actual_read_sched_clock)(void); }; static struct hrtimer sched_clock_timer; @@ -34,12 +73,6 @@ static int irqtime = -1; core_param(irqtime, irqtime, int, 0400); -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u64 __read_mostly sched_clock_mask; - static u64 notrace jiffy_sched_clock_read(void) { /* @@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void) return (u64)(jiffies - INITIAL_JIFFIES); } -static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static struct clock_data cd ____cacheline_aligned = { + .read_data[0] = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, + .actual_read_sched_clock = jiffy_sched_clock_read, +}; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { @@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) unsigned long long notrace sched_clock(void) { - u64 epoch_ns; - u64 epoch_cyc; - u64 cyc; + u64 cyc, res; unsigned long seq; - - if (cd.suspended) - return cd.epoch_ns; + struct clock_read_data *rd; do { - seq = raw_read_seqcount_begin(&cd.seq); - epoch_cyc = cd.epoch_cyc; - epoch_ns = cd.epoch_ns; + seq = raw_read_seqcount(&cd.seq); + rd = cd.read_data + (seq & 1); + + cyc = (rd->read_sched_clock() - rd->epoch_cyc) & + rd->sched_clock_mask; + res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); } while (read_seqcount_retry(&cd.seq, seq)); - cyc = read_sched_clock(); - cyc = (cyc - epoch_cyc) & sched_clock_mask; - return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); + return res; +} + +/* + * Updating the data required to read the clock. + * + * sched_clock() will never observe mis-matched data even if called from + * an NMI. We do this by maintaining an odd/even copy of the data and + * steering sched_clock() to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock() as much + * as possible the system reverts back to the even copy when the update + * completes; the odd copy is used *only* during an update. + */ +static void update_clock_read_data(struct clock_read_data *rd) +{ + /* update the backup (odd) copy with the new data */ + cd.read_data[1] = *rd; + + /* steer readers towards the odd copy */ + raw_write_seqcount_latch(&cd.seq); + + /* now its safe for us to update the normal (even) copy */ + cd.read_data[0] = *rd; + + /* switch readers back to the even copy */ + raw_write_seqcount_latch(&cd.seq); } /* - * Atomically update the sched_clock epoch. + * Atomically update the sched_clock() epoch. */ -static void notrace update_sched_clock(void) +static void update_sched_clock(void) { - unsigned long flags; u64 cyc; u64 ns; + struct clock_read_data rd; + + rd = cd.read_data[0]; + + cyc = cd.actual_read_sched_clock(); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); + + rd.epoch_ns = ns; + rd.epoch_cyc = cyc; - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); - - raw_local_irq_save(flags); - raw_write_seqcount_begin(&cd.seq); - cd.epoch_ns = ns; - cd.epoch_cyc = cyc; - raw_write_seqcount_end(&cd.seq); - raw_local_irq_restore(flags); + update_clock_read_data(&rd); } static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) { update_sched_clock(); hrtimer_forward_now(hrt, cd.wrap_kt); + return HRTIMER_RESTART; } -void __init sched_clock_register(u64 (*read)(void), int bits, - unsigned long rate) +void __init +sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; - ktime_t new_wrap_kt; unsigned long r; char r_unit; + struct clock_read_data rd; if (cd.rate > rate) return; WARN_ON(!irqs_disabled()); - /* calculate the mult/shift to convert counter ticks to ns. */ + /* Calculate the mult/shift to convert counter ticks to ns. */ clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); + cd.rate = rate; + + /* Calculate how many nanosecs until we risk wrapping */ + wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); + cd.wrap_kt = ns_to_ktime(wrap); - /* calculate how many ns until we wrap */ - wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); - new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + rd = cd.read_data[0]; - /* update epoch for new counter and update epoch_ns from old counter*/ + /* Update epoch for new counter and update 'epoch_ns' from old counter*/ new_epoch = read(); - cyc = read_sched_clock(); - ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = cd.actual_read_sched_clock(); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); + cd.actual_read_sched_clock = read; - raw_write_seqcount_begin(&cd.seq); - read_sched_clock = read; - sched_clock_mask = new_mask; - cd.rate = rate; - cd.wrap_kt = new_wrap_kt; - cd.mult = new_mult; - cd.shift = new_shift; - cd.epoch_cyc = new_epoch; - cd.epoch_ns = ns; - raw_write_seqcount_end(&cd.seq); + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { r /= 1000000; r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate the ns resolution of this counter */ + } else { + if (r >= 1000) { + r /= 1000; + r_unit = 'k'; + } else { + r_unit = ' '; + } + } + + /* Calculate the ns resolution of this counter */ res = cyc_to_ns(1ULL, new_mult, new_shift); pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", bits, r, r_unit, res, wrap); - /* Enable IRQ time accounting if we have a fast enough sched_clock */ + /* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) enable_sched_clock_irqtime(); @@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits, void __init sched_clock_postinit(void) { /* - * If no sched_clock function has been provided at that point, + * If no sched_clock() function has been provided at that point, * make it the final one one. */ - if (read_sched_clock == jiffy_sched_clock_read) + if (cd.actual_read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -189,29 +251,53 @@ void __init sched_clock_postinit(void) hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); } +/* + * Clock read function for use when the clock is suspended. + * + * This function makes it appear to sched_clock() as if the clock + * stopped counting at its last update. + * + * This function must only be called from the critical + * section in sched_clock(). It relies on the read_seqcount_retry() + * at the end of the critical section to be sure we observe the + * correct copy of 'epoch_cyc'. + */ +static u64 notrace suspended_sched_clock_read(void) +{ + unsigned long seq = raw_read_seqcount(&cd.seq); + + return cd.read_data[seq & 1].epoch_cyc; +} + static int sched_clock_suspend(void) { + struct clock_read_data *rd = &cd.read_data[0]; + update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - cd.suspended = true; + rd->read_sched_clock = suspended_sched_clock_read; + return 0; } static void sched_clock_resume(void) { - cd.epoch_cyc = read_sched_clock(); + struct clock_read_data *rd = &cd.read_data[0]; + + rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - cd.suspended = false; + rd->read_sched_clock = cd.actual_read_sched_clock; } static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, + .suspend = sched_clock_suspend, + .resume = sched_clock_resume, }; static int __init sched_clock_syscore_init(void) { register_syscore_ops(&sched_clock_ops); + return 0; } device_initcall(sched_clock_syscore_init); diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index eb682d5c697c..6aac4beedbbe 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -49,6 +49,7 @@ static void bc_set_mode(enum clock_event_mode mode, */ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) { + int bc_moved; /* * We try to cancel the timer first. If the callback is on * flight on some other cpu then we let it handle it. If we @@ -60,9 +61,15 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc) * restart the timer because we are in the callback, but we * can set the expiry time and let the callback return * HRTIMER_RESTART. + * + * Since we are in the idle loop at this point and because + * hrtimer_{start/cancel} functions call into tracing, + * calls to these functions must be bound within RCU_NONIDLE. */ - if (hrtimer_try_to_cancel(&bctimer) >= 0) { - hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); + RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ? + !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) : + 0); + if (bc_moved) { /* Bind the "device" to the cpu */ bc->bound_on = smp_processor_id(); } else if (bc->bound_on == smp_processor_id()) { diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 066f0ec05e48..7e8ca4f448a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -33,12 +33,14 @@ static cpumask_var_t tick_broadcast_mask; static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); -static int tick_broadcast_force; +static int tick_broadcast_forced; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else static inline void tick_broadcast_clear_oneshot(int cpu) { } +static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif /* @@ -303,7 +305,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * The device is in periodic mode. No reprogramming necessary: */ - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) + if (dev->state == CLOCK_EVT_STATE_PERIODIC) goto unlock; /* @@ -324,49 +326,54 @@ unlock: raw_spin_unlock(&tick_broadcast_lock); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_control - Enable/disable or force broadcast mode + * @mode: The selected broadcast mode + * + * Called when the system enters a state where affected tick devices + * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -static void tick_do_broadcast_on_off(unsigned long *reason) +void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; int cpu, bc_stopped; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - bc = tick_broadcast_device.evtdev; /* * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; if (!tick_device_is_functional(dev)) - goto out; + return; + raw_spin_lock(&tick_broadcast_lock); + cpu = smp_processor_id(); + bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); - switch (*reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + switch (mode) { + case TICK_BROADCAST_FORCE: + tick_broadcast_forced = 1; + case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } - if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - tick_broadcast_force = 1; break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (tick_broadcast_force) + + case TICK_BROADCAST_OFF: + if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (!tick_device_is_functional(dev)) @@ -388,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason) else tick_broadcast_setup_oneshot(bc); } -out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop. - */ -void tick_broadcast_on_off(unsigned long reason, int *oncpu) -{ - if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) - printk(KERN_ERR "tick-broadcast: ignoring broadcast for " - "offline CPU #%d\n", *oncpu); - else - tick_do_broadcast_on_off(&reason); + raw_spin_unlock(&tick_broadcast_lock); } +EXPORT_SYMBOL_GPL(tick_broadcast_control); /* * Set the periodic handler depending on broadcast on/off @@ -416,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) dev->event_handler = tick_handle_periodic_broadcast; } +#ifdef CONFIG_HOTPLUG_CPU /* * Remove a CPU from broadcasting */ -void tick_shutdown_broadcast(unsigned int *cpup) +void tick_shutdown_broadcast(unsigned int cpu) { struct clock_event_device *bc; unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -438,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif void tick_suspend_broadcast(void) { @@ -453,38 +448,48 @@ void tick_suspend_broadcast(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -int tick_resume_broadcast(void) +/* + * This is called from tick_resume_local() on a resuming CPU. That's + * called from the core resume function, tick_unfreeze() and the magic XEN + * resume hackery. + * + * In none of these cases the broadcast device mode can change and the + * bit of the resuming CPU in the broadcast mask is safe as well. + */ +bool tick_resume_check_broadcast(void) +{ + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) + return false; + else + return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); +} + +void tick_resume_broadcast(void) { struct clock_event_device *bc; unsigned long flags; - int broadcast = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); bc = tick_broadcast_device.evtdev; if (bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(bc); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(bc); - broadcast = cpumask_test_cpu(smp_processor_id(), - tick_broadcast_mask); break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) - broadcast = tick_resume_broadcast_oneshot(bc); + tick_resume_broadcast_oneshot(bc); break; } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); - - return broadcast; } - #ifdef CONFIG_TICK_ONESHOT static cpumask_var_t tick_broadcast_oneshot_mask; @@ -532,8 +537,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, { int ret; - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + if (bc->state != CLOCK_EVT_STATE_ONESHOT) + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); ret = clockevents_program_event(bc, expires, force); if (!ret) @@ -541,10 +546,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, return ret; } -int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); - return 0; + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); } /* @@ -562,8 +566,8 @@ void tick_check_oneshot_broadcast_this_cpu(void) * switched over, leave the device alone. */ if (td->mode == TICKDEV_MODE_ONESHOT) { - clockevents_set_mode(td->evtdev, - CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(td->evtdev, + CLOCK_EVT_STATE_ONESHOT); } } } @@ -666,31 +670,26 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, if (dev->next_event.tv64 < bc->next_event.tv64) return; } - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -static void broadcast_move_bc(int deadcpu) -{ - struct clock_event_device *bc = tick_broadcast_device.evtdev; - - if (!bc || !broadcast_needs_cpu(bc, deadcpu)) - return; - /* This moves the broadcast assignment to this cpu */ - clockevents_program_event(bc, bc->next_event, 1); -} - -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -int tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; - ktime_t now; int cpu, ret = 0; + ktime_t now; /* * Periodic mode does not care about the enter/exit of power @@ -703,17 +702,17 @@ int tick_broadcast_oneshot_control(unsigned long reason) * We are called with preemtion disabled from the depth of the * idle code, so we can't be moved away. */ - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; + raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; + cpu = smp_processor_id(); - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { + if (state == TICK_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); @@ -741,7 +740,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast @@ -805,9 +804,10 @@ int tick_broadcast_oneshot_control(unsigned long reason) } } out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock(&tick_broadcast_lock); return ret; } +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu @@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) /* Set it up only once ! */ if (bc->event_handler != tick_handle_oneshot_broadcast) { - int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; + int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; bc->event_handler = tick_handle_oneshot_broadcast; @@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_oneshot_mask, tmpmask); if (was_periodic && !cpumask_empty(tmpmask)) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_init_next_event(tmpmask, tick_next_period); tick_broadcast_set_event(bc, cpu, tick_next_period, 1); @@ -894,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#ifdef CONFIG_HOTPLUG_CPU +void hotplug_cpu__broadcast_tick_pull(int deadcpu) +{ + struct clock_event_device *bc; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +} /* * Remove a dead CPU from broadcasting */ -void tick_shutdown_broadcast_oneshot(unsigned int *cpup) +void tick_shutdown_broadcast_oneshot(unsigned int cpu) { unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -913,10 +927,9 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - broadcast_move_bc(cpu); - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif /* * Check, whether the broadcast device is in one shot mode diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index f7c515595b42..3ae6afa1eb98 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev) tick_periodic(cpu); - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + if (dev->state != CLOCK_EVT_STATE_ONESHOT) return; for (;;) { /* @@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active()) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { unsigned long seq; ktime_t next; @@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) next = tick_next_period; } while (read_seqretry(&jiffies_lock, seq)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); for (;;) { if (!clockevents_program_event(dev, next, false)) @@ -332,14 +332,16 @@ out_bc: tick_install_broadcast_device(newdev); } +#ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. + * Called with interrupts disabled. Not locking required. If + * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ -void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(void) { - if (*cpup == tick_do_timer_cpu) { + if (tick_do_timer_cpu == smp_processor_id()) { int cpu = cpumask_first(cpu_online_mask); tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu : @@ -354,9 +356,9 @@ void tick_handover_do_timer(int *cpup) * access the hardware device itself. * We just set the mode and remove it from the lists. */ -void tick_shutdown(unsigned int *cpup) +void tick_shutdown(unsigned int cpu) { - struct tick_device *td = &per_cpu(tick_cpu_device, *cpup); + struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; @@ -365,27 +367,42 @@ void tick_shutdown(unsigned int *cpup) * Prevent that the clock events layer tries to call * the set mode function! */ + dev->state = CLOCK_EVT_STATE_DETACHED; dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; } } +#endif -void tick_suspend(void) +/** + * tick_suspend_local - Suspend the local tick device + * + * Called from the local cpu for freeze with interrupts disabled. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); } -void tick_resume(void) +/** + * tick_resume_local - Resume the local tick device + * + * Called from the local CPU for unfreeze or XEN resume magic. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_resume_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); - int broadcast = tick_resume_broadcast(); - - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); + bool broadcast = tick_resume_check_broadcast(); + clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); @@ -394,6 +411,35 @@ void tick_resume(void) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_suspend(void) +{ + tick_suspend_local(); + tick_suspend_broadcast(); +} + +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_resume(void) +{ + tick_resume_broadcast(); + tick_resume_local(); +} + static DEFINE_RAW_SPINLOCK(tick_freeze_lock); static unsigned int tick_freeze_depth; @@ -411,12 +457,10 @@ void tick_freeze(void) raw_spin_lock(&tick_freeze_lock); tick_freeze_depth++; - if (tick_freeze_depth == num_online_cpus()) { + if (tick_freeze_depth == num_online_cpus()) timekeeping_suspend(); - } else { - tick_suspend(); - tick_suspend_broadcast(); - } + else + tick_suspend_local(); raw_spin_unlock(&tick_freeze_lock); } @@ -437,7 +481,7 @@ void tick_unfreeze(void) if (tick_freeze_depth == num_online_cpus()) timekeeping_resume(); else - tick_resume(); + tick_resume_local(); tick_freeze_depth--; diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 366aeb4f2c66..b64fdd8054c5 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -5,15 +5,12 @@ #include <linux/tick.h> #include "timekeeping.h" +#include "tick-sched.h" -extern seqlock_t jiffies_lock; +#ifdef CONFIG_GENERIC_CLOCKEVENTS -#define CS_NAME_LEN 32 - -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD - -#define TICK_DO_TIMER_NONE -1 -#define TICK_DO_TIMER_BOOT -2 +# define TICK_DO_TIMER_NONE -1 +# define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern ktime_t tick_next_period; @@ -23,21 +20,72 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_handover_do_timer(int *cpup); -extern void tick_shutdown(unsigned int *cpup); +extern void tick_shutdown(unsigned int cpu); extern void tick_suspend(void); extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); +extern int tick_is_oneshot_available(void); +extern struct tick_device *tick_get_device(int cpu); -extern void clockevents_shutdown(struct clock_event_device *dev); +extern int clockevents_tick_resume(struct clock_event_device *dev); +/* Check, if the device is functional or a dummy for broadcast */ +static inline int tick_device_is_functional(struct clock_event_device *dev) +{ + return !(dev->features & CLOCK_EVT_FEAT_DUMMY); +} +extern void clockevents_shutdown(struct clock_event_device *dev); +extern void clockevents_exchange_device(struct clock_event_device *old, + struct clock_event_device *new); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); +extern void clockevents_handle_noop(struct clock_event_device *dev); +extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); -/* - * NO_HZ / high resolution timer shared code - */ +/* Broadcasting support */ +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); +extern void tick_install_broadcast_device(struct clock_event_device *dev); +extern int tick_is_broadcast_device(struct clock_event_device *dev); +extern void tick_shutdown_broadcast(unsigned int cpu); +extern void tick_suspend_broadcast(void); +extern void tick_resume_broadcast(void); +extern bool tick_resume_check_broadcast(void); +extern void tick_broadcast_init(void); +extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); +extern struct tick_device *tick_get_broadcast_device(void); +extern struct cpumask *tick_get_broadcast_mask(void); +# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */ +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } +static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } +static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } +static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } +static inline void tick_shutdown_broadcast(unsigned int cpu) { } +static inline void tick_suspend_broadcast(void) { } +static inline void tick_resume_broadcast(void) { } +static inline bool tick_resume_check_broadcast(void) { return false; } +static inline void tick_broadcast_init(void) { } +static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } + +/* Set the periodic handler in non broadcast mode */ +static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) +{ + dev->event_handler = tick_handle_periodic; +} +# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */ + +#else /* !GENERIC_CLOCKEVENTS: */ +static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } +#endif /* !GENERIC_CLOCKEVENTS */ + +/* Oneshot related functions */ #ifdef CONFIG_TICK_ONESHOT extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), @@ -46,58 +94,42 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); +#else /* !CONFIG_TICK_ONESHOT: */ +static inline +void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } +static inline void tick_oneshot_notify(void) { } +static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +#endif /* !CONFIG_TICK_ONESHOT */ + +/* Functions related to oneshot broadcasting */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); +extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); -# else /* BROADCAST */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } +extern struct cpumask *tick_get_broadcast_oneshot_mask(void); +#else /* !(BROADCAST && ONESHOT): */ +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } -static inline bool tick_broadcast_oneshot_available(void) { return true; } -# endif /* !BROADCAST */ - -#else /* !ONESHOT */ -static inline -void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt) -{ - BUG(); -} -static inline void tick_resume_oneshot(void) -{ - BUG(); -} -static inline int tick_program_event(ktime_t expires, int force) -{ - return 0; -} -static inline void tick_oneshot_notify(void) { } -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) -{ - return 0; -} -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline bool tick_broadcast_oneshot_available(void) { return false; } -#endif /* !TICK_ONESHOT */ +static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } +#endif /* !(BROADCAST && ONESHOT) */ /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL @@ -105,68 +137,3 @@ extern void tick_nohz_init(void); # else static inline void tick_nohz_init(void) { } #endif - -/* - * Broadcasting support - */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); -extern void tick_install_broadcast_device(struct clock_event_device *dev); -extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); -extern void tick_shutdown_broadcast(unsigned int *cpup); -extern void tick_suspend_broadcast(void); -extern int tick_resume_broadcast(void); -extern void tick_broadcast_init(void); -extern void -tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); -int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); - -#else /* !BROADCAST */ - -static inline void tick_install_broadcast_device(struct clock_event_device *dev) -{ -} - -static inline int tick_is_broadcast_device(struct clock_event_device *dev) -{ - return 0; -} -static inline int tick_device_uses_broadcast(struct clock_event_device *dev, - int cpu) -{ - return 0; -} -static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } -static inline void tick_shutdown_broadcast(unsigned int *cpup) { } -static inline void tick_suspend_broadcast(void) { } -static inline int tick_resume_broadcast(void) { return 0; } -static inline void tick_broadcast_init(void) { } -static inline int tick_broadcast_update_freq(struct clock_event_device *dev, - u32 freq) { return -ENODEV; } - -/* - * Set the periodic handler in non broadcast mode - */ -static inline void tick_set_periodic_handler(struct clock_event_device *dev, - int broadcast) -{ - dev->event_handler = tick_handle_periodic; -} -#endif /* !BROADCAST */ - -/* - * Check, if the device is functional or a dummy for broadcast - */ -static inline int tick_device_is_functional(struct clock_event_device *dev) -{ - return !(dev->features & CLOCK_EVT_FEAT_DUMMY); -} - -int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); - -#endif - -extern void do_timer(unsigned long ticks); -extern void update_wall_time(void); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 7ce740e78e1b..67a64b1670bf 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -38,7 +38,7 @@ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } @@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, ktime_t next_event) { newdev->event_handler = handler; - clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } @@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a4c4edac4528..914259128145 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -34,7 +34,7 @@ /* * Per cpu nohz control structure */ -DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by jiffies_lock. @@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str) __setup("nohz=", setup_tick_nohz); +int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h new file mode 100644 index 000000000000..28b5da3e1a17 --- /dev/null +++ b/kernel/time/tick-sched.h @@ -0,0 +1,74 @@ +#ifndef _TICK_SCHED_H +#define _TICK_SCHED_H + +#include <linux/hrtimer.h> + +enum tick_device_mode { + TICKDEV_MODE_PERIODIC, + TICKDEV_MODE_ONESHOT, +}; + +struct tick_device { + struct clock_event_device *evtdev; + enum tick_device_mode mode; +}; + +enum tick_nohz_mode { + NOHZ_MODE_INACTIVE, + NOHZ_MODE_LOWRES, + NOHZ_MODE_HIGHRES, +}; + +/** + * struct tick_sched - sched tick emulation and no idle tick control/stats + * @sched_timer: hrtimer to schedule the periodic tick in high + * resolution mode + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary + * to resume the tick timer operation in the timeline + * when the CPU returns from nohz sleep. + * @tick_stopped: Indicator that the idle tick has been stopped + * @idle_jiffies: jiffies at the entry to idle for idle time accounting + * @idle_calls: Total number of idle calls + * @idle_sleeps: Number of idle calls, where the sched tick was stopped + * @idle_entrytime: Time when the idle call was entered + * @idle_waketime: Time when the idle was interrupted + * @idle_exittime: Time when the idle state was left + * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding + * @sleep_length: Duration of the current idle sleep + * @do_timer_lst: CPU was the last one doing do_timer before going idle + */ +struct tick_sched { + struct hrtimer sched_timer; + unsigned long check_clocks; + enum tick_nohz_mode nohz_mode; + ktime_t last_tick; + int inidle; + int tick_stopped; + unsigned long idle_jiffies; + unsigned long idle_calls; + unsigned long idle_sleeps; + int idle_active; + ktime_t idle_entrytime; + ktime_t idle_waketime; + ktime_t idle_exittime; + ktime_t idle_sleeptime; + ktime_t iowait_sleeptime; + ktime_t sleep_length; + unsigned long last_jiffies; + unsigned long next_jiffies; + ktime_t idle_expires; + int do_timer_last; +}; + +extern struct tick_sched *tick_get_tick_sched(int cpu); + +extern void tick_setup_sched_timer(void); +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS +extern void tick_cancel_sched_timer(int cpu); +#else +static inline void tick_cancel_sched_timer(int cpu) { } +#endif + +#endif diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 91db94136c10..946acb72179f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -59,17 +59,15 @@ struct tk_fast { }; static struct tk_fast tk_fast_mono ____cacheline_aligned; +static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -/* Flag for if there is a persistent clock on this platform */ -bool __read_mostly persistent_clock_exist = false; - static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { - tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; + while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { + tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } } @@ -79,20 +77,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } @@ -118,6 +116,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) tk->offs_boot = ktime_add(tk->offs_boot, delta); } +#ifdef CONFIG_DEBUG_TIMEKEEPING +#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +/* + * These simple flag variables are managed + * without locks, which is racy, but ok since + * we don't really care about being super + * precise about how many events were seen, + * just that a problem was observed. + */ +static int timekeeping_underflow_seen; +static int timekeeping_overflow_seen; + +/* last_warning is only modified under the timekeeping lock */ +static long timekeeping_last_warning; + +static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) +{ + + cycle_t max_cycles = tk->tkr_mono.clock->max_cycles; + const char *name = tk->tkr_mono.clock->name; + + if (offset > max_cycles) { + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", + offset, name, max_cycles); + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); + } else { + if (offset > (max_cycles >> 1)) { + printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", + offset, name, max_cycles >> 1); + printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); + } + } + + if (timekeeping_underflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_underflow_seen = 0; + } + + if (timekeeping_overflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_overflow_seen = 0; + } +} + +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t now, last, mask, max, delta; + unsigned int seq; + + /* + * Since we're called holding a seqlock, the data may shift + * under us while we're doing the calculation. This can cause + * false positives, since we'd note a problem but throw the + * results away. So nest another seqlock here to atomically + * grab the points we are checking with. + */ + do { + seq = read_seqcount_begin(&tk_core.seq); + now = tkr->read(tkr->clock); + last = tkr->cycle_last; + mask = tkr->mask; + max = tkr->clock->max_cycles; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + delta = clocksource_delta(now, last, mask); + + /* + * Try to catch underflows by checking if we are seeing small + * mask-relative negative values. + */ + if (unlikely((~delta & mask) < (mask >> 3))) { + timekeeping_underflow_seen = 1; + delta = 0; + } + + /* Cap delta value to the max_cycles values to avoid mult overflows */ + if (unlikely(delta > max)) { + timekeeping_overflow_seen = 1; + delta = tkr->clock->max_cycles; + } + + return delta; +} +#else +static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) +{ +} +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + return delta; +} +#endif + /** * tk_setup_internals - Set up internals to use clocksource clock. * @@ -135,11 +244,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->tkr.clock; - tk->tkr.clock = clock; - tk->tkr.read = clock->read; - tk->tkr.mask = clock->mask; - tk->tkr.cycle_last = tk->tkr.read(clock); + old_clock = tk->tkr_mono.clock; + tk->tkr_mono.clock = clock; + tk->tkr_mono.read = clock->read; + tk->tkr_mono.mask = clock->mask; + tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + + tk->tkr_raw.clock = clock; + tk->tkr_raw.read = clock->read; + tk->tkr_raw.mask = clock->mask; + tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -163,11 +277,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->tkr.xtime_nsec >>= -shift_change; + tk->tkr_mono.xtime_nsec >>= -shift_change; else - tk->tkr.xtime_nsec <<= shift_change; + tk->tkr_mono.xtime_nsec <<= shift_change; } - tk->tkr.shift = clock->shift; + tk->tkr_raw.xtime_nsec = 0; + + tk->tkr_mono.shift = clock->shift; + tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -178,7 +295,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. */ - tk->tkr.mult = clock->mult; + tk->tkr_mono.mult = clock->mult; + tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -193,14 +311,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; } static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tkr->read(tkr->clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = timekeeping_get_delta(tkr); nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -209,25 +323,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) -{ - struct clocksource *clock = tk->tkr.clock; - cycle_t cycle_now, delta; - s64 nsec; - - /* read clocksource: */ - cycle_now = tk->tkr.read(clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - - /* convert delta to nanoseconds. */ - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); - - /* If arch requires, add in get_arch_timeoffset() */ - return nsec + arch_gettimeoffset(); -} - /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. * @tkr: Timekeeping readout base from which we take the update @@ -267,18 +362,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ -static void update_fast_timekeeper(struct tk_read_base *tkr) +static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) { - struct tk_read_base *base = tk_fast_mono.base; + struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); @@ -316,22 +411,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 notrace ktime_get_mono_fast_ns(void) +static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { - seq = raw_read_seqcount(&tk_fast_mono.seq); - tkr = tk_fast_mono.base + (seq & 0x01); - now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + seq = raw_read_seqcount(&tkf->seq); + tkr = tkf->base + (seq & 0x01); + now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + } while (read_seqcount_retry(&tkf->seq, seq)); - } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; } + +u64 ktime_get_mono_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_mono); +} EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); +u64 ktime_get_raw_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_raw); +} +EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; @@ -353,12 +459,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs) static void halt_fast_timekeeper(struct timekeeper *tk) { static struct tk_read_base tkr_dummy; - struct tk_read_base *tkr = &tk->tkr; + struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; - update_fast_timekeeper(&tkr_dummy); + update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); + + tkr = &tk->tkr_raw; + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); + tkr_dummy.read = dummy_clock_read; + update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -369,8 +480,8 @@ static inline void update_vsyscall(struct timekeeper *tk) xt = timespec64_to_timespec(tk_xtime(tk)); wm = timespec64_to_timespec(tk->wall_to_monotonic); - update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, - tk->tkr.cycle_last); + update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, + tk->tkr_mono.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -387,11 +498,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. */ - remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); - tk->tkr.xtime_nsec -= remainder; - tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; + remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); + tk->tkr_mono.xtime_nsec -= remainder; + tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -456,17 +567,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; - tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ - tk->base_raw = timespec64_to_ktime(tk->raw_time); + tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); /* * The sum of the nanoseconds portions of xtime and * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. */ - nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); + nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; @@ -489,7 +600,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); } /** @@ -501,22 +613,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->tkr.read(clock); - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - tk->tkr.cycle_last = cycle_now; + cycle_now = tk->tkr_mono.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; - tk->tkr.xtime_nsec += delta * tk->tkr.mult; + tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); timespec64_add_ns(&tk->raw_time, nsec); } @@ -537,7 +650,7 @@ int __getnstimeofday64(struct timespec64 *ts) seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsecs = timekeeping_get_ns(&tk->tkr); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -577,8 +690,8 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -603,8 +716,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->tkr.base_mono, *offset); - nsecs = timekeeping_get_ns(&tk->tkr); + base = ktime_add(tk->tkr_mono.base, *offset); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -645,8 +758,8 @@ ktime_t ktime_get_raw(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_raw; - nsecs = timekeeping_get_ns_raw(tk); + base = tk->tkr_raw.base; + nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -674,7 +787,7 @@ void ktime_get_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(&tk->tkr); + nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -759,8 +872,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(tk); - nsecs_real = timekeeping_get_ns(&tk->tkr); + nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); + nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -943,7 +1056,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->tkr.clock; + old = tk->tkr_mono.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -971,11 +1084,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->tkr.clock == clock) + if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->tkr.clock == clock ? 0 : -1; + return tk->tkr_mono.clock == clock ? 0 : -1; } /** @@ -993,7 +1106,7 @@ void getrawmonotonic64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - nsecs = timekeeping_get_ns_raw(tk); + nsecs = timekeeping_get_ns(&tk->tkr_raw); ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1016,7 +1129,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1035,7 +1148,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->max_idle_ns; + ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1057,6 +1170,14 @@ void __weak read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_persistent_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_persistent_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /** * read_boot_clock - Return time of the system start. * @@ -1072,6 +1193,20 @@ void __weak read_boot_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_boot_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_boot_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + +/* Flag for if timekeeping_resume() has injected sleeptime */ +static bool sleeptime_injected; + +/* Flag for if there is a persistent clock on this platform */ +static bool persistent_clock_exists; + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1081,20 +1216,17 @@ void __init timekeeping_init(void) struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; - struct timespec ts; - read_persistent_clock(&ts); - now = timespec_to_timespec64(ts); + read_persistent_clock64(&now); if (!timespec64_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); now.tv_sec = 0; now.tv_nsec = 0; } else if (now.tv_sec || now.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; - read_boot_clock(&ts); - boot = timespec_to_timespec64(ts); + read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { pr_warn("WARNING: Boot clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); @@ -1114,7 +1246,6 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; - tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1127,7 +1258,7 @@ void __init timekeeping_init(void) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } -/* time in seconds when suspend began */ +/* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** @@ -1152,12 +1283,49 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, tk_debug_account_sleep_time(delta); } +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) +/** + * We have three kinds of time sources to use for sleep time + * injection, the preference order is: + * 1) non-stop clocksource + * 2) persistent clock (ie: RTC accessible when irqs are off) + * 3) RTC + * + * 1) and 2) are used by timekeeping, 3) by RTC subsystem. + * If system has neither 1) nor 2), 3) will be used finally. + * + * + * If timekeeping has injected sleeptime via either 1) or 2), + * 3) becomes needless, so in this case we don't need to call + * rtc_resume(), and this is what timekeeping_rtc_skipresume() + * means. + */ +bool timekeeping_rtc_skipresume(void) +{ + return sleeptime_injected; +} + +/** + * 1) can be determined whether to use or not only when doing + * timekeeping_resume() which is invoked after rtc_suspend(), + * so we can't skip rtc_suspend() surely if system has 1). + * + * But if system has 2), 2) will definitely be used, so in this + * case we don't need to call rtc_suspend(), and this is what + * timekeeping_rtc_skipsuspend() means. + */ +bool timekeeping_rtc_skipsuspend(void) +{ + return persistent_clock_exists; +} + /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * - * This hook is for architectures that cannot support read_persistent_clock + * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. + * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. @@ -1167,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - /* - * Make sure we don't set the clock twice, as timekeeping_resume() - * already did it - */ - if (has_persistent_clock()) - return; - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); @@ -1189,26 +1350,21 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) /* signal hrtimers about time change */ clock_was_set(); } +#endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. - * - * This is for the generic clocksource timekeeping. - * xtime/wall_to_monotonic/jiffies/etc are - * still managed by arch specific suspend/resume code. */ void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; - struct timespec tmp; cycle_t cycle_now, cycle_delta; - bool suspendtime_found = false; - read_persistent_clock(&tmp); - ts_new = timespec_to_timespec64(tmp); + sleeptime_injected = false; + read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); @@ -1228,16 +1384,16 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr.read(clock); + cycle_now = tk->tkr_mono.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->tkr.cycle_last) { + cycle_now > tk->tkr_mono.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, - tk->tkr.mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, + tk->tkr_mono.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1253,17 +1409,19 @@ void timekeeping_resume(void) nsec += ((u64) cycle_delta * mult) >> shift; ts_delta = ns_to_timespec64(nsec); - suspendtime_found = true; + sleeptime_injected = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); - suspendtime_found = true; + sleeptime_injected = true; } - if (suspendtime_found) + if (sleeptime_injected) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->tkr.cycle_last = cycle_now; + tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1272,9 +1430,7 @@ void timekeeping_resume(void) touch_softlockup_watchdog(); - clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); - - /* Resume hrtimers */ + tick_resume(); hrtimers_resume(); } @@ -1284,10 +1440,8 @@ int timekeeping_suspend(void) unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; - struct timespec tmp; - read_persistent_clock(&tmp); - timekeeping_suspend_time = timespec_to_timespec64(tmp); + read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at @@ -1295,31 +1449,33 @@ int timekeeping_suspend(void) * value returned, update the persistent_clock_exists flag. */ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; - /* - * To avoid drift caused by repeated suspend/resumes, - * which each can add ~1 second drift error, - * try to compensate so the difference in system time - * and persistent_clock time stays close to constant. - */ - delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); - delta_delta = timespec64_sub(delta, old_delta); - if (abs(delta_delta.tv_sec) >= 2) { + if (persistent_clock_exists) { /* - * if delta_delta is too large, assume time correction - * has occured and set old_delta to the current delta. + * To avoid drift caused by repeated suspend/resumes, + * which each can add ~1 second drift error, + * try to compensate so the difference in system time + * and persistent_clock time stays close to constant. */ - old_delta = delta; - } else { - /* Otherwise try to adjust old_system to compensate */ - timekeeping_suspend_time = - timespec64_add(timekeeping_suspend_time, delta_delta); + delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); + delta_delta = timespec64_sub(delta, old_delta); + if (abs(delta_delta.tv_sec) >= 2) { + /* + * if delta_delta is too large, assume time correction + * has occurred and set old_delta to the current delta. + */ + old_delta = delta; + } else { + /* Otherwise try to adjust old_system to compensate */ + timekeeping_suspend_time = + timespec64_add(timekeeping_suspend_time, delta_delta); + } } timekeeping_update(tk, TK_MIRROR); @@ -1327,7 +1483,7 @@ int timekeeping_suspend(void) write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + tick_suspend(); clocksource_suspend(); clockevents_suspend(); @@ -1416,15 +1572,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, * * XXX - TODO: Doc ntp_error calculation. */ - if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { + if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } - tk->tkr.mult += mult_adj; + tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; - tk->tkr.xtime_nsec -= offset; + tk->tkr_mono.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; } @@ -1486,13 +1642,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) tk->ntp_err_mult = 0; } - if (unlikely(tk->tkr.clock->maxadj && - (abs(tk->tkr.mult - tk->tkr.clock->mult) - > tk->tkr.clock->maxadj))) { + if (unlikely(tk->tkr_mono.clock->maxadj && + (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) + > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, + (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* @@ -1509,9 +1665,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { - s64 neg = -(s64)tk->tkr.xtime_nsec; - tk->tkr.xtime_nsec = 0; + if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr_mono.xtime_nsec; + tk->tkr_mono.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1526,13 +1682,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; - while (tk->tkr.xtime_nsec >= nsecps) { + while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; - tk->tkr.xtime_nsec -= nsecps; + tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1577,9 +1733,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->tkr.cycle_last += interval; + tk->tkr_mono.cycle_last += interval; + tk->tkr_raw.cycle_last += interval; - tk->tkr.xtime_nsec += tk->xtime_interval << shift; + tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1622,14 +1779,17 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), - tk->tkr.cycle_last, tk->tkr.mask); + offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif /* Check if there's really nothing to do */ if (offset < real_tk->cycle_interval) goto out; + /* Do some additional sanity checking */ + timekeeping_check_update(real_tk, offset); + /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, @@ -1784,8 +1944,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; + base = tk->tkr_mono.base; + nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1816,8 +1976,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 1d91416055d5..ead8794b9a4e 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); + +extern seqlock_t jiffies_lock; + +#define CS_NAME_LEN 32 + #endif diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d3f5c504939..2ece3aa5069c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -90,8 +90,18 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +/* + * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've + * made NULL special, hint: lock_timer_base()) and we cannot get a compile time + * pointer to per-cpu entries because we don't know where we'll map the section, + * even for the boot cpu. + * + * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the + * rest of them. + */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); + static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ @@ -1027,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1532,64 +1544,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -static int init_timers_cpu(int cpu) -{ - int j; - struct tvec_base *base; - static char tvec_base_done[NR_CPUS]; - - if (!tvec_base_done[cpu]) { - static char boot_done; - - if (boot_done) { - /* - * The APs use this path later in boot - */ - base = kzalloc_node(sizeof(*base), GFP_KERNEL, - cpu_to_node(cpu)); - if (!base) - return -ENOMEM; - - /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ - if (WARN_ON(base != tbase_get_base(base))) { - kfree(base); - return -ENOMEM; - } - per_cpu(tvec_bases, cpu) = base; - } else { - /* - * This is for the boot CPU - we use compile-time - * static initialisation because per-cpu memory isn't - * ready yet and because the memory allocators are not - * initialised either. - */ - boot_done = 1; - base = &boot_tvec_bases; - } - spin_lock_init(&base->lock); - tvec_base_done[cpu] = 1; - base->cpu = cpu; - } else { - base = per_cpu(tvec_bases, cpu); - } - - - for (j = 0; j < TVN_SIZE; j++) { - INIT_LIST_HEAD(base->tv5.vec + j); - INIT_LIST_HEAD(base->tv4.vec + j); - INIT_LIST_HEAD(base->tv3.vec + j); - INIT_LIST_HEAD(base->tv2.vec + j); - } - for (j = 0; j < TVR_SIZE; j++) - INIT_LIST_HEAD(base->tv1.vec + j); - - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; - base->active_timers = 0; - base->all_timers = 0; - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { @@ -1631,55 +1585,86 @@ static void migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } + old_base->active_timers = 0; + old_base->all_timers = 0; + spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); } -#endif /* CONFIG_HOTPLUG_CPU */ static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; - int err; - - switch(action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = init_timers_cpu(cpu); - if (err < 0) - return notifier_from_errno(err); - break; -#ifdef CONFIG_HOTPLUG_CPU + switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: - migrate_timers(cpu); + migrate_timers((long)hcpu); break; -#endif default: break; } + return NOTIFY_OK; } -static struct notifier_block timers_nb = { - .notifier_call = timer_cpu_notify, -}; +static inline void timer_register_cpu_notifier(void) +{ + cpu_notifier(timer_cpu_notify, 0); +} +#else +static inline void timer_register_cpu_notifier(void) { } +#endif /* CONFIG_HOTPLUG_CPU */ +static void __init init_timer_cpu(struct tvec_base *base, int cpu) +{ + int j; -void __init init_timers(void) + BUG_ON(base != tbase_get_base(base)); + + base->cpu = cpu; + per_cpu(tvec_bases, cpu) = base; + spin_lock_init(&base->lock); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + + base->timer_jiffies = jiffies; + base->next_timer = base->timer_jiffies; +} + +static void __init init_timer_cpus(void) { - int err; + struct tvec_base *base; + int local_cpu = smp_processor_id(); + int cpu; + for_each_possible_cpu(cpu) { + if (cpu == local_cpu) + base = &boot_tvec_bases; +#ifdef CONFIG_SMP + else + base = per_cpu_ptr(&__tvec_bases, cpu); +#endif + + init_timer_cpu(base, cpu); + } +} + +void __init init_timers(void) +{ /* ensure there are enough low bits for flags in timer->base pointer */ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); - err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - BUG_ON(err != NOTIFY_OK); - + init_timer_cpus(); init_timer_stats(); - register_cpu_notifier(&timers_nb); + timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 61ed862cdd37..e878c2e0ba45 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,10 +16,10 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> -#include <linux/tick.h> #include <asm/uaccess.h> +#include "tick-internal.h" struct timer_list_iter { int cpu; @@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); - SEQ_printf(m, "\n"); + if (dev->set_mode) { + SEQ_printf(m, " set_mode: "); + print_name_offset(m, dev->set_mode); + SEQ_printf(m, "\n"); + } else { + if (dev->set_state_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_state_shutdown); + SEQ_printf(m, "\n"); + } + + if (dev->set_state_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_state_periodic); + SEQ_printf(m, "\n"); + } + + if (dev->set_state_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_state_oneshot); + SEQ_printf(m, "\n"); + } + + if (dev->tick_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->tick_resume); + SEQ_printf(m, "\n"); + } + } SEQ_printf(m, " event_handler: "); print_name_offset(m, dev->event_handler); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 45e5cb143d17..4f228024055b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1059,6 +1059,12 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) static struct pid * const ftrace_swapper_pid = &init_struct_pid; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int ftrace_graph_active; +#else +# define ftrace_graph_active 0 +#endif + #ifdef CONFIG_DYNAMIC_FTRACE static struct ftrace_ops *removed_ops; @@ -2041,8 +2047,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (!ftrace_rec_count(rec)) rec->flags = 0; else - /* Just disable the record (keep REGS state) */ - rec->flags &= ~FTRACE_FL_ENABLED; + /* + * Just disable the record, but keep the ops TRAMP + * and REGS states. The _EN flags must be disabled though. + */ + rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | + FTRACE_FL_REGS_EN); } return FTRACE_UPDATE_MAKE_NOP; @@ -2688,24 +2698,36 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) static void ftrace_startup_sysctl(void) { + int command; + if (unlikely(ftrace_disabled)) return; /* Force update next time */ saved_ftrace_func = NULL; /* ftrace_start_up is true if we want ftrace running */ - if (ftrace_start_up) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + if (ftrace_start_up) { + command = FTRACE_UPDATE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_START_FUNC_RET; + ftrace_startup_enable(command); + } } static void ftrace_shutdown_sysctl(void) { + int command; + if (unlikely(ftrace_disabled)) return; /* ftrace_start_up is true if ftrace is running */ - if (ftrace_start_up) - ftrace_run_update_code(FTRACE_DISABLE_CALLS); + if (ftrace_start_up) { + command = FTRACE_DISABLE_CALLS; + if (ftrace_graph_active) + command |= FTRACE_STOP_FUNC_RET; + ftrace_run_update_code(command); + } } static cycle_t ftrace_update_time; @@ -5558,12 +5580,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, if (ftrace_enabled) { - ftrace_startup_sysctl(); - /* we are starting ftrace again */ if (ftrace_ops_list != &ftrace_list_end) update_ftrace_function(); + ftrace_startup_sysctl(); + } else { /* stopping ftrace calls (just send to ftrace_stub) */ ftrace_trace_function = ftrace_stub; @@ -5590,8 +5612,6 @@ static struct ftrace_ops graph_ops = { ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; -static int ftrace_graph_active; - int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) { return 0; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f28849394791..41ff75b478c6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work) } EXPORT_SYMBOL_GPL(flush_work); +struct cwt_wait { + wait_queue_t wait; + struct work_struct *work; +}; + +static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); + + if (cwait->work != key) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { + static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq); unsigned long flags; int ret; do { ret = try_to_grab_pending(work, is_dwork, &flags); /* - * If someone else is canceling, wait for the same event it - * would be waiting for before retrying. + * If someone else is already canceling, wait for it to + * finish. flush_work() doesn't work for PREEMPT_NONE + * because we may get scheduled between @work's completion + * and the other canceling task resuming and clearing + * CANCELING - flush_work() will return false immediately + * as @work is no longer busy, try_to_grab_pending() will + * return -ENOENT as @work is still being canceled and the + * other canceling task won't be able to clear CANCELING as + * we're hogging the CPU. + * + * Let's wait for completion using a waitqueue. As this + * may lead to the thundering herd problem, use a custom + * wake function which matches @work along with exclusive + * wait and wakeup. */ - if (unlikely(ret == -ENOENT)) - flush_work(work); + if (unlikely(ret == -ENOENT)) { + struct cwt_wait cwait; + + init_wait(&cwait.wait); + cwait.wait.func = cwt_wakefn; + cwait.work = work; + + prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait, + TASK_UNINTERRUPTIBLE); + if (work_is_canceling(work)) + schedule(); + finish_wait(&cancel_waitq, &cwait.wait); + } } while (unlikely(ret < 0)); /* tell other tasks trying to grab @work to back off */ @@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) flush_work(work); clear_work_data(work); + + /* + * Paired with prepare_to_wait() above so that either + * waitqueue_active() is visible here or !work_is_canceling() is + * visible there. + */ + smp_mb(); + if (waitqueue_active(&cancel_waitq)) + __wake_up(&cancel_waitq, TASK_NORMAL, 1, work); + return ret; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c5cefb3c009c..36b6fa88ce5b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK data corruption or a sporadic crash at a later stage once the region is examined. The runtime overhead introduced is minimal. +config DEBUG_TIMEKEEPING + bool "Enable extra timekeeping sanity checking" + help + This option will enable additional timekeeping sanity checks + which may be helpful when diagnosing issues where timekeeping + problems are suspected. + + This may include checks in the timekeeping hotpaths, so this + option may have a (very small) performance impact to some + workloads. + + If unsure, say N. + config TIMER_STATS bool "Collect kernel timers statistics" depends on DEBUG_KERNEL && PROC_FS diff --git a/lib/Makefile b/lib/Makefile index 87eb3bffc283..58f74d2dd396 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,7 +24,7 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o diff --git a/mm/iov_iter.c b/lib/iov_iter.c index 827732047da1..9d96e283520c 100644 --- a/mm/iov_iter.c +++ b/lib/iov_iter.c @@ -751,3 +751,18 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) return npages; } EXPORT_SYMBOL(iov_iter_npages); + +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) +{ + *new = *old; + if (new->type & ITER_BVEC) + return new->bvec = kmemdup(new->bvec, + new->nr_segs * sizeof(struct bio_vec), + flags); + else + /* iovec and kvec have identical layout */ + return new->iov = kmemdup(new->iov, + new->nr_segs * sizeof(struct iovec), + flags); +} +EXPORT_SYMBOL(dup_iter); diff --git a/lib/lcm.c b/lib/lcm.c index e97dbd51e756..03d7fcb420b5 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b) return 0; } EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/lockref.c b/lib/lockref.c index ecb9a665ec19..494994bf17c8 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -18,7 +18,7 @@ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ - old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 7a85967060a5..f0f5c5c3de12 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -139,6 +139,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* Error: request to write beyond destination buffer */ if (cpy > oend) goto _output_error; + if ((ref + COPYLENGTH) > oend || + (op + COPYLENGTH) > oend) + goto _output_error; LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; diff --git a/lib/nlattr.c b/lib/nlattr.c index 76a1b59523ab..f5907d23272d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -279,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); + if (count > minlen) + memset(dest + minlen, 0, count - minlen); return minlen; } diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9cc4c4a90d00..b5344ef4c684 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> @@ -217,15 +218,15 @@ static void bucket_table_free(const struct bucket_table *tbl) static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets) { - struct bucket_table *tbl; + struct bucket_table *tbl = NULL; size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL) tbl = vzalloc(size); - if (tbl == NULL) return NULL; @@ -247,26 +248,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, * @ht: hash table * @new_size: new table size */ -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (new_size / 4 * 3) && - (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift); + (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift); } -EXPORT_SYMBOL_GPL(rht_grow_above_75); /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @new_size: new table size */ -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (new_size * 3 / 10) && (atomic_read(&ht->shift) > ht->p.min_shift); } -EXPORT_SYMBOL_GPL(rht_shrink_below_30); static void lock_buckets(struct bucket_table *new_tbl, struct bucket_table *old_tbl, unsigned int hash) @@ -414,6 +413,7 @@ int rhashtable_expand(struct rhashtable *ht) } } unlock_buckets(new_tbl, old_tbl, new_hash); + cond_resched(); } /* Unzip interleaved hash chains */ @@ -437,6 +437,7 @@ int rhashtable_expand(struct rhashtable *ht) complete = false; unlock_buckets(new_tbl, old_tbl, old_hash); + cond_resched(); } } @@ -495,6 +496,7 @@ int rhashtable_shrink(struct rhashtable *ht) tbl->buckets[new_hash + new_tbl->size]); unlock_buckets(new_tbl, tbl, new_hash); + cond_resched(); } /* Publish the new, valid hash table */ @@ -528,31 +530,19 @@ static void rht_deferred_worker(struct work_struct *work) list_for_each_entry(walker, &ht->walkers, list) walker->resize = true; - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + if (rht_grow_above_75(ht, tbl->size)) rhashtable_expand(ht); - else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) + else if (rht_shrink_below_30(ht, tbl->size)) rhashtable_shrink(ht); - unlock: mutex_unlock(&ht->mutex); } -static void rhashtable_wakeup_worker(struct rhashtable *ht) -{ - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - size_t size = tbl->size; - - /* Only adjust the table if no resizing is currently in progress. */ - if (tbl == new_tbl && - ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) || - (ht->p.shrink_decision && ht->p.shrink_decision(ht, size)))) - schedule_work(&ht->run_work); -} - static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - struct bucket_table *tbl, u32 hash) + struct bucket_table *tbl, + const struct bucket_table *old_tbl, u32 hash) { + bool no_resize_running = tbl == old_tbl; struct rhash_head *head; hash = rht_bucket_index(tbl, hash); @@ -568,8 +558,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); - - rhashtable_wakeup_worker(ht); + if (no_resize_running && rht_grow_above_75(ht, tbl->size)) + schedule_work(&ht->run_work); } /** @@ -599,7 +589,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); lock_buckets(tbl, old_tbl, hash); - __rhashtable_insert(ht, obj, tbl, hash); + __rhashtable_insert(ht, obj, tbl, old_tbl, hash); unlock_buckets(tbl, old_tbl, hash); rcu_read_unlock(); @@ -681,8 +671,11 @@ found: unlock_buckets(new_tbl, old_tbl, new_hash); if (ret) { + bool no_resize_running = new_tbl == old_tbl; + atomic_dec(&ht->nelems); - rhashtable_wakeup_worker(ht); + if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size)) + schedule_work(&ht->run_work); } rcu_read_unlock(); @@ -852,7 +845,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht, goto exit; } - __rhashtable_insert(ht, obj, new_tbl, new_hash); + __rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash); exit: unlock_buckets(new_tbl, old_tbl, new_hash); @@ -894,6 +887,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) if (!iter->walker) return -ENOMEM; + INIT_LIST_HEAD(&iter->walker->list); + iter->walker->resize = false; + mutex_lock(&ht->mutex); list_add(&iter->walker->list, &ht->walkers); mutex_unlock(&ht->mutex); @@ -1111,8 +1107,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (!ht->p.hash_rnd) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); - if (ht->p.grow_decision || ht->p.shrink_decision) - INIT_WORK(&ht->run_work, rht_deferred_worker); + INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } @@ -1130,8 +1125,7 @@ void rhashtable_destroy(struct rhashtable *ht) { ht->being_destroyed = true; - if (ht->p.grow_decision || ht->p.shrink_decision) - cancel_work_sync(&ht->run_work); + cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); bucket_table_free(rht_dereference(ht->tbl, ht)); diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 88c0854bd752..5c94e1012a91 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -61,7 +61,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) if (s->len < s->size) { len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); - if (seq_buf_can_fit(s, len)) { + if (s->len + len < s->size) { s->len += len; return 0; } @@ -118,7 +118,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) if (s->len < s->size) { ret = bstr_printf(s->buffer + s->len, len, fmt, binary); - if (seq_buf_can_fit(s, ret)) { + if (s->len + ret < s->size) { s->len += ret; return 0; } diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 1dfeba73fc74..67c7593d1dd6 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -191,18 +191,18 @@ error: return err; } +static struct rhashtable ht; + static int __init test_rht_init(void) { - struct rhashtable ht; struct rhashtable_params params = { .nelem_hint = TEST_HT_SIZE, .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), .hashfn = jhash, + .max_shift = 1, /* we expand/shrink manually here */ .nulls_base = (3U << RHT_BASE_SHIFT), - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; int err; @@ -222,6 +222,11 @@ static int __init test_rht_init(void) return err; } +static void __exit test_rht_exit(void) +{ +} + module_init(test_rht_init); +module_exit(test_rht_exit); MODULE_LICENSE("GPL v2"); diff --git a/mm/Makefile b/mm/Makefile index 3c1caa2693bd..15dbe9903c27 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -21,7 +21,7 @@ obj-y := filemap.o mempool.o oom_kill.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ - iov_iter.o debug.o $(mmu-y) + debug.o $(mmu-y) obj-y += init-mm.o @@ -64,15 +64,17 @@ static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) return (1UL << (align_order - cma->order_per_bit)) - 1; } +/* + * Find a PFN aligned to the specified order and return an offset represented in + * order_per_bits. + */ static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order) { - unsigned int alignment; - if (align_order <= cma->order_per_bit) return 0; - alignment = 1UL << (align_order - cma->order_per_bit); - return ALIGN(cma->base_pfn, alignment) - - (cma->base_pfn >> cma->order_per_bit); + + return (ALIGN(cma->base_pfn, (1UL << align_order)) + - cma->base_pfn) >> cma->order_per_bit; } static unsigned long cma_bitmap_maxno(struct cma *cma) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fc00c8cb5a82..6817b0350c71 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int target_nid, last_cpupid = -1; bool page_locked; bool migrated = false; + bool was_writable; int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -1291,12 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, flags |= TNF_FAULT_LOCAL; } - /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. - */ - if (!pmd_write(pmd)) + /* See similar comment in do_numa_page for explanation */ + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* @@ -1353,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { flags |= TNF_MIGRATED; page_nid = target_nid; - } + } else + flags |= TNF_MIGRATE_FAIL; goto out; clear_pmdnuma: BUG_ON(!PageLocked(page)); + was_writable = pmd_write(pmd); pmd = pmd_modify(pmd, vma->vm_page_prot); + pmd = pmd_mkyoung(pmd); + if (was_writable) + pmd = pmd_mkwrite(pmd); set_pmd_at(mm, haddr, pmdp, pmd); update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); @@ -1482,6 +1484,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pmd_t entry; + bool preserve_write = prot_numa && pmd_write(*pmd); + ret = 1; /* * Avoid trapping faults against the zero page. The read-only @@ -1490,16 +1494,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, */ if (prot_numa && is_huge_zero_pmd(*pmd)) { spin_unlock(ptl); - return 0; + return ret; } if (!prot_numa || !pmd_protnone(*pmd)) { - ret = 1; entry = pmdp_get_and_clear_notify(mm, addr, pmd); entry = pmd_modify(entry, newprot); + if (preserve_write) + entry = pmd_mkwrite(entry); ret = HPAGE_PMD_NR; set_pmd_at(mm, addr, pmd, entry); - BUG_ON(pmd_write(entry)); + BUG_ON(!preserve_write && pmd_write(entry)); } spin_unlock(ptl); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0a9ac6c26832..c41b2a0ee273 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -917,7 +917,6 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) __SetPageHead(page); __ClearPageReserved(page); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { - __SetPageTail(p); /* * For gigantic hugepages allocated through bootmem at * boot, it's safer to be consistent with the not-gigantic @@ -933,6 +932,9 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) __ClearPageReserved(p); set_page_count(p, 0); p->first_page = page; + /* Make sure p->first_page is always valid for PageTail() */ + smp_wmb(); + __SetPageTail(p); } } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 78fee632a7ee..936d81661c47 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -29,6 +29,7 @@ #include <linux/stacktrace.h> #include <linux/string.h> #include <linux/types.h> +#include <linux/vmalloc.h> #include <linux/kasan.h> #include "kasan.h" @@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size) GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, __builtin_return_address(0)); - return ret ? 0 : -ENOMEM; + + if (ret) { + find_vm_area(addr)->flags |= VM_KASAN; + return 0; + } + + return -ENOMEM; } -void kasan_module_free(void *addr) +void kasan_free_shadow(const struct vm_struct *vm) { - vfree(kasan_mem_to_shadow(addr)); + if (vm->flags & VM_KASAN) + vfree(kasan_mem_to_shadow(vm->addr)); } static void register_global(struct kasan_global *global) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9fe07692eaad..b34ef4a32a3b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5232,7 +5232,9 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) * on for the root memcg is enough. */ if (cgroup_on_dfl(root_css->cgroup)) - mem_cgroup_from_css(root_css)->use_hierarchy = true; + root_mem_cgroup->use_hierarchy = true; + else + root_mem_cgroup->use_hierarchy = false; } static u64 memory_current_read(struct cgroup_subsys_state *css, diff --git a/mm/memory.c b/mm/memory.c index 8068893697bb..97839f5c8c30 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int last_cpupid; int target_nid; bool migrated = false; + bool was_writable = pte_write(pte); int flags = 0; /* A PROT_NONE fault should not end up here */ @@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Make it present again */ pte = pte_modify(pte, vma->vm_page_prot); pte = pte_mkyoung(pte); + if (was_writable) + pte = pte_mkwrite(pte); set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); @@ -3069,11 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, } /* - * Avoid grouping on DSO/COW pages in specific and RO pages - * in general, RO pages shouldn't hurt as much anyway since - * they can be in shared cache state. + * Avoid grouping on RO pages in general. RO pages shouldn't hurt as + * much anyway since they can be in shared cache state. This misses + * the case where a mapping is writable but the process never writes + * to it but pte_write gets cleared during protection updates and + * pte_dirty has unpredictable behaviour between PTE scan updates, + * background writeback, dirty balancing and application behaviour. */ - if (!pte_write(pte)) + if (!(vma->vm_flags & VM_WRITE)) flags |= TNF_NO_GROUP; /* @@ -3097,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (migrated) { page_nid = target_nid; flags |= TNF_MIGRATED; - } + } else + flags |= TNF_MIGRATE_FAIL; out: if (page_nid != -1) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795bea..65842d688b7c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) return NULL; arch_refresh_nodedata(nid, pgdat); + } else { + /* Reset the nr_zones and classzone_idx to 0 before reuse */ + pgdat->nr_zones = 0; + pgdat->classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ @@ -1977,15 +1981,6 @@ void try_offline_node(int nid) if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } - - /* - * Since there is no way to guarentee the address of pgdat/zone is not - * on stack of any kernel threads or used by other kernel objects - * without reference counting or other symchronizing method, do not - * reset node_data and free pgdat here. Just reset it to 0 and reuse - * the memory when the node is online again. - */ - memset(pgdat, 0, sizeof(*pgdat)); } EXPORT_SYMBOL(try_offline_node); diff --git a/mm/mlock.c b/mm/mlock.c index 73cf0987088c..8a54cd214925 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -26,10 +26,10 @@ int can_do_mlock(void) { - if (capable(CAP_IPC_LOCK)) - return 1; if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; + if (capable(CAP_IPC_LOCK)) + return 1; return 0; } EXPORT_SYMBOL(can_do_mlock); diff --git a/mm/mmap.c b/mm/mmap.c index da9990acc08b..9ec50a368634 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -774,10 +774,8 @@ again: remove_next = 1 + (end > next->vm_end); importer->anon_vma = exporter->anon_vma; error = anon_vma_clone(importer, exporter); - if (error) { - importer->anon_vma = NULL; + if (error) return error; - } } } diff --git a/mm/mprotect.c b/mm/mprotect.c index 44727811bf4c..88584838e704 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; + bool preserve_write = prot_numa && pte_write(oldpte); /* * Avoid trapping faults against the zero or KSM @@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); + if (preserve_write) + ptent = pte_mkwrite(ptent); /* Avoid taking write faults for known dirty pages */ if (dirty_accountable && pte_dirty(ptent) && diff --git a/mm/mremap.c b/mm/mremap.c index 57dadc025c64..2dc44b1cb1df 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -286,8 +286,14 @@ static unsigned long move_vma(struct vm_area_struct *vma, old_len = new_len; old_addr = new_addr; new_addr = -ENOMEM; - } else if (vma->vm_file && vma->vm_file->f_op->mremap) - vma->vm_file->f_op->mremap(vma->vm_file, new_vma); + } else if (vma->vm_file && vma->vm_file->f_op->mremap) { + err = vma->vm_file->f_op->mremap(vma->vm_file, new_vma); + if (err < 0) { + move_page_tables(new_vma, new_addr, vma, old_addr, + moved_len, true); + return err; + } + } /* Conceal VM_ACCOUNT so old reservation is not undone */ if (vm_flags & VM_ACCOUNT) { diff --git a/mm/nommu.c b/mm/nommu.c index 3e67e7538ecf..3fba2dc97c44 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -62,6 +62,7 @@ void *high_memory; EXPORT_SYMBOL(high_memory); struct page *mem_map; unsigned long max_mapnr; +EXPORT_SYMBOL(max_mapnr); unsigned long highest_memmap_pfn; struct percpu_counter vm_committed_as; int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 45e187b2d971..644bcb665773 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period + * + * @written may have decreased due to account_page_redirty(). + * Avoid underflowing @bw calculation. */ - bw = written - bdi->written_stamp; + bw = written - min(written, bdi->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { do_div(bw, elapsed); @@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh, unsigned long now) { static DEFINE_SPINLOCK(dirty_lock); - static unsigned long update_time; + static unsigned long update_time = INITIAL_JIFFIES; /* * check locklessly first to optimize away locking for the most time diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7abfa70cdc1a..40e29429e7b0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2373,7 +2373,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)) + if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false) + || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) *did_some_progress = 1; out: oom_zonelist_unlock(ac->zonelist, gfp_mask); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab3..755a42c76eb4 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) if (!is_migrate_isolate_page(buddy)) { __isolate_free_page(page, order); + kernel_map_pages(page, (1 << order), 1); set_page_refcounted(page); isolated_page = page; } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 75c1f2878519..29f2f8b853ae 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end, vma = vma->vm_next; err = walk_page_test(start, next, walk); - if (err > 0) + if (err > 0) { + /* + * positive return values are purely for + * controlling the pagewalk, so should never + * be passed to the callers. + */ + err = 0; continue; + } if (err < 0) break; } diff --git a/mm/rmap.c b/mm/rmap.c index 5e3e09081164..c161a14b6a8f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) return 0; enomem_failure: + /* + * dst->anon_vma is dropped here otherwise its degree can be incorrectly + * decremented in unlink_anon_vmas(). + * We can safely do this because callers of anon_vma_clone() don't care + * about dst->anon_vma if anon_vma_clone() failed. + */ + dst->anon_vma = NULL; unlink_anon_vmas(dst); return -ENOMEM; } diff --git a/mm/slub.c b/mm/slub.c index 6832c4eab104..82c473780c91 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2449,7 +2449,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* * Irqless object alloc/free algorithm used here depends on sequence @@ -2718,7 +2719,8 @@ redo: do { tid = this_cpu_read(s->cpu_slab->tid); c = raw_cpu_ptr(s->cpu_slab); - } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid)); + } while (IS_ENABLED(CONFIG_PREEMPT) && + unlikely(tid != READ_ONCE(c->tid))); /* Same with comment on barrier() in slab_alloc_node() */ barrier(); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 35b25e1340ca..49abccf29a29 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1418,6 +1418,7 @@ struct vm_struct *remove_vm_area(const void *addr) spin_unlock(&vmap_area_lock); vmap_debug_free_range(va->va_start, va->va_end); + kasan_free_shadow(vm); free_unmap_vmap_area(va); vm->size -= PAGE_SIZE; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index d8e376a5f0f1..36a1a739ad68 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; - - if (chan->inuse) - p9_virtio_close(chan->client); - vdev->config->del_vqs(vdev); + unsigned long warning_time; mutex_lock(&virtio_9p_lock); + + /* Remove self from list so we don't get new users. */ list_del(&chan->chan_list); + warning_time = jiffies; + + /* Wait for existing users to close. */ + while (chan->inuse) { + mutex_unlock(&virtio_9p_lock); + msleep(250); + if (time_after(jiffies, warning_time + 10 * HZ)) { + dev_emerg(&vdev->dev, + "p9_virtio_remove: waiting for device in use.\n"); + warning_time = jiffies; + } + mutex_lock(&virtio_9p_lock); + } + mutex_unlock(&virtio_9p_lock); + + vdev->config->del_vqs(vdev); + sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); diff --git a/net/bridge/br.c b/net/bridge/br.c index fb57ab6b24f9..02c24cf63c34 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -190,6 +190,8 @@ static int __init br_init(void) { int err; + BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + err = stp_proto_register(&br_stp_proto); if (err < 0) { pr_err("bridge: can't register sap for STP\n"); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b087d278c679..1849d96b3c91 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) */ del_nbp(p); + dev_set_mtu(br->dev, br_min_mtu(br)); + spin_lock_bh(&br->lock); changed_addr = br_stp_recalculate_bridge_id(br); spin_unlock_bh(&br->lock); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 769b185fefbd..a6e2da0bc718 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, int copylen; ret = -EOPNOTSUPP; - if (m->msg_flags&MSG_OOB) + if (flags & MSG_OOB) goto read_error; skb = skb_recv_datagram(sk, flags, 0 , &ret); diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 8bc7caa28e64..434ba8557826 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -84,7 +84,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) u16 tmp; u16 len; u16 hdrchks; - u16 pktchks; + int pktchks; struct cffrml *this; this = container_obj(layr); diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 1be0b521ac49..f6c3b2137eea 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -255,9 +255,9 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) return skb->len; } -inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) +int cfpkt_iterate(struct cfpkt *pkt, + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, diff --git a/net/can/af_can.c b/net/can/af_can.c index 66e08040ced7..32d710eaf1fc 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop) goto inval_skb; } + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 6b3f54ed65ba..a9f4ae45b7fb 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) IPPROTO_TCP, &sock); if (ret) return ret; - sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC; + sock->sk->sk_allocation = GFP_NOFS; #ifdef CONFIG_LOCKDEP lockdep_set_class(&sock->sk->sk_lock, &socket_class); @@ -520,8 +520,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) ret); } - sk_set_memalloc(sock->sk); - con->sock = sock; return 0; } @@ -2808,11 +2806,8 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - unsigned long pflags = current->flags; bool fault; - current->flags |= PF_MEMALLOC; - mutex_lock(&con->mutex); while (true) { int ret; @@ -2866,8 +2861,6 @@ static void con_work(struct work_struct *work) con_fault_finish(con); con->ops->put(con); - - tsk_restore_flags(current, pflags, PF_MEMALLOC); } /* diff --git a/net/compat.c b/net/compat.c index 3236b4167a32..f7bd286a8280 100644 --- a/net/compat.c +++ b/net/compat.c @@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg, __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + + if (!uaddr) + kmsg->msg_namelen = 0; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control = compat_ptr(tmp3); @@ -711,24 +718,18 @@ static unsigned char nas[21] = { COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } @@ -751,9 +752,6 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, int datagrams; struct timespec ktspec; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - if (timeout == NULL) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL); diff --git a/net/core/dev.c b/net/core/dev.c index 8f9710c62e20..45109b70664e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -946,7 +946,7 @@ bool dev_valid_name(const char *name) return false; while (*name) { - if (*name == '/' || isspace(*name)) + if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } @@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -static DEFINE_PER_CPU(int, xmit_recursion); +DEFINE_PER_CPU(int, xmit_recursion); +EXPORT_SYMBOL(xmit_recursion); + #define RECURSION_LIMIT 10 /** diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 91f74f3eb204..aa378ecef186 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 44706e81b2e0..e4fdc9dfb2c7 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -175,9 +175,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops) spin_lock(&net->rules_mod_lock); list_del_rcu(&ops->list); - fib_rules_cleanup_ops(ops); spin_unlock(&net->rules_mod_lock); + fib_rules_cleanup_ops(ops); call_rcu(&ops->rcu, fib_rules_put_rcu); } EXPORT_SYMBOL_GPL(fib_rules_unregister); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 0c08062d1796..1e2f46a69d50 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) return 0; nla_put_failure: + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return -1; } @@ -305,7 +308,9 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { - d->xstats = st; + d->xstats = kmemdup(st, len, GFP_ATOMIC); + if (!d->xstats) + goto err_out; d->xstats_len = len; } @@ -313,6 +318,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return gnet_stats_copy(d, TCA_STATS_APP, st, len); return 0; + +err_out: + d->xstats_len = 0; + spin_unlock_bh(d->lock); + return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -345,6 +355,9 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return 0; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb5290b8c428..70d3450588b2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -198,8 +198,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc) */ int peernet2id(struct net *net, struct net *peer) { - int id = __peernet2id(net, peer, true); + bool alloc = atomic_read(&peer->count) == 0 ? false : true; + int id; + id = __peernet2id(net, peer, alloc); return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; } EXPORT_SYMBOL(peernet2id); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b4899f5b7388..508155b283dd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1134,6 +1134,9 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; + if ((value > 1) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); return count; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab293a3066b3..7ebed55b5f7d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; - rcu_read_lock(); cb->seq = net->dev_base_seq; /* A hack to preserve kernel<->userspace interface. @@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { + hlist_for_each_entry(dev, head, index_hlist) { if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1344,7 +1343,6 @@ cont: } } out: - rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; @@ -1934,10 +1932,10 @@ static int rtnl_group_changelink(const struct sk_buff *skb, struct ifinfomsg *ifm, struct nlattr **tb) { - struct net_device *dev; + struct net_device *dev, *aux; int err; - for_each_netdev(net, dev) { + for_each_netdev_safe(net, dev, aux) { if (dev->group == group) { err = do_setlink(skb, dev, ifm, tb, NULL, 0); if (err < 0) @@ -2012,8 +2010,8 @@ replay: } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; - struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; + struct nlattr *attr[ops ? ops->maxtype + 1 : 1]; + struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; struct net *dest_net, *link_net = NULL; @@ -2122,6 +2120,10 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); + err = -EPERM; + if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN)) + goto out; + if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); @@ -2130,6 +2132,9 @@ replay: err = -EINVAL; goto out; } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; } dev = rtnl_create_link(link_net ? : dest_net, ifname, @@ -2161,28 +2166,28 @@ replay: } } err = rtnl_configure_link(dev, ifm); - if (err < 0) { - if (ops->newlink) { - LIST_HEAD(list_kill); - - ops->dellink(dev, &list_kill); - unregister_netdevice_many(&list_kill); - } else { - unregister_netdevice(dev); - } - goto out; - } - + if (err < 0) + goto out_unregister; if (link_net) { err = dev_change_net_namespace(dev, dest_net, ifname); if (err < 0) - unregister_netdevice(dev); + goto out_unregister; } out: if (link_net) put_net(link_net); put_net(dest_net); return err; +out_unregister: + if (ops->newlink) { + LIST_HEAD(list_kill); + + ops->dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + } else { + unregister_netdevice(dev); + } + goto out; } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 88c613eab142..8e4ac97c8477 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3621,13 +3621,14 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next; + unsigned long flags; int err = 0; - spin_lock_bh(&q->lock); + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; - spin_unlock_bh(&q->lock); + spin_unlock_irqrestore(&q->lock, flags); sk->sk_err = err; if (err) @@ -3732,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; - bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + bool tsonly; + + if (!sk) + return; - if (!sk || !skb_may_tx_timestamp(sk, tsonly)) + tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; + if (!skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) @@ -4172,7 +4177,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; - skb->sender_cpu = 0; + skb_sender_cpu_clear(skb); skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 93c8b20c91e4..71e3e5f1eaa0 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) sock_reset_flag(sk, bit); } +bool sk_mc_loop(struct sock *sk) +{ + if (dev_recursion_level()) + return false; + if (!sk) + return true; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + WARN_ON(1); + return true; +} +EXPORT_SYMBOL(sk_mc_loop); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -1655,6 +1674,10 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +/* + * Buffer destructor for skbs that are not used directly in read or write + * path, e.g. for error handler skbs. Automatically called from kfree_skb. + */ void sock_efree(struct sk_buff *skb) { sock_put(skb->sk); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 433424804284..8ce351ffceb1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,8 @@ static int zero = 0; static int one = 1; static int ushort_max = USHRT_MAX; +static int min_sndbuf = SOCK_MIN_SNDBUF; +static int min_rcvbuf = SOCK_MIN_RCVBUF; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_max", @@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "wmem_default", @@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_default", @@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "dev_weight", diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1d7c1256e845..3b81092771f8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1062,7 +1062,7 @@ source_ok: if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." - " dst=%o4x src=%04x oif=%d try_hard=%d\n", + " dst=%04x src=%04x oif=%d try_hard=%d\n", le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), fld.flowidn_oif, try_hard); diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index faf7cc3483fe..9d66a0f72f90 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -248,7 +248,9 @@ void __init dn_fib_rules_init(void) void __exit dn_fib_rules_cleanup(void) { + rtnl_lock(); fib_rules_unregister(dn_fib_rules_ops); + rtnl_unlock(); rcu_barrier(); } diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2173402d87e0..4dea2e0681d1 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -501,12 +501,10 @@ static struct net_device *dev_to_net_device(struct device *dev) #ifdef CONFIG_OF static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, struct dsa_chip_data *cd, - int chip_index, + int chip_index, int port_index, struct device_node *link) { - int ret; const __be32 *reg; - int link_port_addr; int link_sw_addr; struct device_node *parent_sw; int len; @@ -519,6 +517,10 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, if (!reg || (len != sizeof(*reg) * 2)) return -EINVAL; + /* + * Get the destination switch number from the second field of its 'reg' + * property, i.e. for "reg = <0x19 1>" sw_addr is '1'. + */ link_sw_addr = be32_to_cpup(reg + 1); if (link_sw_addr >= pd->nr_chips) @@ -535,20 +537,9 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); } - reg = of_get_property(link, "reg", NULL); - if (!reg) { - ret = -EINVAL; - goto out; - } - - link_port_addr = be32_to_cpup(reg); - - cd->rtable[link_sw_addr] = link_port_addr; + cd->rtable[link_sw_addr] = port_index; return 0; -out: - kfree(cd->rtable); - return ret; } static void dsa_of_free_platform_data(struct dsa_platform_data *pd) @@ -658,7 +649,7 @@ static int dsa_of_probe(struct platform_device *pdev) if (!strcmp(port_name, "dsa") && link && pd->nr_chips > 1) { ret = dsa_of_setup_routing_table(pd, cd, - chip_index, link); + chip_index, port_index, link); if (ret) goto out_free_chip; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index a138d75751df..44d27469ae55 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -359,8 +359,11 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) struct hsr_port *port; hsr = netdev_priv(hsr_dev); + + rtnl_lock(); hsr_for_each_port(hsr, port) hsr_del_port(port); + rtnl_unlock(); del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 779d28b65417..cd37d0011b42 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -36,6 +36,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Not an HSR device */ hsr = netdev_priv(dev); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + if (port == NULL) { + /* Resend of notification concerning removed device? */ + return NOTIFY_DONE; + } } else { hsr = port->hsr; } diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index a348dcbcd683..7d37366cc695 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -181,8 +181,10 @@ void hsr_del_port(struct hsr_port *port) list_del_rcu(&port->port_list); if (port != master) { - netdev_update_features(master->dev); - dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + if (master != NULL) { + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + } netdev_rx_handler_unregister(port->dev); dev_set_promiscuity(port->dev, -1); } @@ -192,5 +194,7 @@ void hsr_del_port(struct hsr_port *port) */ synchronize_rcu(); - dev_put(port->dev); + + if (port != master) + dev_put(port->dev); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 57be71dd6a9e..23b9b3e86f4c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net) { unsigned int i; + rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - - rtnl_lock(); for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..3e44b9b0b78e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); + sched_annotate_sleep(); lock_sock(sk); err = 0; if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 81751f12645f..592aff37366b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler( mutex_unlock(&inet_diag_table_mutex); } +static size_t inet_sk_attr_size(void) +{ + return nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, @@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s if (err) goto out; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - sizeof(struct tcp_info) + 64, GFP_KERNEL); + rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 787b3c294ce6..d9bc28ac5d1b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + skb_sender_cpu_clear(skb); return dst_output(skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e5b6d0ddcb58..145a50c4d566 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) { struct iphdr iph; + int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; - if (!skb_copy_bits(skb, 0, &iph, sizeof(iph))) + netoff = skb_network_offset(skb); + + if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); - if (skb->len < len || len < (iph.ihl * 4)) + if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, iph.ihl*4)) + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) return skb; - if (pskb_trim_rcsum(skb, len)) + if (pskb_trim_rcsum(skb, netoff + len)) return skb; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d68199d9b2b0..a7aea2048a0d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,7 +888,8 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (sk->sk_type == SOCK_DGRAM)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 31d8c71986b4..5cd99271d3a6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } -static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, - const struct sk_buff *skb, - int ee_origin) +/* IPv4 supports cmsg on all imcp errors and some timestamps + * + * Timestamp code paths do not initialize the fields expected by cmsg: + * the PKTINFO fields in skb->cb[]. Fill those in here. + */ +static bool ipv4_datagram_support_cmsg(const struct sock *sk, + struct sk_buff *skb, + int ee_origin) { - struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + struct in_pktinfo *info; + + if (ee_origin == SO_EE_ORIGIN_ICMP) + return true; - if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || - (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + if (ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + + /* Support IP_PKTINFO on tstamp packets if requested, to correlate + * timestamp with egress dev. Not possible for packets without dev + * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). + */ + if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || (!skb->dev)) return false; + info = PKTINFO_SKB_CB(skb); info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; info->ipi_ifindex = skb->dev->ifindex; return true; @@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && skb->len) { + if (sin && serr->port) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (skb->len && - (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { + if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d78427652d2..fe54eba6d00d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -268,7 +268,7 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: - kfree(mrt); + ipmr_free_table(mrt); err1: fib_rules_unregister(ops); return err; @@ -278,11 +278,13 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); + rtnl_unlock(); } #else #define ipmr_for_each_table(mrt, net) \ @@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { + rtnl_lock(); ipmr_free_table(net->ipv4.mrt); + net->ipv4.mrt = NULL; + rtnl_unlock(); } #endif diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99e810f84671..cf5e82f39d3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e9f66e1cda50..208d5439e59b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk) kgid_t low, high; int ret = 0; + if (sk->sk_family == AF_INET6) + sk->sk_ipv6only = 1; + inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; @@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin_family != AF_INET && + !(addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr == htonl(INADDR_ANY))) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); @@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) - return -EINVAL; + return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9d72a0fcd928..995a2259bcfc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); - u32 new_size_goal, size_goal, hlen; + u32 new_size_goal, size_goal; if (!large_allowed || !sk_can_gso(sk)) return mss_now; - /* Maybe we should/could use sk->sk_prot->max_header here ? */ - hlen = inet_csk(sk)->icsk_af_ops->net_header_len + - inet_csk(sk)->icsk_ext_hdr_len + - tp->tcp_header_len; - - new_size_goal = sk->sk_gso_max_size - 1 - hlen; + /* Note : tcp_tso_autosize() will eventually split this later */ + new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER; new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); /* We try hard to avoid divides here */ diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d694088214cd..62856e185a93 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); */ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + /* If credits accumulated at a higher w, apply them gently now. */ + if (tp->snd_cwnd_cnt >= w) { + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd++; + } + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { u32 delta = tp->snd_cwnd_cnt / w; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4b276d1ed980..06d3d665a9fd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -306,8 +306,10 @@ tcp_friendliness: } } - if (ca->cnt == 0) /* cannot be zero */ - ca->cnt = 1; + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. + */ + ca->cnt = max(ca->cnt, 2U); } static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fdd27b17306..f501ac048366 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) @@ -4770,7 +4771,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tp->packets_out >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return false; return true; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a2dfed4783b..f1756ee02207 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..1db253e36045 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5f6bd9a210a..dab73813cb92 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; + skb->protocol = htons(ETH_P_IP); return x->outer_mode->output2(x, skb); } @@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - skb->protocol = htons(ETH_P_IP); #ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98e4a63d72bb..b6030025f411 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4903,6 +4903,21 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + int min_mtu = IPV6_MIN_MTU; + struct ctl_table lctl; + + lctl = *ctl; + lctl.extra1 = &min_mtu; + lctl.extra2 = idev ? &idev->dev->mtu : NULL; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + static void dev_disable_change(struct inet6_dev *idev) { struct netdev_notifier_info info; @@ -5054,7 +5069,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_mtu, }, { .procname = "accept_ra", diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c215be70cac0..ace8daca5c83 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } -static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. + * + * At one point, excluding local errors was a quick test to identify icmp/icmp6 + * errors. This is no longer true, but the test remained, so the v6 stack, + * unlike v4, also honors cmsg requests on all wifi and timestamp errors. + * + * Timestamp code paths do not initialize the fields expected by cmsg: + * the PKTINFO fields in skb->cb[]. Fill those in here. + */ +static bool ip6_datagram_support_cmsg(struct sk_buff *skb, + struct sock_exterr_skb *serr) { - int ifindex = skb->dev ? skb->dev->ifindex : -1; + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) + return true; + + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + + if (!skb->dev) + return false; if (skb->protocol == htons(ETH_P_IPV6)) - IP6CB(skb)->iif = ifindex; + IP6CB(skb)->iif = skb->dev->ifindex; else - PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; + PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex; + + return true; } /* @@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && skb->len) { + if (sin && serr->port) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { + + if (ip6_datagram_support_cmsg(skb, serr)) { sin->sin6_family = AF_INET6; - if (np->rxopt.all) { - if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && - serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) - ip6_datagram_prepare_pktinfo_errqueue(skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); - } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4d5e1d97c1b..70bc6abc0639 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto again; flp6->saddr = saddr; } + err = rt->dst.error; goto out; } again: @@ -321,7 +322,9 @@ out_fib6_rules_ops: static void __net_exit fib6_rules_net_exit(struct net *net) { + rtnl_lock(); fib_rules_unregister(net->ipv6.fib6_rules_ops); + rtnl_unlock(); } static struct pernet_operations fib6_rules_net_ops = { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7deebf102cba..36cf0ab685a0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct sk_buff *skb) { + skb_sender_cpu_clear(skb); return dst_output(skb); } @@ -541,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; @@ -1298,7 +1300,8 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && + (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 266a264ec212..ddd94eca19b3 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -314,7 +314,7 @@ out: * Create tunnel matching given parameters. * * Return: - * created tunnel or NULL + * created tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) @@ -322,7 +322,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) struct net_device *dev; struct ip6_tnl *t; char name[IFNAMSIZ]; - int err; + int err = -ENOMEM; if (p->name[0]) strlcpy(name, p->name, IFNAMSIZ); @@ -348,7 +348,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) failed_free: ip6_dev_free(dev); failed: - return NULL; + return ERR_PTR(err); } /** @@ -362,7 +362,7 @@ failed: * tunnel device is created and registered for use. * * Return: - * matching tunnel or NULL + * matching tunnel or error pointer **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, @@ -380,13 +380,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net, if (ipv6_addr_equal(local, &t->parms.laddr) && ipv6_addr_equal(remote, &t->parms.raddr)) { if (create) - return NULL; + return ERR_PTR(-EEXIST); return t; } } if (!create) - return NULL; + return ERR_PTR(-ENODEV); return ip6_tnl_create(net, p); } @@ -1420,7 +1420,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); - if (t == NULL) + if (IS_ERR(t)) t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); @@ -1445,7 +1445,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (!IS_ERR(t)) { if (t->dev != dev) { err = -EEXIST; break; @@ -1457,14 +1457,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) else err = ip6_tnl_update(t, &p1); } - if (t) { + if (!IS_ERR(t)) { err = 0; ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + } else { + err = PTR_ERR(t); + } break; case SIOCDELTUNNEL: err = -EPERM; @@ -1478,7 +1479,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -ENOENT; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); - if (t == NULL) + if (IS_ERR(t)) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -1672,12 +1673,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); - struct ip6_tnl *nt; + struct ip6_tnl *nt, *t; nt = netdev_priv(dev); ip6_tnl_netlink_parms(data, &nt->parms); - if (ip6_tnl_locate(net, &nt->parms, 0)) + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) return -EEXIST; return ip6_tnl_create2(dev); @@ -1697,8 +1699,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], ip6_tnl_netlink_parms(data, &p); t = ip6_tnl_locate(net, &p, 0); - - if (t) { + if (!IS_ERR(t)) { if (t->dev != dev) return -EEXIST; } else diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 34b682617f50..312e0ff47339 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -252,7 +252,7 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: - kfree(mrt); + ip6mr_free_table(mrt); err1: fib_rules_unregister(ops); return err; @@ -267,8 +267,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net) list_del(&mrt->list); ip6mr_free_table(mrt); } - rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); + rtnl_unlock(); } #else #define ip6mr_for_each_table(mrt, net) \ @@ -336,7 +336,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { - del_timer(&mrt->ipmr_expire_timer); + del_timer_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt); kfree(mrt); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 471ed24aabae..14ecdaf06bf7 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1218,7 +1218,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + /* Only set hop_limit on the interface if it is higher than + * the current hop_limit. + */ + if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + } else { + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + } if (rt) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e080fbbbc0e5..bb00c6f2a885 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index bd46f736f61d..a2dfff6ff227 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -102,9 +102,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); - if (msg->msg_namelen < sizeof(struct sockaddr_in6) || - u->sin6_family != AF_INET6) { + if (msg->msg_namelen < sizeof(*u)) return -EINVAL; + if (u->sin6_family != AF_INET6) { + return -EAFNOSUPPORT; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != u->sin6_scope_id) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5d46832c6f72..1f5e62229aaa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1411,6 +1411,15 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->sacked = 0; } +static void tcp_v6_restore_cb(struct sk_buff *skb) +{ + /* We need to move header back to the beginning if xfrm6_policy_check() + * and tcp_v6_fill_cb() are going to be called again. + */ + memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, + sizeof(struct inet6_skb_parm)); +} + static int tcp_v6_rcv(struct sk_buff *skb) { const struct tcphdr *th; @@ -1543,6 +1552,7 @@ do_time_wait: inet_twsk_deschedule(tw, &tcp_death_row); inet_twsk_put(tw); sk = sk2; + tcp_v6_restore_cb(skb); goto process; } /* Fall through to ACK */ @@ -1551,6 +1561,7 @@ do_time_wait: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: + tcp_v6_restore_cb(skb); goto no_tcp_socket; case TCP_TW_SUCCESS: ; @@ -1585,7 +1596,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index ab889bb16b3c..be2c0ba82c85 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - if (skb_shinfo(skb)->ip6_frag_id) - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - else - ipv6_select_ident(fptr, - (struct rt6_info *)skb_dst(skb)); + if (!skb_shinfo(skb)->ip6_frag_id) + ipv6_proxy_select_ident(skb); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index ca3f29b98ae5..010f8bd2d577 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; skb->ignore_df = 1; + skb->protocol = htons(ETH_P_IPV6); return x->outer_mode->output2(x, skb); } @@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - skb->protocol = htons(ETH_P_IPV6); #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 48bf5a06847b..8d2d01b4800a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -200,6 +200,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: + offset += ipv6_optlen(exthdr); if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) { struct ip6_mh *mh; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 40695b9751c1..683346d2d633 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -798,7 +798,9 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) orig_jiffies = jiffies; /* Set poll time to 200 ms */ - poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200)); + poll_time = msecs_to_jiffies(200); + if (timeout) + poll_time = min_t(unsigned long, timeout, poll_time); spin_lock_irqsave(&self->spinlock, flags); while (self->tx_skb && self->tx_skb->len) { @@ -811,7 +813,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) break; } spin_unlock_irqrestore(&self->spinlock, flags); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); } /* diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 3c83a1e5ab03..1215693fdd22 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -305,7 +305,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Put ourselves on the wait queue to be woken up */ add_wait_queue(&irnet_events.rwait, &wait); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); for(;;) { /* If there is unread events */ @@ -321,7 +321,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Yield and wait to be woken up */ schedule(); } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&irnet_events.rwait, &wait); /* Did we got it ? */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2e9953b2db84..53d931172088 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1114,10 +1114,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, noblock, &err); else skb = sock_alloc_send_skb(sk, len, noblock, &err); - if (!skb) { - err = -ENOMEM; + if (!skb) goto out; - } if (iucv->transport == AF_IUCV_TRANS_HIPER) skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN); if (memcpy_from_msg(skb_put(skb, len), msg, len)) { diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 895348e44c7d..a29a504492af 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1871,6 +1871,7 @@ static int __init l2tp_init(void) l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); + unregister_pernet_device(&l2tp_net_ops); rc = -ENOMEM; goto out; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a48bad468880..7702978a4c99 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -49,8 +49,6 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) container_of(h, struct tid_ampdu_rx, rcu_head); int i; - del_timer_sync(&tid_rx->reorder_timer); - for (i = 0; i < tid_rx->buf_size; i++) __skb_queue_purge(&tid_rx->reorder_buf[i]); kfree(tid_rx->reorder_buf); @@ -93,6 +91,12 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, del_timer_sync(&tid_rx->session_timer); + /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */ + spin_lock_bh(&tid_rx->reorder_lock); + tid_rx->removed = true; + spin_unlock_bh(&tid_rx->reorder_lock); + del_timer_sync(&tid_rx->reorder_timer); + call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ff0d2db09df9..5bcd4e5589d3 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1508,6 +1508,8 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); + sdata->radar_required = false; + /* Unreserving may ready an in-place reservation. */ if (use_reserved_switch) ieee80211_vif_use_reserved_switch(local); @@ -1566,6 +1568,9 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); out: + if (ret) + sdata->radar_required = false; + mutex_unlock(&local->chanctx_mtx); return ret; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3afe36824703..8d53d65bd2ab 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,13 +58,24 @@ struct ieee80211_local; #define IEEE80211_UNSET_POWER_LEVEL INT_MIN /* - * Some APs experience problems when working with U-APSD. Decrease the - * probability of that happening by using legacy mode for all ACs but VO. - * The AP that caused us trouble was a Cisco 4410N. It ignores our - * setting, and always treats non-VO ACs as legacy. + * Some APs experience problems when working with U-APSD. Decreasing the + * probability of that happening by using legacy mode for all ACs but VO isn't + * enough. + * + * Cisco 4410N originally forced us to enable VO by default only because it + * treated non-VO ACs as legacy. + * + * However some APs (notably Netgear R7000) silently reclassify packets to + * different ACs. Since u-APSD ACs require trigger frames for frame retrieval + * clients would never see some frames (e.g. ARP responses) or would fetch them + * accidentally after a long time. + * + * It makes little sense to enable u-APSD queues by default because it needs + * userspace applications to be aware of it to actually take advantage of the + * possible additional powersavings. Implicitly depending on driver autotrigger + * frame support doesn't make much sense. */ -#define IEEE80211_DEFAULT_UAPSD_QUEUES \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_VO +#define IEEE80211_DEFAULT_UAPSD_QUEUES 0 #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL @@ -453,6 +464,7 @@ struct ieee80211_if_managed { unsigned int flags; bool csa_waiting_bcn; + bool csa_ignored_same_chan; bool beacon_crc_valid; u32 beacon_crc; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 10ac6324c1d0..142f66aece18 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, return; } + if (cfg80211_chandef_identical(&csa_ie.chandef, + &sdata->vif.bss_conf.chandef)) { + if (ifmgd->csa_ignored_same_chan) + return; + sdata_info(sdata, + "AP %pM tries to chanswitch to same channel, ignore\n", + ifmgd->associated->bssid); + ifmgd->csa_ignored_same_chan = true; + return; + } + mutex_lock(&local->mtx); mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.chanctx_conf, @@ -1210,6 +1221,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = true; sdata->csa_chandef = csa_ie.chandef; sdata->csa_block_tx = csa_ie.mode; + ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, @@ -2090,6 +2102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = false; ifmgd->csa_waiting_bcn = false; + ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -3204,7 +3217,8 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_CHANNEL_SWITCH) | (1ULL << WLAN_EID_PWR_CONSTRAINT) | (1ULL << WLAN_EID_HT_CAPABILITY) | - (1ULL << WLAN_EID_HT_OPERATION); + (1ULL << WLAN_EID_HT_OPERATION) | + (1ULL << WLAN_EID_EXT_CHANSWITCH_ANN); static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 7c86a002df95..ef6e8a6c4253 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -373,7 +373,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate++; mi->sample_deferred++; } else { - if (!msr->sample_limit != 0) + if (!msr->sample_limit) return; mi->sample_packets++; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1101563357ea..1eb730bf8752 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -873,9 +873,10 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, set_release_timer: - mod_timer(&tid_agg_rx->reorder_timer, - tid_agg_rx->reorder_time[j] + 1 + - HT_RX_REORDER_BUF_TIMEOUT); + if (!tid_agg_rx->removed) + mod_timer(&tid_agg_rx->reorder_timer, + tid_agg_rx->reorder_time[j] + 1 + + HT_RX_REORDER_BUF_TIMEOUT); } else { del_timer(&tid_agg_rx->reorder_timer); } @@ -2214,6 +2215,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *) skb->data; mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) + return RX_DROP_MONITOR; + /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 925e68fe64c7..fb0fc1302a58 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -175,6 +175,7 @@ struct tid_ampdu_tx { * @reorder_lock: serializes access to reorder buffer, see below. * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and * and ssn. + * @removed: this session is removed (but might have been found due to RCU) * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -199,6 +200,7 @@ struct tid_ampdu_rx { u16 timeout; u8 dialog_token; bool auto_seq; + bool removed; }; /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 88a18ffe2975..07bd8db00af8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -566,6 +566,7 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx) if (tx->sdata->control_port_no_encrypt) info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + info->flags |= IEEE80211_TX_CTL_USE_MINRATE; } return TX_CONTINUE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8428f4a95479..747bdcf72e92 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3178,7 +3178,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, wdev_iter = &sdata_iter->wdev; if (sdata_iter == sdata || - rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL || + !ieee80211_sdata_running(sdata_iter) || local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype)) continue; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e55759056361..ed99448671c3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3402,7 +3402,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (udest.af == 0) udest.af = svc->af; - if (udest.af != svc->af) { + if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { /* The synchronization protocol is incompatible * with mixed family services */ diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c47ffd7a0a70..d93ceeb3ef04 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -896,6 +896,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + kfree(param->pe_data); } if (opt) @@ -1169,6 +1171,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #endif + ip_vs_pe_put(param.pe); return 0; /* Error exit */ out: diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 0d8448f19dfe..675d12c69e32 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -212,6 +212,30 @@ void nf_log_packet(struct net *net, } EXPORT_SYMBOL(nf_log_packet); +void nf_log_trace(struct net *net, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + const struct nf_logger *logger; + + rcu_read_lock(); + logger = rcu_dereference(net->nf.nf_loggers[pf]); + if (logger) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_trace); + #define S_SIZE (1024 - (sizeof(unsigned int) + 1)) struct nf_log_buf { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 199fd0f27b0e..ac1a9528dbf2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -227,7 +227,7 @@ nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) { - rule->genmask = 0; + rule->genmask &= ~(1 << gencursor_next(net)); } static int @@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_CHAIN_POLICY]) { if ((chain != NULL && - !(chain->flags & NFT_BASE_CHAIN)) || + !(chain->flags & NFT_BASE_CHAIN))) + return -EOPNOTSUPP; + + if (chain == NULL && nla[NFTA_CHAIN_HOOK] == NULL) return -EOPNOTSUPP; @@ -1711,9 +1714,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, } nla_nest_end(skb, list); - if (rule->ulen && - nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) - goto nla_put_failure; + if (rule->udata) { + struct nft_userdata *udata = nft_userdata(rule); + if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1, + udata->data) < 0) + goto nla_put_failure; + } nlmsg_end(skb, nlh); return 0; @@ -1896,11 +1902,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, struct nft_table *table; struct nft_chain *chain; struct nft_rule *rule, *old_rule = NULL; + struct nft_userdata *udata; struct nft_trans *trans = NULL; struct nft_expr *expr; struct nft_ctx ctx; struct nlattr *tmp; - unsigned int size, i, n, ulen = 0; + unsigned int size, i, n, ulen = 0, usize = 0; int err, rem; bool create; u64 handle, pos_handle; @@ -1968,12 +1975,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, n++; } } + /* Check for overflow of dlen field */ + err = -EFBIG; + if (size >= 1 << 12) + goto err1; - if (nla[NFTA_RULE_USERDATA]) + if (nla[NFTA_RULE_USERDATA]) { ulen = nla_len(nla[NFTA_RULE_USERDATA]); + if (ulen > 0) + usize = sizeof(struct nft_userdata) + ulen; + } err = -ENOMEM; - rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL); + rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL); if (rule == NULL) goto err1; @@ -1981,10 +1995,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, rule->handle = handle; rule->dlen = size; - rule->ulen = ulen; + rule->udata = ulen ? 1 : 0; - if (ulen) - nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen); + if (ulen) { + udata = nft_userdata(rule); + udata->len = ulen - 1; + nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen); + } expr = nft_expr_first(rule); for (i = 0; i < n; i++) { @@ -2031,12 +2048,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, err3: list_del_rcu(&rule->list); - if (trans) { - list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_clear(net, nft_trans_rule(trans)); - nft_trans_destroy(trans); - chain->use++; - } err2: nf_tables_rule_destroy(&ctx, rule); err1: @@ -3612,12 +3623,11 @@ static int nf_tables_commit(struct sk_buff *skb) &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->get(te->set, &te->elem); - te->set->ops->remove(te->set, &te->elem); nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); - if (te->elem.flags & NFT_SET_MAP) { - nft_data_uninit(&te->elem.data, - te->set->dtype); - } + if (te->set->flags & NFT_SET_MAP && + !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) + nft_data_uninit(&te->elem.data, te->set->dtype); + te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; } @@ -3658,7 +3668,7 @@ static int nf_tables_abort(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); struct nft_trans *trans, *next; - struct nft_set *set; + struct nft_trans_elem *te; list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { switch (trans->msg_type) { @@ -3719,9 +3729,13 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWSETELEM: nft_trans_elem_set(trans)->nelems--; - set = nft_trans_elem_set(trans); - set->ops->get(set, &nft_trans_elem(trans)); - set->ops->remove(set, &nft_trans_elem(trans)); + te = (struct nft_trans_elem *)trans->data; + te->set->ops->get(te->set, &te->elem); + nft_data_uninit(&te->elem.key, NFT_DATA_VALUE); + if (te->set->flags & NFT_SET_MAP && + !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END)) + nft_data_uninit(&te->elem.data, te->set->dtype); + te->set->ops->remove(te->set, &te->elem); nft_trans_destroy(trans); break; case NFT_MSG_DELSETELEM: diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 3b90eb2b2c55..2d298dccb6dd 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, { struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, - pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", - chain->table->name, chain->name, comments[type], - rulenum); + nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", + chain->table->name, chain->name, comments[type], + rulenum); } unsigned int diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a5599fc51a6f..54330fb5efaf 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; + /* Not all fields are initialized so first zero the tuple */ + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index c598f74063a1..65f3e2b6be44 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -123,7 +123,7 @@ static void nft_target_set_tgchk_param(struct xt_tgchk_param *par, const struct nft_ctx *ctx, struct xt_target *target, void *info, - union nft_entry *entry, u8 proto, bool inv) + union nft_entry *entry, u16 proto, bool inv) { par->net = ctx->net; par->table = ctx->table->name; @@ -133,11 +133,14 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; case NFPROTO_BRIDGE: - entry->ebt.ethproto = proto; + entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; } @@ -171,7 +174,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, }; -static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv) +static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; u32 flags; @@ -203,7 +206,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); - u8 proto = 0; + u16 proto = 0; bool inv = false; union nft_entry e = {}; int ret; @@ -334,7 +337,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { static void nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, struct xt_match *match, void *info, - union nft_entry *entry, u8 proto, bool inv) + union nft_entry *entry, u16 proto, bool inv) { par->net = ctx->net; par->table = ctx->table->name; @@ -344,11 +347,14 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx, entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0; break; case AF_INET6: + if (proto) + entry->e6.ipv6.flags |= IP6T_F_PROTO; + entry->e6.ipv6.proto = proto; entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0; break; case NFPROTO_BRIDGE: - entry->ebt.ethproto = proto; + entry->ebt.ethproto = (__force __be16)proto; entry->ebt.invflags = inv ? EBT_IPROTO : 0; break; } @@ -385,7 +391,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); - u8 proto = 0; + u16 proto = 0; bool inv = false; union nft_entry e = {}; int ret; @@ -625,8 +631,12 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match = nft_match->ops.data; if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) + match->revision == rev && match->family == family) { + if (!try_module_get(match->me)) + return ERR_PTR(-ENOENT); + return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -695,8 +705,12 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) + target->revision == rev && target->family == family) { + if (!try_module_get(target->me)) + return ERR_PTR(-ENOENT); + return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 61e6c407476a..37c15e674884 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, iter->err = err; goto out; } + + continue; } if (iter->count < iter->skip) @@ -192,8 +194,6 @@ static int nft_hash_init(const struct nft_set *set, .key_offset = offsetof(struct nft_hash_elem, key), .key_len = set->klen, .hashfn = jhash, - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; return rhashtable_init(priv, ¶ms); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index ef8a926752a9..50e1e5aaf4ce 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_ip6 *i = par->entryinfo; - if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) - && !(i->flags & IP6T_INV_PROTO)) + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) && + !(i->invflags & IP6T_INV_PROTO)) return 0; pr_info("Can be used only in combination with " diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 30dbe34915ae..45e1b30e4fb2 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -378,12 +378,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par, mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { - if (info->hit_count > t->nstamps_max_mask) { - pr_info("hitcount (%u) is larger than packets to be remembered (%u) for table %s\n", - info->hit_count, t->nstamps_max_mask + 1, - info->name); - ret = -EINVAL; - goto out; + if (nstamp_mask > t->nstamps_max_mask) { + spin_lock_bh(&recent_lock); + recent_table_flush(t); + t->nstamps_max_mask = nstamp_mask; + spin_unlock_bh(&recent_lock); } t->refcnt++; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1ba67931eb1b..13332dbf291d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -243,12 +243,13 @@ static int extract_icmp6_fields(const struct sk_buff *skb, unsigned int outside_hdrlen, int *protocol, - struct in6_addr **raddr, - struct in6_addr **laddr, + const struct in6_addr **raddr, + const struct in6_addr **laddr, __be16 *rport, - __be16 *lport) + __be16 *lport, + struct ipv6hdr *ipv6_var) { - struct ipv6hdr *inside_iph, _inside_iph; + const struct ipv6hdr *inside_iph; struct icmp6hdr *icmph, _icmph; __be16 *ports, _ports[2]; u8 inside_nexthdr; @@ -263,12 +264,14 @@ extract_icmp6_fields(const struct sk_buff *skb, if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) return 1; - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); if (inside_iph == NULL) return 1; inside_nexthdr = inside_iph->nexthdr; - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), &inside_nexthdr, &inside_fragoff); if (inside_hdrlen < 0) return 1; /* hjm: Packet has no/incomplete transport layer headers. */ @@ -315,10 +318,10 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, static bool socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk = skb->sk; - struct in6_addr *daddr = NULL, *saddr = NULL; + const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; @@ -342,7 +345,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) } else if (tproto == IPPROTO_ICMPV6) { if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport)) + &sport, &dport, &ipv6_var)) return false; } else { return false; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2702673f0f23..05919bf3f670 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void) .key_len = sizeof(u32), /* portid */ .hashfn = jhash, .max_shift = 16, /* 64K */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; if (err != 0) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ae5e77cdc0ca..5bae7243c577 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2194,14 +2194,55 @@ static int __net_init ovs_init_net(struct net *net) return 0; } -static void __net_exit ovs_exit_net(struct net *net) +static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, + struct list_head *head) { - struct datapath *dp, *dp_next; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp; + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + + hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (dev_net(netdev_vport->dev) == dnet) + list_add(&vport->detach_list, head); + } + } + } +} + +static void __net_exit ovs_exit_net(struct net *dnet) +{ + struct datapath *dp, *dp_next; + struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); + struct vport *vport, *vport_next; + struct net *net; + LIST_HEAD(head); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); + + rtnl_lock(); + for_each_net(net) + list_vports_from_net(net, dnet, &head); + rtnl_unlock(); + + /* Detach all vports from given namespace. */ + list_for_each_entry_safe(vport, vport_next, &head, detach_list) { + list_del(&vport->detach_list); + ovs_dp_detach_port(vport); + } + ovs_unlock(); cancel_work_sync(&ovs_net->dp_notify_work); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 216f20b90aa5..22b18c145c92 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2253,14 +2253,20 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); + struct nlattr *nla; size_t key_len = nla_len(ovs_key) / 2; /* Revert the conversion we did from a non-masked set action to * masked set action. */ - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!nla) return -EMSGSIZE; + if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index ec2954ffc690..067a3fff1d2c 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport) ASSERT_OVSL(); hlist_del_rcu(&vport->hash_node); - - vport->ops->destroy(vport); - module_put(vport->ops->owner); + vport->ops->destroy(vport); } /** diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f8ae295fb001..bc85331a6c60 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -103,6 +103,7 @@ struct vport_portids { * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @err_stats: Points to error statistics used and maintained by vport + * @detach_list: list used for detaching vport in net-exit call. */ struct vport { struct rcu_head rcu; @@ -117,6 +118,7 @@ struct vport { struct pcpu_sw_netstats __percpu *percpu_stats; struct vport_err_stats err_stats; + struct list_head detach_list; }; /** diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9c28cec1a083..f8db7064d81c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -698,6 +698,10 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { + if (!BLOCK_NUM_PKTS(pbd)) { + /* An empty block. Just refresh the timer. */ + goto refresh_timer; + } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; @@ -798,7 +802,11 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { - /* Ok, we tmo'd - so get the current time */ + /* Ok, we tmo'd - so get the current time. + * + * It shouldn't really happen as we don't close empty + * blocks. See prb_retire_rx_blk_timer_expired(). + */ struct timespec ts; getnstimeofday(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; @@ -1349,14 +1357,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return 0; } + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { + skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); + if (!skb) + return 0; + } switch (f->type) { case PACKET_FANOUT_HASH: default: - if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { - skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); - if (!skb) - return 0; - } idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: @@ -3115,11 +3123,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, return 0; } -static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what) +static void packet_dev_mclist_delete(struct net_device *dev, + struct packet_mclist **mlp) { - for ( ; i; i = i->next) { - if (i->ifindex == dev->ifindex) - packet_dev_mc(dev, i, what); + struct packet_mclist *ml; + + while ((ml = *mlp) != NULL) { + if (ml->ifindex == dev->ifindex) { + packet_dev_mc(dev, ml, -1); + *mlp = ml->next; + kfree(ml); + } else + mlp = &ml->next; } } @@ -3196,12 +3211,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) packet_dev_mc(dev, ml, -1); kfree(ml); } - rtnl_unlock(); - return 0; + break; } } rtnl_unlock(); - return -EADDRNOTAVAIL; + return 0; } static void packet_flush_mclist(struct sock *sk) @@ -3551,7 +3565,7 @@ static int packet_notifier(struct notifier_block *this, switch (msg) { case NETDEV_UNREGISTER: if (po->mclist) - packet_dev_mclist(dev, po->mclist, -1); + packet_dev_mclist_delete(dev, &po->mclist); /* fallthrough */ case NETDEV_DOWN: diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index a817705ce2d0..dba8d0864f18 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) +static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst, + struct rds_iw_device **rds_iwdev, + struct rdma_cm_id **cm_id) { struct rds_iw_device *iwdev; struct rds_iw_cm_id *i_cm_id; @@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd src_addr->sin_port, dst_addr->sin_addr.s_addr, dst_addr->sin_port, - rs->rs_bound_addr, - rs->rs_bound_port, - rs->rs_conn_addr, - rs->rs_conn_port); + src->sin_addr.s_addr, + src->sin_port, + dst->sin_addr.s_addr, + dst->sin_port); #ifdef WORKING_TUPLE_DETECTION - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && - src_addr->sin_port == rs->rs_bound_port && - dst_addr->sin_addr.s_addr == rs->rs_conn_addr && - dst_addr->sin_port == rs->rs_conn_port) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr && + src_addr->sin_port == src->sin_port && + dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr && + dst_addr->sin_port == dst->sin_port) { #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only @@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd * zero'ed. It doesn't appear to be properly populated * during connection setup... */ - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) { #endif spin_unlock_irq(&iwdev->spinlock); *rds_iwdev = iwdev; @@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i { struct sockaddr_in *src_addr, *dst_addr; struct rds_iw_device *rds_iwdev_old; - struct rds_sock rs; struct rdma_cm_id *pcm_id; int rc; src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; - rs.rs_bound_addr = src_addr->sin_addr.s_addr; - rs.rs_bound_port = src_addr->sin_port; - rs.rs_conn_addr = dst_addr->sin_addr.s_addr; - rs.rs_conn_port = dst_addr->sin_port; - - rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); + rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id); if (rc) rds_iw_remove_cm_id(rds_iwdev, cm_id); @@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_iw_device *rds_iwdev; struct rds_iw_mr *ibmr = NULL; struct rdma_cm_id *cm_id; + struct sockaddr_in src = { + .sin_addr.s_addr = rs->rs_bound_addr, + .sin_port = rs->rs_bound_port, + }; + struct sockaddr_in dst = { + .sin_addr.s_addr = rs->rs_conn_addr, + .sin_port = rs->rs_conn_port, + }; int ret; - ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); + ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id); if (ret || !cm_id) { ret = -ENODEV; goto out; diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index c6be17a959a6..e0547f521f20 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -218,7 +218,8 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_header *hdr; struct sk_buff *txb; unsigned long *p_txb, resend_at; - int loop, stop; + bool stop; + int loop; u8 resend; _enter("{%d,%d,%d,%d},", @@ -226,7 +227,7 @@ static void rxrpc_resend(struct rxrpc_call *call) atomic_read(&call->sequence), CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - stop = 0; + stop = false; resend = 0; resend_at = 0; @@ -255,11 +256,11 @@ static void rxrpc_resend(struct rxrpc_call *call) _proto("Tx DATA %%%u { #%d }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); if (rxrpc_send_packet(call->conn->trans, txb) < 0) { - stop = 0; + stop = true; sp->resend_at = jiffies + 3; } else { sp->resend_at = - jiffies + rxrpc_resend_timeout * HZ; + jiffies + rxrpc_resend_timeout; } } diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 5394b6be46ec..0610efa83d72 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } - if (!skb->len) { + serr = SKB_EXT_ERR(skb); + if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); kfree_skb(skb); return; @@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk) rxrpc_new_skb(skb); - serr = SKB_EXT_ERR(skb); addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); port = serr->port; diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4575485ad1b4..19a560626dc4 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) { /* nothing remains on the queue */ if (copied && - (msg->msg_flags & MSG_PEEK || timeo == 0)) + (flags & MSG_PEEK || timeo == 0)) goto out; /* wait for a message to turn up */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 82c5d7fc1988..5f6288fa3f12 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -25,21 +25,41 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_bpf *b = a->priv; - int action; - int filter_res; + int action, filter_res; spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; bstats_update(&b->tcf_bstats, skb); - action = b->tcf_action; filter_res = BPF_PROG_RUN(b->filter, skb); - if (filter_res == 0) { - /* Return code 0 from the BPF program - * is being interpreted as a drop here. - */ - action = TC_ACT_SHOT; + + /* A BPF program may overwrite the default action opcode. + * Similarly as in cls_bpf, if filter_res == -1 we use the + * default action specified from tc. + * + * In case a different well-known TC_ACT opcode has been + * returned, it will overwrite the default one. + * + * For everything else that is unkown, TC_ACT_UNSPEC is + * returned. + */ + switch (filter_res) { + case TC_ACT_PIPE: + case TC_ACT_RECLASSIFY: + case TC_ACT_OK: + action = filter_res; + break; + case TC_ACT_SHOT: + action = filter_res; b->tcf_qstats.drops++; + break; + case TC_ACT_UNSPEC: + action = b->tcf_action; + break; + default: + action = TC_ACT_UNSPEC; + break; } spin_unlock(&b->tcf_lock); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 09487afbfd51..95fdf4e40051 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -78,8 +78,11 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; - struct tc_u_knode __rcu *ht[1]; struct rcu_head rcu; + /* The 'ht' field MUST be the last field in structure to allow for + * more entries allocated at end of structure. + */ + struct tc_u_knode __rcu *ht[1]; }; struct tc_u_common { diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 6742200b1307..fbb7ebfc58c6 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -228,6 +228,7 @@ static int tcf_em_validate(struct tcf_proto *tp, * to replay the request. */ module_put(em->ops->owner); + em->ops = NULL; err = -EAGAIN; } #endif diff --git a/net/socket.c b/net/socket.c index bbedbfcb42c2..245330ca0015 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, if (len > INT_MAX) len = INT_MAX; + if (unlikely(!access_ok(VERIFY_READ, buff, len))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, if (size > INT_MAX) size = INT_MAX; + if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index abbb7dcd1689..59eeed43eda2 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -217,6 +217,8 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) for (i = 0; i < arg->npages && arg->pages[i]; i++) __free_page(arg->pages[i]); + + kfree(arg->pages); } static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 224a82f24d3c..1095be9c80ab 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -463,6 +463,8 @@ static int rsc_parse(struct cache_detail *cd, /* number of additional gid's */ if (get_int(&mesg, &N)) goto out; + if (N < 0 || N > NGROUPS_MAX) + goto out; status = -ENOMEM; rsci.cred.cr_group_info = groups_alloc(N); if (rsci.cred.cr_group_info == NULL) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 33fb105d4352..5199bb1a017e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -921,7 +921,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait, poll_wait(filp, &queue_wait, wait); /* alway allow write */ - mask = POLL_OUT | POLLWRNORM; + mask = POLLOUT | POLLWRNORM; if (!rp) return mask; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 612aa73bbc60..e6ce1517367f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -303,9 +303,7 @@ static int rpc_client_register(struct rpc_clnt *clnt, struct super_block *pipefs_sb; int err; - err = rpc_clnt_debugfs_register(clnt); - if (err) - return err; + rpc_clnt_debugfs_register(clnt); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index e811f390f9f6..82962f7e6e88 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -129,48 +129,52 @@ static const struct file_operations tasks_fops = { .release = tasks_release, }; -int +void rpc_clnt_debugfs_register(struct rpc_clnt *clnt) { - int len, err; + int len; char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */ + struct rpc_xprt *xprt; /* Already registered? */ - if (clnt->cl_debugfs) - return 0; + if (clnt->cl_debugfs || !rpc_clnt_dir) + return; len = snprintf(name, sizeof(name), "%x", clnt->cl_clid); if (len >= sizeof(name)) - return -EINVAL; + return; /* make the per-client dir */ clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir); if (!clnt->cl_debugfs) - return -ENOMEM; + return; /* make tasks file */ - err = -ENOMEM; if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs, clnt, &tasks_fops)) goto out_err; - err = -EINVAL; rcu_read_lock(); + xprt = rcu_dereference(clnt->cl_xprt); + /* no "debugfs" dentry? Don't bother with the symlink. */ + if (!xprt->debugfs) { + rcu_read_unlock(); + return; + } len = snprintf(name, sizeof(name), "../../rpc_xprt/%s", - rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name); + xprt->debugfs->d_name.name); rcu_read_unlock(); + if (len >= sizeof(name)) goto out_err; - err = -ENOMEM; if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name)) goto out_err; - return 0; + return; out_err: debugfs_remove_recursive(clnt->cl_debugfs); clnt->cl_debugfs = NULL; - return err; } void @@ -226,33 +230,33 @@ static const struct file_operations xprt_info_fops = { .release = xprt_info_release, }; -int +void rpc_xprt_debugfs_register(struct rpc_xprt *xprt) { int len, id; static atomic_t cur_id; char name[9]; /* 8 hex digits + NULL term */ + if (!rpc_xprt_dir) + return; + id = (unsigned int)atomic_inc_return(&cur_id); len = snprintf(name, sizeof(name), "%x", id); if (len >= sizeof(name)) - return -EINVAL; + return; /* make the per-client dir */ xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir); if (!xprt->debugfs) - return -ENOMEM; + return; /* make tasks file */ if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs, xprt, &xprt_info_fops)) { debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; - return -ENOMEM; } - - return 0; } void @@ -266,14 +270,17 @@ void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); + topdir = NULL; + rpc_clnt_dir = NULL; + rpc_xprt_dir = NULL; } -int __init +void __init sunrpc_debugfs_init(void) { topdir = debugfs_create_dir("sunrpc", NULL); if (!topdir) - goto out; + return; rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) @@ -283,10 +290,9 @@ sunrpc_debugfs_init(void) if (!rpc_xprt_dir) goto out_remove; - return 0; + return; out_remove: debugfs_remove_recursive(topdir); topdir = NULL; -out: - return -ENOMEM; + rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index e37fbed87956..ee5d3d253102 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -98,10 +98,7 @@ init_sunrpc(void) if (err) goto out4; - err = sunrpc_debugfs_init(); - if (err) - goto out5; - + sunrpc_debugfs_init(); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) rpc_register_sysctl(); #endif @@ -109,8 +106,6 @@ init_sunrpc(void) init_socket_xprt(); /* clnt sock transport */ return 0; -out5: - unregister_rpc_pipefs(); out4: unregister_pernet_subsys(&sunrpc_net_ops); out3: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index e3015aede0d9..9949722d99ce 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1331,7 +1331,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) */ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { - int err; struct rpc_xprt *xprt; struct xprt_class *t; @@ -1372,11 +1371,7 @@ found: return ERR_PTR(-ENOMEM); } - err = rpc_xprt_debugfs_register(xprt); - if (err) { - xprt_destroy(xprt); - return ERR_PTR(err); - } + rpc_xprt_debugfs_register(xprt); dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 7e9acd9361c5..91ffde82fa0c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -738,8 +738,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpc_xprt *xprt = rep->rr_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *iptr; - int credits, rdmalen, status; + int rdmalen, status; unsigned long cwnd; + u32 credits; /* Check status. If bad, signal disconnect and return rep to pool */ if (rep->rr_len == ~0U) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d1b70397c60f..0a16fb6f0885 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -285,7 +285,7 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) */ struct rpcrdma_buffer { spinlock_t rb_lock; /* protects indexes */ - int rb_max_requests;/* client max requests */ + u32 rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_all; int rb_send_index; diff --git a/net/tipc/core.c b/net/tipc/core.c index 935205e6bcfe..be1c9fa60b09 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -152,11 +152,11 @@ out_netlink: static void __exit tipc_exit(void) { tipc_bearer_cleanup(); + unregister_pernet_subsys(&tipc_net_ops); tipc_netlink_stop(); tipc_netlink_compat_stop(); tipc_socket_stop(); tipc_unregister_sysctl(); - unregister_pernet_subsys(&tipc_net_ops); pr_info("Deactivated\n"); } diff --git a/net/tipc/link.c b/net/tipc/link.c index a4cf364316de..14f09b3cb87c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -464,10 +464,11 @@ void tipc_link_reset(struct tipc_link *l_ptr) /* Clean up all queues, except inputq: */ __skb_queue_purge(&l_ptr->outqueue); __skb_queue_purge(&l_ptr->deferred_queue); - skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq); - if (!skb_queue_empty(&l_ptr->inputq)) + if (!owner->inputq) + owner->inputq = &l_ptr->inputq; + skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq); + if (!skb_queue_empty(owner->inputq)) owner->action_flags |= TIPC_MSG_EVT; - owner->inputq = &l_ptr->inputq; l_ptr->next_out = NULL; l_ptr->unacked_window = 0; l_ptr->checkpoint = 1; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f73e975af80b..b4d4467d0bb0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net) .hashfn = jhash, .max_shift = 20, /* 1M */ .min_shift = 8, /* 256 */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; return rhashtable_init(&tn->sk_rht, &rht_params); diff --git a/net/wireless/core.c b/net/wireless/core.c index 3af0ecf1cc16..2a0bbd22854b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1199,6 +1199,7 @@ out_fail_wq: regulatory_exit(); out_fail_reg: debugfs_remove(ieee80211_debugfs_dir); + nl80211_exit(); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d78fd8b54515..b6f84f6a2a09 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2654,10 +2654,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); @@ -2666,6 +2662,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); @@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT + * as userspace might just pass through the capabilities from the IEs + * directly, rather than enforcing this restriction and returning an + * error in this case. + */ + if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { + params.ht_capa = NULL; + params.vht_capa = NULL; + } + /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); @@ -12528,9 +12538,7 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, } for (j = 0; j < match->n_channels; j++) { - if (nla_put_u32(msg, - NL80211_ATTR_WIPHY_FREQ, - match->channels[j])) { + if (nla_put_u32(msg, j, match->channels[j])) { nla_nest_cancel(msg, nl_freqs); nla_nest_cancel(msg, nl_match); goto out; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b586d0dcb09e..48dfc7b4e981 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -228,7 +228,7 @@ static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work); /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 6, + .n_reg_rules = 8, .alpha2 = "00", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cee479bc655c..638af0655aaf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { - dst_release(dst); - xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - - return ERR_PTR(-EREMOTE); + err = -EREMOTE; + goto error; } err = -EAGAIN; @@ -2324,7 +2322,8 @@ nopol: error: dst_release(dst); dropdst: - dst_release(dst_orig); + if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) + dst_release(dst_orig); xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } @@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, - flags | XFRM_LOOKUP_QUEUE); + flags | XFRM_LOOKUP_QUEUE | + XFRM_LOOKUP_KEEP_DST_REF); if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 1684bcc78b34..5fde34326dcf 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -152,7 +152,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; /* No partial writes. */ - length = EINVAL; + length = -EINVAL; if (*ppos != 0) goto out; diff --git a/sound/core/control.c b/sound/core/control.c index 35324a8e83c8..eeb691d1911f 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1170,6 +1170,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, if (info->count < 1) return -EINVAL; + if (!*info->id.name) + return -EINVAL; + if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name)) + return -EINVAL; access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE : (info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE| SNDRV_CTL_ELEM_ACCESS_INACTIVE| diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index f62780ed64ad..7821b07415a7 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -105,6 +105,8 @@ static void snd_opl3_calc_pitch(unsigned char *fnum, unsigned char *blocknum, int pitchbend = chan->midi_pitchbend; int segment; + if (pitchbend < -0x2000) + pitchbend = -0x2000; if (pitchbend > 0x1FFF) pitchbend = 0x1FFF; diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index a422aaa3bb0c..9ee25a63f684 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -96,10 +96,10 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit) struct fw_device *device = fw_parent_device(unit); int err, rcode; u64 date; - __be32 cues[3] = { - MAUDIO_BOOTLOADER_CUE1, - MAUDIO_BOOTLOADER_CUE2, - MAUDIO_BOOTLOADER_CUE3 + __le32 cues[3] = { + cpu_to_le32(MAUDIO_BOOTLOADER_CUE1), + cpu_to_le32(MAUDIO_BOOTLOADER_CUE2), + cpu_to_le32(MAUDIO_BOOTLOADER_CUE3) }; /* check date of software used to build */ diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index 5f17b77ee152..f0e4d502d604 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -26,7 +26,7 @@ int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit) { r->channels_mask = ~0uLL; - r->unit = fw_unit_get(unit); + r->unit = unit; mutex_init(&r->mutex); r->allocated = false; @@ -42,7 +42,6 @@ void fw_iso_resources_destroy(struct fw_iso_resources *r) { WARN_ON(r->allocated); mutex_destroy(&r->mutex); - fw_unit_put(r->unit); } EXPORT_SYMBOL(fw_iso_resources_destroy); diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c index 29ccb3637164..e6757cd85724 100644 --- a/sound/firewire/oxfw/oxfw-stream.c +++ b/sound/firewire/oxfw/oxfw-stream.c @@ -171,9 +171,10 @@ static int start_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream, } /* Wait first packet */ - err = amdtp_stream_wait_callback(stream, CALLBACK_TIMEOUT); - if (err < 0) + if (!amdtp_stream_wait_callback(stream, CALLBACK_TIMEOUT)) { stop_stream(oxfw, stream); + err = -ETIMEDOUT; + } end: return err; } diff --git a/sound/isa/msnd/msnd_pinnacle_mixer.c b/sound/isa/msnd/msnd_pinnacle_mixer.c index 17e49a071af4..b408540798c1 100644 --- a/sound/isa/msnd/msnd_pinnacle_mixer.c +++ b/sound/isa/msnd/msnd_pinnacle_mixer.c @@ -306,11 +306,12 @@ int snd_msndmix_new(struct snd_card *card) spin_lock_init(&chip->mixer_lock); strcpy(card->mixername, "MSND Pinnacle Mixer"); - for (idx = 0; idx < ARRAY_SIZE(snd_msnd_controls); idx++) + for (idx = 0; idx < ARRAY_SIZE(snd_msnd_controls); idx++) { err = snd_ctl_add(card, snd_ctl_new1(snd_msnd_controls + idx, chip)); if (err < 0) return err; + } return 0; } diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index a2ce773bdc62..17c2637d842c 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -1164,7 +1164,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, } } - if (!bus->no_response_fallback) + if (bus->no_response_fallback) return -1; if (!chip->polling_mode && chip->poll_count < 2) { diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index b680b4ec6331..8ec5289f8e05 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -687,12 +687,45 @@ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid, return val; } +/* is this a stereo widget or a stereo-to-mono mix? */ +static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid, int dir) +{ + unsigned int wcaps = get_wcaps(codec, nid); + hda_nid_t conn; + + if (wcaps & AC_WCAP_STEREO) + return true; + if (dir != HDA_INPUT || get_wcaps_type(wcaps) != AC_WID_AUD_MIX) + return false; + if (snd_hda_get_num_conns(codec, nid) != 1) + return false; + if (snd_hda_get_connections(codec, nid, &conn, 1) < 0) + return false; + return !!(get_wcaps(codec, conn) & AC_WCAP_STEREO); +} + /* initialize the amp value (only at the first time) */ static void init_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx) { unsigned int caps = query_amp_caps(codec, nid, dir); int val = get_amp_val_to_activate(codec, nid, dir, caps, false); - snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val); + + if (is_stereo_amps(codec, nid, dir)) + snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val); + else + snd_hda_codec_amp_init(codec, nid, 0, dir, idx, 0xff, val); +} + +/* update the amp, doing in stereo or mono depending on NID */ +static int update_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx, + unsigned int mask, unsigned int val) +{ + if (is_stereo_amps(codec, nid, dir)) + return snd_hda_codec_amp_stereo(codec, nid, dir, idx, + mask, val); + else + return snd_hda_codec_amp_update(codec, nid, 0, dir, idx, + mask, val); } /* calculate amp value mask we can modify; @@ -732,7 +765,7 @@ static void activate_amp(struct hda_codec *codec, hda_nid_t nid, int dir, return; val &= mask; - snd_hda_codec_amp_stereo(codec, nid, dir, idx, mask, val); + update_amp(codec, nid, dir, idx, mask, val); } static void activate_amp_out(struct hda_codec *codec, struct nid_path *path, @@ -4424,13 +4457,11 @@ static void mute_all_mixer_nid(struct hda_codec *codec, hda_nid_t mix) has_amp = nid_has_mute(codec, mix, HDA_INPUT); for (i = 0; i < nums; i++) { if (has_amp) - snd_hda_codec_amp_stereo(codec, mix, - HDA_INPUT, i, - 0xff, HDA_AMP_MUTE); + update_amp(codec, mix, HDA_INPUT, i, + 0xff, HDA_AMP_MUTE); else if (nid_has_volume(codec, conn[i], HDA_OUTPUT)) - snd_hda_codec_amp_stereo(codec, conn[i], - HDA_OUTPUT, 0, - 0xff, HDA_AMP_MUTE); + update_amp(codec, conn[i], HDA_OUTPUT, 0, + 0xff, HDA_AMP_MUTE); } } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4ca3d5d02436..a8a1e14272a1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1989,7 +1989,7 @@ static const struct pci_device_id azx_ids[] = { .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, /* Sunrise Point */ { PCI_DEVICE(0x8086, 0xa170), - .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH }, + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Sunrise Point-LP */ { PCI_DEVICE(0x8086, 0x9d70), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index ce5a6da83419..05e19f78b4cb 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -134,13 +134,38 @@ static void print_amp_caps(struct snd_info_buffer *buffer, (caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT); } +/* is this a stereo widget or a stereo-to-mono mix? */ +static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid, + int dir, unsigned int wcaps, int indices) +{ + hda_nid_t conn; + + if (wcaps & AC_WCAP_STEREO) + return true; + /* check for a stereo-to-mono mix; it must be: + * only a single connection, only for input, and only a mixer widget + */ + if (indices != 1 || dir != HDA_INPUT || + get_wcaps_type(wcaps) != AC_WID_AUD_MIX) + return false; + + if (snd_hda_get_raw_connections(codec, nid, &conn, 1) < 0) + return false; + /* the connection source is a stereo? */ + wcaps = snd_hda_param_read(codec, conn, AC_PAR_AUDIO_WIDGET_CAP); + return !!(wcaps & AC_WCAP_STEREO); +} + static void print_amp_vals(struct snd_info_buffer *buffer, struct hda_codec *codec, hda_nid_t nid, - int dir, int stereo, int indices) + int dir, unsigned int wcaps, int indices) { unsigned int val; + bool stereo; int i; + stereo = is_stereo_amps(codec, nid, dir, wcaps, indices); + dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT; for (i = 0; i < indices; i++) { snd_iprintf(buffer, " ["); @@ -757,12 +782,10 @@ static void print_codec_info(struct snd_info_entry *entry, (codec->single_adc_amp && wid_type == AC_WID_AUD_IN)) print_amp_vals(buffer, codec, nid, HDA_INPUT, - wid_caps & AC_WCAP_STEREO, - 1); + wid_caps, 1); else print_amp_vals(buffer, codec, nid, HDA_INPUT, - wid_caps & AC_WCAP_STEREO, - conn_len); + wid_caps, conn_len); } if (wid_caps & AC_WCAP_OUT_AMP) { snd_iprintf(buffer, " Amp-Out caps: "); @@ -771,11 +794,10 @@ static void print_codec_info(struct snd_info_entry *entry, if (wid_type == AC_WID_PIN && codec->pin_amp_workaround) print_amp_vals(buffer, codec, nid, HDA_OUTPUT, - wid_caps & AC_WCAP_STEREO, - conn_len); + wid_caps, conn_len); else print_amp_vals(buffer, codec, nid, HDA_OUTPUT, - wid_caps & AC_WCAP_STEREO, 1); + wid_caps, 1); } switch (wid_type) { diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 1589c9bcce3e..dd2b3d92071f 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -393,6 +393,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), + SND_PCI_QUIRK(0x106b, 0x5600, "MacBookAir 5,2", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x5b00, "MacBookAir 4,2", CS420X_MBA42), SND_PCI_QUIRK_VENDOR(0x106b, "Apple", CS420X_APPLE), {} /* terminator */ @@ -584,6 +585,7 @@ static int patch_cs420x(struct hda_codec *codec) return -ENOMEM; spec->gen.automute_hook = cs_automute; + codec->single_adc_amp = 1; snd_hda_pick_fixup(codec, cs420x_models, cs420x_fixup_tbl, cs420x_fixups); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index fd3ed18670e9..da67ea8645a6 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -223,6 +223,7 @@ enum { CXT_PINCFG_LENOVO_TP410, CXT_PINCFG_LEMOTE_A1004, CXT_PINCFG_LEMOTE_A1205, + CXT_PINCFG_COMPAQ_CQ60, CXT_FIXUP_STEREO_DMIC, CXT_FIXUP_INC_MIC_BOOST, CXT_FIXUP_HEADPHONE_MIC_PIN, @@ -660,6 +661,15 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_PINS, .v.pins = cxt_pincfg_lemote, }, + [CXT_PINCFG_COMPAQ_CQ60] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + /* 0x17 was falsely set up as a mic, it should 0x1d */ + { 0x17, 0x400001f0 }, + { 0x1d, 0x97a70120 }, + { } + } + }, [CXT_FIXUP_STEREO_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_stereo_dmic, @@ -769,6 +779,7 @@ static const struct hda_model_fixup cxt5047_fixup_models[] = { }; static const struct snd_pci_quirk cxt5051_fixups[] = { + SND_PCI_QUIRK(0x103c, 0x360b, "Compaq CQ60", CXT_PINCFG_COMPAQ_CQ60), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo X200", CXT_PINCFG_LENOVO_X200), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b2b24a8b3dac..f9d12c0a7e5a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -396,7 +396,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) { /* We currently only handle front, HP */ static hda_nid_t pins[] = { - 0x0f, 0x10, 0x14, 0x15, 0 + 0x0f, 0x10, 0x14, 0x15, 0x17, 0 }; hda_nid_t *p; for (p = pins; *p; p++) @@ -2912,6 +2912,8 @@ static void alc283_init(struct hda_codec *codec) if (!hp_pin) return; + + msleep(30); hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); /* Index 0x43 Direct Drive HP AMP LPM Control 1 */ @@ -3607,6 +3609,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3662,6 +3665,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_write_coef_idx(codec, 0x45, 0xc489); snd_hda_set_pin_ctl_cache(codec, hp_pin, 0); alc_process_coef_fw(codec, coef0255); @@ -3731,6 +3735,7 @@ static void alc_headset_mode_default(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3785,6 +3790,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3839,6 +3845,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); break; case 0x10ec0233: @@ -3884,6 +3891,7 @@ static void alc_determine_headset_type(struct hda_codec *codec) switch (codec->vendor_id) { case 0x10ec0255: + case 0x10ec0256: alc_process_coef_fw(codec, coef0255); msleep(300); val = alc_read_coef_idx(codec, 0x46); @@ -4364,6 +4372,7 @@ enum { ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, + ALC269_FIXUP_LIFEBOOK_HP_PIN, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4517,6 +4526,13 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_HP_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x21, 0x0221102f }, /* HP out */ + { } + }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -5010,6 +5026,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC), @@ -5036,6 +5053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x501e, "Thinkpad L440", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), @@ -5209,6 +5227,23 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x17, 0x40000000}, {0x1d, 0x40700001}, {0x21, 0x02211040}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC255_STANDARD_PINS, + {0x12, 0x90a60170}, + {0x14, 0x90170140}, + {0x17, 0x40000000}, + {0x1d, 0x40700001}, + {0x21, 0x02211050}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x12, 0x90a60140}, + {0x13, 0x40000000}, + {0x14, 0x90170110}, + {0x19, 0x411111f0}, + {0x1a, 0x411111f0}, + {0x1b, 0x411111f0}, + {0x1d, 0x40700001}, + {0x1e, 0x411111f0}, + {0x21, 0x02211020}), SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4, {0x12, 0x90a60130}, {0x13, 0x40000000}, diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index f5ad214663f9..8de836165cf2 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -46,8 +46,6 @@ #include <sound/pcm_params.h> #include <sound/soc.h> -#include <asm/mach-types.h> - #include "../codecs/wm8731.h" #include "atmel-pcm.h" #include "atmel_ssc_dai.h" @@ -171,9 +169,7 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) int ret; if (!np) { - if (!(machine_is_at91sam9g20ek() || - machine_is_at91sam9g20ek_2mmc())) - return -ENODEV; + return -ENODEV; } ret = atmel_ssc_set_audio(0); @@ -210,39 +206,37 @@ static int at91sam9g20ek_audio_probe(struct platform_device *pdev) card->dev = &pdev->dev; /* Parse device node info */ - if (np) { - ret = snd_soc_of_parse_card_name(card, "atmel,model"); - if (ret) - goto err; - - ret = snd_soc_of_parse_audio_routing(card, - "atmel,audio-routing"); - if (ret) - goto err; - - /* Parse codec info */ - at91sam9g20ek_dai.codec_name = NULL; - codec_np = of_parse_phandle(np, "atmel,audio-codec", 0); - if (!codec_np) { - dev_err(&pdev->dev, "codec info missing\n"); - return -EINVAL; - } - at91sam9g20ek_dai.codec_of_node = codec_np; - - /* Parse dai and platform info */ - at91sam9g20ek_dai.cpu_dai_name = NULL; - at91sam9g20ek_dai.platform_name = NULL; - cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0); - if (!cpu_np) { - dev_err(&pdev->dev, "dai and pcm info missing\n"); - return -EINVAL; - } - at91sam9g20ek_dai.cpu_of_node = cpu_np; - at91sam9g20ek_dai.platform_of_node = cpu_np; - - of_node_put(codec_np); - of_node_put(cpu_np); + ret = snd_soc_of_parse_card_name(card, "atmel,model"); + if (ret) + goto err; + + ret = snd_soc_of_parse_audio_routing(card, + "atmel,audio-routing"); + if (ret) + goto err; + + /* Parse codec info */ + at91sam9g20ek_dai.codec_name = NULL; + codec_np = of_parse_phandle(np, "atmel,audio-codec", 0); + if (!codec_np) { + dev_err(&pdev->dev, "codec info missing\n"); + return -EINVAL; + } + at91sam9g20ek_dai.codec_of_node = codec_np; + + /* Parse dai and platform info */ + at91sam9g20ek_dai.cpu_dai_name = NULL; + at91sam9g20ek_dai.platform_name = NULL; + cpu_np = of_parse_phandle(np, "atmel,ssc-controller", 0); + if (!cpu_np) { + dev_err(&pdev->dev, "dai and pcm info missing\n"); + return -EINVAL; } + at91sam9g20ek_dai.cpu_of_node = cpu_np; + at91sam9g20ek_dai.platform_of_node = cpu_np; + + of_node_put(codec_np); + of_node_put(cpu_np); ret = snd_soc_register_card(card); if (ret) { diff --git a/sound/soc/cirrus/Kconfig b/sound/soc/cirrus/Kconfig index 7b7fbcd49e5e..c7cd60f009e9 100644 --- a/sound/soc/cirrus/Kconfig +++ b/sound/soc/cirrus/Kconfig @@ -16,7 +16,7 @@ config SND_EP93XX_SOC_AC97 config SND_EP93XX_SOC_SNAPPERCL15 tristate "SoC Audio support for Bluewater Systems Snapper CL15 module" - depends on SND_EP93XX_SOC && MACH_SNAPPER_CL15 + depends on SND_EP93XX_SOC && MACH_SNAPPER_CL15 && I2C select SND_EP93XX_SOC_I2S select SND_SOC_TLV320AIC23_I2C help diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 064e6c18e109..ea9f0e31f9d4 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -69,7 +69,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_MAX98088 if I2C select SND_SOC_MAX98090 if I2C select SND_SOC_MAX98095 if I2C - select SND_SOC_MAX98357A + select SND_SOC_MAX98357A if GPIOLIB select SND_SOC_MAX9850 if I2C select SND_SOC_MAX9768 if I2C select SND_SOC_MAX9877 if I2C diff --git a/sound/soc/codecs/adav80x.c b/sound/soc/codecs/adav80x.c index b67480f1b1aa..4373ada95648 100644 --- a/sound/soc/codecs/adav80x.c +++ b/sound/soc/codecs/adav80x.c @@ -317,7 +317,7 @@ static int adav80x_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec); - unsigned int deemph = ucontrol->value.enumerated.item[0]; + unsigned int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; @@ -333,7 +333,7 @@ static int adav80x_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = adav80x->deemph; + ucontrol->value.integer.value[0] = adav80x->deemph; return 0; }; diff --git a/sound/soc/codecs/ak4641.c b/sound/soc/codecs/ak4641.c index 70861c7b1631..81b54a270bd8 100644 --- a/sound/soc/codecs/ak4641.c +++ b/sound/soc/codecs/ak4641.c @@ -76,7 +76,7 @@ static int ak4641_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; @@ -92,7 +92,7 @@ static int ak4641_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = ak4641->deemph; + ucontrol->value.integer.value[0] = ak4641->deemph; return 0; }; diff --git a/sound/soc/codecs/ak4671.c b/sound/soc/codecs/ak4671.c index 632e89f793a7..2a58b1dccd2f 100644 --- a/sound/soc/codecs/ak4671.c +++ b/sound/soc/codecs/ak4671.c @@ -343,25 +343,25 @@ static const struct snd_soc_dapm_widget ak4671_dapm_widgets[] = { }; static const struct snd_soc_dapm_route ak4671_intercon[] = { - {"DAC Left", "NULL", "PMPLL"}, - {"DAC Right", "NULL", "PMPLL"}, - {"ADC Left", "NULL", "PMPLL"}, - {"ADC Right", "NULL", "PMPLL"}, + {"DAC Left", NULL, "PMPLL"}, + {"DAC Right", NULL, "PMPLL"}, + {"ADC Left", NULL, "PMPLL"}, + {"ADC Right", NULL, "PMPLL"}, /* Outputs */ - {"LOUT1", "NULL", "LOUT1 Mixer"}, - {"ROUT1", "NULL", "ROUT1 Mixer"}, - {"LOUT2", "NULL", "LOUT2 Mix Amp"}, - {"ROUT2", "NULL", "ROUT2 Mix Amp"}, - {"LOUT3", "NULL", "LOUT3 Mixer"}, - {"ROUT3", "NULL", "ROUT3 Mixer"}, + {"LOUT1", NULL, "LOUT1 Mixer"}, + {"ROUT1", NULL, "ROUT1 Mixer"}, + {"LOUT2", NULL, "LOUT2 Mix Amp"}, + {"ROUT2", NULL, "ROUT2 Mix Amp"}, + {"LOUT3", NULL, "LOUT3 Mixer"}, + {"ROUT3", NULL, "ROUT3 Mixer"}, {"LOUT1 Mixer", "DACL", "DAC Left"}, {"ROUT1 Mixer", "DACR", "DAC Right"}, {"LOUT2 Mixer", "DACHL", "DAC Left"}, {"ROUT2 Mixer", "DACHR", "DAC Right"}, - {"LOUT2 Mix Amp", "NULL", "LOUT2 Mixer"}, - {"ROUT2 Mix Amp", "NULL", "ROUT2 Mixer"}, + {"LOUT2 Mix Amp", NULL, "LOUT2 Mixer"}, + {"ROUT2 Mix Amp", NULL, "ROUT2 Mixer"}, {"LOUT3 Mixer", "DACSL", "DAC Left"}, {"ROUT3 Mixer", "DACSR", "DAC Right"}, @@ -381,18 +381,18 @@ static const struct snd_soc_dapm_route ak4671_intercon[] = { {"LIN2", NULL, "Mic Bias"}, {"RIN2", NULL, "Mic Bias"}, - {"ADC Left", "NULL", "LIN MUX"}, - {"ADC Right", "NULL", "RIN MUX"}, + {"ADC Left", NULL, "LIN MUX"}, + {"ADC Right", NULL, "RIN MUX"}, /* Analog Loops */ - {"LIN1 Mixing Circuit", "NULL", "LIN1"}, - {"RIN1 Mixing Circuit", "NULL", "RIN1"}, - {"LIN2 Mixing Circuit", "NULL", "LIN2"}, - {"RIN2 Mixing Circuit", "NULL", "RIN2"}, - {"LIN3 Mixing Circuit", "NULL", "LIN3"}, - {"RIN3 Mixing Circuit", "NULL", "RIN3"}, - {"LIN4 Mixing Circuit", "NULL", "LIN4"}, - {"RIN4 Mixing Circuit", "NULL", "RIN4"}, + {"LIN1 Mixing Circuit", NULL, "LIN1"}, + {"RIN1 Mixing Circuit", NULL, "RIN1"}, + {"LIN2 Mixing Circuit", NULL, "LIN2"}, + {"RIN2 Mixing Circuit", NULL, "RIN2"}, + {"LIN3 Mixing Circuit", NULL, "LIN3"}, + {"RIN3 Mixing Circuit", NULL, "RIN3"}, + {"LIN4 Mixing Circuit", NULL, "LIN4"}, + {"RIN4 Mixing Circuit", NULL, "RIN4"}, {"LOUT1 Mixer", "LINL1", "LIN1 Mixing Circuit"}, {"ROUT1 Mixer", "RINR1", "RIN1 Mixing Circuit"}, diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index 79a4efcb894c..7d3a6accaf9a 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -286,7 +286,7 @@ static int cs4271_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = cs4271->deemph; + ucontrol->value.integer.value[0] = cs4271->deemph; return 0; } @@ -296,7 +296,7 @@ static int cs4271_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); - cs4271->deemph = ucontrol->value.enumerated.item[0]; + cs4271->deemph = ucontrol->value.integer.value[0]; return cs4271_set_deemph(codec); } diff --git a/sound/soc/codecs/da732x.c b/sound/soc/codecs/da732x.c index ffe96175a8a5..911c26c705fc 100644 --- a/sound/soc/codecs/da732x.c +++ b/sound/soc/codecs/da732x.c @@ -876,11 +876,11 @@ static const struct snd_soc_dapm_widget da732x_dapm_widgets[] = { static const struct snd_soc_dapm_route da732x_dapm_routes[] = { /* Inputs */ - {"AUX1L PGA", "NULL", "AUX1L"}, - {"AUX1R PGA", "NULL", "AUX1R"}, + {"AUX1L PGA", NULL, "AUX1L"}, + {"AUX1R PGA", NULL, "AUX1R"}, {"MIC1 PGA", NULL, "MIC1"}, - {"MIC2 PGA", "NULL", "MIC2"}, - {"MIC3 PGA", "NULL", "MIC3"}, + {"MIC2 PGA", NULL, "MIC2"}, + {"MIC3 PGA", NULL, "MIC3"}, /* Capture Path */ {"ADC1 Left MUX", "MIC1", "MIC1 PGA"}, diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index f27325155ace..c5f35a07e8e4 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -120,7 +120,7 @@ static int es8328_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = es8328->deemph; + ucontrol->value.integer.value[0] = es8328->deemph; return 0; } @@ -129,7 +129,7 @@ static int es8328_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; int ret; if (deemph > 1) diff --git a/sound/soc/codecs/max98357a.c b/sound/soc/codecs/max98357a.c index 1806333ea29e..e9e6efbc21dd 100644 --- a/sound/soc/codecs/max98357a.c +++ b/sound/soc/codecs/max98357a.c @@ -12,9 +12,19 @@ * max98357a.c -- MAX98357A ALSA SoC Codec driver */ -#include <linux/module.h> +#include <linux/device.h> +#include <linux/err.h> #include <linux/gpio.h> +#include <linux/gpio/consumer.h> +#include <linux/kernel.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <sound/pcm.h> #include <sound/soc.h> +#include <sound/soc-dai.h> +#include <sound/soc-dapm.h> #define DRV_NAME "max98357a" diff --git a/sound/soc/codecs/pcm1681.c b/sound/soc/codecs/pcm1681.c index a722a023c262..477e13d30971 100644 --- a/sound/soc/codecs/pcm1681.c +++ b/sound/soc/codecs/pcm1681.c @@ -118,7 +118,7 @@ static int pcm1681_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = priv->deemph; + ucontrol->value.integer.value[0] = priv->deemph; return 0; } @@ -129,7 +129,7 @@ static int pcm1681_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec); - priv->deemph = ucontrol->value.enumerated.item[0]; + priv->deemph = ucontrol->value.integer.value[0]; return pcm1681_set_deemph(codec); } diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 9974f201a08f..474cae82a874 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1156,25 +1156,6 @@ static int pcm512x_hw_params(struct snd_pcm_substream *substream, ret, pcm512x->pll_out); return ret; } - - gpio = PCM512x_G1OE << (4 - 1); - ret = regmap_update_bits(pcm512x->regmap, PCM512x_GPIO_EN, - gpio, gpio); - if (ret != 0) { - dev_err(codec->dev, "Failed to enable gpio %d: %d\n", - 4, ret); - return ret; - } - - gpio = PCM512x_GPIO_OUTPUT_1 + 4 - 1; - ret = regmap_update_bits(pcm512x->regmap, gpio, - PCM512x_GxSL, PCM512x_GxSL_PLLLK); - if (ret != 0) { - dev_err(codec->dev, - "Failed to output pll lock on %d: %d\n", - ret, 4); - return ret; - } } ret = regmap_update_bits(pcm512x->regmap, PCM512x_SYNCHRONIZE, diff --git a/sound/soc/codecs/rt286.c b/sound/soc/codecs/rt286.c index f374840a5a7c..9b541e52da8c 100644 --- a/sound/soc/codecs/rt286.c +++ b/sound/soc/codecs/rt286.c @@ -1198,7 +1198,7 @@ static struct dmi_system_id dmi_dell_dino[] = { .ident = "Dell Dino", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_BOARD_NAME, "0144P8") + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343") } }, { } diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index e1a4a45c57e2..fd102613d20d 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -225,7 +225,6 @@ static bool rt5670_volatile_register(struct device *dev, unsigned int reg) case RT5670_ADC_EQ_CTRL1: case RT5670_EQ_CTRL1: case RT5670_ALC_CTRL_1: - case RT5670_IRQ_CTRL1: case RT5670_IRQ_CTRL2: case RT5670_INT_IRQ_ST: case RT5670_IL_CMD: @@ -2703,6 +2702,12 @@ static int rt5670_i2c_probe(struct i2c_client *i2c, regmap_write(rt5670->regmap, RT5670_RESET, 0); + regmap_read(rt5670->regmap, RT5670_VENDOR_ID, &val); + if (val >= 4) + regmap_write(rt5670->regmap, RT5670_GPIO_CTRL3, 0x0980); + else + regmap_write(rt5670->regmap, RT5670_GPIO_CTRL3, 0x0d00); + ret = regmap_register_patch(rt5670->regmap, init_list, ARRAY_SIZE(init_list)); if (ret != 0) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 5d0bb8748dd1..fb9c20eace3f 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -3284,8 +3284,8 @@ static const struct snd_soc_dapm_route rt5677_dapm_routes[] = { { "IB45 Bypass Mux", "Bypass", "IB45 Mux" }, { "IB45 Bypass Mux", "Pass SRC", "IB45 Mux" }, - { "IB6 Mux", "IF1 DAC 6", "IF1 DAC6" }, - { "IB6 Mux", "IF2 DAC 6", "IF2 DAC6" }, + { "IB6 Mux", "IF1 DAC 6", "IF1 DAC6 Mux" }, + { "IB6 Mux", "IF2 DAC 6", "IF2 DAC6 Mux" }, { "IB6 Mux", "SLB DAC 6", "SLB DAC6" }, { "IB6 Mux", "STO4 ADC MIX L", "Stereo4 ADC MIXL" }, { "IB6 Mux", "IF4 DAC L", "IF4 DAC L" }, @@ -3293,8 +3293,8 @@ static const struct snd_soc_dapm_route rt5677_dapm_routes[] = { { "IB6 Mux", "STO2 ADC MIX L", "Stereo2 ADC MIXL" }, { "IB6 Mux", "STO3 ADC MIX L", "Stereo3 ADC MIXL" }, - { "IB7 Mux", "IF1 DAC 7", "IF1 DAC7" }, - { "IB7 Mux", "IF2 DAC 7", "IF2 DAC7" }, + { "IB7 Mux", "IF1 DAC 7", "IF1 DAC7 Mux" }, + { "IB7 Mux", "IF2 DAC 7", "IF2 DAC7 Mux" }, { "IB7 Mux", "SLB DAC 7", "SLB DAC7" }, { "IB7 Mux", "STO4 ADC MIX R", "Stereo4 ADC MIXR" }, { "IB7 Mux", "IF4 DAC R", "IF4 DAC R" }, @@ -3635,15 +3635,15 @@ static const struct snd_soc_dapm_route rt5677_dapm_routes[] = { { "DAC1 FS", NULL, "DAC1 MIXL" }, { "DAC1 FS", NULL, "DAC1 MIXR" }, - { "DAC2 L Mux", "IF1 DAC 2", "IF1 DAC2" }, - { "DAC2 L Mux", "IF2 DAC 2", "IF2 DAC2" }, + { "DAC2 L Mux", "IF1 DAC 2", "IF1 DAC2 Mux" }, + { "DAC2 L Mux", "IF2 DAC 2", "IF2 DAC2 Mux" }, { "DAC2 L Mux", "IF3 DAC L", "IF3 DAC L" }, { "DAC2 L Mux", "IF4 DAC L", "IF4 DAC L" }, { "DAC2 L Mux", "SLB DAC 2", "SLB DAC2" }, { "DAC2 L Mux", "OB 2", "OutBound2" }, - { "DAC2 R Mux", "IF1 DAC 3", "IF1 DAC3" }, - { "DAC2 R Mux", "IF2 DAC 3", "IF2 DAC3" }, + { "DAC2 R Mux", "IF1 DAC 3", "IF1 DAC3 Mux" }, + { "DAC2 R Mux", "IF2 DAC 3", "IF2 DAC3 Mux" }, { "DAC2 R Mux", "IF3 DAC R", "IF3 DAC R" }, { "DAC2 R Mux", "IF4 DAC R", "IF4 DAC R" }, { "DAC2 R Mux", "SLB DAC 3", "SLB DAC3" }, @@ -3651,29 +3651,29 @@ static const struct snd_soc_dapm_route rt5677_dapm_routes[] = { { "DAC2 R Mux", "Haptic Generator", "Haptic Generator" }, { "DAC2 R Mux", "VAD ADC", "VAD ADC Mux" }, - { "DAC3 L Mux", "IF1 DAC 4", "IF1 DAC4" }, - { "DAC3 L Mux", "IF2 DAC 4", "IF2 DAC4" }, + { "DAC3 L Mux", "IF1 DAC 4", "IF1 DAC4 Mux" }, + { "DAC3 L Mux", "IF2 DAC 4", "IF2 DAC4 Mux" }, { "DAC3 L Mux", "IF3 DAC L", "IF3 DAC L" }, { "DAC3 L Mux", "IF4 DAC L", "IF4 DAC L" }, { "DAC3 L Mux", "SLB DAC 4", "SLB DAC4" }, { "DAC3 L Mux", "OB 4", "OutBound4" }, - { "DAC3 R Mux", "IF1 DAC 5", "IF1 DAC4" }, - { "DAC3 R Mux", "IF2 DAC 5", "IF2 DAC4" }, + { "DAC3 R Mux", "IF1 DAC 5", "IF1 DAC5 Mux" }, + { "DAC3 R Mux", "IF2 DAC 5", "IF2 DAC5 Mux" }, { "DAC3 R Mux", "IF3 DAC R", "IF3 DAC R" }, { "DAC3 R Mux", "IF4 DAC R", "IF4 DAC R" }, { "DAC3 R Mux", "SLB DAC 5", "SLB DAC5" }, { "DAC3 R Mux", "OB 5", "OutBound5" }, - { "DAC4 L Mux", "IF1 DAC 6", "IF1 DAC6" }, - { "DAC4 L Mux", "IF2 DAC 6", "IF2 DAC6" }, + { "DAC4 L Mux", "IF1 DAC 6", "IF1 DAC6 Mux" }, + { "DAC4 L Mux", "IF2 DAC 6", "IF2 DAC6 Mux" }, { "DAC4 L Mux", "IF3 DAC L", "IF3 DAC L" }, { "DAC4 L Mux", "IF4 DAC L", "IF4 DAC L" }, { "DAC4 L Mux", "SLB DAC 6", "SLB DAC6" }, { "DAC4 L Mux", "OB 6", "OutBound6" }, - { "DAC4 R Mux", "IF1 DAC 7", "IF1 DAC7" }, - { "DAC4 R Mux", "IF2 DAC 7", "IF2 DAC7" }, + { "DAC4 R Mux", "IF1 DAC 7", "IF1 DAC7 Mux" }, + { "DAC4 R Mux", "IF2 DAC 7", "IF2 DAC7 Mux" }, { "DAC4 R Mux", "IF3 DAC R", "IF3 DAC R" }, { "DAC4 R Mux", "IF4 DAC R", "IF4 DAC R" }, { "DAC4 R Mux", "SLB DAC 7", "SLB DAC7" }, diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index e182e6569bbd..3593a1496056 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1151,13 +1151,7 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec) /* Enable VDDC charge pump */ ana_pwr |= SGTL5000_VDDC_CHRGPMP_POWERUP; } else if (vddio >= 3100 && vdda >= 3100) { - /* - * if vddio and vddd > 3.1v, - * charge pump should be clean before set ana_pwr - */ - snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, - SGTL5000_VDDC_CHRGPMP_POWERUP, 0); - + ana_pwr &= ~SGTL5000_VDDC_CHRGPMP_POWERUP; /* VDDC use VDDIO rail */ lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD; lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO << diff --git a/sound/soc/codecs/sn95031.c b/sound/soc/codecs/sn95031.c index 47b257e41809..82095d6cd070 100644 --- a/sound/soc/codecs/sn95031.c +++ b/sound/soc/codecs/sn95031.c @@ -538,8 +538,8 @@ static const struct snd_soc_dapm_route sn95031_audio_map[] = { /* speaker map */ { "IHFOUTL", NULL, "Speaker Rail"}, { "IHFOUTR", NULL, "Speaker Rail"}, - { "IHFOUTL", "NULL", "Speaker Left Playback"}, - { "IHFOUTR", "NULL", "Speaker Right Playback"}, + { "IHFOUTL", NULL, "Speaker Left Playback"}, + { "IHFOUTR", NULL, "Speaker Right Playback"}, { "Speaker Left Playback", NULL, "Speaker Left Filter"}, { "Speaker Right Playback", NULL, "Speaker Right Filter"}, { "Speaker Left Filter", NULL, "IHFDAC Left"}, diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 3a1343fa109b..007a0e3bc273 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -106,13 +106,11 @@ static const struct reg_default sta32x_regs[] = { }; static const struct regmap_range sta32x_write_regs_range[] = { - regmap_reg_range(STA32X_CONFA, STA32X_AUTO2), - regmap_reg_range(STA32X_C1CFG, STA32X_FDRC2), + regmap_reg_range(STA32X_CONFA, STA32X_FDRC2), }; static const struct regmap_range sta32x_read_regs_range[] = { - regmap_reg_range(STA32X_CONFA, STA32X_AUTO2), - regmap_reg_range(STA32X_C1CFG, STA32X_FDRC2), + regmap_reg_range(STA32X_CONFA, STA32X_FDRC2), }; static const struct regmap_range sta32x_volatile_regs_range[] = { diff --git a/sound/soc/codecs/tas5086.c b/sound/soc/codecs/tas5086.c index 249ef5c4c762..32942bed34b1 100644 --- a/sound/soc/codecs/tas5086.c +++ b/sound/soc/codecs/tas5086.c @@ -281,7 +281,7 @@ static int tas5086_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = priv->deemph; + ucontrol->value.integer.value[0] = priv->deemph; return 0; } @@ -292,7 +292,7 @@ static int tas5086_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec); - priv->deemph = ucontrol->value.enumerated.item[0]; + priv->deemph = ucontrol->value.integer.value[0]; return tas5086_set_deemph(codec); } diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 8d9de49a5052..21d5402e343f 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -610,7 +610,7 @@ static int wm2000_anc_mode_get(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - ucontrol->value.enumerated.item[0] = wm2000->anc_active; + ucontrol->value.integer.value[0] = wm2000->anc_active; return 0; } @@ -620,7 +620,7 @@ static int wm2000_anc_mode_put(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - int anc_active = ucontrol->value.enumerated.item[0]; + int anc_active = ucontrol->value.integer.value[0]; int ret; if (anc_active > 1) @@ -643,7 +643,7 @@ static int wm2000_speaker_get(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - ucontrol->value.enumerated.item[0] = wm2000->spk_ena; + ucontrol->value.integer.value[0] = wm2000->spk_ena; return 0; } @@ -653,7 +653,7 @@ static int wm2000_speaker_put(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - int val = ucontrol->value.enumerated.item[0]; + int val = ucontrol->value.integer.value[0]; int ret; if (val > 1) diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 098c143f44d6..c6d10533e2bd 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -125,7 +125,7 @@ static int wm8731_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8731->deemph; + ucontrol->value.integer.value[0] = wm8731->deemph; return 0; } @@ -135,7 +135,7 @@ static int wm8731_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; int ret = 0; if (deemph > 1) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index dde462c082be..04b04f8e147c 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -442,7 +442,7 @@ static int wm8903_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8903->deemph; + ucontrol->value.integer.value[0] = wm8903->deemph; return 0; } @@ -452,7 +452,7 @@ static int wm8903_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; int ret = 0; if (deemph > 1) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index d3b3f57668cc..215e93c1ddf0 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -525,7 +525,7 @@ static int wm8904_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8904->deemph; + ucontrol->value.integer.value[0] = wm8904->deemph; return 0; } @@ -534,7 +534,7 @@ static int wm8904_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c index 1ab2d462afad..00bec915d652 100644 --- a/sound/soc/codecs/wm8955.c +++ b/sound/soc/codecs/wm8955.c @@ -393,7 +393,7 @@ static int wm8955_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8955->deemph; + ucontrol->value.integer.value[0] = wm8955->deemph; return 0; } @@ -402,7 +402,7 @@ static int wm8955_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index cf8fecf97f2c..3035d9856415 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -184,7 +184,7 @@ static int wm8960_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8960->deemph; + ucontrol->value.integer.value[0] = wm8960->deemph; return 0; } @@ -193,7 +193,7 @@ static int wm8960_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol); struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 9517571e820d..98c9525bd751 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -180,7 +180,7 @@ static int wm9712_hp_mixer_put(struct snd_kcontrol *kcontrol, struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); struct snd_soc_codec *codec = snd_soc_dapm_to_codec(dapm); struct wm9712_priv *wm9712 = snd_soc_codec_get_drvdata(codec); - unsigned int val = ucontrol->value.enumerated.item[0]; + unsigned int val = ucontrol->value.integer.value[0]; struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; unsigned int mixer, mask, shift, old; @@ -193,7 +193,7 @@ static int wm9712_hp_mixer_put(struct snd_kcontrol *kcontrol, mutex_lock(&wm9712->lock); old = wm9712->hp_mixer[mixer]; - if (ucontrol->value.enumerated.item[0]) + if (ucontrol->value.integer.value[0]) wm9712->hp_mixer[mixer] |= mask; else wm9712->hp_mixer[mixer] &= ~mask; @@ -231,7 +231,7 @@ static int wm9712_hp_mixer_get(struct snd_kcontrol *kcontrol, mixer = mc->shift >> 8; shift = mc->shift & 0xff; - ucontrol->value.enumerated.item[0] = + ucontrol->value.integer.value[0] = (wm9712->hp_mixer[mixer] >> shift) & 1; return 0; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 68222917b396..79552953e1bd 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -255,7 +255,7 @@ static int wm9713_hp_mixer_put(struct snd_kcontrol *kcontrol, struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol); struct snd_soc_codec *codec = snd_soc_dapm_to_codec(dapm); struct wm9713_priv *wm9713 = snd_soc_codec_get_drvdata(codec); - unsigned int val = ucontrol->value.enumerated.item[0]; + unsigned int val = ucontrol->value.integer.value[0]; struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value; unsigned int mixer, mask, shift, old; @@ -268,7 +268,7 @@ static int wm9713_hp_mixer_put(struct snd_kcontrol *kcontrol, mutex_lock(&wm9713->lock); old = wm9713->hp_mixer[mixer]; - if (ucontrol->value.enumerated.item[0]) + if (ucontrol->value.integer.value[0]) wm9713->hp_mixer[mixer] |= mask; else wm9713->hp_mixer[mixer] &= ~mask; @@ -306,7 +306,7 @@ static int wm9713_hp_mixer_get(struct snd_kcontrol *kcontrol, mixer = mc->shift >> 8; shift = mc->shift & 0xff; - ucontrol->value.enumerated.item[0] = + ucontrol->value.integer.value[0] = (wm9713->hp_mixer[mixer] >> shift) & 1; return 0; diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c index 75870c0ea2c9..91eb3aef7f02 100644 --- a/sound/soc/fsl/fsl_spdif.c +++ b/sound/soc/fsl/fsl_spdif.c @@ -1049,7 +1049,7 @@ static u32 fsl_spdif_txclk_caldiv(struct fsl_spdif_priv *spdif_priv, enum spdif_txrate index, bool round) { const u32 rate[] = { 32000, 44100, 48000, 96000, 192000 }; - bool is_sysclk = clk == spdif_priv->sysclk; + bool is_sysclk = clk_is_match(clk, spdif_priv->sysclk); u64 rate_ideal, rate_actual, sub; u32 sysclk_dfmin, sysclk_dfmax; u32 txclk_df, sysclk_df, arate; @@ -1143,7 +1143,7 @@ static int fsl_spdif_probe_txclk(struct fsl_spdif_priv *spdif_priv, spdif_priv->txclk_src[index], rate[index]); dev_dbg(&pdev->dev, "use txclk df %d for %dHz sample rate\n", spdif_priv->txclk_df[index], rate[index]); - if (spdif_priv->txclk[index] == spdif_priv->sysclk) + if (clk_is_match(spdif_priv->txclk[index], spdif_priv->sysclk)) dev_dbg(&pdev->dev, "use sysclk df %d for %dHz sample rate\n", spdif_priv->sysclk_df[index], rate[index]); dev_dbg(&pdev->dev, "the best rate for %dHz sample rate is %dHz\n", diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 2595611e8a6d..6b0c8f717ec2 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -603,17 +603,20 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream, factor = (div2 + 1) * (7 * psr + 1) * 2; for (i = 0; i < 255; i++) { - /* The bclk rate must be smaller than 1/5 sysclk rate */ - if (factor * (i + 1) < 5) - continue; - - tmprate = freq * factor * (i + 2); + tmprate = freq * factor * (i + 1); if (baudclk_is_used) clkrate = clk_get_rate(ssi_private->baudclk); else clkrate = clk_round_rate(ssi_private->baudclk, tmprate); + /* + * Hardware limitation: The bclk rate must be + * never greater than 1/5 IPG clock rate + */ + if (clkrate * 5 > clk_get_rate(ssi_private->clk)) + continue; + clkrate /= factor; afreq = clkrate / (i + 1); @@ -1224,7 +1227,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev, ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0; ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0; - ret = !of_property_read_u32_array(np, "dmas", dmas, 4); + ret = of_property_read_u32_array(np, "dmas", dmas, 4); if (ssi_private->use_dma && !ret && dmas[2] == IMX_DMATYPE_SSI_DUAL) { ssi_private->use_dual_fifo = true; /* When using dual fifo mode, we need to keep watermark diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index f7c6734bd5da..fb550b5869d2 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -372,6 +372,11 @@ static int asoc_simple_card_dai_link_of(struct device_node *node, strlen(dai_link->cpu_dai_name) + strlen(dai_link->codec_dai_name) + 2, GFP_KERNEL); + if (!name) { + ret = -ENOMEM; + goto dai_link_of_err; + } + sprintf(name, "%s-%s", dai_link->cpu_dai_name, dai_link->codec_dai_name); dai_link->name = dai_link->stream_name = name; diff --git a/sound/soc/intel/sst-atom-controls.h b/sound/soc/intel/sst-atom-controls.h index dfebfdd5eb2a..daecc58f28af 100644 --- a/sound/soc/intel/sst-atom-controls.h +++ b/sound/soc/intel/sst-atom-controls.h @@ -150,7 +150,7 @@ enum sst_cmd_type { enum sst_task { SST_TASK_SBA = 1, - SST_TASK_MMX, + SST_TASK_MMX = 3, }; enum sst_type { diff --git a/sound/soc/intel/sst-haswell-dsp.c b/sound/soc/intel/sst-haswell-dsp.c index c42ffae5fe9f..402b728c0a06 100644 --- a/sound/soc/intel/sst-haswell-dsp.c +++ b/sound/soc/intel/sst-haswell-dsp.c @@ -207,9 +207,6 @@ static int hsw_parse_fw_image(struct sst_fw *sst_fw) module = (void *)module + sizeof(*module) + module->mod_size; } - /* allocate scratch mem regions */ - sst_block_alloc_scratch(dsp); - return 0; } diff --git a/sound/soc/intel/sst-haswell-ipc.c b/sound/soc/intel/sst-haswell-ipc.c index 394af5684c05..863a9ca34b8e 100644 --- a/sound/soc/intel/sst-haswell-ipc.c +++ b/sound/soc/intel/sst-haswell-ipc.c @@ -1732,6 +1732,7 @@ static void sst_hsw_drop_all(struct sst_hsw *hsw) int sst_hsw_dsp_load(struct sst_hsw *hsw) { struct sst_dsp *dsp = hsw->dsp; + struct sst_fw *sst_fw, *t; int ret; dev_dbg(hsw->dev, "loading audio DSP...."); @@ -1748,12 +1749,17 @@ int sst_hsw_dsp_load(struct sst_hsw *hsw) return ret; } - ret = sst_fw_reload(hsw->sst_fw); - if (ret < 0) { - dev_err(hsw->dev, "error: SST FW reload failed\n"); - sst_dsp_dma_put_channel(dsp); - return -ENOMEM; + list_for_each_entry_safe_reverse(sst_fw, t, &dsp->fw_list, list) { + ret = sst_fw_reload(sst_fw); + if (ret < 0) { + dev_err(hsw->dev, "error: SST FW reload failed\n"); + sst_dsp_dma_put_channel(dsp); + return -ENOMEM; + } } + ret = sst_block_alloc_scratch(hsw->dsp); + if (ret < 0) + return -EINVAL; sst_dsp_dma_put_channel(dsp); return 0; @@ -1809,12 +1815,17 @@ int sst_hsw_dsp_runtime_suspend(struct sst_hsw *hsw) int sst_hsw_dsp_runtime_sleep(struct sst_hsw *hsw) { - sst_fw_unload(hsw->sst_fw); - sst_block_free_scratch(hsw->dsp); + struct sst_fw *sst_fw, *t; + struct sst_dsp *dsp = hsw->dsp; + + list_for_each_entry_safe(sst_fw, t, &dsp->fw_list, list) { + sst_fw_unload(sst_fw); + } + sst_block_free_scratch(dsp); hsw->boot_complete = false; - sst_dsp_sleep(hsw->dsp); + sst_dsp_sleep(dsp); return 0; } @@ -1943,6 +1954,11 @@ int sst_hsw_dsp_init(struct device *dev, struct sst_pdata *pdata) goto fw_err; } + /* allocate scratch mem regions */ + ret = sst_block_alloc_scratch(hsw->dsp); + if (ret < 0) + goto boot_err; + /* wait for DSP boot completion */ sst_dsp_boot(hsw->dsp); ret = wait_event_timeout(hsw->boot_wait, hsw->boot_complete, diff --git a/sound/soc/intel/sst/sst.c b/sound/soc/intel/sst/sst.c index 8a8d56a146e7..11c578651c1c 100644 --- a/sound/soc/intel/sst/sst.c +++ b/sound/soc/intel/sst/sst.c @@ -350,7 +350,9 @@ static inline void sst_save_shim64(struct intel_sst_drv *ctx, spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags); - shim_regs->imrx = sst_shim_read64(shim, SST_IMRX), + shim_regs->imrx = sst_shim_read64(shim, SST_IMRX); + shim_regs->csr = sst_shim_read64(shim, SST_CSR); + spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags); } @@ -367,6 +369,7 @@ static inline void sst_restore_shim64(struct intel_sst_drv *ctx, */ spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags); sst_shim_write64(shim, SST_IMRX, shim_regs->imrx), + sst_shim_write64(shim, SST_CSR, shim_regs->csr), spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags); } @@ -379,6 +382,10 @@ void sst_configure_runtime_pm(struct intel_sst_drv *ctx) * initially active. So change the state to active before * enabling the pm */ + + if (!acpi_disabled) + pm_runtime_set_active(ctx->dev); + pm_runtime_enable(ctx->dev); if (acpi_disabled) @@ -409,6 +416,7 @@ static int intel_sst_runtime_suspend(struct device *dev) synchronize_irq(ctx->irq_num); flush_workqueue(ctx->post_msg_wq); + ctx->ops->reset(ctx); /* save the shim registers because PMC doesn't save state */ sst_save_shim64(ctx, ctx->shim, ctx->shim_regs64); diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index def7d8260c4e..d19483081f9b 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -579,7 +579,7 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) if (PTR_ERR(priv->extclk) == -EPROBE_DEFER) return -EPROBE_DEFER; } else { - if (priv->extclk == priv->clk) { + if (clk_is_match(priv->extclk, priv->clk)) { devm_clk_put(&pdev->dev, priv->extclk); priv->extclk = ERR_PTR(-EINVAL); } else { diff --git a/sound/soc/omap/omap-hdmi-audio.c b/sound/soc/omap/omap-hdmi-audio.c index ccfb41c22e53..f7eb42aa3f38 100644 --- a/sound/soc/omap/omap-hdmi-audio.c +++ b/sound/soc/omap/omap-hdmi-audio.c @@ -352,6 +352,9 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev) return ret; card = devm_kzalloc(dev, sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + card->name = devm_kasprintf(dev, GFP_KERNEL, "HDMI %s", dev_name(ad->dssdev)); card->owner = THIS_MODULE; diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index c7eb9dd67f60..fd99d89de6a8 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -530,8 +530,19 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai, case OMAP_MCBSP_SYSCLK_CLKX_EXT: regs->srgr2 |= CLKSM; + regs->pcr0 |= SCLKME; + /* + * If McBSP is master but yet the CLKX/CLKR pin drives the SRG, + * disable output on those pins. This enables to inject the + * reference clock through CLKX/CLKR. For this to work + * set_dai_sysclk() _needs_ to be called after set_dai_fmt(). + */ + regs->pcr0 &= ~CLKXM; + break; case OMAP_MCBSP_SYSCLK_CLKR_EXT: regs->pcr0 |= SCLKME; + /* Disable ouput on CLKR pin in master mode */ + regs->pcr0 &= ~CLKRM; break; default: err = -ENODEV; diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c index f4b05bc23e4b..1343ecbf0bd5 100644 --- a/sound/soc/omap/omap-pcm.c +++ b/sound/soc/omap/omap-pcm.c @@ -201,7 +201,7 @@ static int omap_pcm_new(struct snd_soc_pcm_runtime *rtd) struct snd_pcm *pcm = rtd->pcm; int ret; - ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(64)); + ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); if (ret) return ret; diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index 3cebf6ca03df..0632a36852c8 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -174,7 +174,7 @@ config SND_SOC_SMDK_WM8994_PCM config SND_SOC_SPEYSIDE tristate "Audio support for Wolfson Speyside" - depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 + depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 && I2C && SPI_MASTER select SND_SAMSUNG_I2S select SND_SOC_WM8996 select SND_SOC_WM9081 @@ -189,7 +189,7 @@ config SND_SOC_TOBERMORY config SND_SOC_BELLS tristate "Audio support for Wolfson Bells" - depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 && MFD_ARIZONA + depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 && MFD_ARIZONA && I2C && SPI_MASTER select SND_SAMSUNG_I2S select SND_SOC_WM5102 select SND_SOC_WM5110 @@ -206,7 +206,7 @@ config SND_SOC_LOWLAND config SND_SOC_LITTLEMILL tristate "Audio support for Wolfson Littlemill" - depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 + depends on SND_SOC_SAMSUNG && MACH_WLF_CRAGG_6410 && I2C select SND_SAMSUNG_I2S select MFD_WM8994 select SND_SOC_WM8994 @@ -223,7 +223,7 @@ config SND_SOC_SNOW config SND_SOC_ODROIDX2 tristate "Audio support for Odroid-X2 and Odroid-U3" - depends on SND_SOC_SAMSUNG + depends on SND_SOC_SAMSUNG && I2C select SND_SOC_MAX98090 select SND_SAMSUNG_I2S help @@ -231,6 +231,6 @@ config SND_SOC_ODROIDX2 config SND_SOC_ARNDALE_RT5631_ALC5631 tristate "Audio support for RT5631(ALC5631) on Arndale Board" - depends on SND_SOC_SAMSUNG + depends on SND_SOC_SAMSUNG && I2C select SND_SAMSUNG_I2S select SND_SOC_RT5631 diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 1b53605f7154..110577c52317 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -1252,6 +1252,8 @@ static int rsnd_probe(struct platform_device *pdev) goto exit_snd_probe; } + dev_set_drvdata(dev, priv); + /* * asoc register */ @@ -1268,8 +1270,6 @@ static int rsnd_probe(struct platform_device *pdev) goto exit_snd_soc; } - dev_set_drvdata(dev, priv); - pm_runtime_enable(dev); dev_info(dev, "probed\n"); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 30579ca5bacb..e5c990889dcc 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -347,6 +347,8 @@ static ssize_t codec_list_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; + mutex_lock(&client_mutex); + list_for_each_entry(codec, &codec_list, list) { len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", codec->component.name); @@ -358,6 +360,8 @@ static ssize_t codec_list_read_file(struct file *file, char __user *user_buf, } } + mutex_unlock(&client_mutex); + if (ret >= 0) ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); @@ -382,6 +386,8 @@ static ssize_t dai_list_read_file(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; + mutex_lock(&client_mutex); + list_for_each_entry(component, &component_list, list) { list_for_each_entry(dai, &component->dai_list, list) { len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", @@ -395,6 +401,8 @@ static ssize_t dai_list_read_file(struct file *file, char __user *user_buf, } } + mutex_unlock(&client_mutex); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); @@ -418,6 +426,8 @@ static ssize_t platform_list_read_file(struct file *file, if (!buf) return -ENOMEM; + mutex_lock(&client_mutex); + list_for_each_entry(platform, &platform_list, list) { len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n", platform->component.name); @@ -429,6 +439,8 @@ static ssize_t platform_list_read_file(struct file *file, } } + mutex_unlock(&client_mutex); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); kfree(buf); @@ -836,6 +848,8 @@ static struct snd_soc_component *soc_find_component( { struct snd_soc_component *component; + lockdep_assert_held(&client_mutex); + list_for_each_entry(component, &component_list, list) { if (of_node) { if (component->dev->of_node == of_node) @@ -854,6 +868,8 @@ static struct snd_soc_dai *snd_soc_find_dai( struct snd_soc_component *component; struct snd_soc_dai *dai; + lockdep_assert_held(&client_mutex); + /* Find CPU DAI from registered DAIs*/ list_for_each_entry(component, &component_list, list) { if (dlc->of_node && component->dev->of_node != dlc->of_node) @@ -1508,6 +1524,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) struct snd_soc_codec *codec; int ret, i, order; + mutex_lock(&client_mutex); mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT); /* bind DAIs */ @@ -1662,6 +1679,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card) card->instantiated = 1; snd_soc_dapm_sync(&card->dapm); mutex_unlock(&card->mutex); + mutex_unlock(&client_mutex); return 0; @@ -1680,6 +1698,7 @@ card_probe_error: base_error: mutex_unlock(&card->mutex); + mutex_unlock(&client_mutex); return ret; } @@ -2713,13 +2732,6 @@ static void snd_soc_component_del_unlocked(struct snd_soc_component *component) list_del(&component->list); } -static void snd_soc_component_del(struct snd_soc_component *component) -{ - mutex_lock(&client_mutex); - snd_soc_component_del_unlocked(component); - mutex_unlock(&client_mutex); -} - int snd_soc_register_component(struct device *dev, const struct snd_soc_component_driver *cmpnt_drv, struct snd_soc_dai_driver *dai_drv, @@ -2767,14 +2779,17 @@ void snd_soc_unregister_component(struct device *dev) { struct snd_soc_component *cmpnt; + mutex_lock(&client_mutex); list_for_each_entry(cmpnt, &component_list, list) { if (dev == cmpnt->dev && cmpnt->registered_as_component) goto found; } + mutex_unlock(&client_mutex); return; found: - snd_soc_component_del(cmpnt); + snd_soc_component_del_unlocked(cmpnt); + mutex_unlock(&client_mutex); snd_soc_component_cleanup(cmpnt); kfree(cmpnt); } @@ -2882,10 +2897,14 @@ struct snd_soc_platform *snd_soc_lookup_platform(struct device *dev) { struct snd_soc_platform *platform; + mutex_lock(&client_mutex); list_for_each_entry(platform, &platform_list, list) { - if (dev == platform->dev) + if (dev == platform->dev) { + mutex_unlock(&client_mutex); return platform; + } } + mutex_unlock(&client_mutex); return NULL; } @@ -3090,15 +3109,15 @@ void snd_soc_unregister_codec(struct device *dev) { struct snd_soc_codec *codec; + mutex_lock(&client_mutex); list_for_each_entry(codec, &codec_list, list) { if (dev == codec->dev) goto found; } + mutex_unlock(&client_mutex); return; found: - - mutex_lock(&client_mutex); list_del(&codec->list); snd_soc_component_del_unlocked(&codec->component); mutex_unlock(&client_mutex); diff --git a/sound/usb/line6/playback.c b/sound/usb/line6/playback.c index 05dee690f487..97ed593f6010 100644 --- a/sound/usb/line6/playback.c +++ b/sound/usb/line6/playback.c @@ -39,7 +39,7 @@ static void change_volume(struct urb *urb_out, int volume[], for (; p < buf_end; ++p) { short pv = le16_to_cpu(*p); int val = (pv * volume[chn & 1]) >> 8; - pv = clamp(val, 0x7fff, -0x8000); + pv = clamp(val, -0x8000, 0x7fff); *p = cpu_to_le16(pv); ++chn; } @@ -54,7 +54,7 @@ static void change_volume(struct urb *urb_out, int volume[], val = p[0] + (p[1] << 8) + ((signed char)p[2] << 16); val = (val * volume[chn & 1]) >> 8; - val = clamp(val, 0x7fffff, -0x800000); + val = clamp(val, -0x800000, 0x7fffff); p[0] = val; p[1] = val >> 8; p[2] = val >> 16; @@ -126,7 +126,7 @@ static void add_monitor_signal(struct urb *urb_out, unsigned char *signal, short pov = le16_to_cpu(*po); short piv = le16_to_cpu(*pi); int val = pov + ((piv * volume) >> 8); - pov = clamp(val, 0x7fff, -0x8000); + pov = clamp(val, -0x8000, 0x7fff); *po = cpu_to_le16(pov); } } diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index dc9df007d3e3..337c317ead6f 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -192,6 +192,7 @@ static const struct rc_config { { USB_ID(0x041e, 0x3040), 2, 2, 6, 6, 2, 0x6e91 }, /* Live! 24-bit */ { USB_ID(0x041e, 0x3042), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 */ { USB_ID(0x041e, 0x30df), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ + { USB_ID(0x041e, 0x3237), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ { USB_ID(0x041e, 0x3048), 2, 2, 6, 6, 2, 0x6e91 }, /* Toshiba SB0500 */ }; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 67d476548dcf..07f984d5f516 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -1773,6 +1773,36 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + USB_DEVICE(0x0582, 0x0159), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + /* .vendor_name = "Roland", */ + /* .product_name = "UA-22", */ + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = & (const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, /* this catches most recent vendor-specific Roland devices */ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 753a47de8459..9a28365126f9 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1113,8 +1113,13 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) { - /* MS Lifecam HD-5000 doesn't support reading the sample rate. */ - return chip->usb_id == USB_ID(0x045E, 0x076D); + /* devices which do not support reading the sample rate. */ + switch (chip->usb_id) { + case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */ + case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ + return true; + } + return false; } /* Marantz/Denon USB DACs need a vendor cmd to switch diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d66ab799b35f..8c0c1a2770c8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMCPY_FN(__memcpy, +MEMCPY_FN(memcpy_orig, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c, +MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c_e, +MEMCPY_FN(memcpy_erms, "x86-64-movsb", "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index fcd9cf00600a..e4c2c30143b9 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,8 +1,6 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemcpy_c globl memcpy_c; memcpy_c -#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db1d3a29d97f..d3dfb7936dcd 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -36,7 +36,7 @@ static const struct option options[] = { "Specify length of memory to copy. " "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy"), + "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycle", &use_cycle, @@ -135,55 +135,16 @@ struct bench_mem_info { const char *const *usage; }; -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) { - int i; - size_t len; - double totallen; + const struct routine *r = &info->routines[r_idx]; double result_bps[2]; u64 result_cycle[2]; - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... %s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } + printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); @@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ if (use_cycle) { - result_cycle[0] = - info->do_cycle(&info->routines[i], len, false); - result_cycle[1] = - info->do_cycle(&info->routines[i], len, true); + result_cycle[0] = info->do_cycle(r, len, false); + result_cycle[1] = info->do_cycle(r, len, true); } else { - result_bps[0] = - info->do_gettimeofday(&info->routines[i], - len, false); - result_bps[1] = - info->do_gettimeofday(&info->routines[i], - len, true); + result_bps[0] = info->do_gettimeofday(r, len, false); + result_bps[1] = info->do_gettimeofday(r, len, true); } } else { - if (use_cycle) { - result_cycle[pf] = - info->do_cycle(&info->routines[i], - len, only_prefault); - } else { - result_bps[pf] = - info->do_gettimeofday(&info->routines[i], - len, only_prefault); - } + if (use_cycle) + result_cycle[pf] = info->do_cycle(r, len, only_prefault); + else + result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); } switch (bench_format) { @@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv, die("unknown format: %d\n", bench_format); break; } +} + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) +{ + int i; + size_t len; + double totallen; + + argc = parse_options(argc, argv, options, + info->usage, 0); + + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + if (!strncmp(routine, "all", 3)) { + for (i = 0; info->routines[i].name; i++) + __bench_mem_routine(info, i, len, totallen); + return 0; + } + + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) + break; + } + if (!info->routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; info->routines[i].name; i++) { + printf("\t%s ... %s\n", + info->routines[i].name, info->routines[i].desc); + } + return 1; + } + + __bench_mem_routine(info, i, len, totallen); return 0; } diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index a71dff97c1f5..f02d028771d9 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMSET_FN(__memset, +MEMSET_FN(memset_orig, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c, +MEMSET_FN(__memset, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c_e, +MEMSET_FN(memset_erms, "x86-64-stosb", "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 9e5af89ed13a..de278784c866 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,8 +1,6 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemset_c globl memset_c; memset_c -#define Lmemset_c_e globl memset_c_e; memset_c_e #include "../../../arch/x86/lib/memset_64.S" /* diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 61bf9128e1f2..9d9db3b296dd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -30,6 +30,8 @@ static int disasm_line__parse(char *line, char **namep, char **rawp); static void ins__delete(struct ins_operands *ops) { + if (ops == NULL) + return; zfree(&ops->source.raw); zfree(&ops->source.name); zfree(&ops->target.raw); diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h index 6789d788d494..3a3a0f16456a 100644 --- a/tools/perf/util/include/asm/alternative-asm.h +++ b/tools/perf/util/include/asm/alternative-asm.h @@ -4,5 +4,6 @@ /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define altinstruction_entry # +#define ALTERNATIVE_2 # #endif diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 3ed7c0476d48..2e2ba2efa0d9 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -209,7 +209,7 @@ $(OUTPUT)%.o: %.c $(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) $(ECHO) " CC " $@ - $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -Wl,-rpath=./ -lrt -lpci -L$(OUTPUT) -o $@ + $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@ $(QUIET) $(STRIPCMD) $@ $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4e511221a0c1..95abddcd7839 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -17,11 +17,20 @@ TARGETS += sysctl TARGETS += timers TARGETS += user TARGETS += vm +TARGETS += x86 #Please keep the TARGETS list alphabetically sorted TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Clear LDFLAGS and MAKEFLAGS if called from main +# Makefile to avoid test build failures when test +# Makefile doesn't have explicit build rules. +ifeq (1,$(MAKELEVEL)) +undefine LDFLAGS +override MAKEFLAGS = +endif + all: for TARGET in $(TARGETS); do \ make -C $$TARGET; \ @@ -47,7 +56,40 @@ clean_hotplug: make -C $$TARGET clean; \ done; +INSTALL_PATH ?= install +INSTALL_PATH := $(abspath $(INSTALL_PATH)) +ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh + +install: +ifdef INSTALL_PATH + @# Ask all targets to install their files + mkdir -p $(INSTALL_PATH) + for TARGET in $(TARGETS); do \ + mkdir -p $(INSTALL_PATH)/$$TARGET ; \ + make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + done; + + @# Ask all targets to emit their test scripts + echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + + for TARGET in $(TARGETS); do \ + echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "cd $$TARGET" >> $(ALL_SCRIPT); \ + make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + done; + + chmod u+x $(ALL_SCRIPT) +else + $(error Error: set INSTALL_PATH to use install) +endif + clean: for TARGET in $(TARGETS); do \ make -C $$TARGET clean; \ done; + +.PHONY: install diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index e18b42b254af..182235640209 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -16,8 +16,9 @@ else echo "Not an x86 target, can't build breakpoints selftests" endif -run_tests: - @./breakpoint_test || echo "breakpoints selftests: [FAIL]" +TEST_PROGS := breakpoint_test + +include ../lib.mk clean: rm -fr breakpoint_test diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile index e9c28d8dc84b..fe1f99101c5d 100644 --- a/tools/testing/selftests/cpu-hotplug/Makefile +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -1,9 +1,10 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" +TEST_PROGS := cpu-on-off-test.sh + +include ../lib.mk run_full_test: - @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" + @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 98b1d6565f2c..98b1d6565f2c 100644..100755 --- a/tools/testing/selftests/cpu-hotplug/on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile index 29e8c6bc81b0..736c3ddfc787 100644 --- a/tools/testing/selftests/efivarfs/Makefile +++ b/tools/testing/selftests/efivarfs/Makefile @@ -1,12 +1,13 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall test_objs = open-unlink create-read all: $(test_objs) -run_tests: all - @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" +TEST_PROGS := efivarfs.sh +TEST_FILES := $(test_objs) + +include ../lib.mk clean: rm -f $(test_objs) diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc016b..77edcdcc016b 100644..100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 66dfc2ce1788..4edb7d0da29b 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,4 +1,3 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = execveat DEPS = execveat.symlink execveat.denatured script subdir @@ -18,8 +17,12 @@ execveat.denatured: execveat %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - ./execveat +TEST_PROGS := execveat +TEST_FILES := $(DEPS) + +include ../lib.mk + +override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\"" clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index e238c9559caf..8d5d1d2ee7c1 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -30,7 +30,7 @@ static int execveat_(int fd, const char *path, char **argv, char **envp, #ifdef __NR_execveat return syscall(__NR_execveat, fd, path, argv, envp, flags); #else - errno = -ENOSYS; + errno = ENOSYS; return -1; #endif } @@ -234,6 +234,14 @@ static int run_tests(void) int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC); int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC); + /* Check if we have execveat at all, and bail early if not */ + errno = 0; + execveat_(-1, NULL, NULL, NULL, 0); + if (errno == ENOSYS) { + printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); + return 1; + } + /* Change file position to confirm it doesn't affect anything */ lseek(fd, 10, SEEK_SET); diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index e23cce0bbc3a..9bf82234855b 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -3,25 +3,9 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -fw_filesystem: - @if /bin/sh ./fw_filesystem.sh ; then \ - echo "fw_filesystem: ok"; \ - else \ - echo "fw_filesystem: [FAIL]"; \ - exit 1; \ - fi +TEST_PROGS := fw_filesystem.sh fw_userhelper.sh -fw_userhelper: - @if /bin/sh ./fw_userhelper.sh ; then \ - echo "fw_userhelper: ok"; \ - else \ - echo "fw_userhelper: [FAIL]"; \ - exit 1; \ - fi - -run_tests: all fw_filesystem fw_userhelper +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all clean run_tests fw_filesystem fw_userhelper diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 3fc6c10c2479..3fc6c10c2479 100644..100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh index 6efbade12139..6efbade12139 100644..100755 --- a/tools/testing/selftests/firmware/fw_userhelper.sh +++ b/tools/testing/selftests/firmware/fw_userhelper.sh diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 76cc9f156267..346720639d1d 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,7 +1,8 @@ all: -run_tests: - @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]" +TEST_PROGS := ftracetest + +include ../lib.mk clean: rm -rf logs/* diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc index fd9c49a13612..aa51f6c17359 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc @@ -2,4 +2,4 @@ # description: Basic event tracing check test -f available_events -a -f set_event -a -d events # check scheduler events are available -grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file +grep -q sched available_events && exit 0 || exit $FAIL diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 668616d9bb03..87eb9d6dd4ca 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:sched_switch' > set_event -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index 655c415b6e7f..ced27ef0638f 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:*' > set_event -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc index 480845774007..0bb5df3c00d4 100644 --- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then @@ -21,6 +25,9 @@ reset_tracer do_reset echo '*:*' > set_event + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -29,6 +36,9 @@ fi do_reset echo 1 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -37,6 +47,9 @@ fi do_reset echo 0 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -ne 0 ]; then fail "any of events should not be recorded" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index c15e018e0220..15c2dba06ea2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -16,7 +16,9 @@ fi do_reset() { reset_tracer - echo 0 > /proc/sys/kernel/stack_tracer_enabled + if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then + echo 0 > /proc/sys/kernel/stack_tracer_enabled + fi enable_tracing clear_trace echo > set_ftrace_filter @@ -25,7 +27,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index 6af5f6360b18..0ab2189613ef 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -17,7 +17,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index 2e719cb1fc4d..7808336d6f50 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -31,7 +31,7 @@ fail() { # mesg reset_tracer echo > set_ftrace_filter echo $1 - exit -1 + exit $FAIL } echo "Testing function tracer with profiler:" diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh new file mode 100755 index 000000000000..17d5bd0c0936 --- /dev/null +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# gen_kselftest_tar +# Generate kselftest tarball +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +# main +main() +{ + if [ "$#" -eq 0 ]; then + echo "$0: Generating default compression gzip" + copts="cvzf" + ext=".tar.gz" + else + case "$1" in + tar) + copts="cvf" + ext=".tar" + ;; + targz) + copts="cvzf" + ext=".tar.gz" + ;; + tarbz2) + copts="cvjf" + ext=".tar.bz2" + ;; + tarxz) + copts="cvJf" + ext=".tar.xz" + ;; + *) + echo "Unknown tarball format $1" + exit 1 + ;; + esac + fi + + install_dir=./kselftest + +# Run install using INSTALL_KSFT_PATH override to generate install +# directory +./kselftest_install.sh +tar $copts kselftest${ext} $install_dir +echo "Kselftest archive kselftest${ext} created!" + +# clean up install directory +rm -rf kselftest +} + +main "$@" diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile index 74bbefdeaf4c..25d2e702c68a 100644 --- a/tools/testing/selftests/ipc/Makefile +++ b/tools/testing/selftests/ipc/Makefile @@ -12,14 +12,11 @@ endif CFLAGS += -I../../../../usr/include/ all: -ifeq ($(ARCH),x86) - gcc $(CFLAGS) msgque.c -o msgque_test -else - echo "Not an x86 target, can't build msgque selftest" -endif + $(CC) $(CFLAGS) msgque.c -o msgque_test + +TEST_PROGS := msgque_test -run_tests: all - ./msgque_test +include ../lib.mk clean: rm -fr ./msgque_test diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index ff0eefdc6ceb..2ae7450a9a89 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -1,10 +1,10 @@ -CC := $(CROSS_COMPILE)$(CC) CFLAGS += -I../../../../usr/include/ all: kcmp_test -run_tests: all - @./kcmp_test || echo "kcmp_test: [FAIL]" +TEST_PROGS := kcmp_test + +include ../lib.mk clean: $(RM) kcmp_test kcmp-test-file diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh new file mode 100755 index 000000000000..1555fbdb08da --- /dev/null +++ b/tools/testing/selftests/kselftest_install.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Kselftest Install +# Install kselftest tests +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +install_loc=`pwd` + +main() +{ + if [ $(basename $install_loc) != "selftests" ]; then + echo "$0: Please run it in selftests directory ..." + exit 1; + fi + if [ "$#" -eq 0 ]; then + echo "$0: Installing in default location - $install_loc ..." + elif [ ! -d "$1" ]; then + echo "$0: $1 doesn't exist!!" + exit 1; + else + install_loc=$1 + echo "$0: Installing in specified location - $install_loc ..." + fi + + install_dir=$install_loc/kselftest + +# Create install directory + mkdir -p $install_dir +# Build tests + INSTALL_PATH=$install_dir make install +} + +main "$@" diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk new file mode 100644 index 000000000000..2194155ae62a --- /dev/null +++ b/tools/testing/selftests/lib.mk @@ -0,0 +1,35 @@ +# This mimics the top-level Makefile. We do it explicitly here so that this +# Makefile can operate with or without the kbuild infrastructure. +CC := $(CROSS_COMPILE)gcc + +define RUN_TESTS + @for TEST in $(TEST_PROGS); do \ + (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \ + done; +endef + +run_tests: all + $(RUN_TESTS) + +define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) +endef + +install: all +ifdef INSTALL_PATH + $(INSTALL_RULE) +else + $(error Error: set INSTALL_PATH to use install) +endif + +define EMIT_TESTS + @for TEST in $(TEST_PROGS); do \ + echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \ + done; +endef + +emit_tests: + $(EMIT_TESTS) + +.PHONY: run_tests all clean install emit_tests diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index b80cd10d53ba..3e7eb7972511 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -1,17 +1,19 @@ +CC = $(CROSS_COMPILE)gcc CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ all: - gcc $(CFLAGS) memfd_test.c -o memfd_test + $(CC) $(CFLAGS) memfd_test.c -o memfd_test -run_tests: all - gcc $(CFLAGS) memfd_test.c -o memfd_test - @./memfd_test || echo "memfd_test: [FAIL]" +TEST_PROGS := memfd_test + +include ../lib.mk build_fuse: - gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt - gcc $(CFLAGS) fuse_test.c -o fuse_test + $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt + $(CC) $(CFLAGS) fuse_test.c -o fuse_test run_fuse: build_fuse @./run_fuse_test.sh || echo "fuse_test: [FAIL]" diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index d46b8d489cd2..afb2624c7048 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -1,9 +1,12 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]" +include ../lib.mk + +TEST_PROGS := mem-on-off-test.sh +override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]" +override EMIT_TESTS := echo "$(RUN_TESTS)" run_full_test: - @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index 6cddde0b96f8..6cddde0b96f8 100644..100755 --- a/tools/testing/selftests/memory-hotplug/on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore new file mode 100644 index 000000000000..856ad4107eb3 --- /dev/null +++ b/tools/testing/selftests/mount/.gitignore @@ -0,0 +1 @@ +unprivileged-remount-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 337d853c2b72..95580a97326e 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -1,17 +1,16 @@ # Makefile for mount selftests. - +CFLAGS = -Wall \ + -O2 all: unprivileged-remount-test unprivileged-remount-test: unprivileged-remount-test.c - gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test -# Allow specific tests to be selected. -test_unprivileged_remount: unprivileged-remount-test - @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +include ../lib.mk -run_tests: all test_unprivileged_remount +TEST_PROGS := unprivileged-remount-test +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +override EMIT_TESTS := echo "$(RUN_TESTS)" clean: rm -f unprivileged-remount-test - -.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 8056e2e68fa4..0e3b41eb85cd 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,10 +1,22 @@ +CFLAGS = -O2 + all: - gcc -O2 mq_open_tests.c -o mq_open_tests -lrt - gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt + $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + +include ../lib.mk + +override define RUN_TESTS + @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" +endef + +TEST_PROGS := mq_open_tests mq_perf_tests -run_tests: - @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" - @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" +override define EMIT_TESTS + echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" + echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" +endef clean: rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 62f22cc9941c..fac4782c51d8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,6 +1,5 @@ # Makefile for net selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ @@ -11,9 +10,10 @@ all: $(NET_PROGS) %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" - @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" - ./test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_FILES := $(NET_PROGS) + +include ../lib.mk + clean: $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 5246e782d6e8..5246e782d6e8 100644..100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..c09a682df56a 100644..100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 1d5e7ad2c460..2958fe9a74e9 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,10 +8,9 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CC := $(CROSS_COMPILE)$(CC) CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) -export CC CFLAGS +export CFLAGS TARGETS = pmu copyloops mm tm primitives stringloops @@ -22,10 +21,25 @@ all: $(TARGETS) $(TARGETS): $(MAKE) -k -C $@ all -run_tests: all +include ../lib.mk + +override define RUN_TESTS @for TARGET in $(TARGETS); do \ $(MAKE) -C $$TARGET run_tests; \ done; +endef + +override define INSTALL_RULE + @for TARGET in $(TARGETS); do \ + $(MAKE) -C $$TARGET install; \ + done; +endef + +override define EMIT_TESTS + @for TARGET in $(TARGETS); do \ + $(MAKE) -s -C $$TARGET emit_tests; \ + done; +endef clean: @for TARGET in $(TARGETS); do \ @@ -36,4 +50,4 @@ clean: tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: all run_tests clean tags $(TARGETS) +.PHONY: tags $(TARGETS) diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 6f2d3be227f9..c05023514ce8 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -6,24 +6,19 @@ CFLAGS += -D SELFTEST # Use our CFLAGS for the implicit .S rule ASFLAGS = $(CFLAGS) -PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 +TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7 memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7 -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index a14c538dd7f8..41cc3ed66818 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,21 +1,16 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test subpage_prot +TEST_PROGS := hugetlb_vs_thp_test subpage_prot -all: $(PROGS) tempfile +all: $(TEST_PROGS) tempfile -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk tempfile: dd if=/dev/zero of=tempfile bs=64k count=1 clean: - rm -f $(PROGS) tempfile - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) tempfile diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index c9f4263906a5..5a161175bbd4 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -1,38 +1,42 @@ noarg: $(MAKE) -C ../ -PROGS := count_instructions l3_bank_test per_event_excludes +TEST_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c -SUB_TARGETS = ebb +all: $(TEST_PROGS) ebb -all: $(PROGS) $(SUB_TARGETS) - -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) # loop.S can only be built 64-bit count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ -run_tests: all sub_run_tests - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk -clean: sub_clean - rm -f $(PROGS) loop.o +DEFAULT_RUN_TESTS := $(RUN_TESTS) +override define RUN_TESTS + $(DEFAULT_RUN_TESTS) + $(MAKE) -C ebb run_tests +endef -$(SUB_TARGETS): - $(MAKE) -k -C $@ all +DEFAULT_EMIT_TESTS := $(EMIT_TESTS) +override define EMIT_TESTS + $(DEFAULT_EMIT_TESTS) + $(MAKE) -s -C ebb emit_tests +endef -sub_run_tests: all - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET run_tests; \ - done; +DEFAULT_INSTALL := $(INSTALL_RULE) +override define INSTALL_RULE + $(DEFAULT_INSTALL_RULE) + $(MAKE) -C ebb install +endef -sub_clean: - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET clean; \ - done; +clean: + rm -f $(TEST_PROGS) loop.o + $(MAKE) -C ebb clean + +ebb: + $(MAKE) -k -C $@ all -.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS) +.PHONY: all run_tests clean ebb diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 3dc4332698cb..5cdc9dbf2b27 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,7 +4,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 -PROGS := reg_access_test event_attributes_test cycles_test \ +TEST_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \ @@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S +$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S instruction_count_test: ../loop.S lost_exception_test: ../lib.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../../lib.mk clean: - rm -f $(PROGS) + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index ea737ca01732..b68c6221d3d1 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,17 +1,12 @@ CFLAGS += -I$(CURDIR) -PROGS := load_unaligned_zeropad +TEST_PROGS := load_unaligned_zeropad -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 506d77346477..2a728f4d2873 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -2,19 +2,14 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) -PROGS := memcmp +TEST_PROGS := memcmp EXTRA_SOURCES := memcmp_64.S ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 2cede239a074..34f2ec634b40 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,15 +1,10 @@ -PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 47ae2d385ce8..453927fea90c 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -6,5 +6,6 @@ all: peeksiginfo clean: rm -f peeksiginfo -run_tests: all - @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" +TEST_PROGS := peeksiginfo + +include ../lib.mk diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile index 04dc25e4fa92..bbd0b5398b61 100644 --- a/tools/testing/selftests/size/Makefile +++ b/tools/testing/selftests/size/Makefile @@ -1,12 +1,11 @@ -CC = $(CROSS_COMPILE)gcc - all: get_size get_size: get_size.c $(CC) -static -ffreestanding -nostartfiles -s $< -o $@ -run_tests: all - ./get_size +TEST_PROGS := get_size + +include ../lib.mk clean: $(RM) get_size diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile index 0a92adaf0865..b3c33e071f10 100644 --- a/tools/testing/selftests/sysctl/Makefile +++ b/tools/testing/selftests/sysctl/Makefile @@ -4,16 +4,10 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests". all: -# Allow specific tests to be selected. -test_num: - @/bin/sh ./run_numerictests +TEST_PROGS := run_numerictests run_stringtests +TEST_FILES := common_tests -test_string: - @/bin/sh ./run_stringtests - -run_tests: all test_num test_string +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all run_tests clean test_num test_string diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests index 8510f93f2d14..8510f93f2d14 100644..100755 --- a/tools/testing/selftests/sysctl/run_numerictests +++ b/tools/testing/selftests/sysctl/run_numerictests diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests index 90a9293d520c..90a9293d520c 100644..100755 --- a/tools/testing/selftests/sysctl/run_stringtests +++ b/tools/testing/selftests/sysctl/run_stringtests diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index eb2859f4ad21..89a3f44bf355 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,8 +1,36 @@ -all: - gcc posix_timers.c -o posix_timers -lrt +CC = $(CROSS_COMPILE)gcc +BUILD_FLAGS = -DKTEST +CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +LDFLAGS += -lrt -lpthread -run_tests: all - ./posix_timers +# these are all "safe" tests that don't modify +# system time or require escalated privledges +TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ + inconsistency-check raw_skew threadtest rtctest + +TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \ + skew_consistency clocksource-switch leap-a-day \ + leapcrash set-tai set-2038 + +bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED) + +all: ${bins} + +include ../lib.mk + +# these tests require escalated privledges +# and may modify the system time or trigger +# other behavior like suspend +run_destructive_tests: run_tests + ./alarmtimer-suspend + ./valid-adjtimex + ./change_skew + ./skew_consistency + ./clocksource-switch + ./leap-a-day -s -i 10 + ./leapcrash + ./set-tai + ./set-2038 clean: - rm -f ./posix_timers + rm -f ${bins} diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c new file mode 100644 index 000000000000..aaffbde1d5ee --- /dev/null +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -0,0 +1,185 @@ +/* alarmtimer suspend test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This test makes sure the alarmtimer & RTC wakeup code is + * functioning. + * + * To build: + * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */ + +#define SUSPEND_SECS 15 +int alarmcount; +int alarm_clock_id; +struct timespec start_time; + + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int final_ret = 0; + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(alarm_clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount; + + printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec, + ts.tv_nsec, delta_ns); + + if (delta_ns > UNREASONABLE_LAT) { + printf("[FAIL]\n"); + final_ret = -1; + } else + printf("[OK]\n"); + +} + +int main(void) +{ + timer_t tm1; + struct itimerspec its1, its2; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + + for (alarm_clock_id = CLOCK_REALTIME_ALARM; + alarm_clock_id <= CLOCK_BOOTTIME_ALARM; + alarm_clock_id++) { + + alarmcount = 0; + timer_create(alarm_clock_id, &se, &tm1); + + clock_gettime(alarm_clock_id, &start_time); + printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), + start_time.tv_sec, start_time.tv_nsec); + printf("Setting alarm for every %i seconds\n", SUSPEND_SECS); + its1.it_value = start_time; + its1.it_value.tv_sec += SUSPEND_SECS; + its1.it_interval.tv_sec = SUSPEND_SECS; + its1.it_interval.tv_nsec = 0; + + timer_settime(tm1, TIMER_ABSTIME, &its1, &its2); + + while (alarmcount < 5) + sleep(1); /* First 5 alarms, do nothing */ + + printf("Starting suspend loops\n"); + while (alarmcount < 10) { + int ret; + + sleep(1); + ret = system("echo mem > /sys/power/state"); + if (ret) + break; + } + timer_delete(tm1); + } + if (final_ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c new file mode 100644 index 000000000000..cb1968977c04 --- /dev/null +++ b/tools/testing/selftests/timers/change_skew.c @@ -0,0 +1,107 @@ +/* ADJ_FREQ Skew change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and + * then uses other tests to detect problems. Thus this test requires + * that the raw_skew, inconsistency-check and nanosleep tests be + * present in the same directory it is run from. + * + * To build: + * $ gcc change_skew.c -o change_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + + +int change_skew_test(int ppm) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error adjusting freq\n"); + return ret; + } + + ret = system("./raw_skew"); + ret |= system("./inconsistency-check"); + ret |= system("./nanosleep"); + + return ret; +} + + +int main(int argv, char **argc) +{ + struct timex tx; + int i, ret; + + int ppm[5] = {0, 250, 500, -250, -500}; + + /* Kill ntpd */ + ret = system("killall -9 ntpd"); + + /* Make sure there's no offset adjustment going on */ + tx.modes = ADJ_OFFSET; + tx.offset = 0; + ret = adjtimex(&tx); + + if (ret < 0) { + printf("Maybe you're not running as root?\n"); + return -1; + } + + for (i = 0; i < 5; i++) { + printf("Using %i ppm adjustment\n", ppm[i]); + ret = change_skew_test(ppm[i]); + if (ret) + break; + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + if (ret) { + printf("[FAIL]"); + return ksft_exit_fail(); + } + printf("[OK]"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c new file mode 100644 index 000000000000..627ec7425f78 --- /dev/null +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -0,0 +1,179 @@ +/* Clocksource change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which quickly changes the clocksourc and + * then uses other tests to detect problems. Thus this test requires + * that the inconsistency-check and nanosleep tests be present in the + * same directory it is run from. + * + * To build: + * $ gcc clocksource-switch.c -o clocksource-switch -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +int get_clocksources(char list[][30]) +{ + int fd, i; + size_t size; + char buf[512]; + char *head, *tmp; + + fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY); + + size = read(fd, buf, 512); + + close(fd); + + for (i = 0; i < 30; i++) + list[i][0] = '\0'; + + head = buf; + i = 0; + while (head - buf < size) { + /* Find the next space */ + for (tmp = head; *tmp != ' '; tmp++) { + if (*tmp == '\n') + break; + if (*tmp == '\0') + break; + } + *tmp = '\0'; + strcpy(list[i], head); + head = tmp + 1; + i++; + } + + return i-1; +} + +int get_cur_clocksource(char *buf, size_t size) +{ + int fd; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY); + + size = read(fd, buf, size); + + return 0; +} + +int change_clocksource(char *clocksource) +{ + int fd; + size_t size; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY); + + if (fd < 0) + return -1; + + size = write(fd, clocksource, strlen(clocksource)); + + if (size < 0) + return -1; + + close(fd); + return 0; +} + + +int run_tests(int secs) +{ + int ret; + char buf[255]; + + sprintf(buf, "./inconsistency-check -t %i", secs); + ret = system(buf); + if (ret) + return ret; + ret = system("./nanosleep"); + return ret; +} + + +char clocksource_list[10][30]; + +int main(int argv, char **argc) +{ + char orig_clk[512]; + int count, i, status; + pid_t pid; + + get_cur_clocksource(orig_clk, 512); + + count = get_clocksources(clocksource_list); + + if (change_clocksource(clocksource_list[0])) { + printf("Error: You probably need to run this as root\n"); + return -1; + } + + /* Check everything is sane before we start switching asyncrhonously */ + for (i = 0; i < count; i++) { + printf("Validating clocksource %s\n", clocksource_list[i]); + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } + if (run_tests(5)) { + status = -1; + goto out; + } + } + + + printf("Running Asyncrhonous Switching Tests...\n"); + pid = fork(); + if (!pid) + return run_tests(60); + + while (pid != waitpid(pid, &status, WNOHANG)) + for (i = 0; i < count; i++) + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } +out: + change_clocksource(orig_clk); + + if (status) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c new file mode 100644 index 000000000000..caf1bc9257c4 --- /dev/null +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -0,0 +1,204 @@ +/* Time inconsistency check test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2003, 2004, 2005, 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc inconsistency-check.c -o inconsistency-check -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CALLS_PER_LOOP 64 +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + /* use unsigned to avoid false positives on 2038 rollover */ + if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec) + return 1; + if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + + + +int consistency_test(int clock_type, unsigned long seconds) +{ + struct timespec list[CALLS_PER_LOOP]; + int i, inconsistent; + long now, then; + time_t t; + char *start_str; + + clock_gettime(clock_type, &list[0]); + now = then = list[0].tv_sec; + + /* timestamp start of test */ + t = time(0); + start_str = ctime(&t); + + while (seconds == -1 || now - then < seconds) { + inconsistent = 0; + + /* Fill list */ + for (i = 0; i < CALLS_PER_LOOP; i++) + clock_gettime(clock_type, &list[i]); + + /* Check for inconsistencies */ + for (i = 0; i < CALLS_PER_LOOP - 1; i++) + if (!in_order(list[i], list[i+1])) + inconsistent = i; + + /* display inconsistency */ + if (inconsistent) { + unsigned long long delta; + + printf("\%s\n", start_str); + for (i = 0; i < CALLS_PER_LOOP; i++) { + if (i == inconsistent) + printf("--------------------\n"); + printf("%lu:%lu\n", list[i].tv_sec, + list[i].tv_nsec); + if (i == inconsistent + 1) + printf("--------------------\n"); + } + delta = list[inconsistent].tv_sec * NSEC_PER_SEC; + delta += list[inconsistent].tv_nsec; + delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC; + delta -= list[inconsistent+1].tv_nsec; + printf("Delta: %llu ns\n", delta); + fflush(0); + /* timestamp inconsistency*/ + t = time(0); + printf("%s\n", ctime(&t)); + printf("[FAILED]\n"); + return -1; + } + now = list[0].tv_sec; + } + printf("[OK]\n"); + return 0; +} + + +int main(int argc, char *argv[]) +{ + int clockid, opt; + int userclock = CLOCK_REALTIME; + int maxclocks = NR_CLOCKIDS; + int runtime = 10; + struct timespec ts; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:c:")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'c': + userclock = atoi(optarg); + maxclocks = userclock + 1; + break; + default: + printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]); + printf(" -t: Number of seconds to run\n"); + printf(" -c: clockid to use (default, all clockids)\n"); + exit(-1); + } + } + + setbuf(stdout, NULL); + + for (clockid = userclock; clockid < maxclocks; clockid++) { + + if (clockid == CLOCK_HWSPECIFIC) + continue; + + if (!clock_gettime(clockid, &ts)) { + printf("Consistent %-30s ", clockstring(clockid)); + if (consistency_test(clockid, runtime)) + return ksft_exit_fail(); + } + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c new file mode 100644 index 000000000000..b8272e6c4b3b --- /dev/null +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -0,0 +1,319 @@ +/* Leap second stress test + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPLv2 + * + * This test signals the kernel to insert a leap second + * every day at midnight GMT. This allows for stessing the + * kernel's leap-second behavior, as well as how well applications + * handle the leap-second discontinuity. + * + * Usage: leap-a-day [-s] [-i <num>] + * + * Options: + * -s: Each iteration, set the date to 10 seconds before midnight GMT. + * This speeds up the number of leapsecond transitions tested, + * but because it calls settimeofday frequently, advancing the + * time by 24 hours every ~16 seconds, it may cause application + * disruption. + * + * -i: Number of iterations to run (default: infinite) + * + * Other notes: Disabling NTP prior to running this is advised, as the two + * may conflict in their commands to the kernel. + * + * To build: + * $ gcc leap-a-day.c -o leap-a-day -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL +#define CLOCK_TAI 11 + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +char *time_state_str(int state) +{ + switch (state) { + case TIME_OK: return "TIME_OK"; + case TIME_INS: return "TIME_INS"; + case TIME_DEL: return "TIME_DEL"; + case TIME_OOP: return "TIME_OOP"; + case TIME_WAIT: return "TIME_WAIT"; + case TIME_BAD: return "TIME_BAD"; + } + return "ERROR"; +} + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + /* Clear maxerror, as it can cause UNSYNC to be set */ + tx.modes = ADJ_MAXERROR; + tx.maxerror = 0; + ret = adjtimex(&tx); + + /* Clear the status */ + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + +/* Test for known hrtimer failure */ +void test_hrtimer_failure(void) +{ + struct timespec now, target; + + clock_gettime(CLOCK_REALTIME, &now); + target = timespec_add(now, NSEC_PER_SEC/2); + clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); + clock_gettime(CLOCK_REALTIME, &now); + + if (!in_order(target, now)) + printf("ERROR: hrtimer early expiration failure observed.\n"); +} + +int main(int argc, char **argv) +{ + int settime = 0; + int tai_time = 0; + int insert = 1; + int iterations = -1; + int opt; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "sti:")) != -1) { + switch (opt) { + case 's': + printf("Setting time to speed up testing\n"); + settime = 1; + break; + case 'i': + iterations = atoi(optarg); + break; + case 't': + tai_time = 1; + break; + default: + printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]); + printf(" -s: Set time to right before leap second each iteration\n"); + printf(" -i: Number of iterations\n"); + printf(" -t: Print TAI time\n"); + exit(-1); + } + } + + /* Make sure TAI support is present if -t was used */ + if (tai_time) { + struct timespec ts; + + if (clock_gettime(CLOCK_TAI, &ts)) { + printf("System doesn't support CLOCK_TAI\n"); + ksft_exit_fail(); + } + } + + signal(SIGINT, handler); + signal(SIGKILL, handler); + + if (iterations < 0) + printf("This runs continuously. Press ctrl-c to stop\n"); + else + printf("Running for %i iterations. Press ctrl-c to stop\n", iterations); + + printf("\n"); + while (1) { + int ret; + struct timespec ts; + struct timex tx; + time_t now, next_leap; + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + if (settime) { + struct timeval tv; + + tv.tv_sec = next_leap - 10; + tv.tv_usec = 0; + settimeofday(&tv, NULL); + printf("Setting time to %s", ctime(&tv.tv_sec)); + } + + /* Reset NTP time state */ + clear_time_state(); + + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error: Problem setting STA_INS/STA_DEL!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + /* Validate STA_INS was set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Error: STA_INS/STA_DEL not set!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + if (tai_time) { + printf("Using TAI time," + " no inconsistencies should be seen!\n"); + } + + printf("Scheduling leap second for %s", ctime(&next_leap)); + + /* Wake up 3 seconds before leap */ + ts.tv_sec = next_leap - 3; + ts.tv_nsec = 0; + + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) + printf("Something woke us up, returning to sleep\n"); + + /* Validate STA_INS is still set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Something cleared STA_INS/STA_DEL, setting it again.\n"); + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + } + + /* Check adjtimex output every half second */ + now = tx.time.tv_sec; + while (now < next_leap + 2) { + char buf[26]; + struct timespec tai; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tai_time) { + clock_gettime(CLOCK_TAI, &tai); + printf("%ld sec, %9ld ns\t%s\n", + tai.tv_sec, + tai.tv_nsec, + time_state_str(ret)); + } else { + ctime_r(&tx.time.tv_sec, buf); + buf[strlen(buf)-1] = 0; /*remove trailing\n */ + + printf("%s + %6ld us (%i)\t%s\n", + buf, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + now = tx.time.tv_sec; + /* Sleep for another half second */ + ts.tv_sec = 0; + ts.tv_nsec = NSEC_PER_SEC / 2; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + } + /* Switch to using other mode */ + insert = !insert; + + /* Note if kernel has known hrtimer failure */ + test_hrtimer_failure(); + + printf("Leap complete\n\n"); + + if ((iterations != -1) && !(--iterations)) + break; + } + + clear_time_state(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c new file mode 100644 index 000000000000..a1071bdbdeb7 --- /dev/null +++ b/tools/testing/selftests/timers/leapcrash.c @@ -0,0 +1,120 @@ +/* Demo leapsecond deadlock + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPL + * + * This test demonstrates leapsecond deadlock that is possibe + * on kernels from 2.6.26 to 3.3. + * + * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * RUN AT YOUR OWN RISK! + * To build: + * $ gcc leapcrash.c -o leapcrash -lrt + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + + +int main(void) +{ + struct timex tx; + struct timespec ts; + time_t next_leap; + int count = 0; + + setbuf(stdout, NULL); + + signal(SIGINT, handler); + signal(SIGKILL, handler); + printf("This runs for a few minutes. Press ctrl-c to stop\n"); + + clear_time_state(); + + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + for (count = 0; count < 20; count++) { + struct timeval tv; + + + /* set the time to 2 seconds before the leap */ + tv.tv_sec = next_leap - 2; + tv.tv_usec = 0; + if (settimeofday(&tv, NULL)) { + printf("Error: You're likely not running with proper (ie: root) permissions\n"); + return ksft_exit_fail(); + } + tx.modes = 0; + adjtimex(&tx); + + /* hammer on adjtime w/ STA_INS */ + while (tx.time.tv_sec < next_leap + 1) { + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + tx.status = STA_INS; + adjtimex(&tx); + } + clear_time_state(); + printf("."); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c new file mode 100644 index 000000000000..a2a3924d0b41 --- /dev/null +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -0,0 +1,124 @@ +/* Measure mqueue timeout latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * + * Inspired with permission from example test by: + * Romain Francoise <romain@orebokech.com> + * Licensed under the GPLv2 + * + * To build: + * $ gcc mqueue-lat.c -o mqueue-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <errno.h> +#include <mqueue.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int mqueue_lat_test(void) +{ + + mqd_t q; + struct mq_attr attr; + struct timespec start, end, now, target; + int i, count, ret; + + q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL); + if (q < 0) { + perror("mq_open"); + return -1; + } + mq_getattr(q, &attr); + + + count = 100; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < count; i++) { + char buf[attr.mq_msgsize]; + + clock_gettime(CLOCK_REALTIME, &now); + target = now; + target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */ + + ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target); + if (ret < 0 && errno != ETIMEDOUT) { + perror("mq_timedreceive"); + return -1; + } + } + clock_gettime(CLOCK_MONOTONIC, &end); + + mq_close(q); + + if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY) + return -1; + + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + + printf("Mqueue latency : "); + + ret = mqueue_lat_test(); + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c new file mode 100644 index 000000000000..8a3c29de7d49 --- /dev/null +++ b/tools/testing/selftests/timers/nanosleep.c @@ -0,0 +1,174 @@ +/* Make sure timers don't return early + * by: john stultz (johnstul@us.ibm.com) + * John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright Linaro 2013 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nanosleep.c -o nanosleep -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int nanosleep_test(int clockid, long long ns) +{ + struct timespec now, target, rel; + + /* First check abs time */ + if (clock_gettime(clockid, &now)) + return UNSUPPORTED; + target = timespec_add(now, ns); + + if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL)) + return UNSUPPORTED; + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + + /* Second check reltime */ + clock_gettime(clockid, &now); + rel.tv_sec = 0; + rel.tv_nsec = 0; + rel = timespec_add(rel, ns); + target = timespec_add(now, ns); + clock_nanosleep(clockid, 0, &rel, NULL); + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + return 0; +} + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("Nanosleep %-31s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_test(clockid, length); + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + goto next; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + length *= 100; + } + printf("[OK]\n"); +next: + ret = 0; + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c new file mode 100644 index 000000000000..2d7898fda0f1 --- /dev/null +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -0,0 +1,190 @@ +/* Measure nanosleep timer latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nsleep-lat.c -o nsleep-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int nanosleep_lat_test(int clockid, long long ns) +{ + struct timespec start, end, target; + long long latency = 0; + int i, count; + + target.tv_sec = ns/NSEC_PER_SEC; + target.tv_nsec = ns%NSEC_PER_SEC; + + if (clock_gettime(clockid, &start)) + return UNSUPPORTED; + if (clock_nanosleep(clockid, 0, &target, NULL)) + return UNSUPPORTED; + + count = 10; + + /* First check relative latency */ + clock_gettime(clockid, &start); + for (i = 0; i < count; i++) + clock_nanosleep(clockid, 0, &target, NULL); + clock_gettime(clockid, &end); + + if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) { + printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); + return -1; + } + + /* Next check absolute latency */ + for (i = 0; i < count; i++) { + clock_gettime(clockid, &start); + target = timespec_add(start, ns); + clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL); + clock_gettime(clockid, &end); + latency += timespec_sub(target, end); + } + + if (latency/count > UNRESONABLE_LATENCY) { + printf("Large abs latency: %lld ns :", latency/count); + return -1; + } + + return 0; +} + + + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("nsleep latency %-26s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_lat_test(clockid, length); + if (ret) + break; + length *= 100; + + } + + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + continue; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index f87d970a485c..5a246a02dff3 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -35,10 +35,11 @@ static void user_loop(void) static void kernel_loop(void) { void *addr = sbrk(0); + int err = 0; - while (!done) { - brk(addr + 4096); - brk(addr); + while (!done && !err) { + err = brk(addr + 4096); + err |= brk(addr); } } @@ -190,8 +191,6 @@ static int check_timer_create(int which) int main(int argc, char **argv) { - int err; - printf("Testing posix timers. False negative may happen on CPU execution \n"); printf("based timers if other threads run on the CPU...\n"); diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c new file mode 100644 index 000000000000..30906bfd9c1b --- /dev/null +++ b/tools/testing/selftests/timers/raw_skew.c @@ -0,0 +1,154 @@ +/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test + * by: john stultz (johnstul@us.ibm.com) + * John Stultz <john.stultz@linaro.org> + * (C) Copyright IBM 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc raw_skew.c -o raw_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +#define CLOCK_MONOTONIC_RAW 4 +#define NSEC_PER_SEC 1000000000LL + +#define shift_right(x, s) ({ \ + __typeof__(x) __x = (x); \ + __typeof__(s) __s = (s); \ + __x < 0 ? -(-__x >> __s) : __x >> __s; \ +}) + +long long llabs(long long val) +{ + if (val < 0) + val = -val; + return val; +} + +unsigned long long ts_to_nsec(struct timespec ts) +{ + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +struct timespec nsec_to_ts(long long ns) +{ + struct timespec ts; + + ts.tv_sec = ns/NSEC_PER_SEC; + ts.tv_nsec = ns%NSEC_PER_SEC; + return ts; +} + +long long diff_timespec(struct timespec start, struct timespec end) +{ + long long start_ns, end_ns; + + start_ns = ts_to_nsec(start); + end_ns = ts_to_nsec(end); + return end_ns - start_ns; +} + +void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) +{ + struct timespec start, mid, end; + long long diff = 0, tmp; + int i; + + for (i = 0; i < 3; i++) { + long long newdiff; + + clock_gettime(CLOCK_MONOTONIC, &start); + clock_gettime(CLOCK_MONOTONIC_RAW, &mid); + clock_gettime(CLOCK_MONOTONIC, &end); + + newdiff = diff_timespec(start, end); + if (diff == 0 || newdiff < diff) { + diff = newdiff; + *raw = mid; + tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2; + *mon = nsec_to_ts(tmp); + } + } +} + +int main(int argv, char **argc) +{ + struct timespec mon, raw, start, end; + long long delta1, delta2, interval, eppm, ppm; + struct timex tx1, tx2; + + setbuf(stdout, NULL); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) { + printf("ERR: NO CLOCK_MONOTONIC_RAW\n"); + return -1; + } + + tx1.modes = 0; + adjtimex(&tx1); + get_monotonic_and_raw(&mon, &raw); + start = mon; + delta1 = diff_timespec(mon, raw); + + if (tx1.offset) + printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n"); + + printf("Estimating clock drift: "); + sleep(120); + + get_monotonic_and_raw(&mon, &raw); + end = mon; + tx2.modes = 0; + adjtimex(&tx2); + delta2 = diff_timespec(mon, raw); + + interval = diff_timespec(start, end); + + /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */ + eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval; + eppm = -eppm; + printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000))); + + /* Avg the two actual freq samples adjtimex gave us */ + ppm = (tx1.freq + tx2.freq) * 1000 / 2; + ppm = (long long)tx1.freq * 1000; + ppm = shift_right(ppm, 16); + printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); + + if (llabs(eppm - ppm) > 1000) { + printf(" [FAILED]\n"); + return ksft_exit_fail(); + } + printf(" [OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c new file mode 100644 index 000000000000..d80ae852334d --- /dev/null +++ b/tools/testing/selftests/timers/rtctest.c @@ -0,0 +1,271 @@ +/* + * Real Time Clock Driver Test/Example Program + * + * Compile with: + * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest + * + * Copyright (C) 1996, Paul Gortmaker. + * + * Released under the GNU General Public License, version 2, + * included herein by reference. + * + */ + +#include <stdio.h> +#include <linux/rtc.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> + + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. + */ +static const char default_rtc[] = "/dev/rtc0"; + + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data; + struct rtc_time rtc_tm; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); + + /* Turn on update interrupts (one per second) */ + retval = ioctl(fd, RTC_UIE_ON, 0); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Update IRQs not supported.\n"); + goto test_READ; + } + perror("RTC_UIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", + rtc); + fflush(stderr); + for (i=1; i<6; i++) { + /* This read will block */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); + fflush(stderr); + for (i=1; i<6; i++) { + struct timeval tv = {5, 0}; /* 5 second timeout on select */ + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + retval = select(fd+1, &readfds, NULL, NULL, &tv); + if (retval == -1) { + perror("select"); + exit(errno); + } + /* This read won't block unlike the select-less case above. */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Turn off update interrupts */ + retval = ioctl(fd, RTC_UIE_OFF, 0); + if (retval == -1) { + perror("RTC_UIE_OFF ioctl"); + exit(errno); + } + +test_READ: + /* Read the RTC time/date */ + retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); + if (retval == -1) { + perror("RTC_RD_TIME ioctl"); + exit(errno); + } + + fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Set the alarm to 5 sec in the future, and check for rollover */ + rtc_tm.tm_sec += 5; + if (rtc_tm.tm_sec >= 60) { + rtc_tm.tm_sec %= 60; + rtc_tm.tm_min++; + } + if (rtc_tm.tm_min == 60) { + rtc_tm.tm_min = 0; + rtc_tm.tm_hour++; + } + if (rtc_tm.tm_hour == 24) + rtc_tm.tm_hour = 0; + + retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Alarm IRQs not supported.\n"); + goto test_PIE; + } + perror("RTC_ALM_SET ioctl"); + exit(errno); + } + + /* Read the current alarm settings */ + retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); + if (retval == -1) { + perror("RTC_ALM_READ ioctl"); + exit(errno); + } + + fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Enable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_ON, 0); + if (retval == -1) { + perror("RTC_AIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Waiting 5 seconds for alarm..."); + fflush(stderr); + /* This blocks until the alarm ring causes an interrupt */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + irqcount++; + fprintf(stderr, " okay. Alarm rang.\n"); + + /* Disable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_OFF, 0); + if (retval == -1) { + perror("RTC_AIE_OFF ioctl"); + exit(errno); + } + +test_PIE: + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &tmp); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == ENOTTY) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c new file mode 100644 index 000000000000..c8a7e14446b1 --- /dev/null +++ b/tools/testing/selftests/timers/set-2038.c @@ -0,0 +1,144 @@ +/* Time bounds setting test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which sets the time to edge cases then + * uses other tests to detect problems. Thus this test requires that + * the inconsistency-check and nanosleep tests be present in the same + * directory it is run from. + * + * To build: + * $ gcc set-2038.c -o set-2038 -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <sys/time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +#define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#define YEAR_1901 (-0x7fffffffL) +#define YEAR_1970 1 +#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */ +#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */ +#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */ + +int is32bits(void) +{ + return (sizeof(long) == 4); +} + +int settime(long long time) +{ + struct timeval now; + int ret; + + now.tv_sec = (time_t)time; + now.tv_usec = 0; + + ret = settimeofday(&now, NULL); + + printf("Setting time to 0x%lx: %d\n", (long)time, ret); + return ret; +} + +int do_tests(void) +{ + int ret; + + ret = system("date"); + ret = system("./inconsistency-check -c 0 -t 20"); + ret |= system("./nanosleep"); + ret |= system("./nsleep-lat"); + return ret; + +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + int opt, dangerous = 0; + time_t start; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d")) != -1) { + switch (opt) { + case 'd': + dangerous = 1; + } + } + + start = time(0); + + /* First test that crazy values don't work */ + if (!settime(YEAR_1901)) { + ret = -1; + goto out; + } + if (!settime(YEAR_MAX)) { + ret = -1; + goto out; + } + if (!is32bits() && !settime(YEAR_2262)) { + ret = -1; + goto out; + } + + /* Now test behavior near edges */ + settime(YEAR_1970); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2038 - 600); + ret = do_tests(); + if (ret) + goto out; + + /* The rest of the tests can blowup on 32bit systems */ + if (is32bits() && !dangerous) + goto out; + /* Test rollover behavior 32bit edge */ + settime(YEAR_2038 - 10); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2262 - 600); + ret = do_tests(); + +out: + /* restore clock */ + settime(start); + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c new file mode 100644 index 000000000000..dc88dbc8831f --- /dev/null +++ b/tools/testing/selftests/timers/set-tai.c @@ -0,0 +1,79 @@ +/* Set tai offset + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +int set_tai(int offset) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + tx.modes = ADJ_TAI; + tx.constant = offset; + + return adjtimex(&tx); +} + +int get_tai(void) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + adjtimex(&tx); + return tx.tai; +} + +int main(int argc, char **argv) +{ + int i, ret; + + ret = get_tai(); + printf("tai offset started at %i\n", ret); + + printf("Checking tai offsets can be properly set: "); + for (i = 1; i <= 60; i++) { + ret = set_tai(i); + ret = get_tai(); + if (ret != i) { + printf("[FAILED] expected: %i got %i\n", i, ret); + return ksft_exit_fail(); + } + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c new file mode 100644 index 000000000000..4fc98c5b0899 --- /dev/null +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -0,0 +1,216 @@ +/* set_timer latency test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2014 + * Licensed under the GPLv2 + * + * This test makes sure the set_timer api is correct + * + * To build: + * $ gcc set-timer-lat.c -o set-timer-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + +#define TIMER_SECS 1 +int alarmcount; +int clock_id; +struct timespec start_time; +long long max_latency_ns; + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount; + + if (delta_ns < 0) + printf("%s timer fired early: FAIL\n", clockstring(clock_id)); + + if (delta_ns > max_latency_ns) + max_latency_ns = delta_ns; +} + +int do_timer(int clock_id, int flags) +{ + struct sigevent se; + timer_t tm1; + struct itimerspec its1, its2; + int err; + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGRTMAX; + se.sigev_value.sival_int = 0; + + max_latency_ns = 0; + alarmcount = 0; + + err = timer_create(clock_id, &se, &tm1); + if (err) { + if ((clock_id == CLOCK_REALTIME_ALARM) || + (clock_id == CLOCK_BOOTTIME_ALARM)) { + printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME"); + return 0; + } + printf("%s - timer_create() failed\n", clockstring(clock_id)); + return -1; + } + + clock_gettime(clock_id, &start_time); + if (flags) { + its1.it_value = start_time; + its1.it_value.tv_sec += TIMER_SECS; + } else { + its1.it_value.tv_sec = TIMER_SECS; + its1.it_value.tv_nsec = 0; + } + its1.it_interval.tv_sec = TIMER_SECS; + its1.it_interval.tv_nsec = 0; + + err = timer_settime(tm1, flags, &its1, &its2); + if (err) { + printf("%s - timer_settime() failed\n", clockstring(clock_id)); + return -1; + } + + while (alarmcount < 5) + sleep(1); + + printf("%-22s %s max latency: %10lld ns : ", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME", + max_latency_ns); + + timer_delete(tm1); + if (max_latency_ns < UNRESONABLE_LATENCY) { + printf("[OK]\n"); + return 0; + } + printf("[FAILED]\n"); + return -1; +} + +int main(void) +{ + struct sigaction act; + int signum = SIGRTMAX; + int ret = 0; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + printf("Setting timers for every %i seconds\n", TIMER_SECS); + for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + + if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || + (clock_id == CLOCK_THREAD_CPUTIME_ID) || + (clock_id == CLOCK_MONOTONIC_RAW) || + (clock_id == CLOCK_REALTIME_COARSE) || + (clock_id == CLOCK_MONOTONIC_COARSE) || + (clock_id == CLOCK_HWSPECIFIC)) + continue; + + ret |= do_timer(clock_id, TIMER_ABSTIME); + ret |= do_timer(clock_id, 0); + } + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c new file mode 100644 index 000000000000..5562f84ee07c --- /dev/null +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -0,0 +1,89 @@ +/* ADJ_FREQ Skew consistency test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back + * and forth and watches for consistency problems. Thus this test requires + * that the inconsistency-check tests be present in the same directory it + * is run from. + * + * To build: + * $ gcc skew_consistency.c -o skew_consistency -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +int main(int argv, char **argc) +{ + struct timex tx; + int ret, ppm; + pid_t pid; + + + printf("Running Asyncrhonous Frequency Changing Tests...\n"); + + pid = fork(); + if (!pid) + return system("./inconsistency-check -c 1 -t 600"); + + ppm = 500; + ret = 0; + + while (pid != waitpid(pid, &ret, WNOHANG)) { + ppm = -ppm; + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + adjtimex(&tx); + usleep(500000); + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + + if (ret) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c new file mode 100644 index 000000000000..e632e116f05e --- /dev/null +++ b/tools/testing/selftests/timers/threadtest.c @@ -0,0 +1,204 @@ +/* threadtest.c + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2004, 2005, 2006, 2012 + * Licensed under the GPLv2 + * + * To build: + * $ gcc threadtest.c -o threadtest -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +/* serializes shared list access */ +pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; +/* serializes console output */ +pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER; + + +#define MAX_THREADS 128 +#define LISTSIZE 128 + +int done = 0; + +struct timespec global_list[LISTSIZE]; +int listcount = 0; + + +void checklist(struct timespec *list, int size) +{ + int i, j; + struct timespec *a, *b; + + /* scan the list */ + for (i = 0; i < size-1; i++) { + a = &list[i]; + b = &list[i+1]; + + /* look for any time inconsistencies */ + if ((b->tv_sec <= a->tv_sec) && + (b->tv_nsec < a->tv_nsec)) { + + /* flag other threads */ + done = 1; + + /*serialize printing to avoid junky output*/ + pthread_mutex_lock(&print_lock); + + /* dump the list */ + printf("\n"); + for (j = 0; j < size; j++) { + if (j == i) + printf("---------------\n"); + printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec); + if (j == i+1) + printf("---------------\n"); + } + printf("[FAILED]\n"); + + pthread_mutex_unlock(&print_lock); + } + } +} + +/* The shared thread shares a global list + * that each thread fills while holding the lock. + * This stresses clock syncronization across cpus. + */ +void *shared_thread(void *arg) +{ + while (!done) { + /* protect the list */ + pthread_mutex_lock(&list_lock); + + /* see if we're ready to check the list */ + if (listcount >= LISTSIZE) { + checklist(global_list, LISTSIZE); + listcount = 0; + } + clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]); + + pthread_mutex_unlock(&list_lock); + } + return NULL; +} + + +/* Each independent thread fills in its own + * list. This stresses clock_gettime() lock contention. + */ +void *independent_thread(void *arg) +{ + struct timespec my_list[LISTSIZE]; + int count; + + while (!done) { + /* fill the list */ + for (count = 0; count < LISTSIZE; count++) + clock_gettime(CLOCK_MONOTONIC, &my_list[count]); + checklist(my_list, LISTSIZE); + } + return NULL; +} + +#define DEFAULT_THREAD_COUNT 8 +#define DEFAULT_RUNTIME 30 + +int main(int argc, char **argv) +{ + int thread_count, i; + time_t start, now, runtime; + char buf[255]; + pthread_t pth[MAX_THREADS]; + int opt; + void *tret; + int ret = 0; + void *(*thread)(void *) = shared_thread; + + thread_count = DEFAULT_THREAD_COUNT; + runtime = DEFAULT_RUNTIME; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:n:i")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'n': + thread_count = atoi(optarg); + break; + case 'i': + thread = independent_thread; + printf("using independent threads\n"); + break; + default: + printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]); + printf(" -t: time to run\n"); + printf(" -n: number of threads\n"); + printf(" -i: use independent threads\n"); + return -1; + } + } + + if (thread_count > MAX_THREADS) + thread_count = MAX_THREADS; + + + setbuf(stdout, NULL); + + start = time(0); + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start)); + printf("%s\n", buf); + printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime); + + /* spawn */ + for (i = 0; i < thread_count; i++) + pthread_create(&pth[i], 0, thread, 0); + + while (time(&now) < start + runtime) { + sleep(1); + if (done) { + ret = 1; + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now)); + printf("%s\n", buf); + goto out; + } + } + printf("[OK]\n"); + done = 1; + +out: + /* wait */ + for (i = 0; i < thread_count; i++) + pthread_join(pth[i], &tret); + + /* die */ + if (ret) + ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c new file mode 100644 index 000000000000..e86d937cc22c --- /dev/null +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -0,0 +1,202 @@ +/* valid adjtimex test + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2015 + * Licensed under the GPLv2 + * + * This test validates adjtimex interface with valid + * and invalid test data. + * + * Usage: valid-adjtimex + * + * To build: + * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000L + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + return ret; +} + +#define NUM_FREQ_VALID 32 +#define NUM_FREQ_OUTOFRANGE 4 +#define NUM_FREQ_INVALID 2 + +long valid_freq[NUM_FREQ_VALID] = { + -499<<16, + -450<<16, + -400<<16, + -350<<16, + -300<<16, + -250<<16, + -200<<16, + -150<<16, + -100<<16, + -75<<16, + -50<<16, + -25<<16, + -10<<16, + -5<<16, + -1<<16, + -1000, + 1<<16, + 5<<16, + 10<<16, + 25<<16, + 50<<16, + 75<<16, + 100<<16, + 150<<16, + 200<<16, + 250<<16, + 300<<16, + 350<<16, + 400<<16, + 450<<16, + 499<<16, +}; + +long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { + -1000<<16, + -550<<16, + 550<<16, + 1000<<16, +}; + +#define LONG_MAX (~0UL>>1) +#define LONG_MIN (-LONG_MAX - 1) + +long invalid_freq[NUM_FREQ_INVALID] = { + LONG_MAX, + LONG_MIN, +}; + +int validate_freq(void) +{ + struct timex tx; + int ret, pass = 0; + int i; + + clear_time_state(); + + memset(&tx, 0, sizeof(struct timex)); + /* Set the leap second insert flag */ + + printf("Testing ADJ_FREQ... "); + for (i = 0; i < NUM_FREQ_VALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = valid_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + valid_freq[i], valid_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq != valid_freq[i]) { + printf("Warning: freq value %ld not what we set it (%ld)!\n", + tx.freq, valid_freq[i]); + } + } + for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = outofrange_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + outofrange_freq[i], outofrange_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq == outofrange_freq[i]) { + printf("[FAIL]\n"); + printf("ERROR: out of range value %ld actually set!\n", + tx.freq); + pass = -1; + goto out; + } + } + + + if (sizeof(long) == 8) { /* this case only applies to 64bit systems */ + for (i = 0; i < NUM_FREQ_INVALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = invalid_freq[i]; + ret = adjtimex(&tx); + if (ret >= 0) { + printf("[FAIL]\n"); + printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n", + invalid_freq[i]); + pass = -1; + goto out; + } + } + } + + printf("[OK]\n"); +out: + /* reset freq to zero */ + tx.modes = ADJ_FREQUENCY; + tx.freq = 0; + ret = adjtimex(&tx); + + return pass; +} + + +int main(int argc, char **argv) +{ + if (validate_freq()) + return ksft_exit_fail(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile index 12c9d15bab07..d401b63c5b1a 100644 --- a/tools/testing/selftests/user/Makefile +++ b/tools/testing/selftests/user/Makefile @@ -3,5 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -run_tests: all - ./test_user_copy.sh +TEST_PROGS := test_user_copy.sh + +include ../lib.mk diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 077828c889f1..a5ce9534eb15 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,6 +1,5 @@ # Makefile for vm selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest BINARIES += transhuge-stress @@ -9,8 +8,10 @@ all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt -run_tests: all - @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) +TEST_PROGS := run_vmtests +TEST_FILES := $(BINARIES) + +include ../lib.mk clean: $(RM) $(BINARIES) diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index c87b6812300d..c87b6812300d 100644..100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore new file mode 100644 index 000000000000..15034fef9698 --- /dev/null +++ b/tools/testing/selftests/x86/.gitignore @@ -0,0 +1,2 @@ +*_32 +*_64 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile new file mode 100644 index 000000000000..f0a7918178dd --- /dev/null +++ b/tools/testing/selftests/x86/Makefile @@ -0,0 +1,48 @@ +.PHONY: all all_32 all_64 check_build32 clean run_tests + +TARGETS_C_BOTHBITS := sigreturn + +BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32) +BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) + +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +UNAME_P := $(shell uname -p) + +# Always build 32-bit tests +all: all_32 + +# If we're on a 64-bit host, build 64-bit tests as well +ifeq ($(shell uname -p),x86_64) +all: all_64 +endif + +all_32: check_build32 $(BINARIES_32) + +all_64: $(BINARIES_64) + +clean: + $(RM) $(BINARIES_32) $(BINARIES_64) + +run_tests: + ./run_x86_tests.sh + +$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +check_build32: + @if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then \ + echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ + echo "environment. If you are using a Debian-like"; \ + echo " distribution, try:"; \ + echo ""; \ + echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ + echo ""; \ + echo "If you are using a Fedora-like distribution, try:"; \ + echo ""; \ + echo " yum install glibc-devel.*i686"; \ + exit 1; \ + fi diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh new file mode 100644 index 000000000000..3d3ec65f3e7c --- /dev/null +++ b/tools/testing/selftests/x86/run_x86_tests.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# This is deliberately minimal. IMO kselftests should provide a standard +# script here. +./sigreturn_32 || exit 1 + +if [[ "$uname -p" -eq "x86_64" ]]; then + ./sigreturn_64 || exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c new file mode 100644 index 000000000000..b5aa1bab7416 --- /dev/null +++ b/tools/testing/selftests/x86/sigreturn.c @@ -0,0 +1,684 @@ +/* + * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a series of tests that exercises the sigreturn(2) syscall and + * the IRET / SYSRET paths in the kernel. + * + * For now, this focuses on the effects of unusual CS and SS values, + * and it has a bunch of tests to make sure that ESP/RSP is restored + * properly. + * + * The basic idea behind these tests is to raise(SIGUSR1) to create a + * sigcontext frame, plug in the values to be tested, and then return, + * which implicitly invokes sigreturn(2) and programs the user context + * as desired. + * + * For tests for which we expect sigreturn and the subsequent return to + * user mode to succeed, we return to a short trampoline that generates + * SIGTRAP so that the meat of the tests can be ordinary C code in a + * SIGTRAP handler. + * + * The inner workings of each test is documented below. + * + * Do not run on outdated, unpatched kernels at risk of nasty crashes. + */ + +#define _GNU_SOURCE + +#include <sys/time.h> +#include <time.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <asm/ldt.h> +#include <err.h> +#include <setjmp.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +/* + * In principle, this test can run on Linux emulation layers (e.g. + * Illumos "LX branded zones"). Solaris-based kernels reserve LDT + * entries 0-5 for their own internal purposes, so start our LDT + * allocations above that reservation. (The tests don't pass on LX + * branded zones, but at least this lets them run.) + */ +#define LDT_OFFSET 6 + +/* An aligned stack accessible through some of our segments. */ +static unsigned char stack16[65536] __attribute__((aligned(4096))); + +/* + * An aligned int3 instruction used as a trampoline. Some of the tests + * want to fish out their ss values, so this trampoline copies ss to eax + * before the int3. + */ +asm (".pushsection .text\n\t" + ".type int3, @function\n\t" + ".align 4096\n\t" + "int3:\n\t" + "mov %ss,%eax\n\t" + "int3\n\t" + ".size int3, . - int3\n\t" + ".align 4096, 0xcc\n\t" + ".popsection"); +extern char int3[4096]; + +/* + * At startup, we prepapre: + * + * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero + * descriptor or out of bounds). + * - code16_sel: A 16-bit LDT code segment pointing to int3. + * - data16_sel: A 16-bit LDT data segment pointing to stack16. + * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. + * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. + * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. + * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to + * stack16. + * + * For no particularly good reason, xyz_sel is a selector value with the + * RPL and LDT bits filled in, whereas xyz_idx is just an index into the + * descriptor table. These variables will be zero if their respective + * segments could not be allocated. + */ +static unsigned short ldt_nonexistent_sel; +static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; + +static unsigned short gdt_data16_idx, gdt_npdata32_idx; + +static unsigned short GDT3(int idx) +{ + return (idx << 3) | 3; +} + +static unsigned short LDT3(int idx) +{ + return (idx << 3) | 7; +} + +/* Our sigaltstack scratch space. */ +static char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void add_ldt(const struct user_desc *desc, unsigned short *var, + const char *name) +{ + if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { + *var = LDT3(desc->entry_number); + } else { + printf("[NOTE]\tFailed to create %s segment\n", name); + *var = 0; + } +} + +static void setup_ldt(void) +{ + if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) + errx(1, "stack16 is too high\n"); + if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) + errx(1, "int3 is too high\n"); + + ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); + + const struct user_desc code16_desc = { + .entry_number = LDT_OFFSET + 0, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 0, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&code16_desc, &code16_sel, "code16"); + + const struct user_desc data16_desc = { + .entry_number = LDT_OFFSET + 1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&data16_desc, &data16_sel, "data16"); + + const struct user_desc npcode32_desc = { + .entry_number = LDT_OFFSET + 3, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); + + const struct user_desc npdata32_desc = { + .entry_number = LDT_OFFSET + 4, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); + + struct user_desc gdt_data16_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { + /* + * This probably indicates vulnerability to CVE-2014-8133. + * Merely getting here isn't definitive, though, and we'll + * diagnose the problem for real later on. + */ + printf("[WARN]\tset_thread_area allocated data16 at index %d\n", + gdt_data16_desc.entry_number); + gdt_data16_idx = gdt_data16_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } + + struct user_desc gdt_npdata32_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { + /* + * As a hardening measure, newer kernels don't allow this. + */ + printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", + gdt_npdata32_desc.entry_number); + gdt_npdata32_idx = gdt_npdata32_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs, requested_regs, resulting_regs; + +/* Instructions for the SIGUSR1 handler. */ +static volatile unsigned short sig_cs, sig_ss; +static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; + +/* Abstractions for some 32-bit vs 64-bit differences. */ +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define REG_SP REG_RSP +# define REG_AX REG_RAX + +struct selectors { + unsigned short cs, gs, fs, ss; +}; + +static unsigned short *ssptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->ss; +} + +static unsigned short *csptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->cs; +} +#else +# define REG_IP REG_EIP +# define REG_SP REG_ESP +# define REG_AX REG_EAX + +static greg_t *ssptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_SS]; +} + +static greg_t *csptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_CS]; +} +#endif + +/* Number of errors in the current test case. */ +static volatile sig_atomic_t nerrs; + +/* + * SIGUSR1 handler. Sets CS and SS as requested and points IP to the + * int3 trampoline. Sets SP to a large known value so that we can see + * whether the value round-trips back to user mode correctly. + */ +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + *csptr(ctx) = sig_cs; + *ssptr(ctx) = sig_ss; + + ctx->uc_mcontext.gregs[REG_IP] = + sig_cs == code16_sel ? 0 : (unsigned long)&int3; + ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; + ctx->uc_mcontext.gregs[REG_AX] = 0; + + memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ + + return; +} + +/* + * Called after a successful sigreturn. Restores our state so that + * the original raise(SIGUSR1) returns. + */ +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + sig_err = ctx->uc_mcontext.gregs[REG_ERR]; + sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; + + unsigned short ss; + asm ("mov %%ss,%0" : "=r" (ss)); + + greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; + if (asm_ss != sig_ss && sig == SIGTRAP) { + /* Sanity check failure. */ + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + ss, *ssptr(ctx), (unsigned long long)asm_ss); + nerrs++; + } + + memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + sig_trapped = sig; +} + +/* + * Checks a given selector for its code bitness or returns -1 if it's not + * a usable code segment selector. + */ +int cs_bitness(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return -1; + + bool db = (ar & (1 << 22)); + bool l = (ar & (1 << 21)); + + if (!(ar & (1<<11))) + return -1; /* Not code. */ + + if (l && !db) + return 64; + else if (!l && db) + return 32; + else if (!l && !db) + return 16; + else + return -1; /* Unknown bitness. */ +} + +/* Finds a usable code segment of the requested bitness. */ +int find_cs(int bitness) +{ + unsigned short my_cs; + + asm ("mov %%cs,%0" : "=r" (my_cs)); + + if (cs_bitness(my_cs) == bitness) + return my_cs; + if (cs_bitness(my_cs + (2 << 3)) == bitness) + return my_cs + (2 << 3); + if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) + return my_cs - (2 << 3); + if (cs_bitness(code16_sel) == bitness) + return code16_sel; + + printf("[WARN]\tCould not find %d-bit CS\n", bitness); + return -1; +} + +static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) +{ + int cs = find_cs(cs_bits); + if (cs == -1) { + printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", + cs_bits, use_16bit_ss ? 16 : 32); + return 0; + } + + if (force_ss != -1) { + sig_ss = force_ss; + } else { + if (use_16bit_ss) { + if (!data16_sel) { + printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", + cs_bits); + return 0; + } + sig_ss = data16_sel; + } else { + asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); + } + } + + sig_cs = cs; + + printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", + cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, + (sig_ss & 4) ? "" : ", GDT"); + + raise(SIGUSR1); + + nerrs = 0; + + /* + * Check that each register had an acceptable value when the + * int3 trampoline was invoked. + */ + for (int i = 0; i < NGREG; i++) { + greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) + continue; /* don't care */ + if (i == REG_SP) { + printf("\tSP: %llx -> %llx\n", (unsigned long long)req, + (unsigned long long)res); + + /* + * In many circumstances, the high 32 bits of rsp + * are zeroed. For example, we could be a real + * 32-bit program, or we could hit any of a number + * of poorly-documented IRET or segmented ESP + * oddities. If this happens, it's okay. + */ + if (res == (req & 0xFFFFFFFF)) + continue; /* OK; not expected to work */ + } + + bool ignore_reg = false; +#if __i386__ + if (i == REG_UESP) + ignore_reg = true; +#else + if (i == REG_CSGSFS) { + struct selectors *req_sels = + (void *)&requested_regs[REG_CSGSFS]; + struct selectors *res_sels = + (void *)&resulting_regs[REG_CSGSFS]; + if (req_sels->cs != res_sels->cs) { + printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->cs, res_sels->cs); + nerrs++; + } + + if (req_sels->ss != res_sels->ss) { + printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->ss, res_sels->ss); + nerrs++; + } + + continue; + } +#endif + + /* Sanity check on the kernel */ + if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { + printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; + } + + if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + /* + * SP is particularly interesting here. The + * usual cause of failures is that we hit the + * nasty IRET case of returning to a 16-bit SS, + * in which case bits 16:31 of the *kernel* + * stack pointer persist in ESP. + */ + printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", + i, (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + } + } + + if (nerrs == 0) + printf("[OK]\tall registers okay\n"); + + return nerrs; +} + +static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) +{ + int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; + if (cs == -1) + return 0; + + sig_cs = cs; + sig_ss = ss; + + printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", + cs_bits, sig_cs, sig_ss); + + sig_trapped = 0; + raise(SIGUSR1); + if (sig_trapped) { + char errdesc[32] = ""; + if (sig_err) { + const char *src = (sig_err & 1) ? " EXT" : ""; + const char *table; + if ((sig_err & 0x6) == 0x0) + table = "GDT"; + else if ((sig_err & 0x6) == 0x4) + table = "LDT"; + else if ((sig_err & 0x6) == 0x2) + table = "IDT"; + else + table = "???"; + + sprintf(errdesc, "%s%s index %d, ", + table, src, sig_err >> 3); + } + + char trapname[32]; + if (sig_trapno == 13) + strcpy(trapname, "GP"); + else if (sig_trapno == 11) + strcpy(trapname, "NP"); + else if (sig_trapno == 12) + strcpy(trapname, "SS"); + else if (sig_trapno == 32) + strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ + else + sprintf(trapname, "%d", sig_trapno); + + printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", + trapname, (unsigned long)sig_err, + errdesc, strsignal(sig_trapped)); + return 0; + } else { + printf("[FAIL]\tDid not get SIGSEGV\n"); + return 1; + } +} + +int main() +{ + int total_nerrs = 0; + unsigned short my_cs, my_ss; + + asm volatile ("mov %%cs,%0" : "=r" (my_cs)); + asm volatile ("mov %%ss,%0" : "=r" (my_ss)); + setup_ldt(); + + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGUSR1, sigusr1, 0); + sethandler(SIGTRAP, sigtrap, SA_ONSTACK); + + /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ + total_nerrs += test_valid_sigreturn(64, false, -1); + total_nerrs += test_valid_sigreturn(32, false, -1); + total_nerrs += test_valid_sigreturn(16, false, -1); + + /* + * Test easy espfix cases: return to a 16-bit LDT SS in each possible + * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. + * + * This catches the original missing-espfix-on-64-bit-kernels issue + * as well as CVE-2014-8134. + */ + total_nerrs += test_valid_sigreturn(64, true, -1); + total_nerrs += test_valid_sigreturn(32, true, -1); + total_nerrs += test_valid_sigreturn(16, true, -1); + + if (gdt_data16_idx) { + /* + * For performance reasons, Linux skips espfix if SS points + * to the GDT. If we were able to allocate a 16-bit SS in + * the GDT, see if it leaks parts of the kernel stack pointer. + * + * This tests for CVE-2014-8133. + */ + total_nerrs += test_valid_sigreturn(64, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(32, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(16, true, + GDT3(gdt_data16_idx)); + } + + /* + * We're done testing valid sigreturn cases. Now we test states + * for which sigreturn itself will succeed but the subsequent + * entry to user mode will fail. + * + * Depending on the failure mode and the kernel bitness, these + * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. + */ + clearhandler(SIGTRAP); + sethandler(SIGSEGV, sigtrap, SA_ONSTACK); + sethandler(SIGBUS, sigtrap, SA_ONSTACK); + sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ + + /* Easy failures: invalid SS, resulting in #GP(0) */ + test_bad_iret(64, ldt_nonexistent_sel, -1); + test_bad_iret(32, ldt_nonexistent_sel, -1); + test_bad_iret(16, ldt_nonexistent_sel, -1); + + /* These fail because SS isn't a data segment, resulting in #GP(SS) */ + test_bad_iret(64, my_cs, -1); + test_bad_iret(32, my_cs, -1); + test_bad_iret(16, my_cs, -1); + + /* Try to return to a not-present code segment, triggering #NP(SS). */ + test_bad_iret(32, my_ss, npcode32_sel); + + /* + * Try to return to a not-present but otherwise valid data segment. + * This will cause IRET to fail with #SS on the espfix stack. This + * exercises CVE-2014-9322. + * + * Note that, if espfix is enabled, 64-bit Linux will lose track + * of the actual cause of failure and report #GP(0) instead. + * This would be very difficult for Linux to avoid, because + * espfix64 causes IRET failures to be promoted to #DF, so the + * original exception frame is never pushed onto the stack. + */ + test_bad_iret(32, npdata32_sel, -1); + + /* + * Try to return to a not-present but otherwise valid data + * segment without invoking espfix. Newer kernels don't allow + * this to happen in the first place. On older kernels, though, + * this can trigger CVE-2014-9322. + */ + if (gdt_npdata32_idx) + test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); + + return total_nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c new file mode 100644 index 000000000000..2e231beb0a39 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_32bit_program.c @@ -0,0 +1,14 @@ +/* + * Trivial program to check that we have a valid 32-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#include <stdio.h> + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6e54f3542126..98c95f2fcba4 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Work function for handling the backup timer that we schedule when a vcpu is + * no longer running, but had a timer programmed to fire in the future. + */ static void kvm_timer_inject_irq_work(struct work_struct *work) { struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); + + /* + * If the vcpu is blocked we want to wake it up so that it will see + * the timer has expired when entering the guest. + */ + kvm_vcpu_kick(vcpu); } static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) @@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return false; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + return cval <= now; +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer @@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * populate the CPU timer again. */ timer_disarm(timer); + + /* + * If the timer expired while we were not scheduled, now is the time + * to inject it. + */ + if (kvm_timer_should_fire(vcpu)) + kvm_timer_inject_irq(vcpu); } /** @@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - BUG_ON(timer_is_armed(timer)); - if (cval <= now) { + if (kvm_timer_should_fire(vcpu)) { /* * Timer has already expired while we were not * looking. Inject the interrupt and carry on. @@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, &timecounter->frac); timer_arm(timer, ns); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 19c6210f02cf..13907970d11c 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id); } +static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { .base = GIC_DIST_ACTIVE_SET, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_set_active_reg, }, { .base = GIC_DIST_ACTIVE_CLEAR, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_clear_active_reg, }, { .base = GIC_DIST_PRI, @@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + KVM_VGIC_V2_DIST_SIZE, + vgic_dist_ranges, -1, &dist->dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. @@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm, ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); - goto out; + goto out_unregister; } - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + goto out_unregister; } - kvm->arch.vgic.ready = true; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + out: if (ret) kvm_vgic_destroy(kvm); @@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; @@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; struct vgic_dist *vgic; struct kvm_exit_mmio mmio; + u32 data; offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> @@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mmio.len = 4; mmio.is_write = is_write; + mmio.data = &data; if (is_write) mmio_data_write(&mmio, ~0, *reg); switch (attr->group) { @@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index a0a7b5d1a070..f9b9c7c51372 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, { if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); + else + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr); } static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) @@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; } +static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; +} + static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) { u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; @@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = { .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, + .clear_eisr = vgic_v2_clear_eisr, .get_interrupt_status = vgic_v2_get_interrupt_status, .enable_underflow = vgic_v2_enable_underflow, .disable_underflow = vgic_v2_disable_underflow, diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f154631515..e9c3a7a83833 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len = 0x04, @@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { {}, }; +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +#define SGI_base(x) ((x) + SZ_64K) + +static const struct vgic_io_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, + { + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, { - .base = GICR_IGROUPR0, + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + { + .base = SGI_base(GICR_IGROUPR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_rao_wi, }, { - .base = GICR_ISENABLER0, + .base = SGI_base(GICR_ISENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg_redist, }, { - .base = GICR_ICENABLER0, + .base = SGI_base(GICR_ICENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg_redist, }, { - .base = GICR_ISPENDR0, + .base = SGI_base(GICR_ISPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg_redist, }, { - .base = GICR_ICPENDR0, + .base = SGI_base(GICR_ICPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg_redist, }, { - .base = GICR_ISACTIVER0, + .base = SGI_base(GICR_ISACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_ICACTIVER0, + .base = SGI_base(GICR_ICACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_IPRIORITYR0, + .base = SGI_base(GICR_IPRIORITYR0), .len = 0x20, .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg_redist, }, { - .base = GICR_ICFGR0, + .base = SGI_base(GICR_ICFGR0), .len = 0x08, .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg_redist, }, { - .base = GICR_IGRPMODR0, + .base = SGI_base(GICR_IGRPMODR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_NSACR, + .base = SGI_base(GICR_NSACR), .len = 0x04, .handle_mmio = handle_mmio_raz_wi, }, {}, }; -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static const struct kvm_mmio_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ -#define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long dbase = dist->vgic_dist_base; - unsigned long rdbase = dist->vgic_redist_base; - int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); - int vcpu_id; - const struct kvm_mmio_range *mmio_range; - - if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - - if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); -} - static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { if (vgic_queue_irq(vcpu, 0, irq)) { @@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t rdbase = dist->vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm->arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, &dist->dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out_unregister; + } + + for (i = 0; i < dist->nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_128K, vgic_redist_ranges, + i, &iodevs[i]); + if (ret) + goto out_unregister; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist->redist_iodevs = iodevs; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + if (iodevs) { + for (i = 0; i < dist->nr_cpus; i++) { + if (iodevs[i].dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &iodevs[i].dev); + } + } + out: if (ret) kvm_vgic_destroy(kvm); @@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist->vm_ops.init_model = vgic_v3_init_model; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 3a62d8a9a2c6..dff06021e748 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, { if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); + else + vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr); } static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) @@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; } +static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; +} + static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) { u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; @@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = { .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, .get_elrsr = vgic_v3_get_elrsr, .get_eisr = vgic_v3_get_eisr, + .clear_eisr = vgic_v3_clear_eisr, .get_interrupt_status = vgic_v3_get_interrupt_status, .enable_underflow = vgic_v3_enable_underflow, .disable_underflow = vgic_v3_disable_underflow, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0cc6ab6005a0..8d550ff14700 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -31,6 +31,9 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <trace/events/kvm.h> +#include <asm/kvm.h> +#include <kvm/iodev.h> /* * How the whole thing works (courtesy of Christoffer Dall): @@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, } /** - * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs * - * Move any pending IRQs that have already been assigned to LRs back to the + * Move any IRQs that have already been assigned to LRs back to the * emulated distributor state so that the complete emulated state can be read * from the main emulation structures without investigating the LRs. - * - * Note that IRQs in the active state in the LRs get their pending state moved - * to the distributor but the active state stays in the LRs, because we don't - * track the active state on the distributor side. */ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { @@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * 01: pending * 10: active * 11: pending and active - * - * If the LR holds only an active interrupt (not pending) then - * just leave it alone. */ - if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) - continue; + BUG_ON(!(lr.state & LR_STATE_MASK)); + + /* Reestablish SGI source for pending and active IRQs */ + if (lr.irq < VGIC_NR_SGIS) + add_sgi_source(vcpu, lr.irq, lr.source); + + /* + * If the LR holds an active (10) or a pending and active (11) + * interrupt then move the active state to the + * distributor tracking bit. + */ + if (lr.state & LR_STATE_ACTIVE) { + vgic_irq_set_active(vcpu, lr.irq); + lr.state &= ~LR_STATE_ACTIVE; + } /* * Reestablish the pending state on the distributor and the @@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set_pending(vcpu, lr.irq); - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - lr.state &= ~LR_STATE_PENDING; + if (lr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, lr.irq); + lr.state &= ~LR_STATE_PENDING; + } + vgic_set_lr(vcpu, i, lr); /* - * If there's no state left on the LR (it could still be - * active), then the LR does not hold any useful info and can - * be marked as free for other use. + * Mark the LR as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) { - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); - } + BUG_ON(lr.state & LR_STATE_MASK); + vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - const struct kvm_mmio_range *r = ranges; - - while (r->len) { - if (offset >= r->base && - (offset + mmio->len) <= (r->base + r->len)) - return r; - r++; +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset) +{ + while (ranges->len) { + if (offset >= ranges->base && + (offset + len) <= (ranges->base + ranges->len)) + return ranges; + ranges++; } return NULL; } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct kvm_mmio_range *range, + const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct kvm_mmio_range *range) + const struct vgic_io_range *range) { - u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; bool ret; @@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[1]; + mmio32.data = &((u32 *)mmio->data)[1]; ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - if (!mmio->is_write) - data32[1] = *(u32 *)mmio32.data; mmio32.phys_addr = mmio->phys_addr; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[0]; + mmio32.data = &((u32 *)mmio->data)[0]; ret |= range->handle_mmio(vcpu, &mmio32, offset); - if (!mmio->is_write) - data32[0] = *(u32 *)mmio32.data; return ret; } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * @ranges: array of MMIO ranges in a given region - * @mmio_base: base address of that region + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access * * returns true if the MMIO access could be performed */ -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base) +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) { - const struct kvm_mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu->run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; bool updated_state; - unsigned long offset; + gpa_t offset; - offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + offset = addr - iodev->addr; + range = vgic_find_range(iodev->reg_ranges, len, offset); if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; + pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); + return -ENXIO; } - spin_lock(&vcpu->kvm->arch.vgic.lock); + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + mmio.data = val; + mmio.private = iodev->redist_vcpu; + + spin_lock(&dist->lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, mmio, offset, range); + updated_state = call_range_handler(vcpu, &mmio, offset, range); } else { - if (!mmio->is_write) - memset(mmio->data, 0, mmio->len); + if (!is_write) + memset(val, 0, len); updated_state = false; } - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); + spin_unlock(&dist->lock); + run->mmio.is_write = is_write; + run->mmio.len = len; + run->mmio.phys_addr = addr; + memcpy(run->mmio.data, val, len); + kvm_handle_mmio_return(vcpu, run); if (updated_state) vgic_kick_vcpus(vcpu->kvm); - return true; + return 0; } +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); +} + +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + /** - * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access + * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus + * @kvm: The VM structure pointer + * @base: The (guest) base address for the register frame + * @len: Length of the register frame window + * @ranges: Describing the handler functions for each register + * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call + * @iodev: Points to memory to be passed on to the handler * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - * Calls the actual handling routine for the selected VGIC model. + * @iodev stores the parameters of this function to be usable by the handler + * respectively the dispatcher function (since the KVM I/O bus framework lacks + * an opaque parameter). Initialization is done in this function, but the + * reference should be valid and unique for the whole VGIC lifetime. + * If the register frame is not mapped for a specific VCPU, pass -1 to + * @redist_vcpu_id. */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_vcpu_id, + struct vgic_io_device *iodev) { - if (!irqchip_in_kernel(vcpu->kvm)) - return false; + struct kvm_vcpu *vcpu = NULL; + int ret; - /* - * This will currently call either vgic_v2_handle_mmio() or - * vgic_v3_handle_mmio(), which in turn will call - * vgic_handle_mmio_range() defined above. - */ - return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); + if (redist_vcpu_id >= 0) + vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); + + iodev->addr = base; + iodev->len = len; + iodev->reg_ranges = ranges; + iodev->redist_vcpu = vcpu; + + kvm_iodevice_init(&iodev->dev, &vgic_io_ops); + + mutex_lock(&kvm->slots_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, + &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + /* Mark the iodev as invalid if registration fails. */ + if (ret) + iodev->dev.ops = NULL; + + return ret; } static int vgic_nr_shared_irqs(struct vgic_dist *dist) @@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist) return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; } +static int compute_active_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *active, *enabled, *act_percpu, *act_shared; + unsigned long active_private, active_shared; + int nr_shared = vgic_nr_shared_irqs(dist); + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + act_percpu = vcpu->arch.vgic_cpu.active_percpu; + act_shared = vcpu->arch.vgic_cpu.active_shared; + + active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); + + active = vgic_bitmap_get_shared_map(&dist->irq_active); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(act_shared, active, enabled, nr_shared); + bitmap_and(act_shared, act_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + nr_shared); + + active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); + active_shared = find_first_bit(act_shared, nr_shared); + + return (active_private < VGIC_NR_PRIVATE_IRQS || + active_shared < nr_shared); +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) /* * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. + * or active interrupts. Must be called with distributor lock held. */ void vgic_update_state(struct kvm *kvm) { @@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm) } kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); + if (compute_pending_for_cpu(vcpu)) set_bit(c, dist->irq_pending_on_cpu); - } + + if (compute_active_for_cpu(vcpu)) + set_bit(c, dist->irq_active_on_cpu); + else + clear_bit(c, dist->irq_active_on_cpu); } } @@ -883,6 +1029,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) return vgic_ops->get_eisr(vcpu); } +static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu) +{ + vgic_ops->clear_eisr(vcpu); +} + static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) { return vgic_ops->get_interrupt_status(vcpu); @@ -922,6 +1073,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) vgic_set_lr(vcpu, lr_nr, vlr); clear_bit(lr_nr, vgic_cpu->lr_used); vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; + vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -949,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) } } +static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, + int lr_nr, struct vgic_lr vlr) +{ + if (vgic_irq_is_active(vcpu, irq)) { + vlr.state |= LR_STATE_ACTIVE; + kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); + vgic_irq_clear_active(vcpu, irq); + vgic_update_state(vcpu->kvm); + } else if (vgic_dist_irq_is_pending(vcpu, irq)) { + vlr.state |= LR_STATE_PENDING; + kvm_debug("Set pending: 0x%x\n", vlr.state); + } + + if (!vgic_irq_is_edge(vcpu, irq)) + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr_nr, vlr); + vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); +} + /* * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. @@ -976,8 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) if (vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vlr.state |= LR_STATE_PENDING; - vgic_set_lr(vcpu, lr, vlr); + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } @@ -994,11 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vlr.irq = irq; vlr.source = sgi_source_id; - vlr.state = LR_STATE_PENDING; - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - vgic_set_lr(vcpu, lr, vlr); + vlr.state = 0; + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } @@ -1030,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pa_percpu, *pa_shared; int i, vcpu_id; int overflow = 0; + int nr_shared = vgic_nr_shared_irqs(dist); vcpu_id = vcpu->vcpu_id; + pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; + pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; + + bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, + VGIC_NR_PRIVATE_IRQS); + bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, + nr_shared); /* * We may not have any pending interrupt, or the interrupts * may have been serviced from another vcpu. In all cases, * move along. */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu)) goto epilog; - } /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { if (!queue_sgi(vcpu, i)) overflow = 1; } /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { if (!vgic_queue_hwirq(vcpu, i)) overflow = 1; } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { + for_each_set_bit(i, pa_shared, nr_shared) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } + + + epilog: if (overflow) { vgic_enable_underflow(vcpu); @@ -1081,7 +1259,9 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool level_pending = false; + struct kvm *kvm = vcpu->kvm; kvm_debug("STATUS = %08x\n", status); @@ -1098,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_lr vlr = vgic_get_lr(vcpu, lr); WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; @@ -1116,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) */ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* + * kvm_notify_acked_irq calls kvm_set_irq() + * to reset the IRQ level. Need to release the + * lock for kvm_set_irq to grab it. + */ + spin_unlock(&dist->lock); + + kvm_notify_acked_irq(kvm, 0, + vlr.irq - VGIC_NR_PRIVATE_IRQS); + spin_lock(&dist->lock); + /* Any additional pending interrupt? */ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); @@ -1125,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vgic_cpu_irq_clear(vcpu, vlr.irq); } + spin_unlock(&dist->lock); + /* * Despite being EOIed, the LR may not have * been marked as empty. @@ -1136,13 +1330,18 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) if (status & INT_STATUS_UNDERFLOW) vgic_disable_underflow(vcpu); + /* + * In the next iterations of the vcpu loop, if we sync the vgic state + * after flushing it, but before entering the guest (this happens for + * pending signals and vmid rollovers), then make sure we don't pick + * up any old maintenance interrupts here. + */ + vgic_clear_eisr(vcpu); + return level_pending; } -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ +/* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1189,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - if (!irqchip_in_kernel(vcpu->kvm)) return; - spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) @@ -1209,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + + void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; @@ -1381,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->active_shared); + kfree(vgic_cpu->pend_act_shared); kfree(vgic_cpu->vgic_irq_lr_map); vgic_cpu->pending_shared = NULL; + vgic_cpu->active_shared = NULL; + vgic_cpu->pend_act_shared = NULL; vgic_cpu->vgic_irq_lr_map = NULL; } @@ -1392,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); - if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + if (!vgic_cpu->pending_shared + || !vgic_cpu->active_shared + || !vgic_cpu->pend_act_shared + || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } @@ -1447,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_mpidr); kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); + kfree(dist->irq_active_on_cpu); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->irq_active_on_cpu = NULL; dist->nr_cpus = 0; } @@ -1486,6 +1703,7 @@ int vgic_init(struct kvm *kvm) ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); @@ -1498,10 +1716,13 @@ int vgic_init(struct kvm *kvm) GFP_KERNEL); dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), GFP_KERNEL); + dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); if (!dist->irq_sgi_sources || !dist->irq_spi_cpu || !dist->irq_spi_target || - !dist->irq_pending_on_cpu) { + !dist->irq_pending_on_cpu || + !dist->irq_active_on_cpu) { ret = -ENOMEM; goto out; } @@ -1583,8 +1804,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) * emulation. So check this here again. KVM_CREATE_DEVICE does * the proper checks already. */ - if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) - return -ENODEV; + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) { + ret = -ENODEV; + goto out; + } /* * Any time a vcpu is run, vcpu_load is called which tries to grab the @@ -1827,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { - struct kvm_exit_mmio dev_attr_mmio; - - dev_attr_mmio.len = 4; - if (vgic_find_range(ranges, &dev_attr_mmio, offset)) + if (vgic_find_range(ranges, 4, offset)) return 0; else return -ENXIO; @@ -1865,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = { }; static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; @@ -1914,3 +2136,38 @@ out_free_irq: free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, + int gsi) +{ + return gsi; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, + u32 irq, int level, bool line_status) +{ + unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + + trace_kvm_set_irq(irq, level, irq_source_id); + + BUG_ON(!vgic_initialized(kvm)); + + if (spi > kvm->arch.vgic.nr_irqs) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi, level); + +} + +/* MSI not implemented yet */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + return 0; +} diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 1e83bdf5f499..0df74cbb6200 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -20,6 +20,8 @@ #ifndef __KVM_VGIC_H__ #define __KVM_VGIC_H__ +#include <kvm/iodev.h> + #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) @@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); +struct kvm_exit_mmio { + phys_addr_t phys_addr; + void *data; + u32 len; + bool is_write; + void *private; +}; + void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, phys_addr_t offset, int mode); bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, @@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } -struct kvm_mmio_range { +struct vgic_io_range { phys_addr_t base; unsigned long len; int bits_per_irq; @@ -82,6 +92,11 @@ struct kvm_mmio_range { phys_addr_t offset); }; +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_id, + struct vgic_io_device *iodev); + static inline bool is_in_range(phys_addr_t addr, unsigned long len, phys_addr_t baseaddr, unsigned long size) { @@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base); +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset); bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id, int access); @@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id); +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, phys_addr_t offset); void vgic_kick_vcpus(struct kvm *kvm); -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 00d86427af0f..571c1ce37d15 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -8,7 +8,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/slab.h> @@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) return 1; } -static int coalesced_mmio_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *val) +static int coalesced_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 148b2392c762..9ff4193dfa49 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,7 @@ #include <linux/seqlock.h> #include <trace/events/kvm.h> -#include "iodev.h" +#include <kvm/iodev.h> #ifdef CONFIG_HAVE_KVM_IRQFD /* @@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) unsigned int events; int idx; + if (!kvm_arch_intc_initialized(kvm)) + return -EAGAIN; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) return -ENOMEM; @@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) /* MMIO/PIO writes trigger an event if the addr/val match */ static int -ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct _ioeventfd *p = to_ioeventfd(this); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 7f256f31df10..1d56a901e791 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); - while(i--) { + while (i--) { int r; r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, line_status); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a1093700f3a4..d3fc9399062a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -16,7 +16,7 @@ * */ -#include "iodev.h" +#include <kvm/iodev.h> #include <linux/kvm_host.h> #include <linux/kvm.h> @@ -66,13 +66,13 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -unsigned int halt_poll_ns = 0; +static unsigned int halt_poll_ns; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* * Ordering of locks: * - * kvm->lock --> kvm->slots_lock --> kvm->irq_lock + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ DEFINE_SPINLOCK(kvm_lock); @@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; -static int kvm_usage_count = 0; +static int kvm_usage_count; static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; @@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type) BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX); r = -ENOMEM; - kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!kvm->memslots) goto out_err_no_srcu; @@ -522,7 +522,7 @@ out_err_no_srcu: out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm->buses[i]); - kfree(kvm->memslots); + kvfree(kvm->memslots); kvm_arch_free_vm(kvm); return ERR_PTR(r); } @@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -void kvm_kvfree(const void *addr) -{ - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} - static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { if (!memslot->dirty_bitmap) return; - kvm_kvfree(memslot->dirty_bitmap); + kvfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; } @@ -578,7 +570,7 @@ static void kvm_free_physmem(struct kvm *kvm) kvm_for_each_memslot(memslot, slots) kvm_free_physmem_slot(kvm, memslot, NULL); - kfree(kvm->memslots); + kvfree(kvm->memslots); } static void kvm_destroy_devices(struct kvm *kvm) @@ -871,10 +863,10 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; } - slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), - GFP_KERNEL); + slots = kvm_kvzalloc(sizeof(struct kvm_memslots)); if (!slots) goto out_free; + memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) { slot = id_to_memslot(slots, mem->slot); @@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm, * or moved, memslot will be created. * * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_roots) + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_roots) */ kvm_arch_flush_shadow_memslot(kvm, slot); @@ -917,7 +909,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); kvm_free_physmem_slot(kvm, &old, &new); - kfree(old_memslots); + kvfree(old_memslots); /* * IOMMU mapping: New slots need to be mapped. Old slots need to be @@ -936,7 +928,7 @@ int __kvm_set_memory_region(struct kvm *kvm, return 0; out_slots: - kfree(slots); + kvfree(slots); out_free: kvm_free_physmem_slot(kvm, &new, &old); out: @@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; - offset = i * BITS_PER_LONG; - kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, - mask); + if (mask) { + offset = i * BITS_PER_LONG; + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, + offset, mask); + } } spin_unlock(&kvm->mmu_lock); @@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return gfn_to_hva_memslot_prot(slot, gfn, writable); } -static int kvm_read_hva(void *data, void __user *hva, int len) -{ - return __copy_from_user(data, hva, len); -} - -static int kvm_read_hva_atomic(void *data, void __user *hva, int len) -{ - return __copy_from_user_inatomic(data, hva, len); -} - static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { @@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) return kvm_pfn_to_page(pfn); } - EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) @@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn) { if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) SetPageDirty(page); } @@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; - r = kvm_read_hva(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; @@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); - r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); + r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); pagefault_enable(); if (r) return -EFAULT; @@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->generation = slots->generation; ghc->len = len; ghc->memslot = gfn_to_memslot(kvm, start_gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); - if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { ghc->hva += offset; } else { /* @@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) int offset = offset_in_page(gpa); int ret; - while ((seg = next_segment(len, offset)) != 0) { + while ((seg = next_segment(len, offset)) != 0) { ret = kvm_clear_guest_page(kvm, gfn, offset, seg); if (ret < 0) return ret; @@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) start = cur = ktime_get(); if (halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); + do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp, * Special cases: vcpu ioctls that are asynchronous to vcpu execution, * so vcpu_load() would break it. */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) + if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) return kvm_arch_vcpu_ioctl(filp, ioctl, arg); #endif @@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp, /* The thread running this VCPU changed. */ struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); if (oldpid) synchronize_rcu(); @@ -2205,7 +2191,7 @@ out_free1: if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &mp_state, sizeof mp_state)) + if (copy_to_user(argp, &mp_state, sizeof(mp_state))) goto out; r = 0; break; @@ -2214,7 +2200,7 @@ out_free1: struct kvm_mp_state mp_state; r = -EFAULT; - if (copy_from_user(&mp_state, argp, sizeof mp_state)) + if (copy_from_user(&mp_state, argp, sizeof(mp_state))) goto out; r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); break; @@ -2223,13 +2209,13 @@ out_free1: struct kvm_translation tr; r = -EFAULT; - if (copy_from_user(&tr, argp, sizeof tr)) + if (copy_from_user(&tr, argp, sizeof(tr))) goto out; r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &tr, sizeof tr)) + if (copy_to_user(argp, &tr, sizeof(tr))) goto out; r = 0; break; @@ -2238,7 +2224,7 @@ out_free1: struct kvm_guest_debug dbg; r = -EFAULT; - if (copy_from_user(&dbg, argp, sizeof dbg)) + if (copy_from_user(&dbg, argp, sizeof(dbg))) goto out; r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); break; @@ -2252,14 +2238,14 @@ out_free1: if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof sigset) + if (kvm_sigmask.len != sizeof(sigset)) goto out; r = -EFAULT; if (copy_from_user(&sigset, sigmask_arg->sigset, - sizeof sigset)) + sizeof(sigset))) goto out; p = &sigset; } @@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof csigset) + if (kvm_sigmask.len != sizeof(csigset)) goto out; r = -EFAULT; if (copy_from_user(&csigset, sigmask_arg->sigset, - sizeof csigset)) + sizeof(csigset))) goto out; sigset_from_compat(&sigset, &csigset); r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); @@ -2492,6 +2478,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_SIGNAL_MSI: #endif #ifdef CONFIG_HAVE_KVM_IRQFD + case KVM_CAP_IRQFD: case KVM_CAP_IRQFD_RESAMPLE: #endif case KVM_CAP_CHECK_EXTENSION_VM: @@ -2524,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_userspace_mem, argp, - sizeof kvm_userspace_mem)) + sizeof(kvm_userspace_mem))) goto out; r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); @@ -2534,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_dirty_log log; r = -EFAULT; - if (copy_from_user(&log, argp, sizeof log)) + if (copy_from_user(&log, argp, sizeof(log))) goto out; r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; @@ -2542,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp, #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); break; } case KVM_UNREGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); break; @@ -2561,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irqfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_irqfd(kvm, &data); break; @@ -2570,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_ioeventfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_ioeventfd(kvm, &data); break; @@ -2591,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_msi msi; r = -EFAULT; - if (copy_from_user(&msi, argp, sizeof msi)) + if (copy_from_user(&msi, argp, sizeof(msi))) goto out; r = kvm_send_userspace_msi(kvm, &msi); break; @@ -2603,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irq_level irq_event; r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) + if (copy_from_user(&irq_event, argp, sizeof(irq_event))) goto out; r = kvm_vm_ioctl_irq_line(kvm, &irq_event, @@ -2613,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (ioctl == KVM_IRQ_LINE_STATUS) { - if (copy_to_user(argp, &irq_event, sizeof irq_event)) + if (copy_to_user(argp, &irq_event, sizeof(irq_event))) goto out; } @@ -2646,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp, goto out_free_irq_routing; r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); - out_free_irq_routing: +out_free_irq_routing: vfree(entries); break; } @@ -2821,8 +2810,7 @@ static void hardware_enable_nolock(void *junk) if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); atomic_inc(&hardware_enable_failed); - printk(KERN_INFO "kvm: enabling virtualization on " - "CPU%d failed\n", cpu); + pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu); } } @@ -2898,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: - printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + pr_info("kvm: disabling virtualization on CPU%d\n", cpu); hardware_disable(); break; case CPU_STARTING: - printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", + pr_info("kvm: enabling virtualization on CPU%d\n", cpu); hardware_enable(); break; @@ -2920,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * * And Intel TXT required VMX off for all cpu when system shutdown. */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + pr_info("kvm: exiting hardware virtualization\n"); kvm_rebooting = true; on_each_cpu(hardware_disable_nolock, NULL, 1); return NOTIFY_OK; @@ -2944,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) } static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, - const struct kvm_io_range *r2) + const struct kvm_io_range *r2) { if (r1->addr < r2->addr) return -1; @@ -2997,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, return off; } -static int __kvm_io_bus_write(struct kvm_io_bus *bus, +static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, struct kvm_io_range *range, const void *val) { int idx; @@ -3008,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3018,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, } /* kvm_io_bus_write - called under kvm->slots_lock */ -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { struct kvm_io_bus *bus; @@ -3030,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_write(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie) +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie) { struct kvm_io_bus *bus; struct kvm_io_range range; @@ -3047,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) - if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, + if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, val)) return cookie; @@ -3060,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, * cookie contained garbage; fall back to search and return the * correct cookie value. */ - return __kvm_io_bus_write(bus, &range, val); + return __kvm_io_bus_write(vcpu, bus, &range, val); } -static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, - void *val) +static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, + struct kvm_io_range *range, void *val) { int idx; @@ -3074,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3085,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { struct kvm_io_bus *bus; @@ -3097,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_read(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } @@ -3268,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) vcpu->preempted = false; @@ -3349,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = misc_register(&kvm_dev); if (r) { - printk(KERN_ERR "kvm: misc device register failed\n"); + pr_err("kvm: misc device register failed\n"); goto out_unreg; } @@ -3360,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = kvm_init_debug(); if (r) { - printk(KERN_ERR "kvm: create debugfs files failed\n"); + pr_err("kvm: create debugfs files failed\n"); goto out_undebugfs; } |