From 0b4edf111870b83ea77b1d7e16b8ceac29f9f388 Mon Sep 17 00:00:00 2001 From: Faiz Abbas Date: Wed, 13 May 2020 02:08:04 +0530 Subject: ARM: dts: Move am33xx and am43xx mmc nodes to sdhci-omap driver Move mmc nodes to be compatible with the sdhci-omap driver. The following modifications are required for omap_hsmmc specific properties: ti,non-removable: convert to the generic mmc non-removable ti,needs-special-reset: co-opted into the sdhci-omap driver ti,dual-volt: removed. Legacy property not used in am335x or am43xx ti,needs-special-hs-handling: removed. Legacy property not used in am335x or am43xx Also since the sdhci-omap driver does not support runtime PM, explicitly disable the mmc3 instance in the dtsi. Signed-off-by: Faiz Abbas Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-baltos.dtsi | 2 +- arch/arm/boot/dts/am335x-boneblack-common.dtsi | 1 + arch/arm/boot/dts/am335x-boneblack-wireless.dts | 1 - arch/arm/boot/dts/am335x-boneblue.dts | 1 - arch/arm/boot/dts/am335x-bonegreen-wireless.dts | 1 - arch/arm/boot/dts/am335x-evm.dts | 3 +-- arch/arm/boot/dts/am335x-evmsk.dts | 2 +- arch/arm/boot/dts/am335x-lxm.dts | 2 +- arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi | 2 +- arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts | 2 +- arch/arm/boot/dts/am335x-pepper.dts | 4 ++-- arch/arm/boot/dts/am335x-phycore-som.dtsi | 2 +- arch/arm/boot/dts/am33xx-l4.dtsi | 6 ++---- arch/arm/boot/dts/am33xx.dtsi | 3 ++- arch/arm/boot/dts/am4372.dtsi | 3 ++- arch/arm/boot/dts/am437x-cm-t43.dts | 2 +- arch/arm/boot/dts/am437x-gp-evm.dts | 4 ++-- arch/arm/boot/dts/am437x-l4.dtsi | 5 ++--- arch/arm/boot/dts/am437x-sk-evm.dts | 2 +- 19 files changed, 22 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi index 05e7b5d4a95b..04f0b1227efe 100644 --- a/arch/arm/boot/dts/am335x-baltos.dtsi +++ b/arch/arm/boot/dts/am335x-baltos.dtsi @@ -369,7 +369,7 @@ &mmc2 { status = "okay"; vmmc-supply = 
<&wl12xx_vmmc>; - ti,non-removable; + non-removable; bus-width = <4>; cap-power-off-card; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 91f93bc89716..dd932220a8bf 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -22,6 +22,7 @@ pinctrl-0 = <&emmc_pins>; bus-width = <8>; status = "okay"; + non-removable; }; &am33xx_pinmux { diff --git a/arch/arm/boot/dts/am335x-boneblack-wireless.dts b/arch/arm/boot/dts/am335x-boneblack-wireless.dts index 3124d94c0b3c..e07dd7979586 100644 --- a/arch/arm/boot/dts/am335x-boneblack-wireless.dts +++ b/arch/arm/boot/dts/am335x-boneblack-wireless.dts @@ -75,7 +75,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-boneblue.dts b/arch/arm/boot/dts/am335x-boneblue.dts index 5811fb8d4fdf..83f9452c9cd3 100644 --- a/arch/arm/boot/dts/am335x-boneblue.dts +++ b/arch/arm/boot/dts/am335x-boneblue.dts @@ -367,7 +367,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-bonegreen-wireless.dts b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts index 4092cd193b8a..609c8db687ec 100644 --- a/arch/arm/boot/dts/am335x-bonegreen-wireless.dts +++ b/arch/arm/boot/dts/am335x-bonegreen-wireless.dts @@ -75,7 +75,6 @@ bus-width = <4>; non-removable; cap-power-off-card; - ti,needs-special-hs-handling; keep-power-in-suspend; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wl18xx_pins>; diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index 68252dab32c3..a4fc6b168a85 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts 
@@ -743,8 +743,7 @@ bus-width = <4>; pinctrl-names = "default"; pinctrl-0 = <&mmc3_pins &wlan_pins>; - ti,non-removable; - ti,needs-special-hs-handling; + non-removable; cap-power-off-card; keep-power-in-suspend; diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts index 32f515a295ee..78b6e1f594c9 100644 --- a/arch/arm/boot/dts/am335x-evmsk.dts +++ b/arch/arm/boot/dts/am335x-evmsk.dts @@ -655,7 +655,7 @@ &mmc2 { status = "okay"; vmmc-supply = <&wl12xx_vmmc>; - ti,non-removable; + non-removable; bus-width = <4>; cap-power-off-card; keep-power-in-suspend; diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts index fef582852820..dbedf729205c 100644 --- a/arch/arm/boot/dts/am335x-lxm.dts +++ b/arch/arm/boot/dts/am335x-lxm.dts @@ -339,7 +339,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi index 6495a125c01f..4e90f9c23d2e 100644 --- a/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi +++ b/arch/arm/boot/dts/am335x-moxa-uc-2100-common.dtsi @@ -159,7 +159,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; pinctrl-0 = <&mmc1_pins_default>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts index 244df9c5a537..f03e72cada41 100644 --- a/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts +++ b/arch/arm/boot/dts/am335x-moxa-uc-8100-me-t.dts @@ -451,7 +451,7 @@ vmmc-supply = <&vmmcsd_fixed>; bus-width = <8>; pinctrl-0 = <&mmc2_pins_default>; - ti,non-removable; + non-removable; status = "okay"; }; diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts index 6d7608d9377b..f9a027b47962 100644 --- a/arch/arm/boot/dts/am335x-pepper.dts +++ b/arch/arm/boot/dts/am335x-pepper.dts @@ 
-341,7 +341,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&ldo3_reg>; bus-width = <8>; - ti,non-removable; + non-removable; }; &mmc3 { @@ -351,7 +351,7 @@ pinctrl-0 = <&wireless_pins>; vmmmc-supply = <&v3v3c_reg>; bus-width = <4>; - ti,non-removable; + non-removable; dmas = <&edma_xbar 12 0 1 &edma_xbar 13 0 2>; dma-names = "tx", "rx"; diff --git a/arch/arm/boot/dts/am335x-phycore-som.dtsi b/arch/arm/boot/dts/am335x-phycore-som.dtsi index 3d0672b53d77..7e46b4c02709 100644 --- a/arch/arm/boot/dts/am335x-phycore-som.dtsi +++ b/arch/arm/boot/dts/am335x-phycore-som.dtsi @@ -69,7 +69,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmc_reg>; bus-width = <8>; - ti,non-removable; + non-removable; status = "disabled"; }; diff --git a/arch/arm/boot/dts/am33xx-l4.dtsi b/arch/arm/boot/dts/am33xx-l4.dtsi index 5ed7f3c58c0f..573ff076178b 100644 --- a/arch/arm/boot/dts/am33xx-l4.dtsi +++ b/arch/arm/boot/dts/am33xx-l4.dtsi @@ -1337,10 +1337,8 @@ ranges = <0x0 0x60000 0x1000>; mmc1: mmc@0 { - compatible = "ti,omap4-hsmmc"; - ti,dual-volt; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; - ti,needs-special-hs-handling; dmas = <&edma_xbar 24 0 0 &edma_xbar 25 0 0>; dma-names = "tx", "rx"; @@ -1818,7 +1816,7 @@ ranges = <0x0 0xd8000 0x1000>; mmc2: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; dmas = <&edma 2 0 &edma 3 0>; diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index a35f5052d76f..3b9d4d2d35bf 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -322,10 +322,11 @@ ranges = <0x0 0x47810000 0x1000>; mmc3: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am335-sdhci"; ti,needs-special-reset; interrupts = <29>; reg = <0x0 0x1000>; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index dba87bfaf33e..092b3d4404f4 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -316,10 
+316,11 @@ ranges = <0x0 0x47810000 0x1000>; mmc3: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; ti,needs-special-reset; interrupts = ; reg = <0x0 0x1000>; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/am437x-cm-t43.dts b/arch/arm/boot/dts/am437x-cm-t43.dts index 063113a5da2d..a6b4fca8626a 100644 --- a/arch/arm/boot/dts/am437x-cm-t43.dts +++ b/arch/arm/boot/dts/am437x-cm-t43.dts @@ -291,7 +291,7 @@ pinctrl-0 = <&emmc_pins>; vmmc-supply = <&vmmc_3v3>; bus-width = <8>; - ti,non-removable; + non-removable; }; &spi0 { diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index 811c8cae315b..cadf47ee337f 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -869,7 +869,7 @@ pinctrl-names = "default", "sleep"; pinctrl-0 = <&emmc_pins_default>; pinctrl-1 = <&emmc_pins_sleep>; - ti,non-removable; + non-removable; }; &mmc3 { @@ -886,7 +886,7 @@ pinctrl-1 = <&mmc3_pins_sleep>; cap-power-off-card; keep-power-in-suspend; - ti,non-removable; + non-removable; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index 49c6a872052e..f4eb36d8b660 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -1086,9 +1086,8 @@ ranges = <0x0 0x60000 0x1000>; mmc1: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; reg = <0x0 0x1000>; - ti,dual-volt; ti,needs-special-reset; dmas = <&edma 24 0>, <&edma 25 0>; @@ -1601,7 +1600,7 @@ ranges = <0x0 0xd8000 0x1000>; mmc2: mmc@0 { - compatible = "ti,omap4-hsmmc"; + compatible = "ti,am437-sdhci"; reg = <0x0 0x1000>; ti,needs-special-reset; dmas = <&edma 2 0>, diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 25222497f828..2416597a4f5c 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -719,7 +719,7 @@ pinctrl-1 = 
<&mmc3_pins_sleep>; cap-power-off-card; keep-power-in-suspend; - ti,non-removable; + non-removable; #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3 From 16accae3d97f97d7f61c4ee5d0002bccdef59088 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 28 May 2020 13:16:14 -0700 Subject: perf/x86/rapl: Fix RAPL config variable bug This patch fixes a bug introduced by: fd3ae1e1587d6 ("perf/x86/rapl: Move RAPL support to common x86 code") The Kconfig variable name was wrong. It was missing the CONFIG_ prefix. Signed-off-by: Stephane Eranian Signed-off-by: Ingo Molnar Tested-by: Kim Phillips Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20200528201614.250182-1-eranian@google.com --- arch/x86/events/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 12c42eba77ec..9933c0e8e97a 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += core.o probe.o -obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ -- cgit v1.2.3 From 0df12a01f4857495816b05f048c4c31439446e35 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Jun 2020 17:18:56 -0700 Subject: ARM: dts: omap4-droid4: Fix spi configuration and increase rate We can currently sometimes get "RXS timed out" errors and "EOT timed out" errors with spi transfers. These errors can be made easy to reproduce by reading the cpcap iio values in a loop while keeping the CPUs busy by also reading /dev/urandom. The "RXS timed out" errors we can fix by adding spi-cpol and spi-cpha in addition to the spi-cs-high property we already have. The "EOT timed out" errors we can fix by increasing the spi clock rate to 9.6 MHz. 
Looks like the similar MC13783 PMIC says it works at spi clock rates up to 20 MHz, so let's assume we can pick any rate up to 20 MHz also for cpcap. Cc: maemo-leste@lists.dyne.org Cc: Merlijn Wajer Cc: Pavel Machek Cc: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi index e39eee628afd..08a7d3ce383f 100644 --- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi +++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi @@ -13,8 +13,10 @@ #interrupt-cells = <2>; #address-cells = <1>; #size-cells = <0>; - spi-max-frequency = <3000000>; + spi-max-frequency = <9600000>; spi-cs-high; + spi-cpol; + spi-cpha; cpcap_adc: adc { compatible = "motorola,mapphone-cpcap-adc"; -- cgit v1.2.3 From 77cad9dbc957f23a73169e8a8971186744296614 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 27 May 2020 16:32:06 -0700 Subject: ARM: OMAP2+: Fix legacy mode dss_reset We must check for "dss_core" instead of "dss" to avoid also matching "dss_dispc". This only matters for the mixed case of data configured in device tree but with legacy booting ti,hwmods property still enabled. 
Fixes: 8b30919a4e3c ("ARM: OMAP2+: Handle reset quirks for dynamically allocated modules") Cc: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap_hwmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 82706af307de..c630457bb228 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -3489,7 +3489,7 @@ static const struct omap_hwmod_reset dra7_reset_quirks[] = { }; static const struct omap_hwmod_reset omap_reset_quirks[] = { - { .match = "dss", .len = 3, .reset = omap_dss_reset, }, + { .match = "dss_core", .len = 8, .reset = omap_dss_reset, }, { .match = "hdq1w", .len = 5, .reset = omap_hdq1w_reset, }, { .match = "i2c", .len = 3, .reset = omap_i2c_reset, }, { .match = "wd_timer", .len = 8, .reset = omap2_wd_timer_reset, }, -- cgit v1.2.3 From 38ac46002d1df5707566a73486452851341028d2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 3 Jun 2020 17:22:37 +0100 Subject: arm: dts: vexpress: Move mcc node back into motherboard node Commit d9258898ad49 ("arm64: dts: arm: vexpress: Move fixed devices out of bus node") moved the "mcc" DT node into the root node, because it does not have any children using "reg" properties, so does violate some dtc checks about "simple-bus" nodes. However this broke the vexpress config-bus code, which walks up the device tree to find the first node with an "arm,vexpress,site" property. This gave the wrong result (matching the root node instead of the motherboard node), so broke the clocks and some other devices for VExpress boards. Move the whole node back into its original position. This re-introduces the dtc warning, but is conceptually the right thing to do. The dtc warning seems to be overzealous here, there are discussions on fixing or relaxing this check instead. 
Link: https://lore.kernel.org/r/20200603162237.16319-1-andre.przywara@arm.com Fixes: d9258898ad49 ("arm64: dts: vexpress: Move fixed devices out of bus node") Reported-and-tested-by: Guenter Roeck Signed-off-by: Andre Przywara Signed-off-by: Sudeep Holla --- arch/arm/boot/dts/vexpress-v2m-rs1.dtsi | 146 ++++++++++++++++---------------- 1 file changed, 73 insertions(+), 73 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index e6308fb76183..a88ee5294d35 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -100,79 +100,6 @@ }; }; - mcc { - compatible = "arm,vexpress,config-bus"; - arm,vexpress,config-bridge = <&v2m_sysreg>; - - oscclk0 { - /* MCC static memory clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 0>; - freq-range = <25000000 60000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk0"; - }; - - v2m_oscclk1: oscclk1 { - /* CLCD clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 1>; - freq-range = <23750000 65000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk1"; - }; - - v2m_oscclk2: oscclk2 { - /* IO FPGA peripheral clock */ - compatible = "arm,vexpress-osc"; - arm,vexpress-sysreg,func = <1 2>; - freq-range = <24000000 24000000>; - #clock-cells = <0>; - clock-output-names = "v2m:oscclk2"; - }; - - volt-vio { - /* Logic level voltage */ - compatible = "arm,vexpress-volt"; - arm,vexpress-sysreg,func = <2 0>; - regulator-name = "VIO"; - regulator-always-on; - label = "VIO"; - }; - - temp-mcc { - /* MCC internal operating temperature */ - compatible = "arm,vexpress-temp"; - arm,vexpress-sysreg,func = <4 0>; - label = "MCC"; - }; - - reset { - compatible = "arm,vexpress-reset"; - arm,vexpress-sysreg,func = <5 0>; - }; - - muxfpga { - compatible = "arm,vexpress-muxfpga"; - arm,vexpress-sysreg,func = <7 0>; - }; - - shutdown { - compatible = 
"arm,vexpress-shutdown"; - arm,vexpress-sysreg,func = <8 0>; - }; - - reboot { - compatible = "arm,vexpress-reboot"; - arm,vexpress-sysreg,func = <9 0>; - }; - - dvimode { - compatible = "arm,vexpress-dvimode"; - arm,vexpress-sysreg,func = <11 0>; - }; - }; - bus@8000000 { motherboard-bus { model = "V2M-P1"; @@ -435,6 +362,79 @@ }; }; }; + + mcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + oscclk0 { + /* MCC static memory clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <25000000 60000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk0"; + }; + + v2m_oscclk1: oscclk1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 65000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + v2m_oscclk2: oscclk2 { + /* IO FPGA peripheral clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <24000000 24000000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk2"; + }; + + volt-vio { + /* Logic level voltage */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "VIO"; + regulator-always-on; + label = "VIO"; + }; + + temp-mcc { + /* MCC internal operating temperature */ + compatible = "arm,vexpress-temp"; + arm,vexpress-sysreg,func = <4 0>; + label = "MCC"; + }; + + reset { + compatible = "arm,vexpress-reset"; + arm,vexpress-sysreg,func = <5 0>; + }; + + muxfpga { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + + reboot { + compatible = "arm,vexpress-reboot"; + arm,vexpress-sysreg,func = <9 0>; + }; + + dvimode { + compatible = "arm,vexpress-dvimode"; + arm,vexpress-sysreg,func = <11 0>; + }; + }; }; }; }; -- cgit v1.2.3 From 3f311e8993ed18fb7325373ec0f82a7f8e8be82e Mon Sep 17 
00:00:00 2001 From: Oskar Holmlund Date: Fri, 5 Jun 2020 19:49:23 +0200 Subject: ARM: dts: Fix am33xx.dtsi USB ranges length AM335x TRM: Table 2-1 defines USBSS - USB Queue Manager in memory region 0x4740 0000 to 0x4740 7FFF. Looks like the older TRM revisions list the range from 0x5000 to 0x8000 as reserved. Fixes: 0782e8572ce4 ("ARM: dts: Probe am335x musb with ti-sysc") Signed-off-by: Oskar Holmlund [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index a35f5052d76f..be76ded7e4c0 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -347,7 +347,7 @@ clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; - ranges = <0x0 0x47400000 0x5000>; + ranges = <0x0 0x47400000 0x8000>; usb0_phy: usb-phy@1300 { compatible = "ti,am335x-usb-phy"; -- cgit v1.2.3 From 9f872f924545324a06fa216ad38132804c20f2db Mon Sep 17 00:00:00 2001 From: Oskar Holmlund Date: Fri, 5 Jun 2020 19:51:09 +0200 Subject: ARM: dts: Fix am33xx.dtsi ti,sysc-mask wrong softreset flag AM335x TRM: Figure 16-23 define sysconfig register and soft_reset are in first position corresponding to SYSC_OMAP4_SOFTRESET defined in ti-sysc.h. 
Fixes: 0782e8572ce4 ("ARM: dts: Probe am335x musb with ti-sysc") Signed-off-by: Oskar Holmlund Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am33xx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index be76ded7e4c0..ed6634d34c3c 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -335,7 +335,7 @@ <0x47400010 0x4>; reg-names = "rev", "sysc"; ti,sysc-mask = <(SYSC_OMAP4_FREEEMU | - SYSC_OMAP2_SOFTRESET)>; + SYSC_OMAP4_SOFTRESET)>; ti,sysc-midle = , , ; -- cgit v1.2.3 From ebf89ed78b6ad6bc53f49a9dc0754fa97bb11e4a Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 5 Jun 2020 17:13:46 -0500 Subject: ARM: dts: dra7: Fix timer nodes properly for timer_sys_ck clocks The commit 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries for IPU/DSP timers") was added to allow the OMAP clocksource timer driver to use the clock aliases when reconfiguring the parent clock source for the timer functional clocks after the timer_sys_ck clock aliases got cleaned up in commit a8202cd5174d ("clk: ti: dra7: drop unnecessary clock aliases"). The above patch however has missed adding the entries for couple of timers (14, 15 and 16), and also added erroneously in the parent ti-sysc nodes for couple of clocks (timers 4, 5 and 6). Fix these properly, so that any of these timers can be used with OMAP remoteproc IPU and DSP devices. The always-on timers 1 and 12 are not expected to use this clock source, so they are not modified. 
Fixes: 5390130f3b28 ("ARM: dts: dra7: add timer_sys_ck entries for IPU/DSP timers") Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-l4.dtsi | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 1abd455cf15a..e059054d9110 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -1210,9 +1210,8 @@ , ; /* Domains (P, C): l4per_pwrdm, l4per_clkdm */ - clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>, - <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x36000 0x1000>; @@ -3355,8 +3354,8 @@ , ; /* Domains (P, C): ipu_pwrdm, ipu_clkdm */ - clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>, <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x20000 0x1000>; @@ -3364,8 +3363,8 @@ timer5: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER5_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; }; }; @@ -3382,9 +3381,8 @@ , ; /* Domains (P, C): ipu_pwrdm, ipu_clkdm */ - clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>, - <&timer_sys_clk_div>; - clock-names = "fck", "timer_sys_ck"; + clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 0>; + clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x22000 0x1000>; @@ -3392,8 +3390,8 @@ timer6: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&ipu_clkctrl DRA7_IPU_TIMER6_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&ipu_clkctrl 
DRA7_IPU_TIMER6_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; }; }; @@ -3501,8 +3499,8 @@ timer14: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; @@ -3529,8 +3527,8 @@ timer15: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; @@ -3557,8 +3555,8 @@ timer16: timer@0 { compatible = "ti,omap5430-timer"; reg = <0x0 0x80>; - clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; - clock-names = "fck"; + clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>, <&timer_sys_clk_div>; + clock-names = "fck", "timer_sys_ck"; interrupts = ; ti,timer-pwm; }; -- cgit v1.2.3 From 8e326a8bdef3fb42b92bf2742e8405d9b9209367 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 5 Jun 2020 17:13:47 -0500 Subject: ARM: dts: dra7-evm-common: Fix duplicate mailbox nodes The mailbox nodes defined in various dts files have been moved to common dra7-ipu-dsp-common.dtsi and dra74-ipu-dsp-common.dtsi files in commit a11a2f73b32d ("ARM: dts: dra7-ipu-dsp-common: Move mailboxes into common files"), but the nodes were erroneously left out in the dra7-evm-common.dtsi file. Fix this by removing these duplicate nodes. 
Fixes: a11a2f73b32d ("ARM: dts: dra7-ipu-dsp-common: Move mailboxes into common files") Signed-off-by: Suman Anna Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7-evm-common.dtsi | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/dra7-evm-common.dtsi b/arch/arm/boot/dts/dra7-evm-common.dtsi index 23244b5a9942..488201f0ac95 100644 --- a/arch/arm/boot/dts/dra7-evm-common.dtsi +++ b/arch/arm/boot/dts/dra7-evm-common.dtsi @@ -244,26 +244,6 @@ rx-num-evt = <32>; }; -&mailbox5 { - status = "okay"; - mbox_ipu1_ipc3x: mbox_ipu1_ipc3x { - status = "okay"; - }; - mbox_dsp1_ipc3x: mbox_dsp1_ipc3x { - status = "okay"; - }; -}; - -&mailbox6 { - status = "okay"; - mbox_ipu2_ipc3x: mbox_ipu2_ipc3x { - status = "okay"; - }; - mbox_dsp2_ipc3x: mbox_dsp2_ipc3x { - status = "okay"; - }; -}; - &pcie1_rc { status = "okay"; }; -- cgit v1.2.3 From f9639f9a779ac0381cabfb793915851a89424f9f Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:29:58 +0300 Subject: ARM: dts: am437x-sk-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. 
Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 4d5a7ca2e25d..6c83812407bf 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -134,22 +134,6 @@ enable-gpios = <&gpio1 7 GPIO_ACTIVE_HIGH>; - panel-timing { - clock-frequency = <9000000>; - hactive = <480>; - vactive = <272>; - hfront-porch = <2>; - hback-porch = <2>; - hsync-len = <41>; - vfront-porch = <2>; - vback-porch = <2>; - vsync-len = <10>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 3991510bf6402b534f158e164569d051bfd81f68 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:29:59 +0300 Subject: ARM: dts: am437x-gp-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. 
Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-gp-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index d692e3b2812a..2ff9485c28a4 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -91,22 +91,6 @@ backlight = <&lcd_bl>; - panel-timing { - clock-frequency = <33000000>; - hactive = <800>; - vactive = <480>; - hfront-porch = <210>; - hback-porch = <16>; - hsync-len = <30>; - vback-porch = <10>; - vfront-porch = <22>; - vsync-len = <13>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 944021e79e7ec8db9c3edbc6af32276ade4ac0db Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 9 Jun 2020 13:30:00 +0300 Subject: ARM: dts: am437x-epos-evm: remove lcd timings LCD timings now come from panel-simple. Having timings in the DT will cause a WARN. 
Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am43x-epos-evm.dts | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 27259fd6f741..7d4e0dffde7a 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -47,22 +47,6 @@ backlight = <&lcd_bl>; - panel-timing { - clock-frequency = <33000000>; - hactive = <800>; - vactive = <480>; - hfront-porch = <210>; - hback-porch = <16>; - hsync-len = <30>; - vback-porch = <10>; - vfront-porch = <22>; - vsync-len = <13>; - hsync-active = <0>; - vsync-active = <0>; - de-active = <1>; - pixelclk-active = <1>; - }; - port { lcd_in: endpoint { remote-endpoint = <&dpi_out>; -- cgit v1.2.3 From 6c58f25e6938c073198af8b1e1832f83f8f0df33 Mon Sep 17 00:00:00 2001 From: Nathan Huckleberry Date: Thu, 11 Jun 2020 18:32:35 +0000 Subject: riscv/atomic: Fix sign extension for RV64I The argument passed to cmpxchg is not guaranteed to be sign extended, but lr.w sign extends on RV64I. This makes cmpxchg fail on clang built kernels when __old is negative. To fix this, we just cast __old to long which sign extends on RV64I. With this fix, clang built RISC-V kernels now boot. 
Link: https://github.com/ClangBuiltLinux/linux/issues/867 Signed-off-by: Nathan Huckleberry Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index d969bab4a26b..262e5bbb2776 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -179,7 +179,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -224,7 +224,7 @@ RISCV_ACQUIRE_BARRIER \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -270,7 +270,7 @@ " bnez %1, 0b\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ @@ -316,7 +316,7 @@ " fence rw, rw\n" \ "1:\n" \ : "=&r" (__ret), "=&r" (__rc), "+A" (*__ptr) \ - : "rJ" (__old), "rJ" (__new) \ + : "rJ" ((long)__old), "rJ" (__new) \ : "memory"); \ break; \ case 8: \ -- cgit v1.2.3 From de1f6d9304c38e414552c3565d36286609ced0c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 8 Jun 2020 18:33:41 +0200 Subject: ARM: dts: BCM5301X: Add missing memory "device_type" for Luxul XWC-2000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This property is needed since commit abe60a3a7afb ("ARM: dts: Kill off skeleton{64}.dtsi"). 
Without it booting silently hangs at: [ 0.000000] Memory policy: Data cache writealloc Fixes: 984829e2d39b ("ARM: dts: BCM5301X: Add DT for Luxul XWC-2000") Signed-off-by: Rafał Miłecki Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts index 334325390aed..29bbecd36f65 100644 --- a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts +++ b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts @@ -17,6 +17,7 @@ }; memory { + device_type = "memory"; reg = <0x00000000 0x08000000 0x88000000 0x18000000>; }; -- cgit v1.2.3 From 0386e9ce5877ee73e07675529d5ae594d00f0900 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 9 Jun 2020 17:58:31 +0100 Subject: ARM: bcm: Select ARM_TIMER_SP804 for ARCH_BCM_NSP The NSP SoC includes an SP804 timer so should be enabled here. Fixes: a0efb0d28b77 ("ARM: dts: NSP: Add SP804 Support to DT") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/mach-bcm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index 6aa938b949db..1df0ee01ee02 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -53,6 +53,7 @@ config ARCH_BCM_NSP select ARM_ERRATA_754322 select ARM_ERRATA_775420 select ARM_ERRATA_764369 if SMP + select ARM_TIMER_SP804 select THERMAL select THERMAL_OF help -- cgit v1.2.3 From 2c18bd525c47f882f033b0a813ecd09c93e1ecdf Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 4 Jun 2020 14:45:16 -0500 Subject: x86/resctrl: Fix memory bandwidth counter width for AMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Memory bandwidth is calculated reading the monitoring counter at two intervals and calculating the delta. 
It is the software’s responsibility to read the count often enough to avoid having the count roll over _twice_ between reads. The current code hardcodes the bandwidth monitoring counter's width to 24 bits for AMD. This is due to default base counter width which is 24. Currently, AMD does not implement the CPUID 0xF.[ECX=1]:EAX to adjust the counter width. But, the AMD hardware supports much wider bandwidth counter with the default width of 44 bits. Kernel reads these monitoring counters every 1 second and adjusts the counter value for overflow. With 24 bits and scale value of 64 for AMD, it can only measure up to 1GB/s without overflowing. For the rates above 1GB/s this will fail to measure the bandwidth. Fix the issue setting the default width to 44 bits by adjusting the offset. AMD future products will implement CPUID 0xF.[ECX=1]:EAX. [ bp: Let the line stick out and drop {}-brackets around a single statement. ] Fixes: 4d05bf71f157 ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/159129975546.62538.5656031125604254041.stgit@naples-babu.amd.com --- arch/x86/kernel/cpu/resctrl/core.c | 8 ++++---- arch/x86/kernel/cpu/resctrl/internal.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 12f967c6b603..6a9df71c1b9e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -981,10 +981,10 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c) c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; - if (c->x86_vendor == X86_VENDOR_INTEL) - c->x86_cache_mbm_width_offset = eax & 0xff; - else - c->x86_cache_mbm_width_offset = -1; + c->x86_cache_mbm_width_offset = eax & 0xff; + + if (c->x86_vendor == X86_VENDOR_AMD && !c->x86_cache_mbm_width_offset) + c->x86_cache_mbm_width_offset = MBM_CNTR_WIDTH_OFFSET_AMD; } } diff --git 
a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index f20a47d120b1..5ffa32256b3b 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -37,6 +37,7 @@ #define MBA_IS_LINEAR 0x4 #define MBA_MAX_MBPS U32_MAX #define MAX_MBA_BW_AMD 0x800 +#define MBM_CNTR_WIDTH_OFFSET_AMD 20 #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) -- cgit v1.2.3 From 0ae705f3d2b22d9d762f67fd49aa6c290987c6a3 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 15 Jun 2020 14:56:55 +0800 Subject: KVM: MIPS: Fix a build error for !CPU_LOONGSON64 During the KVM merging process, a CONFIG_CPU_LOONGSON64 guard in commit 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") went missing by accident. So add it to avoid a build error. Fixes: 7f2a83f1c2a941ebfee53 ("KVM: MIPS: Add CPUCFG emulation for Loongson-3") Reported-by: kernel test robot Signed-off-by: Huacai Chen Message-Id: <1592204215-28704-1-git-send-email-chenhc@lemote.com> Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 521bd5891e84..666d3350b4ac 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -67,7 +67,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { VCPU_STAT("vz_ghfc", vz_ghfc_exits), VCPU_STAT("vz_gpa", vz_gpa_exits), VCPU_STAT("vz_resvd", vz_resvd_exits), +#ifdef CONFIG_CPU_LOONGSON64 VCPU_STAT("vz_cpucfg", vz_cpucfg_exits), +#endif #endif VCPU_STAT("halt_successful_poll", halt_successful_poll), VCPU_STAT("halt_attempted_poll", halt_attempted_poll), -- cgit v1.2.3 From e82587336695f14283987c9aa0bfd775b520856d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 14:24:47 +0200 Subject: x86, kcsan: Remove __no_kcsan_or_inline usage Now that KCSAN relies on -tsan-distinguish-volatile we no longer need the annotation for constant_test_bit(). Remove it.
Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bitops.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 35460fef39b8..0367efdc5b7a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,12 +201,8 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr) return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } -static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { - /* - * Because this is a plain access, we need to disable KCSAN here to - * avoid double instrumentation via instrumented bitops. - */ return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -- cgit v1.2.3 From 14d3b376b6c3f66d62559d457d32edf565472163 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 13:32:48 +0200 Subject: x86/entry, cpumask: Provide non-instrumented variant of cpu_is_offline() vmlinux.o: warning: objtool: exc_nmi()+0x12: call to cpumask_test_cpu.constprop.0() leaves .noinstr.text section vmlinux.o: warning: objtool: mce_check_crashing_cpu()+0x12: call to cpumask_test_cpu.constprop.0()leaves .noinstr.text section cpumask_test_cpu() test_bit() instrument_atomic_read() arch_test_bit() Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/cpumask.h | 18 ++++++++++++++++++ arch/x86/kernel/cpu/mce/core.c | 2 +- arch/x86/kernel/nmi.c | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 6722ffcef2e6..3afa990d756b 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -11,5 +11,23 @@ extern cpumask_var_t cpu_sibling_setup_mask; extern void setup_cpu_local_masks(void); +/* + * NMI and MCE 
exceptions need cpu_is_offline() _really_ early, + * provide an arch_ special for them to avoid instrumentation. + */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index ce9120c4f740..fbe89a92ff36 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -1083,7 +1083,7 @@ static noinstr bool mce_check_crashing_cpu(void) { unsigned int cpu = smp_processor_id(); - if (cpu_is_offline(cpu) || + if (arch_cpu_is_offline(cpu) || (crashing_cpu != -1 && crashing_cpu != cpu)) { u64 mcgstatus; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2de365f15684..d7c5e44b26f7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -478,7 +478,7 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7); DEFINE_IDTENTRY_RAW(exc_nmi) { - if (IS_ENABLED(CONFIG_SMP) && cpu_is_offline(smp_processor_id())) + if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { -- cgit v1.2.3 From 8e8bb06d199a5aa7a534aa3b3fc0abbbc11ca438 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Jun 2020 11:17:40 +0200 Subject: x86/entry, bug: Comment the instrumentation_begin() usage for WARN() Explain the rationale for annotating WARN(), even though, strictly speaking printk() and friends are very much not safe in many of the places we put them. 
Signed-off-by: Peter Zijlstra (Intel) --- arch/x86/include/asm/bug.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index fb34ff641e0a..028189575560 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -75,6 +75,12 @@ do { \ unreachable(); \ } while (0) +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ #define __WARN_FLAGS(flags) \ do { \ instrumentation_begin(); \ -- cgit v1.2.3 From 751c263bb74fd36b5fc2589d36abc75042336444 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 15 Jun 2020 12:19:39 +0200 Subject: arm64: remove TEXT_OFFSET randomization TEXT_OFFSET was recently changed to 0x0, in preparation for its removal at a later stage, and a warning is emitted into the kernel log when the bootloader appears to have failed to take the TEXT_OFFSET image header value into account. Ironically, this warning itself fails to take TEXT_OFFSET into account, and compares the kernel image's alignment modulo 2M against a hardcoded value of 0x0, and so the warning will trigger spuriously when TEXT_OFFSET randomization is enabled. Given the intent to get rid of TEXT_OFFSET entirely, let's fix this oversight by just removing support for TEXT_OFFSET randomization. 
Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615101939.634391-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig.debug | 15 --------------- arch/arm64/Makefile | 6 ------ 2 files changed, 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cdf7ec0b975e..265c4461031f 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -8,21 +8,6 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. -config ARM64_RANDOMIZE_TEXT_OFFSET - bool "Randomize TEXT_OFFSET at build time" - help - Say Y here if you want the image load offset (AKA TEXT_OFFSET) - of the kernel to be randomized at build-time. When selected, - this option will cause TEXT_OFFSET to be randomized upon any - build of the kernel, and the offset will be reflected in the - text_offset field of the resulting Image. This can be used to - fuzz-test bootloaders which respect text_offset. - - This option is intended for bootloader and/or kernel testing - only. Bootloaders must make no assumptions regarding the value - of TEXT_OFFSET and platforms must not require a specific - value. - config DEBUG_EFI depends on EFI && DEBUG_INFO bool "UEFI debugging" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 76359cfb328a..a0d94d063fa8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -121,13 +121,7 @@ endif head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. 
-ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) -TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \ - int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \ - rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}") -else TEXT_OFFSET := 0x0 -endif ifeq ($(CONFIG_KASAN_SW_TAGS), y) KASAN_SHADOW_SCALE_SHIFT := 4 -- cgit v1.2.3 From 5d5103595e9e53048bb7e70ee2673c897ab38300 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Jun 2020 10:41:34 -0700 Subject: x86/cpu: Reinitialize IA32_FEAT_CTL MSR on BSP during wakeup Reinitialize IA32_FEAT_CTL on the BSP during wakeup to handle the case where firmware doesn't initialize or save/restore across S3. This fixes a bug where IA32_FEAT_CTL is left uninitialized and results in VMXON taking a #GP due to VMX not being fully enabled, i.e. breaks KVM. Use init_ia32_feat_ctl() to "restore" IA32_FEAT_CTL as it already deals with the case where the MSR is locked, and because APs already redo init_ia32_feat_ctl() during suspend by virtue of the SMP boot flow being used to reinitialize APs upon wakeup. Do the call in the early wakeup flow to avoid dependencies in the syscore_ops chain, e.g. simply adding a resume hook is not guaranteed to work, as KVM does VMXON in its own resume hook, kvm_resume(), when KVM has active guests. 
Fixes: 21bd3467a58e ("KVM: VMX: Drop initialization of IA32_FEAT_CTL MSR") Reported-by: Brad Campbell Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Reviewed-by: Liam Merwick Reviewed-by: Maxim Levitsky Tested-by: Brad Campbell Cc: stable@vger.kernel.org # v5.6 Link: https://lkml.kernel.org/r/20200608174134.11157-1-sean.j.christopherson@intel.com --- arch/x86/include/asm/cpu.h | 5 +++++ arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/cpu.h | 4 ---- arch/x86/kernel/cpu/zhaoxin.c | 1 + arch/x86/power/cpu.c | 6 ++++++ 5 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index dd17c2da1af5..da78ccbd493b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip) return false; } #endif +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 426792565d86..c5cf336e5077 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index fb538fccd24c..9d033693519a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -81,8 +81,4 @@ extern void update_srbds_msr(void); extern u64 x86_read_arch_cap_msr(void); -#ifdef CONFIG_IA32_FEAT_CTL -void init_ia32_feat_ctl(struct cpuinfo_x86 *c); -#endif - #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index df1358ba622b..05fa4ef63490 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -2,6 +2,7 @@ #include #include +#include #include #include "cpu.h" diff --git 
a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 7c65102debaf..db1378c6ff26 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -193,6 +193,8 @@ static void fix_processor_context(void) */ static void notrace __restore_processor_state(struct saved_context *ctxt) { + struct cpuinfo_x86 *c; + if (ctxt->misc_enable_saved) wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); /* @@ -263,6 +265,10 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) mtrr_bp_restore(); perf_restore_debug_store(); msr_restore_context(ctxt); + + c = &cpu_data(smp_processor_id()); + if (cpu_has(c, X86_FEATURE_MSR_IA32_FEAT_CTL)) + init_ia32_feat_ctl(c); } /* Needed by apm.c */ -- cgit v1.2.3 From a6e2c226c3d51fd93636320e47cabc8a8f0824c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 24 May 2020 15:08:19 +0530 Subject: powerpc: Fix kernel crash in show_instructions() w/DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, we can hit a BUG() if we take a hard lockup watchdog interrupt when in OPAL mode. This happens in show_instructions() if the kernel takes the watchdog NMI IPI, or any other interrupt, with MSR_IR == 0. show_instructions() updates the variable pc in the loop and the second iteration will result in BUG(). We hit the BUG_ON due to the below check in __va() #define __va(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); }) Fix it by moving the check out of the loop. Also update nip so that the nip == pc check still matches.
Fixes: 4dd7554a6456 ("powerpc/64: Add VIRTUAL_BUG_ON checks for __va and __pa addresses") Signed-off-by: Aneesh Kumar K.V [mpe: Use IS_ENABLED(), massage change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200524093822.423487-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kernel/process.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..a2f1f0e70a4b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1252,29 +1252,31 @@ struct task_struct *__switch_to(struct task_struct *prev, static void show_instructions(struct pt_regs *regs) { int i; + unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); printk("Instruction dump:"); + /* + * If we were executing with the MMU off for instructions, adjust pc + * rather than printing XXXXXXXX. + */ + if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) { + pc = (unsigned long)phys_to_virt(pc); + nip = (unsigned long)phys_to_virt(regs->nip); + } + for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; if (!(i % 8)) pr_cont("\n"); -#if !defined(CONFIG_BOOKE) - /* If executing with the IMMU off, adjust pc rather - * than print XXXXXXXX. - */ - if (!(regs->msr & MSR_IR)) - pc = (unsigned long)phys_to_virt(pc); -#endif - if (!__kernel_text_address(pc) || probe_kernel_address((const void *)pc, instr)) { pr_cont("XXXXXXXX "); } else { - if (regs->nip == pc) + if (nip == pc) pr_cont("<%08x> ", instr); else pr_cont("%08x ", instr); -- cgit v1.2.3 From b95273f1272398a9f7145de37703f1930244e465 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Wed, 15 Apr 2020 11:37:09 -0400 Subject: kvm/svm: disable KCSAN for svm_vcpu_run() For some reasons, running a simple qemu-kvm command with KCSAN will reset AMD hosts. It turns out svm_vcpu_run() could not be instrumented. Disable it for now. 
# /usr/libexec/qemu-kvm -name ubuntu-18.04-server-cloudimg -cpu host -smp 2 -m 2G -hda ubuntu-18.04-server-cloudimg.qcow2 === console output === Kernel 5.6.0-next-20200408+ on an x86_64 hp-dl385g10-05 login: <...host reset...> HPE ProLiant System BIOS A40 v1.20 (03/09/2018) (C) Copyright 1982-2018 Hewlett Packard Enterprise Development LP Early system initialization, please wait... Signed-off-by: Qian Cai Message-Id: <20200415153709.1559-1-cai@lca.pw> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8ccfa4197d9c..c0da4dd78ac5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3344,7 +3344,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) +static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); -- cgit v1.2.3 From 1e570f512cbdc5e9e401ba640d9827985c1bea1e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 10 Jun 2020 18:03:10 +0100 Subject: arm64/sve: Eliminate data races on sve_default_vl sve_default_vl can be modified via the /proc/sys/abi/sve_default_vl sysctl concurrently with use, and modified concurrently by multiple threads. Adding a lock for this seems overkill, and I don't want to think any more than necessary, so just define wrappers using READ_ONCE()/ WRITE_ONCE(). This will avoid the possibility of torn accesses and repeated loads and stores. There's no evidence yet that this is going wrong in practice: this is just hygiene. For generic sysctl users, it would be better to build this kind of thing into the sysctl common code somehow. 
Reported-by: Will Deacon Signed-off-by: Dave Martin Link: https://lore.kernel.org/r/1591808590-20210-3-git-send-email-Dave.Martin@arm.com [will: move set_sve_default_vl() inside #ifdef to squash allnoconfig warning] Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 35cb5e66c504..d9eee9194511 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -119,10 +120,20 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); /* Default VL for tasks that don't set it explicitly: */ -static int sve_default_vl = -1; +static int __sve_default_vl = -1; + +static int get_sve_default_vl(void) +{ + return READ_ONCE(__sve_default_vl); +} #ifdef CONFIG_ARM64_SVE +static void set_sve_default_vl(int val) +{ + WRITE_ONCE(__sve_default_vl, val); +} + /* Maximum supported vector length across all CPUs (initially poisoned) */ int __ro_after_init sve_max_vl = SVE_VL_MIN; int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; @@ -344,7 +355,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; - int vl = sve_default_vl; + int vl = get_sve_default_vl(); struct ctl_table tmp_table = { .data = &vl, .maxlen = sizeof(vl), @@ -361,7 +372,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, if (!sve_vl_valid(vl)) return -EINVAL; - sve_default_vl = find_supported_vector_length(vl); + set_sve_default_vl(find_supported_vector_length(vl)); return 0; } @@ -868,7 +879,7 @@ void __init sve_setup(void) * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. 
*/ - sve_default_vl = find_supported_vector_length(64); + set_sve_default_vl(find_supported_vector_length(64)); bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); @@ -889,7 +900,7 @@ void __init sve_setup(void) pr_info("SVE: maximum available vector length %u bytes per vector\n", sve_max_vl); pr_info("SVE: default vector length %u bytes per vector\n", - sve_default_vl); + get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ if (sve_max_virtualisable_vl < sve_max_vl) @@ -1029,13 +1040,13 @@ void fpsimd_flush_thread(void) * vector length configured: no kernel task can become a user * task without an exec and hence a call to this function. * By the time the first call to this function is made, all - * early hardware probing is complete, so sve_default_vl + * early hardware probing is complete, so __sve_default_vl * should be valid. * If a bug causes this to go wrong, we make some noise and * try to fudge thread.sve_vl to a safe value here. */ vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : sve_default_vl; + current->thread.sve_vl_onexec : get_sve_default_vl(); if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; -- cgit v1.2.3 From 413d3ea6b775d77b2057f13a9af75875eb066156 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 12:23:16 +0100 Subject: arm64: traps: Dump registers prior to panic() in bad_mode() When panicking due to an unknown/unhandled exception at EL1, dump the registers of the faulting context so that it's easier to figure out what went wrong. In particular, this makes it a lot easier to debug in-kernel BTI failures since it pretty-prints PSTATE.BTYPE in the crash log.
Cc: Mark Brown Cc: Catalin Marinas Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20200615113458.2884-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..24f2af70ac2e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -813,6 +813,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); + __show_regs(regs); local_daif_mask(); panic("bad mode"); } -- cgit v1.2.3 From 8dd4daa04278d7437641962ed53b843c0b0ec4a9 Mon Sep 17 00:00:00 2001 From: Shyam Thombre Date: Wed, 10 Jun 2020 16:39:44 +0530 Subject: arm64: mm: reset address tag set by kasan sw tagging KASAN sw tagging sets a random tag of 8 bits in the top byte of the pointer returned by the memory allocating functions. So for the functions unaware of this change, the top 8 bits of the address must be reset which is done by the function arch_kasan_reset_tag(). 
Signed-off-by: Shyam Thombre Link: https://lore.kernel.org/r/1591787384-5823-1-git-send-email-sthombre@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 990929c8837e..1df25f26571d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -723,6 +723,7 @@ int kern_addr_valid(unsigned long addr) pmd_t *pmdp, pmd; pte_t *ptep, pte; + addr = arch_kasan_reset_tag(addr); if ((((long)addr) >> VA_BITS) != -1UL) return 0; -- cgit v1.2.3 From 88c200d929c969408779dbae4c4fad32bc510373 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 8 Jun 2020 18:45:18 -0700 Subject: KVM: VMX: Add helpers to identify interrupt type from intr_info Add is_intr_type() and is_intr_type_n() to consolidate the boilerplate code for querying a specific type of interrupt given an encoded value from VMCS.VM_{ENTER,EXIT}_INTR_INFO, with and without an associated vector respectively. 
Signed-off-by: Sean Christopherson Message-Id: <20200609014518.26756-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmcs.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 5c0ff80b85c0..7a3675fddec2 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -72,11 +72,24 @@ struct loaded_vmcs { struct vmcs_controls_shadow controls_shadow; }; +static inline bool is_intr_type(u32 intr_info, u32 type) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type); +} + +static inline bool is_intr_type_n(u32 intr_info, u32 type, u8 vector) +{ + const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK | + INTR_INFO_VECTOR_MASK; + + return (intr_info & mask) == (INTR_INFO_VALID_MASK | type | vector); +} + static inline bool is_exception_n(u32 intr_info, u8 vector) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | vector | INTR_INFO_VALID_MASK); + return is_intr_type_n(intr_info, INTR_TYPE_HARD_EXCEPTION, vector); } static inline bool is_debug(u32 intr_info) @@ -106,28 +119,23 @@ static inline bool is_gp_fault(u32 intr_info) static inline bool is_machine_check(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | - INTR_INFO_VALID_MASK)) == - (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); + return is_exception_n(intr_info, MC_VECTOR); } /* Undocumented: icebp/int1 */ static inline bool is_icebp(u32 intr_info) { - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION); } static inline bool is_nmi(u32 intr_info) { - return (intr_info & 
(INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); + return is_intr_type(intr_info, INTR_TYPE_NMI_INTR); } static inline bool is_external_intr(u32 intr_info) { - return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) - == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR); + return is_intr_type(intr_info, INTR_TYPE_EXT_INTR); } enum vmcs_field_width { -- cgit v1.2.3 From b791abf3201d724ac372c2ba1fa6e90d192e1dbf Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:04 +0530 Subject: powerpc/papr_scm: Fetch nvdimm health information from PHYP Implement support for fetching nvdimm health information via H_SCM_HEALTH hcall as documented in Ref[1]. The hcall returns a pair of 64-bit bitmaps, the bitwise-and of which is then stored in 'struct papr_scm_priv' and subsequently partially exposed to user-space via newly introduced dimm specific attribute 'papr/flags'. Since the hcall is costly, the health information is cached and only re-queried 60s after the previous successful hcall. The patch also adds documentation text describing the flags reported by the new sysfs attribute 'papr/flags' at Documentation/ABI/testing/sysfs-bus-papr-pmem. [1] commit 58b278f568f0 ("powerpc: Provide initial documentation for PAPR hcalls") Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K .
V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-4-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 168 +++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index f35592423380..0c091622b15e 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -22,6 +23,44 @@ (1ul << ND_CMD_GET_CONFIG_DATA) | \ (1ul << ND_CMD_SET_CONFIG_DATA)) +/* DIMM health bitmap bitmap indicators */ +/* SCM device is unable to persist memory contents */ +#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) +/* SCM device failed to persist memory contents */ +#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) +/* SCM device contents are persisted from previous IPL */ +#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2)) +/* SCM device contents are not persisted from previous IPL */ +#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) +/* SCM device memory life remaining is critically low */ +#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) +/* SCM device will be garded off next IPL due to failure */ +#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) +/* SCM contents cannot persist due to current platform health status */ +#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) +/* SCM device is unable to persist memory contents in certain conditions */ +#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7)) +/* SCM device is encrypted */ +#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8)) +/* SCM device has been scrubbed and locked */ +#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9)) + +/* Bits status indicators for health bitmap indicating unarmed dimm */ +#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ + PAPR_PMEM_HEALTH_UNHEALTHY) 
+ +/* Bits status indicators for health bitmap indicating unflushed dimm */ +#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) + +/* Bits status indicators for health bitmap indicating unrestored dimm */ +#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) + +/* Bit status indicators for smart event notification */ +#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ + PAPR_PMEM_HEALTH_FATAL | \ + PAPR_PMEM_HEALTH_UNHEALTHY) + +/* private struct associated with each region */ struct papr_scm_priv { struct platform_device *pdev; struct device_node *dn; @@ -39,6 +78,15 @@ struct papr_scm_priv { struct resource res; struct nd_region *region; struct nd_interleave_set nd_set; + + /* Protect dimm health data from concurrent read/writes */ + struct mutex health_mutex; + + /* Last time the health information of the dimm was updated */ + unsigned long lasthealth_jiffies; + + /* Health information for the dimm */ + u64 health_bitmap; }; static int drc_pmem_bind(struct papr_scm_priv *p) @@ -144,6 +192,61 @@ err_out: return drc_pmem_bind(p); } +/* + * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the + * health information. + */ +static int __drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long ret[PLPAR_HCALL_BUFSIZE]; + long rc; + + /* issue the hcall */ + rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index); + if (rc != H_SUCCESS) { + dev_err(&p->pdev->dev, + "Failed to query health information, Err:%ld\n", rc); + return -ENXIO; + } + + p->lasthealth_jiffies = jiffies; + p->health_bitmap = ret[0] & ret[1]; + + dev_dbg(&p->pdev->dev, + "Queried dimm health info. 
Bitmap:0x%016lx Mask:0x%016lx\n", + ret[0], ret[1]); + + return 0; +} + +/* Min interval in seconds for assuming stable dimm health */ +#define MIN_HEALTH_QUERY_INTERVAL 60 + +/* Query cached health info and if needed call __drc_pmem_query_health */ +static int drc_pmem_query_health(struct papr_scm_priv *p) +{ + unsigned long cache_timeout; + int rc; + + /* Protect concurrent modifications to papr_scm_priv */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + return rc; + + /* Jiffies offset for which the health data is assumed to be same */ + cache_timeout = p->lasthealth_jiffies + + msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000); + + /* Fetch new health info if it's older than MIN_HEALTH_QUERY_INTERVAL */ + if (time_after(jiffies, cache_timeout)) + rc = __drc_pmem_query_health(p); + else + /* Assume cached health data is valid */ + rc = 0; + + mutex_unlock(&p->health_mutex); + return rc; +} static int papr_scm_meta_get(struct papr_scm_priv *p, struct nd_cmd_get_config_data_hdr *hdr) @@ -286,6 +389,64 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, return 0; } +static ssize_t flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *dimm = to_nvdimm(dev); + struct papr_scm_priv *p = nvdimm_provider_data(dimm); + struct seq_buf s; + u64 health; + int rc; + + rc = drc_pmem_query_health(p); + if (rc) + return rc; + + /* Copy health_bitmap locally, check masks & update out buffer */ + health = READ_ONCE(p->health_bitmap); + + seq_buf_init(&s, buf, PAGE_SIZE); + if (health & PAPR_PMEM_UNARMED_MASK) + seq_buf_printf(&s, "not_armed "); + + if (health & PAPR_PMEM_BAD_SHUTDOWN_MASK) + seq_buf_printf(&s, "flush_fail "); + + if (health & PAPR_PMEM_BAD_RESTORE_MASK) + seq_buf_printf(&s, "restore_fail "); + + if (health & PAPR_PMEM_ENCRYPTED) + seq_buf_printf(&s, "encrypted "); + + if (health & PAPR_PMEM_SMART_EVENT_MASK) + seq_buf_printf(&s, "smart_notify "); + + if (health & PAPR_PMEM_SCRUBBED_AND_LOCKED)
+ seq_buf_printf(&s, "scrubbed locked "); + + if (seq_buf_used(&s)) + seq_buf_printf(&s, "\n"); + + return seq_buf_used(&s); +} +DEVICE_ATTR_RO(flags); + +/* papr_scm specific dimm attributes */ +static struct attribute *papr_nd_attributes[] = { + &dev_attr_flags.attr, + NULL, +}; + +static struct attribute_group papr_nd_attribute_group = { + .name = "papr", + .attrs = papr_nd_attributes, +}; + +static const struct attribute_group *papr_nd_attr_groups[] = { + &papr_nd_attribute_group, + NULL, +}; + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; @@ -312,8 +473,8 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) dimm_flags = 0; set_bit(NDD_LABELING, &dimm_flags); - p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags, - PAPR_SCM_DIMM_CMD_MASK, 0, NULL); + p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups, + dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL); if (!p->nvdimm) { dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn); goto err; @@ -399,6 +560,9 @@ static int papr_scm_probe(struct platform_device *pdev) if (!p) return -ENOMEM; + /* Initialize the dimm mutex */ + mutex_init(&p->health_mutex); + /* optional DT properties */ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size); -- cgit v1.2.3 From b5f38f09e1558c20265a2976b0337bab143a66c7 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:05 +0530 Subject: powerpc/papr_scm: Improve error logging and handling papr_scm_ndctl() Since papr_scm_ndctl() can be called from outside papr_scm, it's exposed to the possibility of receiving NULL as value of 'cmd_rc' argument. This patch updates papr_scm_ndctl() to protect against such possibility by assigning it a pointer to a local variable in case cmd_rc == NULL. Finally the patch also updates the 'default' case to add a debug log for unknown 'cmd' values. Signed-off-by: Vaibhav Jain Reviewed-by: Ira Weiny Cc: "Aneesh Kumar K .
V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-5-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/platforms/pseries/papr_scm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 0c091622b15e..692ad3d79826 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -355,11 +355,16 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, { struct nd_cmd_get_config_size *get_size_hdr; struct papr_scm_priv *p; + int rc; /* Only dimm-specific calls are supported atm */ if (!nvdimm) return -EINVAL; + /* Use a local variable in case cmd_rc pointer is NULL */ + if (!cmd_rc) + cmd_rc = &rc; + p = nvdimm_provider_data(nvdimm); switch (cmd) { @@ -381,6 +386,7 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, break; default: + dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; } -- cgit v1.2.3 From f517f7925b7b453cb83be06c268ba057b78e4792 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:06 +0530 Subject: ndctl/papr_scm,uapi: Add support for PAPR nvdimm specific methods Introduce support for PAPR NVDIMM Specific Methods (PDSM) in papr_scm module and add the command family NVDIMM_FAMILY_PAPR to the white list of NVDIMM command sets. Also advertise support for ND_CMD_CALL for the nvdimm command mask and implement necessary scaffolding in the module to handle ND_CMD_CALL ioctl and PDSM requests that we receive. The layout of the PDSM request as we expect from libnvdimm/libndctl is described in newly introduced uapi header 'papr_pdsm.h' which defines a 'struct nd_pkg_pdsm' and a maximal union named 'nd_pdsm_payload'. 
These new structs together with 'struct nd_cmd_pkg' form a pdsm envelope that's sent by libndctl to libnvdimm and serviced by papr_scm in 'papr_scm_service_pdsm()'. The PDSM request is communicated by member 'struct nd_cmd_pkg.nd_command' together with other information on the pdsm payload (size-in, size-out). The patch also introduces 'struct pdsm_cmd_desc', instances of which are stored in an array __pdsm_cmd_descriptors[] indexed with PDSM cmd, and a corresponding access function pdsm_cmd_desc() is introduced. 'struct pdsm_cmd_desc' holds the service function for a given PDSM and corresponding payload in/out sizes. A new function papr_scm_service_pdsm() is introduced and is called from papr_scm_ndctl() in case a PDSM request is received via ND_CMD_CALL command from libnvdimm. The function performs validation on the PDSM payload based on info present in corresponding PDSM descriptor and if valid calls the 'struct pdsm_cmd_desc.service' function to service the PDSM. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K .
V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-6-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 95 +++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 193 +++++++++++++++++++++++++++++- 2 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/papr_pdsm.h (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h new file mode 100644 index 000000000000..28115152aa4e --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * PAPR nvDimm Specific Methods (PDSM) and structs for libndctl + * + * (C) Copyright IBM 2020 + * + * Author: Vaibhav Jain + */ + +#ifndef _UAPI_ASM_POWERPC_PAPR_PDSM_H_ +#define _UAPI_ASM_POWERPC_PAPR_PDSM_H_ + +#include +#include + +/* + * PDSM Envelope: + * + * The ioctl ND_CMD_CALL exchange data between user-space and kernel via + * envelope which consists of 2 headers sections and payload sections as + * illustrated below: + * +-----------------+---------------+---------------------------+ + * | 64-Bytes | 8-Bytes | Max 184-Bytes | + * +-----------------+---------------+---------------------------+ + * | ND-HEADER | PDSM-HEADER | PDSM-PAYLOAD | + * +-----------------+---------------+---------------------------+ + * | nd_family | | | + * | nd_size_out | cmd_status | | + * | nd_size_in | reserved | nd_pdsm_payload | + * | nd_command | payload --> | | + * | nd_fw_size | | | + * | nd_payload ---> | | | + * +---------------+-----------------+---------------------------+ + * + * ND Header: + * This is the generic libnvdimm header described as 'struct nd_cmd_pkg' + * which is interpreted by libnvdimm before passed on to papr_scm. 
Important + * member fields used are: + * 'nd_family' : (In) NVDIMM_FAMILY_PAPR + * 'nd_size_in' : (In) PDSM-HEADER + PDSM-IN-PAYLOAD (usually 0) + * 'nd_size_out' : (In) PDSM-HEADER + PDSM-RETURN-PAYLOAD + * 'nd_command' : (In) One of PAPR_PDSM_XXX + * 'nd_fw_size' : (Out) PDSM-HEADER + size of actual payload returned + * + * PDSM Header: + * This is papr-scm specific header that precedes the payload. This is defined + * as nd_pkg_pdsm. Following fields are available in this header: + * + * 'cmd_status' : (Out) Errors if any encountered while servicing PDSM. + * 'reserved' : Not used, reserved for future and should be set to 0. + * 'payload' : A union of all the possible payload structs + * + * PDSM Payload: + * + * The layout of the PDSM Payload is defined by various structs shared between + * papr_scm and libndctl so that contents of payload can be interpreted. As such + * it's defined as a union of all possible payload structs as + * 'union nd_pdsm_payload'. Based on the value of 'nd_cmd_pkg.nd_command' + * appropriate member of the union is accessed. + */ + +/* Max payload size that we can handle */ +#define ND_PDSM_PAYLOAD_MAX_SIZE 184 + +/* Size of the PDSM header that precedes the payload */ +#define ND_PDSM_HDR_SIZE \ + (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) + +/* + * Methods to be embedded in ND_CMD_CALL request. These are sent to the kernel + * via 'nd_cmd_pkg.nd_command' member of the ioctl struct + */ +enum papr_pdsm { + PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_MAX, +}; + +/* Maximal union that can hold all possible payload types */ +union nd_pdsm_payload { + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; +} __packed; + +/* + * PDSM-header + payload expected with ND_CMD_CALL ioctl from libnvdimm + * Valid member of union 'payload' is identified via 'nd_cmd_pkg.nd_command' + * that should always precede this struct when sent to papr_scm via CMD_CALL + * interface.
+ */ +struct nd_pkg_pdsm { + __s32 cmd_status; /* Out: Sub-cmd status returned back */ + __u16 reserved[2]; /* Ignored and to be set as '0' */ + union nd_pdsm_payload payload; +} __packed; + +#endif /* _UAPI_ASM_POWERPC_PAPR_PDSM_H_ */ diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 692ad3d79826..d3bbf9940ba4 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -15,13 +15,15 @@ #include #include +#include #define BIND_ANY_ADDR (~0ul) #define PAPR_SCM_DIMM_CMD_MASK \ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \ (1ul << ND_CMD_GET_CONFIG_DATA) | \ - (1ul << ND_CMD_SET_CONFIG_DATA)) + (1ul << ND_CMD_SET_CONFIG_DATA) | \ + (1ul << ND_CMD_CALL)) /* DIMM health bitmap bitmap indicators */ /* SCM device is unable to persist memory contents */ @@ -349,17 +351,195 @@ static int papr_scm_meta_set(struct papr_scm_priv *p, return 0; } +/* + * Do a sanity checks on the inputs args to dimm-control function and return + * '0' if valid. Validation of PDSM payloads happens later in + * papr_scm_service_pdsm. 
+ */ +static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + unsigned long cmd_mask = PAPR_SCM_DIMM_CMD_MASK; + struct nd_cmd_pkg *nd_cmd; + struct papr_scm_priv *p; + enum papr_pdsm pdsm; + + /* Only dimm-specific calls are supported atm */ + if (!nvdimm) + return -EINVAL; + + /* get the provider data from struct nvdimm */ + p = nvdimm_provider_data(nvdimm); + + if (!test_bit(cmd, &cmd_mask)) { + dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd); + return -EINVAL; + } + + /* For CMD_CALL verify pdsm request */ + if (cmd == ND_CMD_CALL) { + /* Verify the envelope and envelop size */ + if (!buf || + buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n", + buf_len); + return -EINVAL; + } + + /* Verify that the nd_cmd_pkg.nd_family is correct */ + nd_cmd = (struct nd_cmd_pkg *)buf; + + if (nd_cmd->nd_family != NVDIMM_FAMILY_PAPR) { + dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n", + nd_cmd->nd_family); + return -EINVAL; + } + + pdsm = (enum papr_pdsm)nd_cmd->nd_command; + + /* Verify if the pdsm command is valid */ + if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", + pdsm); + return -EINVAL; + } + + /* Have enough space to hold returned 'nd_pkg_pdsm' header */ + if (nd_cmd->nd_size_out < ND_PDSM_HDR_SIZE) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n", + pdsm); + return -EINVAL; + } + } + + /* Let the command be further processed */ + return 0; +} + +/* + * 'struct pdsm_cmd_desc' + * Identifies supported PDSMs' expected length of in/out payloads + * and pdsm service function. + * + * size_in : Size of input payload if any in the PDSM request. + * size_out : Size of output payload if any in the PDSM request. + * service : Service function for the PDSM request. Return semantics: + * rc < 0 : Error servicing PDSM and rc indicates the error. 
+ * rc >=0 : Serviced successfully and 'rc' indicate number of + * bytes written to payload. + */ +struct pdsm_cmd_desc { + u32 size_in; + u32 size_out; + int (*service)(struct papr_scm_priv *dimm, + union nd_pdsm_payload *payload); +}; + +/* Holds all supported PDSMs' command descriptors */ +static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { + [PAPR_PDSM_MIN] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, + /* New PDSM command descriptors to be added below */ + + /* Empty */ + [PAPR_PDSM_MAX] = { + .size_in = 0, + .size_out = 0, + .service = NULL, + }, +}; + +/* Given a valid pdsm cmd return its command descriptor else return NULL */ +static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd) +{ + if (cmd >= 0 || cmd < ARRAY_SIZE(__pdsm_cmd_descriptors)) + return &__pdsm_cmd_descriptors[cmd]; + + return NULL; +} + +/* + * For a given pdsm request call an appropriate service function. + * Returns errors if any while handling the pdsm command package. 
+ */ +static int papr_scm_service_pdsm(struct papr_scm_priv *p, + struct nd_cmd_pkg *pkg) +{ + /* Get the PDSM header and PDSM command */ + struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload; + enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command; + const struct pdsm_cmd_desc *pdsc; + int rc; + + /* Fetch corresponding pdsm descriptor for validation and servicing */ + pdsc = pdsm_cmd_desc(pdsm); + + /* Validate pdsm descriptor */ + /* Ensure that reserved fields are 0 */ + if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n", + pdsm); + return -EINVAL; + } + + /* If pdsm expects some input, then ensure that the size_in matches */ + if (pdsc->size_in && + pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n", + pdsm, pkg->nd_size_in); + return -EINVAL; + } + + /* If pdsm wants to return data, then ensure that size_out matches */ + if (pdsc->size_out && + pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n", + pdsm, pkg->nd_size_out); + return -EINVAL; + } + + /* Service the pdsm */ + if (pdsc->service) { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm); + + rc = pdsc->service(p, &pdsm_pkg->payload); + + if (rc < 0) { + /* error encountered while servicing pdsm */ + pdsm_pkg->cmd_status = rc; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } else { + /* pdsm serviced and 'rc' bytes written to payload */ + pdsm_pkg->cmd_status = 0; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc; + } + } else { + dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n", + pdsm); + pdsm_pkg->cmd_status = -ENOENT; + pkg->nd_fw_size = ND_PDSM_HDR_SIZE; + } + + return pdsm_pkg->cmd_status; +} + static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) { struct 
nd_cmd_get_config_size *get_size_hdr; + struct nd_cmd_pkg *call_pkg = NULL; struct papr_scm_priv *p; int rc; - /* Only dimm-specific calls are supported atm */ - if (!nvdimm) - return -EINVAL; + rc = is_cmd_valid(nvdimm, cmd, buf, buf_len); + if (rc) { + pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc); + return rc; + } /* Use a local variable in case cmd_rc pointer is NULL */ if (!cmd_rc) @@ -385,6 +565,11 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, *cmd_rc = papr_scm_meta_set(p, buf); break; + case ND_CMD_CALL: + call_pkg = (struct nd_cmd_pkg *)buf; + *cmd_rc = papr_scm_service_pdsm(p, call_pkg); + break; + default: dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd); return -EINVAL; -- cgit v1.2.3 From d35f18b554be015b6fa89fad6447c6fce8e6ad66 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Mon, 15 Jun 2020 18:14:07 +0530 Subject: powerpc/papr_scm: Implement support for PAPR_PDSM_HEALTH This patch implements support for PDSM request 'PAPR_PDSM_HEALTH' that returns a newly introduced 'struct nd_papr_pdsm_health' instance containing dimm health information back to user space in response to ND_CMD_CALL. This functionality is implemented in newly introduced papr_pdsm_health() that queries the nvdimm health information and then copies this information to the package payload whose layout is defined by 'struct nd_papr_pdsm_health'. Signed-off-by: Vaibhav Jain Cc: "Aneesh Kumar K . 
V" Cc: Dan Williams Cc: Michael Ellerman Cc: Ira Weiny Link: https://lore.kernel.org/r/20200615124407.32596-7-vaibhav@linux.ibm.com Signed-off-by: Dan Williams --- arch/powerpc/include/uapi/asm/papr_pdsm.h | 37 ++++++++++++++++++++++ arch/powerpc/platforms/pseries/papr_scm.c | 51 +++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h index 28115152aa4e..9ccecc1d6840 100644 --- a/arch/powerpc/include/uapi/asm/papr_pdsm.h +++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h @@ -66,17 +66,54 @@ #define ND_PDSM_HDR_SIZE \ (sizeof(struct nd_pkg_pdsm) - ND_PDSM_PAYLOAD_MAX_SIZE) +/* Various nvdimm health indicators */ +#define PAPR_PDSM_DIMM_HEALTHY 0 +#define PAPR_PDSM_DIMM_UNHEALTHY 1 +#define PAPR_PDSM_DIMM_CRITICAL 2 +#define PAPR_PDSM_DIMM_FATAL 3 + +/* + * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH + * Various flags indicate the health status of the dimm. + * + * extension_flags : Any extension fields present in the struct. + * dimm_unarmed : Dimm not armed. So contents wont persist. + * dimm_bad_shutdown : Previous shutdown did not persist contents. + * dimm_bad_restore : Contents from previous shutdown werent restored. + * dimm_scrubbed : Contents of the dimm have been scrubbed. + * dimm_locked : Contents of the dimm cant be modified until CEC reboot + * dimm_encrypted : Contents of dimm are encrypted. + * dimm_health : Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX + */ +struct nd_papr_pdsm_health { + union { + struct { + __u32 extension_flags; + __u8 dimm_unarmed; + __u8 dimm_bad_shutdown; + __u8 dimm_bad_restore; + __u8 dimm_scrubbed; + __u8 dimm_locked; + __u8 dimm_encrypted; + __u16 dimm_health; + }; + __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; + }; +}; + /* * Methods to be embedded in ND_CMD_CALL request. 
These are sent to the kernel * via 'nd_cmd_pkg.nd_command' member of the ioctl struct */ enum papr_pdsm { PAPR_PDSM_MIN = 0x0, + PAPR_PDSM_HEALTH, PAPR_PDSM_MAX, }; /* Maximal union that can hold all possible payload types */ union nd_pdsm_payload { + struct nd_papr_pdsm_health health; __u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE]; } __packed; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index d3bbf9940ba4..9c569078a09f 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -416,6 +416,52 @@ static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf, return 0; } +/* Fetch the DIMM health info and populate it in provided package. */ +static int papr_pdsm_health(struct papr_scm_priv *p, + union nd_pdsm_payload *payload) +{ + int rc; + + /* Ensure dimm health mutex is taken preventing concurrent access */ + rc = mutex_lock_interruptible(&p->health_mutex); + if (rc) + goto out; + + /* Always fetch upto date dimm health data ignoring cached values */ + rc = __drc_pmem_query_health(p); + if (rc) { + mutex_unlock(&p->health_mutex); + goto out; + } + + /* update health struct with various flags derived from health bitmap */ + payload->health = (struct nd_papr_pdsm_health) { + .extension_flags = 0, + .dimm_unarmed = !!(p->health_bitmap & PAPR_PMEM_UNARMED_MASK), + .dimm_bad_shutdown = !!(p->health_bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK), + .dimm_bad_restore = !!(p->health_bitmap & PAPR_PMEM_BAD_RESTORE_MASK), + .dimm_scrubbed = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_locked = !!(p->health_bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED), + .dimm_encrypted = !!(p->health_bitmap & PAPR_PMEM_ENCRYPTED), + .dimm_health = PAPR_PDSM_DIMM_HEALTHY, + }; + + /* Update field dimm_health based on health_bitmap flags */ + if (p->health_bitmap & PAPR_PMEM_HEALTH_FATAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_FATAL; + else if (p->health_bitmap & 
PAPR_PMEM_HEALTH_CRITICAL) + payload->health.dimm_health = PAPR_PDSM_DIMM_CRITICAL; + else if (p->health_bitmap & PAPR_PMEM_HEALTH_UNHEALTHY) + payload->health.dimm_health = PAPR_PDSM_DIMM_UNHEALTHY; + + /* struct populated hence can release the mutex now */ + mutex_unlock(&p->health_mutex); + rc = sizeof(struct nd_papr_pdsm_health); + +out: + return rc; +} + /* * 'struct pdsm_cmd_desc' * Identifies supported PDSMs' expected length of in/out payloads @@ -444,6 +490,11 @@ static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = { }, /* New PDSM command descriptors to be added below */ + [PAPR_PDSM_HEALTH] = { + .size_in = 0, + .size_out = sizeof(struct nd_papr_pdsm_health), + .service = papr_pdsm_health, + }, /* Empty */ [PAPR_PDSM_MAX] = { .size_in = 0, -- cgit v1.2.3 From 0bdcfa182506526fbe4e088ff9ca86a31b81828d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 15 Jun 2020 16:12:47 +1000 Subject: powerpc/64s: Fix KVM interrupt using wrong save area The CTR register reload in the KVM interrupt path used the wrong save area for SLB (and NMI) interrupts. 
Fixes: 9600f261acaa ("powerpc/64s/exception: Move KVM test to common code") Cc: stable@vger.kernel.org # v5.7+ Reported-by: Christian Zigotzky Signed-off-by: Nicholas Piggin Tested-by: Christian Zigotzky Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615061247.1310763-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e70ebb5c318c..fa080694e581 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -270,7 +270,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endif - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 BEGIN_FTR_SECTION ld r10,IAREA+EX_PPR(r13) @@ -298,7 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .if IKVM_SKIP 89: mtocrf 0x80,r9 - ld r10,PACA_EXGEN+EX_CTR(r13) + ld r10,IAREA+EX_CTR(r13) mtctr r10 ld r9,IAREA+EX_R9(r13) ld r10,IAREA+EX_R10(r13) -- cgit v1.2.3 From 1907774c37f052ebc7606b6c24ae6d455ed07d85 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 May 2020 09:35:11 -0500 Subject: ia64: kernel: unwind_i.h: Replace zero-length array with flexible-array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://github.com/KSPP/linux/issues/21 Signed-off-by: Gustavo A. R. 
Silva --- arch/ia64/kernel/unwind_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h index 67994a7e5816..1dd57ba44327 100644 --- a/arch/ia64/kernel/unwind_i.h +++ b/arch/ia64/kernel/unwind_i.h @@ -42,7 +42,7 @@ enum unw_register_index { struct unw_info_block { u64 header; - u64 desc[0]; /* unwind descriptors */ + u64 desc[]; /* unwind descriptors */ /* personality routine and language-specific data follow behind descriptors */ }; -- cgit v1.2.3 From b9dbe0101e344e8339406a11b7a91d4a0c50ad13 Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Tue, 9 Jun 2020 17:58:29 +0100 Subject: ARM: dts: NSP: Disable PL330 by default, add dma-coherent property Currently the PL330 is enabled by default. However if left in IDM reset, as is the case with the Meraki and Synology NSP devices, the system will hang when probing for the PL330's AMBA peripheral ID. We therefore should be able to disable it in these cases. The PL330 is also included among of the list of peripherals put into coherent mode, so "dma-coherent" has been added here as well. 
Fixes: 5fa1026a3e4d ("ARM: dts: NSP: Add PL330 support") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 4 +++- arch/arm/boot/dts/bcm958522er.dts | 4 ++++ arch/arm/boot/dts/bcm958525er.dts | 4 ++++ arch/arm/boot/dts/bcm958525xmc.dts | 4 ++++ arch/arm/boot/dts/bcm958622hr.dts | 4 ++++ arch/arm/boot/dts/bcm958623hr.dts | 4 ++++ arch/arm/boot/dts/bcm958625hr.dts | 4 ++++ arch/arm/boot/dts/bcm958625k.dts | 4 ++++ 8 files changed, 31 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index da6d70f09ef1..920c0f561e5c 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -200,7 +200,7 @@ status = "disabled"; }; - dma@20000 { + dma: dma@20000 { compatible = "arm,pl330", "arm,primecell"; reg = <0x20000 0x1000>; interrupts = , @@ -215,6 +215,8 @@ clocks = <&iprocslow>; clock-names = "apb_pclk"; #dma-cells = <1>; + dma-coherent; + status = "disabled"; }; sdio: sdhci@21000 { diff --git a/arch/arm/boot/dts/bcm958522er.dts b/arch/arm/boot/dts/bcm958522er.dts index 8c388eb8a08f..7be4c4e628e0 100644 --- a/arch/arm/boot/dts/bcm958522er.dts +++ b/arch/arm/boot/dts/bcm958522er.dts @@ -58,6 +58,10 @@ /* USB 3 support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958525er.dts b/arch/arm/boot/dts/bcm958525er.dts index c339771bb22e..e58ed7e95346 100644 --- a/arch/arm/boot/dts/bcm958525er.dts +++ b/arch/arm/boot/dts/bcm958525er.dts @@ -58,6 +58,10 @@ /* USB 3 support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958525xmc.dts b/arch/arm/boot/dts/bcm958525xmc.dts index 1c72ec8288de..716da62f5788 100644 --- a/arch/arm/boot/dts/bcm958525xmc.dts +++ b/arch/arm/boot/dts/bcm958525xmc.dts @@ -58,6 +58,10 @@ /* XHCI support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status 
= "okay"; }; diff --git a/arch/arm/boot/dts/bcm958622hr.dts b/arch/arm/boot/dts/bcm958622hr.dts index 96a021cebd97..a49c2fd21f4a 100644 --- a/arch/arm/boot/dts/bcm958622hr.dts +++ b/arch/arm/boot/dts/bcm958622hr.dts @@ -58,6 +58,10 @@ /* USB 3 and SLIC support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958623hr.dts b/arch/arm/boot/dts/bcm958623hr.dts index b2c7f21d471e..dd6dff6452b8 100644 --- a/arch/arm/boot/dts/bcm958623hr.dts +++ b/arch/arm/boot/dts/bcm958623hr.dts @@ -58,6 +58,10 @@ /* USB 3 and SLIC support needed to be complete */ +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 536fb24f38bb..a71371b4065e 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -69,6 +69,10 @@ status = "okay"; }; +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; diff --git a/arch/arm/boot/dts/bcm958625k.dts b/arch/arm/boot/dts/bcm958625k.dts index 3fcca12d83c2..7b84b54436ed 100644 --- a/arch/arm/boot/dts/bcm958625k.dts +++ b/arch/arm/boot/dts/bcm958625k.dts @@ -48,6 +48,10 @@ }; }; +&dma { + status = "okay"; +}; + &amac0 { status = "okay"; }; -- cgit v1.2.3 From 664f5f8de825648d1d31f6f5652e3cd117c77b50 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 4 Mar 2020 16:07:34 +0100 Subject: s390/seccomp: pass syscall arguments via seccomp_data Use __secure_computing() and pass the register data via seccomp_data so secure computing doesn't have to fetch it again. 
Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ce60a459a143..e319482da5f0 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -838,6 +838,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + if (is_compat_task()) + mask = 0xffffffff; + /* * The sysc_tracesys code in entry.S stored the system * call number to gprs[2]. @@ -854,17 +857,35 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) return -1; } +#ifdef CONFIG_SECCOMP /* Do the secure computing check after ptrace. */ - if (secure_computing()) { - /* seccomp failures shouldn't expose any additional code. */ - return -1; + if (unlikely(test_thread_flag(TIF_SECCOMP))) { + struct seccomp_data sd; + + if (is_compat_task()) { + sd.instruction_pointer = regs->psw.addr & 0x7fffffff; + sd.arch = AUDIT_ARCH_S390; + } else { + sd.instruction_pointer = regs->psw.addr; + sd.arch = AUDIT_ARCH_S390X; + } + + sd.nr = regs->gprs[2] & 0xffff; + sd.args[0] = regs->orig_gpr2 & mask; + sd.args[1] = regs->gprs[3] & mask; + sd.args[2] = regs->gprs[4] & mask; + sd.args[3] = regs->gprs[5] & mask; + sd.args[4] = regs->gprs[6] & mask; + sd.args[5] = regs->gprs[7] & mask; + + if (__secure_computing(&sd) == -1) + return -1; } +#endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gprs[2]); - if (is_compat_task()) - mask = 0xffffffff; audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, -- cgit v1.2.3 From cd29fa798001075a554b978df3a64e6656c25794 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:18:31 +0100 Subject: s390/ptrace: return -ENOSYS when invalid syscall is supplied The current code returns the syscall number when
an invalid syscall number is supplied and tracing is enabled. This makes the strace testsuite fail. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e319482da5f0..ceb8105a8086 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -837,6 +837,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { unsigned long mask = -1UL; + long ret = -1; if (is_compat_task()) mask = 0xffffffff; @@ -853,8 +854,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * debugger stored an invalid system call number. Skip * the system call and the system call restart handling. */ - clear_pt_regs_flag(regs, PIF_SYSCALL); - return -1; + goto skip; } #ifdef CONFIG_SECCOMP @@ -870,7 +870,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.arch = AUDIT_ARCH_S390X; } - sd.nr = regs->gprs[2] & 0xffff; + sd.nr = regs->int_code & 0xffff; sd.args[0] = regs->orig_gpr2 & mask; sd.args[1] = regs->gprs[3] & mask; sd.args[2] = regs->gprs[4] & mask; @@ -879,19 +879,26 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) sd.args[5] = regs->gprs[7] & mask; if (__secure_computing(&sd) == -1) - return -1; + goto skip; } #endif /* CONFIG_SECCOMP */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_sys_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->int_code & 0xffff); - audit_syscall_entry(regs->gprs[2], regs->orig_gpr2 & mask, + audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask, regs->gprs[3] &mask, regs->gprs[4] &mask, regs->gprs[5] &mask); + if ((signed long)regs->gprs[2] >= NR_syscalls) { + regs->gprs[2] = -ENOSYS; + ret = -ENOSYS; + } return regs->gprs[2]; +skip: + clear_pt_regs_flag(regs, PIF_SYSCALL); + return 
ret; } asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) -- cgit v1.2.3 From 00332c16b1604242a56289ff2b26e283dbad0812 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 6 Mar 2020 13:19:34 +0100 Subject: s390/ptrace: pass invalid syscall numbers to tracing tracing expects to see invalid syscalls, so pass it through. The syscall path in entry.S checks the syscall number before looking up the handler, so it is still safe. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/ptrace.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 50ff6dd0f995..496f74d98473 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -401,9 +401,9 @@ ENTRY(system_call) jnz .Lsysc_nr_ok # svc 0: system call number in %r1 llgfr %r1,%r1 # clear high word in r1 + sth %r1,__PT_INT_CODE+2(%r11) cghi %r1,NR_syscalls jnl .Lsysc_nr_ok - sth %r1,__PT_INT_CODE+2(%r11) slag %r8,%r1,3 .Lsysc_nr_ok: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ceb8105a8086..1fdbb2d19477 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -847,11 +847,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) * call number to gprs[2]. */ if (test_thread_flag(TIF_SYSCALL_TRACE) && - (tracehook_report_syscall_entry(regs) || - regs->gprs[2] >= NR_syscalls)) { + tracehook_report_syscall_entry(regs)) { /* - * Tracing decided this syscall should not happen or the - * debugger stored an invalid system call number. Skip + * Tracing decided this syscall should not happen. Skip * the system call and the system call restart handling. 
*/ goto skip; -- cgit v1.2.3 From 873e5a763d604c32988c4a78913a8dab3862d2f9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 9 Mar 2020 16:44:50 +0100 Subject: s390/ptrace: fix setting syscall number When strace wants to update the syscall number, it sets GPR2 to the desired number and updates the GPR via PTRACE_SETREGSET. It doesn't update regs->int_code which would cause the old syscall executed on syscall restart. As we cannot change the ptrace ABI and don't have a field for the interruption code, check whether the tracee is in a syscall and the last instruction was svc. In that case assume that the tracer wants to update the syscall number and copy the GPR2 value to regs->int_code. Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ptrace.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 1fdbb2d19477..3cc15c066298 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -323,6 +323,25 @@ static inline void __poke_user_per(struct task_struct *child, child->thread.per_user.end = data; } +static void fixup_int_code(struct task_struct *child, addr_t data) +{ + struct pt_regs *regs = task_pt_regs(child); + int ilc = regs->int_code >> 16; + u16 insn; + + if (ilc > 6) + return; + + if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16), + &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn)) + return; + + /* double check that tracee stopped on svc instruction */ + if ((insn >> 8) != 0xa) + return; + + regs->int_code = 0x20000 | (data & 0xffff); +} /* * Write a word to the user area of a process at location addr. This * operation does have an additional problem compared to peek_user. 
@@ -334,7 +353,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) struct user *dummy = NULL; addr_t offset; + if (addr < (addr_t) &dummy->regs.acrs) { + struct pt_regs *regs = task_pt_regs(child); /* * psw and gprs are stored on the stack */ @@ -352,7 +373,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) /* Invalid addressing mode bits */ return -EINVAL; } - *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data; + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) + fixup_int_code(child, data); + *(addr_t *)((addr_t) &regs->psw + addr) = data; } else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) { /* @@ -718,6 +743,10 @@ static int __poke_user_compat(struct task_struct *child, regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | (__u64)(tmp & PSW32_ADDR_AMODE); } else { + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) + fixup_int_code(child, data); /* gpr 0-15 */ *(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp; } -- cgit v1.2.3 From df8cea2a4bef3088c8570af543835992ce1d327e Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:06 +0800 Subject: s390/crypto: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf().
Link: https://lkml.kernel.org/r/20200509085608.41061-2-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/crypto/prng.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index d977643fa627..e1ae23911ccd 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -693,7 +693,7 @@ static ssize_t prng_chunksize_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size); } static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL); @@ -712,7 +712,7 @@ static ssize_t prng_counter_show(struct device *dev, counter = prng_data->prngws.byte_counter; mutex_unlock(&prng_data->mutex); - return snprintf(buf, PAGE_SIZE, "%llu\n", counter); + return scnprintf(buf, PAGE_SIZE, "%llu\n", counter); } static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL); @@ -721,7 +721,7 @@ static ssize_t prng_errorflag_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); + return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag); } static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL); @@ -731,9 +731,9 @@ static ssize_t prng_mode_show(struct device *dev, char *buf) { if (prng_mode == PRNG_MODE_TDES) - return snprintf(buf, PAGE_SIZE, "TDES\n"); + return scnprintf(buf, PAGE_SIZE, "TDES\n"); else - return snprintf(buf, PAGE_SIZE, "SHA512\n"); + return scnprintf(buf, PAGE_SIZE, "SHA512\n"); } static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL); @@ -756,7 +756,7 @@ static ssize_t prng_reseed_limit_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); + return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit); } static 
ssize_t prng_reseed_limit_store(struct device *dev, struct device_attribute *attr, @@ -787,7 +787,7 @@ static ssize_t prng_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "256\n"); + return scnprintf(buf, PAGE_SIZE, "256\n"); } static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL); -- cgit v1.2.3 From 92fd356514b7505f40ca72b38ef84070e6502a70 Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:07 +0800 Subject: s390: use scnprintf() in sys_##_prefix##_##_name##_show snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). Link: https://lkml.kernel.org/r/20200509085608.41061-3-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ccea9a245867..90a2a17239b0 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -181,7 +181,7 @@ static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ char *page) \ { \ - return snprintf(page, PAGE_SIZE, _format, ##args); \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ } #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ -- cgit v1.2.3 From 99448016ac792ac096def056828ab72c21f8582b Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Sat, 9 May 2020 16:56:08 +0800 Subject: s390/protvirt: use scnprintf() instead of snprintf() snprintf() returns the number of bytes that would be written, which may be greater than the actual length to be written. uv_query_facilities() should return the number of bytes printed into the buffer. This is the return value of scnprintf().
The other functions are the same. Link: https://lkml.kernel.org/r/20200509085608.41061-4-chenzhou10@huawei.com Signed-off-by: Chen Zhou Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/uv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 66e89b2866d7..c296e5c8dbf9 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -331,7 +331,7 @@ EXPORT_SYMBOL_GPL(arch_make_page_accessible); static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n", uv_info.inst_calls_list[0], uv_info.inst_calls_list[1], uv_info.inst_calls_list[2], @@ -344,7 +344,7 @@ static struct kobj_attribute uv_query_facilities_attr = static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_guest_cpus); } @@ -354,7 +354,7 @@ static struct kobj_attribute uv_query_max_guest_cpus_attr = static ssize_t uv_query_max_guest_vms(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%d\n", + return scnprintf(page, PAGE_SIZE, "%d\n", uv_info.max_num_sec_conf); } @@ -364,7 +364,7 @@ static struct kobj_attribute uv_query_max_guest_vms_attr = static ssize_t uv_query_max_guest_addr(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - return snprintf(page, PAGE_SIZE, "%lx\n", + return scnprintf(page, PAGE_SIZE, "%lx\n", uv_info.max_sec_stor_addr); } -- cgit v1.2.3 From 2b2a25845d534ac6d55086e35c033961fdd83a26 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 2 Jun 2020 12:25:24 -0700 Subject: s390/vdso: Use $(LD) instead of $(CC) to link vDSO Currently, the VDSO is being linked through $(CC). 
This does not match how the rest of the kernel links objects, which is through the $(LD) variable. When clang is built in a default configuration, it first attempts to use the target triple's default linker, which is just ld. However, the user can override this through the CLANG_DEFAULT_LINKER cmake define so that clang uses another linker by default, such as LLVM's own linker, ld.lld. This can be useful to get more optimized links across various different projects. However, this is problematic for the s390 vDSO because ld.lld does not have any s390 emulation support: https://github.com/llvm/llvm-project/blob/llvmorg-10.0.1-rc1/lld/ELF/Driver.cpp#L132-L150 Thus, if a user is using a toolchain with ld.lld as the default, they will see an error, even if they have specified ld.bfd through the LD make variable: $ make -j"$(nproc)" -s ARCH=s390 CROSS_COMPILE=s390x-linux-gnu- LLVM=1 \ LD=s390x-linux-gnu-ld \ defconfig arch/s390/kernel/vdso64/ ld.lld: error: unknown emulation: elf64_s390 clang-11: error: linker command failed with exit code 1 (use -v to see invocation) Normally, '-fuse-ld=bfd' could be used to get around this; however, this can be fragile, depending on paths and variable naming. The cleaner solution for the kernel is to take advantage of the fact that $(LD) can be invoked directly, which bypasses the heuristics of $(CC) and respects the user's choice. Similar changes have been done for ARM, ARM64, and MIPS.
Link: https://lkml.kernel.org/r/20200602192523.32758-1-natechancellor@gmail.com Link: https://github.com/ClangBuiltLinux/linux/issues/1041 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers [heiko.carstens@de.ibm.com: add --build-id flag] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/vdso64/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index bec19e7e6e1c..4a66a1cb919b 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - -Wl,--hash-style=both +ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id -T $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) @@ -37,8 +37,8 @@ KASAN_SANITIZE := n $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE + $(call if_changed,ld) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< -- cgit v1.2.3 From 478237a595120a18e9b52fd2c57a6e8b7a01e411 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 24 Mar 2020 12:10:27 +0000 Subject: s390/vdso: fix vDSO clock_getres() clock_getres in the vDSO library has to 
preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; and hrtimer_resolution depends on the enablement of the high resolution timers that can happen either at compile or at run time. Fix the s390 vdso implementation of clock_getres keeping a copy of hrtimer_resolution in vdso data and using that directly. Link: https://lkml.kernel.org/r/20200324121027.21665-1-vincenzo.frascino@arm.com Signed-off-by: Vincenzo Frascino Acked-by: Martin Schwidefsky [heiko.carstens@de.ibm.com: use llgf for proper zero extension] Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/vdso.h | 1 + arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/time.c | 1 + arch/s390/kernel/vdso64/clock_getres.S | 10 +++++----- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 3bcfdeb01395..0cd085cdeb4f 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -36,6 +36,7 @@ struct vdso_data { __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */ __u32 ts_dir; /* TOD steering direction 0x64 */ __u64 ts_end; /* TOD steering end 0x68 */ + __u32 hrtimer_res; /* hrtimer resolution 0x70 */ }; struct vdso_per_cpu_data { diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 165031bd3370..5d8cc1864566 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -76,6 +76,7 @@ int main(void) OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift); OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir); OFFSET(__VDSO_TS_END, vdso_data, ts_end); + OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); @@ -86,7 +87,6 @@ int main(void) DEFINE(__CLOCK_REALTIME_COARSE, 
CLOCK_REALTIME_COARSE); DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f9d070d016e3..b1113b519432 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -301,6 +301,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->tk_mult = tk->tkr_mono.mult; vdso_data->tk_shift = tk->tkr_mono.shift; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++vdso_data->tb_update_count; } diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 081435398e0a..0c79caa32b59 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -17,12 +17,14 @@ .type __kernel_clock_getres,@function __kernel_clock_getres: CFI_STARTPROC - larl %r1,4f + larl %r1,3f + lg %r0,0(%r1) cghi %r2,__CLOCK_REALTIME_COARSE je 0f cghi %r2,__CLOCK_MONOTONIC_COARSE je 0f - larl %r1,3f + larl %r1,_vdso_data + llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1) cghi %r2,__CLOCK_REALTIME je 0f cghi %r2,__CLOCK_MONOTONIC @@ -36,7 +38,6 @@ __kernel_clock_getres: jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ - lg %r0,0(%r1) xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */ stg %r0,8(%r3) /* store tp->tv_usec */ 1: lghi %r2,0 @@ -45,6 +46,5 @@ __kernel_clock_getres: svc 0 br %r14 CFI_ENDPROC -3: .quad __CLOCK_REALTIME_RES -4: .quad __CLOCK_COARSE_RES +3: .quad __CLOCK_COARSE_RES .size __kernel_clock_getres,.-__kernel_clock_getres -- cgit v1.2.3 From 64438e1bc0cdbe6d30bcdcb976f935eb3c297adc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Jun 2020 10:36:05 +0200 Subject: s390/numa: let NODES_SHIFT depend on NEED_MULTIPLE_NODES Qian Cai reported: """ When NUMA=n and nr_node_ids=2, in apply_wqattrs_prepare(), it has, for_each_node(node) { if 
(wq_calc_node_cpumask(... where it will trigger a booting warning, WARNING: workqueue cpumask: online intersect > possible intersect because it found 2 nodes and wq_numa_possible_cpumask[1] is an empty cpumask. """ Let NODES_SHIFT depend on NEED_MULTIPLE_NODES like it is done on other architectures in order to fix this. Fixes: 701dc81e7412 ("s390/mm: remove fake numa support") Reported-by: Qian Cai Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 194824932a60..c7d7ede6300c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -462,6 +462,7 @@ config NUMA config NODES_SHIFT int + depends on NEED_MULTIPLE_NODES default "1" config SCHED_SMT -- cgit v1.2.3 From d7af722344e6dc52d87649100516515263e15c75 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Tue, 9 Jun 2020 23:45:21 +0200 Subject: ARM: dts: am335x-pocketbeagle: Fix mmc0 Write Protect AM3358 pin mcasp0_aclkr (ZCZ ball B13) [0] is routed to P1.31 header [1] Mode 4 of this pin is mmc0_sdwp (SD Write Protect). A signal connected to P1.31 may accidentally trigger mmc0 write protection. To avoid this situation, do not put mcasp0_aclkr in mode 4 (mmc0_sdwp) by default. 
[0] http://www.ti.com/lit/ds/symlink/am3358.pdf [1] https://github.com/beagleboard/pocketbeagle/wiki/System-Reference-Manual#531_Expansion_Headers Fixes: 047905376a16 (ARM: dts: Add am335x-pocketbeagle) Signed-off-by: Robert Nelson Signed-off-by: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pocketbeagle.dts | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-pocketbeagle.dts b/arch/arm/boot/dts/am335x-pocketbeagle.dts index 4da719098028..f0b222201b86 100644 --- a/arch/arm/boot/dts/am335x-pocketbeagle.dts +++ b/arch/arm/boot/dts/am335x-pocketbeagle.dts @@ -88,7 +88,6 @@ AM33XX_PADCONF(AM335X_PIN_MMC0_DAT3, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CMD, PIN_INPUT_PULLUP, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MMC0_CLK, PIN_INPUT_PULLUP, MUX_MODE0) - AM33XX_PADCONF(AM335X_PIN_MCASP0_ACLKR, PIN_INPUT, MUX_MODE4) /* (B12) mcasp0_aclkr.mmc0_sdwp */ >; }; -- cgit v1.2.3 From 9cf28e41f9f768791f54ee18333239fda6927ed8 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 Jun 2020 10:19:50 -0700 Subject: ARM: dts: Fix duovero smsc interrupt for suspend While testing the recent suspend and resume regressions I noticed that duovero can still end up losing edge gpio interrupts on runtime suspend. This causes NFSroot easily stopping working after resume on duovero. Let's fix the issue by using gpio level interrupts for smsc as then the gpio interrupt state is seen by the gpio controller on resume. 
Fixes: 731b409878a3 ("ARM: dts: Configure duovero for to allow core retention during idle") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-duovero-parlor.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-duovero-parlor.dts b/arch/arm/boot/dts/omap4-duovero-parlor.dts index 8047e8cdb3af..4548d87534e3 100644 --- a/arch/arm/boot/dts/omap4-duovero-parlor.dts +++ b/arch/arm/boot/dts/omap4-duovero-parlor.dts @@ -139,7 +139,7 @@ ethernet@gpmc { reg = <5 0 0xff>; interrupt-parent = <&gpio2>; - interrupts = <12 IRQ_TYPE_EDGE_FALLING>; /* gpio_44 */ + interrupts = <12 IRQ_TYPE_LEVEL_LOW>; /* gpio_44 */ phy-mode = "mii"; -- cgit v1.2.3 From c030688d4427658bc4e947111341f75d2cb3d526 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 Jun 2020 10:23:40 -0700 Subject: ARM: dts: Fix omap4 system timer source clocks I accidentally flipped the system timer to use system clock instead of the 32k source clock. Fixes: 14b1925a7219 ("ARM: dts: Configure system timers for omap4") Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4.dtsi b/arch/arm/boot/dts/omap4.dtsi index 6c2b07f0704d..4400f5f8e099 100644 --- a/arch/arm/boot/dts/omap4.dtsi +++ b/arch/arm/boot/dts/omap4.dtsi @@ -662,6 +662,6 @@ ti,no-idle; timer@0 { assigned-clocks = <&l4_wkup_clkctrl OMAP4_TIMER1_CLKCTRL 24>; - assigned-clock-parents = <&sys_clkin_ck>; + assigned-clock-parents = <&sys_32k_ck>; }; }; -- cgit v1.2.3 From 80bf72598663496d08b3c0231377db6a99d7fd68 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Mon, 15 Jun 2020 17:57:01 +0200 Subject: ARM: dts: am5729: beaglebone-ai: fix rgmii phy-mode Since commit cd28d1d6e52e ("net: phy: at803x: Disable phy delay for RGMII mode") the networking is broken on the BeagleBone AI which has the AR8035 PHY for Gigabit Ethernet [0]. 
The fix is to switch from phy-mode = "rgmii" to phy-mode = "rgmii-rxid". Note: Grygorii made a similar DT fix for other AM57xx boards with a different phy in commit 820f8a870f65 ("ARM: dts: am57xx: fix networking on boards with ksz9031 phy"). [0] https://git.io/Jf7PX Fixes: 520557d4854b ("ARM: dts: am5729: beaglebone-ai: adding device tree") Cc: Vinod Koul Reviewed-by: Grygorii Strashko Signed-off-by: Robert Nelson Signed-off-by: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am5729-beagleboneai.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am5729-beagleboneai.dts b/arch/arm/boot/dts/am5729-beagleboneai.dts index 9877d7709d41..4c51c6b05e64 100644 --- a/arch/arm/boot/dts/am5729-beagleboneai.dts +++ b/arch/arm/boot/dts/am5729-beagleboneai.dts @@ -505,7 +505,7 @@ &cpsw_emac0 { phy-handle = <&phy0>; - phy-mode = "rgmii"; + phy-mode = "rgmii-rxid"; }; &ocp { -- cgit v1.2.3 From 034aa9cd698e315c767af1bac3fd1ff8898d2cd7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 Jun 2020 16:27:43 +0100 Subject: arm64: pgtable: Clear the GP bit for non-executable kernel pages Commit cca98e9f8b5e ("mm: enforce that vmap can't map pages executable") introduced 'pgprot_nx(prot)' for arm64 but collided silently with the BTI support during the merge window, which endeavours to clear the GP bit for non-executable kernel mappings in set_memory_nx(). For consistency between the two APIs, clear the GP bit in pgprot_nx(). 
Acked-by: Mark Rutland Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200615154642.3579-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6dbd267ab931..758e2d1577d0 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -416,7 +416,7 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) #define pgprot_nx(prot) \ - __pgprot_modify(prot, 0, PTE_PXN) + __pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN) /* * Mark the prot value as uncacheable and unbufferable. -- cgit v1.2.3 From e575fb9e76c8e33440fb859572a8b7d430f053d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 18:29:11 +0100 Subject: arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n When I squashed the 'allnoconfig' compiler warning about the set_sve_default_vl() function being defined but not used in commit 1e570f512cbd ("arm64/sve: Eliminate data races on sve_default_vl"), I accidentally broke the build for configs where ARM64_SVE is enabled, but SYSCTL is not. Fix this by only compiling the SVE sysctl support if both CONFIG_SVE=y and CONFIG_SYSCTL=y. 
Cc: Dave Martin Reported-by: Qian Cai Link: https://lore.kernel.org/r/20200616131808.GA1040@lca.pw Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d9eee9194511..55c8f3ec6705 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -349,7 +349,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) return sve_vl_from_vq(__bit_to_vq(bit)); } -#ifdef CONFIG_SYSCTL +#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -394,9 +394,9 @@ static int __init sve_sysctl_init(void) return 0; } -#else /* ! CONFIG_SYSCTL */ +#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ static int __init sve_sysctl_init(void) { return 0; } -#endif /* ! CONFIG_SYSCTL */ +#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) -- cgit v1.2.3 From ff58155ca4fa7e931f34d948fa09fe14c6a66116 Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Tue, 16 Jun 2020 18:25:47 -0400 Subject: x86/purgatory: Add -fno-stack-protector The purgatory Makefile removes -fstack-protector options if they were configured in, but does not currently add -fno-stack-protector. If gcc was configured with the --enable-default-ssp configure option, this results in the stack protector still being enabled for the purgatory (absent distro-specific specs files that might disable it again for freestanding compilations), if the main kernel is being compiled with stack protection enabled (if it's disabled for the main kernel, the top-level Makefile will add -fno-stack-protector). 
This will break the build since commit e4160b2e4b02 ("x86/purgatory: Fail the build if purgatory.ro has missing symbols") and prior to that would have caused runtime failure when trying to use kexec. Explicitly add -fno-stack-protector to avoid this, as done in other Makefiles that need to disable the stack protector. Reported-by: Gabriel C Signed-off-by: Arvind Sankar Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index b04e6e72a592..088bd764e0b7 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -34,6 +34,7 @@ KCOV_INSTRUMENT := n PURGATORY_CFLAGS_REMOVE := -mcmodel=kernel PURGATORY_CFLAGS := -mcmodel=large -ffreestanding -fno-zero-initialized-in-bss PURGATORY_CFLAGS += $(DISABLE_STACKLEAK_PLUGIN) -DDISABLE_BRANCH_PROFILING +PURGATORY_CFLAGS += $(call cc-option,-fno-stack-protector) # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That # in turn leaves some undefined symbols like __fentry__ in purgatory and not -- cgit v1.2.3 From 9b38cc704e844e41d9cf74e647bff1d249512cb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 12 May 2020 17:03:18 +0900 Subject: kretprobe: Prevent triggering kretprobe from within kprobe_flush_task Ziqian reported lockup when adding retprobe on _raw_spin_lock_irqsave. 
My test was also able to trigger lockdep output: ============================================ WARNING: possible recursive locking detected 5.6.0-rc6+ #6 Not tainted -------------------------------------------- sched-messaging/2767 is trying to acquire lock: ffffffff9a492798 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_hash_lock+0x52/0xa0 but task is already holding lock: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(kretprobe_table_locks[i].lock)); lock(&(kretprobe_table_locks[i].lock)); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by sched-messaging/2767: #0: ffffffff9a491a18 (&(kretprobe_table_locks[i].lock)){-.-.}, at: kretprobe_trampoline+0x0/0x50 stack backtrace: CPU: 3 PID: 2767 Comm: sched-messaging Not tainted 5.6.0-rc6+ #6 Call Trace: dump_stack+0x96/0xe0 __lock_acquire.cold.57+0x173/0x2b7 ? native_queued_spin_lock_slowpath+0x42b/0x9e0 ? lockdep_hardirqs_on+0x590/0x590 ? __lock_acquire+0xf63/0x4030 lock_acquire+0x15a/0x3d0 ? kretprobe_hash_lock+0x52/0xa0 _raw_spin_lock_irqsave+0x36/0x70 ? kretprobe_hash_lock+0x52/0xa0 kretprobe_hash_lock+0x52/0xa0 trampoline_handler+0xf8/0x940 ? kprobe_fault_handler+0x380/0x380 ? find_held_lock+0x3a/0x1c0 kretprobe_trampoline+0x25/0x50 ? lock_acquired+0x392/0xbc0 ? _raw_spin_lock_irqsave+0x50/0x70 ? __get_valid_kprobe+0x1f0/0x1f0 ? _raw_spin_unlock_irqrestore+0x3b/0x40 ? finish_task_switch+0x4b9/0x6d0 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 The code within the kretprobe handler checks for probe reentrancy, so we won't trigger any _raw_spin_lock_irqsave probe in there. 
The problem is in outside kprobe_flush_task, where we call: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave where _raw_spin_lock_irqsave triggers the kretprobe and installs kretprobe_trampoline handler on _raw_spin_lock_irqsave return. The kretprobe_trampoline handler is then executed with already locked kretprobe_table_locks, and first thing it does is to lock kretprobe_table_locks ;-) the whole lockup path like: kprobe_flush_task kretprobe_table_lock raw_spin_lock_irqsave _raw_spin_lock_irqsave ---> probe triggered, kretprobe_trampoline installed ---> kretprobe_table_locks locked kretprobe_trampoline trampoline_handler kretprobe_hash_lock(current, &head, &flags); <--- deadlock Adding kprobe_busy_begin/end helpers that mark code with fake probe installed to prevent triggering of another kprobe within this code. Using these helpers in kprobe_flush_task, so the probe recursion protection check is hit and the probe is never set to prevent above lockup. Link: http://lkml.kernel.org/r/158927059835.27680.7011202830041561604.stgit@devnote2 Fixes: ef53d9c5e4da ("kprobes: improve kretprobe scalability with hashed locking") Cc: Ingo Molnar Cc: "Gustavo A . R . Silva" Cc: Anders Roxell Cc: "Naveen N . 
Rao" Cc: Anil S Keshavamurthy Cc: David Miller Cc: Ingo Molnar Cc: Peter Zijlstra Cc: stable@vger.kernel.org Reported-by: "Ziqian SUN (Zamir)" Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/core.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..8a5ec10e95dc 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -753,16 +753,11 @@ asm( NOKPROBE_SYMBOL(kretprobe_trampoline); STACK_FRAME_NON_STANDARD(kretprobe_trampoline); -static struct kprobe kretprobe_kprobe = { - .addr = (void *)kretprobe_trampoline, -}; - /* * Called from kretprobe_trampoline */ __used __visible void *trampoline_handler(struct pt_regs *regs) { - struct kprobe_ctlblk *kcb; struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; struct hlist_node *tmp; @@ -772,16 +767,12 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) void *frame_pointer; bool skipped = false; - preempt_disable(); - /* * Set a dummy kprobe for avoiding kretprobe recursion. * Since kretprobe never run in kprobe handler, kprobe must not * be running at this point. 
*/ - kcb = get_kprobe_ctlblk(); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; + kprobe_busy_begin(); INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -857,7 +848,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) __this_cpu_write(current_kprobe, &ri->rp->kp); ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, &kretprobe_kprobe); + __this_cpu_write(current_kprobe, &kprobe_busy); } recycle_rp_inst(ri, &empty_rp); @@ -873,8 +864,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); + kprobe_busy_end(); hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); -- cgit v1.2.3 From 4d0831e8a029c03f49f434f28b8faef9f0bd403f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 14 Jun 2020 23:43:41 +0900 Subject: kconfig: unify cc-option and as-option cc-option and as-option are almost the same; both pass the flag to $(CC). The main difference is the cc-option stops before the assemble stage (-S option) whereas as-option stops after (-c option). I chose -S because it is slightly faster, but $(cc-option,-gz=zlib) returns a wrong result (https://lkml.org/lkml/2020/6/9/1529). It has been fixed by commit 7b16994437c7 ("Makefile: Improve compressed debug info support detection"), but the assembler should always be invoked for more reliable compiler option tests. However, you cannot simply replace -S with -c because the following code in lib/Kconfig.debug would break: depends on $(cc-option,-gsplit-dwarf) The combination of -c and -gsplit-dwarf does not accept /dev/null as output. $ cat /dev/null | gcc -gsplit-dwarf -S -x c - -o /dev/null $ echo $? 0 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o /dev/null objcopy: Warning: '/dev/null' is not an ordinary file $ echo $? 
1 $ cat /dev/null | gcc -gsplit-dwarf -c -x c - -o tmp.o $ echo $? 0 There is another flag that creates a separate file based on the object file path: $ cat /dev/null | gcc -ftest-coverage -c -x c - -o /dev/null :1: error: cannot open /dev/null.gcno So, we cannot use /dev/null to sink the output. Align the cc-option implementation with scripts/Kbuild.include. With -c option used in cc-option, as-option is unneeded. Signed-off-by: Masahiro Yamada Acked-by: Will Deacon --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..6eb18f45258e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1564,7 +1564,7 @@ config CC_HAS_SIGN_RETURN_ADDRESS def_bool $(cc-option,-msign-return-address=all) config AS_HAS_PAC - def_bool $(as-option,-Wa$(comma)-march=armv8.3-a) + def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a) config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) -- cgit v1.2.3 From cc5277fe66cf3ad68f41f1c539b2ef0d5e432974 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 2 Jun 2020 22:36:11 +0300 Subject: x86/resctrl: Fix a NULL vs IS_ERR() static checker warning in rdt_cdp_peer_get() The callers don't expect *d_cdp to be set to an error pointer, they only check for NULL. This leads to a static checker warning: arch/x86/kernel/cpu/resctrl/rdtgroup.c:2648 __init_one_rdt_domain() warn: 'd_cdp' could be an error pointer This would not trigger a bug in this specific case because __init_one_rdt_domain() calls it with a valid domain that would not have a negative id and thus not trigger the return of the ERR_PTR(). If this was a negative domain id then the call to rdt_find_domain() in domain_add_cpu() would have returned the ERR_PTR() much earlier and the creation of the domain with an invalid id would have been prevented. 
Even though a bug is not triggered currently the right and safe thing to do is to set the pointer to NULL because that is what can be checked for when the caller is handling the CDP and non-CDP cases. Fixes: 52eb74339a62 ("x86/resctrl: Fix rdt_find_domain() return value and checks") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Acked-by: Fenghua Yu Link: https://lkml.kernel.org/r/20200602193611.GA190851@mwanda --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 23b4b61319d3..3f844f14fc0a 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1117,6 +1117,7 @@ static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d, _d_cdp = rdt_find_domain(_r_cdp, d->id, NULL); if (WARN_ON(IS_ERR_OR_NULL(_d_cdp))) { _r_cdp = NULL; + _d_cdp = NULL; ret = -EINVAL; } -- cgit v1.2.3 From b9249cba25a5dce5de87e5404503a5e11832c2dd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 16 Jun 2020 19:03:49 +0100 Subject: arm64: bti: Require clang >= 10.0.1 for in-kernel BTI support Unfortunately, most versions of clang that support BTI are capable of miscompiling the kernel when converting a switch statement into a jump table. 
As an example, attempting to spawn a KVM guest results in a panic: [ 56.253312] Kernel panic - not syncing: bad mode [ 56.253834] CPU: 0 PID: 279 Comm: lkvm Not tainted 5.8.0-rc1 #2 [ 56.254225] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 [ 56.254712] Call trace: [ 56.254952] dump_backtrace+0x0/0x1d4 [ 56.255305] show_stack+0x1c/0x28 [ 56.255647] dump_stack+0xc4/0x128 [ 56.255905] panic+0x16c/0x35c [ 56.256146] bad_el0_sync+0x0/0x58 [ 56.256403] el1_sync_handler+0xb4/0xe0 [ 56.256674] el1_sync+0x7c/0x100 [ 56.256928] kvm_vm_ioctl_check_extension_generic+0x74/0x98 [ 56.257286] __arm64_sys_ioctl+0x94/0xcc [ 56.257569] el0_svc_common+0x9c/0x150 [ 56.257836] do_el0_svc+0x84/0x90 [ 56.258083] el0_sync_handler+0xf8/0x298 [ 56.258361] el0_sync+0x158/0x180 This is because the switch in kvm_vm_ioctl_check_extension_generic() is executed as an indirect branch to tail-call through a jump table: ffff800010032dc8: 3869694c ldrb w12, [x10, x9] ffff800010032dcc: 8b0c096b add x11, x11, x12, lsl #2 ffff800010032dd0: d61f0160 br x11 However, where the target case uses the stack, the landing pad is elided due to the presence of a paciasp instruction: ffff800010032e14: d503233f paciasp ffff800010032e18: a9bf7bfd stp x29, x30, [sp, #-16]! ffff800010032e1c: 910003fd mov x29, sp ffff800010032e20: aa0803e0 mov x0, x8 ffff800010032e24: 940017c0 bl ffff800010038d24 ffff800010032e28: 93407c00 sxtw x0, w0 ffff800010032e2c: a8c17bfd ldp x29, x30, [sp], #16 ffff800010032e30: d50323bf autiasp ffff800010032e34: d65f03c0 ret Unfortunately, this results in a fatal exception because paciasp is compatible only with branch-and-link (call) instructions and not simple indirect branches. A fix is being merged into Clang 10.0.1 so that a 'bti j' instruction is emitted as an explicit landing pad in this situation. Make in-kernel BTI depend on that compiler version when building with clang. 
Cc: Tom Stellard Cc: Daniel Kiss Reviewed-by: Mark Brown Acked-by: Dave Martin Reviewed-by: Nathan Chancellor Acked-by: Nick Desaulniers Link: https://lore.kernel.org/r/20200615105524.GA2694@willie-the-truck Link: https://lore.kernel.org/r/20200616183630.2445-1-will@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 31380da53689..4ae2419c14a8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1630,6 +1630,8 @@ config ARM64_BTI_KERNEL depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 depends on !CC_IS_GCC || GCC_VERSION >= 100100 + # https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55 + depends on !CC_IS_CLANG || CLANG_VERSION >= 100001 depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help -- cgit v1.2.3 From 687993ccf3b05070598b89fad97410b26d7bc9d2 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 15 Jun 2020 12:22:29 +0300 Subject: powerpc/8xx: use pmd_off() to access a PMD entry in pte_update() The pte_update() implementation for PPC_8xx unfolds page table from the PGD level to access a PMD entry. Since 8xx has only 2-level page table this can be simplified with pmd_off() shortcut. Replace explicit unfolding with pmd_off() and drop defines of pgd_index() and pgd_offset() that are no longer needed. 
Signed-off-by: Mike Rapoport Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200615092229.23142-1-rppt@kernel.org --- arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b56f14160ae5..5a590ceaec14 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp) * For other page sizes, we have a single entry in the table. */ #ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; - pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + pmd_t *pmd = pmd_off(mm, addr); if (!huge) num = PAGE_SIZE / SZ_4K; -- cgit v1.2.3 From 1497eea68624f6076bf3eaf66baec3771ea04045 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 16 Jun 2020 23:56:16 +1000 Subject: powerpc/syscalls: Use the number when building SPU syscall table Currently the macro that inserts entries into the SPU syscall table doesn't actually use the "nr" (syscall number) parameter. 
This does work, but it relies on the exact right number of syscall entries being emitted in order for the syscall numbers to line up with the array entries. If for example we had two entries with the same syscall number we wouldn't get an error, it would just cause all subsequent syscalls to be off by one in the spu_syscall_table. So instead change the macro to assign to the specific entry of the array, meaning any numbering overlap will be caught by the compiler. Signed-off-by: Michael Ellerman Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200616135617.2937252-1-mpe@ellerman.id.au --- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index cbee3666da07..abdef9bcf432 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -35,7 +35,7 @@ */ static void *spu_syscall_table[] = { -#define __SYSCALL(nr, entry) entry, +#define __SYSCALL(nr, entry) [nr] = entry, #include #undef __SYSCALL }; -- cgit v1.2.3 From 41d90b0c1108d1e46c48cf79964636c553844f4c Mon Sep 17 00:00:00 2001 From: Arvind Sankar Date: Wed, 17 Jun 2020 09:19:57 -0400 Subject: efi/x86: Setup stack correctly for efi_pe_entry Commit 17054f492dfd ("efi/x86: Implement mixed mode boot without the handover protocol") introduced a new entry point for the EFI stub to be booted in mixed mode on 32-bit firmware. When entered via efi32_pe_entry, control is first transferred to startup_32 to setup for the switch to long mode, and then the EFI stub proper is entered via efi_pe_entry. efi_pe_entry is an MS ABI function, and the ABI requires 32 bytes of shadow stack space to be allocated by the caller, as well as the stack being aligned to 8 mod 16 on entry. Allocate 40 bytes on the stack before switching to 64-bit mode when calling efi_pe_entry to account for this. 
For robustness, explicitly align boot_stack_end to 16 bytes. It is currently implicitly aligned since .bss is cacheline-size aligned, head_64.o is the first object file with a .bss section, and the heap and boot sizes are aligned. Fixes: 17054f492dfd ("efi/x86: Implement mixed mode boot without the handover protocol") Signed-off-by: Arvind Sankar Link: https://lore.kernel.org/r/20200617131957.2507632-1-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel --- arch/x86/boot/compressed/head_64.S | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e821a7d7d5c4..97d37f0a34f5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -213,7 +213,6 @@ SYM_FUNC_START(startup_32) * We place all of the values on our mini stack so lret can * used to perform that far jump. */ - pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED movl efi32_boot_args(%ebp), %edi @@ -224,11 +223,20 @@ SYM_FUNC_START(startup_32) movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer cmpl $0, %edx jnz 1f + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. 
+ */ + subl $40, %esp leal efi_pe_entry(%ebp), %eax movl %edi, %ecx // MS calling convention movl %esi, %edx 1: #endif + pushl $__KERNEL_CS pushl %eax /* Enter paged protected Mode, activating Long Mode */ @@ -784,6 +792,7 @@ SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 + .balign 16 SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end) /* -- cgit v1.2.3 From 2a55280a3675203496d302463b941834228b9875 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Jun 2020 15:41:35 +0200 Subject: efi/libstub: arm: Print CPU boot mode and MMU state at boot On 32-bit ARM, we may boot at HYP mode, or with the MMU and caches off (or both), even though the EFI spec does not actually support this. While booting at HYP mode is something we might tolerate, fiddling with the caches is a more serious issue, as disabling the caches is tricky to do safely from C code, and running without the Dcache makes it impossible to support unaligned memory accesses, which is another explicit requirement imposed by the EFI spec. So take note of the CPU mode and MMU state in the EFI stub diagnostic output so that we can easily diagnose any issues that may arise from this. E.g., EFI stub: Entering in SVC mode with MMU enabled Also, capture the CPSR and SCTLR system register values at EFI stub entry, and after ExitBootServices() returns, and check whether the MMU and Dcache were disabled at any point. If this is the case, a diagnostic message like the following will be emitted: efi: [Firmware Bug]: EFI stub was entered with MMU and Dcache disabled, please fix your firmware! 
efi: CPSR at EFI stub entry : 0x600001d3 efi: SCTLR at EFI stub entry : 0x00c51838 efi: CPSR after ExitBootServices() : 0x600001d3 efi: SCTLR after ExitBootServices(): 0x00c50838 Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm --- arch/arm/include/asm/efi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 84dc0ba822f5..5dcf3c6011b7 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -87,4 +87,11 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return dram_base + SZ_512M; } +struct efi_arm_entry_state { + u32 cpsr_before_ebs; + u32 sctlr_before_ebs; + u32 cpsr_after_ebs; + u32 sctlr_after_ebs; +}; + #endif /* _ASM_ARM_EFI_H */ -- cgit v1.2.3 From fe557319aa06c23cffc9346000f119547e0f289a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:53 +0200 Subject: maccess: rename probe_kernel_{read,write} to copy_{from,to}_kernel_nofault Better describe what these functions do. 
Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/ftrace.c | 3 ++- arch/arm/kernel/kgdb.c | 2 +- arch/arm64/kernel/insn.c | 4 ++-- arch/csky/kernel/ftrace.c | 5 +++-- arch/ia64/kernel/ftrace.c | 6 +++--- arch/mips/kernel/kprobes.c | 6 +++--- arch/nds32/kernel/ftrace.c | 5 +++-- arch/parisc/kernel/ftrace.c | 2 +- arch/parisc/kernel/kgdb.c | 4 ++-- arch/parisc/lib/memcpy.c | 2 +- arch/powerpc/kernel/module_64.c | 6 ++++-- arch/powerpc/kernel/trace/ftrace.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/perf/core-book3s.c | 3 ++- arch/riscv/kernel/ftrace.c | 3 ++- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/patch.c | 4 ++-- arch/s390/kernel/ftrace.c | 4 ++-- arch/sh/kernel/ftrace.c | 6 +++--- arch/um/kernel/maccess.c | 2 +- arch/x86/include/asm/ptrace.h | 4 ++-- arch/x86/kernel/dumpstack.c | 2 +- arch/x86/kernel/ftrace.c | 10 +++++----- arch/x86/kernel/kgdb.c | 6 +++--- arch/x86/kernel/kprobes/core.c | 5 +++-- arch/x86/kernel/kprobes/opt.c | 2 +- arch/x86/kernel/traps.c | 3 ++- arch/x86/mm/fault.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/maccess.c | 4 ++-- arch/x86/xen/enlighten_pv.c | 2 +- 31 files changed, 66 insertions(+), 57 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 10499d44964a..9a79ef6b1876 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -84,7 +84,8 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, old = __opcode_to_mem_arm(old); if (validate) { - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&replaced, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (replaced != old) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 6a95b9296640..7bd30c0a4280 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -236,7 +236,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) /* patch_text() only 
supports int-sized breakpoints */ BUILD_BUG_ON(sizeof(int) != BREAK_INSTR_SIZE); - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 684d871ae38d..a107375005bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -135,7 +135,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) int ret; __le32 val; - ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE); if (!ret) *insnp = le32_to_cpu(val); @@ -151,7 +151,7 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); raw_spin_unlock_irqrestore(&patch_lock, flags); diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 3c425b84e3be..b4a7ec1517ff 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -72,7 +72,8 @@ static int ftrace_check_current_nop(unsigned long hook) uint16_t olds[7]; unsigned long hook_pos = hook - 2; - if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + if (copy_from_kernel_nofault((void *)olds, (void *)hook_pos, + sizeof(nops))) return -EFAULT; if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { @@ -97,7 +98,7 @@ static int ftrace_modify_code(unsigned long hook, unsigned long target, make_jbsr(target, hook, call, nolr); - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + ret = copy_to_kernel_nofault((void *)hook_pos, enable ? 
call : nops, sizeof(nops)); if (ret) return -EPERM; diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index cee411e647ca..b2ab2d58fb30 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -108,7 +108,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, goto skip_check; /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -117,7 +117,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, skip_check: /* replace the text with the new text */ - if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) return -EPERM; flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); @@ -129,7 +129,7 @@ static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; unsigned long ip = rec->ip; - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (rec->flags & FTRACE_FL_CONVERTED) { struct ftrace_call_insn *call_insn, *tmp_call; diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 6cfae2411c04..d043c2f897fc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -86,9 +86,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) goto out; } - if ((probe_kernel_read(&prev_insn, p->addr - 1, - sizeof(mips_instruction)) == 0) && - insn_has_delayslot(prev_insn)) { + if (copy_from_kernel_nofault(&prev_insn, p->addr - 1, + sizeof(mips_instruction)) == 0 && + insn_has_delayslot(prev_insn)) { pr_notice("Kprobes for branch delayslot are not supported\n"); ret = -EINVAL; goto out; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 
22ab77ea27ad..3763b3f8c3db 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -131,13 +131,14 @@ static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn, unsigned long orig_insn[3]; if (validate) { - if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(orig_insn, (void *)pc, + MCOUNT_INSN_SIZE)) return -EFAULT; if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE)) return -EINVAL; } - if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE)) + if (copy_to_kernel_nofault((void *)pc, new_insn, MCOUNT_INSN_SIZE)) return -EPERM; return 0; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index b836fc61a24f..1df0f67ed667 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -172,7 +172,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ip = (void *)(rec->ip + 4 - size); - ret = probe_kernel_read(insn, ip, size); + ret = copy_from_kernel_nofault(insn, ip, size); if (ret) return ret; diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c index 664278db9b97..c4554ac13eac 100644 --- a/arch/parisc/kernel/kgdb.c +++ b/arch/parisc/kernel/kgdb.c @@ -154,8 +154,8 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { - int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, - BREAK_INSTR_SIZE); + int ret = copy_from_kernel_nofault(bpt->saved_instr, + (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (ret) return ret; diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index 94a9fe2702c2..4b75388190b4 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -57,7 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count) EXPORT_SYMBOL(raw_copy_in_user); EXPORT_SYMBOL(memcpy); -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t 
size) { if ((unsigned long)unsafe_src < PAGE_SIZE) return false; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index f4c2fa190192..ae2b188365b1 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -756,7 +756,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, stub = (struct ppc64_stub_entry *)addr; - if (probe_kernel_read(&magic, &stub->magic, sizeof(magic))) { + if (copy_from_kernel_nofault(&magic, &stub->magic, + sizeof(magic))) { pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } @@ -766,7 +767,8 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return -EFAULT; } - if (probe_kernel_read(&funcdata, &stub->funcdata, sizeof(funcdata))) { + if (copy_from_kernel_nofault(&funcdata, &stub->funcdata, + sizeof(funcdata))) { pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name); return -EFAULT; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 5e399628f51a..c1fede6ec934 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -226,7 +226,7 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; unsigned long tramp; - if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure that that this is still a 24bit jump */ @@ -249,7 +249,7 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ - if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { pr_err("Failed to read %lx\n", tramp); return -EFAULT; } diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index aedfd6e31e53..6c7a20af9fd6 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ 
-33,11 +33,11 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_kernel_read(&suffix, (void *)src + 4, 4); + err = copy_from_kernel_nofault(&suffix, (void *)src + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -64,7 +64,7 @@ int probe_kernel_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_kernel_read(&val, src, sizeof(val)); + err = copy_from_kernel_nofault(&val, src, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 13b9dd5e4a76..efe97ff82557 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -418,7 +418,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) __u64 target; if (is_kernel_addr(addr)) { - if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + if (copy_from_kernel_nofault(&instr, (void *)addr, + sizeof(instr))) return 0; return branch_target((struct ppc_inst *)&instr); diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 08396614d6f4..2ff63d0cbb50 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -38,7 +38,8 @@ static int ftrace_check_current_call(unsigned long hook_pos, * Read the text we want to modify; * return must be -EFAULT on read error */ - if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) return -EFAULT; /* diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index f16ade84a11f..a21fb21883e7 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -153,7 +153,7 @@ int do_single_step(struct pt_regs *regs) stepped_address = addr; /* Replace the op code with the break instruction */ - error = 
probe_kernel_write((void *)stepped_address, + error = copy_to_kernel_nofault((void *)stepped_address, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); /* Flush and return */ @@ -173,7 +173,7 @@ int do_single_step(struct pt_regs *regs) static void undo_single_step(struct pt_regs *regs) { if (stepped_opcode != 0) { - probe_kernel_write((void *)stepped_address, + copy_to_kernel_nofault((void *)stepped_address, (void *)&stepped_opcode, BREAK_INSTR_SIZE); flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE); diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index d4a64dfed342..3fe7a5296aa5 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -63,7 +63,7 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) waddr = patch_map(addr, FIX_TEXT_POKE0); - ret = probe_kernel_write(waddr, insn, len); + ret = copy_to_kernel_nofault(waddr, insn, len); patch_unmap(FIX_TEXT_POKE0); @@ -76,7 +76,7 @@ NOKPROBE_SYMBOL(patch_insn_write); #else static int patch_insn_write(void *addr, const void *insn, size_t len) { - return probe_kernel_write(addr, insn, len); + return copy_to_kernel_nofault(addr, insn, len); } NOKPROBE_SYMBOL(patch_insn_write); #endif /* CONFIG_MMU */ diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 44e01dd1e624..b388e87a08bf 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -83,7 +83,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ @@ -105,7 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { struct ftrace_insn orig, new, old; - if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) + if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old))) return 
-EFAULT; /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 1b04270e5460..0646c5961846 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -119,7 +119,7 @@ static void ftrace_mod_code(void) * But if one were to fail, then they all should, and if one were * to succeed, then they all should. */ - mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + mod_code_status = copy_to_kernel_nofault(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); /* if we fail, then kill any new writers */ @@ -203,7 +203,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, */ /* read the text we want to modify */ - if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; /* Make sure it is what we expect it to be */ @@ -268,7 +268,7 @@ static int ftrace_mod(unsigned long ip, unsigned long old_addr, { unsigned char code[MCOUNT_INSN_SIZE]; - if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + if (copy_from_kernel_nofault(code, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; if (old_addr != __raw_readl((unsigned long *)code)) diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c index e929c0966696..8ccd56813f68 100644 --- a/arch/um/kernel/maccess.c +++ b/arch/um/kernel/maccess.c @@ -7,7 +7,7 @@ #include #include -bool probe_kernel_read_allowed(const void *src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *src, size_t size) { void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index ebedeab48704..255b2dde2c1b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -278,7 +278,7 @@ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs } /* To avoid include hell, we can't include 
uaccess.h */ -extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long copy_from_kernel_nofault(void *dst, const void *src, size_t size); /** * regs_get_kernel_stack_nth() - get Nth entry of the stack @@ -298,7 +298,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, addr = regs_get_kernel_stack_nth_addr(regs, n); if (addr) { - ret = probe_kernel_read(&val, addr, sizeof(val)); + ret = copy_from_kernel_nofault(&val, addr, sizeof(val)); if (!ret) return val; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 456511b2284e..b037cfa7c0c5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -106,7 +106,7 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) bad_ip = user_mode(regs) && __chk_range_not_ok(prologue, OPCODE_BUFSIZE, TASK_SIZE_MAX); - if (bad_ip || probe_kernel_read(opcodes, (u8 *)prologue, + if (bad_ip || copy_from_kernel_nofault(opcodes, (u8 *)prologue, OPCODE_BUFSIZE)) { printk("%sCode: Bad RIP value.\n", loglvl); } else { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c84d28e90a58..51504566b3a6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -86,7 +86,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) * sure what we read is what we expected it to be before modifying it. 
*/ /* read the text we want to modify */ - if (probe_kernel_read(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { + if (copy_from_kernel_nofault(cur_code, (void *)ip, MCOUNT_INSN_SIZE)) { WARN_ON(1); return -EFAULT; } @@ -355,7 +355,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE); /* Copy ftrace_caller onto the trampoline memory */ - ret = probe_kernel_read(trampoline, (void *)start_offset, size); + ret = copy_from_kernel_nofault(trampoline, (void *)start_offset, size); if (WARN_ON(ret < 0)) goto fail; @@ -363,13 +363,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* The trampoline ends with ret(q) */ retq = (unsigned long)ftrace_stub; - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); - ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); if (WARN_ON(ret < 0)) goto fail; } @@ -506,7 +506,7 @@ static void *addr_from_call(void *ptr) union text_poke_insn call; int ret; - ret = probe_kernel_read(&call, ptr, CALL_INSN_SIZE); + ret = copy_from_kernel_nofault(&call, ptr, CALL_INSN_SIZE); if (WARN_ON_ONCE(ret < 0)) return NULL; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index c44fe7d8d9a4..68acd30c6b87 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -732,11 +732,11 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) int err; bpt->type = BP_BREAKPOINT; - err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); if (err) return err; - err = probe_kernel_write((char *)bpt->bpt_addr, + err = copy_to_kernel_nofault((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, 
BREAK_INSTR_SIZE); if (!err) return err; @@ -768,7 +768,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) return 0; knl_write: - return probe_kernel_write((char *)bpt->bpt_addr, + return copy_to_kernel_nofault((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3bafe1bd4dc7..f09985c87d73 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -243,7 +243,7 @@ __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) * Fortunately, we know that the original code is the ideal 5-byte * long NOP. */ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; @@ -346,7 +346,8 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) return 0; /* This can access kernel text if given address is not recovered */ - if (probe_kernel_read(dest, (void *)recovered_insn, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(dest, (void *)recovered_insn, + MAX_INSN_SIZE)) return 0; kernel_insn_init(insn, dest, MAX_INSN_SIZE); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 321c19950285..7af4c61dde52 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -56,7 +56,7 @@ found: * overwritten by jump destination address. In this case, original * bytes must be recovered from op->optinsn.copied_insn buffer. 
*/ - if (probe_kernel_read(buf, (void *)addr, + if (copy_from_kernel_nofault(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..7003f2e7b163 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -488,7 +488,8 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, u8 insn_buf[MAX_INSN_SIZE]; struct insn insn; - if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, + MAX_INSN_SIZE)) return GP_NO_HINT; kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 66be9bd60307..e996aa3833b8 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -442,7 +442,7 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index) return; } - if (probe_kernel_read(&desc, (void *)(gdt->address + offset), + if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset), sizeof(struct ldttss_desc))) { pr_alert("%s: 0x%hx -- GDT entry is not readable\n", name, index); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index bda909e3e37e..8b4afad84f4a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -737,7 +737,7 @@ static void __init test_wp_bit(void) __set_fixmap(FIX_WP_TEST, __pa_symbol(empty_zero_page), PAGE_KERNEL_RO); - if (probe_kernel_write((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { + if (copy_to_kernel_nofault((char *)fix_to_virt(FIX_WP_TEST), &z, 1)) { clear_fixmap(FIX_WP_TEST); printk(KERN_CONT "Ok.\n"); return; diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index e1d7d7477c22..92ec176a7293 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -9,7 +9,7 @@ static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); } -bool probe_kernel_read_allowed(const void 
*unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -22,7 +22,7 @@ bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; } #else -bool probe_kernel_read_allowed(const void *unsafe_src, size_t size) +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { return (unsigned long)unsafe_src >= TASK_SIZE_MAX; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 33b309d65955..acc49fa6a097 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -386,7 +386,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - probe_kernel_read(&dummy, v, 1); + copy_from_kernel_nofault(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); -- cgit v1.2.3 From c0ee37e85e0e47402b8bbe35b6cec8e06937ca58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:54 +0200 Subject: maccess: rename probe_user_{read,write} to copy_{from,to}_user_nofault Better describe what these functions do. 
Suggested-by: Linus Torvalds Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/process.c | 3 ++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 4 ++-- arch/powerpc/lib/inst.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 6 ++++-- arch/powerpc/perf/callchain_32.c | 2 +- arch/powerpc/perf/callchain_64.c | 2 +- arch/powerpc/perf/core-book3s.c | 3 ++- arch/powerpc/sysdev/fsl_pci.c | 4 ++-- 8 files changed, 17 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7bb7faf84490..d4d0d1048500 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1305,7 +1305,8 @@ void show_user_instructions(struct pt_regs *regs) for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) { int instr; - if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) { + if (copy_from_user_nofault(&instr, (void __user *)pc, + sizeof(instr))) { seq_buf_printf(&s, "XXXXXXXX "); continue; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 3cb0c9843d01..e738ea652192 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -64,9 +64,9 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, isync(); if (is_load) - ret = probe_user_read(to, (const void __user *)from, n); + ret = copy_from_user_nofault(to, (const void __user *)from, n); else - ret = probe_user_write((void __user *)to, from, n); + ret = copy_to_user_nofault((void __user *)to, from, n); /* switch the pid first to avoid running host with unallocated pid */ if (quadrant == 1 && pid != old_pid) diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c index 6c7a20af9fd6..9cc17eb62462 100644 --- a/arch/powerpc/lib/inst.c +++ b/arch/powerpc/lib/inst.c @@ -15,11 +15,11 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val, suffix; int err; - err = probe_user_read(&val, nip, sizeof(val)); + 
err = copy_from_user_nofault(&val, nip, sizeof(val)); if (err) return err; if (get_op(val) == OP_PREFIX) { - err = probe_user_read(&suffix, (void __user *)nip + 4, 4); + err = copy_from_user_nofault(&suffix, (void __user *)nip + 4, 4); *inst = ppc_inst_prefix(val, suffix); } else { *inst = ppc_inst(val); @@ -51,7 +51,7 @@ int probe_user_read_inst(struct ppc_inst *inst, unsigned int val; int err; - err = probe_user_read(&val, nip, sizeof(val)); + err = copy_from_user_nofault(&val, nip, sizeof(val)); if (!err) *inst = ppc_inst(val); diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 6f347fa29f41..9db7ada79d10 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -33,7 +33,8 @@ static unsigned int user_getsp32(unsigned int sp, int is_first) * which means that we've done all that we can do from * interrupt context. */ - if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)p, + sizeof(stack_frame))) return 0; if (!is_first) @@ -51,7 +52,8 @@ static unsigned long user_getsp64(unsigned long sp, int is_first) { unsigned long stack_frame[3]; - if (probe_user_read(stack_frame, (void __user *)sp, sizeof(stack_frame))) + if (copy_from_user_nofault(stack_frame, (void __user *)sp, + sizeof(stack_frame))) return 0; if (!is_first) diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c index f7d888d39cd3..542e68b8eae0 100644 --- a/arch/powerpc/perf/callchain_32.c +++ b/arch/powerpc/perf/callchain_32.c @@ -44,7 +44,7 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) ((unsigned long)ptr & 3)) return -EFAULT; - rc = probe_user_read(ret, ptr, sizeof(*ret)); + rc = copy_from_user_nofault(ret, ptr, sizeof(*ret)); if (IS_ENABLED(CONFIG_PPC64) && rc) return read_user_stack_slow(ptr, ret, 4); diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c index 
814d1c2c2b9c..fa2a1b83b9b0 100644 --- a/arch/powerpc/perf/callchain_64.c +++ b/arch/powerpc/perf/callchain_64.c @@ -50,7 +50,7 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) ((unsigned long)ptr & 7)) return -EFAULT; - if (!probe_user_read(ret, ptr, sizeof(*ret))) + if (!copy_from_user_nofault(ret, ptr, sizeof(*ret))) return 0; return read_user_stack_slow(ptr, ret, 8); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index efe97ff82557..cd6a742ac6ef 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -426,7 +426,8 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Userspace: need copy instruction here then translate it */ - if (probe_user_read(&instr, (unsigned int __user *)addr, sizeof(instr))) + if (copy_from_user_nofault(&instr, (unsigned int __user *)addr, + sizeof(instr))) return 0; target = branch_target((struct ppc_inst *)&instr); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4a8874bc1057..73fa37ca40ef 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1066,8 +1066,8 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) if (is_in_pci_mem_space(addr)) { if (user_mode(regs)) - ret = probe_user_read(&inst, (void __user *)regs->nip, - sizeof(inst)); + ret = copy_from_user_nofault(&inst, + (void __user *)regs->nip, sizeof(inst)); else ret = probe_kernel_address((void *)regs->nip, inst); -- cgit v1.2.3 From ac4e106d8934a5894811fc263f4b03fc8ed0fb7a Mon Sep 17 00:00:00 2001 From: Matthew Hagan Date: Sun, 14 Jun 2020 15:19:00 -0700 Subject: ARM: dts: NSP: Correct FA2 mailbox node The FA2 mailbox is specified at 0x18025000 but should actually be 0x18025c00, length 0x400 according to socregs_nsp.h and board_bu.c. Also the interrupt was off by one and should be GIC SPI 151 instead of 150. 
Fixes: 17d517172300 ("ARM: dts: NSP: Add mailbox (PDC) to NSP") Signed-off-by: Matthew Hagan Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 920c0f561e5c..3175266ede64 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -259,10 +259,10 @@ status = "disabled"; }; - mailbox: mailbox@25000 { + mailbox: mailbox@25c00 { compatible = "brcm,iproc-fa2-mbox"; - reg = <0x25000 0x445>; - interrupts = ; + reg = <0x25c00 0x400>; + interrupts = ; #mbox-cells = <1>; brcm,rx-status-len = <32>; brcm,use-bcm-hdr; -- cgit v1.2.3 From b3583fca5fb654af2cfc1c08259abb9728272538 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 2 Jun 2020 21:00:51 +0300 Subject: s390: fix syscall_get_error for compat processes If both the tracer and the tracee are compat processes, and gprs[2] is assigned a value by __poke_user_compat, then the higher 32 bits of gprs[2] are cleared, IS_ERR_VALUE() always returns false, and syscall_get_error() always returns 0. Fix the implementation by sign-extending the value for compat processes the same way as x86 implementation does. The bug was exposed to user space by commit 201766a20e30f ("ptrace: add PTRACE_GET_SYSCALL_INFO request") and detected by strace test suite. This change fixes strace syscall tampering on s390. Link: https://lkml.kernel.org/r/20200602180051.GA2427@altlinux.org Fixes: 753c4dd6a2fa2 ("[S390] ptrace changes") Cc: Elvira Khabirova Cc: stable@vger.kernel.org # v2.6.28+ Signed-off-by: Dmitry V. 
Levin Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/syscall.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index f073292e9fdb..d9d5de0f67ff 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0; + unsigned long error = regs->gprs[2]; +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) { + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long)(int)error; + } +#endif + return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, -- cgit v1.2.3 From 4fd6b5735c03c0955d93960d31f17d7144f5578f Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:50 +0800 Subject: arm64: dts: imx8mm-evk: correct ldo1/ldo2 voltage range Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just correct the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too.
Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mm datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 78cc25fa265d ("arm64: dts: imx8mm-evk: Add BD71847 PMIC") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts index e5ec8322796d..0f1d7f8aeac4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dts @@ -208,7 +208,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -216,7 +216,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From cfb12c8952f617df58d73d24161e539a035d82b0 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 22 May 2020 18:44:51 +0800 Subject: arm64: dts: imx8mn-ddr4-evk: correct ldo1/ldo2 voltage range Correct ldo1 voltage range from wrong high group(3.0V~3.3V) to low group (1.6V~1.9V) because the ldo1 should be 1.8V. Actually, two voltage groups have been supported at bd718x7-regulator driver, hence, just correct the voltage range to 1.6V~3.3V. For ldo2@0.8V, correct voltage range too.
Otherwise, ldo1 would be kept @3.0V and ldo2@0.9V which violate i.mx8mn datasheet as the below warning log in kernel: [ 0.995524] LDO1: Bringing 1800000uV into 3000000-3000000uV [ 0.999196] LDO2: Bringing 800000uV into 900000-900000uV Fixes: 3e44dd09736d ("arm64: dts: imx8mn-ddr4-evk: Add rohm,bd71847 PMIC support") Cc: stable@vger.kernel.org Signed-off-by: Robin Gong Reviewed-by: Dong Aisheng Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index d07e0e6a00cc..a1e5483dbbbe 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -113,7 +113,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -121,7 +121,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 618e07865b7453d02410c1f3407c2d78a670eabb Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 18 Jun 2020 09:58:28 +1200 Subject: arm64: mm: reserve hugetlb CMA after numa_init hugetlb_cma_reserve() is called at the wrong place. numa_init has not been done yet. so all reserved memory will be located at node0. 
Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma") Signed-off-by: Barry Song Reviewed-by: Anshuman Khandual Acked-by: Roman Gushchin Cc: Matthias Brugger Cc: Will Deacon Link: https://lore.kernel.org/r/20200617215828.25296-1-song.bao.hua@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e631e6425165..1e93cfc7c47a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -404,11 +404,6 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; dma_contiguous_reserve(arm64_dma32_phys_limit); - -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); -#endif - } void __init bootmem_init(void) @@ -424,6 +419,16 @@ void __init bootmem_init(void) min_low_pfn = min; arm64_numa_init(); + + /* + * must be done after arm64_numa_init() which calls numa_init() to + * initialize node_online_map that gets used in hugetlb_cma_reserve() + * while allocating required CMA size across online nodes. + */ +#ifdef CONFIG_ARM64_4K_PAGES + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); +#endif + /* * Sparsemem tries to allocate bootmem in memory_present(), so must be * done after the fixed reservations. -- cgit v1.2.3 From a13b9d0b97211579ea63b96c606de79b963c0f47 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Jun 2020 20:15:09 -0700 Subject: x86/cpu: Use pinning mask for CR4 bits needing to be 0 The X86_CR4_FSGSBASE bit of CR4 should not change after boot[1]. Older kernels should enforce this bit to zero, and newer kernels need to enforce it depending on boot-time configuration (e.g. "nofsgsbase"). To support a pinned bit being either 1 or 0, use an explicit mask in combination with the expected pinned bit values. 
[1] https://lore.kernel.org/lkml/20200527103147.GI325280@hirez.programming.kicks-ass.net Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/202006082013.71E29A42@keescook --- arch/x86/kernel/cpu/common.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 043d93cdcaad..95c090a45b4b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -347,6 +347,9 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +/* These bits should not change their value after CPU init is finished. */ +static const unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -371,20 +374,20 @@ EXPORT_SYMBOL(native_write_cr0); void native_write_cr4(unsigned long val) { - unsigned long bits_missing = 0; + unsigned long bits_changed = 0; set_register: asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { - bits_missing = ~val & cr4_pinned_bits; - val |= bits_missing; + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; goto set_register; } - /* Warn after we've set the missing bits. */ - WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", - bits_missing); + /* Warn after we've corrected the changed bits. 
*/ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); } } #if IS_MODULE(CONFIG_LKDTM) @@ -419,7 +422,7 @@ void cr4_init(void) if (boot_cpu_has(X86_FEATURE_PCID)) cr4 |= X86_CR4_PCIDE; if (static_branch_likely(&cr_pinning)) - cr4 |= cr4_pinned_bits; + cr4 = (cr4 & ~cr4_pinned_mask) | cr4_pinned_bits; __write_cr4(cr4); @@ -434,10 +437,7 @@ void cr4_init(void) */ static void __init setup_cr_pinning(void) { - unsigned long mask; - - mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); - cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & cr4_pinned_mask; static_key_enable(&cr_pinning.key); } -- cgit v1.2.3 From bf508ec95ca3b902f14bb311a7709e5cb57fbc49 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 17 Jun 2020 16:34:07 -0500 Subject: arm64: kexec_file: Use struct_size() in kmalloc() Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle and, audited and fixed manually. Signed-off-by: Gustavo A. R. 
Silva Link: https://lore.kernel.org/r/20200617213407.GA1385@embeddedor Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec_file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 522e6f517ec0..361a1143e09e 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -219,8 +219,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) MEMBLOCK_NONE, &start, &end, NULL) nr_ranges++; - cmem = kmalloc(sizeof(struct crash_mem) + - sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL); + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); if (!cmem) return -ENOMEM; -- cgit v1.2.3 From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2020 14:12:18 +0100 Subject: arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints Unprivileged memory accesses generated by the so-called "translated" instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire unexpectedly if kernel debugging is enabled. In such cases, the hw_breakpoint logic will invoke the user overflow handler which will typically raise a SIGTRAP back to the current task. This is futile when returning back to the kernel because (a) the signal won't have been delivered and (b) userspace can't handle the thing anyway. Avoid invoking the user overflow handler for watchpoints triggered by kernel uaccess routines, and instead single-step over the faulting instruction as we would if no overflow handler had been installed. 
(Fixes tag identifies the introduction of unprivileged memory accesses, which exposed this latent bug in the hw_breakpoint code) Cc: Catalin Marinas Cc: James Morse Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override") Reported-by: Luis Machado Signed-off-by: Will Deacon --- arch/arm64/kernel/hw_breakpoint.c | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 0b727edf4104..af234a1e08b7 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, return 0; } +static int watchpoint_report(struct perf_event *wp, unsigned long addr, + struct pt_regs *regs) +{ + int step = is_default_overflow_handler(wp); + struct arch_hw_breakpoint *info = counter_arch_bp(wp); + + info->trigger = addr; + + /* + * If we triggered a user watchpoint from a uaccess routine, then + * handle the stepping ourselves since userspace really can't help + * us with this. + */ + if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0) + step = 1; + else + perf_bp_event(wp, regs); + + return step; +} + static int watchpoint_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, u64 val; struct perf_event *wp, **slots; struct debug_info *debug_info; - struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; slots = this_cpu_ptr(wp_on_reg); @@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, if (dist != 0) continue; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - - /* Do we need to handle the stepping? 
*/ - if (is_default_overflow_handler(wp)) - step = 1; + step = watchpoint_report(wp, addr, regs); } - if (min_dist > 0 && min_dist != -1) { - /* No exact match found. */ - wp = slots[closest_match]; - info = counter_arch_bp(wp); - info->trigger = addr; - perf_bp_event(wp, regs); - /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(wp)) - step = 1; - } + /* No exact match found? */ + if (min_dist > 0 && min_dist != -1) + step = watchpoint_report(slots[closest_match], addr, regs); + rcu_read_unlock(); if (!step) -- cgit v1.2.3 From e353b325948d04f1305a7b4ad32eca87ba0823a2 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Tue, 26 May 2020 12:09:39 -0500 Subject: arm64: dts: imx8mm-beacon: Fix voltages on LDO1 and LDO2 LDO1 and LDO2 settings are wrong and cause the voltage to go above the maximum level of 2.15V permitted by the SoC to 3.0V. This patch is based on work done on the i.MX8M Mini-EVK which utilizes the same fix. Fixes: 593816fa2f35 ("arm64: dts: imx: Add Beacon i.MX8m-Mini development kit") Signed-off-by: Adam Ford Reviewed-by: Daniel Baluta Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index fb0137a8611c..94911b1707ef 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -136,7 +136,7 @@ ldo1_reg: LDO1 { regulator-name = "LDO1"; - regulator-min-microvolt = <3000000>; + regulator-min-microvolt = <1600000>; regulator-max-microvolt = <3300000>; regulator-boot-on; regulator-always-on; @@ -144,7 +144,7 @@ ldo2_reg: LDO2 { regulator-name = "LDO2"; - regulator-min-microvolt = <900000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From
0f1441b44e823a74f3f3780902a113e07c73fbf6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 12 Jun 2020 16:05:26 +0200 Subject: objtool: Fix noinstr vs KCOV Since many compilers cannot disable KCOV with a function attribute, help it to NOP out any __sanitizer_cov_*() calls injected in noinstr code. This turns: 12: e8 00 00 00 00 callq 17 13: R_X86_64_PLT32 __sanitizer_cov_trace_pc-0x4 into: 12: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 13: R_X86_64_NONE __sanitizer_cov_trace_pc-0x4 Just like recordmcount does. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Dmitry Vyukov --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a0cc524882d..883da0abf779 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -67,7 +67,7 @@ config X86 select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -- cgit v1.2.3 From 25f12ae45fc1931a1dce3cc59f9989a9d87834b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 Jun 2020 09:37:55 +0200 Subject: maccess: rename probe_kernel_address to get_kernel_nofault Better describe what this helper does, and match the naming of copy_from_kernel_nofault. Also switch the argument order around, so that it acts and looks like get_user(). 
Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/powerpc/kernel/kprobes.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/riscv/kernel/kgdb.c | 4 ++-- arch/riscv/kernel/traps.c | 4 ++-- arch/s390/mm/fault.c | 2 +- arch/sh/kernel/traps.c | 2 +- arch/x86/kernel/probe_roms.c | 20 ++++++++++---------- arch/x86/kernel/traps.c | 2 +- arch/x86/mm/fault.c | 6 +++--- arch/x86/pci/pcbios.c | 2 +- 18 files changed, 32 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 65a3b1e75480..49ce15c3612d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (probe_kernel_address((unsigned *)pc, bkpt)) + if (get_kernel_nofault(bkpt, (unsigned *)pc)) return 0; return bkpt == insn; diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 84718eddae60..81a627e6e1c5 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -774,7 +774,7 @@ static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_arm(instr); @@ -789,7 +789,7 @@ static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) if (user_mode(regs)) fault = get_user(instr, ip); else - fault = probe_kernel_address(ip, instr); + fault = get_kernel_nofault(instr, ip); *inst = __mem_to_opcode_thumb16(instr); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 50cc30acf106..227b2d9bae3d 100644 --- 
a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -376,7 +376,7 @@ static int call_undef_hook(struct pt_regs *regs) if (!user_mode(regs)) { __le32 instr_le; - if (probe_kernel_address((__force __le32 *)pc, instr_le)) + if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index cea15f2dd38d..ad4fc06e5f4b 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->ip, p)) + if (!get_kernel_nofault(p, &desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 230a6422b99f..6c435dbccca0 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!probe_kernel_address(&desc->addr, p)) + if (!get_kernel_nofault(p, &desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index d19871763ed4..bd311616fca8 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!probe_kernel_address(&desc->funcaddr, p)) + if (!get_kernel_nofault(p, &desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 652b2852bea3..e14a1862a3ca 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = 
probe_kernel_address(addr, instr); + err = get_kernel_nofault(instr, addr); if (err) return err; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 6f96f65ebfe8..9cc792a3a6a9 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -289,7 +289,7 @@ int kprobe_handler(struct pt_regs *regs) if (!p) { unsigned int instr; - if (probe_kernel_address(addr, instr)) + if (get_kernel_nofault(instr, addr)) goto no_kprobe; if (instr != BREAKPOINT_INSTRUCTION) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index d4d0d1048500..30955a0c32d0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1271,7 +1271,7 @@ static void show_instructions(struct pt_regs *regs) #endif if (!__kernel_text_address(pc) || - probe_kernel_address((const void *)pc, instr)) { + get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 73fa37ca40ef..040b9d01c079 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1069,7 +1069,7 @@ int fsl_pci_mcheck_exception(struct pt_regs *regs) ret = copy_from_user_nofault(&inst, (void __user *)regs->nip, sizeof(inst)); else - ret = probe_kernel_address((void *)regs->nip, inst); + ret = get_kernel_nofault(inst, (void *)regs->nip); if (!ret && mcheck_handle_load(regs, inst)) { regs->nip += 4; diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index a21fb21883e7..c3275f42d1ac 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -62,7 +62,7 @@ int get_step_address(struct pt_regs *regs, unsigned long *next_addr) unsigned int rs1_num, rs2_num; int op_code; - if (probe_kernel_address((void *)pc, op_code)) + if (get_kernel_nofault(op_code, (void *)pc)) return -EINVAL; if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { if (is_c_jalr_insn(op_code) || 
is_c_jr_insn(op_code)) { @@ -146,7 +146,7 @@ int do_single_step(struct pt_regs *regs) return error; /* Store the op code in the stepped address */ - error = probe_kernel_address((void *)addr, stepped_opcode); + error = get_kernel_nofault(stepped_opcode, (void *)addr); if (error) return error; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ecec1778e3a4..7d95cce5e47c 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -137,7 +137,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) { bug_insn_t insn; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; return GET_INSN_LENGTH(insn); @@ -165,7 +165,7 @@ int is_valid_bugaddr(unsigned long pc) if (pc < VMALLOC_START) return 0; - if (probe_kernel_address((bug_insn_t *)pc, insn)) + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) return 0; if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) return (insn == __BUG_INSN_32); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 6a24751557f0..d53c2e2ea1fd 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -105,7 +105,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long asce, unsigned long address) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index a33025451fcd..9c3d32b80038 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -118,7 +118,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < PAGE_OFFSET) return 0; - if (probe_kernel_address((insn_size_t *)addr, opcode)) + if (get_kernel_nofault(opcode, (insn_size_t *)addr)) return 0; if (opcode == TRAPA_BUG_OPCODE) return 1; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index ee0286390a4c..65b0dd2bf25c 100644 --- a/arch/x86/kernel/probe_roms.c +++ 
b/arch/x86/kernel/probe_roms.c @@ -99,7 +99,7 @@ static bool probe_list(struct pci_dev *pdev, unsigned short vendor, unsigned short device; do { - if (probe_kernel_address(rom_list, device) != 0) + if (get_kernel_nofault(device, rom_list) != 0) device = 0; if (device && match_id(pdev, vendor, device)) @@ -125,13 +125,13 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; rom = isa_bus_to_virt(res->start); - if (probe_kernel_address(rom + 0x18, offset) != 0) + if (get_kernel_nofault(offset, rom + 0x18) != 0) continue; - if (probe_kernel_address(rom + offset + 0x4, vendor) != 0) + if (get_kernel_nofault(vendor, rom + offset + 0x4) != 0) continue; - if (probe_kernel_address(rom + offset + 0x6, device) != 0) + if (get_kernel_nofault(device, rom + offset + 0x6) != 0) continue; if (match_id(pdev, vendor, device)) { @@ -139,8 +139,8 @@ static struct resource *find_oprom(struct pci_dev *pdev) break; } - if (probe_kernel_address(rom + offset + 0x8, list) == 0 && - probe_kernel_address(rom + offset + 0xc, rev) == 0 && + if (get_kernel_nofault(list, rom + offset + 0x8) == 0 && + get_kernel_nofault(rev, rom + offset + 0xc) == 0 && rev >= 3 && list && probe_list(pdev, vendor, rom + offset + list)) { oprom = res; @@ -183,14 +183,14 @@ static int __init romsignature(const unsigned char *rom) const unsigned short * const ptr = (const unsigned short *)rom; unsigned short sig; - return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; + return get_kernel_nofault(sig, ptr) == 0 && sig == ROMSIGNATURE; } static int __init romchecksum(const unsigned char *rom, unsigned long length) { unsigned char sum, c; - for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) + for (sum = 0; length && get_kernel_nofault(c, rom++) == 0; length--) sum += c; return !length && !sum; } @@ -211,7 +211,7 @@ void __init probe_roms(void) video_rom_resource.start = start; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; 
/* 0 < length <= 0x7f * 512, historically */ @@ -249,7 +249,7 @@ void __init probe_roms(void) if (!romsignature(rom)) continue; - if (probe_kernel_address(rom + 2, c) != 0) + if (get_kernel_nofault(c, rom + 2) != 0) continue; /* 0 < length <= 0x7f * 512, historically */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7003f2e7b163..f9727b96961f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -91,7 +91,7 @@ int is_valid_bugaddr(unsigned long addr) if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) + if (get_kernel_nofault(ud, (unsigned short *)addr)) return 0; return ud == INSN_UD0 || ud == INSN_UD2; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e996aa3833b8..1ead568c0101 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -99,7 +99,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, return !instr_lo || (instr_lo>>1) == 1; case 0x00: /* Prefetch instruction is 0x0F0D or 0x0F18 */ - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) return 0; *prefetch = (instr_lo == 0xF) && @@ -133,7 +133,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) while (instr < max_instr) { unsigned char opcode; - if (probe_kernel_address(instr, opcode)) + if (get_kernel_nofault(opcode, instr)) break; instr++; @@ -301,7 +301,7 @@ static int bad_address(void *p) { unsigned long dummy; - return probe_kernel_address((unsigned long *)p, dummy); + return get_kernel_nofault(dummy, (unsigned long *)p); } static void dump_pagetable(unsigned long address) diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 9c97d814125e..4f15280732ed 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -302,7 +302,7 @@ static const struct pci_raw_ops *__init pci_find_bios(void) check <= (union bios32 *) __va(0xffff0); ++check) { long sig; - if (probe_kernel_address(&check->fields.signature, sig)) + if 
(get_kernel_nofault(sig, &check->fields.signature)) continue; if (check->fields.signature != BIOS32_SIGNATURE) -- cgit v1.2.3 From 0c389d89abc28edf70ae847ee2fa55acb267b826 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 18 Jun 2020 12:10:37 -0700 Subject: maccess: make get_kernel_nofault() check for minimal type compatibility Now that we've renamed probe_kernel_address() to get_kernel_nofault() and made it look and behave more in line with get_user(), some of the subtle type behavior differences end up being more obvious and possibly dangerous. When you do get_user(val, user_ptr); the type of the access comes from the "user_ptr" part, and the above basically acts as val = *user_ptr; by design (except, of course, for the fact that the actual dereference is done with a user access). Note how in the above case, the type of the end result comes from the pointer argument, and then the value is cast to the type of 'val' as part of the assignment. So the type of the pointer is ultimately the more important type both for the access itself. But 'get_kernel_nofault()' may now _look_ similar, but it behaves very differently. When you do get_kernel_nofault(val, kernel_ptr); it behaves like val = *(typeof(val) *)kernel_ptr; except, of course, for the fact that the actual dereference is done with exception handling so that a faulting access is suppressed and returned as the error code. But note how different the casting behavior of the two superficially similar accesses are: one does the actual access in the size of the type the pointer points to, while the other does the access in the size of the target, and ignores the pointer type entirely. Actually changing get_kernel_nofault() to act like get_user() is almost certainly the right thing to do eventually, but in the meantime this patch adds logic to at least verify that the pointer type is compatible with the type of the result. 
In many cases, this involves just casting the pointer to 'void *' to make it obvious that the type of the pointer is not the important part. It's not how 'get_user()' acts, but at least the behavioral difference is now obvious and explicit. Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- arch/arm/kernel/traps.c | 2 +- arch/ia64/include/asm/sections.h | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/include/asm/sections.h | 2 +- arch/powerpc/kernel/kgdb.c | 2 +- arch/x86/kernel/probe_roms.c | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 49ce15c3612d..17d5a785df28 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -396,7 +396,7 @@ int is_valid_bugaddr(unsigned long pc) u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); #endif - if (get_kernel_nofault(bkpt, (unsigned *)pc)) + if (get_kernel_nofault(bkpt, (void *)pc)) return 0; return bkpt == insn; diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h index ad4fc06e5f4b..3a033d2008b3 100644 --- a/arch/ia64/include/asm/sections.h +++ b/arch/ia64/include/asm/sections.h @@ -35,7 +35,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->ip)) + if (!get_kernel_nofault(p, (void *)&desc->ip)) ptr = p; return ptr; } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 6c435dbccca0..b7abb12edd3a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -293,7 +293,7 @@ void *dereference_function_descriptor(void *ptr) Elf64_Fdesc *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->addr)) + if (!get_kernel_nofault(p, (void *)&desc->addr)) ptr = p; return ptr; } diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index bd311616fca8..324d7b298ec3 100644 --- a/arch/powerpc/include/asm/sections.h +++ 
b/arch/powerpc/include/asm/sections.h @@ -85,7 +85,7 @@ static inline void *dereference_function_descriptor(void *ptr) struct ppc64_opd_entry *desc = ptr; void *p; - if (!get_kernel_nofault(p, &desc->funcaddr)) + if (!get_kernel_nofault(p, (void *)&desc->funcaddr)) ptr = p; return ptr; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index e14a1862a3ca..409080208a6c 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -421,7 +421,7 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) unsigned int instr; struct ppc_inst *addr = (struct ppc_inst *)bpt->bpt_addr; - err = get_kernel_nofault(instr, addr); + err = get_kernel_nofault(instr, (unsigned *) addr); if (err) return err; diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 65b0dd2bf25c..9e1def3744f2 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -94,7 +94,7 @@ static bool match_id(struct pci_dev *pdev, unsigned short vendor, unsigned short } static bool probe_list(struct pci_dev *pdev, unsigned short vendor, - const unsigned char *rom_list) + const void *rom_list) { unsigned short device; @@ -119,7 +119,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) for (i = 0; i < ARRAY_SIZE(adapter_rom_resources); i++) { struct resource *res = &adapter_rom_resources[i]; unsigned short offset, vendor, device, list, rev; - const unsigned char *rom; + const void *rom; if (res->end == 0) break; -- cgit v1.2.3 From e0d17c842c0f824fd4df9f4688709fc6907201e1 Mon Sep 17 00:00:00 2001 From: Yash Shah Date: Tue, 16 Jun 2020 19:33:06 +0530 Subject: RISC-V: Don't allow write+exec only page mapping request in mmap As per the table 4.4 of version "20190608-Priv-MSU-Ratified" of the RISC-V instruction set manual[0], the PTE permission bit combination of "write+exec only" is reserved for future use. Hence, don't allow such mapping request in mmap call. 
An issue has been reported by David Abdurachmanov: while running stress-ng with the "sysbadaddr" argument, RCU stalls are observed on a RISC-V specific kernel. This issue arises when stress-sysbadaddr requests pages with "write+exec only" permission bits and then passes the address obtained from this mmap call to various system calls. For the riscv kernel, the mmap call should fail for this particular combination of permission bits since it's not valid. [0]: http://dabbelt.com/~palmer/keep/riscv-isa-manual/riscv-privileged-20190608-1.pdf Signed-off-by: Yash Shah Reported-by: David Abdurachmanov [Palmer: Refer to the latest ISA specification at the only link I could find, and update the terminology.] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index f3619f59d85c..12f8a7fce78b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -8,6 +8,7 @@ #include #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,11 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, { if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + + if ((prot & PROT_WRITE) && (prot & PROT_EXEC)) + if (unlikely(!(prot & PROT_READ))) + return -EINVAL; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } -- cgit v1.2.3 From 0e2c09011d4de4161f615ff860a605a9186cf62a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 17 Jun 2020 13:37:32 -0700 Subject: RISC-V: Acquire mmap lock before invoking walk_page_range As per walk_page_range documentation, mmap lock should be acquired by the caller before invoking walk_page_range. mmap_assert_locked gets triggered without that. The details can be found here. 
http://lists.infradead.org/pipermail/linux-riscv/2020-June/010335.html Fixes: 395a21ff859c(riscv: add ARCH_HAS_SET_DIRECT_MAP support) Signed-off-by: Atish Patra Reviewed-by: Michel Lespinasse Reviewed-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/pageattr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index ec2c70f84994..289a9a5ea5b5 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -151,6 +151,7 @@ int set_memory_nx(unsigned long addr, int numpages) int set_direct_map_invalid_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -158,11 +159,16 @@ int set_direct_map_invalid_noflush(struct page *page) .clear_mask = __pgprot(_PAGE_PRESENT) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } int set_direct_map_default_noflush(struct page *page) { + int ret; unsigned long start = (unsigned long)page_address(page); unsigned long end = start + PAGE_SIZE; struct pageattr_masks masks = { @@ -170,7 +176,11 @@ int set_direct_map_default_noflush(struct page *page) .clear_mask = __pgprot(0) }; - return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_lock(&init_mm); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + mmap_read_unlock(&init_mm); + + return ret; } void __kernel_map_pages(struct page *page, int numpages, int enable) -- cgit v1.2.3 From f04a5ba1752512a46ffb61b88a8fa7d4ab7e02f3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 15 Jun 2020 09:58:12 +0200 Subject: x86/platform/intel-mid: convert to use i2c_new_client_device() Move away from the deprecated API and return the shiny new ERRPTR 
where useful. Signed-off-by: Wolfram Sang Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- arch/x86/platform/intel-mid/sfi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index b8f7f193f383..30bd5714a3d4 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -287,8 +287,8 @@ void intel_scu_devices_create(void) adapter = i2c_get_adapter(i2c_bus[i]); if (adapter) { - client = i2c_new_device(adapter, i2c_devs[i]); - if (!client) + client = i2c_new_client_device(adapter, i2c_devs[i]); + if (IS_ERR(client)) pr_err("can't create i2c device %s\n", i2c_devs[i]->type); } else -- cgit v1.2.3 From 49097762fa405cdc16f8f597f6d27c078d4a31e9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 19 Jun 2020 11:40:46 +0200 Subject: Revert "KVM: VMX: Micro-optimize vmexit time when not exposing PMU" Guest crashes are observed on a Cascade Lake system when 'perf top' is launched on the host, e.g. BUG: unable to handle kernel paging request at fffffe0000073038 PGD 7ffa7067 P4D 7ffa7067 PUD 7ffa6067 PMD 7ffa5067 PTE ffffffffff120 Oops: 0000 [#1] SMP PTI CPU: 1 PID: 1 Comm: systemd Not tainted 4.18.0+ #380 ... Call Trace: serial8250_console_write+0xfe/0x1f0 call_console_drivers.constprop.0+0x9d/0x120 console_unlock+0x1ea/0x460 Call traces are different but the crash is imminent. The problem was blindly bisected to the commit 041bc42ce2d0 ("KVM: VMX: Micro-optimize vmexit time when not exposing PMU"). It was also confirmed that the issue goes away if PMU is exposed to the guest. With some instrumentation of the guest we can see what is being switched (when we do atomic_switch_perf_msrs()): vmx_vcpu_run: switching 2 msrs vmx_vcpu_run: switching MSR38f guest: 70000000d host: 70000000f vmx_vcpu_run: switching MSR3f1 guest: 0 host: 2 The current guess is that PEBS (MSR_IA32_PEBS_ENABLE, 0x3f1) is to blame. 
Regardless of whether PMU is exposed to the guest or not, PEBS needs to be disabled upon switch. This reverts commit 041bc42ce2d0efac3b85bbb81dea8c74b81f4ef9. Reported-by: Maxime Coquelin Signed-off-by: Vitaly Kuznetsov Message-Id: <20200619094046.654019-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 36c771728c8c..b1a23ad986ff 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6728,8 +6728,7 @@ reenter_guest: pt_guest_enter(vmx); - if (vcpu_to_pmu(vcpu)->version) - atomic_switch_perf_msrs(vmx); + atomic_switch_perf_msrs(vmx); atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) -- cgit v1.2.3 From bb5570ad3b54e7930997aec76ab68256d5236d94 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 18 Jun 2020 11:20:02 +0100 Subject: x86/asm/64: Align start of __clear_user() loop to 16-bytes x86 CPUs can suffer severe performance drops if a tight loop, such as the ones in __clear_user(), straddles a 16-byte instruction fetch window, or worse, a 64-byte cacheline. This issue was discovered in the SUSE kernel with the following commit, 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") which increased the code object size from 10 bytes to 15 bytes and caused the 8-byte copy loop in __clear_user() to be split across a 64-byte cacheline. Aligning the start of the loop to 16-bytes makes this fit neatly inside a single instruction fetch window again and restores the performance of __clear_user() which is used heavily when reading from /dev/zero. 
Here are some numbers from running libmicro's read_z* and pread_z* microbenchmarks which read from /dev/zero: Zen 1 (Naples) libmicro-file 5.7.0-rc6 5.7.0-rc6 5.7.0-rc6 revert-1153933703d9+ align16+ Time mean95-pread_z100k 9.9195 ( 0.00%) 5.9856 ( 39.66%) 5.9938 ( 39.58%) Time mean95-pread_z10k 1.1378 ( 0.00%) 0.7450 ( 34.52%) 0.7467 ( 34.38%) Time mean95-pread_z1k 0.2623 ( 0.00%) 0.2251 ( 14.18%) 0.2252 ( 14.15%) Time mean95-pread_zw100k 9.9974 ( 0.00%) 6.0648 ( 39.34%) 6.0756 ( 39.23%) Time mean95-read_z100k 9.8940 ( 0.00%) 5.9885 ( 39.47%) 5.9994 ( 39.36%) Time mean95-read_z10k 1.1394 ( 0.00%) 0.7483 ( 34.33%) 0.7482 ( 34.33%) Note that this doesn't affect Haswell or Broadwell microarchitectures which seem to avoid the alignment issue by executing the loop straight out of the Loop Stream Detector (verified using perf events). Fixes: 1153933703d9 ("x86/asm/64: Micro-optimize __clear_user() - Use immediate constants") Signed-off-by: Matt Fleming Signed-off-by: Borislav Petkov Cc: # v4.19+ Link: https://lkml.kernel.org/r/20200618102002.30034-1-matt@codeblueprint.co.uk --- arch/x86/lib/usercopy_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index fff28c6f73a2..b0dfac3d3df7 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -24,6 +24,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" + " .align 16\n" "0: movq $0,(%[dst])\n" " addq $8,%[dst]\n" " decl %%ecx ; jnz 0b\n" -- cgit v1.2.3 From c0e1c8c22bebecef40097c80c1c74492ff96d081 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 15 Jun 2020 12:57:59 +0000 Subject: powerpc/8xx: Provide ptep_get() with 16k pages READ_ONCE() now enforces atomic read, which leads to: CC mm/gup.o In file included from ./include/linux/kernel.h:11:0, from mm/gup.c:2: In function 'gup_hugepte.constprop', inlined from 'gup_huge_pd.isra.79' at 
mm/gup.c:2465:8: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_222' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2428:8: note: in expansion of macro 'READ_ONCE' pte = READ_ONCE(*ptep); ^ In function 'gup_get_pte', inlined from 'gup_pte_range' at mm/gup.c:2228:9, inlined from 'gup_pmd_range' at mm/gup.c:2613:15, inlined from 'gup_pud_range' at mm/gup.c:2641:15, inlined from 'gup_p4d_range' at mm/gup.c:2666:15, inlined from 'gup_pgd_range' at mm/gup.c:2694:15, inlined from 'internal_get_user_pages_fast' at mm/gup.c:2795:3: ./include/linux/compiler.h:392:38: error: call to '__compiletime_assert_219' declared with attribute error: Unsupported access size for {READ,WRITE}_ONCE(). 
_compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:373:4: note: in definition of macro '__compiletime_assert' prefix ## suffix(); \ ^ ./include/linux/compiler.h:392:2: note: in expansion of macro '_compiletime_assert' _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) ^ ./include/linux/compiler.h:405:2: note: in expansion of macro 'compiletime_assert' compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ ^ ./include/linux/compiler.h:291:2: note: in expansion of macro 'compiletime_assert_rwonce_type' compiletime_assert_rwonce_type(x); \ ^ mm/gup.c:2199:9: note: in expansion of macro 'READ_ONCE' return READ_ONCE(*ptep); ^ make[2]: *** [mm/gup.o] Error 1 Define ptep_get() on 8xx when using 16k pages. Fixes: 9e343b467c70 ("READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses") Signed-off-by: Christophe Leroy Acked-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/341688399c1b102756046d19ea6ce39db1ae4742.1592225558.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 5a590ceaec14..b0afbdd07740 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -284,6 +284,16 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return __pte(pte_update(mm, addr, ptep, ~0, 0, 0)); } +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES) +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0}; + + return pte; +} +#endif + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -- cgit 
v1.2.3 From 7e4773f73dcfb92e7e33532162f722ec291e75a4 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Sat, 13 Jun 2020 23:28:01 +0700 Subject: powerpc/fsl_booke/32: Fix build with CONFIG_RANDOMIZE_BASE Building the current 5.8 kernel for an e500 machine with CONFIG_RANDOMIZE_BASE=y and CONFIG_BLOCK=n yields the following failure: arch/powerpc/mm/nohash/kaslr_booke.c: In function 'kaslr_early_init': arch/powerpc/mm/nohash/kaslr_booke.c:387:2: error: implicit declaration of function 'flush_icache_range'; did you mean 'flush_tlb_range'? Indeed, including asm/cacheflush.h into kaslr_booke.c fixes the build. Fixes: 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR infrastructure") Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Arseny Solokha Reviewed-by: Jason Yan Acked-by: Scott Wood [mpe: Tweak change log to mention CONFIG_BLOCK=n] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200613162801.1946619-1-asolokha@kb.kras.ru --- arch/powerpc/mm/nohash/kaslr_booke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c index 4a75f2d9bf0e..bce0e5349978 100644 --- a/arch/powerpc/mm/nohash/kaslr_booke.c +++ b/arch/powerpc/mm/nohash/kaslr_booke.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From c1ed1754f271f6b7acb1bfdc8cfb62220fbed423 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 11 Jun 2020 17:31:59 +0530 Subject: powerpc/kvm/book3s64: Fix kernel crash with nested kvm & DEBUG_VIRTUAL With CONFIG_DEBUG_VIRTUAL=y, __pa() checks for addr value and if it's less than PAGE_OFFSET it leads to a BUG(). #define __pa(x) ({ VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); (unsigned long)(x) & 0x0fffffffffffffffUL; }) kernel BUG at arch/powerpc/kvm/book3s_64_mmu_radix.c:43! 
cpu 0x70: Vector: 700 (Program Check) at [c0000018a2187360] pc: c000000000161b30: __kvmhv_copy_tofrom_guest_radix+0x130/0x1f0 lr: c000000000161d5c: kvmhv_copy_from_guest_radix+0x3c/0x80 ... kvmhv_copy_from_guest_radix+0x3c/0x80 kvmhv_load_from_eaddr+0x48/0xc0 kvmppc_ld+0x98/0x1e0 kvmppc_load_last_inst+0x50/0x90 kvmppc_hv_emulate_mmio+0x288/0x2b0 kvmppc_book3s_radix_page_fault+0xd8/0x2b0 kvmppc_book3s_hv_page_fault+0x37c/0x1050 kvmppc_vcpu_run_hv+0xbb8/0x1080 kvmppc_vcpu_run+0x34/0x50 kvm_arch_vcpu_ioctl_run+0x2fc/0x410 kvm_vcpu_ioctl+0x2b4/0x8f0 ksys_ioctl+0xf4/0x150 sys_ioctl+0x28/0x80 system_call_exception+0x104/0x1d0 system_call_common+0xe8/0x214 kvmhv_copy_tofrom_guest_radix() uses a NULL value for to/from to indicate direction of copy. Avoid calling __pa() if the value is NULL to avoid the BUG(). Signed-off-by: Aneesh Kumar K.V [mpe: Massage change log a bit to mention CONFIG_DEBUG_VIRTUAL] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200611120159.680284-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index e738ea652192..6a73714759ba 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -40,7 +40,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ if (kvmhv_on_pseries()) return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, - __pa(to), __pa(from), n); + (to != NULL) ? __pa(to): 0, + (from != NULL) ? 
__pa(from): 0, n); quadrant = 1; if (!pid) -- cgit v1.2.3 From af28dfacbe00d53df5dec2bf50640df33138b1fe Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Mon, 22 Jun 2020 12:08:30 -0400 Subject: kvm: lapic: fix broken vcpu hotplug Guest fails to online hotplugged CPU with error smpboot: do_boot_cpu failed(-1) to wakeup CPU#4 It's caused by the fact that kvm_apic_set_state(), which used to call recalculate_apic_map() unconditionally and pulled hotplugged CPU into apic map, is updating map conditionally on state changes. In this case the APIC map is not considered dirty and is not updated. Fix the issue by forcing unconditional update from kvm_apic_set_state(), like it used to be. Fixes: 4abaffce4d25a ("KVM: LAPIC: Recalculate apic map in batch") Cc: stable@vger.kernel.org Signed-off-by: Igor Mammedov Message-Id: <20200622160830.426022-1-imammedo@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 34a7e0533dad..6dc177da19da 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2567,6 +2567,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + apic->vcpu->kvm->arch.apic_map_dirty = true; kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); -- cgit v1.2.3 From 44d527170731c75587e95052f3eea72b8c651daf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Jun 2020 16:37:42 +0200 Subject: KVM: LAPIC: ensure APIC map is up to date on concurrent update requests The following race can cause lost map update events: cpu1 cpu2 apic_map_dirty = true ------------------------------------------------------------ kvm_recalculate_apic_map: pass check mutex_lock(&kvm->arch.apic_map_lock); if (!kvm->arch.apic_map_dirty) and in process of updating map ------------------------------------------------------------- other calls to apic_map_dirty =
true might be too late for affected cpu ------------------------------------------------------------- apic_map_dirty = false ------------------------------------------------------------- kvm_recalculate_apic_map: bail out on if (!kvm->arch.apic_map_dirty) To fix it, record the beginning of an update of the APIC map in apic_map_dirty. If another APIC map change switches apic_map_dirty back to DIRTY during the update, kvm_recalculate_apic_map should not make it CLEAN, and the other caller will go through the slow path. Reported-by: Igor Mammedov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 51 +++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8998e97457f..f852ee350beb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -943,7 +943,7 @@ struct kvm_arch { atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - bool apic_map_dirty; + atomic_t apic_map_dirty; bool apic_access_page_done; unsigned long apicv_inhibit_reasons; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6dc177da19da..5bf72fc86a8e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -169,6 +169,18 @@ static void kvm_apic_map_free(struct rcu_head *rcu) kvfree(map); } +/* + * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock. + * + * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with + * apic_map_lock_held. 
+ */ +enum { + CLEAN, + UPDATE_IN_PROGRESS, + DIRTY +}; + void kvm_recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -176,17 +188,17 @@ void kvm_recalculate_apic_map(struct kvm *kvm) int i; u32 max_id = 255; /* enough space for any xAPIC ID */ - if (!kvm->arch.apic_map_dirty) { - /* - * Read kvm->arch.apic_map_dirty before - * kvm->arch.apic_map - */ - smp_rmb(); + /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */ + if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN) return; - } mutex_lock(&kvm->arch.apic_map_lock); - if (!kvm->arch.apic_map_dirty) { + /* + * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map + * (if clean) or the APIC registers (if dirty). + */ + if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty, + DIRTY, UPDATE_IN_PROGRESS) == CLEAN) { /* Someone else has updated the map. */ mutex_unlock(&kvm->arch.apic_map_lock); return; @@ -256,11 +268,11 @@ out: lockdep_is_held(&kvm->arch.apic_map_lock)); rcu_assign_pointer(kvm->arch.apic_map, new); /* - * Write kvm->arch.apic_map before - * clearing apic->apic_map_dirty + * Write kvm->arch.apic_map before clearing apic->apic_map_dirty. + * If another update has come in, leave it DIRTY. 
*/ - smp_wmb(); - kvm->arch.apic_map_dirty = false; + atomic_cmpxchg_release(&kvm->arch.apic_map_dirty, + UPDATE_IN_PROGRESS, CLEAN); mutex_unlock(&kvm->arch.apic_map_lock); if (old) @@ -282,20 +294,20 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) else static_key_slow_inc(&apic_sw_disabled.key); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id) { kvm_lapic_set_reg(apic, APIC_ID, id << 24); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) { kvm_lapic_set_reg(apic, APIC_LDR, id); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) @@ -311,7 +323,7 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) @@ -1976,7 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_DFR: if (!apic_x2apic_mode(apic)) { kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } else ret = 1; break; @@ -2232,7 +2244,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) static_key_slow_dec_deferred(&apic_hw_disabled); } else { static_key_slow_inc(&apic_hw_disabled.key); - vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); } } @@ -2273,7 +2285,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, 
bool init_event) if (!apic) return; - vcpu->kvm->arch.apic_map_dirty = false; /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2567,7 +2578,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); - apic->vcpu->kvm->arch.apic_map_dirty = true; + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); -- cgit v1.2.3 From 312d16c7c06174f44f96ef4a61c2936e6e360414 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 22 Jun 2020 17:14:35 +0200 Subject: KVM: x86/mmu: Avoid mixing gpa_t with gfn_t in walk_addr_generic() translate_gpa() returns a GPA, assigning it to 'real_gfn' seems obviously wrong. There is no real issue because both 'gpa_t' and 'gfn_t' are u64 and we don't use the value in 'real_gfn' as a GFN, we do real_gfn = gpa_to_gfn(real_gfn); instead. 'If you see a "buffalo" sign on an elephant's cage, do not trust your eyes', but let's fix it for good. No functional change intended. 
Signed-off-by: Vitaly Kuznetsov Message-Id: <20200622151435.752560-1-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/paging_tmpl.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index a6d484ea110b..58234bfaca07 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -360,7 +360,6 @@ retry_walk: ++walker->level; do { - gfn_t real_gfn; unsigned long host_addr; pt_access = pte_access; @@ -375,7 +374,7 @@ retry_walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), nested_access, &walker->fault); @@ -389,12 +388,10 @@ retry_walk: * information to fix the exit_qualification or exit_info_1 * fields. */ - if (unlikely(real_gfn == UNMAPPED_GVA)) + if (unlikely(real_gpa == UNMAPPED_GVA)) return 0; - real_gfn = gpa_to_gfn(real_gfn); - - host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, real_gfn, + host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa), &walker->pte_writable[walker->level - 1]); if (unlikely(kvm_is_error_hva(host_addr))) goto error; -- cgit v1.2.3 From 2dbebf7ae1ed9a420d954305e2c9d5ed39ec57c3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 14:58:29 -0700 Subject: KVM: nVMX: Plumb L2 GPA through to PML emulation Explicitly pass the L2 GPA to kvm_arch_write_log_dirty(), which for all intents and purposes is vmx_write_pml_buffer(), instead of having the latter pull the GPA from vmcs.GUEST_PHYSICAL_ADDRESS. If the dirty bit update is the result of KVM emulation (rare for L2), then the GPA in the VMCS may be stale and/or hold a completely unrelated GPA. 
Fixes: c5f983f6e8455 ("nVMX: Implement emulated Page Modification Logging") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20200622215832.22090-2-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/mmu/mmu.c | 4 ++-- arch/x86/kvm/mmu/paging_tmpl.h | 7 ++++--- arch/x86/kvm/vmx/vmx.c | 6 +++--- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f852ee350beb..be5363b21540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1220,7 +1220,7 @@ struct kvm_x86_ops { void (*enable_log_dirty_pt_masked)(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t offset, unsigned long mask); - int (*write_log_dirty)(struct kvm_vcpu *vcpu); + int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 0ad06bfe2c2c..444bb9c54548 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -222,7 +222,7 @@ void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); -int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index fdd05c233308..76817d13c86e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1745,10 +1745,10 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, * Emulate arch specific page modification logging for the * nested hypervisor */ -int kvm_arch_write_log_dirty(struct 
kvm_vcpu *vcpu) +int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa) { if (kvm_x86_ops.write_log_dirty) - return kvm_x86_ops.write_log_dirty(vcpu); + return kvm_x86_ops.write_log_dirty(vcpu, l2_gpa); return 0; } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 58234bfaca07..bd70ece1ef8b 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -235,7 +235,7 @@ static inline unsigned FNAME(gpte_access)(u64 gpte) static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, struct guest_walker *walker, - int write_fault) + gpa_t addr, int write_fault) { unsigned level, index; pt_element_t pte, orig_pte; @@ -260,7 +260,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, !(pte & PT_GUEST_DIRTY_MASK)) { trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); #if PTTYPE == PTTYPE_EPT - if (kvm_arch_write_log_dirty(vcpu)) + if (kvm_arch_write_log_dirty(vcpu, addr)) return -EINVAL; #endif pte |= PT_GUEST_DIRTY_MASK; @@ -454,7 +454,8 @@ retry_walk: (PT_GUEST_DIRTY_SHIFT - PT_GUEST_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { - ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, + addr, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b1a23ad986ff..ad0ac8bc85d9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7500,11 +7500,11 @@ static void vmx_flush_log_dirty(struct kvm *kvm) kvm_flush_pml_buffers(kvm); } -static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - gpa_t gpa, dst; + gpa_t dst; if (is_guest_mode(vcpu)) { WARN_ON_ONCE(vmx->nested.pml_full); @@ -7523,7 +7523,7 @@ static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu) return 1; } 
- gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull; + gpa &= ~0xFFFull; dst = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index; if (kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(dst), &gpa, -- cgit v1.2.3 From bf09fb6cba4f7099620cc9ed32d94c27c4af992e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 22 Jun 2020 17:51:35 -0700 Subject: KVM: VMX: Stop context switching MSR_IA32_UMWAIT_CONTROL Remove support for context switching between the guest's and host's desired UMWAIT_CONTROL. Propagating the guest's value to hardware isn't required for correct functionality, e.g. KVM intercepts reads and writes to the MSR, and the latency effects of the settings controlled by the MSR are not architecturally visible. As a general rule, KVM should not allow the guest to control power management settings unless explicitly enabled by userspace, e.g. see KVM_CAP_X86_DISABLE_EXITS. E.g. Intel's SDM explicitly states that C0.2 can improve the performance of SMT siblings. A devious guest could disable C0.2 so as to improve the performance of their workloads at the detriment to workloads running in the host or on other VMs. Wholesale removal of UMWAIT_CONTROL context switching also fixes a race condition where updates from the host may cause KVM to enter the guest with the incorrect value. Because updates are propagated to all CPUs via IPI (SMP function callback), the value in hardware may be stale with respect to the cached value and KVM could enter the guest with the wrong value in hardware. As above, the guest can't observe the bad value, but it's a weird and confusing wart in the implementation. Removal also fixes the unnecessary usage of VMX's atomic load/store MSR lists. Using the lists is only necessary for MSRs that are required for correct functionality immediately upon VM-Enter/VM-Exit, e.g. EFER on old hardware, or for MSRs that need to-the-uop precision, e.g. perf related MSRs.
For UMWAIT_CONTROL, the effects are only visible in the kernel via TPAUSE/delay(), and KVM doesn't do any form of delay in vcpu_vmx_run(). Using the atomic lists is undesirable as they are more expensive than direct RDMSR/WRMSR. Furthermore, even if giving the guest control of the MSR is legitimate, e.g. in pass-through scenarios, it's not clear that the benefits would outweigh the overhead. E.g. saving and restoring an MSR across a VMX roundtrip costs ~250 cycles, and if the guest diverged from the host that cost would be paid on every run of the guest. In other words, if there is a legitimate use case then it should be enabled by a new per-VM capability. Note, KVM still needs to emulate MSR_IA32_UMWAIT_CONTROL so that it can correctly expose other WAITPKG features to the guest, e.g. TPAUSE, UMWAIT and UMONITOR. Fixes: 6e3ba4abcea56 ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL") Cc: stable@vger.kernel.org Cc: Jingqi Liu Cc: Tao Xu Signed-off-by: Sean Christopherson Message-Id: <20200623005135.10414-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/mwait.h | 2 -- arch/x86/kernel/cpu/umwait.c | 6 ------ arch/x86/kvm/vmx/vmx.c | 18 ------------------ 3 files changed, 26 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 73d997aa2966..e039a933aca3 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -25,8 +25,6 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -u32 get_umwait_control_msr(void); - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 300e3fd5ade3..ec8064c0ae03 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -18,12 +18,6 @@ */ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); -u32 get_umwait_control_msr(void) -{ - return umwait_control_cached; -} 
-EXPORT_SYMBOL_GPL(get_umwait_control_msr); - /* * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by * hardware or BIOS before kernel boot. diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index ad0ac8bc85d9..cb22f33bf1d8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6606,23 +6606,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) -{ - u32 host_umwait_control; - - if (!vmx_has_waitpkg(vmx)) - return; - - host_umwait_control = get_umwait_control_msr(); - - if (vmx->msr_ia32_umwait_control != host_umwait_control) - add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, - vmx->msr_ia32_umwait_control, - host_umwait_control, false); - else - clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); -} - static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6729,7 +6712,6 @@ reenter_guest: pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); - atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); -- cgit v1.2.3 From 04a2c05179b732a4c097f0a9c701ef4c9a37e1e3 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 28 May 2020 14:43:42 +0000 Subject: ARM: dts: imx6ul-kontron: Move watchdog from Kontron i.MX6UL/ULL board to SoM The watchdog's WDOG_ANY signal is used to trigger a POR of the SoC, if a soft reset is issued. As the SoM hardware connects the WDOG_ANY and the POR signals, the watchdog node itself and the pin configuration should be part of the common SoM devicetree. Let's move it from the baseboard's devicetree to its proper place. 
Fixes: 1ea4b76cdfde ("ARM: dts: imx6ul-kontron-n6310: Add Kontron i.MX6UL N6310 SoM and boards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi | 13 ------------- arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi index f05e91841202..53a25fba34f6 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-s.dtsi @@ -232,13 +232,6 @@ status = "okay"; }; -&wdog1 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_wdog>; - fsl,ext-reset-output; - status = "okay"; -}; - &iomuxc { pinctrl-0 = <&pinctrl_reset_out &pinctrl_gpio>; @@ -409,10 +402,4 @@ MX6UL_PAD_NAND_DATA03__USDHC2_DATA3 0x170f9 >; }; - - pinctrl_wdog: wdoggrp { - fsl,pins = < - MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 - >; - }; }; diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi index a17af4d9bfdf..fc316408721d 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi @@ -57,6 +57,13 @@ status = "okay"; }; +&wdog1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_wdog>; + fsl,ext-reset-output; + status = "okay"; +}; + &iomuxc { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_reset_out>; @@ -106,4 +113,10 @@ MX6UL_PAD_SNVS_TAMPER9__GPIO5_IO09 0x1b0b0 >; }; + + pinctrl_wdog: wdoggrp { + fsl,pins = < + MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 + >; + }; }; -- cgit v1.2.3 From d22a16cc92e04d053fd807ef3587e4f135e4206f Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Thu, 28 May 2020 14:43:43 +0000 Subject: ARM: dts: imx6ul-kontron: Change WDOG_ANY signal from push-pull to open-drain The WDOG_ANY signal is connected to the 
RESET_IN signal of the SoM and baseboard. It is currently configured as push-pull, which means that if some external device like a programmer wants to assert the RESET_IN signal by pulling it to ground, it drives against the high level WDOG_ANY output of the SoC. To fix this we set the WDOG_ANY signal to open-drain configuration. That way we make sure that the RESET_IN can be asserted by the watchdog as well as by external devices. Fixes: 1ea4b76cdfde ("ARM: dts: imx6ul-kontron-n6310: Add Kontron i.MX6UL N6310 SoM and boards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi index fc316408721d..61ba21a605a8 100644 --- a/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi +++ b/arch/arm/boot/dts/imx6ul-kontron-n6x1x-som-common.dtsi @@ -116,7 +116,7 @@ pinctrl_wdog: wdoggrp { fsl,pins = < - MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x30b0 + MX6UL_PAD_GPIO1_IO09__WDOG1_WDOG_ANY 0x18b0 >; }; }; -- cgit v1.2.3 From bf10bd0be53282183f374af23577b18b5fbf7801 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Tue, 16 Jun 2020 15:33:07 +0800 Subject: KVM: X86: Fix MSR range of APIC registers in X2APIC mode Only MSR address range 0x800 through 0x8ff is architecturally reserved and dedicated for accessing APIC registers in x2APIC mode. 
Fixes: 0105d1a52640 ("KVM: x2apic interface to lapic") Signed-off-by: Xiaoyao Li Message-Id: <20200616073307.16440-1-xiaoyao.li@intel.com> Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00c88c2f34e4..29d9b078ce69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2856,7 +2856,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_mtrr_set_msr(vcpu, msr, data); case MSR_IA32_APICBASE: return kvm_set_apic_base(vcpu, msr_info); - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_write(vcpu, msr, data); case MSR_IA32_TSCDEADLINE: kvm_set_lapic_tscdeadline_msr(vcpu, data); @@ -3196,7 +3196,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_APICBASE: msr_info->data = kvm_get_apic_base(vcpu); break; - case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: + case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff: return kvm_x2apic_msr_read(vcpu, msr_info->index, &msr_info->data); case MSR_IA32_TSCDEADLINE: msr_info->data = kvm_get_lapic_tscdeadline_msr(vcpu); -- cgit v1.2.3 From 26769f96e6231095f6b1cc3090c903280d44bb57 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 16 Jun 2020 08:47:41 -0300 Subject: KVM: x86: allow TSC to differ by NTP correction bounds without TSC scaling The Linux TSC calibration procedure is subject to small variations (it's common to see +-1 kHz difference between reboots on a given CPU, for example). So migrating a guest between two hosts with identical processors can fail, in case of a small variation in calibrated TSC between them. Without TSC scaling, the current kernel interface will either return an error (if user_tsc_khz <= tsc_khz) or enable TSC catchup mode.
This change enables the following TSC tolerance check to accept KVM_SET_TSC_KHZ within tsc_tolerance_ppm (which is 250ppm by default). /* * Compute the variation in TSC rate which is acceptable * within the range of tolerance and decide if the * rate being applied is within that bounds of the hardware * rate. If so, no scaling or compensation need be done. */ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm); thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm); if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) { pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } NTP daemon in the guest can correct this difference (NTP can correct up to 500ppm). Signed-off-by: Marcelo Tosatti Message-Id: <20200616114741.GA298183@fuller.cnet> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 29d9b078ce69..3b92db412335 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4603,7 +4603,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; user_tsc_khz = (u32)arg; - if (user_tsc_khz >= kvm_max_guest_tsc_khz) + if (kvm_has_tsc_control && + user_tsc_khz >= kvm_max_guest_tsc_khz) goto out; if (user_tsc_khz == 0) -- cgit v1.2.3 From e4553b4976d1178c13da295cb5c7b21f55baf8f9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 16 Jun 2020 20:41:23 -0700 Subject: KVM: VMX: Remove vcpu_vmx's defunct copy of host_pkru Remove vcpu_vmx.host_pkru, which got left behind when PKRU support was moved to common x86 code. No functional change intended.
Fixes: 37486135d3a7b ("KVM: x86: Fix pkru save/restore when guest CR4.PKE=0, move it to x86.c") Signed-off-by: Sean Christopherson Message-Id: <20200617034123.25647-1-sean.j.christopherson@intel.com> Reviewed-by: Vitaly Kuznetsov Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 8a83b5edc820..639798e4a6ca 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -288,8 +288,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - u32 host_pkru; - unsigned long host_debugctlmsr; /* -- cgit v1.2.3 From e64a1618af8566d20991607913a4d90d39b30118 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 17 Jun 2020 17:30:28 +0200 Subject: s390: fix system call single stepping When single stepping an svc instruction on s390, the kernel is entered with a PER program check interruption. The program check handler then jumps to the system call handler by reloading the PSW. The code didn't set GPR13 to the thread pointer in struct task_struct. This made the kernel access invalid memory while trying to fetch the syscall function address. Fix this by always assigning GPR13 after .Lsysc_per.
Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S") Reported-and-tested-by: Christian Borntraeger Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 496f74d98473..969b35b177dd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -378,9 +378,9 @@ ENTRY(system_call) stmg %r8,%r15,__LC_SAVE_AREA_SYNC BPOFF lg %r12,__LC_CURRENT - lghi %r13,__TASK_thread lghi %r14,_PIF_SYSCALL .Lsysc_per: + lghi %r13,__TASK_thread lg %r15,__LC_KERNEL_STACK la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER -- cgit v1.2.3 From 998f5bbe3dbdab81c1cfb1aef7c3892f5d24f6c7 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Wed, 17 Jun 2020 15:05:49 +0200 Subject: s390/kasan: fix early pgm check handler execution Currently if early_pgm_check_handler is called it ends up in pgm check loop. The problem is that early_pgm_check_handler is instrumented by KASAN but executed without DAT flag enabled which leads to addressing exception when KASAN checks try to access shadow memory. Fix that by executing early handlers with DAT flag on under KASAN as expected. 
Reported-and-tested-by: Alexander Egorenkov Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Heiko Carstens --- arch/s390/kernel/early.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cd241ee66eff..078277231858 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -170,6 +170,8 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; + if (IS_ENABLED(CONFIG_KASAN)) + psw.mask |= PSW_MASK_DAT; psw.addr = (unsigned long) s390_base_ext_handler; S390_lowcore.external_new_psw = psw; psw.addr = (unsigned long) s390_base_pgm_handler; -- cgit v1.2.3 From 827c4913923e0b441ba07ba4cc41e01181102303 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 31 Mar 2020 05:57:23 -0400 Subject: s390/debug: avoid kernel warning on too large number of pages When specifying insanely large debug buffers a kernel warning is printed. The debug code does handle the error gracefully, though. Instead of duplicating the check let us silence the warning to avoid crashes when panic_on_warn is used. 
Signed-off-by: Christian Borntraeger Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/kernel/debug.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 636446003a06..263075a1af36 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) if (!areas) goto fail_malloc_areas; for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ areas[i] = kmalloc_array(pages_per_area, sizeof(debug_entry_t *), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!areas[i]) goto fail_malloc_areas2; for (j = 0; j < pages_per_area; j++) { -- cgit v1.2.3 From 87676cfca14171fc4c99d96ae2f3e87780488ac4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 20:24:22 +0100 Subject: arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline Commit 7e9f5e6629f6 ("arm64: vdso: Add --eh-frame-hdr to ldflags") results in a .eh_frame_hdr section for the vDSO, which in turn causes the libgcc unwinder to unwind out of signal handlers using the .eh_frame information populated by our .cfi directives. In conjunction with a4eb355a3fda ("arm64: vdso: Fix CFI directives in sigreturn trampoline"), this has been shown to cause segmentation faults originating from within the unwinder during thread cancellation: | Thread 14 "virtio-net-rx" received signal SIGSEGV, Segmentation fault. 
| 0x0000000000435e24 in uw_frame_state_for () | (gdb) bt | #0 0x0000000000435e24 in uw_frame_state_for () | #1 0x0000000000436e88 in _Unwind_ForcedUnwind_Phase2 () | #2 0x00000000004374d8 in _Unwind_ForcedUnwind () | #3 0x0000000000428400 in __pthread_unwind (buf=) at unwind.c:121 | #4 0x0000000000429808 in __do_cancel () at ./pthreadP.h:304 | #5 sigcancel_handler (sig=32, si=0xffff33c743f0, ctx=) at nptl-init.c:200 | #6 sigcancel_handler (sig=, si=0xffff33c743f0, ctx=) at nptl-init.c:165 | #7 | #8 futex_wait_cancelable (private=0, expected=0, futex_word=0x3890b708) at ../sysdeps/unix/sysv/linux/futex-internal.h:88 After considerable bashing of heads, it appears that our CFI directives for unwinding out of the sigreturn trampoline are only processed by libgcc when both a .eh_frame_hdr section is present *and* the mysterious NOP is covered by an entry in .eh_frame. With both of these now in place, it has highlighted that our CFI directives are not comprehensive enough to restore the stack pointer of the interrupted context. This results in libgcc falling back to an arm64-specific unwinder after computing a bogus PC value from the unwind tables. The unwinder promptly dereferences this bogus address in an attempt to see if the pointed-to instruction sequence looks like the sigreturn trampoline. Restore the old unwind behaviour, which relied solely on heuristics in the unwinder, by removing the .eh_frame_hdr section from the vDSO and commenting out the insufficient CFI directives for now. Add comments to explain the current, miserable state of affairs. 
Cc: Tamas Zsoldos Cc: Szabolcs Nagy Cc: Catalin Marinas Cc: Daniel Kiss Acked-by: Dave Martin Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reported-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- arch/arm64/kernel/vdso/sigreturn.S | 54 ++++++++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 556d424c6f52..1e5a940532da 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -24,7 +24,7 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --eh-frame-hdr --build-id -n $(btildflags-y) -T + -Bsymbolic --no-eh-frame-hdr --build-id -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 620a3ef837b7..0e18729abc3b 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -18,29 +18,40 @@ .text +/* + * NOTE!!! You may notice that all of the .cfi directives in this file have + * been commented out. This is because they have been shown to trigger segfaults + * in libgcc when unwinding out of a SIGCANCEL handler to invoke pthread + * cleanup handlers during the thread cancellation dance. By omitting the + * directives, we trigger an arm64-specific fallback path in the unwinder which + * recognises the signal frame and restores many of the registers directly from + * the sigcontext. Re-enabling the cfi directives here therefore needs to be + * much more comprehensive to reduce the risk of further regressions. 
+ */ + /* Ensure that the mysterious NOP can be associated with a function. */ - .cfi_startproc +// .cfi_startproc /* - * .cfi_signal_frame causes the corresponding Frame Description Entry in the - * .eh_frame section to be annotated as a signal frame. This allows DWARF - * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo(), which permits - * unwinding out of the signal trampoline without the need for the mysterious - * NOP. + * .cfi_signal_frame causes the corresponding Frame Description Entry (FDE) in + * the .eh_frame section to be annotated as a signal frame. This allows DWARF + * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo() and identify + * the next frame using the unmodified return address instead of subtracting 1, + * which may yield the wrong FDE. */ - .cfi_signal_frame +// .cfi_signal_frame /* * Tell the unwinder where to locate the frame record linking back to the - * interrupted context. We don't provide unwind info for registers other - * than the frame pointer and the link register here; in practice, this - * is sufficient for unwinding in C/C++ based runtimes and the values in - * the sigcontext may have been modified by this point anyway. Debuggers + * interrupted context. We don't provide unwind info for registers other than + * the frame pointer and the link register here; in practice, this is likely to + * be insufficient for unwinding in C/C++ based runtimes, especially without a + * means to restore the stack pointer. Thankfully, unwinders and debuggers * already have baked-in strategies for attempting to unwind out of signals. */ - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 +// .cfi_def_cfa x29, 0 +// .cfi_offset x29, 0 * 8 +// .cfi_offset x30, 1 * 8 /* * This mysterious NOP is required for some unwinders (e.g. libc++) that @@ -51,16 +62,19 @@ nop // Mysterious NOP /* - * GDB relies on being able to identify the sigreturn instruction sequence to - * unwind from signal handlers. 
We cannot, therefore, use SYM_FUNC_START() - * here, as it will emit a BTI C instruction and break the unwinder. Thankfully, - * this function is only ever called from a RET and so omitting the landing pad - * is perfectly fine. + * GDB, libgcc and libunwind rely on being able to identify the sigreturn + * instruction sequence to unwind from signal handlers. We cannot, therefore, + * use SYM_FUNC_START() here, as it will emit a BTI C instruction and break the + * unwinder. Thankfully, this function is only ever called from a RET and so + * omitting the landing pad is perfectly fine. */ SYM_CODE_START(__kernel_rt_sigreturn) +// PLEASE DO NOT MODIFY mov x8, #__NR_rt_sigreturn +// PLEASE DO NOT MODIFY svc #0 - .cfi_endproc +// PLEASE DO NOT MODIFY +// .cfi_endproc SYM_CODE_END(__kernel_rt_sigreturn) emit_aarch64_feature_1_and -- cgit v1.2.3 From a39060b009ca0b5b5fe0c0dab85ed437531aab52 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 12:35:41 +0100 Subject: arm64: compat: Allow 32-bit vdso and sigpage to co-exist In preparation for removing the signal trampoline from the compat vDSO, allow the sigpage and the compat vDSO to co-exist. For the moment the vDSO signal trampoline will still be used when built. Subsequent patches will move to the sigpage consistently. 
Acked-by: Dave Martin Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/kernel/Makefile | 2 -- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/vdso.c | 61 ++++++++++++++++++++------------------------ 4 files changed, 32 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 68140fdd89d6..8444df000181 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -19,6 +19,9 @@ typedef struct { atomic64_t id; +#ifdef CONFIG_COMPAT + void *sigpage; +#endif void *vdso; unsigned long flags; } mm_context_t; diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 151f28521f1e..a561cbb91d4d 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -29,9 +29,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o -ifneq ($(CONFIG_COMPAT_VDSO), y) obj-$(CONFIG_COMPAT) += sigreturn32.o -endif obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 82feca6f7052..0aa0b33744de 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -371,7 +371,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_SIGINFO) idx += 3; - retcode = (unsigned long)current->mm->context.vdso + + retcode = (unsigned long)current->mm->context.sigpage + (idx << 2) + thumb; #endif } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 4e016574bd91..e546df0efefb 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -191,15 +191,12 @@ enum aarch32_map { #ifdef CONFIG_COMPAT_VDSO AA32_MAP_VVAR, AA32_MAP_VDSO, -#else - AA32_MAP_SIGPAGE #endif + AA32_MAP_SIGPAGE 
}; static struct page *aarch32_vectors_page __ro_after_init; -#ifndef CONFIG_COMPAT_VDSO static struct page *aarch32_sig_page __ro_after_init; -#endif static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { @@ -214,12 +211,11 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, -#else +#endif /* CONFIG_COMPAT_VDSO */ [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, }, -#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -242,27 +238,11 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } -#ifdef CONFIG_COMPAT_VDSO -static int __aarch32_alloc_vdso_pages(void) -{ - int ret; - - vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; - vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; - - ret = __vdso_init(VDSO_ABI_AA32); - if (ret) - return ret; - - return aarch32_alloc_kuser_vdso_page(); -} -#else -static int __aarch32_alloc_vdso_pages(void) +static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long sigpage; - int ret; sigpage = get_zeroed_page(GFP_ATOMIC); if (!sigpage) @@ -271,18 +251,34 @@ static int __aarch32_alloc_vdso_pages(void) memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); flush_dcache_page(aarch32_sig_page); + return 0; +} - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) - free_page(sigpage); +#ifdef CONFIG_COMPAT_VDSO +static int __aarch32_alloc_vdso_pages(void) +{ + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; - return ret; + return __vdso_init(VDSO_ABI_AA32); } #endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { - return __aarch32_alloc_vdso_pages(); + int 
ret; + +#ifdef CONFIG_COMPAT_VDSO + ret = __aarch32_alloc_vdso_pages(); + if (ret) + return ret; +#endif + + ret = aarch32_alloc_sigpage(); + if (ret) + return ret; + + return aarch32_alloc_kuser_vdso_page(); } arch_initcall(aarch32_alloc_vdso_pages); @@ -305,7 +301,6 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) return PTR_ERR_OR_ZERO(ret); } -#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -328,12 +323,11 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) if (IS_ERR(ret)) goto out; - mm->context.vdso = (void *)addr; + mm->context.sigpage = (void *)addr; out: return PTR_ERR_OR_ZERO(ret); } -#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { @@ -352,10 +346,11 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) mm, bprm, uses_interp); -#else - ret = aarch32_sigreturn_setup(mm); + if (ret) + goto out; #endif /* CONFIG_COMPAT_VDSO */ + ret = aarch32_sigreturn_setup(mm); out: mmap_write_unlock(mm); return ret; -- cgit v1.2.3 From 8e411be6aad1387f40d60cb2c11d3260222c590b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:09:49 +0100 Subject: arm64: compat: Always use sigpage for sigreturn trampoline The 32-bit sigreturn trampoline in the compat sigpage matches the binary representation of the arch/arm/ sigpage exactly. This is important for debuggers (e.g. GDB) and unwinders (e.g. libunwind) since they rely on matching the instruction sequence in order to identify that they are unwinding through a signal. The same cannot be said for the sigreturn trampoline in the compat vDSO, which defeats the unwinder heuristics and instead attempts to use unwind directives for the unwinding. This is in contrast to arch/arm/, which never uses the vDSO for sigreturn. 
Ensure compatibility with arch/arm/ and existing unwinders by always using the sigpage for the sigreturn trampoline, regardless of the presence of the compat vDSO. Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/signal32.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 0aa0b33744de..2f507f565c48 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -342,30 +342,6 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ -#ifdef CONFIG_COMPAT_VDSO - void *vdso_base = current->mm->context.vdso; - void *vdso_trampoline; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (thumb) { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_rt_sigreturn_thumb); - } else { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_rt_sigreturn_arm); - } - } else { - if (thumb) { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_sigreturn_thumb); - } else { - vdso_trampoline = VDSO_SYMBOL(vdso_base, - compat_sigreturn_arm); - } - } - - retcode = ptr_to_compat(vdso_trampoline) + thumb; -#else unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) @@ -373,7 +349,6 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = (unsigned long)current->mm->context.sigpage + (idx << 2) + thumb; -#endif } regs->regs[0] = usig; -- cgit v1.2.3 From 2d071968a4052e58681ace6488e2625b2a30a7f7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 22 Jun 2020 13:13:58 +0100 Subject: arm64: compat: Remove 32-bit sigreturn code from the vDSO The sigreturn code in the compat vDSO is unused. Remove it. 
Reviewed-by: Vincenzo Frascino Reviewed-by: Ard Biesheuvel Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso32/Makefile | 1 - arch/arm64/kernel/vdso32/sigreturn.S | 58 ------------------------------------ arch/arm64/kernel/vdso32/vdso.lds.S | 12 -------- 3 files changed, 71 deletions(-) delete mode 100644 arch/arm64/kernel/vdso32/sigreturn.S (limited to 'arch') diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 7ea1e827e505..d88148bef6b0 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -140,7 +140,6 @@ hostprogs := $(munge) c-obj-vdso := note.o c-obj-vdso-gettimeofday := vgettimeofday.o -asm-obj-vdso := sigreturn.o ifneq ($(c-gettimeofday-y),) VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y) diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S deleted file mode 100644 index b0091064c3d6..000000000000 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This file provides both A32 and T32 versions, in accordance with the - * arm sigreturn code. - * - * Please read the comments in arch/arm64/kernel/vdso/sigreturn.S to - * understand some of the craziness in here. 
- * - * Copyright (C) 2018 ARM Limited - */ - -#include -#include -#include - - .text - - .arm - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_sigreturn_arm) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_sigreturn_arm) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_rt_sigreturn_arm) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_rt_sigreturn_arm) - - .thumb - .fnstart - .save {r0-r15} - .pad #COMPAT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_sigreturn_thumb) - mov r7, #__NR_compat_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_sigreturn_thumb) - - .fnstart - .save {r0-r15} - .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET - nop -SYM_CODE_START(__kernel_rt_sigreturn_thumb) - mov r7, #__NR_compat_rt_sigreturn - svc #0 - .fnend -SYM_CODE_END(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S index a3944927eaeb..337d03522048 100644 --- a/arch/arm64/kernel/vdso32/vdso.lds.S +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -64,19 +64,7 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_clock_getres; - __kernel_sigreturn_arm; - __kernel_sigreturn_thumb; - __kernel_rt_sigreturn_arm; - __kernel_rt_sigreturn_thumb; __vdso_clock_gettime64; local: *; }; } - -/* - * Make the sigreturn code visible to the kernel. 
- */ -VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; -VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; -VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; -VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; -- cgit v1.2.3 From 4dc9b282bf5fc80b1761bac467adf78cd417b777 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 19 Jun 2020 13:35:50 +0100 Subject: arm64: Depend on newer binutils when building PAC Versions of binutils prior to 2.33.1 don't understand the ELF notes that are added by modern compilers to indicate the PAC and BTI options used to build the code. This causes them to emit large numbers of warnings in the form: aarch64-linux-gnu-nm: warning: .tmp_vmlinux.kallsyms2: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0000000 during the kernel build which is currently causing quite a bit of disruption for automated build testing using clang. In commit 15cd0e675f3f76b (arm64: Kconfig: ptrauth: Add binutils version check to fix mismatch) we added a dependency on binutils to avoid this issue when building with versions of GCC that emit the notes but did not do so for clang as it was believed that the existing check for .cfi_negate_ra_state was already requiring a new enough binutils. This does not appear to be the case for some versions of binutils (eg, the binutils in Debian 10) so instead refactor so we require a new enough GNU binutils in all cases other than when we are using an old GCC version that does not emit notes. Other, more exotic, combinations of tools, such as using clang, lld and gas together, are possible and may have further problems, but rather than adding further version checks it looks like the most robust thing will be to just test that we can build cleanly with the configured tools. That will require more review and discussion, so do this for now to address the immediate problem disrupting build testing.
Reported-by: KernelCI Reported-by: Nick Desaulniers Signed-off-by: Mark Brown Reviewed-by: Nick Desaulniers Link: https://github.com/ClangBuiltLinux/linux/issues/1054 Link: https://lore.kernel.org/r/20200619123550.48098-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4ae2419c14a8..e391e6580bf7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1518,9 +1518,9 @@ config ARM64_PTR_AUTH default y depends on !KVM || ARM64_VHE depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC - # GCC 9.1 and later inserts a .note.gnu.property section note for PAC + # Modern compilers insert a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. - depends on !CC_IS_GCC || GCC_VERSION < 90100 || LD_VERSION >= 233010000 + depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100) depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help -- cgit v1.2.3 From 586745f1598ccf71b0a5a6df2222dee0a865954e Mon Sep 17 00:00:00 2001 From: yu kuai Date: Thu, 4 Jun 2020 20:42:06 +0800 Subject: ARM: imx5: add missing put_device() call in imx_suspend_alloc_ocram() If of_find_device_by_node() succeeds, imx_suspend_alloc_ocram() doesn't have a corresponding put_device() call. Thus add a jump target to fix the error handling in this function.
Fixes: 1579c7b9fe01 ("ARM: imx53: Set DDR pins to high impedance when in suspend to RAM.") Signed-off-by: yu kuai Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx5.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/pm-imx5.c b/arch/arm/mach-imx/pm-imx5.c index f057df813f83..e9962b48e30c 100644 --- a/arch/arm/mach-imx/pm-imx5.c +++ b/arch/arm/mach-imx/pm-imx5.c @@ -295,14 +295,14 @@ static int __init imx_suspend_alloc_ocram( if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, size); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } phys = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -312,6 +312,8 @@ static int __init imx_suspend_alloc_ocram( if (virt_out) *virt_out = virt; +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); -- cgit v1.2.3 From 4845446036fc9c13f43b54a65c9b757c14f5141b Mon Sep 17 00:00:00 2001 From: yu kuai Date: Thu, 4 Jun 2020 20:54:49 +0800 Subject: ARM: imx6: add missing put_device() call in imx6q_suspend_init() If of_find_device_by_node() succeeds, imx6q_suspend_init() doesn't have a corresponding put_device() call. Thus add a jump target to fix the error handling in this function.
Signed-off-by: yu kuai Signed-off-by: Shawn Guo --- arch/arm/mach-imx/pm-imx6.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index dd34dff13762..40c74b4c4d73 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -493,14 +493,14 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) if (!ocram_pool) { pr_warn("%s: ocram pool unavailable!\n", __func__); ret = -ENODEV; - goto put_node; + goto put_device; } ocram_base = gen_pool_alloc(ocram_pool, MX6Q_SUSPEND_OCRAM_SIZE); if (!ocram_base) { pr_warn("%s: unable to alloc ocram!\n", __func__); ret = -ENOMEM; - goto put_node; + goto put_device; } ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base); @@ -523,7 +523,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) ret = imx6_pm_get_base(&pm_info->mmdc_base, socdata->mmdc_compat); if (ret) { pr_warn("%s: failed to get mmdc base %d!\n", __func__, ret); - goto put_node; + goto put_device; } ret = imx6_pm_get_base(&pm_info->src_base, socdata->src_compat); @@ -570,7 +570,7 @@ static int __init imx6q_suspend_init(const struct imx6_pm_socdata *socdata) &imx6_suspend, MX6Q_SUSPEND_OCRAM_SIZE - sizeof(*pm_info)); - goto put_node; + goto put_device; pl310_cache_map_failed: iounmap(pm_info->gpc_base.vbase); @@ -580,6 +580,8 @@ iomuxc_map_failed: iounmap(pm_info->src_base.vbase); src_map_failed: iounmap(pm_info->mmdc_base.vbase); +put_device: + put_device(&pdev->dev); put_node: of_node_put(node); -- cgit v1.2.3 From 49a3b0e1c05ab3601100a723f7ea207dc99a492a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 24 Jun 2020 13:23:10 +0100 Subject: arm64: vdso: Only pass --no-eh-frame-hdr when linker supports it Commit 87676cfca141 ("arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline") unconditionally passes the '--no-eh-frame-hdr' option to the linker when building the native 
vDSO in an attempt to prevent generation of the .eh_frame_hdr section, the presence of which has been implicated in segfaults originating from the libgcc unwinder. Unfortunately, not all versions of binutils support this option, which has been shown to cause build failures in linux-next: | CALL scripts/atomic/check-atomics.sh | CALL scripts/checksyscalls.sh | LD arch/arm64/kernel/vdso/vdso.so.dbg | ld: unrecognized option '--no-eh-frame-hdr' | ld: use the --help option for usage information | arch/arm64/kernel/vdso/Makefile:64: recipe for target | 'arch/arm64/kernel/vdso/vdso.so.dbg' failed | make[1]: *** [arch/arm64/kernel/vdso/vdso.so.dbg] Error 1 | arch/arm64/Makefile:175: recipe for target 'vdso_prepare' failed | make: *** [vdso_prepare] Error 2 Only link the vDSO with '--no-eh-frame-hdr' when the linker supports it. If we end up with the section due to linker defaults, the absence of CFI information in the sigreturn trampoline will prevent the unwinder from breaking. Link: https://lore.kernel.org/r/7a7e31a8-9a7b-2428-ad83-2264f20bdc2d@hisilicon.com Fixes: 87676cfca141 ("arm64: vdso: Disable dwarf unwinding through the sigreturn trampoline") Reported-by: Shaokun Zhang Tested-by: Jon Hunter Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 1e5a940532da..97d3d3632093 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -23,8 +23,9 @@ btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti # potential future proofing if we end up with internal calls to the exported # routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so # preparation in build-time C")). 
-ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - -Bsymbolic --no-eh-frame-hdr --build-id -n $(btildflags-y) -T +ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ + -Bsymbolic $(call ld-option, --no-eh-frame-hdr) --build-id -n \ + $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -- cgit v1.2.3 From e56404e8e475c91489b2cca57f2c1b2bc5edf6b2 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Wed, 24 Jun 2020 15:33:28 +0300 Subject: arm64: vdso: Don't use gcc plugins for building vgettimeofday.c Don't use gcc plugins for building arch/arm64/kernel/vdso/vgettimeofday.c to avoid unneeded instrumentation. Signed-off-by: Alexander Popov Link: https://lore.kernel.org/r/20200624123330.83226-4-alex.popov@linux.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 97d3d3632093..45d5cfe46429 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -30,7 +30,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n -- cgit v1.2.3 From 2d21889f8b5c50f65f5162bc972b0b1626b97be2 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 24 Jun 2020 13:22:54 +0200 Subject: arm64: Don't insert a BTI instruction at inner labels Some ftrace features are broken since commit 714a8d02ca4d ("arm64: asm: Override SYM_FUNC_START when building the kernel with BTI"). 
For example the function_graph tracer: $ echo function_graph > /sys/kernel/debug/tracing/current_tracer [ 36.107016] WARNING: CPU: 0 PID: 115 at kernel/trace/ftrace.c:2691 ftrace_modify_all_code+0xc8/0x14c When ftrace_modify_graph_caller() attempts to write a branch at ftrace_graph_call, it finds the "BTI J" instruction inserted by SYM_INNER_LABEL() instead of a NOP, and aborts. It turns out we don't currently need the BTI landing pads inserted by SYM_INNER_LABEL: * ftrace_call and ftrace_graph_call are only used for runtime patching of the active tracer. The patched code is not reached from a branch. * install_el2_stub is reached from a CBZ instruction, which doesn't change PSTATE.BTYPE. * __guest_exit is reached from B instructions in the hyp-entry vectors, which aren't subject to BTI checks either. Remove the BTI annotation from SYM_INNER_LABEL. Fixes: 714a8d02ca4d ("arm64: asm: Override SYM_FUNC_START when building the kernel with BTI") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20200624112253.1602786-1-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 81fefd2a1d02..ba89a9af820a 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -12,7 +12,6 @@ * instead. 
*/ #define BTI_C hint 34 ; -#define BTI_J hint 36 ; /* * When using in-kernel BTI we need to ensure that PCS-conformant assembly @@ -43,11 +42,6 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ BTI_C -#define SYM_INNER_LABEL(name, linkage) \ - .type name SYM_T_NONE ASM_NL \ - SYM_ENTRY(name, linkage, SYM_A_NONE) \ - BTI_J - #endif /* -- cgit v1.2.3 From f4617be35b4b547e82d30993f56d631dfc2d5f88 Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Wed, 24 Jun 2020 18:04:06 +0530 Subject: arm64: kpti: Add KRYO{3, 4}XX silver CPU cores to kpti safelist QCOM KRYO{3,4}XX silver/LITTLE CPU cores are based on Cortex-A55 and are meltdown safe, hence add them to kpti_safe_list[]. Signed-off-by: Sai Prakash Ranjan Link: https://lore.kernel.org/r/20200624123406.3472-1-saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4ae41670c2e6..9f63053a63a9 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1290,6 +1290,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } }; char const *str = "kpti command line option"; -- cgit v1.2.3 From e3a9e681adb779b39565a28b3252c3be1033f994 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2020 18:21:16 +0200 Subject: x86/entry: Fixup bad_iret vs noinstr vmlinux.o: warning: objtool: fixup_bad_iret()+0x8e: call to memcpy() leaves .noinstr.text section Worse, with KASAN enabled there is no telling what memcpy() actually is. Force the use of __memcpy() which is our assembly implementation.
Reported-by: Marco Elver Suggested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200618144801.760070502@infradead.org --- arch/x86/kernel/traps.c | 6 +++--- arch/x86/lib/memcpy_64.S | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index af75109485c2..a7d157090572 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -690,13 +690,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) (struct bad_iret_stack *)__this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1; /* Copy the IRET target to the temporary storage. */ - memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); + __memcpy(&tmp.regs.ip, (void *)s->regs.sp, 5*8); /* Copy the remainder of the stack from the current stack. */ - memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); + __memcpy(&tmp, s, offsetof(struct bad_iret_stack, regs.ip)); /* Update the entry stack */ - memcpy(new_stack, &tmp, sizeof(tmp)); + __memcpy(new_stack, &tmp, sizeof(tmp)); BUG_ON(!user_mode(&new_stack->regs)); return new_stack; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 56b243b14c3a..bbcc05bcefad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -8,6 +8,8 @@ #include #include +.pushsection .noinstr.text, "ax" + /* * We build a jump to memcpy_orig by default which gets NOPped out on * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which @@ -184,6 +186,8 @@ SYM_FUNC_START_LOCAL(memcpy_orig) retq SYM_FUNC_END(memcpy_orig) +.popsection + #ifndef CONFIG_UML MCSAFE_TEST_CTL -- cgit v1.2.3 From c7aadc09321d8f9a1d3bd1e6d8a47222ecddf6c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2020 18:25:57 +0200 Subject: x86/entry: Increase entry_stack size to a full page Marco crashed in bad_iret with a Clang11/KCSAN build due to overflowing the stack. 
Now that we run C code on it, expand it to a full page. Suggested-by: Andy Lutomirski Reported-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lai Jiangshan Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200618144801.819246178@infradead.org --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 42cd333616c4..03b7c4ca425a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -370,7 +370,7 @@ struct x86_hw_tss { #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { - unsigned long words[64]; + char stack[PAGE_SIZE]; }; struct entry_stack_page { -- cgit v1.2.3 From 145a773aef83181d47ebab21bb33c89233aadb1e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 16 Jun 2020 13:28:36 +0200 Subject: x86/entry: Fix #UD vs WARN more vmlinux.o: warning: objtool: exc_invalid_op()+0x47: call to probe_kernel_read() leaves .noinstr.text section Since we use UD2 as a short-cut for 'CALL __WARN', treat it as such. Have the bare exception handler do the report_bug() thing. 
Fixes: 15a416e8aaa7 ("x86/entry: Treat BUG/WARN as NMI-like entries") Signed-off-by: Peter Zijlstra (Intel) Acked-by: Andy Lutomirski Link: https://lkml.kernel.org/r/20200622114713.GE577403@hirez.programming.kicks-ass.net --- arch/x86/kernel/traps.c | 72 ++++++++++++++++++++++++++----------------------- 1 file changed, 38 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a7d157090572..1d9ea2101b97 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,17 +84,16 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } -int is_valid_bugaddr(unsigned long addr) +__always_inline int is_valid_bugaddr(unsigned long addr) { - unsigned short ud; - if (addr < TASK_SIZE_MAX) return 0; - if (probe_kernel_address((unsigned short *)addr, ud)) - return 0; - - return ud == INSN_UD0 || ud == INSN_UD2; + /* + * We got #UD, if the text isn't readable we'd have gotten + * a different exception. + */ + return *(unsigned short *)addr == INSN_UD2; } static nokprobe_inline int @@ -216,40 +215,45 @@ static inline void handle_invalid_op(struct pt_regs *regs) ILL_ILLOPN, error_get_trap_addr(regs)); } -DEFINE_IDTENTRY_RAW(exc_invalid_op) +static noinstr bool handle_bug(struct pt_regs *regs) { - bool rcu_exit; + bool handled = false; + + if (!is_valid_bugaddr(regs->ip)) + return handled; /* - * Handle BUG/WARN like NMIs instead of like normal idtentries: - * if we bugged/warned in a bad RCU context, for example, the last - * thing we want is to BUG/WARN again in the idtentry code, ad - * infinitum. + * All lies, just get the WARN/BUG out. */ - if (!user_mode(regs) && is_valid_bugaddr(regs->ip)) { - enum bug_trap_type type; + instrumentation_begin(); + /* + * Since we're emulating a CALL with exceptions, restore the interrupt + * state to what it was at the exception site. 
+ */ + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_enable(); + if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) { + regs->ip += LEN_UD2; + handled = true; + } + if (regs->flags & X86_EFLAGS_IF) + raw_local_irq_disable(); + instrumentation_end(); - nmi_enter(); - instrumentation_begin(); - trace_hardirqs_off_finish(); - type = report_bug(regs->ip, regs); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); - instrumentation_end(); - nmi_exit(); + return handled; +} - if (type == BUG_TRAP_TYPE_WARN) { - /* Skip the ud2. */ - regs->ip += LEN_UD2; - return; - } +DEFINE_IDTENTRY_RAW(exc_invalid_op) +{ + bool rcu_exit; - /* - * Else, if this was a BUG and report_bug returns or if this - * was just a normal #UD, we want to continue onward and - * crash. - */ - } + /* + * We use UD2 as a short encoding for 'CALL __WARN', as such + * handle it before exception entry to avoid recursive WARN + * in case exception entry is the one triggering WARNs. + */ + if (!user_mode(regs) && handle_bug(regs)) + return; rcu_exit = idtentry_enter_cond_rcu(regs); instrumentation_begin(); -- cgit v1.2.3 From 8dfe804a4031ca6ba3a3efb2048534249b64f3a5 Mon Sep 17 00:00:00 2001 From: Jiping Ma Date: Mon, 11 May 2020 10:52:07 +0800 Subject: arm64: perf: Report the PC value in REGS_ABI_32 mode A 32-bit perf querying the registers of a compat task using REGS_ABI_32 will receive zeroes from w15, when it expects to find the PC. Return the PC value for register dwarf register 15 when returning register values for a compat task to perf. 
Cc: Acked-by: Mark Rutland Signed-off-by: Jiping Ma Link: https://lore.kernel.org/r/1589165527-188401-1-git-send-email-jiping.ma2@windriver.com [will: Shuffled code and added a comment] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_regs.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 0bbac612146e..666b225aeb3a 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -15,15 +15,34 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return 0; /* - * Compat (i.e. 32 bit) mode: - * - PC has been set in the pt_regs struct in kernel_entry, - * - Handle SP and LR here. + * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but + * we're stuck with it for ABI compatability reasons. + * + * For a 32-bit consumer inspecting a 32-bit task, then it will look at + * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h). + * These correspond directly to a prefix of the registers saved in our + * 'struct pt_regs', with the exception of the PC, so we copy that down + * (x15 corresponds to SP_hyp in the architecture). + * + * So far, so good. + * + * The oddity arises when a 64-bit consumer looks at a 32-bit task and + * asks for registers beyond PERF_REG_ARM_MAX. In this case, we return + * SP_usr, LR_usr and PC in the positions where the AArch64 SP, LR and + * PC registers would normally live. The initial idea was to allow a + * 64-bit unwinder to unwind a 32-bit task and, although it's not clear + * how well that works in practice, somebody might be relying on it. + * + * At the time we make a sample, we don't know whether the consumer is + * 32-bit or 64-bit, so we have to cater for both possibilities. 
*/ if (compat_user_mode(regs)) { if ((u32)idx == PERF_REG_ARM64_SP) return regs->compat_sp; if ((u32)idx == PERF_REG_ARM64_LR) return regs->compat_lr; + if (idx == 15) + return regs->pc; } if ((u32)idx == PERF_REG_ARM64_SP) -- cgit v1.2.3 From 108447fd0d1a34b0929cd26dc637c917a734ebab Mon Sep 17 00:00:00 2001 From: Sai Prakash Ranjan Date: Thu, 25 Jun 2020 16:01:23 +0530 Subject: arm64: Add KRYO{3,4}XX silver CPU cores to SSB safelist QCOM KRYO{3,4}XX silver/LITTLE CPU cores are based on Cortex-A55 and are SSB safe, hence add them to SSB safelist -> arm64_ssb_cpus[]. Reported-by: Stephen Boyd Signed-off-by: Sai Prakash Ranjan Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20200625103123.7240-1-saiprakash.ranjan@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ad06d6802d2e..cf50c53e9357 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -460,6 +460,8 @@ static const struct midr_range arm64_ssb_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), {}, }; -- cgit v1.2.3 From a0fc3b32893b29a7b3a2771b6d63bae16cb1e8de Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 09:13:22 +0800 Subject: riscv: Add -fPIC option to CFLAGS_vgettimeofday.o The time related vDSO functions use a variable, vdso_data, to access the vDSO data page to get the system time information. Because the vdso_data for CFLAGS_vgettimeofday.o is an external variable defined in vdso.o, the CFLAGS_vgettimeofday.o should be compiled with -fPIC to ensure that vdso_data is addressable. 
Reported-by: kernel test robot Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 38ba55b0eb9d..29cf052f6541 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,7 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ifneq ($(c-gettimeofday-y),) - CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif # Build rules -- cgit v1.2.3 From e93b327dbf3d37f0dfb123b58f9627ad17be652e Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:40:21 +0800 Subject: riscv: Add extern declarations for vDSO time-related functions Add extern declarations for the vDSO time-related functions to tell the compiler that these functions are used elsewhere, avoiding the "no previous prototype" compile warning.
Reported-by: kernel test robot Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/vgettimeofday.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c index d264943e2e47..cc0d80699c31 100644 --- a/arch/riscv/kernel/vdso/vgettimeofday.c +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -9,16 +9,22 @@ #include #include +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) { return __cvdso_clock_getres(clock_id, res); -- cgit v1.2.3 From e05d57dcb8c71492268ff46ba9bfe9a9cfb1f95d Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 23 Jun 2020 09:50:54 +0000 Subject: riscv: Fixup __vdso_gettimeofday broke dynamic ftrace For linux-5.8-rc1, enable ftrace of riscv will cause boot panic: [ 2.388980] Run /sbin/init as init process [ 2.529938] init[39]: unhandled signal 4 code 0x1 at 0x0000003ff449e000 [ 2.531078] CPU: 0 PID: 39 Comm: init Not tainted 5.8.0-rc1-dirty #13 [ 2.532719] epc: 0000003ff449e000 ra : 0000003ff449e954 sp : 0000003fffedb900 [ 2.534005] gp : 00000000000e8528 tp : 0000003ff449d800 t0 : 000000000000001e [ 2.534965] t1 : 000000000000000a t2 : 0000003fffedb89e s0 : 0000003fffedb920 [ 2.536279] s1 : 0000003fffedb940 a0 : 0000003ff43d4b2c a1 : 0000000000000000 [ 2.537334] a2 : 0000000000000001 a3 : 0000000000000000 a4 : fffffffffbad8000 [ 2.538466] a5 : 0000003ff449e93a a6 : 0000000000000000 a7 : 0000000000000000 [ 2.539511] s2 
: 0000000000000000 s3 : 0000003ff448412c s4 : 0000000000000010 [ 2.541260] s5 : 0000000000000016 s6 : 00000000000d0a30 s7 : 0000003fffedba70 [ 2.542152] s8 : 0000000000000000 s9 : 0000000000000000 s10: 0000003fffedb960 [ 2.543335] s11: 0000000000000000 t3 : 0000000000000000 t4 : 0000003fffedb8a0 [ 2.544471] t5 : 0000000000000000 t6 : 0000000000000000 [ 2.545730] status: 0000000000004020 badaddr: 00000000464c457f cause: 0000000000000002 [ 2.549867] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004 [ 2.551267] CPU: 0 PID: 1 Comm: init Not tainted 5.8.0-rc1-dirty #13 [ 2.552061] Call Trace: [ 2.552626] [] walk_stackframe+0x0/0xc4 [ 2.553486] [] show_stack+0x40/0x4c [ 2.553995] [] dump_stack+0x7a/0x98 [ 2.554615] [] panic+0x114/0x2f4 [ 2.555395] [] do_exit+0x89c/0x8c2 [ 2.555949] [] do_group_exit+0x3a/0x90 [ 2.556715] [] get_signal+0xe2/0x6e6 [ 2.557388] [] do_notify_resume+0x6a/0x37a [ 2.558089] [] ret_from_exception+0x0/0xc "ra:0x3ff449e954" is the return address of "call _mcount" in the prologue of __vdso_gettimeofday(). Without proper relocation, the pc jumps to 0x0000003ff449e000 (vdso map base) and takes an illegal instruction trap. The solution comes from arch/arm64/kernel/vdso/Makefile: CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) - CC_FLAGS_SCS is the ShadowCallStack feature in Clang and is only implemented for arm64, so it is of no use for riscv.
Fixes: ad5d1122b82f ("riscv: use vDSO common flow to reduce the latency of the time-related functions") Cc: stable@vger.kernel.org Signed-off-by: Guo Ren Reviewed-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 29cf052f6541..e4c7c2c8a02f 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -27,6 +27,9 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o vdso-syms.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +# Disable -pg to prevent insert call site +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os + # Disable gcov profiling for VDSO code GCOV_PROFILE := n -- cgit v1.2.3 From 313a5257b84c26b7f080c5d294aabe7d38ca439c Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Thu, 25 Jun 2020 20:29:17 -0700 Subject: openrisc: fix boot oops when DEBUG_VM is enabled Since v5.8-rc1 OpenRISC Linux fails to boot when DEBUG_VM is enabled. This has been bisected to commit 42fc541404f2 ("mmap locking API: add mmap_assert_locked() and mmap_assert_write_locked()"). The added locking checks exposed the issue that OpenRISC was not taking the mmap lock during page walks for DMA operations. This patch locks and unlocks the mmap lock for page walking.
Link: http://lkml.kernel.org/r/20200617090247.1680188-1-shorne@gmail.com Fixes: 42fc541404f2 ("mmap locking API: add mmap_assert_locked() and mmap_assert_write_locked()") Signed-off-by: Stafford Horne Reviewed-by: Michel Lespinasse Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: Jason Gunthorpe Cc: Steven Price Cc: Thomas Hellstrom Cc: Robin Murphy Cc: Vlastimil Babka Cc: Daniel Jordan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/openrisc/kernel/dma.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index c152a68811dd..345727638d52 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -74,8 +74,11 @@ void *arch_dma_set_uncached(void *cpu_addr, size_t size) * We need to iterate through the pages, clearing the dcache for * them and setting the cache-inhibit bit. */ + mmap_read_lock(&init_mm); error = walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops, NULL); + mmap_read_unlock(&init_mm); + if (error) return ERR_PTR(error); return cpu_addr; @@ -85,9 +88,11 @@ void arch_dma_clear_uncached(void *cpu_addr, size_t size) { unsigned long va = (unsigned long)cpu_addr; + mmap_read_lock(&init_mm); /* walk_page_range shouldn't be able to fail here */ WARN_ON(walk_page_range(&init_mm, va, va + size, &clear_nocache_walk_ops, NULL)); + mmap_read_unlock(&init_mm); } void arch_sync_dma_for_device(phys_addr_t addr, size_t size, -- cgit v1.2.3 From 800e26b81311dcc0080b8784f80620bb8f2baaa5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:40 -0700 Subject: x86/hyperv: allocate the hypercall page with only read and execute bits Patch series "fix a hyperv W^X violation and remove vmalloc_exec" Dexuan reported a W^X violation in the Hyper-V hypercall page, caused by switching it to be allocated using vmalloc_exec. The problem is that PAGE_KERNEL_EXEC as used by vmalloc_exec actually sets writable permissions in the pte.
This series fixes the issue by switching to the low-level __vmalloc_node_range interface that allows specifying more detailed permissions instead. It then also open codes the other two callers and removes the somewhat confusing vmalloc_exec interface. Peter noted that the Hyper-V hypercall page allocation also has another long-standing issue in that it shouldn't use the full vmalloc but just the module space. This issue is so far theoretical as the allocation is done early in the boot process. I plan to fix it with another bigger series for 5.9. This patch (of 3): Avoid a W^X violation caused by the fact that PAGE_KERNEL_EXEC includes the writable bit. For this, resurrect the removed PAGE_KERNEL_RX definition, but as PAGE_KERNEL_ROX to match arm64 and powerpc. Link: http://lkml.kernel.org/r/20200618064307.32739-2-hch@lst.de Fixes: 78bb17f76edc ("x86/hyperv: use vmalloc_exec for the hypercall page") Signed-off-by: Christoph Hellwig Reported-by: Dexuan Cui Tested-by: Vitaly Kuznetsov Acked-by: Wei Liu Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Will Deacon Cc: Jessica Yu Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/hyperv/hv_init.c | 4 +++- arch/x86/include/asm/pgtable_types.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a54c6a401581..2bdc72e6890e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -375,7 +375,9 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = vmalloc_exec(PAGE_SIZE); + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __func__); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/arch/x86/include/asm/pgtable_types.h
b/arch/x86/include/asm/pgtable_types.h index 2da1f95b88d7..816b31c68550 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -194,6 +194,7 @@ enum page_cache_mode { #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) +#define __PAGE_KERNEL_ROX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) @@ -219,6 +220,7 @@ enum page_cache_mode { #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) +#define PAGE_KERNEL_ROX __pgprot_mask(__PAGE_KERNEL_ROX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) -- cgit v1.2.3 From 10d5e97c1bf816facbc7c431c6caf47ee35fc1ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:43 -0700 Subject: arm64: use PAGE_KERNEL_ROX directly in alloc_insn_page Use PAGE_KERNEL_ROX directly instead of allocating RWX and setting the page read-only just after the allocation. 
Link: http://lkml.kernel.org/r/20200618064307.32739-3-hch@lst.de Signed-off-by: Christoph Hellwig Acked-by: David Hildenbrand Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Dexuan Cui Cc: Jessica Yu Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/kernel/probes/kprobes.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d1c95dcf1d78..cbe49cd117cf 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -120,15 +120,9 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) void *alloc_insn_page(void) { - void *page; - - page = vmalloc_exec(PAGE_SIZE); - if (page) { - set_memory_ro((unsigned long)page, 1); - set_vm_flush_reset_perms(page); - } - - return page; + return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __func__); } /* arm kprobe: install breakpoint in text */ -- cgit v1.2.3 From 0f77ce26ebcf6ea384421d2dd47b924b83649692 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Thu, 18 Jun 2020 19:24:56 +0200 Subject: Revert "ARM: sti: Implement dummy L2 cache's write_sec" This reverts commit 7b8e0188fa717cd9abc4fb52587445b421835c2a. Initially, STiH410-B2260 was supposed to be secured, which is why l2c_write_sec was stubbed to avoid secure register access from the non-secure world. But by default, STiH410-B2260 runs in non-secure mode, so L2 cache register accesses are authorized and the l2c_write_sec stub is not needed. With this patch, the L2 cache is configured and performance is enhanced.
Link: https://lore.kernel.org/r/20200618172456.29475-1-patrice.chotard@st.com Signed-off-by: Patrice Chotard Cc: Alain Volmat Signed-off-by: Arnd Bergmann --- arch/arm/mach-sti/board-dt.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c index dcb98937fcf5..ffecbf29646f 100644 --- a/arch/arm/mach-sti/board-dt.c +++ b/arch/arm/mach-sti/board-dt.c @@ -20,14 +20,6 @@ static const char *const stih41x_dt_match[] __initconst = { NULL }; -static void sti_l2_write_sec(unsigned long val, unsigned reg) -{ - /* - * We can't write to secure registers as we are in non-secure - * mode, until we have some SMI service available. - */ -} - DT_MACHINE_START(STM, "STi SoC with Flattened Device Tree") .dt_compat = stih41x_dt_match, .l2c_aux_val = L2C_AUX_CTRL_SHARED_OVERRIDE | @@ -36,5 +28,4 @@ DT_MACHINE_START(STM, "STi SoC with Flattened Device Tree") L2C_AUX_CTRL_WAY_SIZE(4), .l2c_aux_mask = 0xc0000fff, .smp = smp_ops(sti_smp_ops), - .l2c_write_sec = sti_l2_write_sec, MACHINE_END -- cgit v1.2.3