summaryrefslogtreecommitdiffstats
path: root/arch/riscv
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv')
-rw-r--r--arch/riscv/Kconfig22
-rw-r--r--arch/riscv/Kconfig.erratas1
-rw-r--r--arch/riscv/Kconfig.socs4
-rw-r--r--arch/riscv/boot/dts/canaan/k210.dtsi3
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi25
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts115
-rw-r--r--arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi254
-rw-r--r--arch/riscv/configs/defconfig1
-rw-r--r--arch/riscv/configs/nommu_k210_sdcard_defconfig2
-rw-r--r--arch/riscv/configs/rv32_defconfig1
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/asm/csr.h67
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/hwcap.h26
-rw-r--r--arch/riscv/include/asm/kvm_host.h1
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_sbi.h5
-rw-r--r--arch/riscv/include/asm/page.h3
-rw-r--r--arch/riscv/include/asm/perf_event.h72
-rw-r--r--arch/riscv/include/asm/pgalloc.h49
-rw-r--r--arch/riscv/include/asm/pgtable-64.h106
-rw-r--r--arch/riscv/include/asm/pgtable.h7
-rw-r--r--arch/riscv/include/asm/sbi.h122
-rw-r--r--arch/riscv/include/asm/uaccess.h33
-rw-r--r--arch/riscv/kernel/Makefile3
-rw-r--r--arch/riscv/kernel/cpu.c70
-rw-r--r--arch/riscv/kernel/cpu_ops_sbi.c2
-rw-r--r--arch/riscv/kernel/cpufeature.c130
-rw-r--r--arch/riscv/kernel/entry.S14
-rw-r--r--arch/riscv/kernel/module.c21
-rw-r--r--arch/riscv/kernel/perf_callchain.c4
-rw-r--r--arch/riscv/kernel/perf_event.c485
-rw-r--r--arch/riscv/kernel/ptrace.c5
-rw-r--r--arch/riscv/kernel/sbi.c72
-rw-r--r--arch/riscv/kernel/setup.c3
-rw-r--r--arch/riscv/kernel/signal.c6
-rw-r--r--arch/riscv/kernel/trace_irq.c27
-rw-r--r--arch/riscv/kernel/trace_irq.h11
-rw-r--r--arch/riscv/kvm/vcpu_exit.c22
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c19
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c18
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c44
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c20
-rw-r--r--arch/riscv/kvm/vcpu_switch.S60
-rw-r--r--arch/riscv/lib/memmove.S368
-rw-r--r--arch/riscv/mm/Makefile3
-rw-r--r--arch/riscv/mm/init.c176
-rw-r--r--arch/riscv/mm/kasan_init.c163
-rw-r--r--arch/riscv/mm/physaddr.c4
48 files changed, 1755 insertions, 916 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 5adcbd9b5e88..ea8ec8a960bd 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -40,6 +40,7 @@ config RISCV
select ARCH_USE_MEMTEST
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
+ select ARCH_WANT_GENERAL_HUGETLB
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select BUILDTIME_TABLE_SORT if MMU
@@ -101,6 +102,7 @@ config RISCV
select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_RSEQ
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA if MODULES
@@ -151,7 +153,7 @@ config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT
default 0x80000000 if 64BIT && !MMU
- default 0xffffaf8000000000 if 64BIT
+ default 0xff60000000000000 if 64BIT
config KASAN_SHADOW_OFFSET
hex
@@ -171,9 +173,6 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SELECT_MEMORY_MODEL
def_bool ARCH_SPARSEMEM_ENABLE
-config ARCH_WANT_GENERAL_HUGETLB
- def_bool y
-
config ARCH_SUPPORTS_UPROBES
def_bool y
@@ -202,7 +201,7 @@ config FIX_EARLYCON_MEM
config PGTABLE_LEVELS
int
- default 4 if 64BIT
+ default 5 if 64BIT
default 2
config LOCKDEP_SUPPORT
@@ -333,19 +332,6 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
-menu "supported PMU type"
- depends on PERF_EVENTS
-
-config RISCV_BASE_PMU
- bool "Base Performance Monitoring Unit"
- def_bool y
- help
- A base PMU that serves as a reference implementation and has limited
- feature of perf. It can run on any RISC-V machines so serves as the
- fallback, but this option can also be disable to reduce kernel size.
-
-endmenu
-
config FPU
bool "FPU support"
default y
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index b44d6ecdb46e..0aacd7052585 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -2,6 +2,7 @@ menu "CPU errata selection"
config RISCV_ERRATA_ALTERNATIVE
bool "RISC-V alternative scheme"
+ depends on !XIP_KERNEL
default y
help
This Kconfig allows the kernel to automatically patch the
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 6ec44a22278a..c112ab2a9052 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -14,8 +14,8 @@ config SOC_SIFIVE
select CLK_SIFIVE
select CLK_SIFIVE_PRCI
select SIFIVE_PLIC
- select RISCV_ERRATA_ALTERNATIVE
- select ERRATA_SIFIVE
+ select RISCV_ERRATA_ALTERNATIVE if !XIP_KERNEL
+ select ERRATA_SIFIVE if !XIP_KERNEL
help
This enables support for SiFive SoC platform hardware.
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
index 56f57118c633..44d338514761 100644
--- a/arch/riscv/boot/dts/canaan/k210.dtsi
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -113,7 +113,8 @@
compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
reg = <0xC000000 0x4000000>;
interrupt-controller;
- interrupts-extended = <&cpu0_intc 11>, <&cpu1_intc 11>;
+ interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>,
+ <&cpu1_intc 11>, <&cpu1_intc 9>;
riscv,ndev = <65>;
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
new file mode 100644
index 000000000000..854320e17b28
--- /dev/null
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-fabric.dtsi
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
+
+/ {
+ core_pwm0: pwm@41000000 {
+ compatible = "microchip,corepwm-rtl-v4";
+ reg = <0x0 0x41000000 0x0 0xF0>;
+ microchip,sync-update-mask = /bits/ 32 <0>;
+ #pwm-cells = <2>;
+ clocks = <&clkcfg CLK_FIC3>;
+ status = "disabled";
+ };
+
+ i2c2: i2c@44000000 {
+ compatible = "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x44000000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&clkcfg CLK_FIC3>;
+ interrupt-parent = <&plic>;
+ interrupts = <122>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
index 0c748ae1b006..cd2fe80fa81a 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright (c) 2020 Microchip Technology Inc */
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
@@ -13,25 +13,34 @@
compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
aliases {
- ethernet0 = &emac1;
- serial0 = &serial0;
- serial1 = &serial1;
- serial2 = &serial2;
- serial3 = &serial3;
+ ethernet0 = &mac1;
+ serial0 = &mmuart0;
+ serial1 = &mmuart1;
+ serial2 = &mmuart2;
+ serial3 = &mmuart3;
+ serial4 = &mmuart4;
};
chosen {
- stdout-path = "serial0:115200n8";
+ stdout-path = "serial1:115200n8";
};
cpus {
timebase-frequency = <RTCCLK_FREQ>;
};
- memory@80000000 {
+ ddrc_cache_lo: memory@80000000 {
device_type = "memory";
- reg = <0x0 0x80000000 0x0 0x40000000>;
- clocks = <&clkcfg 26>;
+ reg = <0x0 0x80000000 0x0 0x2e000000>;
+ clocks = <&clkcfg CLK_DDRC>;
+ status = "okay";
+ };
+
+ ddrc_cache_hi: memory@1000000000 {
+ device_type = "memory";
+ reg = <0x10 0x0 0x0 0x40000000>;
+ clocks = <&clkcfg CLK_DDRC>;
+ status = "okay";
};
};
@@ -39,19 +48,19 @@
clock-frequency = <600000000>;
};
-&serial0 {
+&mmuart1 {
status = "okay";
};
-&serial1 {
+&mmuart2 {
status = "okay";
};
-&serial2 {
+&mmuart3 {
status = "okay";
};
-&serial3 {
+&mmuart4 {
status = "okay";
};
@@ -61,28 +70,92 @@
bus-width = <4>;
disable-wp;
cap-sd-highspeed;
+ cap-mmc-highspeed;
card-detect-delay = <200>;
+ mmc-ddr-1_8v;
+ mmc-hs200-1_8v;
sd-uhs-sdr12;
sd-uhs-sdr25;
sd-uhs-sdr50;
sd-uhs-sdr104;
};
-&emac0 {
+&spi0 {
+ status = "okay";
+};
+
+&spi1 {
+ status = "okay";
+};
+
+&qspi {
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+};
+
+&i2c1 {
+ status = "okay";
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&mac0 {
phy-mode = "sgmii";
phy-handle = <&phy0>;
- phy0: ethernet-phy@8 {
- reg = <8>;
- ti,fifo-depth = <0x01>;
- };
};
-&emac1 {
+&mac1 {
status = "okay";
phy-mode = "sgmii";
phy-handle = <&phy1>;
phy1: ethernet-phy@9 {
reg = <9>;
- ti,fifo-depth = <0x01>;
+ ti,fifo-depth = <0x1>;
};
+ phy0: ethernet-phy@8 {
+ reg = <8>;
+ ti,fifo-depth = <0x1>;
+ };
+};
+
+&gpio2 {
+ interrupts = <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>,
+ <53>, <53>, <53>, <53>;
+ status = "okay";
+};
+
+&rtc {
+ status = "okay";
+};
+
+&usb {
+ status = "okay";
+ dr_mode = "host";
+};
+
+&mbox {
+ status = "okay";
+};
+
+&syscontroller {
+ status = "okay";
+};
+
+&pcie {
+ status = "okay";
+};
+
+&core_pwm0 {
+ status = "okay";
};
diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
index 869aaf0d5c06..c5c9d1360de0 100644
--- a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright (c) 2020 Microchip Technology Inc */
+/* Copyright (c) 2020-2021 Microchip Technology Inc */
/dts-v1/;
+#include "dt-bindings/clock/microchip,mpfs-clock.h"
+#include "microchip-mpfs-fabric.dtsi"
/ {
#address-cells = <2>;
@@ -13,8 +15,7 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@0 {
- clock-frequency = <0>;
+ cpu0: cpu@0 {
compatible = "sifive,e51", "sifive,rocket0", "riscv";
device_type = "cpu";
i-cache-block-size = <64>;
@@ -22,6 +23,7 @@
i-cache-size = <16384>;
reg = <0>;
riscv,isa = "rv64imac";
+ clocks = <&clkcfg CLK_CPU>;
status = "disabled";
cpu0_intc: interrupt-controller {
@@ -31,8 +33,7 @@
};
};
- cpu@1 {
- clock-frequency = <0>;
+ cpu1: cpu@1 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -48,6 +49,7 @@
mmu-type = "riscv,sv39";
reg = <1>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -58,8 +60,7 @@
};
};
- cpu@2 {
- clock-frequency = <0>;
+ cpu2: cpu@2 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -75,6 +76,7 @@
mmu-type = "riscv,sv39";
reg = <2>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -85,8 +87,7 @@
};
};
- cpu@3 {
- clock-frequency = <0>;
+ cpu3: cpu@3 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -102,6 +103,7 @@
mmu-type = "riscv,sv39";
reg = <3>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
@@ -112,8 +114,7 @@
};
};
- cpu@4 {
- clock-frequency = <0>;
+ cpu4: cpu@4 {
compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
d-cache-block-size = <64>;
d-cache-sets = <64>;
@@ -129,6 +130,7 @@
mmu-type = "riscv,sv39";
reg = <4>;
riscv,isa = "rv64imafdc";
+ clocks = <&clkcfg CLK_CPU>;
tlb-split;
status = "okay";
cpu4_intc: interrupt-controller {
@@ -150,8 +152,9 @@
compatible = "simple-bus";
ranges;
- cache-controller@2010000 {
+ cctrllr: cache-controller@2010000 {
compatible = "sifive,fu540-c000-ccache", "cache";
+ reg = <0x0 0x2010000 0x0 0x1000>;
cache-block-size = <64>;
cache-level = <2>;
cache-sets = <1024>;
@@ -159,10 +162,9 @@
cache-unified;
interrupt-parent = <&plic>;
interrupts = <1>, <2>, <3>;
- reg = <0x0 0x2010000 0x0 0x1000>;
};
- clint@2000000 {
+ clint: clint@2000000 {
compatible = "sifive,fu540-c000-clint", "sifive,clint0";
reg = <0x0 0x2000000 0x0 0xC000>;
interrupts-extended = <&cpu0_intc 3>, <&cpu0_intc 7>,
@@ -186,15 +188,6 @@
riscv,ndev = <186>;
};
- dma@3000000 {
- compatible = "sifive,fu540-c000-pdma";
- reg = <0x0 0x3000000 0x0 0x8000>;
- interrupt-parent = <&plic>;
- interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
- <30>;
- #dma-cells = <1>;
- };
-
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>;
@@ -202,7 +195,7 @@
#clock-cells = <1>;
};
- serial0: serial@20000000 {
+ mmuart0: serial@20000000 {
compatible = "ns16550a";
reg = <0x0 0x20000000 0x0 0x400>;
reg-io-width = <4>;
@@ -210,11 +203,11 @@
interrupt-parent = <&plic>;
interrupts = <90>;
current-speed = <115200>;
- clocks = <&clkcfg 8>;
- status = "disabled";
+ clocks = <&clkcfg CLK_MMUART0>;
+ status = "disabled"; /* Reserved for the HSS */
};
- serial1: serial@20100000 {
+ mmuart1: serial@20100000 {
compatible = "ns16550a";
reg = <0x0 0x20100000 0x0 0x400>;
reg-io-width = <4>;
@@ -222,11 +215,11 @@
interrupt-parent = <&plic>;
interrupts = <91>;
current-speed = <115200>;
- clocks = <&clkcfg 9>;
+ clocks = <&clkcfg CLK_MMUART1>;
status = "disabled";
};
- serial2: serial@20102000 {
+ mmuart2: serial@20102000 {
compatible = "ns16550a";
reg = <0x0 0x20102000 0x0 0x400>;
reg-io-width = <4>;
@@ -234,11 +227,11 @@
interrupt-parent = <&plic>;
interrupts = <92>;
current-speed = <115200>;
- clocks = <&clkcfg 10>;
+ clocks = <&clkcfg CLK_MMUART2>;
status = "disabled";
};
- serial3: serial@20104000 {
+ mmuart3: serial@20104000 {
compatible = "ns16550a";
reg = <0x0 0x20104000 0x0 0x400>;
reg-io-width = <4>;
@@ -246,7 +239,19 @@
interrupt-parent = <&plic>;
interrupts = <93>;
current-speed = <115200>;
- clocks = <&clkcfg 11>;
+ clocks = <&clkcfg CLK_MMUART3>;
+ status = "disabled";
+ };
+
+ mmuart4: serial@20106000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20106000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <94>;
+ clocks = <&clkcfg CLK_MMUART4>;
+ current-speed = <115200>;
status = "disabled";
};
@@ -255,37 +260,196 @@
compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
reg = <0x0 0x20008000 0x0 0x1000>;
interrupt-parent = <&plic>;
- interrupts = <88>, <89>;
- clocks = <&clkcfg 6>;
+ interrupts = <88>;
+ clocks = <&clkcfg CLK_MMC>;
max-frequency = <200000000>;
status = "disabled";
};
- emac0: ethernet@20110000 {
+ spi0: spi@20108000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20108000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <54>;
+ clocks = <&clkcfg CLK_SPI0>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ spi1: spi@20109000 {
+ compatible = "microchip,mpfs-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x20109000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <55>;
+ clocks = <&clkcfg CLK_SPI1>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ qspi: spi@21000000 {
+ compatible = "microchip,mpfs-qspi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x0 0x21000000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <85>;
+ clocks = <&clkcfg CLK_QSPI>;
+ spi-max-frequency = <25000000>;
+ status = "disabled";
+ };
+
+ i2c0: i2c@2010a000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010a000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <58>;
+ clocks = <&clkcfg CLK_I2C0>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ i2c1: i2c@2010b000 {
+ compatible = "microchip,mpfs-i2c", "microchip,corei2c-rtl-v7";
+ reg = <0x0 0x2010b000 0x0 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <&plic>;
+ interrupts = <61>;
+ clocks = <&clkcfg CLK_I2C1>;
+ clock-frequency = <100000>;
+ status = "disabled";
+ };
+
+ mac0: ethernet@20110000 {
compatible = "cdns,macb";
reg = <0x0 0x20110000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupt-parent = <&plic>;
- interrupts = <64>, <65>, <66>, <67>;
+ interrupts = <64>, <65>, <66>, <67>, <68>, <69>;
local-mac-address = [00 00 00 00 00 00];
- clocks = <&clkcfg 4>, <&clkcfg 2>;
+ clocks = <&clkcfg CLK_MAC0>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
status = "disabled";
- #address-cells = <1>;
- #size-cells = <0>;
};
- emac1: ethernet@20112000 {
+ mac1: ethernet@20112000 {
compatible = "cdns,macb";
reg = <0x0 0x20112000 0x0 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
interrupt-parent = <&plic>;
- interrupts = <70>, <71>, <72>, <73>;
+ interrupts = <70>, <71>, <72>, <73>, <74>, <75>;
local-mac-address = [00 00 00 00 00 00];
- clocks = <&clkcfg 5>, <&clkcfg 2>;
- status = "disabled";
+ clocks = <&clkcfg CLK_MAC1>, <&clkcfg CLK_AHB>;
clock-names = "pclk", "hclk";
- #address-cells = <1>;
- #size-cells = <0>;
+ status = "disabled";
+ };
+
+ gpio0: gpio@20120000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20120000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio1: gpio@20121000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <000 0x20121000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO1>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ gpio2: gpio@20122000 {
+ compatible = "microchip,mpfs-gpio";
+ reg = <0x0 0x20122000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clocks = <&clkcfg CLK_GPIO2>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ status = "disabled";
+ };
+
+ rtc: rtc@20124000 {
+ compatible = "microchip,mpfs-rtc";
+ reg = <0x0 0x20124000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <80>, <81>;
+ clocks = <&clkcfg CLK_RTC>;
+ clock-names = "rtc";
+ status = "disabled";
+ };
+
+ usb: usb@20201000 {
+ compatible = "microchip,mpfs-musb";
+ reg = <0x0 0x20201000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <86>, <87>;
+ clocks = <&clkcfg CLK_USB>;
+ interrupt-names = "dma","mc";
+ status = "disabled";
};
+ pcie: pcie@2000000000 {
+ compatible = "microchip,pcie-host-1.0";
+ #address-cells = <0x3>;
+ #interrupt-cells = <0x1>;
+ #size-cells = <0x2>;
+ device_type = "pci";
+ reg = <0x20 0x0 0x0 0x8000000>, <0x0 0x43000000 0x0 0x10000>;
+ reg-names = "cfg", "apb";
+ bus-range = <0x0 0x7f>;
+ interrupt-parent = <&plic>;
+ interrupts = <119>;
+ interrupt-map = <0 0 0 1 &pcie_intc 0>,
+ <0 0 0 2 &pcie_intc 1>,
+ <0 0 0 3 &pcie_intc 2>,
+ <0 0 0 4 &pcie_intc 3>;
+ interrupt-map-mask = <0 0 0 7>;
+ clocks = <&clkcfg CLK_FIC0>, <&clkcfg CLK_FIC1>, <&clkcfg CLK_FIC3>;
+ clock-names = "fic0", "fic1", "fic3";
+ ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
+ msi-parent = <&pcie>;
+ msi-controller;
+ microchip,axi-m-atr0 = <0x10 0x0>;
+ status = "disabled";
+ pcie_intc: legacy-interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+ };
+
+ mbox: mailbox@37020000 {
+ compatible = "microchip,mpfs-mailbox";
+ reg = <0x0 0x37020000 0x0 0x1000>, <0x0 0x2000318C 0x0 0x40>;
+ interrupt-parent = <&plic>;
+ interrupts = <96>;
+ #mbox-cells = <1>;
+ status = "disabled";
+ };
+
+ syscontroller: syscontroller {
+ compatible = "microchip,mpfs-sys-controller";
+ mboxes = <&mbox 0>;
+ };
};
};
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index f120fcc43d0a..7cd10ded7bf8 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -108,6 +108,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
index 2a82a3b2992b..af64b95e88cc 100644
--- a/arch/riscv/configs/nommu_k210_sdcard_defconfig
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -23,7 +23,7 @@ CONFIG_SLOB=y
CONFIG_SOC_CANAAN=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
+CONFIG_CMDLINE="earlycon console=ttySIF0 root=/dev/mmcblk0p1 rootwait ro"
CONFIG_CMDLINE_FORCE=y
# CONFIG_SECCOMP is not set
# CONFIG_STACKPROTECTOR is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 8b56a7f1eb06..e0e5c7c09ab8 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -100,6 +100,7 @@ CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 57b86fd9916c..5edf5b8587e7 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -2,5 +2,6 @@
generic-y += early_ioremap.h
generic-y += flat.h
generic-y += kvm_para.h
+generic-y += parport.h
generic-y += user.h
generic-y += vmlinux.lds.h
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index ae711692eec9..e935f27b10fd 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -47,6 +47,7 @@
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_MODE_57 _AC(0xa000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
@@ -65,6 +66,7 @@
#define IRQ_S_EXT 9
#define IRQ_VS_EXT 10
#define IRQ_M_EXT 11
+#define IRQ_PMU_OVF 13
/* Exception causes */
#define EXC_INST_MISALIGNED 0
@@ -150,9 +152,69 @@
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
+#define CSR_HPMCOUNTER3 0xc03
+#define CSR_HPMCOUNTER4 0xc04
+#define CSR_HPMCOUNTER5 0xc05
+#define CSR_HPMCOUNTER6 0xc06
+#define CSR_HPMCOUNTER7 0xc07
+#define CSR_HPMCOUNTER8 0xc08
+#define CSR_HPMCOUNTER9 0xc09
+#define CSR_HPMCOUNTER10 0xc0a
+#define CSR_HPMCOUNTER11 0xc0b
+#define CSR_HPMCOUNTER12 0xc0c
+#define CSR_HPMCOUNTER13 0xc0d
+#define CSR_HPMCOUNTER14 0xc0e
+#define CSR_HPMCOUNTER15 0xc0f
+#define CSR_HPMCOUNTER16 0xc10
+#define CSR_HPMCOUNTER17 0xc11
+#define CSR_HPMCOUNTER18 0xc12
+#define CSR_HPMCOUNTER19 0xc13
+#define CSR_HPMCOUNTER20 0xc14
+#define CSR_HPMCOUNTER21 0xc15
+#define CSR_HPMCOUNTER22 0xc16
+#define CSR_HPMCOUNTER23 0xc17
+#define CSR_HPMCOUNTER24 0xc18
+#define CSR_HPMCOUNTER25 0xc19
+#define CSR_HPMCOUNTER26 0xc1a
+#define CSR_HPMCOUNTER27 0xc1b
+#define CSR_HPMCOUNTER28 0xc1c
+#define CSR_HPMCOUNTER29 0xc1d
+#define CSR_HPMCOUNTER30 0xc1e
+#define CSR_HPMCOUNTER31 0xc1f
#define CSR_CYCLEH 0xc80
#define CSR_TIMEH 0xc81
#define CSR_INSTRETH 0xc82
+#define CSR_HPMCOUNTER3H 0xc83
+#define CSR_HPMCOUNTER4H 0xc84
+#define CSR_HPMCOUNTER5H 0xc85
+#define CSR_HPMCOUNTER6H 0xc86
+#define CSR_HPMCOUNTER7H 0xc87
+#define CSR_HPMCOUNTER8H 0xc88
+#define CSR_HPMCOUNTER9H 0xc89
+#define CSR_HPMCOUNTER10H 0xc8a
+#define CSR_HPMCOUNTER11H 0xc8b
+#define CSR_HPMCOUNTER12H 0xc8c
+#define CSR_HPMCOUNTER13H 0xc8d
+#define CSR_HPMCOUNTER14H 0xc8e
+#define CSR_HPMCOUNTER15H 0xc8f
+#define CSR_HPMCOUNTER16H 0xc90
+#define CSR_HPMCOUNTER17H 0xc91
+#define CSR_HPMCOUNTER18H 0xc92
+#define CSR_HPMCOUNTER19H 0xc93
+#define CSR_HPMCOUNTER20H 0xc94
+#define CSR_HPMCOUNTER21H 0xc95
+#define CSR_HPMCOUNTER22H 0xc96
+#define CSR_HPMCOUNTER23H 0xc97
+#define CSR_HPMCOUNTER24H 0xc98
+#define CSR_HPMCOUNTER25H 0xc99
+#define CSR_HPMCOUNTER26H 0xc9a
+#define CSR_HPMCOUNTER27H 0xc9b
+#define CSR_HPMCOUNTER28H 0xc9c
+#define CSR_HPMCOUNTER29H 0xc9d
+#define CSR_HPMCOUNTER30H 0xc9e
+#define CSR_HPMCOUNTER31H 0xc9f
+
+#define CSR_SSCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@@ -240,7 +302,10 @@
# define RV_IRQ_SOFT IRQ_S_SOFT
# define RV_IRQ_TIMER IRQ_S_TIMER
# define RV_IRQ_EXT IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU IRQ_PMU_OVF
+# define SIP_LCOFIP (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 58a718573ad6..3cfece8b6568 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -25,6 +25,7 @@ enum fixed_addresses {
FIX_PTE,
FIX_PMD,
FIX_PUD,
+ FIX_P4D,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5ce50468aff1..0734e42f74f2 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -34,7 +34,33 @@ extern unsigned long elf_hwcap;
#define RISCV_ISA_EXT_s ('s' - 'a')
#define RISCV_ISA_EXT_u ('u' - 'a')
+/*
+ * Increse this to higher value as kernel support more ISA extensions.
+ */
#define RISCV_ISA_EXT_MAX 64
+#define RISCV_ISA_EXT_NAME_LEN_MAX 32
+
+/* The base ID for multi-letter ISA extensions */
+#define RISCV_ISA_EXT_BASE 26
+
+/*
+ * This enum represent the logical ID for each multi-letter RISC-V ISA extension.
+ * The logical ID should start from RISCV_ISA_EXT_BASE and must not exceed
+ * RISCV_ISA_EXT_MAX. 0-25 range is reserved for single letter
+ * extensions while all the multi-letter extensions should define the next
+ * available logical extension id.
+ */
+enum riscv_isa_ext_id {
+ RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
+ RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
+};
+
+struct riscv_isa_ext_data {
+ /* Name of the extension displayed to userspace via /proc/cpuinfo */
+ char uprop[RISCV_ISA_EXT_NAME_LEN_MAX];
+ /* The logical ISA extension ID */
+ unsigned int isa_ext_id;
+};
unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap);
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 99ef6a120617..78da839657e5 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -228,6 +228,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
void __kvm_riscv_unpriv_trap(void);
+void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu);
unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
bool read_insn,
unsigned long guest_addr,
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 76e4e17a3e00..83d6d4d2b1df 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -12,7 +12,7 @@
#define KVM_SBI_IMPID 3
#define KVM_SBI_VERSION_MAJOR 0
-#define KVM_SBI_VERSION_MINOR 2
+#define KVM_SBI_VERSION_MINOR 3
struct kvm_vcpu_sbi_extension {
unsigned long extid_start;
@@ -28,6 +28,9 @@ struct kvm_vcpu_sbi_extension {
};
void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run);
+void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ u32 type, u64 flags);
const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
#endif /* __RISCV_KVM_VCPU_SBI_H__ */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 160e3a1e8f8b..1526e410e802 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -41,6 +41,7 @@
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
* define the PAGE_OFFSET value for SV39.
*/
+#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
@@ -119,7 +120,7 @@ extern phys_addr_t phys_ram_base;
((x) >= kernel_map.virt_addr && (x) < (kernel_map.virt_addr + kernel_map.size))
#define is_linear_mapping(x) \
- ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < kernel_map.virt_addr))
+ ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
#define kernel_mapping_pa_to_va(y) ({ \
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index 062efd3a1d5d..d42c901f9a97 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -9,77 +9,5 @@
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
-#include <linux/ptrace.h>
-#include <linux/interrupt.h>
-
-#ifdef CONFIG_RISCV_BASE_PMU
-#define RISCV_BASE_COUNTERS 2
-
-/*
- * The RISCV_MAX_COUNTERS parameter should be specified.
- */
-
-#define RISCV_MAX_COUNTERS 2
-
-/*
- * These are the indexes of bits in counteren register *minus* 1,
- * except for cycle. It would be coherent if it can directly mapped
- * to counteren bit definition, but there is a *time* register at
- * counteren[1]. Per-cpu structure is scarce resource here.
- *
- * According to the spec, an implementation can support counter up to
- * mhpmcounter31, but many high-end processors has at most 6 general
- * PMCs, we give the definition to MHPMCOUNTER8 here.
- */
-#define RISCV_PMU_CYCLE 0
-#define RISCV_PMU_INSTRET 1
-#define RISCV_PMU_MHPMCOUNTER3 2
-#define RISCV_PMU_MHPMCOUNTER4 3
-#define RISCV_PMU_MHPMCOUNTER5 4
-#define RISCV_PMU_MHPMCOUNTER6 5
-#define RISCV_PMU_MHPMCOUNTER7 6
-#define RISCV_PMU_MHPMCOUNTER8 7
-
-#define RISCV_OP_UNSUPP (-EOPNOTSUPP)
-
-struct cpu_hw_events {
- /* # currently enabled events*/
- int n_events;
- /* currently enabled events */
- struct perf_event *events[RISCV_MAX_COUNTERS];
- /* vendor-defined PMU data */
- void *platform;
-};
-
-struct riscv_pmu {
- struct pmu *pmu;
-
- /* generic hw/cache events table */
- const int *hw_events;
- const int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
- /* method used to map hw/cache events */
- int (*map_hw_event)(u64 config);
- int (*map_cache_event)(u64 config);
-
- /* max generic hw events in map */
- int max_events;
- /* number total counters, 2(base) + x(general) */
- int num_counters;
- /* the width of the counter */
- int counter_width;
-
- /* vendor-defined PMU features */
- void *platform;
-
- irqreturn_t (*handle_irq)(int irq_num, void *dev);
- int irq;
-};
-
-#endif
-#ifdef CONFIG_PERF_EVENTS
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
-#endif
-
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 11823004b87a..947f23d7b6af 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
}
}
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
+ p4d_t *p4d)
+{
+ if (pgtable_l5_enabled) {
+ unsigned long pfn = virt_to_pfn(p4d);
+
+ set_pgd_safe(pgd,
+ __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
#define pud_alloc_one pud_alloc_one
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
}
#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
+
+#define p4d_alloc_one p4d_alloc_one
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l5_enabled) {
+ gfp_t gfp = GFP_PGTABLE_USER;
+
+ if (mm == &init_mm)
+ gfp = GFP_PGTABLE_KERNEL;
+ return (p4d_t *)get_zeroed_page(gfp);
+ }
+
+ return NULL;
+}
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+ free_page((unsigned long)p4d);
+}
+
+#define p4d_free p4d_free
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (pgtable_l5_enabled)
+ __p4d_free(mm, p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index bbbdd66e5e2f..7e246e9f8d70 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -9,16 +9,24 @@
#include <linux/const.h>
extern bool pgtable_l4_enabled;
+extern bool pgtable_l5_enabled;
#define PGDIR_SHIFT_L3 30
#define PGDIR_SHIFT_L4 39
+#define PGDIR_SHIFT_L5 48
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
-#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
+#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
+ (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* p4d is folded into pgd in case of 4-level page table */
+#define P4D_SHIFT 39
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
/* pud is folded into pgd in case of 3-level page table */
#define PUD_SHIFT 30
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
@@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page 4th Directory entry */
+typedef struct {
+ unsigned long p4d;
+} p4d_t;
+
+#define p4d_val(x) ((x).p4d)
+#define __p4d(x) ((p4d_t) { (x) })
+#define PTRS_PER_P4D (PAGE_SIZE / sizeof(p4d_t))
+
/* Page Upper Directory entry */
typedef struct {
unsigned long pud;
@@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_p4d_folded mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+ if (pgtable_l5_enabled)
+ return false;
+
+ return true;
+}
+
#define mm_pud_folded mm_pud_folded
static inline bool mm_pud_folded(struct mm_struct *mm)
{
@@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pud_ERROR(e) \
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+#define p4d_ERROR(e) \
+ pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
{
if (pgtable_l4_enabled)
@@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
set_p4d(p4d, __p4d(0));
}
+static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
+{
+ return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _p4d_pfn(p4d_t p4d)
+{
+ return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+}
+
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
@@ -173,6 +212,7 @@ static inline pud_t *p4d_pgtable(p4d_t p4d)
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
+#define p4d_page_vaddr(p4d) ((unsigned long)p4d_pgtable(p4d))
static inline struct page *p4d_page(p4d_t p4d)
{
@@ -190,4 +230,68 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
return (pud_t *)p4d;
}
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ *pgdp = pgd;
+ else
+ set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) == 0);
+
+ return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (pgd_val(pgd) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return !pgd_present(pgd);
+
+ return 0;
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+ if (pgtable_l5_enabled)
+ set_pgd(pgd, __pgd(0));
+}
+
+static inline p4d_t *pgd_pgtable(pgd_t pgd)
+{
+ if (pgtable_l5_enabled)
+ return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+
+ return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+}
+#define pgd_page_vaddr(pgd) ((unsigned long)pgd_pgtable(pgd))
+
+static inline struct page *pgd_page(pgd_t pgd)
+{
+ return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+}
+#define pgd_page(pgd) pgd_page(pgd)
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+#define p4d_offset p4d_offset
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+ if (pgtable_l5_enabled)
+ return pgd_pgtable(*pgd) + p4d_index(address);
+
+ return (p4d_t *)pgd;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7e949f25c933..046b44225623 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -13,6 +13,7 @@
#ifndef CONFIG_MMU
#define KERNEL_LINK_ADDR PAGE_OFFSET
+#define KERN_VIRT_SIZE (UL(-1))
#else
#define ADDRESS_SPACE_END (UL(-1))
@@ -62,7 +63,8 @@
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
-#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#define VA_BITS (pgtable_l5_enabled ? \
+ 57 : (pgtable_l4_enabled ? 48 : 39))
#else
#define VA_BITS 32
#endif
@@ -102,7 +104,6 @@
#ifndef __ASSEMBLY__
-#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -133,6 +134,8 @@ struct pt_alloc_ops {
phys_addr_t (*alloc_pmd)(uintptr_t va);
pud_t *(*get_pud_virt)(phys_addr_t pa);
phys_addr_t (*alloc_pud)(uintptr_t va);
+ p4d_t *(*get_p4d_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_p4d)(uintptr_t va);
#endif
};
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index d1c37479d828..9e3c2cf1edaf 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -29,6 +29,7 @@ enum sbi_ext_id {
SBI_EXT_RFENCE = 0x52464E43,
SBI_EXT_HSM = 0x48534D,
SBI_EXT_SRST = 0x53525354,
+ SBI_EXT_PMU = 0x504D55,
/* Experimentals extensions must lie within this range */
SBI_EXT_EXPERIMENTAL_START = 0x08000000,
@@ -71,15 +72,32 @@ enum sbi_ext_hsm_fid {
SBI_EXT_HSM_HART_START = 0,
SBI_EXT_HSM_HART_STOP,
SBI_EXT_HSM_HART_STATUS,
+ SBI_EXT_HSM_HART_SUSPEND,
};
-enum sbi_hsm_hart_status {
- SBI_HSM_HART_STATUS_STARTED = 0,
- SBI_HSM_HART_STATUS_STOPPED,
- SBI_HSM_HART_STATUS_START_PENDING,
- SBI_HSM_HART_STATUS_STOP_PENDING,
+enum sbi_hsm_hart_state {
+ SBI_HSM_STATE_STARTED = 0,
+ SBI_HSM_STATE_STOPPED,
+ SBI_HSM_STATE_START_PENDING,
+ SBI_HSM_STATE_STOP_PENDING,
+ SBI_HSM_STATE_SUSPENDED,
+ SBI_HSM_STATE_SUSPEND_PENDING,
+ SBI_HSM_STATE_RESUME_PENDING,
};
+#define SBI_HSM_SUSP_BASE_MASK 0x7fffffff
+#define SBI_HSM_SUSP_NON_RET_BIT 0x80000000
+#define SBI_HSM_SUSP_PLAT_BASE 0x10000000
+
+#define SBI_HSM_SUSPEND_RET_DEFAULT 0x00000000
+#define SBI_HSM_SUSPEND_RET_PLATFORM SBI_HSM_SUSP_PLAT_BASE
+#define SBI_HSM_SUSPEND_RET_LAST SBI_HSM_SUSP_BASE_MASK
+#define SBI_HSM_SUSPEND_NON_RET_DEFAULT SBI_HSM_SUSP_NON_RET_BIT
+#define SBI_HSM_SUSPEND_NON_RET_PLATFORM (SBI_HSM_SUSP_NON_RET_BIT | \
+ SBI_HSM_SUSP_PLAT_BASE)
+#define SBI_HSM_SUSPEND_NON_RET_LAST (SBI_HSM_SUSP_NON_RET_BIT | \
+ SBI_HSM_SUSP_BASE_MASK)
+
enum sbi_ext_srst_fid {
SBI_EXT_SRST_RESET = 0,
};
@@ -95,6 +113,98 @@ enum sbi_srst_reset_reason {
SBI_SRST_RESET_REASON_SYS_FAILURE,
};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+};
+
+#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(55, 0)
+#define RISCV_PMU_RAW_EVENT_IDX 0x20000
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/**
+ * Special "firmware" events provided by the firmware, even if the hardware
+ * does not support performance events. These events are encoded as a raw
+ * event type in Linux kernel perf framework.
+ */
+enum sbi_pmu_fw_generic_events_t {
+ SBI_PMU_FW_MISALIGNED_LOAD = 0,
+ SBI_PMU_FW_MISALIGNED_STORE = 1,
+ SBI_PMU_FW_ACCESS_LOAD = 2,
+ SBI_PMU_FW_ACCESS_STORE = 3,
+ SBI_PMU_FW_ILLEGAL_INSN = 4,
+ SBI_PMU_FW_SET_TIMER = 5,
+ SBI_PMU_FW_IPI_SENT = 6,
+ SBI_PMU_FW_IPI_RECVD = 7,
+ SBI_PMU_FW_FENCE_I_SENT = 8,
+ SBI_PMU_FW_FENCE_I_RECVD = 9,
+ SBI_PMU_FW_SFENCE_VMA_SENT = 10,
+ SBI_PMU_FW_SFENCE_VMA_RCVD = 11,
+ SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
+ SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13,
+
+ SBI_PMU_FW_HFENCE_GVMA_SENT = 14,
+ SBI_PMU_FW_HFENCE_GVMA_RCVD = 15,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16,
+ SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17,
+
+ SBI_PMU_FW_HFENCE_VVMA_SENT = 18,
+ SBI_PMU_FW_HFENCE_VVMA_RCVD = 19,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20,
+ SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21,
+ SBI_PMU_FW_MAX,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_event_type {
+ SBI_PMU_EVENT_TYPE_HW = 0x0,
+ SBI_PMU_EVENT_TYPE_CACHE = 0x1,
+ SBI_PMU_EVENT_TYPE_RAW = 0x2,
+ SBI_PMU_EVENT_TYPE_FW = 0xf,
+};
+
+/* SBI PMU event types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
+#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
+#define SBI_PMU_CFG_FLAG_SET_VSNH (1 << 4)
+#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
+#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
+#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+
#define SBI_SPEC_VERSION_DEFAULT 0x1
#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
@@ -108,6 +218,8 @@ enum sbi_srst_reset_reason {
#define SBI_ERR_DENIED -4
#define SBI_ERR_INVALID_ADDRESS -5
#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
extern unsigned long sbi_spec_version;
struct sbiret {
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index c701a5e57a2b..855450bed9f5 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -21,42 +21,13 @@
#include <asm/byteorder.h>
#include <asm/extable.h>
#include <asm/asm.h>
+#include <asm-generic/access_ok.h>
#define __enable_user_access() \
__asm__ __volatile__ ("csrs sstatus, %0" : : "r" (SR_SUM) : "memory")
#define __disable_user_access() \
__asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory")
-/**
- * access_ok: - Checks if a user space pointer is valid
- * @addr: User space pointer to start of block to check
- * @size: Size of block to check
- *
- * Context: User context only. This function may sleep.
- *
- * Checks if a pointer to a block of memory in user space is valid.
- *
- * Returns true (nonzero) if the memory block may be valid, false (zero)
- * if it is definitely invalid.
- *
- * Note that, depending on architecture, this function probably just
- * checks that the pointer is in the user space range - after calling
- * this function, memory access functions may still return -EFAULT.
- */
-#define access_ok(addr, size) ({ \
- __chk_user_ptr(addr); \
- likely(__access_ok((unsigned long __force)(addr), (size))); \
-})
-
-/*
- * Ensure that the range [addr, addr+size) is within the process's
- * address space
- */
-static inline int __access_ok(unsigned long addr, unsigned long size)
-{
- return size <= TASK_SIZE && addr <= TASK_SIZE - size;
-}
-
/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
@@ -346,8 +317,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__clear_user(to, n) : n;
}
-#define HAVE_GET_KERNEL_NOFAULT
-
#define __get_kernel_nofault(dst, src, type, err_label) \
do { \
long __kr_err; \
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 612556faa527..e0133d113216 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -51,7 +51,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
-obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o
+obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
+
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index ad0a7e9f828b..d2a936195295 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -6,6 +6,7 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/pgtable.h>
@@ -63,12 +64,73 @@ int riscv_of_parent_hartid(struct device_node *node)
}
#ifdef CONFIG_PROC_FS
+#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
+ { \
+ .uprop = #UPROP, \
+ .isa_ext_id = EXTID, \
+ }
+/**
+ * Here are the ordering rules of extension naming defined by RISC-V
+ * specification :
+ * 1. All extensions should be separated from other multi-letter extensions
+ * from other multi-letter extensions by an underscore.
+ * 2. The first letter following the 'Z' conventionally indicates the most
+ * closely related alphabetical extension category, IMAFDQLCBKJTPVH.
+ * If multiple 'Z' extensions are named, they should be ordered first
+ * by category, then alphabetically within a category.
+ * 3. Standard supervisor-level extensions (starts with 'S') should be
+ * listed after standard unprivileged extensions. If multiple
+ * supervisor-level extensions are listed, they should be ordered
+ * alphabetically.
+ * 4. Non-standard extensions (starts with 'X') must be listed after all
+ * standard extensions. They must be separated from other multi-letter
+ * extensions by an underscore.
+ */
+static struct riscv_isa_ext_data isa_ext_arr[] = {
+ __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
+ __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
+};
+
+static void print_isa_ext(struct seq_file *f)
+{
+ struct riscv_isa_ext_data *edata;
+ int i = 0, arr_sz;
+
+ arr_sz = ARRAY_SIZE(isa_ext_arr) - 1;
+
+ /* No extension support available */
+ if (arr_sz <= 0)
+ return;
+
+ for (i = 0; i <= arr_sz; i++) {
+ edata = &isa_ext_arr[i];
+ if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id))
+ continue;
+ seq_printf(f, "_%s", edata->uprop);
+ }
+}
+
+/**
+ * These are the only valid base (single letter) ISA extensions as per the spec.
+ * It also specifies the canonical order in which it appears in the spec.
+ * Some of the extension may just be a place holder for now (B, K, P, J).
+ * This should be updated once corresponding extensions are ratified.
+ */
+static const char base_riscv_exts[13] = "imafdqcbkjpvh";
static void print_isa(struct seq_file *f, const char *isa)
{
- /* Print the entire ISA as it is */
+ int i;
+
seq_puts(f, "isa\t\t: ");
- seq_write(f, isa, strlen(isa));
+ /* Print the rv[64/32] part */
+ seq_write(f, isa, 4);
+ for (i = 0; i < sizeof(base_riscv_exts); i++) {
+ if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
+ /* Print only enabled the base ISA extensions */
+ seq_write(f, &base_riscv_exts[i], 1);
+ }
+ print_isa_ext(f);
seq_puts(f, "\n");
}
@@ -79,7 +141,9 @@ static void print_mmu(struct seq_file *f)
#if defined(CONFIG_32BIT)
strncpy(sv_type, "sv32", 5);
#elif defined(CONFIG_64BIT)
- if (pgtable_l4_enabled)
+ if (pgtable_l5_enabled)
+ strncpy(sv_type, "sv57", 5);
+ else if (pgtable_l4_enabled)
strncpy(sv_type, "sv48", 5);
else
strncpy(sv_type, "sv39", 5);
diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index dae29cbfe550..2e16f6732cdf 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -111,7 +111,7 @@ static int sbi_cpu_is_stopped(unsigned int cpuid)
rc = sbi_hsm_hart_get_status(hartid);
- if (rc == SBI_HSM_HART_STATUS_STOPPED)
+ if (rc == SBI_HSM_STATE_STOPPED)
return 0;
return rc;
}
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index d959d207a40d..1b2d42d7f589 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -7,12 +7,15 @@
*/
#include <linux/bitmap.h>
+#include <linux/ctype.h>
#include <linux/of.h>
#include <asm/processor.h>
#include <asm/hwcap.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
+
unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
@@ -63,8 +66,8 @@ void __init riscv_fill_hwcap(void)
{
struct device_node *node;
const char *isa;
- char print_str[BITS_PER_LONG + 1];
- size_t i, j, isa_len;
+ char print_str[NUM_ALPHA_EXTS + 1];
+ int i, j;
static unsigned long isa2hwcap[256] = {0};
isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
@@ -80,7 +83,8 @@ void __init riscv_fill_hwcap(void)
for_each_of_cpu_node(node) {
unsigned long this_hwcap = 0;
- unsigned long this_isa = 0;
+ DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
+ const char *temp;
if (riscv_of_processor_hartid(node) < 0)
continue;
@@ -90,23 +94,106 @@ void __init riscv_fill_hwcap(void)
continue;
}
- i = 0;
- isa_len = strlen(isa);
+ temp = isa;
#if IS_ENABLED(CONFIG_32BIT)
if (!strncmp(isa, "rv32", 4))
- i += 4;
+ isa += 4;
#elif IS_ENABLED(CONFIG_64BIT)
if (!strncmp(isa, "rv64", 4))
- i += 4;
+ isa += 4;
#endif
- for (; i < isa_len; ++i) {
- this_hwcap |= isa2hwcap[(unsigned char)(isa[i])];
- /*
- * TODO: X, Y and Z extension parsing for Host ISA
- * bitmap will be added in-future.
- */
- if ('a' <= isa[i] && isa[i] < 'x')
- this_isa |= (1UL << (isa[i] - 'a'));
+ /* The riscv,isa DT property must start with rv64 or rv32 */
+ if (temp == isa)
+ continue;
+ bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
+ for (; *isa; ++isa) {
+ const char *ext = isa++;
+ const char *ext_end = isa;
+ bool ext_long = false, ext_err = false;
+
+ switch (*ext) {
+ case 's':
+ /**
+ * Workaround for invalid single-letter 's' & 'u'(QEMU).
+ * No need to set the bit in riscv_isa as 's' & 'u' are
+ * not valid ISA extensions. It works until multi-letter
+ * extension starting with "Su" appears.
+ */
+ if (ext[-1] != '_' && ext[1] == 'u') {
+ ++isa;
+ ext_err = true;
+ break;
+ }
+ fallthrough;
+ case 'x':
+ case 'z':
+ ext_long = true;
+ /* Multi-letter extension must be delimited */
+ for (; *isa && *isa != '_'; ++isa)
+ if (unlikely(!islower(*isa)
+ && !isdigit(*isa)))
+ ext_err = true;
+ /* Parse backwards */
+ ext_end = isa;
+ if (unlikely(ext_err))
+ break;
+ if (!isdigit(ext_end[-1]))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*--ext_end))
+ ;
+ if (ext_end[0] != 'p'
+ || !isdigit(ext_end[-1])) {
+ /* Advance it to offset the pre-decrement */
+ ++ext_end;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*--ext_end))
+ ;
+ ++ext_end;
+ break;
+ default:
+ if (unlikely(!islower(*ext))) {
+ ext_err = true;
+ break;
+ }
+ /* Find next extension */
+ if (!isdigit(*isa))
+ break;
+ /* Skip the minor version */
+ while (isdigit(*++isa))
+ ;
+ if (*isa != 'p')
+ break;
+ if (!isdigit(*++isa)) {
+ --isa;
+ break;
+ }
+ /* Skip the major version */
+ while (isdigit(*++isa))
+ ;
+ break;
+ }
+ if (*isa != '_')
+ --isa;
+
+#define SET_ISA_EXT_MAP(name, bit) \
+ do { \
+ if ((ext_end - ext == sizeof(name) - 1) && \
+ !memcmp(ext, name, sizeof(name) - 1)) \
+ set_bit(bit, this_isa); \
+ } while (false) \
+
+ if (unlikely(ext_err))
+ continue;
+ if (!ext_long) {
+ this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
+ set_bit(*ext - 'a', this_isa);
+ } else {
+ SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
+ }
+#undef SET_ISA_EXT_MAP
}
/*
@@ -119,10 +206,11 @@ void __init riscv_fill_hwcap(void)
else
elf_hwcap = this_hwcap;
- if (riscv_isa[0])
- riscv_isa[0] &= this_isa;
+ if (bitmap_weight(riscv_isa, RISCV_ISA_EXT_MAX))
+ bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
else
- riscv_isa[0] = this_isa;
+ bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+
}
/* We don't support systems with F but without D, so mask those out
@@ -133,13 +221,13 @@ void __init riscv_fill_hwcap(void)
}
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (riscv_isa[0] & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
- pr_info("riscv: ISA extensions %s\n", print_str);
+ pr_info("riscv: base ISA extensions %s\n", print_str);
memset(print_str, 0, sizeof(print_str));
- for (i = 0, j = 0; i < BITS_PER_LONG; i++)
+ for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
if (elf_hwcap & BIT_MASK(i))
print_str[j++] = (char)('a' + i);
pr_info("riscv: ELF capabilities %s\n", print_str);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index ed29e9c8f660..c8b9ce274b9a 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -108,7 +108,7 @@ _save_context:
.option pop
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_off
+ call __trace_hardirqs_off
#endif
#ifdef CONFIG_CONTEXT_TRACKING
@@ -143,7 +143,7 @@ skip_context_tracking:
li t0, EXC_BREAKPOINT
beq s4, t0, 1f
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_on
+ call __trace_hardirqs_on
#endif
csrs CSR_STATUS, SR_IE
@@ -225,6 +225,10 @@ ret_from_syscall:
* (If it was configured with SECCOMP_RET_ERRNO/TRACE)
*/
ret_from_syscall_rejected:
+#ifdef CONFIG_DEBUG_RSEQ
+ move a0, sp
+ call rseq_syscall
+#endif
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
@@ -234,7 +238,7 @@ ret_from_exception:
REG_L s0, PT_STATUS(sp)
csrc CSR_STATUS, SR_IE
#ifdef CONFIG_TRACE_IRQFLAGS
- call trace_hardirqs_off
+ call __trace_hardirqs_off
#endif
#ifdef CONFIG_RISCV_M_MODE
/* the MPP value is too large to be used as an immediate arg for addi */
@@ -270,10 +274,10 @@ restore_all:
REG_L s1, PT_STATUS(sp)
andi t0, s1, SR_PIE
beqz t0, 1f
- call trace_hardirqs_on
+ call __trace_hardirqs_on
j 2f
1:
- call trace_hardirqs_off
+ call __trace_hardirqs_off
2:
#endif
REG_L a0, PT_STATUS(sp)
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 68a9e3d1fe16..4a48287513c3 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -13,6 +13,19 @@
#include <linux/pgtable.h>
#include <asm/sections.h>
+/*
+ * The auipc+jalr instruction pair can reach any PC-relative offset
+ * in the range [-2^31 - 2^11, 2^31 - 2^11)
+ */
+static bool riscv_insn_valid_32bit_offset(ptrdiff_t val)
+{
+#ifdef CONFIG_32BIT
+ return true;
+#else
+ return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11));
+#endif
+}
+
static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
{
if (v != (u32)v) {
@@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
ptrdiff_t offset = (void *)v - (void *)location;
s32 hi20;
- if (offset != (s32)offset) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
@@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
/* Only emit the plt entry if offset over 32-bit range */
if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) {
offset = module_emit_plt_entry(me, v);
@@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
Elf_Addr v)
{
ptrdiff_t offset = (void *)v - (void *)location;
- s32 fill_v = offset;
u32 hi20, lo12;
- if (offset != fill_v) {
+ if (!riscv_insn_valid_32bit_offset(offset)) {
pr_err(
"%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
me->name, (long long)v, location);
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 1fc075b8f764..55faa4991b87 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -15,8 +15,8 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
{
struct stackframe buftail;
unsigned long ra = 0;
- unsigned long *user_frame_tail =
- (unsigned long *)(fp - sizeof(struct stackframe));
+ unsigned long __user *user_frame_tail =
+ (unsigned long __user *)(fp - sizeof(struct stackframe));
/* Check accessibility of one struct frame_tail beyond */
if (!access_ok(user_frame_tail, sizeof(buftail)))
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
deleted file mode 100644
index c835f0362d94..000000000000
--- a/arch/riscv/kernel/perf_event.c
+++ /dev/null
@@ -1,485 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
- * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2009 Jaswinder Singh Rajput
- * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
- * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
- * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
- * Copyright (C) 2009 Google, Inc., Stephane Eranian
- * Copyright 2014 Tilera Corporation. All Rights Reserved.
- * Copyright (C) 2018 Andes Technology Corporation
- *
- * Perf_events support for RISC-V platforms.
- *
- * Since the spec. (as of now, Priv-Spec 1.10) does not provide enough
- * functionality for perf event to fully work, this file provides
- * the very basic framework only.
- *
- * For platform portings, please check Documentations/riscv/pmu.txt.
- *
- * The Copyright line includes x86 and tile ones.
- */
-
-#include <linux/kprobes.h>
-#include <linux/kernel.h>
-#include <linux/kdebug.h>
-#include <linux/mutex.h>
-#include <linux/bitmap.h>
-#include <linux/irq.h>
-#include <linux/perf_event.h>
-#include <linux/atomic.h>
-#include <linux/of.h>
-#include <asm/perf_event.h>
-
-static const struct riscv_pmu *riscv_pmu __read_mostly;
-static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-
-/*
- * Hardware & cache maps and their methods
- */
-
-static const int riscv_hw_event_map[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = RISCV_PMU_CYCLE,
- [PERF_COUNT_HW_INSTRUCTIONS] = RISCV_PMU_INSTRET,
- [PERF_COUNT_HW_CACHE_REFERENCES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_CACHE_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BRANCH_MISSES] = RISCV_OP_UNSUPP,
- [PERF_COUNT_HW_BUS_CYCLES] = RISCV_OP_UNSUPP,
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-static const int riscv_cache_event_map[PERF_COUNT_HW_CACHE_MAX]
-[PERF_COUNT_HW_CACHE_OP_MAX]
-[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(DTLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = RISCV_OP_UNSUPP,
- [C(RESULT_MISS)] = RISCV_OP_UNSUPP,
- },
- },
-};
-
-static int riscv_map_hw_event(u64 config)
-{
- if (config >= riscv_pmu->max_events)
- return -EINVAL;
-
- return riscv_pmu->hw_events[config];
-}
-
-static int riscv_map_cache_decode(u64 config, unsigned int *type,
- unsigned int *op, unsigned int *result)
-{
- return -ENOENT;
-}
-
-static int riscv_map_cache_event(u64 config)
-{
- unsigned int type, op, result;
- int err = -ENOENT;
- int code;
-
- err = riscv_map_cache_decode(config, &type, &op, &result);
- if (!riscv_pmu->cache_events || err)
- return err;
-
- if (type >= PERF_COUNT_HW_CACHE_MAX ||
- op >= PERF_COUNT_HW_CACHE_OP_MAX ||
- result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
- return -EINVAL;
-
- code = (*riscv_pmu->cache_events)[type][op][result];
- if (code == RISCV_OP_UNSUPP)
- return -EINVAL;
-
- return code;
-}
-
-/*
- * Low-level functions: reading/writing counters
- */
-
-static inline u64 read_counter(int idx)
-{
- u64 val = 0;
-
- switch (idx) {
- case RISCV_PMU_CYCLE:
- val = csr_read(CSR_CYCLE);
- break;
- case RISCV_PMU_INSTRET:
- val = csr_read(CSR_INSTRET);
- break;
- default:
- WARN_ON_ONCE(idx < 0 || idx > RISCV_MAX_COUNTERS);
- return -EINVAL;
- }
-
- return val;
-}
-
-static inline void write_counter(int idx, u64 value)
-{
- /* currently not supported */
- WARN_ON_ONCE(1);
-}
-
-/*
- * pmu->read: read and update the counter
- *
- * Other architectures' implementation often have a xxx_perf_event_update
- * routine, which can return counter values when called in the IRQ, but
- * return void when being called by the pmu->read method.
- */
-static void riscv_pmu_read(struct perf_event *event)
-{
- struct hw_perf_event *hwc = &event->hw;
- u64 prev_raw_count, new_raw_count;
- u64 oldval;
- int idx = hwc->idx;
- u64 delta;
-
- do {
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = read_counter(idx);
-
- oldval = local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count);
- } while (oldval != prev_raw_count);
-
- /*
- * delta is the value to update the counter we maintain in the kernel.
- */
- delta = (new_raw_count - prev_raw_count) &
- ((1ULL << riscv_pmu->counter_width) - 1);
- local64_add(delta, &event->count);
- /*
- * Something like local64_sub(delta, &hwc->period_left) here is
- * needed if there is an interrupt for perf.
- */
-}
-
-/*
- * State transition functions:
- *
- * stop()/start() & add()/del()
- */
-
-/*
- * pmu->stop: stop the counter
- */
-static void riscv_pmu_stop(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
- hwc->state |= PERF_HES_STOPPED;
-
- if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
- riscv_pmu->pmu->read(event);
- hwc->state |= PERF_HES_UPTODATE;
- }
-}
-
-/*
- * pmu->start: start the event.
- */
-static void riscv_pmu_start(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
- return;
-
- if (flags & PERF_EF_RELOAD) {
- WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-
- /*
- * Set the counter to the period to the next interrupt here,
- * if you have any.
- */
- }
-
- hwc->state = 0;
- perf_event_update_userpage(event);
-
- /*
- * Since we cannot write to counters, this serves as an initialization
- * to the delta-mechanism in pmu->read(); otherwise, the delta would be
- * wrong when pmu->read is called for the first time.
- */
- local64_set(&hwc->prev_count, read_counter(hwc->idx));
-}
-
-/*
- * pmu->add: add the event to PMU.
- */
-static int riscv_pmu_add(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- if (cpuc->n_events == riscv_pmu->num_counters)
- return -ENOSPC;
-
- /*
- * We don't have general conunters, so no binding-event-to-counter
- * process here.
- *
- * Indexing using hwc->config generally not works, since config may
- * contain extra information, but here the only info we have in
- * hwc->config is the event index.
- */
- hwc->idx = hwc->config;
- cpuc->events[hwc->idx] = event;
- cpuc->n_events++;
-
- hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
-
- if (flags & PERF_EF_START)
- riscv_pmu->pmu->start(event, PERF_EF_RELOAD);
-
- return 0;
-}
-
-/*
- * pmu->del: delete the event from PMU.
- */
-static void riscv_pmu_del(struct perf_event *event, int flags)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- struct hw_perf_event *hwc = &event->hw;
-
- cpuc->events[hwc->idx] = NULL;
- cpuc->n_events--;
- riscv_pmu->pmu->stop(event, PERF_EF_UPDATE);
- perf_event_update_userpage(event);
-}
-
-/*
- * Interrupt: a skeletion for reference.
- */
-
-static DEFINE_MUTEX(pmc_reserve_mutex);
-
-static irqreturn_t riscv_base_pmu_handle_irq(int irq_num, void *dev)
-{
- return IRQ_NONE;
-}
-
-static int reserve_pmc_hardware(void)
-{
- int err = 0;
-
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0 && riscv_pmu->handle_irq) {
- err = request_irq(riscv_pmu->irq, riscv_pmu->handle_irq,
- IRQF_PERCPU, "riscv-base-perf", NULL);
- }
- mutex_unlock(&pmc_reserve_mutex);
-
- return err;
-}
-
-static void release_pmc_hardware(void)
-{
- mutex_lock(&pmc_reserve_mutex);
- if (riscv_pmu->irq >= 0)
- free_irq(riscv_pmu->irq, NULL);
- mutex_unlock(&pmc_reserve_mutex);
-}
-
-/*
- * Event Initialization/Finalization
- */
-
-static atomic_t riscv_active_events = ATOMIC_INIT(0);
-
-static void riscv_event_destroy(struct perf_event *event)
-{
- if (atomic_dec_return(&riscv_active_events) == 0)
- release_pmc_hardware();
-}
-
-static int riscv_event_init(struct perf_event *event)
-{
- struct perf_event_attr *attr = &event->attr;
- struct hw_perf_event *hwc = &event->hw;
- int err;
- int code;
-
- if (atomic_inc_return(&riscv_active_events) == 1) {
- err = reserve_pmc_hardware();
-
- if (err) {
- pr_warn("PMC hardware not available\n");
- atomic_dec(&riscv_active_events);
- return -EBUSY;
- }
- }
-
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- code = riscv_pmu->map_hw_event(attr->config);
- break;
- case PERF_TYPE_HW_CACHE:
- code = riscv_pmu->map_cache_event(attr->config);
- break;
- case PERF_TYPE_RAW:
- return -EOPNOTSUPP;
- default:
- return -ENOENT;
- }
-
- event->destroy = riscv_event_destroy;
- if (code < 0) {
- event->destroy(event);
- return code;
- }
-
- /*
- * idx is set to -1 because the index of a general event should not be
- * decided until binding to some counter in pmu->add().
- *
- * But since we don't have such support, later in pmu->add(), we just
- * use hwc->config as the index instead.
- */
- hwc->config = code;
- hwc->idx = -1;
-
- return 0;
-}
-
-/*
- * Initialization
- */
-
-static struct pmu min_pmu = {
- .name = "riscv-base",
- .event_init = riscv_event_init,
- .add = riscv_pmu_add,
- .del = riscv_pmu_del,
- .start = riscv_pmu_start,
- .stop = riscv_pmu_stop,
- .read = riscv_pmu_read,
-};
-
-static const struct riscv_pmu riscv_base_pmu = {
- .pmu = &min_pmu,
- .max_events = ARRAY_SIZE(riscv_hw_event_map),
- .map_hw_event = riscv_map_hw_event,
- .hw_events = riscv_hw_event_map,
- .map_cache_event = riscv_map_cache_event,
- .cache_events = &riscv_cache_event_map,
- .counter_width = 63,
- .num_counters = RISCV_BASE_COUNTERS + 0,
- .handle_irq = &riscv_base_pmu_handle_irq,
-
- /* This means this PMU has no IRQ. */
- .irq = -1,
-};
-
-static const struct of_device_id riscv_pmu_of_ids[] = {
- {.compatible = "riscv,base-pmu", .data = &riscv_base_pmu},
- { /* sentinel value */ }
-};
-
-static int __init init_hw_perf_events(void)
-{
- struct device_node *node = of_find_node_by_type(NULL, "pmu");
- const struct of_device_id *of_id;
-
- riscv_pmu = &riscv_base_pmu;
-
- if (node) {
- of_id = of_match_node(riscv_pmu_of_ids, node);
-
- if (of_id)
- riscv_pmu = of_id->data;
- of_node_put(node);
- }
-
- perf_pmu_register(riscv_pmu->pmu, "cpu", PERF_TYPE_RAW);
- return 0;
-}
-arch_initcall(init_hw_perf_events);
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index a89243730153..793c7da0554b 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -17,7 +17,6 @@
#include <linux/regset.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#include <linux/tracehook.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -241,7 +240,7 @@ long arch_ptrace(struct task_struct *child, long request,
__visible int do_syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
- if (tracehook_report_syscall_entry(regs))
+ if (ptrace_report_syscall_entry(regs))
return -1;
/*
@@ -266,7 +265,7 @@ __visible void do_syscall_trace_exit(struct pt_regs *regs)
audit_syscall_exit(regs);
if (test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, 0);
+ ptrace_report_syscall_exit(regs, 0);
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index f72527fcb347..775d3322b422 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -5,6 +5,7 @@
* Copyright (c) 2020 Western Digital Corporation or its affiliates.
*/
+#include <linux/bits.h>
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/reboot.h>
@@ -85,7 +86,7 @@ static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mas
pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n");
break;
}
- hmask |= 1 << hartid;
+ hmask |= BIT(hartid);
}
return hmask;
@@ -160,7 +161,7 @@ static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask)
{
unsigned long hart_mask;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
@@ -176,7 +177,7 @@ static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask,
int result = 0;
unsigned long hart_mask;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask);
@@ -249,26 +250,37 @@ static void __sbi_set_timer_v02(uint64_t stime_value)
static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask)
{
- unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
struct sbiret ret = {0};
int result;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
- if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
- ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI,
- hmask, hbase, 0, 0, 0, 0);
- if (ret.error)
- goto ecall_failed;
- hmask = 0;
- hbase = 0;
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ ret = sbi_ecall(SBI_EXT_IPI,
+ SBI_EXT_IPI_SEND_IPI, hmask,
+ hbase, 0, 0, 0, 0);
+ if (ret.error)
+ goto ecall_failed;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask)
+ if (!hmask) {
hbase = hartid;
- hmask |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
if (hmask) {
@@ -344,25 +356,35 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask,
unsigned long start, unsigned long size,
unsigned long arg4, unsigned long arg5)
{
- unsigned long hartid, cpuid, hmask = 0, hbase = 0;
+ unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0;
int result;
- if (!cpu_mask)
+ if (!cpu_mask || cpumask_empty(cpu_mask))
cpu_mask = cpu_online_mask;
for_each_cpu(cpuid, cpu_mask) {
hartid = cpuid_to_hartid_map(cpuid);
- if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) {
- result = __sbi_rfence_v02_call(fid, hmask, hbase,
- start, size, arg4, arg5);
- if (result)
- return result;
- hmask = 0;
- hbase = 0;
+ if (hmask) {
+ if (hartid + BITS_PER_LONG <= htop ||
+ hbase + BITS_PER_LONG <= hartid) {
+ result = __sbi_rfence_v02_call(fid, hmask,
+ hbase, start, size, arg4, arg5);
+ if (result)
+ return result;
+ hmask = 0;
+ } else if (hartid < hbase) {
+ /* shift the mask to fit lower hartid */
+ hmask <<= hbase - hartid;
+ hbase = hartid;
+ }
}
- if (!hmask)
+ if (!hmask) {
hbase = hartid;
- hmask |= 1UL << (hartid - hbase);
+ htop = hartid;
+ } else if (hartid > htop) {
+ htop = hartid;
+ }
+ hmask |= BIT(hartid - hbase);
}
if (hmask) {
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b42bfdc67482..834eb652a7b9 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -301,9 +301,6 @@ static int __init topology_init(void)
{
int i, ret;
- for_each_online_node(i)
- register_one_node(i);
-
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_devices, i);
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index c2d5ecbe5526..9f4e59f80551 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -9,7 +9,7 @@
#include <linux/signal.h>
#include <linux/uaccess.h>
#include <linux/syscalls.h>
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/linkage.h>
#include <asm/ucontext.h>
@@ -258,6 +258,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
}
}
+ rseq_signal_deliver(ksig, regs);
+
/* Set up the stack frame */
ret = setup_rt_frame(ksig, oldset, regs);
@@ -317,5 +319,5 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
do_signal(regs);
if (thread_info_flags & _TIF_NOTIFY_RESUME)
- tracehook_notify_resume(regs);
+ resume_user_mode_work(regs);
}
diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
new file mode 100644
index 000000000000..095ac976d7da
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kprobes.h>
+#include "trace_irq.h"
+
+/*
+ * trace_hardirqs_on/off require the caller to setup frame pointer properly.
+ * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel.
+ * Here we add one extra level so they can be safely called by low
+ * level entry code which $fp is used for other purpose.
+ */
+
+void __trace_hardirqs_on(void)
+{
+ trace_hardirqs_on();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_on);
+
+void __trace_hardirqs_off(void)
+{
+ trace_hardirqs_off();
+}
+NOKPROBE_SYMBOL(__trace_hardirqs_off);
diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
new file mode 100644
index 000000000000..99fe67377e5e
--- /dev/null
+++ b/arch/riscv/kernel/trace_irq.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
+ */
+#ifndef __TRACE_IRQ_H
+#define __TRACE_IRQ_H
+
+void __trace_hardirqs_on(void);
+void __trace_hardirqs_off(void);
+
+#endif /* __TRACE_IRQ_H */
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 571f319e995a..aa8af129e4bb 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -144,12 +144,7 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu,
{
if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
vcpu->stat.wfi_exit_stat++;
- if (!kvm_arch_vcpu_runnable(vcpu)) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- kvm_vcpu_halt(vcpu);
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- }
+ kvm_riscv_vcpu_wfi(vcpu);
vcpu->arch.guest_context.sepc += INSN_LEN(insn);
return 1;
}
@@ -454,6 +449,21 @@ static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
/**
+ * kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour
+ *
+ * @vcpu: The VCPU pointer
+ */
+void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_halt(vcpu);
+ vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+ }
+}
+
+/**
* kvm_riscv_vcpu_unpriv_read -- Read machine word from Guest memory
*
* @vcpu: The VCPU pointer
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 78aa3db76225..a09ecb97b890 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -45,6 +45,7 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
@@ -55,6 +56,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
&vcpu_sbi_ext_time,
&vcpu_sbi_ext_ipi,
&vcpu_sbi_ext_rfence,
+ &vcpu_sbi_ext_srst,
&vcpu_sbi_ext_hsm,
&vcpu_sbi_ext_experimental,
&vcpu_sbi_ext_vendor,
@@ -79,6 +81,23 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->riscv_sbi.ret[1] = cp->a1;
}
+void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ u32 type, u64 flags)
+{
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->system_event.flags = flags;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
index 2e383687fa48..239dec0a628a 100644
--- a/arch/riscv/kvm/vcpu_sbi_hsm.c
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -60,9 +60,11 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
if (!target_vcpu)
return -EINVAL;
if (!target_vcpu->arch.power_off)
- return SBI_HSM_HART_STATUS_STARTED;
+ return SBI_HSM_STATE_STARTED;
+ else if (vcpu->stat.generic.blocking)
+ return SBI_HSM_STATE_SUSPENDED;
else
- return SBI_HSM_HART_STATUS_STOPPED;
+ return SBI_HSM_STATE_STOPPED;
}
static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -91,6 +93,18 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = 0;
}
break;
+ case SBI_EXT_HSM_HART_SUSPEND:
+ switch (cp->a0) {
+ case SBI_HSM_SUSPEND_RET_DEFAULT:
+ kvm_riscv_vcpu_wfi(vcpu);
+ break;
+ case SBI_HSM_SUSPEND_NON_RET_DEFAULT:
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ break;
default:
ret = -EOPNOTSUPP;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 1bc0608a5bfd..0f217365c287 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -130,3 +130,47 @@ const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
.extid_end = SBI_EXT_RFENCE,
.handler = kvm_sbi_ext_rfence_handler,
};
+
+static int kvm_sbi_ext_srst_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ u32 reason = cp->a1;
+ u32 type = cp->a0;
+ int ret = 0;
+
+ switch (funcid) {
+ case SBI_EXT_SRST_RESET:
+ switch (type) {
+ case SBI_SRST_RESET_TYPE_SHUTDOWN:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN,
+ reason);
+ *exit = true;
+ break;
+ case SBI_SRST_RESET_TYPE_COLD_REBOOT:
+ case SBI_SRST_RESET_TYPE_WARM_REBOOT:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_RESET,
+ reason);
+ *exit = true;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = {
+ .extid_start = SBI_EXT_SRST,
+ .extid_end = SBI_EXT_SRST,
+ .handler = kvm_sbi_ext_srst_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
index 07e2de14433a..da4d6c99c2cf 100644
--- a/arch/riscv/kvm/vcpu_sbi_v01.c
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -14,21 +14,6 @@
#include <asm/kvm_vcpu_timer.h>
#include <asm/kvm_vcpu_sbi.h>
-static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
- struct kvm_run *run, u32 type)
-{
- unsigned long i;
- struct kvm_vcpu *tmp;
-
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
- kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
-
- memset(&run->system_event, 0, sizeof(run->system_event));
- run->system_event.type = type;
- run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
-}
-
static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long *out_val,
struct kvm_cpu_trap *utrap,
@@ -80,7 +65,8 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
break;
case SBI_EXT_0_1_SHUTDOWN:
- kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN, 0);
*exit = true;
break;
case SBI_EXT_0_1_REMOTE_FENCE_I:
@@ -111,7 +97,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
default:
ret = -EINVAL;
break;
- };
+ }
return ret;
}
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index 029a28a195c6..d74df8eb4d71 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -41,33 +41,37 @@ ENTRY(__kvm_riscv_switch_to)
REG_S s10, (KVM_ARCH_HOST_S10)(a0)
REG_S s11, (KVM_ARCH_HOST_S11)(a0)
- /* Save Host and Restore Guest SSTATUS */
+ /* Load Guest CSR values */
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
+ REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ la t4, __kvm_switch_return
+ REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
+
+ /* Save Host and Restore Guest SSTATUS */
csrrw t0, CSR_SSTATUS, t0
- REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
/* Save Host and Restore Guest HSTATUS */
- REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
csrrw t1, CSR_HSTATUS, t1
- REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
/* Save Host and Restore Guest SCOUNTEREN */
- REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
csrrw t2, CSR_SCOUNTEREN, t2
- REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
-
- /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
- csrrw t3, CSR_SSCRATCH, a0
- REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
/* Save Host STVEC and change it to return path */
- la t4, __kvm_switch_return
csrrw t4, CSR_STVEC, t4
- REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
+
+ /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
+ csrrw t3, CSR_SSCRATCH, a0
/* Restore Guest SEPC */
- REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0)
- csrw CSR_SEPC, t0
+ csrw CSR_SEPC, t5
+
+ /* Store Host CSR values */
+ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
+ REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
/* Restore Guest GPRs (except A0) */
REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
@@ -145,32 +149,36 @@ __kvm_switch_return:
REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
+ /* Load Host CSR values */
+ REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
+ REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
+
/* Save Guest SEPC */
csrr t0, CSR_SEPC
- REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
-
- /* Restore Host STVEC */
- REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
- csrw CSR_STVEC, t1
/* Save Guest A0 and Restore Host SSCRATCH */
- REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
csrrw t2, CSR_SSCRATCH, t2
- REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+
+ /* Restore Host STVEC */
+ csrw CSR_STVEC, t1
/* Save Guest and Restore Host SCOUNTEREN */
- REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
csrrw t3, CSR_SCOUNTEREN, t3
- REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
/* Save Guest and Restore Host HSTATUS */
- REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
csrrw t4, CSR_HSTATUS, t4
- REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
/* Save Guest and Restore Host SSTATUS */
- REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
csrrw t5, CSR_SSTATUS, t5
+
+ /* Store Guest CSR values */
+ REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
/* Restore Host GPRs (except A0 and T0-T6) */
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index 07d1d2152ba5..e0609e1f0864 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -1,64 +1,316 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
+ */
#include <linux/linkage.h>
#include <asm/asm.h>
-ENTRY(__memmove)
-WEAK(memmove)
- move t0, a0
- move t1, a1
-
- beq a0, a1, exit_memcpy
- beqz a2, exit_memcpy
- srli t2, a2, 0x2
-
- slt t3, a0, a1
- beqz t3, do_reverse
-
- andi a2, a2, 0x3
- li t4, 1
- beqz t2, byte_copy
-
-word_copy:
- lw t3, 0(a1)
- addi t2, t2, -1
- addi a1, a1, 4
- sw t3, 0(a0)
- addi a0, a0, 4
- bnez t2, word_copy
- beqz a2, exit_memcpy
- j byte_copy
-
-do_reverse:
- add a0, a0, a2
- add a1, a1, a2
- andi a2, a2, 0x3
- li t4, -1
- beqz t2, reverse_byte_copy
-
-reverse_word_copy:
- addi a1, a1, -4
- addi t2, t2, -1
- lw t3, 0(a1)
- addi a0, a0, -4
- sw t3, 0(a0)
- bnez t2, reverse_word_copy
- beqz a2, exit_memcpy
-
-reverse_byte_copy:
- addi a0, a0, -1
- addi a1, a1, -1
+SYM_FUNC_START(__memmove)
+SYM_FUNC_START_WEAK(memmove)
+ /*
+ * Returns
+ * a0 - dest
+ *
+ * Parameters
+ * a0 - Inclusive first byte of dest
+ * a1 - Inclusive first byte of src
+ * a2 - Length of copy n
+ *
+ * Because the return matches the parameter register a0,
+ * we will not clobber or modify that register.
+ *
+ * Note: This currently only works on little-endian.
+ * To port to big-endian, reverse the direction of shifts
+ * in the 2 misaligned fixup copy loops.
+ */
+ /* Return if nothing to do */
+ beq a0, a1, return_from_memmove
+ beqz a2, return_from_memmove
+
+ /*
+ * Register Uses
+ * Forward Copy: a1 - Index counter of src
+ * Reverse Copy: a4 - Index counter of src
+ * Forward Copy: t3 - Index counter of dest
+ * Reverse Copy: t4 - Index counter of dest
+ * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
+ * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
+ * Both Copy Modes: t0 - Link / Temporary for load-store
+ * Both Copy Modes: t1 - Temporary for load-store
+ * Both Copy Modes: t2 - Temporary for load-store
+ * Both Copy Modes: a5 - dest to src alignment offset
+ * Both Copy Modes: a6 - Shift ammount
+ * Both Copy Modes: a7 - Inverse Shift ammount
+ * Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
+ */
+
+ /*
+ * Solve for some register values now.
+ * Byte copy does not need t5 or t6.
+ */
+ mv t3, a0
+ add t4, a0, a2
+ add a4, a1, a2
+
+ /*
+ * Byte copy if copying less than (2 * SZREG) bytes. This can
+ * cause problems with the bulk copy implementation and is
+ * small enough not to bother.
+ */
+ andi t0, a2, -(2 * SZREG)
+ beqz t0, byte_copy
+
+ /*
+ * Now solve for t5 and t6.
+ */
+ andi t5, t3, -SZREG
+ andi t6, t4, -SZREG
+ /*
+ * If dest(Register t3) rounded down to the nearest naturally
+ * aligned SZREG address, does not equal dest, then add SZREG
+ * to find the low-bound of SZREG alignment in the dest memory
+ * region. Note that this could overshoot the dest memory
+ * region if n is less than SZREG. This is one reason why
+ * we always byte copy if n is less than SZREG.
+ * Otherwise, dest is already naturally aligned to SZREG.
+ */
+ beq t5, t3, 1f
+ addi t5, t5, SZREG
+ 1:
+
+ /*
+ * If the dest and src are co-aligned to SZREG, then there is
+ * no need for the full rigmarole of a full misaligned fixup copy.
+ * Instead, do a simpler co-aligned copy.
+ */
+ xor t0, a0, a1
+ andi t1, t0, (SZREG - 1)
+ beqz t1, coaligned_copy
+ /* Fall through to misaligned fixup copy */
+
+misaligned_fixup_copy:
+ bltu a1, a0, misaligned_fixup_copy_reverse
+
+misaligned_fixup_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a1, t3 /* Find the difference between src and dest */
+ andi a1, a1, -SZREG /* Align the src pointer */
+ addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Forward
+ * load_val0 = load_ptr[0];
+ * do {
+ * load_val1 = load_ptr[1];
+ * store_ptr += 2;
+ * store_ptr[0 - 2] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val0 = load_ptr[2];
+ * load_ptr += 2;
+ * store_ptr[1 - 2] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t0, (0 * SZREG)(a1)
+ 1:
+ REG_L t1, (1 * SZREG)(a1)
+ addi t3, t3, (2 * SZREG)
+ srl t0, t0, a6
+ sll t2, t1, a7
+ or t2, t0, t2
+ REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)
+
+ beq t3, a2, 2f
+
+ REG_L t0, (2 * SZREG)(a1)
+ addi a1, a1, (2 * SZREG)
+ srl t1, t1, a6
+ sll t2, t0, a7
+ or t2, t1, t2
+ REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
+
+ bne t3, t6, 1b
+ 2:
+ mv t3, t6 /* Fix the dest pointer in case the loop was broken */
+
+ add a1, t3, a5 /* Restore the src pointer */
+ j byte_copy_forward /* Copy any remaining bytes */
+
+misaligned_fixup_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
+ slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
+ sub a5, a4, t4 /* Find the difference between src and dest */
+ andi a4, a4, -SZREG /* Align the src pointer */
+ addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop*/
+
+ /*
+ * Compute The Inverse Shift
+ * a7 = XLEN - a6 = XLEN + -a6
+ * 2s complement negation to find the negative: -a6 = ~a6 + 1
+ * Add that to XLEN. XLEN = SZREG * 8.
+ */
+ not a7, a6
+ addi a7, a7, (SZREG * 8 + 1)
+
+ /*
+ * Fix Misalignment Copy Loop - Reverse
+ * load_val1 = load_ptr[0];
+ * do {
+ * load_val0 = load_ptr[-1];
+ * store_ptr -= 2;
+ * store_ptr[1] = (load_val0 >> {a6}) | (load_val1 << {a7});
+ *
+ * if (store_ptr == {a2})
+ * break;
+ *
+ * load_val1 = load_ptr[-2];
+ * load_ptr -= 2;
+ * store_ptr[0] = (load_val1 >> {a6}) | (load_val0 << {a7});
+ *
+ * } while (store_ptr != store_ptr_end);
+ * store_ptr = store_ptr_end;
+ */
+
+ REG_L t1, ( 0 * SZREG)(a4)
+ 1:
+ REG_L t0, (-1 * SZREG)(a4)
+ addi t4, t4, (-2 * SZREG)
+ sll t1, t1, a7
+ srl t2, t0, a6
+ or t2, t1, t2
+ REG_S t2, ( 1 * SZREG)(t4)
+
+ beq t4, a2, 2f
+
+ REG_L t1, (-2 * SZREG)(a4)
+ addi a4, a4, (-2 * SZREG)
+ sll t0, t0, a7
+ srl t2, t1, a6
+ or t2, t0, t2
+ REG_S t2, ( 0 * SZREG)(t4)
+
+ bne t4, t5, 1b
+ 2:
+ mv t4, t5 /* Fix the dest pointer in case the loop was broken */
+
+ add a4, t4, a5 /* Restore the src pointer */
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * Simple copy loops for SZREG co-aligned memory locations.
+ * These also make calls to do byte copies for any unaligned
+ * data at their terminations.
+ */
+coaligned_copy:
+ bltu a1, a0, coaligned_copy_reverse
+
+coaligned_copy_forward:
+ jal t0, byte_copy_until_aligned_forward
+
+ 1:
+ REG_L t1, ( 0 * SZREG)(a1)
+ addi a1, a1, SZREG
+ addi t3, t3, SZREG
+ REG_S t1, (-1 * SZREG)(t3)
+ bne t3, t6, 1b
+
+ j byte_copy_forward /* Copy any remaining bytes */
+
+coaligned_copy_reverse:
+ jal t0, byte_copy_until_aligned_reverse
+
+ 1:
+ REG_L t1, (-1 * SZREG)(a4)
+ addi a4, a4, -SZREG
+ addi t4, t4, -SZREG
+ REG_S t1, ( 0 * SZREG)(t4)
+ bne t4, t5, 1b
+
+ j byte_copy_reverse /* Copy any remaining bytes */
+
+/*
+ * These are basically sub-functions within the function. They
+ * are used to byte copy until the dest pointer is in alignment.
+ * At which point, a bulk copy method can be used by the
+ * calling code. These work on the same registers as the bulk
+ * copy loops. Therefore, the register values can be picked
+ * up from where they were left and we avoid code duplication
+ * without any overhead except the call in and return jumps.
+ */
+byte_copy_until_aligned_forward:
+ beq t3, t5, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t5, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+byte_copy_until_aligned_reverse:
+ beq t4, t6, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t6, 1b
+ 2:
+ jalr zero, 0x0(t0) /* Return to multibyte copy loop */
+
+/*
+ * Simple byte copy loops.
+ * These will byte copy until they reach the end of data to copy.
+ * At that point, they will call to return from memmove.
+ */
byte_copy:
- lb t3, 0(a1)
- addi a2, a2, -1
- sb t3, 0(a0)
- add a1, a1, t4
- add a0, a0, t4
- bnez a2, byte_copy
-
-exit_memcpy:
- move a0, t0
- move a1, t1
- ret
-END(__memmove)
+ bltu a1, a0, byte_copy_reverse
+
+byte_copy_forward:
+ beq t3, t4, 2f
+ 1:
+ lb t1, 0(a1)
+ addi a1, a1, 1
+ addi t3, t3, 1
+ sb t1, -1(t3)
+ bne t3, t4, 1b
+ 2:
+ ret
+
+byte_copy_reverse:
+ beq t4, t3, 2f
+ 1:
+ lb t1, -1(a4)
+ addi a4, a4, -1
+ addi t4, t4, -1
+ sb t1, 0(t4)
+ bne t4, t3, 1b
+ 2:
+
+return_from_memmove:
+ ret
+
+SYM_FUNC_END(memmove)
+SYM_FUNC_END(__memmove)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 7ebaef10ea1b..ac7a25298a04 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o
ifdef CONFIG_KASAN
KASAN_SANITIZE_kasan_init.o := n
KASAN_SANITIZE_init.o := n
+ifdef CONFIG_DEBUG_VIRTUAL
+KASAN_SANITIZE_physaddr.o := n
+endif
endif
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c27294128e18..9535bea8688c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -38,14 +38,16 @@ EXPORT_SYMBOL(kernel_map);
#endif
#ifdef CONFIG_64BIT
-u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+u64 satp_mode __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
#else
-u64 satp_mode = SATP_MODE_32;
+u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
+EXPORT_SYMBOL(pgtable_l5_enabled);
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
@@ -125,7 +127,6 @@ void __init mem_init(void)
else
swiotlb_force = SWIOTLB_NO_FORCE;
#endif
- high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
memblock_free_all();
print_vm_layout();
@@ -195,6 +196,7 @@ static void __init setup_bootmem(void)
min_low_pfn = PFN_UP(phys_ram_base);
max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end);
+ high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
@@ -227,6 +229,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
@@ -318,6 +321,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
#endif /* CONFIG_XIP_KERNEL */
+static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
+#define fixmap_p4d ((p4d_t *)XIP_FIXUP(fixmap_p4d))
+#define early_p4d ((p4d_t *)XIP_FIXUP(early_p4d))
+#endif /* CONFIG_XIP_KERNEL */
+
static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
@@ -432,6 +445,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
return __pa(vaddr);
}
+static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
+{
+ return (p4d_t *)((uintptr_t)pa);
+}
+
+static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_P4D);
+ return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
+}
+
+static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
+{
+ return (p4d_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_p4d_early(uintptr_t va)
+{
+ /* Only one P4D is available for early mapping */
+ BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_p4d;
+}
+
+static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_p4d_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
static void __init create_pud_mapping(pud_t *pudp,
uintptr_t va, phys_addr_t pa,
phys_addr_t sz, pgprot_t prot)
@@ -459,21 +510,55 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pmd_mapping(nextp, va, pa, sz, prot);
}
-#define pgd_next_t pud_t
-#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \
- pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
-#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \
- pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
+static void __init create_p4d_mapping(p4d_t *p4dp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pud_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t p4d_index = p4d_index(va);
+
+ if (sz == P4D_SIZE) {
+ if (p4d_val(p4dp[p4d_index]) == 0)
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (p4d_val(p4dp[p4d_index]) == 0) {
+ next_phys = pt_ops.alloc_pud(va);
+ p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = pt_ops.get_pud_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
+ nextp = pt_ops.get_pud_virt(next_phys);
+ }
+
+ create_pud_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t p4d_t
+#define alloc_pgd_next(__va) (pgtable_l5_enabled ? \
+ pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ? \
+ pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
+#define get_pgd_next_virt(__pa) (pgtable_l5_enabled ? \
+ pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ? \
+ pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ (pgtable_l5_enabled ? \
+ create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
(pgtable_l4_enabled ? \
- create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \
- create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
-#define fixmap_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
-#define trampoline_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
-#define early_dtb_pgd_next (pgtable_l4_enabled ? \
- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
+ create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) : \
+ create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
+#define fixmap_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)fixmap_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
+#define trampoline_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+#define early_dtb_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
@@ -482,6 +567,7 @@ static void __init create_pud_mapping(pud_t *pudp,
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
#define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
#endif /* __PAGETABLE_PMD_FOLDED */
@@ -575,6 +661,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
#endif /* CONFIG_STRICT_KERNEL_RWX */
#ifdef CONFIG_64BIT
+static void __init disable_pgtable_l5(void)
+{
+ pgtable_l5_enabled = false;
+ kernel_map.page_offset = PAGE_OFFSET_L4;
+ satp_mode = SATP_MODE_48;
+}
+
static void __init disable_pgtable_l4(void)
{
pgtable_l4_enabled = false;
@@ -591,12 +684,12 @@ static void __init disable_pgtable_l4(void)
static __init void set_satp_mode(void)
{
u64 identity_satp, hw_satp;
- uintptr_t set_satp_mode_pmd;
+ uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+ bool check_l4 = false;
- set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
- create_pgd_mapping(early_pg_dir,
- set_satp_mode_pmd, (uintptr_t)early_pud,
- PGDIR_SIZE, PAGE_TABLE);
+ create_p4d_mapping(early_p4d,
+ set_satp_mode_pmd, (uintptr_t)early_pud,
+ P4D_SIZE, PAGE_TABLE);
create_pud_mapping(early_pud,
set_satp_mode_pmd, (uintptr_t)early_pmd,
PUD_SIZE, PAGE_TABLE);
@@ -608,6 +701,11 @@ static __init void set_satp_mode(void)
set_satp_mode_pmd + PMD_SIZE,
set_satp_mode_pmd + PMD_SIZE,
PMD_SIZE, PAGE_KERNEL_EXEC);
+retry:
+ create_pgd_mapping(early_pg_dir,
+ set_satp_mode_pmd,
+ check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
+ PGDIR_SIZE, PAGE_TABLE);
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
@@ -616,10 +714,17 @@ static __init void set_satp_mode(void)
hw_satp = csr_swap(CSR_SATP, 0ULL);
local_flush_tlb_all();
- if (hw_satp != identity_satp)
+ if (hw_satp != identity_satp) {
+ if (!check_l4) {
+ disable_pgtable_l5();
+ check_l4 = true;
+ goto retry;
+ }
disable_pgtable_l4();
+ }
memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_p4d, 0, PAGE_SIZE);
memset(early_pud, 0, PAGE_SIZE);
memset(early_pmd, 0, PAGE_SIZE);
}
@@ -693,10 +798,13 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
PGDIR_SIZE,
IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
- if (pgtable_l4_enabled) {
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+
+ if (pgtable_l4_enabled)
create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
- }
if (IS_ENABLED(CONFIG_64BIT)) {
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@@ -732,6 +840,8 @@ void __init pt_ops_set_early(void)
pt_ops.get_pmd_virt = get_pmd_virt_early;
pt_ops.alloc_pud = alloc_pud_early;
pt_ops.get_pud_virt = get_pud_virt_early;
+ pt_ops.alloc_p4d = alloc_p4d_early;
+ pt_ops.get_p4d_virt = get_p4d_virt_early;
#endif
}
@@ -752,6 +862,8 @@ void __init pt_ops_set_fixmap(void)
pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
+ pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
+ pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
#endif
}
@@ -768,6 +880,8 @@ void __init pt_ops_set_late(void)
pt_ops.get_pmd_virt = get_pmd_virt_late;
pt_ops.alloc_pud = alloc_pud_late;
pt_ops.get_pud_virt = get_pud_virt_late;
+ pt_ops.alloc_p4d = alloc_p4d_late;
+ pt_ops.get_p4d_virt = get_p4d_virt_late;
#endif
}
@@ -828,6 +942,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup fixmap P4D and PUD */
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(fixmap_p4d, FIXADDR_START,
+ (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
/* Setup fixmap PUD and PMD */
if (pgtable_l4_enabled)
create_pud_mapping(fixmap_pud, FIXADDR_START,
@@ -837,6 +955,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l5_enabled)
+ create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
+ (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
if (pgtable_l4_enabled)
create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
(uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
@@ -938,6 +1059,7 @@ static void __init setup_vm_final(void)
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
clear_fixmap(FIX_PUD);
+ clear_fixmap(FIX_P4D);
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
@@ -957,7 +1079,6 @@ static inline void setup_vm_final(void)
}
#endif /* CONFIG_MMU */
-#ifdef CONFIG_KEXEC_CORE
/*
* reserve_crashkernel() - reserves memory for crash kernel
*
@@ -974,6 +1095,8 @@ static void __init reserve_crashkernel(void)
int ret = 0;
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
/*
* Don't reserve a region for a crash kernel on a crash kernel
* since it doesn't make much sense and we have limited memory
@@ -1023,7 +1146,6 @@ static void __init reserve_crashkernel(void)
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
}
-#endif /* CONFIG_KEXEC_CORE */
void __init paging_init(void)
{
@@ -1037,9 +1159,7 @@ void __init misc_mem_init(void)
arch_numa_init();
sparse_init();
zone_sizes_init();
-#ifdef CONFIG_KEXEC_CORE
reserve_crashkernel();
-#endif
memblock_dump_all();
}
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index f61f7ca6fe0f..a22e418dbd82 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -111,10 +111,15 @@ static void __init kasan_populate_pud(pgd_t *pgd,
* pt_ops facility.
*/
base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else if (pgd_none(*pgd)) {
+ base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
} else {
base_pud = (pud_t *)pgd_page_vaddr(*pgd);
- if (base_pud == lm_alias(kasan_early_shadow_pud))
+ if (base_pud == lm_alias(kasan_early_shadow_pud)) {
base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ memcpy(base_pud, (void *)kasan_early_shadow_pud,
+ sizeof(pud_t) * PTRS_PER_PUD);
+ }
}
pudp = base_pud + pud_index(vaddr);
@@ -149,13 +154,72 @@ static void __init kasan_populate_pud(pgd_t *pgd,
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
-#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \
+static void __init kasan_populate_p4d(pgd_t *pgd,
+ unsigned long vaddr, unsigned long end,
+ bool early)
+{
+ phys_addr_t phys_addr;
+ p4d_t *p4dp, *base_p4d;
+ unsigned long next;
+
+ if (early) {
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ */
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
+ } else {
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+ if (base_p4d == lm_alias(kasan_early_shadow_p4d))
+ base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
+ }
+
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+
+ if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
+ if (early) {
+ phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud));
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ } else {
+ phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
+ if (phys_addr) {
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+ }
+
+ kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early);
+ } while (p4dp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole P4D to be populated before setting the P4D in
+ * the page table, otherwise, if we did set the P4D before populating
+ * it entirely, memblock could allocate a page at a physical address
+ * where KASAN is not populated yet and then we'd get a page fault.
+ */
+ if (!early)
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE));
+}
+
+#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)kasan_early_shadow_p4d : \
+ (pgtable_l4_enabled ? \
(uintptr_t)kasan_early_shadow_pud : \
- (uintptr_t)kasan_early_shadow_pmd)
+ (uintptr_t)kasan_early_shadow_pmd))
#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
+ (pgtable_l5_enabled ? \
+ kasan_populate_p4d(pgdp, vaddr, next, early) : \
(pgtable_l4_enabled ? \
kasan_populate_pud(pgdp, vaddr, next, early) : \
- kasan_populate_pmd((pud_t *)pgdp, vaddr, next))
+ kasan_populate_pmd((pud_t *)pgdp, vaddr, next)))
static void __init kasan_populate_pgd(pgd_t *pgdp,
unsigned long vaddr, unsigned long end,
@@ -202,8 +266,7 @@ asmlinkage void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PTE; ++i)
set_pte(kasan_early_shadow_pte + i,
- mk_pte(virt_to_page(kasan_early_shadow_page),
- PAGE_KERNEL));
+ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
for (i = 0; i < PTRS_PER_PMD; ++i)
set_pmd(kasan_early_shadow_pmd + i,
@@ -219,6 +282,14 @@ asmlinkage void __init kasan_early_init(void)
PAGE_TABLE));
}
+ if (pgtable_l5_enabled) {
+ for (i = 0; i < PTRS_PER_P4D; ++i)
+ set_p4d(kasan_early_shadow_p4d + i,
+ pfn_p4d(PFN_DOWN
+ (__pa(((uintptr_t)kasan_early_shadow_pud))),
+ PAGE_TABLE));
+ }
+
kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
KASAN_SHADOW_START, KASAN_SHADOW_END, true);
@@ -244,9 +315,27 @@ static void __init kasan_populate(void *start, void *end)
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ pmd_t *pmdp, *base_pmd;
+ bool is_kasan_pte;
+
+ base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
+ pmdp = base_pmd + pmd_index(vaddr);
+
+ do {
+ next = pmd_addr_end(vaddr, end);
+ is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
+
+ if (is_kasan_pte)
+ pmd_clear(pmdp);
+ } while (pmdp++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
- unsigned long vaddr, unsigned long end,
- bool kasan_populate)
+ unsigned long vaddr, unsigned long end)
{
unsigned long next;
pud_t *pudp, *base_pud;
@@ -256,21 +345,60 @@ static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
pudp = base_pud + pud_index(vaddr);
- if (kasan_populate)
- memcpy(base_pud, (void *)kasan_early_shadow_pgd_next,
- sizeof(pud_t) * PTRS_PER_PUD);
-
do {
next = pud_addr_end(vaddr, end);
is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
- if (is_kasan_pmd) {
- base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
- }
+ if (!is_kasan_pmd)
+ continue;
+
+ base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
+ continue;
+
+ memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
+ kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
} while (pudp++, vaddr = next, vaddr != end);
}
+static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ p4d_t *p4dp, *base_p4d;
+ pud_t *base_pud;
+ bool is_kasan_pud;
+
+ base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
+ p4dp = base_p4d + p4d_index(vaddr);
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+ is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
+
+ if (!is_kasan_pud)
+ continue;
+
+ base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
+
+ if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
+ continue;
+
+ memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
+ kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
+ } while (p4dp++, vaddr = next, vaddr != end);
+}
+
+#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \
+ (pgtable_l5_enabled ? \
+ kasan_shallow_populate_p4d(pgdp, vaddr, next) : \
+ (pgtable_l4_enabled ? \
+ kasan_shallow_populate_pud(pgdp, vaddr, next) : \
+ kasan_shallow_populate_pmd(pgdp, vaddr, next)))
+
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
@@ -291,7 +419,8 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long
if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
continue;
- kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next);
+ memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
+ kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index e7fd0c253c7b..19cf25a74ee2 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -8,12 +8,10 @@
phys_addr_t __virt_to_phys(unsigned long x)
{
- phys_addr_t y = x - PAGE_OFFSET;
-
/*
* Boundary checking aginst the kernel linear mapping space.
*/
- WARN(y >= KERN_VIRT_SIZE,
+ WARN(!is_linear_mapping(x) && !is_kernel_mapping(x),
"virt_to_phys used for non-linear address: %pK (%pS)\n",
(void *)x, (void *)x);