summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPalmer Dabbelt <palmer@rivosinc.com>2025-03-26 15:55:45 -0700
committerPalmer Dabbelt <palmer@rivosinc.com>2025-03-26 15:56:49 -0700
commitf633de4aa4537c190a9842c3e84e77780621c615 (patch)
tree1ce477b4058436b9ce7a8834a44d3038d94ba8da
parentdf02351331671abb26788bc13f6d276e26ae068f (diff)
parente1cf2d009b00fd890dbbcb8b79613ff538732559 (diff)
downloadlinux-f633de4aa4537c190a9842c3e84e77780621c615.tar.gz
linux-f633de4aa4537c190a9842c3e84e77780621c615.tar.bz2
linux-f633de4aa4537c190a9842c3e84e77780621c615.zip
Merge patch series "riscv: Relocatable NOMMU kernels"
Samuel Holland <samuel.holland@sifive.com> says: Currently, RISC-V NOMMU kernels are linked at CONFIG_PAGE_OFFSET, and since they are not relocatable, must be loaded at this address as well. CONFIG_PAGE_OFFSET is not a user-visible Kconfig option, so its value is not obvious, and users must patch the kernel source if they want to load it at a different address. Make NOMMU kernels more portable by making them relocatable by default. This allows a single kernel binary to work when loaded at any address. * b4-shazam-merge: riscv: Remove CONFIG_PAGE_OFFSET riscv: Support CONFIG_RELOCATABLE on riscv32 asm-generic: Always define Elf_Rel and Elf_Rela riscv: Support CONFIG_RELOCATABLE on NOMMU riscv: Allow NOMMU kernels to access all of RAM riscv: Remove duplicate CONFIG_PAGE_OFFSET definition Link: https://lore.kernel.org/r/20241026171441.3047904-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
-rw-r--r--arch/riscv/Kconfig10
-rw-r--r--arch/riscv/Makefile1
-rw-r--r--arch/riscv/errata/Makefile6
-rw-r--r--arch/riscv/include/asm/page.h27
-rw-r--r--arch/riscv/include/asm/pgtable.h6
-rw-r--r--arch/riscv/mm/init.c97
-rw-r--r--include/asm-generic/module.h8
7 files changed, 73 insertions, 82 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0d8def968a7e..b0ece4227e68 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -202,6 +202,7 @@ config RISCV
select PCI_DOMAINS_GENERIC if PCI
select PCI_ECAM if (ACPI && PCI)
select PCI_MSI if PCI
+ select RELOCATABLE if !MMU && !PHYS_RAM_BASE_FIXED
select RISCV_ALTERNATIVE if !XIP_KERNEL
select RISCV_APLIC
select RISCV_IMSIC
@@ -289,13 +290,6 @@ config MMU
Select if you want MMU-based virtualised addressing space
support by paged memory management. If unsure, say 'Y'.
-config PAGE_OFFSET
- hex
- default 0x80000000 if !MMU && RISCV_M_MODE
- default 0x80200000 if !MMU
- default 0xc0000000 if 32BIT
- default 0xff60000000000000 if 64BIT
-
config KASAN_SHADOW_OFFSET
hex
depends on KASAN_GENERIC
@@ -1100,7 +1094,7 @@ config PARAVIRT_TIME_ACCOUNTING
config RELOCATABLE
bool "Build a relocatable kernel"
- depends on MMU && 64BIT && !XIP_KERNEL
+ depends on !XIP_KERNEL
select MODULE_SECTIONS if MODULES
help
This builds a kernel as a Position Independent Executable (PIE),
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 13fbc0f94238..600df90bc141 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -98,7 +98,6 @@ KBUILD_AFLAGS += -march=$(riscv-march-y)
CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
KBUILD_CFLAGS += -mno-save-restore
-KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
ifeq ($(CONFIG_CMODEL_MEDLOW),y)
KBUILD_CFLAGS += -mcmodel=medlow
diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile
index f0da9d7b39c3..bc6c77ba837d 100644
--- a/arch/riscv/errata/Makefile
+++ b/arch/riscv/errata/Makefile
@@ -1,5 +1,9 @@
ifdef CONFIG_RELOCATABLE
-KBUILD_CFLAGS += -fno-pie
+# We can't use PIC/PIE when handling early-boot errata parsing, as the kernel
+# doesn't have a GOT setup at that point. So instead just use medany: it's
+# usually position-independent, so it should be good enough for the errata
+# handling.
+KBUILD_CFLAGS += -fno-pie -mcmodel=medany
endif
ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 125f5ecd9565..572a141ddecd 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -24,21 +24,22 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
-#ifdef CONFIG_64BIT
#ifdef CONFIG_MMU
-#define PAGE_OFFSET kernel_map.page_offset
-#else
-#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-#endif
-/*
- * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so
- * define the PAGE_OFFSET value for SV48 and SV39.
- */
+#ifdef CONFIG_64BIT
+#define PAGE_OFFSET_L5 _AC(0xff60000000000000, UL)
#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL)
+#ifdef CONFIG_XIP_KERNEL
+#define PAGE_OFFSET PAGE_OFFSET_L3
#else
-#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#define PAGE_OFFSET kernel_map.page_offset
+#endif /* CONFIG_XIP_KERNEL */
+#else
+#define PAGE_OFFSET _AC(0xc0000000, UL)
#endif /* CONFIG_64BIT */
+#else
+#define PAGE_OFFSET ((unsigned long)phys_ram_base)
+#endif /* CONFIG_MMU */
#ifndef __ASSEMBLY__
@@ -95,14 +96,9 @@ typedef struct page *pgtable_t;
#define MIN_MEMBLOCK_ADDR 0
#endif
-#ifdef CONFIG_MMU
#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
-#else
-#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
-#endif /* CONFIG_MMU */
struct kernel_mapping {
- unsigned long page_offset;
unsigned long virt_addr;
unsigned long virt_offset;
uintptr_t phys_addr;
@@ -116,6 +112,7 @@ struct kernel_mapping {
uintptr_t xiprom;
uintptr_t xiprom_sz;
#else
+ unsigned long page_offset;
unsigned long va_kernel_pa_offset;
#endif
};
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index b6697dc21daf..428e48e5f57d 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -12,7 +12,11 @@
#include <asm/pgtable-bits.h>
#ifndef CONFIG_MMU
-#define KERNEL_LINK_ADDR PAGE_OFFSET
+#ifdef CONFIG_RELOCATABLE
+#define KERNEL_LINK_ADDR UL(0)
+#else
+#define KERNEL_LINK_ADDR _AC(CONFIG_PHYS_RAM_BASE, UL)
+#endif
#define KERN_VIRT_SIZE (UL(-1))
#else
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 15b2eda4c364..37af6513a50f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -20,15 +20,13 @@
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
-#ifdef CONFIG_RELOCATABLE
-#include <linux/elf.h>
-#endif
#include <linux/kfence.h>
#include <linux/execmem.h>
#include <asm/fixmap.h>
#include <asm/io.h>
#include <asm/kasan.h>
+#include <asm/module.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/sections.h>
@@ -323,6 +321,44 @@ static void __init setup_bootmem(void)
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
}
+#ifdef CONFIG_RELOCATABLE
+extern unsigned long __rela_dyn_start, __rela_dyn_end;
+
+static void __init relocate_kernel(void)
+{
+ Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start;
+ /*
+ * This holds the offset between the linked virtual address and the
+ * relocated virtual address.
+ */
+ uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
+ /*
+ * This holds the offset between kernel linked virtual address and
+ * physical address.
+ */
+ uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
+
+ for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) {
+ Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
+ Elf_Addr relocated_addr = rela->r_addend;
+
+ if (rela->r_info != R_RISCV_RELATIVE)
+ continue;
+
+ /*
+ * Make sure to not relocate vdso symbols like rt_sigreturn
+ * which are linked from the address 0 in vmlinux since
+ * vdso symbol addresses are actually used as an offset from
+ * mm->context.vdso in VDSO_OFFSET macro.
+ */
+ if (relocated_addr >= KERNEL_LINK_ADDR)
+ relocated_addr += reloc_offset;
+
+ *(Elf_Addr *)addr = relocated_addr;
+ }
+}
+#endif /* CONFIG_RELOCATABLE */
+
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __meminitdata;
@@ -823,6 +859,8 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
+ kernel_map.page_offset = PAGE_OFFSET_L5;
+
if (satp_mode_cmdline == SATP_MODE_57) {
disable_pgtable_l5();
} else if (satp_mode_cmdline == SATP_MODE_48) {
@@ -893,44 +931,6 @@ retry:
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
-#ifdef CONFIG_RELOCATABLE
-extern unsigned long __rela_dyn_start, __rela_dyn_end;
-
-static void __init relocate_kernel(void)
-{
- Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
- /*
- * This holds the offset between the linked virtual address and the
- * relocated virtual address.
- */
- uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
- /*
- * This holds the offset between kernel linked virtual address and
- * physical address.
- */
- uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
-
- for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
- Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
- Elf64_Addr relocated_addr = rela->r_addend;
-
- if (rela->r_info != R_RISCV_RELATIVE)
- continue;
-
- /*
- * Make sure to not relocate vdso symbols like rt_sigreturn
- * which are linked from the address 0 in vmlinux since
- * vdso symbol addresses are actually used as an offset from
- * mm->context.vdso in VDSO_OFFSET macro.
- */
- if (relocated_addr >= KERNEL_LINK_ADDR)
- relocated_addr += reloc_offset;
-
- *(Elf64_Addr *)addr = relocated_addr;
- }
-}
-#endif /* CONFIG_RELOCATABLE */
-
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
__always_unused bool early)
@@ -1108,11 +1108,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;
#ifdef CONFIG_XIP_KERNEL
-#ifdef CONFIG_64BIT
- kernel_map.page_offset = PAGE_OFFSET_L3;
-#else
- kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
-#endif
kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
@@ -1127,7 +1122,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr
+ (uintptr_t)&_sdata - (uintptr_t)&_start;
#else
- kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
kernel_map.phys_addr = (uintptr_t)(&_start);
kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
@@ -1174,7 +1168,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* makes the kernel cross over a PUD_SIZE boundary, raise a bug
* since a part of the kernel would not get mapped.
*/
- BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
+ if (IS_ENABLED(CONFIG_64BIT))
+ BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
relocate_kernel();
#endif
@@ -1378,6 +1373,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
dtb_early_va = (void *)dtb_pa;
dtb_early_pa = dtb_pa;
+
+#ifdef CONFIG_RELOCATABLE
+ kernel_map.virt_addr = (uintptr_t)_start;
+ kernel_map.phys_addr = (uintptr_t)_start;
+ relocate_kernel();
+#endif
}
static inline void setup_vm_final(void)
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
index 98e1541b72b7..a8622501b975 100644
--- a/include/asm-generic/module.h
+++ b/include/asm-generic/module.h
@@ -19,12 +19,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf64_Dyn
#define Elf_Ehdr Elf64_Ehdr
#define Elf_Addr Elf64_Addr
-#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf64_Rel
-#endif
-#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf64_Rela
-#endif
#define ELF_R_TYPE(X) ELF64_R_TYPE(X)
#define ELF_R_SYM(X) ELF64_R_SYM(X)
@@ -36,12 +32,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf32_Dyn
#define Elf_Ehdr Elf32_Ehdr
#define Elf_Addr Elf32_Addr
-#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf32_Rel
-#endif
-#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf32_Rela
-#endif
#define ELF_R_TYPE(X) ELF32_R_TYPE(X)
#define ELF_R_SYM(X) ELF32_R_SYM(X)
#endif