diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2025-03-26 15:55:45 -0700 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2025-03-26 15:56:49 -0700 |
commit | f633de4aa4537c190a9842c3e84e77780621c615 (patch) | |
tree | 1ce477b4058436b9ce7a8834a44d3038d94ba8da | |
parent | df02351331671abb26788bc13f6d276e26ae068f (diff) | |
parent | e1cf2d009b00fd890dbbcb8b79613ff538732559 (diff) | |
download | linux-f633de4aa4537c190a9842c3e84e77780621c615.tar.gz linux-f633de4aa4537c190a9842c3e84e77780621c615.tar.bz2 linux-f633de4aa4537c190a9842c3e84e77780621c615.zip |
Merge patch series "riscv: Relocatable NOMMU kernels"
Samuel Holland <samuel.holland@sifive.com> says:
Currently, RISC-V NOMMU kernels are linked at CONFIG_PAGE_OFFSET and,
since they are not relocatable, they must also be loaded at this address.
CONFIG_PAGE_OFFSET is not a user-visible Kconfig option, so its value is
not obvious, and users must patch the kernel source if they want to load
the kernel at a different address.
Make NOMMU kernels more portable by making them relocatable by default.
This allows a single kernel binary to work when loaded at any address.
* b4-shazam-merge:
riscv: Remove CONFIG_PAGE_OFFSET
riscv: Support CONFIG_RELOCATABLE on riscv32
asm-generic: Always define Elf_Rel and Elf_Rela
riscv: Support CONFIG_RELOCATABLE on NOMMU
riscv: Allow NOMMU kernels to access all of RAM
riscv: Remove duplicate CONFIG_PAGE_OFFSET definition
Link: https://lore.kernel.org/r/20241026171441.3047904-1-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
-rw-r--r-- | arch/riscv/Kconfig | 10 | ||||
-rw-r--r-- | arch/riscv/Makefile | 1 | ||||
-rw-r--r-- | arch/riscv/errata/Makefile | 6 | ||||
-rw-r--r-- | arch/riscv/include/asm/page.h | 27 | ||||
-rw-r--r-- | arch/riscv/include/asm/pgtable.h | 6 | ||||
-rw-r--r-- | arch/riscv/mm/init.c | 97 | ||||
-rw-r--r-- | include/asm-generic/module.h | 8 |
7 files changed, 73 insertions, 82 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0d8def968a7e..b0ece4227e68 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -202,6 +202,7 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_ECAM if (ACPI && PCI) select PCI_MSI if PCI + select RELOCATABLE if !MMU && !PHYS_RAM_BASE_FIXED select RISCV_ALTERNATIVE if !XIP_KERNEL select RISCV_APLIC select RISCV_IMSIC @@ -289,13 +290,6 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. -config PAGE_OFFSET - hex - default 0x80000000 if !MMU && RISCV_M_MODE - default 0x80200000 if !MMU - default 0xc0000000 if 32BIT - default 0xff60000000000000 if 64BIT - config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC @@ -1100,7 +1094,7 @@ config PARAVIRT_TIME_ACCOUNTING config RELOCATABLE bool "Build a relocatable kernel" - depends on MMU && 64BIT && !XIP_KERNEL + depends on !XIP_KERNEL select MODULE_SECTIONS if MODULES help This builds a kernel as a Position Independent Executable (PIE), diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 13fbc0f94238..600df90bc141 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -98,7 +98,6 @@ KBUILD_AFLAGS += -march=$(riscv-march-y) CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/') KBUILD_CFLAGS += -mno-save-restore -KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET) ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS += -mcmodel=medlow diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index f0da9d7b39c3..bc6c77ba837d 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1,5 +1,9 @@ ifdef CONFIG_RELOCATABLE -KBUILD_CFLAGS += -fno-pie +# We can't use PIC/PIE when handling early-boot errata parsing, as the kernel +# doesn't have a GOT setup at that point. 
So instead just use medany: it's +# usually position-independent, so it should be good enough for the errata +# handling. +KBUILD_CFLAGS += -fno-pie -mcmodel=medany endif ifdef CONFIG_RISCV_ALTERNATIVE_EARLY diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 125f5ecd9565..572a141ddecd 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -24,21 +24,22 @@ * When not using MMU this corresponds to the first free page in * physical memory (aligned on a page boundary). */ -#ifdef CONFIG_64BIT #ifdef CONFIG_MMU -#define PAGE_OFFSET kernel_map.page_offset -#else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) -#endif -/* - * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so - * define the PAGE_OFFSET value for SV48 and SV39. - */ +#ifdef CONFIG_64BIT +#define PAGE_OFFSET_L5 _AC(0xff60000000000000, UL) #define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL) #define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL) +#ifdef CONFIG_XIP_KERNEL +#define PAGE_OFFSET PAGE_OFFSET_L3 #else -#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#define PAGE_OFFSET kernel_map.page_offset +#endif /* CONFIG_XIP_KERNEL */ +#else +#define PAGE_OFFSET _AC(0xc0000000, UL) #endif /* CONFIG_64BIT */ +#else +#define PAGE_OFFSET ((unsigned long)phys_ram_base) +#endif /* CONFIG_MMU */ #ifndef __ASSEMBLY__ @@ -95,14 +96,9 @@ typedef struct page *pgtable_t; #define MIN_MEMBLOCK_ADDR 0 #endif -#ifdef CONFIG_MMU #define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base)) -#else -#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) -#endif /* CONFIG_MMU */ struct kernel_mapping { - unsigned long page_offset; unsigned long virt_addr; unsigned long virt_offset; uintptr_t phys_addr; @@ -116,6 +112,7 @@ struct kernel_mapping { uintptr_t xiprom; uintptr_t xiprom_sz; #else + unsigned long page_offset; unsigned long va_kernel_pa_offset; #endif }; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 
b6697dc21daf..428e48e5f57d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -12,7 +12,11 @@ #include <asm/pgtable-bits.h> #ifndef CONFIG_MMU -#define KERNEL_LINK_ADDR PAGE_OFFSET +#ifdef CONFIG_RELOCATABLE +#define KERNEL_LINK_ADDR UL(0) +#else +#define KERNEL_LINK_ADDR _AC(CONFIG_PHYS_RAM_BASE, UL) +#endif #define KERN_VIRT_SIZE (UL(-1)) #else diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..37af6513a50f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -20,15 +20,13 @@ #include <linux/dma-map-ops.h> #include <linux/crash_dump.h> #include <linux/hugetlb.h> -#ifdef CONFIG_RELOCATABLE -#include <linux/elf.h> -#endif #include <linux/kfence.h> #include <linux/execmem.h> #include <asm/fixmap.h> #include <asm/io.h> #include <asm/kasan.h> +#include <asm/module.h> #include <asm/numa.h> #include <asm/pgtable.h> #include <asm/sections.h> @@ -323,6 +321,44 @@ static void __init setup_bootmem(void) hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); } +#ifdef CONFIG_RELOCATABLE +extern unsigned long __rela_dyn_start, __rela_dyn_end; + +static void __init relocate_kernel(void) +{ + Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start; + /* + * This holds the offset between the linked virtual address and the + * relocated virtual address. + */ + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; + /* + * This holds the offset between kernel linked virtual address and + * physical address. 
+ */ + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; + + for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) { + Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf_Addr relocated_addr = rela->r_addend; + + if (rela->r_info != R_RISCV_RELATIVE) + continue; + + /* + * Make sure to not relocate vdso symbols like rt_sigreturn + * which are linked from the address 0 in vmlinux since + * vdso symbol addresses are actually used as an offset from + * mm->context.vdso in VDSO_OFFSET macro. + */ + if (relocated_addr >= KERNEL_LINK_ADDR) + relocated_addr += reloc_offset; + + *(Elf_Addr *)addr = relocated_addr; + } +} +#endif /* CONFIG_RELOCATABLE */ + #ifdef CONFIG_MMU struct pt_alloc_ops pt_ops __meminitdata; @@ -823,6 +859,8 @@ static __init void set_satp_mode(uintptr_t dtb_pa) uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + kernel_map.page_offset = PAGE_OFFSET_L5; + if (satp_mode_cmdline == SATP_MODE_57) { disable_pgtable_l5(); } else if (satp_mode_cmdline == SATP_MODE_48) { @@ -893,44 +931,6 @@ retry: #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif -#ifdef CONFIG_RELOCATABLE -extern unsigned long __rela_dyn_start, __rela_dyn_end; - -static void __init relocate_kernel(void) -{ - Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; - /* - * This holds the offset between the linked virtual address and the - * relocated virtual address. - */ - uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; - /* - * This holds the offset between kernel linked virtual address and - * physical address. 
- */ - uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; - - for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { - Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); - Elf64_Addr relocated_addr = rela->r_addend; - - if (rela->r_info != R_RISCV_RELATIVE) - continue; - - /* - * Make sure to not relocate vdso symbols like rt_sigreturn - * which are linked from the address 0 in vmlinux since - * vdso symbol addresses are actually used as an offset from - * mm->context.vdso in VDSO_OFFSET macro. - */ - if (relocated_addr >= KERNEL_LINK_ADDR) - relocated_addr += reloc_offset; - - *(Elf64_Addr *)addr = relocated_addr; - } -} -#endif /* CONFIG_RELOCATABLE */ - #ifdef CONFIG_XIP_KERNEL static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) @@ -1108,11 +1108,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset; #ifdef CONFIG_XIP_KERNEL -#ifdef CONFIG_64BIT - kernel_map.page_offset = PAGE_OFFSET_L3; -#else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); -#endif kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); @@ -1127,7 +1122,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr + (uintptr_t)&_sdata - (uintptr_t)&_start; #else - kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; @@ -1174,7 +1168,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * makes the kernel cross over a PUD_SIZE boundary, raise a bug * since a part of the kernel would not get mapped. 
*/ - BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); + if (IS_ENABLED(CONFIG_64BIT)) + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); relocate_kernel(); #endif @@ -1378,6 +1373,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) { dtb_early_va = (void *)dtb_pa; dtb_early_pa = dtb_pa; + +#ifdef CONFIG_RELOCATABLE + kernel_map.virt_addr = (uintptr_t)_start; + kernel_map.phys_addr = (uintptr_t)_start; + relocate_kernel(); +#endif } static inline void setup_vm_final(void) diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h index 98e1541b72b7..a8622501b975 100644 --- a/include/asm-generic/module.h +++ b/include/asm-generic/module.h @@ -19,12 +19,8 @@ struct mod_arch_specific #define Elf_Dyn Elf64_Dyn #define Elf_Ehdr Elf64_Ehdr #define Elf_Addr Elf64_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf64_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf64_Rela -#endif #define ELF_R_TYPE(X) ELF64_R_TYPE(X) #define ELF_R_SYM(X) ELF64_R_SYM(X) @@ -36,12 +32,8 @@ struct mod_arch_specific #define Elf_Dyn Elf32_Dyn #define Elf_Ehdr Elf32_Ehdr #define Elf_Addr Elf32_Addr -#ifdef CONFIG_MODULES_USE_ELF_REL #define Elf_Rel Elf32_Rel -#endif -#ifdef CONFIG_MODULES_USE_ELF_RELA #define Elf_Rela Elf32_Rela -#endif #define ELF_R_TYPE(X) ELF32_R_TYPE(X) #define ELF_R_SYM(X) ELF32_R_SYM(X) #endif |