From 0479a42d4c15bd554f54d89d12bf68218e3e70da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 22 Jun 2023 16:27:13 +0200 Subject: x86/cfi: Extend {JMP,CAKK}_NOSPEC comment With the introduction of kCFI these helpers are no longer equivalent to C indirect calls and should be used with care. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lkml.kernel.org/r/20230622144321.360957723%40infradead.org --- arch/x86/include/asm/nospec-branch.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 55388c9f7601..1a65cf4acb2b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -234,6 +234,10 @@ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. + * + * NOTE: these do not take kCFI into account and are thus not comparable to C + * indirect calls, take care when using. The target of these should be an ENDBR + * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE -- cgit v1.2.3 From be0fffa5ca894a971a31c5e28aa77b633a97d1dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 22 Jun 2023 15:36:50 +0200 Subject: x86/alternative: Rename apply_ibt_endbr() The current name doesn't reflect what it does very well. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lkml.kernel.org/r/20230622144321.427441595%40infradead.org --- arch/x86/include/asm/alternative.h | 2 +- arch/x86/include/asm/ibt.h | 2 +- arch/x86/kernel/alternative.c | 9 ++++++--- arch/x86/kernel/module.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 6c15a622ad60..9c4da699e11a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -96,7 +96,7 @@ extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); extern void apply_returns(s32 *start, s32 *end); -extern void apply_ibt_endbr(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, s32 *start_cfi, s32 *end_cfi); diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index baae6b4fea23..1e59581d500c 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -34,7 +34,7 @@ /* * Create a dummy function pointer reference to prevent objtool from marking * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by - * apply_ibt_endbr()). + * apply_seal_endbr()). */ #define IBT_NOSEAL(fname) \ ".pushsection .discard.ibt_endbr_noseal\n\t" \ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 72646d75b6ff..27e0cb4d062a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -803,7 +803,7 @@ static void __init_or_module poison_endbr(void *addr, bool warn) /* * Generated by: objtool --ibt */ -void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; @@ -818,7 +818,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) #else -void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1565,7 +1565,10 @@ void __init alternative_instructions(void) */ callthunks_patch_builtin_calls(); - apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + /* + * Seal all functions that do not have their address taken. + */ + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b05f62ee2344..5f71a0cf4399 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -358,7 +358,7 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } if (locks) { void *lseg = (void *)locks->sh_addr; -- cgit v1.2.3 From 9831c6253ace48051189f6d18a15f658f94babc2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 21 Jun 2023 22:17:12 +0200 Subject: x86/cfi: Extend ENDBR sealing to kCFI Kees noted that IBT sealing could be extended to kCFI. Fundamentally it is the list of functions that do not have their address taken and are thus never called indirectly. It doesn't matter that objtool uses IBT infrastructure to determine this list, once we have it it can also be used to clobber kCFI hashes and avoid kCFI indirect calls. Suggested-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lkml.kernel.org/r/20230622144321.494426891%40infradead.org --- arch/x86/kernel/alternative.c | 44 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 27e0cb4d062a..04b25a275233 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -778,6 +778,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT +static void poison_cfi(void *addr); + static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); @@ -802,6 +804,9 @@ static void __init_or_module poison_endbr(void *addr, bool warn) /* * Generated by: objtool --ibt + * + * Seal the functions for indirect calls by clobbering the ENDBR instructions + * and the kCFI hash value. */ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { @@ -812,7 +817,7 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) poison_endbr(addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_endbr(addr - 16, false); + poison_cfi(addr - 16); } } @@ -1177,6 +1182,41 @@ err: pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); } +static inline void poison_hash(void *addr) +{ + *(u32 *)addr = 0; +} + +static void poison_cfi(void *addr) +{ + switch (cfi_mode) { + case CFI_FINEIBT: + /* + * __cfi_\func: + * osp nopl (%rax) + * subl $0, %r10d + * jz 1f + * ud2 + * 1: nop + */ + poison_endbr(addr, false); + poison_hash(addr + fineibt_preamble_hash); + break; + + case CFI_KCFI: + /* + * __cfi_\func: + * movl $0, %eax + * .skip 11, 0x90 + */ + poison_hash(addr + 1); + break; + + default: + break; + } +} + #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, @@ -1184,6 +1224,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, { } +static void poison_cfi(void *addr) { } + #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, -- cgit v1.2.3 From 81f755d561f365f544795fad92f05a085ea4f292 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jun 2023 18:55:28 -0400 Subject: x86/32: Remove schedule_tail_wrapper() The unwinder expects a return address at the very top of the kernel stack just below pt_regs and before any stack frame is created. Instead of calling a wrapper, set up a return address as if ret_from_fork() was called from the syscall entry code. Signed-off-by: Brian Gerst Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lkml.kernel.org/r/20230623225529.34590-2-brgerst@gmail.com --- arch/x86/entry/entry_32.S | 33 ++++++++++----------------------- 1 file changed, 10 insertions(+), 23 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 91397f58ac30..e56123f03a79 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -719,26 +719,6 @@ SYM_CODE_START(__switch_to_asm) SYM_CODE_END(__switch_to_asm) .popsection -/* - * The unwinder expects the last frame on the stack to always be at the same - * offset from the end of the page, which allows it to validate the stack. - * Calling schedule_tail() directly would break that convention because its an - * asmlinkage function so its argument has to be pushed on the stack. This - * wrapper creates a proper "end of stack" frame header before the call. - */ -.pushsection .text, "ax" -SYM_FUNC_START(schedule_tail_wrapper) - FRAME_BEGIN - - pushl %eax - call schedule_tail - popl %eax - - FRAME_END - RET -SYM_FUNC_END(schedule_tail_wrapper) -.popsection - /* * A newly forked process directly context switches into this address. * @@ -748,16 +728,23 @@ SYM_FUNC_END(schedule_tail_wrapper) */ .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) - call schedule_tail_wrapper + /* return address for the stack unwinder */ + pushl $.Lsyscall_32_done + + FRAME_BEGIN + pushl %eax + call schedule_tail + addl $4, %esp + FRAME_END testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - movl %esp, %eax + leal 4(%esp), %eax call syscall_exit_to_user_mode - jmp .Lsyscall_32_done + RET /* kernel thread */ 1: movl %edi, %eax -- cgit v1.2.3 From 3aec4ecb3d1f313a8ab985df7cab07c4af81f478 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 23 Jun 2023 18:55:29 -0400 Subject: x86: Rewrite ret_from_fork() in C When kCFI is enabled, special handling is needed for the indirect call to the kernel thread function. Rewrite the ret_from_fork() function in C so that the compiler can properly handle the indirect call. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Brian Gerst Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lkml.kernel.org/r/20230623225529.34590-3-brgerst@gmail.com --- arch/x86/entry/entry_32.S | 30 ++++++++---------------------- arch/x86/entry/entry_64.S | 33 ++++++++------------------------- arch/x86/include/asm/switch_to.h | 4 +++- arch/x86/kernel/process.c | 22 +++++++++++++++++++++- 4 files changed, 40 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e56123f03a79..6e6af42e044a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -727,36 +727,22 @@ SYM_CODE_END(__switch_to_asm) * edi: kernel thread arg */ .pushsection .text, "ax" -SYM_CODE_START(ret_from_fork) +SYM_CODE_START(ret_from_fork_asm) + movl %esp, %edx /* regs */ + /* return address for the stack unwinder */ pushl $.Lsyscall_32_done FRAME_BEGIN - pushl %eax - call schedule_tail + /* prev already in EAX */ + movl %ebx, %ecx /* fn */ + pushl %edi /* fn_arg */ + call ret_from_fork addl $4, %esp FRAME_END - testl %ebx, %ebx - jnz 1f /* kernel threads are uncommon */ - -2: - /* When we fork, we trace the syscall return in the child, too. */ - leal 4(%esp), %eax - call syscall_exit_to_user_mode RET - - /* kernel thread */ -1: movl %edi, %eax - CALL_NOSPEC ebx - /* - * A kernel thread is allowed to return here after successfully - * calling kernel_execve(). Exit to userspace to complete the execve() - * syscall. - */ - movl $0, PT_EAX(%esp) - jmp 2b -SYM_CODE_END(ret_from_fork) +SYM_CODE_END(ret_from_fork_asm) .popsection SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f31e286c2977..91f6818884fa 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm) * r12: kernel thread arg */ .pushsection .text, "ax" - __FUNC_ALIGN -SYM_CODE_START_NOALIGN(ret_from_fork) - UNWIND_HINT_END_OF_STACK +SYM_CODE_START(ret_from_fork_asm) + UNWIND_HINT_REGS ANNOTATE_NOENDBR // copy_thread CALL_DEPTH_ACCOUNT - movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + movq %rax, %rdi /* prev */ + movq %rsp, %rsi /* regs */ + movq %rbx, %rdx /* fn */ + movq %r12, %rcx /* fn_arg */ + call ret_from_fork -2: - UNWIND_HINT_REGS - movq %rsp, %rdi - call syscall_exit_to_user_mode /* returns with IRQs disabled */ jmp swapgs_restore_regs_and_return_to_usermode - -1: - /* kernel thread */ - UNWIND_HINT_END_OF_STACK - movq %r12, %rdi - CALL_NOSPEC rbx - /* - * A kernel thread is allowed to return here after successfully - * calling kernel_execve(). Exit to userspace to complete the execve() - * syscall. - */ - movq $0, RAX(%rsp) - jmp 2b -SYM_CODE_END(ret_from_fork) +SYM_CODE_END(ret_from_fork_asm) .popsection .macro DEBUG_ENTRY_ASSERT_IRQS_OFF diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5c91305d09d2..f42dbf17f52b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_fork_asm(void); +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg); /* * This is the structure pointed to by thread.sp for an inactive task. The diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff9b80a0e3e3..72015dba72ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) return do_set_thread_area_64(p, ARCH_SET_FS, tls); } +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg) +{ + schedule_tail(prev); + + /* Is this a kernel thread? */ + if (unlikely(fn)) { + fn(fn_arg); + /* + * A kernel thread is allowed to return here after successfully + * calling kernel_execve(). Exit to userspace to complete the + * execve() syscall. + */ + regs->ax = 0; + } + + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame = &fork_frame->frame; frame->bp = encode_frame_pointer(childregs); - frame->ret_addr = (unsigned long) ret_from_fork; + frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; p->thread.iopl_warn = 0; -- cgit v1.2.3 From 04505bbbbb15da950ea0239e328a76a3ad2376e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jun 2023 21:35:48 +0200 Subject: x86/fineibt: Poison ENDBR at +0 Alyssa noticed that when building the kernel with CFI_CLANG+IBT and booting on IBT enabled hardware to obtain FineIBT, the indirect functions look like: __cfi_foo: endbr64 subl $hash, %r10d jz 1f ud2 nop 1: foo: endbr64 This is because the compiler generates code for kCFI+IBT. In that case the caller does the hash check and will jump to +0, so there must be an ENDBR there. The compiler doesn't know about FineIBT at all; also it is possible to actually use kCFI+IBT when booting with 'cfi=kcfi' on IBT enabled hardware. Having this second ENDBR however makes it possible to elide the CFI check. Therefore, we should poison this second ENDBR when switching to FineIBT mode. Fixes: 931ab63664f0 ("x86/ibt: Implement FineIBT") Reported-by: "Milburn, Alyssa" Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20230615193722.194131053@infradead.org --- arch/x86/kernel/alternative.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 04b25a275233..d77aaabf9fe8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1068,6 +1068,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) return 0; } +static void cfi_rewrite_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + + poison_endbr(addr+16, false); + } +} + /* .retpoline_sites */ static int cfi_rand_callers(s32 *start, s32 *end) { @@ -1162,14 +1173,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_FINEIBT: + /* place the FineIBT preamble at func()-16 */ ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; + /* rewrite the callers to target func()-16 */ ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; + /* now that nobody targets func()+0, remove ENDBR there */ + cfi_rewrite_endbr(start_cfi, end_cfi); + if (builtin) pr_info("Using FineIBT CFI\n"); return; -- cgit v1.2.3 From 535d0ae39185a266536a1e97ff9a8956d7fbb9df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Jul 2023 10:10:40 +0200 Subject: x86/cfi: Only define poison_cfi() if CONFIG_X86_KERNEL_IBT=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit poison_cfi() was introduced in: 9831c6253ace ("x86/cfi: Extend ENDBR sealing to kCFI") ... but it's only ever used under CONFIG_X86_KERNEL_IBT=y, and if that option is disabled, we get: arch/x86/kernel/alternative.c:1243:13: error: ‘poison_cfi’ defined but not used [-Werror=unused-function] Guard the definition with CONFIG_X86_KERNEL_IBT. Cc: Peter Zijlstra (Intel) Cc: Kees Cook Cc: Sami Tolvanen Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index d77aaabf9fe8..2dcf3a06af09 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1240,7 +1240,9 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, { } +#ifdef CONFIG_X86_KERNEL_IBT static void poison_cfi(void *addr) { } +#endif #endif -- cgit v1.2.3