From 9b238748cb6e9fadab0e761f6d30ba311b4ac470 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Apr 2016 09:42:10 -0700 Subject: x86/KASLR: Rename aslr.c to kaslr.c In order to avoid confusion over what this file provides, rename it to kaslr.c since it is used exclusively for the kernel ASLR, not userspace ASLR. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1460997735-24785-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/aslr.c | 339 -------------------------------------- arch/x86/boot/compressed/kaslr.c | 339 ++++++++++++++++++++++++++++++++++++++ arch/x86/boot/compressed/misc.h | 2 +- 4 files changed, 341 insertions(+), 341 deletions(-) delete mode 100644 arch/x86/boot/compressed/aslr.c create mode 100644 arch/x86/boot/compressed/kaslr.c (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 8774cb23064f..542c92f5ca4f 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -62,7 +62,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/piggy.o $(obj)/cpuflags.o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o -vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o +vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c deleted file mode 100644 index 6a9b96b4624d..000000000000 --- a/arch/x86/boot/compressed/aslr.c +++ /dev/null @@ -1,339 +0,0 @@ -#include "misc.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -/* Simplified build-specific string for starting entropy. */ -static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" - LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; - -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - -static unsigned long rotate_xor(unsigned long hash, const void *area, - size_t size) -{ - size_t i; - unsigned long *ptr = (unsigned long *)area; - - for (i = 0; i < size / sizeof(hash); i++) { - /* Rotate by odd number of bits and XOR. */ - hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); - hash ^= ptr[i]; - } - - return hash; -} - -/* Attempt to create a simple but unpredictable starting entropy. */ -static unsigned long get_random_boot(void) -{ - unsigned long hash = 0; - - hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); - - return hash; -} - -static unsigned long get_random_long(void) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr("KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} - -struct mem_vector { - unsigned long start; - unsigned long size; -}; - -#define MEM_AVOID_MAX 5 -static struct mem_vector mem_avoid[MEM_AVOID_MAX]; - -static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region. */ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - -static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) -{ - /* Item one is entirely before item two. */ - if (one->start + one->size <= two->start) - return false; - /* Item one is entirely after item two. */ - if (one->start >= two->start + two->size) - return false; - return true; -} - -static void mem_avoid_init(unsigned long input, unsigned long input_size, - unsigned long output, unsigned long output_size) -{ - u64 initrd_start, initrd_size; - u64 cmd_line, cmd_line_size; - unsigned long unsafe, unsafe_len; - char *ptr; - - /* - * Avoid the region that is unsafe to overlap during - * decompression (see calculations at top of misc.c). - */ - unsafe_len = (output_size >> 12) + 32768 + 18; - unsafe = (unsigned long)input + input_size - unsafe_len; - mem_avoid[0].start = unsafe; - mem_avoid[0].size = unsafe_len; - - /* Avoid initrd. */ - initrd_start = (u64)real_mode->ext_ramdisk_image << 32; - initrd_start |= real_mode->hdr.ramdisk_image; - initrd_size = (u64)real_mode->ext_ramdisk_size << 32; - initrd_size |= real_mode->hdr.ramdisk_size; - mem_avoid[1].start = initrd_start; - mem_avoid[1].size = initrd_size; - - /* Avoid kernel command line. */ - cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; - cmd_line |= real_mode->hdr.cmd_line_ptr; - /* Calculate size of cmd_line. */ - ptr = (char *)(unsigned long)cmd_line; - for (cmd_line_size = 0; ptr[cmd_line_size++]; ) - ; - mem_avoid[2].start = cmd_line; - mem_avoid[2].size = cmd_line_size; - - /* Avoid heap memory. */ - mem_avoid[3].start = (unsigned long)free_mem_ptr; - mem_avoid[3].size = BOOT_HEAP_SIZE; - - /* Avoid stack memory. */ - mem_avoid[4].start = (unsigned long)free_mem_end_ptr; - mem_avoid[4].size = BOOT_STACK_SIZE; -} - -/* Does this memory vector overlap a known avoided area? */ -static bool mem_avoid_overlap(struct mem_vector *img) -{ - int i; - struct setup_data *ptr; - - for (i = 0; i < MEM_AVOID_MAX; i++) { - if (mem_overlaps(img, &mem_avoid[i])) - return true; - } - - /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; - while (ptr) { - struct mem_vector avoid; - - avoid.start = (unsigned long)ptr; - avoid.size = sizeof(*ptr) + ptr->len; - - if (mem_overlaps(img, &avoid)) - return true; - - ptr = (struct setup_data *)(unsigned long)ptr->next; - } - - return false; -} - -static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN]; -static unsigned long slot_max; - -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. */ - if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - -static unsigned long slots_fetch_random(void) -{ - /* Handle case of no slots stored. */ - if (slot_max == 0) - return 0; - - return slots[get_random_long() % slot_max]; -} - -static void process_e820_entry(struct e820entry *entry, - unsigned long minimum, - unsigned long image_size) -{ - struct mem_vector region, img; - - /* Skip non-RAM entries. */ - if (entry->type != E820_RAM) - return; - - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) - return; - - /* Ignore entries entirely below our minimum. */ - if (entry->addr + entry->size < minimum) - return; - - region.start = entry->addr; - region.size = entry->size; - - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; - - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; - - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; - - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) - region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; - - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(®ion, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img)) - continue; - slots_append(img.start); - } -} - -static unsigned long find_random_addr(unsigned long minimum, - unsigned long size) -{ - int i; - unsigned long addr; - - /* Make sure minimum is aligned. */ - minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); - - /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < real_mode->e820_entries; i++) { - process_e820_entry(&real_mode->e820_map[i], minimum, size); - } - - return slots_fetch_random(); -} - -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, - unsigned long input_size, - unsigned char *output, - unsigned long output_size) -{ - unsigned long choice = (unsigned long)output; - unsigned long random; - -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - debug_putstr("KASLR disabled by default...\n"); - goto out; - } -#else - if (cmdline_find_option_bool("nokaslr")) { - debug_putstr("KASLR disabled by cmdline...\n"); - goto out; - } -#endif - - boot_params->hdr.loadflags |= KASLR_FLAG; - - /* Record the various known unsafe memory ranges. */ - mem_avoid_init((unsigned long)input, input_size, - (unsigned long)output, output_size); - - /* Walk e820 and find a random address. */ - random = find_random_addr(choice, output_size); - if (!random) { - debug_putstr("KASLR could not find suitable E820 region...\n"); - goto out; - } - - /* Always enforce the minimum. */ - if (random < choice) - goto out; - - choice = random; -out: - return (unsigned char *)choice; -} diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c new file mode 100644 index 000000000000..6a9b96b4624d --- /dev/null +++ b/arch/x86/boot/compressed/kaslr.c @@ -0,0 +1,339 @@ +#include "misc.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + unsigned long *ptr = (unsigned long *)area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple but unpredictable starting entropy. */ +static unsigned long get_random_boot(void) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); + + return hash; +} + +static unsigned long get_random_long(void) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_random_boot(); + bool use_i8254 = true; + + debug_putstr("KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + raw = rdtsc(); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} + +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +static struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? */ +static bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + struct setup_data *ptr; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (unsigned long)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid)) + return true; + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + + return false; +} + +static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN]; +static unsigned long slot_max; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. */ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(®ion, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + +unsigned char *choose_kernel_location(struct boot_params *boot_params, + unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + unsigned long random; + +#ifdef CONFIG_HIBERNATION + if (!cmdline_find_option_bool("kaslr")) { + debug_putstr("KASLR disabled by default...\n"); + goto out; + } +#else + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled by cmdline...\n"); + goto out; + } +#endif + + boot_params->hdr.loadflags |= KASLR_FLAG; + + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. */ + if (random < choice) + goto out; + + choice = random; +out: + return (unsigned char *)choice; +} diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 3783dc3e10b3..a8c4e087e14d 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -66,7 +66,7 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE -/* aslr.c */ +/* kaslr.c */ unsigned char *choose_kernel_location(struct boot_params *boot_params, unsigned char *input, unsigned long input_size, -- cgit v1.2.3 From 206f25a8319b312b9983953a308b0e38e1943c1c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 18 Apr 2016 09:42:11 -0700 Subject: x86/KASLR: Remove unneeded boot_params argument Since the boot_params can be found using the real_mode global variable, there is no need to pass around a pointer to it. This slightly simplifies the choose_kernel_location function and its callers. [kees: rewrote changelog, tracked file rename] Signed-off-by: Yinghai Lu Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1460997735-24785-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 5 ++--- arch/x86/boot/compressed/misc.c | 2 +- arch/x86/boot/compressed/misc.h | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 6a9b96b4624d..622aa881c6ab 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -295,8 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, +unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) @@ -316,7 +315,7 @@ unsigned char *choose_kernel_location(struct boot_params *boot_params, } #endif - boot_params->hdr.loadflags |= KASLR_FLAG; + real_mode->hdr.loadflags |= KASLR_FLAG; /* Record the various known unsafe memory ranges. */ mem_avoid_init((unsigned long)input, input_size, diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 79dac1758e7c..f35ad9eb1bf1 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -428,7 +428,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(real_mode, input_data, input_len, output, + output = choose_kernel_location(input_data, input_len, output, output_len > run_size ? output_len : run_size); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a8c4e087e14d..22346d5a2fa0 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,8 +67,7 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, +unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); @@ -76,8 +75,7 @@ unsigned char *choose_kernel_location(struct boot_params *boot_params, bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(struct boot_params *boot_params, - unsigned char *input, +unsigned char *choose_kernel_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) -- cgit v1.2.3 From 6655e0aaf768c39a62eea739c453b9db1e841cfb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Apr 2016 09:42:12 -0700 Subject: x86/boot: Rename "real_mode" to "boot_params" The non-compressed boot code uses the (much more obvious) name "boot_params" for the global pointer to the x86 boot parameters. The compressed kernel loader code, though, was using the legacy name "real_mode". There is no need to have a different name, and changing it improves readability. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1460997735-24785-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/cmdline.c | 4 ++-- arch/x86/boot/compressed/kaslr.c | 22 +++++++++++----------- arch/x86/boot/compressed/misc.c | 27 ++++++++++++++------------- arch/x86/boot/compressed/misc.h | 2 +- 4 files changed, 28 insertions(+), 27 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index b68e3033e6b9..73ccf63b0f48 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -15,9 +15,9 @@ static inline char rdfs8(addr_t addr) #include "../cmdline.c" static unsigned long get_cmd_line_ptr(void) { - unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; - cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line_ptr |= (u64)boot_params->ext_cmd_line_ptr << 32; return cmd_line_ptr; } diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 622aa881c6ab..a51ec841c9b9 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -55,7 +55,7 @@ static unsigned long get_random_boot(void) unsigned long hash = 0; hash = rotate_xor(hash, build_str, sizeof(build_str)); - hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); + hash = rotate_xor(hash, boot_params, sizeof(*boot_params)); return hash; } @@ -152,16 +152,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[0].size = unsafe_len; /* Avoid initrd. */ - initrd_start = (u64)real_mode->ext_ramdisk_image << 32; - initrd_start |= real_mode->hdr.ramdisk_image; - initrd_size = (u64)real_mode->ext_ramdisk_size << 32; - initrd_size |= real_mode->hdr.ramdisk_size; + initrd_start = (u64)boot_params->ext_ramdisk_image << 32; + initrd_start |= boot_params->hdr.ramdisk_image; + initrd_size = (u64)boot_params->ext_ramdisk_size << 32; + initrd_size |= boot_params->hdr.ramdisk_size; mem_avoid[1].start = initrd_start; mem_avoid[1].size = initrd_size; /* Avoid kernel command line. */ - cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; - cmd_line |= real_mode->hdr.cmd_line_ptr; + cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; + cmd_line |= boot_params->hdr.cmd_line_ptr; /* Calculate size of cmd_line. */ ptr = (char *)(unsigned long)cmd_line; for (cmd_line_size = 0; ptr[cmd_line_size++]; ) @@ -190,7 +190,7 @@ static bool mem_avoid_overlap(struct mem_vector *img) } /* Avoid all entries in the setup_data linked list. */ - ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; + ptr = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data; while (ptr) { struct mem_vector avoid; @@ -288,8 +288,8 @@ static unsigned long find_random_addr(unsigned long minimum, minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); /* Verify potential e820 positions, appending to slots list. */ - for (i = 0; i < real_mode->e820_entries; i++) { - process_e820_entry(&real_mode->e820_map[i], minimum, size); + for (i = 0; i < boot_params->e820_entries; i++) { + process_e820_entry(&boot_params->e820_map[i], minimum, size); } return slots_fetch_random(); @@ -315,7 +315,7 @@ unsigned char *choose_kernel_location(unsigned char *input, } #endif - real_mode->hdr.loadflags |= KASLR_FLAG; + boot_params->hdr.loadflags |= KASLR_FLAG; /* Record the various known unsafe memory ranges. */ mem_avoid_init((unsigned long)input, input_size, diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f35ad9eb1bf1..462dfbf7467b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -114,7 +114,7 @@ static void error(char *m); /* * This is set up by the setup-routine at boot-time */ -struct boot_params *real_mode; /* Pointer to real-mode data */ +struct boot_params *boot_params; memptr free_mem_ptr; memptr free_mem_end_ptr; @@ -184,12 +184,12 @@ void __putstr(const char *s) } } - if (real_mode->screen_info.orig_video_mode == 0 && + if (boot_params->screen_info.orig_video_mode == 0 && lines == 0 && cols == 0) return; - x = real_mode->screen_info.orig_x; - y = real_mode->screen_info.orig_y; + x = boot_params->screen_info.orig_x; + y = boot_params->screen_info.orig_y; while ((c = *s++) != '\0') { if (c == '\n') { @@ -210,8 +210,8 @@ void __putstr(const char *s) } } - real_mode->screen_info.orig_x = x; - real_mode->screen_info.orig_y = y; + boot_params->screen_info.orig_x = x; + boot_params->screen_info.orig_y = y; pos = (x + cols * y) * 2; /* Update cursor position */ outb(14, vidport); @@ -392,14 +392,15 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, { unsigned char *output_orig = output; - real_mode = rmode; + /* Retain x86 boot parameters pointer passed from startup_32/64. */ + boot_params = rmode; - /* Clear it for solely in-kernel use */ - real_mode->hdr.loadflags &= ~KASLR_FLAG; + /* Clear flags intended for solely in-kernel use. */ + boot_params->hdr.loadflags &= ~KASLR_FLAG; - sanitize_boot_params(real_mode); + sanitize_boot_params(boot_params); - if (real_mode->screen_info.orig_video_mode == 7) { + if (boot_params->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; } else { @@ -407,8 +408,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, vidport = 0x3d4; } - lines = real_mode->screen_info.orig_video_lines; - cols = real_mode->screen_info.orig_video_cols; + lines = boot_params->screen_info.orig_video_lines; + cols = boot_params->screen_info.orig_video_cols; console_init(); debug_putstr("early console in decompress_kernel\n"); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 22346d5a2fa0..1f750a516580 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -32,7 +32,7 @@ /* misc.c */ extern memptr free_mem_ptr; extern memptr free_mem_end_ptr; -extern struct boot_params *real_mode; /* Pointer to real-mode data */ +extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); #define error_putstr(__x) __putstr(__x) -- cgit v1.2.3 From c04028813221c2d39a4f368586795ac4466d311c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Apr 2016 09:42:13 -0700 Subject: x86/boot: Clarify purpose of functions in misc.c The function "decompress_kernel" now performs many more duties, so this patch renames it to "extract_kernel" and updates callers and comments. Additionally the file header comment for misc.c is improved to actually describe what is contained. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1460997735-24785-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_32.S | 8 ++++---- arch/x86/boot/compressed/head_64.S | 4 ++-- arch/x86/boot/compressed/misc.c | 10 ++++++---- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 0256064da8da..26dd9df19a69 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -233,9 +233,9 @@ relocated: 2: /* - * Do the decompression, and jump to the new kernel.. + * Do the extraction, and jump to the new kernel.. */ - /* push arguments for decompress_kernel: */ + /* push arguments for extract_kernel: */ pushl $z_run_size /* size of kernel with .bss and .brk */ pushl $z_output_len /* decompressed length, end of relocs */ leal z_extract_offset_negative(%ebx), %ebp @@ -246,11 +246,11 @@ relocated: leal boot_heap(%ebx), %eax pushl %eax /* heap area */ pushl %esi /* real mode pointer */ - call decompress_kernel /* returns kernel location in %eax */ + call extract_kernel /* returns kernel location in %eax */ addl $28, %esp /* - * Jump to the decompressed kernel. + * Jump to the extracted kernel. */ xorl %ebx, %ebx jmp *%eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 86558a199139..d43c30ed89ed 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -408,7 +408,7 @@ relocated: 2: /* - * Do the decompression, and jump to the new kernel.. + * Do the extraction, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ @@ -419,7 +419,7 @@ relocated: movl $z_input_len, %ecx /* input_len */ movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length, end of relocs */ - call decompress_kernel /* returns kernel location in %rax */ + call extract_kernel /* returns kernel location in %rax */ popq %r9 popq %rsi diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 462dfbf7467b..0d69e809673a 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -1,8 +1,10 @@ /* * misc.c * - * This is a collection of several routines from gzip-1.0.3 - * adapted for Linux. + * This is a collection of several routines used to extract the kernel + * which includes KASLR relocation, decompression, ELF parsing, and + * relocation processing. Additionally included are the screen and serial + * output functions and related debugging support functions. * * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 * puts by Nick Holloway 1993, better puts by Martin Mares 1995 @@ -383,7 +385,7 @@ static void parse_elf(void *output) free(phdrs); } -asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, +asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, @@ -412,7 +414,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, cols = boot_params->screen_info.orig_video_cols; console_init(); - debug_putstr("early console in decompress_kernel\n"); + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; -- cgit v1.2.3 From 7de828dfe607013546ece7ce25aa9839e8f93a66 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Apr 2016 09:42:14 -0700 Subject: x86/KASLR: Clarify purpose of kaslr.c The name "choose_kernel_location" isn't specific enough, and doesn't describe the primary thing it does: choosing a random location. This patch renames it to "choose_random_location", and clarifies the what routines are contained in the kaslr.c source file. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1460997735-24785-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 13 ++++++++++++- arch/x86/boot/compressed/misc.c | 2 +- arch/x86/boot/compressed/misc.h | 4 ++-- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a51ec841c9b9..9e03190d00ad 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -1,3 +1,14 @@ +/* + * kaslr.c + * + * This contains the routines needed to generate a reasonable level of + * entropy to choose a randomized kernel base address offset in support + * of Kernel Address Space Layout Randomization (KASLR). Additionally + * handles walking the physical memory maps (and tracking memory regions + * to avoid) in order to select a physical memory location that can + * contain the entire properly aligned running kernel image. + * + */ #include "misc.h" #include @@ -295,7 +306,7 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0d69e809673a..ad8c01ac2885 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -431,7 +431,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_kernel_location(input_data, input_len, output, + output = choose_random_location(input_data, input_len, output, output_len > run_size ? output_len : run_size); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 1f750a516580..9887e0d4aaeb 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,7 +67,7 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size); @@ -75,7 +75,7 @@ unsigned char *choose_kernel_location(unsigned char *input, bool has_cpuflag(int flag); #else static inline -unsigned char *choose_kernel_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input, unsigned long input_size, unsigned char *output, unsigned long output_size) -- cgit v1.2.3 From 9016875df408fc5db6a94a3c5f5f5503c916cf81 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Apr 2016 09:42:15 -0700 Subject: x86/KASLR: Rename "random" to "random_addr" The variable "random" is also the name of a libc function. It's better coding style to avoid overloading such things, so rename it to the more accurate "random_addr". Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1460997735-24785-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 9e03190d00ad..9c29e7885ef0 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -312,7 +312,7 @@ unsigned char *choose_random_location(unsigned char *input, unsigned long output_size) { unsigned long choice = (unsigned long)output; - unsigned long random; + unsigned long random_addr; #ifdef CONFIG_HIBERNATION if (!cmdline_find_option_bool("kaslr")) { @@ -333,17 +333,17 @@ unsigned char *choose_random_location(unsigned char *input, (unsigned long)output, output_size); /* Walk e820 and find a random address. */ - random = find_random_addr(choice, output_size); - if (!random) { + random_addr = find_random_addr(choice, output_size); + if (!random_addr) { debug_putstr("KASLR could not find suitable E820 region...\n"); goto out; } /* Always enforce the minimum. */ - if (random < choice) + if (random_addr < choice) goto out; - choice = random; + choice = random_addr; out: return (unsigned char *)choice; } -- cgit v1.2.3 From 4252db10559fc3d1efc1e43613254fdd220b014b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 20 Apr 2016 13:55:42 -0700 Subject: x86/KASLR: Update description for decompressor worst case size The comment that describes the analysis for the size of the decompressor code only took gzip into account (there are currently 6 other decompressors that could be used). The actual z_extract_offset calculation in code was already handling the correct maximum size, but this documentation hadn't been updated. This updates the documentation, fixes several typos, moves the comment to header.S, updates references, and adds a note at the end of the decompressor include list to remind us about updating the comment in the future. (Instead of moving the comment to mkpiggy.c, where the calculation is currently happening, it is being moved to header.S because the calculations in mkpiggy.c will be removed in favor of header.S calculations in a following patch, and it seemed like overkill to move the giant comment twice, especially when there's already reference to z_extract_offset in header.S.) Signed-off-by: Baoquan He [ Rewrote changelog, cleaned up comment style, moved comments around. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1461185746-8017-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 +- arch/x86/boot/compressed/misc.c | 89 ++++------------------------------------ arch/x86/boot/header.S | 88 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 82 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 9c29e7885ef0..7d86c5dd8e99 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -155,7 +155,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* * Avoid the region that is unsafe to overlap during - * decompression (see calculations at top of misc.c). + * decompression (see calculations in ../header.S). */ unsafe_len = (output_size >> 12) + 32768 + 18; unsafe = (unsigned long)input + input_size - unsafe_len; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index ad8c01ac2885..e96829bdb6d2 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -14,90 +14,13 @@ #include "misc.h" #include "../string.h" -/* WARNING!! - * This code is compiled with -fPIC and it is relocated dynamically - * at run time, but no relocation processing is performed. - * This means that it is not safe to place pointers in static structures. - */ - /* - * Getting to provable safe in place decompression is hard. - * Worst case behaviours need to be analyzed. - * Background information: - * - * The file layout is: - * magic[2] - * method[1] - * flags[1] - * timestamp[4] - * extraflags[1] - * os[1] - * compressed data blocks[N] - * crc[4] orig_len[4] - * - * resulting in 18 bytes of non compressed data overhead. - * - * Files divided into blocks - * 1 bit (last block flag) - * 2 bits (block type) - * - * 1 block occurs every 32K -1 bytes or when there 50% compression - * has been achieved. The smallest block type encoding is always used. - * - * stored: - * 32 bits length in bytes. - * - * fixed: - * magic fixed tree. - * symbols. - * - * dynamic: - * dynamic tree encoding. - * symbols. - * - * - * The buffer for decompression in place is the length of the - * uncompressed data, plus a small amount extra to keep the algorithm safe. - * The compressed data is placed at the end of the buffer. The output - * pointer is placed at the start of the buffer and the input pointer - * is placed where the compressed data starts. Problems will occur - * when the output pointer overruns the input pointer. - * - * The output pointer can only overrun the input pointer if the input - * pointer is moving faster than the output pointer. A condition only - * triggered by data whose compressed form is larger than the uncompressed - * form. - * - * The worst case at the block level is a growth of the compressed data - * of 5 bytes per 32767 bytes. - * - * The worst case internal to a compressed block is very hard to figure. - * The worst case can at least be boundined by having one bit that represents - * 32764 bytes and then all of the rest of the bytes representing the very - * very last byte. - * - * All of which is enough to compute an amount of extra data that is required - * to be safe. To avoid problems at the block level allocating 5 extra bytes - * per 32767 bytes of data is sufficient. To avoind problems internal to a - * block adding an extra 32767 bytes (the worst case uncompressed block size) - * is sufficient, to ensure that in the worst case the decompressed data for - * block will stop the byte before the compressed data for a block begins. - * To avoid problems with the compressed data's meta information an extra 18 - * bytes are needed. Leading to the formula: - * - * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. - * - * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. - * Adding 32768 instead of 32767 just makes for round numbers. - * Adding the decompressor_size is necessary as it musht live after all - * of the data as well. Last I measured the decompressor is about 14K. - * 10K of actual data and 4K of bss. - * + * WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically at + * run time, but no relocation processing is performed. This means that + * it is not safe to place pointers in static structures. */ -/* - * gzip declarations - */ #define STATIC static #undef memcpy @@ -148,6 +71,10 @@ static int lines, cols; #ifdef CONFIG_KERNEL_LZ4 #include "../../../../lib/decompress_unlz4.c" #endif +/* + * NOTE: When adding a new decompressor, please update the analysis in + * ../header.S. + */ static void scroll(void) { diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 6236b9ec4b76..fd85b9e4e953 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -440,6 +440,94 @@ setup_data: .quad 0 # 64-bit physical pointer to pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr +# +# Getting to provably safe in-place decompression is hard. Worst case +# behaviours need to be analyzed. Here let's take the decompression of +# a gzip-compressed kernel as example, to illustrate it: +# +# The file layout of gzip compressed kernel is: +# +# magic[2] +# method[1] +# flags[1] +# timestamp[4] +# extraflags[1] +# os[1] +# compressed data blocks[N] +# crc[4] orig_len[4] +# +# ... resulting in +18 bytes overhead of uncompressed data. +# +# (For more information, please refer to RFC 1951 and RFC 1952.) +# +# Files divided into blocks +# 1 bit (last block flag) +# 2 bits (block type) +# +# 1 block occurs every 32K -1 bytes or when there 50% compression +# has been achieved. The smallest block type encoding is always used. +# +# stored: +# 32 bits length in bytes. +# +# fixed: +# magic fixed tree. +# symbols. +# +# dynamic: +# dynamic tree encoding. +# symbols. +# +# +# The buffer for decompression in place is the length of the uncompressed +# data, plus a small amount extra to keep the algorithm safe. The +# compressed data is placed at the end of the buffer. The output pointer +# is placed at the start of the buffer and the input pointer is placed +# where the compressed data starts. Problems will occur when the output +# pointer overruns the input pointer. +# +# The output pointer can only overrun the input pointer if the input +# pointer is moving faster than the output pointer. A condition only +# triggered by data whose compressed form is larger than the uncompressed +# form. +# +# The worst case at the block level is a growth of the compressed data +# of 5 bytes per 32767 bytes. +# +# The worst case internal to a compressed block is very hard to figure. +# The worst case can at least be bounded by having one bit that represents +# 32764 bytes and then all of the rest of the bytes representing the very +# very last byte. +# +# All of which is enough to compute an amount of extra data that is required +# to be safe. To avoid problems at the block level allocating 5 extra bytes +# per 32767 bytes of data is sufficient. To avoid problems internal to a +# block adding an extra 32767 bytes (the worst case uncompressed block size) +# is sufficient, to ensure that in the worst case the decompressed data for +# block will stop the byte before the compressed data for a block begins. +# To avoid problems with the compressed data's meta information an extra 18 +# bytes are needed. Leading to the formula: +# +# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size +# +# Adding 8 bytes per 32K is a bit excessive but much easier to calculate. +# Adding 32768 instead of 32767 just makes for round numbers. +# Adding the decompressor_size is necessary as it musht live after all +# of the data as well. Last I measured the decompressor is about 14K. +# 10K of actual data and 4K of bss. +# +# Above analysis is for decompressing gzip compressed kernel only. Up to +# now 6 different decompressor are supported all together. And among them +# xz stores data in chunks and has maximum chunk of 64K. Hence safety +# margin should be updated to cover all decompressors so that we don't +# need to deal with each of them separately. Please check +# the description in lib/decompressor_xxx.c for specific information. +# +# extra_bytes = (uncompressed_size >> 12) + 65536 + 128 +# +# Note that this calculation, which results in z_extract_offset (below), +# is currently generated in compressed/mkpiggy.c + #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE -- cgit v1.2.3 From e8581e3d67788b6b29d055fa42c6cb5b258fee64 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 20 Apr 2016 13:55:43 -0700 Subject: x86/KASLR: Drop CONFIG_RANDOMIZE_BASE_MAX_OFFSET Currently CONFIG_RANDOMIZE_BASE_MAX_OFFSET is used to limit the maximum offset for kernel randomization. This limit doesn't need to be a CONFIG since it is tied completely to KERNEL_IMAGE_SIZE, and will make no sense once physical and virtual offsets are randomized separately. This patch removes CONFIG_RANDOMIZE_BASE_MAX_OFFSET and consolidates the Kconfig help text. [kees: rewrote changelog, dropped KERNEL_IMAGE_SIZE_DEFAULT, rewrote help] Signed-off-by: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1461185746-8017-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 7d86c5dd8e99..3ad71a0afa24 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -217,15 +217,13 @@ static bool mem_avoid_overlap(struct mem_vector *img) return false; } -static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN]; +static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; static unsigned long slot_max; static void slots_append(unsigned long addr) { /* Overflowing the slots list should be impossible. */ - if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / - CONFIG_PHYSICAL_ALIGN) + if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) return; slots[slot_max++] = addr; @@ -251,7 +249,7 @@ static void process_e820_entry(struct e820entry *entry, return; /* Ignore entries entirely above our maximum. */ - if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + if (entry->addr >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ @@ -276,8 +274,8 @@ static void process_e820_entry(struct e820entry *entry, region.size -= region.start - entry->addr; /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) - region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + if (region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; /* Walk each aligned slot and check for avoided areas. */ for (img.start = region.start, img.size = image_size ; -- cgit v1.2.3 From 1f208de37d10bb9067f3b061d281363be0cd1805 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 13:55:44 -0700 Subject: x86/boot: Clean up things used by decompressors This rearranges the pieces needed to include the decompressor code in misc.c. It wasn't obvious why things were there, so a comment was added and definitions consolidated. Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1461185746-8017-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e96829bdb6d2..0381e250a785 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -21,19 +21,19 @@ * it is not safe to place pointers in static structures. */ +/* Macros used by the included decompressor code below. */ #define STATIC static -#undef memcpy - /* - * Use a normal definition of memset() from string.c. There are already + * Use normal definitions of mem*() from string.c. There are already * included header files which expect a definition of memset() and by * the time we define memset macro, it is too late. */ +#undef memcpy #undef memset #define memzero(s, n) memset((s), 0, (n)) - +/* Functions used by the included decompressor code below. */ static void error(char *m); /* -- cgit v1.2.3 From bf0118dbba9542ceb5d33d4a86830a6c88b0bbf6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 13:55:45 -0700 Subject: x86/boot: Make memcpy() handle overlaps Two uses of memcpy() (screen scrolling and ELF parsing) were handling overlapping memory areas. While there were no explicitly noticed bugs here (yet), it is best to fix this so that the copying will always be safe. Instead of making a new memmove() function that might collide with other memmove() definitions in the decompressors, this just makes the compressed boot code's copy of memcpy() overlap-safe. Suggested-by: Lasse Collin Reported-by: Yinghai Lu Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1461185746-8017-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.c | 4 +--- arch/x86/boot/compressed/string.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 0381e250a785..eacc855ae08e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -301,9 +301,7 @@ static void parse_elf(void *output) #else dest = (void *)(phdr->p_paddr); #endif - memcpy(dest, - output + phdr->p_offset, - phdr->p_filesz); + memcpy(dest, output + phdr->p_offset, phdr->p_filesz); break; default: /* Ignore other PT_* */ break; } diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 00e788be1db9..1e10e40f49dd 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -1,7 +1,7 @@ #include "../string.c" #ifdef CONFIG_X86_32 -void *memcpy(void *dest, const void *src, size_t n) +void *__memcpy(void *dest, const void *src, size_t n) { int d0, d1, d2; asm volatile( @@ -15,7 +15,7 @@ void *memcpy(void *dest, const void *src, size_t n) return dest; } #else -void *memcpy(void *dest, const void *src, size_t n) +void *__memcpy(void *dest, const void *src, size_t n) { long d0, d1, d2; asm volatile( @@ -39,3 +39,21 @@ void *memset(void *s, int c, size_t n) ss[i] = c; return s; } + +/* + * This memcpy is overlap safe (i.e. it is memmove without conflicting + * with other definitions of memmove from the various decompressors. + */ +void *memcpy(void *dest, const void *src, size_t n) +{ + unsigned char *d = dest; + const unsigned char *s = src; + + if (d <= s || d - s >= n) + return __memcpy(dest, src, n); + + while (n-- > 0) + d[n] = s[n]; + + return dest; +} -- cgit v1.2.3 From 0f8ede1b8c4cb845c53072d7e49d71ca24a61ced Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Apr 2016 13:55:46 -0700 Subject: x86/KASLR: Warn when KASLR is disabled If KASLR is built in but not available at run-time (either due to the current conflict with hibernation, command-line request, or e820 parsing failures), announce the state explicitly. To support this, a new "warn" function is created, based on the existing "error" function. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1461185746-8017-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 6 +++--- arch/x86/boot/compressed/misc.c | 12 +++++++++--- arch/x86/boot/compressed/misc.h | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 3ad71a0afa24..8741a6d83bfe 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -314,12 +314,12 @@ unsigned char *choose_random_location(unsigned char *input, #ifdef CONFIG_HIBERNATION if (!cmdline_find_option_bool("kaslr")) { - debug_putstr("KASLR disabled by default...\n"); + warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); goto out; } #else if (cmdline_find_option_bool("nokaslr")) { - debug_putstr("KASLR disabled by cmdline...\n"); + warn("KASLR disabled: 'nokaslr' on cmdline."); goto out; } #endif @@ -333,7 +333,7 @@ unsigned char *choose_random_location(unsigned char *input, /* Walk e820 and find a random address. */ random_addr = find_random_addr(choice, output_size); if (!random_addr) { - debug_putstr("KASLR could not find suitable E820 region...\n"); + warn("KASLR disabled: could not find suitable E820 region!"); goto out; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index eacc855ae08e..c57d785ff955 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -166,11 +166,17 @@ void __puthex(unsigned long value) } } -static void error(char *x) +void warn(char *m) { error_putstr("\n\n"); - error_putstr(x); - error_putstr("\n\n -- System halted"); + error_putstr(m); + error_putstr("\n\n"); +} + +static void error(char *m) +{ + warn(m); + error_putstr(" -- System halted"); while (1) asm("hlt"); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9887e0d4aaeb..e75f6cf9caaf 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -35,6 +35,7 @@ extern memptr free_mem_end_ptr; extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); +void warn(char *m); #define error_putstr(__x) __putstr(__x) #define error_puthex(__x) __puthex(__x) -- cgit v1.2.3 From 81b785f3e4114ed74fceb48a54e7de2f797a2ba1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 26 Apr 2016 14:46:06 -0700 Subject: x86/boot: Rename overlapping memcpy() to memmove() Instead of having non-standard memcpy() behavior, explicitly call the new function memmove(), make it available to the decompressors, and switch the two overlap cases (screen scrolling and ELF parsing) to use memmove(). Additionally documents the purpose of compressed/string.c. Suggested-by: Lasse Collin Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20160426214606.GA5758@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.c | 6 ++++-- arch/x86/boot/compressed/string.c | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index c57d785ff955..6dde6ccdf00e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -32,9 +32,11 @@ #undef memcpy #undef memset #define memzero(s, n) memset((s), 0, (n)) +#define memmove memmove /* Functions used by the included decompressor code below. */ static void error(char *m); +void *memmove(void *dest, const void *src, size_t n); /* * This is set up by the setup-routine at boot-time @@ -80,7 +82,7 @@ static void scroll(void) { int i; - memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + memmove(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) vidmem[i] = ' '; } @@ -307,7 +309,7 @@ static void parse_elf(void *output) #else dest = (void *)(phdr->p_paddr); #endif - memcpy(dest, output + phdr->p_offset, phdr->p_filesz); + memmove(dest, output + phdr->p_offset, phdr->p_filesz); break; default: /* Ignore other PT_* */ break; } diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 1e10e40f49dd..2befeca1aada 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -1,7 +1,14 @@ +/* + * This provides an optimized implementation of memcpy, and a simplified + * implementation of memset and memmove. These are used here because the + * standard kernel runtime versions are not yet available and we don't + * trust the gcc built-in implementations as they may do unexpected things + * (e.g. FPU ops) in the minimal decompression stub execution environment. + */ #include "../string.c" #ifdef CONFIG_X86_32 -void *__memcpy(void *dest, const void *src, size_t n) +void *memcpy(void *dest, const void *src, size_t n) { int d0, d1, d2; asm volatile( @@ -15,7 +22,7 @@ void *__memcpy(void *dest, const void *src, size_t n) return dest; } #else -void *__memcpy(void *dest, const void *src, size_t n) +void *memcpy(void *dest, const void *src, size_t n) { long d0, d1, d2; asm volatile( @@ -40,17 +47,13 @@ void *memset(void *s, int c, size_t n) return s; } -/* - * This memcpy is overlap safe (i.e. it is memmove without conflicting - * with other definitions of memmove from the various decompressors. - */ -void *memcpy(void *dest, const void *src, size_t n) +void *memmove(void *dest, const void *src, size_t n) { unsigned char *d = dest; const unsigned char *s = src; if (d <= s || d - s >= n) - return __memcpy(dest, src, n); + return memcpy(dest, src, n); while (n-- > 0) d[n] = s[n]; -- cgit v1.2.3 From 6f9af75faa1df61e1ee5bea8a787a90605bb528d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 28 Apr 2016 17:09:03 -0700 Subject: x86/KASLR: Handle kernel relocations above 2G correctly When processing the relocation table, the offset used to calculate the relocation is an 'int'. This is sufficient for calculating the physical address of the relocs entry on 32-bit systems and on 64-bit systems when the relocation is under 2G. To handle relocations above 2G (seen in situations like kexec, netboot, etc), this offset needs to be calculated using a 'long' to avoid wrapping and miscalculating the relocation. Signed-off-by: Baoquan He [ Rewrote the changelog. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 6dde6ccdf00e..45145149c07d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -232,7 +232,7 @@ static void handle_relocations(void *output, unsigned long output_len) * So we work backwards from the end of the decompressed image. */ for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { - int extended = *reloc; + long extended = *reloc; extended += map; ptr = (unsigned long)extended; -- cgit v1.2.3 From 974f221c84b05b1dc2f5ea50dc16d2a9d1e95eda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:04 -0700 Subject: x86/boot: Move compressed kernel to the end of the decompression buffer This change makes later calculations about where the kernel is located easier to reason about. To better understand this change, we must first clarify what 'VO' and 'ZO' are. These values were introduced in commits by hpa: 77d1a4999502 ("x86, boot: make symbols from the main vmlinux available") 37ba7ab5e33c ("x86, boot: make kernel_alignment adjustable; new bzImage fields") Specifically: All names prefixed with 'VO_': - relate to the uncompressed kernel image - the size of the VO image is: VO__end-VO__text ("VO_INIT_SIZE" define) All names prefixed with 'ZO_': - relate to the bootable compressed kernel image (boot/compressed/vmlinux), which is composed of the following memory areas: - head text - compressed kernel (VO image and relocs table) - decompressor code - the size of the ZO image is: ZO__end - ZO_startup_32 ("ZO_INIT_SIZE" define, though see below) The 'INIT_SIZE' value is used to find the larger of the two image sizes: #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE # define INIT_SIZE ZO_INIT_SIZE #else # define INIT_SIZE VO_INIT_SIZE #endif The current code uses extract_offset to decide where to position the copied ZO (i.e. ZO starts at extract_offset). (This is why ZO_INIT_SIZE currently includes the extract_offset.) Why does z_extract_offset exist? It's needed because we are trying to minimize the amount of RAM used for the whole act of creating an uncompressed, executable, properly relocation-linked kernel image in system memory. We do this so that kernels can be booted on even very small systems. To achieve the goal of minimal memory consumption we have implemented an in-place decompression strategy: instead of cleanly separating the VO and ZO images and also allocating some memory for the decompression code's runtime needs, we instead create this elaborate layout of memory buffers where the output (decompressed) stream, as it progresses, overlaps with and destroys the input (compressed) stream. This can only be done safely if the ZO image is placed to the end of the VO range, plus a certain amount of safety distance to make sure that when the last bytes of the VO range are decompressed, the compressed stream pointer is safely beyond the end of the VO range. z_extract_offset is calculated in arch/x86/boot/compressed/mkpiggy.c during the build process, at a point when we know the exact compressed and uncompressed size of the kernel images and can calculate this safe minimum offset value. (Note that the mkpiggy.c calculation is not perfect, because we don't know the decompressor used at that stage, so the z_extract_offset calculation is necessarily imprecise and is mostly based on gzip internals - we'll improve that in the next patch.) When INIT_SIZE is bigger than VO_INIT_SIZE (uncommon but possible), the copied ZO occupies the memory from extract_offset to the end of decompression buffer. It overlaps with the soon-to-be-uncompressed kernel like this: |-----compressed kernel image------| V V 0 extract_offset +INIT_SIZE |-----------|---------------|-------------------------|--------| | | | | VO__text startup_32 of ZO VO__end ZO__end ^ ^ |-------uncompressed kernel image---------| When INIT_SIZE is equal to VO_INIT_SIZE (likely) there's still space left from end of ZO to the end of decompressing buffer, like below. |-compressed kernel image-| V V 0 extract_offset +INIT_SIZE |-----------|---------------|-------------------------|--------| | | | | VO__text startup_32 of ZO ZO__end VO__end ^ ^ |------------uncompressed kernel image-------------| To simplify calculations and avoid special cases, it is cleaner to always place the compressed kernel image in memory so that ZO__end is at the end of the decompression buffer, instead of placing t at the start of extract_offset as is currently done. This patch adds BP_init_size (which is the INIT_SIZE as passed in from the boot_params) into asm-offsets.c to make it visible to the assembly code. Then when moving the ZO, it calculates the starting position of the copied ZO (via BP_init_size and the ZO run size) so that the VO__end will be at the end of the decompression buffer. To make the position calculation safe, the end of ZO is page aligned (and a comment is added to the existing VO alignment for good measure). Signed-off-by: Yinghai Lu [ Rewrote changelog and comments. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-3-git-send-email-keescook@chromium.org [ Rewrote the changelog some more. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/head_32.S | 11 +++++++++-- arch/x86/boot/compressed/head_64.S | 8 ++++++-- arch/x86/boot/compressed/misc.c | 17 +++++++++++++++++ arch/x86/boot/compressed/mkpiggy.c | 3 --- arch/x86/boot/compressed/vmlinux.lds.S | 1 + 5 files changed, 33 insertions(+), 7 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 26dd9df19a69..8a95e2f845aa 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -176,7 +176,9 @@ preferred_addr: 1: /* Target address to relocate to for decompression */ - addl $z_extract_offset, %ebx + movl BP_init_size(%esi), %eax + subl $_end, %eax + addl %eax, %ebx /* Set up the stack */ leal boot_stack_end(%ebx), %esp @@ -238,8 +240,13 @@ relocated: /* push arguments for extract_kernel: */ pushl $z_run_size /* size of kernel with .bss and .brk */ pushl $z_output_len /* decompressed length, end of relocs */ - leal z_extract_offset_negative(%ebx), %ebp + + movl BP_init_size(%esi), %eax + subl $_end, %eax + movl %ebx, %ebp + subl %eax, %ebp pushl %ebp /* output address */ + pushl $z_input_len /* input_len */ leal input_data(%ebx), %eax pushl %eax /* input_data */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d43c30ed89ed..09cdc0c3ee7e 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -110,7 +110,9 @@ ENTRY(startup_32) 1: /* Target address to relocate to for decompression */ - addl $z_extract_offset, %ebx + movl BP_init_size(%esi), %eax + subl $_end, %eax + addl %eax, %ebx /* * Prepare for entering 64 bit mode @@ -338,7 +340,9 @@ preferred_addr: 1: /* Target address to relocate to for decompression */ - leaq z_extract_offset(%rbp), %rbx + movl BP_init_size(%rsi), %ebx + subl $_end, %ebx + addq %rbp, %rbx /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 45145149c07d..4b4605e94b3c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -318,6 +318,23 @@ static void parse_elf(void *output) free(phdrs); } +/* + * The compressed kernel image (ZO), has been moved so that its position + * is against the end of the buffer used to hold the uncompressed kernel + * image (VO) and the execution environment (.bss, .brk), which makes sure + * there is room to do the in-place decompression. (See header.S for the + * calculations.) + * + * |-----compressed kernel image------| + * V V + * 0 extract_offset +INIT_SIZE + * |-----------|---------------|-------------------------|--------| + * | | | | + * VO__text startup_32 of ZO VO__end ZO__end + * ^ ^ + * |-------uncompressed kernel image---------| + * + */ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index d8222f213182..b980046c3329 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -85,9 +85,6 @@ int main(int argc, char *argv[]) printf("z_output_len = %lu\n", (unsigned long)olen); printf(".globl z_extract_offset\n"); printf("z_extract_offset = 0x%lx\n", offs); - /* z_extract_offset_negative allows simplification of head_32.S */ - printf(".globl z_extract_offset_negative\n"); - printf("z_extract_offset_negative = -0x%lx\n", offs); printf(".globl z_run_size\n"); printf("z_run_size = %lu\n", run_size); diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 34d047c98284..e24e0a0c90c9 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -70,5 +70,6 @@ SECTIONS _epgtable = . ; } #endif + . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */ _end = .; } -- cgit v1.2.3 From d607251ba9acc0b5faeaa08818f60d041dd19472 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:05 -0700 Subject: x86/boot: Calculate decompression size during boot not build Currently z_extract_offset is calculated in boot/compressed/mkpiggy.c. This doesn't work well because mkpiggy.c doesn't know the details of the decompressor in use. As a result, it can only make an estimation, which has risks: - output + output_len (VO) could be much bigger than input + input_len (ZO). In this case, the decompressed kernel plus relocs could overwrite the decompression code while it is running. - The head code of ZO could be bigger than z_extract_offset. In this case an overwrite could happen when the head code is running to move ZO to the end of buffer. Though currently the size of the head code is very small it's still a potential risk. Since there is no rule to limit the size of the head code of ZO, it runs the risk of suddenly becoming a (hard to find) bug. Instead, this moves the z_extract_offset calculation into header.S, and makes adjustments to be sure that the above two cases can never happen, and further corrects the comments describing the calculations. Since we have (in the previous patch) made ZO always be located against the end of decompression buffer, z_extract_offset is only used here to calculate an appropriate buffer size (INIT_SIZE), and is not longer used elsewhere. As such, it can be removed from voffset.h. Additionally clean up #if/#else #define to improve readability. Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rewrote the changelog and comments. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/mkpiggy.c | 21 ++++----------------- arch/x86/boot/header.S | 35 +++++++++++++++++++++++++---------- 3 files changed, 30 insertions(+), 28 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b1ef9e489084..942f7dabfb1e 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -95,7 +95,7 @@ targets += voffset.h $(obj)/voffset.h: vmlinux FORCE $(call if_changed,voffset) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index b980046c3329..f095ed9e7d3c 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -18,11 +18,10 @@ * * H. Peter Anvin * - * ----------------------------------------------------------------------- */ - -/* - * Compute the desired load offset from a compressed program; outputs - * a small assembly wrapper with the appropriate symbols defined. + * ----------------------------------------------------------------------- + * + * Outputs a small assembly wrapper with the appropriate symbols defined. + * */ #include @@ -35,7 +34,6 @@ int main(int argc, char *argv[]) { uint32_t olen; long ilen; - unsigned long offs; unsigned long run_size; FILE *f = NULL; int retval = 1; @@ -67,15 +65,6 @@ int main(int argc, char *argv[]) ilen = ftell(f); olen = get_unaligned_le32(&olen); - /* - * Now we have the input (compressed) and output (uncompressed) - * sizes, compute the necessary decompression offset... - */ - - offs = (olen > ilen) ? olen - ilen : 0; - offs += olen >> 12; /* Add 8 bytes for each 32K block */ - offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ - offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ run_size = atoi(argv[2]); printf(".section \".rodata..compressed\",\"a\",@progbits\n"); @@ -83,8 +72,6 @@ int main(int argc, char *argv[]) printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); printf("z_output_len = %lu\n", (unsigned long)olen); - printf(".globl z_extract_offset\n"); - printf("z_extract_offset = 0x%lx\n", offs); printf(".globl z_run_size\n"); printf("z_run_size = %lu\n", run_size); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index fd85b9e4e953..3dd5be33aaa7 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -508,13 +508,10 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # To avoid problems with the compressed data's meta information an extra 18 # bytes are needed. Leading to the formula: # -# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size +# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 # # Adding 8 bytes per 32K is a bit excessive but much easier to calculate. # Adding 32768 instead of 32767 just makes for round numbers. -# Adding the decompressor_size is necessary as it musht live after all -# of the data as well. Last I measured the decompressor is about 14K. -# 10K of actual data and 4K of bss. # # Above analysis is for decompressing gzip compressed kernel only. Up to # now 6 different decompressor are supported all together. And among them @@ -524,17 +521,35 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # the description in lib/decompressor_xxx.c for specific information. # # extra_bytes = (uncompressed_size >> 12) + 65536 + 128 -# -# Note that this calculation, which results in z_extract_offset (below), -# is currently generated in compressed/mkpiggy.c -#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128) +#if ZO_z_output_len > ZO_z_input_len +# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ + ZO_z_input_len) +#else +# define ZO_z_extract_offset ZO_z_extra_bytes +#endif + +/* + * The extract_offset has to be bigger than ZO head section. Otherwise when + * the head code is running to move ZO to the end of the buffer, it will + * overwrite the head code itself. + */ +#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset +# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095) +#else +# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095) +#endif + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset) + #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE -#define INIT_SIZE ZO_INIT_SIZE +# define INIT_SIZE ZO_INIT_SIZE #else -#define INIT_SIZE VO_INIT_SIZE +# define INIT_SIZE VO_INIT_SIZE #endif + init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -- cgit v1.2.3 From 67b6662559f7f77bcbd3ac67d09aaac11785f3c1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:06 -0700 Subject: x86/boot: Fix "run_size" calculation Currently, the "run_size" variable holds the total kernel size (size of code plus brk and bss) and is calculated via the shell script arch/x86/tools/calc_run_size.sh. It gets the file offset and mem size of the .bss and .brk sections from the vmlinux, and adds them as follows: run_size = $(( $offsetA + $sizeA + $sizeB )) However, this is not correct (it is too large). To illustrate, here's a walk-through of the script's calculation, compared to the correct way to find it. First, offsetA is found as the starting address of the first .bss or .brk section seen in the ELF file. The sizeA and sizeB values are the respective section sizes. [bhe@x1 linux]$ objdump -h vmlinux vmlinux: file format elf64-x86-64 Sections: Idx Name Size VMA LMA File off Algn 27 .bss 00170000 ffffffff81ec8000 0000000001ec8000 012c8000 2**12 ALLOC 28 .brk 00027000 ffffffff82038000 0000000002038000 012c8000 2**0 ALLOC Here, offsetA is 0x012c8000, with sizeA at 0x00170000 and sizeB at 0x00027000. The resulting run_size is 0x145f000: 0x012c8000 + 0x00170000 + 0x00027000 = 0x145f000 However, if we instead examine the ELF LOAD program headers, we see a different picture. [bhe@x1 linux]$ readelf -l vmlinux Elf file type is EXEC (Executable file) Entry point 0x1000000 There are 5 program headers, starting at offset 64 Program Headers: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x0000000000200000 0xffffffff81000000 0x0000000001000000 0x0000000000b5e000 0x0000000000b5e000 R E 200000 LOAD 0x0000000000e00000 0xffffffff81c00000 0x0000000001c00000 0x0000000000145000 0x0000000000145000 RW 200000 LOAD 0x0000000001000000 0x0000000000000000 0x0000000001d45000 0x0000000000018158 0x0000000000018158 RW 200000 LOAD 0x000000000115e000 0xffffffff81d5e000 0x0000000001d5e000 0x000000000016a000 0x0000000000301000 RWE 200000 NOTE 0x000000000099bcac 0xffffffff8179bcac 0x000000000179bcac 0x00000000000001bc 0x00000000000001bc 4 Section to Segment mapping: Segment Sections... 00 .text .notes __ex_table .rodata __bug_table .pci_fixup .tracedata __ksymtab __ksymtab_gpl __ksymtab_strings __init_rodata __param __modver 01 .data .vvar 02 .data..percpu 03 .init.text .init.data .x86_cpu_dev.init .parainstructions .altinstructions .altinstr_replacement .iommu_table .apicdrivers .exit.text .smp_locks .bss .brk 04 .notes As mentioned, run_size needs to be the size of the running kernel including .bss and .brk. We can see from the Section/Segment mapping above that .bss and .brk are included in segment 03 (which corresponds to the final LOAD program header). To find the run_size, we calculate the end of the LOAD segment from its PhysAddr start (0x0000000001d5e000) and its MemSiz (0x0000000000301000), minus the physical load address of the kernel (the first LOAD segment's PhysAddr: 0x0000000001000000). The resulting run_size is 0x105f000: 0x0000000001d5e000 + 0x0000000000301000 - 0x0000000001000000 = 0x105f000 So, from this we can see that the existing run_size calculation is 0x400000 too high. And, as it turns out, the correct run_size is actually equal to VO_end - VO_text, which is certainly easier to calculate. _end: 0xffffffff8205f000 _text:0xffffffff81000000 0xffffffff8205f000 - 0xffffffff81000000 = 0x105f000 As a result, run_size is a simple constant, so we don't need to pass it around; we already have voffset.h for such things. We can share voffset.h between misc.c and header.S instead of getting run_size in other ways. This patch moves voffset.h creation code to boot/compressed/Makefile, and switches misc.c to use the VO_end - VO_text calculation for run_size. Dependence before: boot/header.S ==> boot/voffset.h ==> vmlinux boot/header.S ==> compressed/vmlinux ==> compressed/misc.c Dependence after: boot/header.S ==> compressed/vmlinux ==> compressed/misc.c ==> boot/voffset.h ==> vmlinux Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rewrote the changelog. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Triplett Cc: Junjie Mao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Fixes: e6023367d779 ("x86, kaslr: Prevent .bss from overlaping initrd") Link: http://lkml.kernel.org/r/1461888548-32439-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/Makefile | 11 +---------- arch/x86/boot/compressed/Makefile | 12 ++++++++++++ arch/x86/boot/compressed/misc.c | 3 +++ 3 files changed, 16 insertions(+), 10 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 942f7dabfb1e..700a9c6e6159 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -86,15 +86,6 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 0x\1/p' - -quiet_cmd_voffset = VOFFSET $@ - cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ - -targets += voffset.h -$(obj)/voffset.h: vmlinux FORCE - $(call if_changed,voffset) - sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ @@ -106,7 +97,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE AFLAGS_header.o += -I$(obj) -$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h +$(obj)/header.o: $(obj)/zoffset.h LDFLAGS_setup.elf := -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 542c92f5ca4f..45997805adad 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -57,6 +57,18 @@ LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' + +quiet_cmd_voffset = VOFFSET $@ + cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ + +targets += ../voffset.h + +$(obj)/../voffset.h: vmlinux FORCE + $(call if_changed,voffset) + +$(obj)/misc.o: $(obj)/../voffset.h + vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o \ $(obj)/piggy.o $(obj)/cpuflags.o diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 4b4605e94b3c..cda93d16ad4d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -13,6 +13,7 @@ #include "misc.h" #include "../string.h" +#include "../voffset.h" /* * WARNING!! @@ -363,6 +364,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, lines = boot_params->screen_info.orig_video_lines; cols = boot_params->screen_info.orig_video_cols; + run_size = VO__end - VO__text; + console_init(); debug_putstr("early console in extract_kernel\n"); -- cgit v1.2.3 From 4d2d542482205d3df1a0852751f5b004cc6390cc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:07 -0700 Subject: x86/KASLR: Clean up unused code from old 'run_size' and rename it to 'kernel_total_size' Since 'run_size' is now calculated in misc.c, the old script and associated argument passing is no longer needed. This patch removes them, and renames 'run_size' to the more descriptive 'kernel_total_size'. Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rewrote the changelog, renamed 'run_size' to 'kernel_total_size' ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Triplett Cc: Junjie Mao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 4 +--- arch/x86/boot/compressed/head_32.S | 3 +-- arch/x86/boot/compressed/head_64.S | 3 --- arch/x86/boot/compressed/misc.c | 11 ++++------- arch/x86/boot/compressed/mkpiggy.c | 10 ++-------- 5 files changed, 8 insertions(+), 23 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 45997805adad..adef26d22224 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -121,10 +121,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 -RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ - cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) targets += piggy.S $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 8a95e2f845aa..1038524270e7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -238,7 +238,6 @@ relocated: * Do the extraction, and jump to the new kernel.. */ /* push arguments for extract_kernel: */ - pushl $z_run_size /* size of kernel with .bss and .brk */ pushl $z_output_len /* decompressed length, end of relocs */ movl BP_init_size(%esi), %eax @@ -254,7 +253,7 @@ relocated: pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call extract_kernel /* returns kernel location in %eax */ - addl $28, %esp + addl $24, %esp /* * Jump to the extracted kernel. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 09cdc0c3ee7e..7c047002950c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -415,8 +415,6 @@ relocated: * Do the extraction, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ - movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ - pushq %r9 movq %rsi, %rdi /* real mode address */ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ @@ -424,7 +422,6 @@ relocated: movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length, end of relocs */ call extract_kernel /* returns kernel location in %rax */ - popq %r9 popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cda93d16ad4d..bee6238e7cfc 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -340,9 +340,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, - unsigned long output_len, - unsigned long run_size) + unsigned long output_len) { + const unsigned long kernel_total_size = VO__end - VO__text; unsigned char *output_orig = output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ @@ -364,8 +364,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, lines = boot_params->screen_info.orig_video_lines; cols = boot_params->screen_info.orig_video_cols; - run_size = VO__end - VO__text; - console_init(); debug_putstr("early console in extract_kernel\n"); @@ -377,7 +375,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); - debug_putaddr(run_size); + debug_putaddr(kernel_total_size); /* * The memory hole needed for the kernel is the larger of either @@ -385,8 +383,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entire decompressed kernel plus .bss and .brk sections. */ output = choose_random_location(input_data, input_len, output, - output_len > run_size ? output_len - : run_size); + max(output_len, kernel_total_size)); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index f095ed9e7d3c..72bad2c8debe 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -34,13 +34,11 @@ int main(int argc, char *argv[]) { uint32_t olen; long ilen; - unsigned long run_size; FILE *f = NULL; int retval = 1; - if (argc < 3) { - fprintf(stderr, "Usage: %s compressed_file run_size\n", - argv[0]); + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); goto bail; } @@ -65,15 +63,11 @@ int main(int argc, char *argv[]) ilen = ftell(f); olen = get_unaligned_le32(&olen); - run_size = atoi(argv[2]); - printf(".section \".rodata..compressed\",\"a\",@progbits\n"); printf(".globl z_input_len\n"); printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); printf("z_output_len = %lu\n", (unsigned long)olen); - printf(".globl z_run_size\n"); - printf("z_run_size = %lu\n", run_size); printf(".globl input_data, input_data_end\n"); printf("input_data:\n"); -- cgit v1.2.3 From 4abf061bf87bbd856c8d60199b2fba8b8f9b9fd6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:08 -0700 Subject: x86/boot: Correctly bounds-check relocations Relocation handling performs bounds checking on the resulting calculated addresses. The existing code uses output_len (VO size plus relocs size) as the max address. This is not right since the max_addr check should stop at the end of VO and exclude bss, brk, etc, which follows. The valid range should be VO [_text, __bss_start] in the loaded physical address space. This patch adds an export for __bss_start in voffset.h and uses it to set the correct limit for max_addr. Signed-off-by: Yinghai Lu [ Rewrote the changelog. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/misc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index adef26d22224..75f2233b8414 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -57,7 +57,7 @@ LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy HOST_EXTRACFLAGS += -I$(srctree)/tools/include -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(_text\|__bss_start\|_end\)$$/\#define VO_\2 _AC(0x\1,UL)/p' quiet_cmd_voffset = VOFFSET $@ cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index bee6238e7cfc..8f0253d8c7ff 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -191,7 +191,7 @@ static void handle_relocations(void *output, unsigned long output_len) int *reloc; unsigned long delta, map, ptr; unsigned long min_addr = (unsigned long)output; - unsigned long max_addr = min_addr + output_len; + unsigned long max_addr = min_addr + (VO___bss_start - VO__text); /* * Calculate the delta between where vmlinux was linked to load -- cgit v1.2.3 From dc425a6e140bca99bdb4823e9909c9d9b8ba36b6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 2 May 2016 15:51:00 -0700 Subject: x86/boot: Extract error reporting functions Currently to use warn(), a caller would need to include misc.h. However, this means they would get the (unavailable during compressed boot) gcc built-in memcpy family of functions. But since string.c is defining these memcpy functions for use by misc.c, we end up in a weird circular dependency. To break this loop, move the error reporting functions outside of misc.c with their own header so that they can be independently included by other sources. Since the screen-writing routines use memmove(), keep the low-level *_putstr() functions in misc.c. Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Lasse Collin Cc: Linus Torvalds Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1462229461-3370-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 2 +- arch/x86/boot/compressed/error.c | 22 ++++++++++++++++++++++ arch/x86/boot/compressed/error.h | 7 +++++++ arch/x86/boot/compressed/kaslr.c | 1 + arch/x86/boot/compressed/misc.c | 18 +----------------- arch/x86/boot/compressed/misc.h | 1 - arch/x86/boot/compressed/string.c | 2 ++ 7 files changed, 34 insertions(+), 19 deletions(-) create mode 100644 arch/x86/boot/compressed/error.c create mode 100644 arch/x86/boot/compressed/error.h (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 75f2233b8414..77ce3a04d46e 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -70,7 +70,7 @@ $(obj)/../voffset.h: vmlinux FORCE $(obj)/misc.o: $(obj)/../voffset.h vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ - $(obj)/string.o $(obj)/cmdline.o \ + $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \ $(obj)/piggy.o $(obj)/cpuflags.o vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c new file mode 100644 index 000000000000..6248740b68b5 --- /dev/null +++ b/arch/x86/boot/compressed/error.c @@ -0,0 +1,22 @@ +/* + * Callers outside of misc.c need access to the error reporting routines, + * but the *_putstr() functions need to stay in misc.c because of how + * memcpy() and memmove() are defined for the compressed boot environment. + */ +#include "misc.h" + +void warn(char *m) +{ + error_putstr("\n\n"); + error_putstr(m); + error_putstr("\n\n"); +} + +void error(char *m) +{ + warn(m); + error_putstr(" -- System halted"); + + while (1) + asm("hlt"); +} diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h new file mode 100644 index 000000000000..2e59dac07f9e --- /dev/null +++ b/arch/x86/boot/compressed/error.h @@ -0,0 +1,7 @@ +#ifndef BOOT_COMPRESSED_ERROR_H +#define BOOT_COMPRESSED_ERROR_H + +void warn(char *m); +void error(char *m); + +#endif /* BOOT_COMPRESSED_ERROR_H */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8741a6d83bfe..f1818d95d726 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -10,6 +10,7 @@ * */ #include "misc.h" +#include "error.h" #include #include diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 8f0253d8c7ff..9536d778149e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -12,6 +12,7 @@ */ #include "misc.h" +#include "error.h" #include "../string.h" #include "../voffset.h" @@ -36,7 +37,6 @@ #define memmove memmove /* Functions used by the included decompressor code below. */ -static void error(char *m); void *memmove(void *dest, const void *src, size_t n); /* @@ -169,22 +169,6 @@ void __puthex(unsigned long value) } } -void warn(char *m) -{ - error_putstr("\n\n"); - error_putstr(m); - error_putstr("\n\n"); -} - -static void error(char *m) -{ - warn(m); - error_putstr(" -- System halted"); - - while (1) - asm("hlt"); -} - #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len) { diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index e75f6cf9caaf..9887e0d4aaeb 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -35,7 +35,6 @@ extern memptr free_mem_end_ptr; extern struct boot_params *boot_params; void __putstr(const char *s); void __puthex(unsigned long value); -void warn(char *m); #define error_putstr(__x) __putstr(__x) #define error_puthex(__x) __puthex(__x) diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index 2befeca1aada..faa4dc7dc66b 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -5,6 +5,8 @@ * trust the gcc built-in implementations as they may do unexpected things * (e.g. FPU ops) in the minimal decompression stub execution environment. */ +#include "error.h" + #include "../string.c" #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 00ec2c37031eb1b1feda006c84748d126dc2ef27 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 2 May 2016 15:51:01 -0700 Subject: x86/boot: Warn on future overlapping memcpy() use If an overlapping memcpy() is ever attempted, we should at least report it, in case it might lead to problems, so it could be changed to a memmove() call instead. Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Lasse Collin Cc: Linus Torvalds Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1462229461-3370-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/string.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index faa4dc7dc66b..cea140ce6b42 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c @@ -10,7 +10,7 @@ #include "../string.c" #ifdef CONFIG_X86_32 -void *memcpy(void *dest, const void *src, size_t n) +static void *__memcpy(void *dest, const void *src, size_t n) { int d0, d1, d2; asm volatile( @@ -24,7 +24,7 @@ void *memcpy(void *dest, const void *src, size_t n) return dest; } #else -void *memcpy(void *dest, const void *src, size_t n) +static void *__memcpy(void *dest, const void *src, size_t n) { long d0, d1, d2; asm volatile( @@ -55,10 +55,20 @@ void *memmove(void *dest, const void *src, size_t n) const unsigned char *s = src; if (d <= s || d - s >= n) - return memcpy(dest, src, n); + return __memcpy(dest, src, n); while (n-- > 0) d[n] = s[n]; return dest; } + +/* Detect and warn about potential overlaps, but handle them with memmove. */ +void *memcpy(void *dest, const void *src, size_t n) +{ + if (dest > src && dest - src < n) { + warn("Avoiding potentially unsafe overlapping memcpy()!"); + return memmove(dest, src, n); + } + return __memcpy(dest, src, n); +} -- cgit v1.2.3 From 2bc1cd39fa9f659956b25e500422e700a6cd4ec3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 5 May 2016 15:13:46 -0700 Subject: x86/boot: Clean up pointer casting Currently extract_kernel() defines the input and output buffer pointers as "unsigned char *" since that's effectively what they are. It passes these to the decompressor routine and to the ELF parser, which both logically deal with buffer pointers too. There is some casting ("unsigned long") done to validate the numerical value of the pointers, but it is relatively limited. However, choose_random_location() operates almost exclusively on the numerical representation of these pointers, so it ended up carrying a lot of "unsigned long" casts. With the future physical/virtual split these casts were going to multiply, so this attempts to solve the problem by doing all the casting in choose_random_location()'s entry and return instead of through-out the code. Adjusts argument names to be more meaningful, and changes one us of "choice" to "output" to make the future physical/virtual split more clear (i.e. "choice" should be strictly a function return value and not used as an intermediate). Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462486436-3707-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 20 ++++++++++++++------ arch/x86/boot/compressed/misc.h | 10 +++++----- 2 files changed, 19 insertions(+), 11 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index f1818d95d726..2072d82c1911 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -305,12 +305,21 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_random_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input_ptr, unsigned long input_size, - unsigned char *output, + unsigned char *output_ptr, unsigned long output_size) { - unsigned long choice = (unsigned long)output; + /* + * The caller of choose_random_location() uses unsigned char * for + * buffer pointers since it performs decompression, elf parsing, etc. + * Since this code examines addresses much more numerically, + * unsigned long is used internally here. Instead of sprinkling + * more casts into extract_kernel, do them here and at return. + */ + unsigned long input = (unsigned long)input_ptr; + unsigned long output = (unsigned long)output_ptr; + unsigned long choice = output; unsigned long random_addr; #ifdef CONFIG_HIBERNATION @@ -328,11 +337,10 @@ unsigned char *choose_random_location(unsigned char *input, boot_params->hdr.loadflags |= KASLR_FLAG; /* Record the various known unsafe memory ranges. */ - mem_avoid_init((unsigned long)input, input_size, - (unsigned long)output, output_size); + mem_avoid_init(input, input_size, output, output_size); /* Walk e820 and find a random address. */ - random_addr = find_random_addr(choice, output_size); + random_addr = find_random_addr(output, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); goto out; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9887e0d4aaeb..1f23d022d241 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,20 +67,20 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input_ptr, unsigned long input_size, - unsigned char *output, + unsigned char *output_ptr, unsigned long output_size); /* cpuflags.c */ bool has_cpuflag(int flag); #else static inline -unsigned char *choose_random_location(unsigned char *input, +unsigned char *choose_random_location(unsigned char *input_ptr, unsigned long input_size, - unsigned char *output, + unsigned char *output_ptr, unsigned long output_size) { - return output; + return output_ptr; } #endif -- cgit v1.2.3 From 9dc1969c24eff8b7d7a9a565d1047b624921ba06 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 5 May 2016 15:13:47 -0700 Subject: x86/KASLR: Consolidate mem_avoid[] entries The mem_avoid[] array is used to track positions that should be avoided (like the compressed kernel, decompression code, etc) when selecting a memory position for the randomly relocated kernel. Since ZO is now at the end of the decompression buffer and the decompression code (and its heap and stack) are at the front, we can safely consolidate the decompression entry, the heap entry, and the stack entry. The boot_params memory, however, could be elsewhere, so it should be explicitly included. Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rwrote changelog, cleaned up code comments. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462486436-3707-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 77 +++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 16 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 2072d82c1911..6392f0041b8a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -121,7 +121,7 @@ struct mem_vector { unsigned long size; }; -#define MEM_AVOID_MAX 5 +#define MEM_AVOID_MAX 4 static struct mem_vector mem_avoid[MEM_AVOID_MAX]; static bool mem_contains(struct mem_vector *region, struct mem_vector *item) @@ -146,22 +146,71 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } +/* + * In theroy, KASLR can put the kernel anywhere in area of [16M, 64T). The + * mem_avoid array is used to store the ranges that need to be avoided when + * KASLR searches for a an appropriate random address. We must avoid any + * regions that are unsafe to overlap with during decompression, and other + * things like the initrd, cmdline and boot_params. + * + * How to calculate the unsafe areas is detailed here, and is informed by + * the decompression calculations in header.S, and the diagram in misc.c. + * + * The compressed vmlinux (ZO) plus relocs and the run space of ZO can't be + * overwritten by decompression output. + * + * ZO sits against the end of the decompression buffer, so we can calculate + * where text, data, bss, etc of ZO are positioned. + * + * The follow are already enforced by the code: + * - init_size >= kernel_total_size + * - input + input_len >= output + output_len + * - kernel_total_size could be >= or < output_len + * + * From this, we can make several observations, illustrated by a diagram: + * - init_size >= kernel_total_size + * - input + input_len > output + output_len + * - kernel_total_size >= output_len + * + * 0 output input input+input_len output+init_size + * | | | | | + * | | | | | + * |-----|--------|--------|------------------|----|------------|----------| + * | | | + * | | | + * output+init_size-ZO_INIT_SIZE output+output_len output+kernel_total_size + * + * [output, output+init_size) is for the buffer for decompressing the + * compressed kernel (ZO). + * + * [output, output+kernel_total_size) is for the uncompressed kernel (VO) + * and its bss, brk, etc. + * [output, output+output_len) is VO plus relocs + * + * [output+init_size-ZO_INIT_SIZE, output+init_size) is the copied ZO. + * [input, input+input_len) is the copied compressed (VO (vmlinux after + * objcopy) plus relocs), not the ZO. + * + * [input+input_len, output+init_size) is [_text, _end) for ZO. That was the + * first range in mem_avoid, which included ZO's heap and stack. Also + * [input, input+input_size) need be put in mem_avoid array, but since it + * is adjacent to the first entry, they can be merged. This is how we get + * the first entry in mem_avoid[]. + */ static void mem_avoid_init(unsigned long input, unsigned long input_size, - unsigned long output, unsigned long output_size) + unsigned long output) { + unsigned long init_size = boot_params->hdr.init_size; u64 initrd_start, initrd_size; u64 cmd_line, cmd_line_size; - unsigned long unsafe, unsafe_len; char *ptr; /* * Avoid the region that is unsafe to overlap during - * decompression (see calculations in ../header.S). + * decompression. */ - unsafe_len = (output_size >> 12) + 32768 + 18; - unsafe = (unsigned long)input + input_size - unsafe_len; - mem_avoid[0].start = unsafe; - mem_avoid[0].size = unsafe_len; + mem_avoid[0].start = input; + mem_avoid[0].size = (output + init_size) - input; /* Avoid initrd. */ initrd_start = (u64)boot_params->ext_ramdisk_image << 32; @@ -181,13 +230,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, mem_avoid[2].start = cmd_line; mem_avoid[2].size = cmd_line_size; - /* Avoid heap memory. */ - mem_avoid[3].start = (unsigned long)free_mem_ptr; - mem_avoid[3].size = BOOT_HEAP_SIZE; - - /* Avoid stack memory. */ - mem_avoid[4].start = (unsigned long)free_mem_end_ptr; - mem_avoid[4].size = BOOT_STACK_SIZE; + /* Avoid params */ + mem_avoid[3].start = (unsigned long)boot_params; + mem_avoid[3].size = sizeof(*boot_params); } /* Does this memory vector overlap a known avoided area? */ @@ -337,7 +382,7 @@ unsigned char *choose_random_location(unsigned char *input_ptr, boot_params->hdr.loadflags |= KASLR_FLAG; /* Record the various known unsafe memory ranges. */ - mem_avoid_init(input, input_size, output, output_size); + mem_avoid_init(input, input_size, output); /* Walk e820 and find a random address. */ random_addr = find_random_addr(output, output_size); -- cgit v1.2.3 From 549f90db68c9f8e21a40ec21c8047441984e7164 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 6 May 2016 13:50:15 +0200 Subject: x86/boot: Simplify pointer casting in choose_random_location() Pass them down as 'unsigned long' directly and get rid of more casting and assignments. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: bhe@redhat.com Cc: dyoung@redhat.com Cc: linux-tip-commits@vger.kernel.org Cc: luto@kernel.org Cc: vgoyal@redhat.com Cc: yinghai@kernel.org Link: http://lkml.kernel.org/r/20160506115015.GI24044@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 17 ++++++----------- arch/x86/boot/compressed/misc.c | 3 ++- arch/x86/boot/compressed/misc.h | 10 +++++----- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 6392f0041b8a..ff1227746c72 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -350,20 +350,15 @@ static unsigned long find_random_addr(unsigned long minimum, return slots_fetch_random(); } -unsigned char *choose_random_location(unsigned char *input_ptr, +/* + * Since this function examines addresses much more numerically, + * it takes the input and output pointers as 'unsigned long'. + */ +unsigned char *choose_random_location(unsigned long input, unsigned long input_size, - unsigned char *output_ptr, + unsigned long output, unsigned long output_size) { - /* - * The caller of choose_random_location() uses unsigned char * for - * buffer pointers since it performs decompression, elf parsing, etc. - * Since this code examines addresses much more numerically, - * unsigned long is used internally here. Instead of sprinkling - * more casts into extract_kernel, do them here and at return. - */ - unsigned long input = (unsigned long)input_ptr; - unsigned long output = (unsigned long)output_ptr; unsigned long choice = output; unsigned long random_addr; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 9536d778149e..f14db4e21654 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -366,7 +366,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_random_location(input_data, input_len, output, + output = choose_random_location((unsigned long)input_data, input_len, + (unsigned long)output, max(output_len, kernel_total_size)); /* Validate memory location choices. */ diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 1f23d022d241..0112005a3f23 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,20 +67,20 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned char *input_ptr, +unsigned char *choose_random_location(unsigned long input_ptr, unsigned long input_size, - unsigned char *output_ptr, + unsigned long output_ptr, unsigned long output_size); /* cpuflags.c */ bool has_cpuflag(int flag); #else static inline -unsigned char *choose_random_location(unsigned char *input_ptr, +unsigned char *choose_random_location(unsigned long input_ptr, unsigned long input_size, - unsigned char *output_ptr, + unsigned long output_ptr, unsigned long output_size) { - return output_ptr; + return (unsigned char *)output_ptr; } #endif -- cgit v1.2.3 From ed09acde44e301b5c13755ab84821fa44b188b5e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 May 2016 12:44:59 -0700 Subject: x86/KASLR: Improve comments around the mem_avoid[] logic This attempts to improve the comments that describe how the memory range used for decompression is avoided. Additionally uses an enum instead of raw numbers for the mem_avoid[] indexing. Suggested-by: Borislav Petkov Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/20160506194459.GA16480@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 126 ++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 48 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index ff1227746c72..8ef1186f792a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -121,7 +121,14 @@ struct mem_vector { unsigned long size; }; -#define MEM_AVOID_MAX 4 +enum mem_avoid_index { + MEM_AVOID_ZO_RANGE = 0, + MEM_AVOID_INITRD, + MEM_AVOID_CMDLINE, + MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MAX, +}; + static struct mem_vector mem_avoid[MEM_AVOID_MAX]; static bool mem_contains(struct mem_vector *region, struct mem_vector *item) @@ -147,55 +154,78 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) } /* - * In theroy, KASLR can put the kernel anywhere in area of [16M, 64T). The - * mem_avoid array is used to store the ranges that need to be avoided when - * KASLR searches for a an appropriate random address. We must avoid any + * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). + * The mem_avoid array is used to store the ranges that need to be avoided + * when KASLR searches for an appropriate random address. We must avoid any * regions that are unsafe to overlap with during decompression, and other - * things like the initrd, cmdline and boot_params. + * things like the initrd, cmdline and boot_params. This comment seeks to + * explain mem_avoid as clearly as possible since incorrect mem_avoid + * memory ranges lead to really hard to debug boot failures. + * + * The initrd, cmdline, and boot_params are trivial to identify for + * avoiding. The are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and + * MEM_AVOID_BOOTPARAMS respectively below. + * + * What is not obvious how to avoid is the range of memory that is used + * during decompression (MEM_AVOID_ZO_RANGE below). This range must cover + * the compressed kernel (ZO) and its run space, which is used to extract + * the uncompressed kernel (VO) and relocs. + * + * ZO's full run size sits against the end of the decompression buffer, so + * we can calculate where text, data, bss, etc of ZO are positioned more + * easily. + * + * For additional background, the decompression calculations can be found + * in header.S, and the memory diagram is based on the one found in misc.c. + * + * The following conditions are already enforced by the image layouts and + * associated code: + * - input + input_size >= output + output_size + * - kernel_total_size <= init_size + * - kernel_total_size <= output_size (see Note below) + * - output + init_size >= output + output_size * - * How to calculate the unsafe areas is detailed here, and is informed by - * the decompression calculations in header.S, and the diagram in misc.c. + * (Note that kernel_total_size and output_size have no fundamental + * relationship, but output_size is passed to choose_random_location + * as a maximum of the two. The diagram is showing a case where + * kernel_total_size is larger than output_size, but this case is + * handled by bumping output_size.) * - * The compressed vmlinux (ZO) plus relocs and the run space of ZO can't be - * overwritten by decompression output. + * The above conditions can be illustrated by a diagram: * - * ZO sits against the end of the decompression buffer, so we can calculate - * where text, data, bss, etc of ZO are positioned. + * 0 output input input+input_size output+init_size + * | | | | | + * | | | | | + * |-----|--------|--------|--------------|-----------|--|-------------| + * | | | + * | | | + * output+init_size-ZO_INIT_SIZE output+output_size output+kernel_total_size * - * The follow are already enforced by the code: - * - init_size >= kernel_total_size - * - input + input_len >= output + output_len - * - kernel_total_size could be >= or < output_len + * [output, output+init_size) is the entire memory range used for + * extracting the compressed image. * - * From this, we can make several observations, illustrated by a diagram: - * - init_size >= kernel_total_size - * - input + input_len > output + output_len - * - kernel_total_size >= output_len + * [output, output+kernel_total_size) is the range needed for the + * uncompressed kernel (VO) and its run size (bss, brk, etc). * - * 0 output input input+input_len output+init_size - * | | | | | - * | | | | | - * |-----|--------|--------|------------------|----|------------|----------| - * | | | - * | | | - * output+init_size-ZO_INIT_SIZE output+output_len output+kernel_total_size + * [output, output+output_size) is VO plus relocs (i.e. the entire + * uncompressed payload contained by ZO). This is the area of the buffer + * written to during decompression. * - * [output, output+init_size) is for the buffer for decompressing the - * compressed kernel (ZO). + * [output+init_size-ZO_INIT_SIZE, output+init_size) is the worst-case + * range of the copied ZO and decompression code. (i.e. the range + * covered backwards of size ZO_INIT_SIZE, starting from output+init_size.) * - * [output, output+kernel_total_size) is for the uncompressed kernel (VO) - * and its bss, brk, etc. - * [output, output+output_len) is VO plus relocs + * [input, input+input_size) is the original copied compressed image (ZO) + * (i.e. it does not include its run size). This range must be avoided + * because it contains the data used for decompression. * - * [output+init_size-ZO_INIT_SIZE, output+init_size) is the copied ZO. - * [input, input+input_len) is the copied compressed (VO (vmlinux after - * objcopy) plus relocs), not the ZO. + * [input+input_size, output+init_size) is [_text, _end) for ZO. This + * range includes ZO's heap and stack, and must be avoided since it + * performs the decompression. * - * [input+input_len, output+init_size) is [_text, _end) for ZO. That was the - * first range in mem_avoid, which included ZO's heap and stack. Also - * [input, input+input_size) need be put in mem_avoid array, but since it - * is adjacent to the first entry, they can be merged. This is how we get - * the first entry in mem_avoid[]. + * Since the above two ranges need to be avoided and they are adjacent, + * they can be merged, resulting in: [input, output+init_size) which + * becomes the MEM_AVOID_ZO_RANGE below. */ static void mem_avoid_init(unsigned long input, unsigned long input_size, unsigned long output) @@ -209,16 +239,16 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, * Avoid the region that is unsafe to overlap during * decompression. */ - mem_avoid[0].start = input; - mem_avoid[0].size = (output + init_size) - input; + mem_avoid[MEM_AVOID_ZO_RANGE].start = input; + mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; /* Avoid initrd. */ initrd_start = (u64)boot_params->ext_ramdisk_image << 32; initrd_start |= boot_params->hdr.ramdisk_image; initrd_size = (u64)boot_params->ext_ramdisk_size << 32; initrd_size |= boot_params->hdr.ramdisk_size; - mem_avoid[1].start = initrd_start; - mem_avoid[1].size = initrd_size; + mem_avoid[MEM_AVOID_INITRD].start = initrd_start; + mem_avoid[MEM_AVOID_INITRD].size = initrd_size; /* Avoid kernel command line. */ cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; @@ -227,12 +257,12 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, ptr = (char *)(unsigned long)cmd_line; for (cmd_line_size = 0; ptr[cmd_line_size++]; ) ; - mem_avoid[2].start = cmd_line; - mem_avoid[2].size = cmd_line_size; + mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; + mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; - /* Avoid params */ - mem_avoid[3].start = (unsigned long)boot_params; - mem_avoid[3].size = sizeof(*boot_params); + /* Avoid boot parameters. */ + mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; + mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); } /* Does this memory vector overlap a known avoided area? */ -- cgit v1.2.3 From 3a94707d7a7bb1eb82acae5fbc035247dd1ba8a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 May 2016 15:01:35 -0700 Subject: x86/KASLR: Build identity mappings on demand Currently KASLR only supports relocation in a small physical range (from 16M to 1G), due to using the initial kernel page table identity mapping. To support ranges above this, we need to have an identity mapping for the desired memory range before we can decompress (and later run) the kernel. 32-bit kernels already have the needed identity mapping. This patch adds identity mappings for the needed memory ranges on 64-bit kernels. This happens in two possible boot paths: If loaded via startup_32(), we need to set up the needed identity map. If loaded from a 64-bit bootloader, the bootloader will have already set up an identity mapping, and we'll start via the compressed kernel's startup_64(). In this case, the bootloader's page tables need to be avoided while selecting the new uncompressed kernel location. If not, the decompressor could overwrite them during decompression. To accomplish this, we could walk the pagetable and find every page that is used, and add them to mem_avoid, but this needs extra code and will require increasing the size of the mem_avoid array. Instead, we can create a new set of page tables for our own identity mapping instead. The pages for the new page table will come from the _pagetable section of the compressed kernel, which means they are already contained by in mem_avoid array. To do this, we reuse the code from the uncompressed kernel's identity mapping routines. The _pgtable will be shared by both the 32-bit and 64-bit paths to reduce init_size, as now the compressed kernel's _rodata to _end will contribute to init_size. To handle the possible mappings, we need to increase the existing page table buffer size: When booting via startup_64(), we need to cover the old VO, params, cmdline and uncompressed kernel. In an extreme case we could have them all beyond the 512G boundary, which needs (2+2)*4 pages with 2M mappings. And we'll need 2 for first 2M for VGA RAM. One more is needed for level4. This gets us to 19 pages total. When booting via startup_32(), KASLR could move the uncompressed kernel above 4G, so we need to create extra identity mappings, which should only need (2+2) pages at most when it is beyond the 512G boundary. So 19 pages is sufficient for this case as well. The resulting BOOT_*PGT_SIZE defines use the "_SIZE" suffix on their names to maintain logical consistency with the existing BOOT_HEAP_SIZE and BOOT_STACK_SIZE defines. This patch is based on earlier patches from Yinghai Lu and Baoquan He. Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Kosina Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462572095-11754-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 3 + arch/x86/boot/compressed/head_64.S | 4 +- arch/x86/boot/compressed/kaslr.c | 17 +++++ arch/x86/boot/compressed/misc.h | 11 +++ arch/x86/boot/compressed/pagetable.c | 135 +++++++++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 arch/x86/boot/compressed/pagetable.c (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 77ce3a04d46e..cfdd8c3f8af2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -75,6 +75,9 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o +ifdef CONFIG_X86_64 + vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o +endif $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 7c047002950c..0d80a7ad65cd 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -134,7 +134,7 @@ ENTRY(startup_32) /* Initialize Page tables to 0 */ leal pgtable(%ebx), %edi xorl %eax, %eax - movl $((4096*6)/4), %ecx + movl $(BOOT_INIT_PGT_SIZE/4), %ecx rep stosl /* Build Level 4 */ @@ -486,4 +486,4 @@ boot_stack_end: .section ".pgtable","a",@nobits .balign 4096 pgtable: - .fill 6*4096, 1, 0 + .fill BOOT_PGT_SIZE, 1, 0 diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8ef1186f792a..f82975b0f9d6 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -241,6 +241,8 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, */ mem_avoid[MEM_AVOID_ZO_RANGE].start = input; mem_avoid[MEM_AVOID_ZO_RANGE].size = (output + init_size) - input; + add_identity_map(mem_avoid[MEM_AVOID_ZO_RANGE].start, + mem_avoid[MEM_AVOID_ZO_RANGE].size); /* Avoid initrd. */ initrd_start = (u64)boot_params->ext_ramdisk_image << 32; @@ -249,6 +251,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, initrd_size |= boot_params->hdr.ramdisk_size; mem_avoid[MEM_AVOID_INITRD].start = initrd_start; mem_avoid[MEM_AVOID_INITRD].size = initrd_size; + /* No need to set mapping for initrd, it will be handled in VO. */ /* Avoid kernel command line. */ cmd_line = (u64)boot_params->ext_cmd_line_ptr << 32; @@ -259,10 +262,21 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, ; mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; + add_identity_map(mem_avoid[MEM_AVOID_CMDLINE].start, + mem_avoid[MEM_AVOID_CMDLINE].size); /* Avoid boot parameters. */ mem_avoid[MEM_AVOID_BOOTPARAMS].start = (unsigned long)boot_params; mem_avoid[MEM_AVOID_BOOTPARAMS].size = sizeof(*boot_params); + add_identity_map(mem_avoid[MEM_AVOID_BOOTPARAMS].start, + mem_avoid[MEM_AVOID_BOOTPARAMS].size); + + /* We don't need to set a mapping for setup_data. */ + +#ifdef CONFIG_X86_VERBOSE_BOOTUP + /* Make sure video RAM can be used. */ + add_identity_map(0, PMD_SIZE); +#endif } /* Does this memory vector overlap a known avoided area? */ @@ -421,6 +435,9 @@ unsigned char *choose_random_location(unsigned long input, goto out; choice = random_addr; + + add_identity_map(choice, output_size); + finalize_identity_maps(); out: return (unsigned char *)choice; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0112005a3f23..b6fec1ff10e4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -84,6 +84,17 @@ unsigned char *choose_random_location(unsigned long input_ptr, } #endif +#ifdef CONFIG_X86_64 +void add_identity_map(unsigned long start, unsigned long size); +void finalize_identity_maps(void); +extern unsigned char _pgtable[]; +#else +static inline void add_identity_map(unsigned long start, unsigned long size) +{ } +static inline void finalize_identity_maps(void) +{ } +#endif + #ifdef CONFIG_EARLY_PRINTK /* early_serial_console.c */ extern int early_serial_base; diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c new file mode 100644 index 000000000000..3c99051566a9 --- /dev/null +++ b/arch/x86/boot/compressed/pagetable.c @@ -0,0 +1,135 @@ +/* + * This code is used on x86_64 to create page table identity mappings on + * demand by building up a new set of page tables (or appending to the + * existing ones), and then switching over to them when ready. + */ + +/* + * Since we're dealing with identity mappings, physical and virtual + * addresses are the same, so override these defines which are ultimately + * used by the headers in misc.h. + */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + +#include "misc.h" + +/* These actually do the work of building the kernel identity maps. */ +#include +#include +#include "../../mm/ident_map.c" + +/* Used by pgtable.h asm code to force instruction serialization. */ +unsigned long __force_order; + +/* Used to track our page table allocation area. */ +struct alloc_pgt_data { + unsigned char *pgt_buf; + unsigned long pgt_buf_size; + unsigned long pgt_buf_offset; +}; + +/* + * Allocates space for a page table entry, using struct alloc_pgt_data + * above. Besides the local callers, this is used as the allocation + * callback in mapping_info below. + */ +static void *alloc_pgt_page(void *context) +{ + struct alloc_pgt_data *pages = (struct alloc_pgt_data *)context; + unsigned char *entry; + + /* Validate there is space available for a new page. */ + if (pages->pgt_buf_offset >= pages->pgt_buf_size) { + debug_putstr("out of pgt_buf in " __FILE__ "!?\n"); + debug_putaddr(pages->pgt_buf_offset); + debug_putaddr(pages->pgt_buf_size); + return NULL; + } + + entry = pages->pgt_buf + pages->pgt_buf_offset; + pages->pgt_buf_offset += PAGE_SIZE; + + return entry; +} + +/* Used to track our allocated page tables. */ +static struct alloc_pgt_data pgt_data; + +/* The top level page table entry pointer. */ +static unsigned long level4p; + +/* Locates and clears a region for a new top level page table. */ +static void prepare_level4(void) +{ + /* + * It should be impossible for this not to already be true, + * but since calling this a second time would rewind the other + * counters, let's just make sure this is reset too. + */ + pgt_data.pgt_buf_offset = 0; + + /* + * If we came here via startup_32(), cr3 will be _pgtable already + * and we must append to the existing area instead of entirely + * overwriting it. + */ + level4p = read_cr3(); + if (level4p == (unsigned long)_pgtable) { + debug_putstr("booted via startup_32()\n"); + pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + } else { + debug_putstr("booted via startup_64()\n"); + pgt_data.pgt_buf = _pgtable; + pgt_data.pgt_buf_size = BOOT_PGT_SIZE; + memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); + level4p = (unsigned long)alloc_pgt_page(&pgt_data); + } +} + +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Since this never changes, there's no reason to repeatedly fill it + * in on the stack when calling add_identity_map(). + */ +static struct x86_mapping_info mapping_info = { + .alloc_pgt_page = alloc_pgt_page, + .context = &pgt_data, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, +}; + +/* + * Adds the specified range to what will become the new identity mappings. + * Once all ranges have been added, the new mapping is activated by calling + * finalize_identity_maps() below. + */ +void add_identity_map(unsigned long start, unsigned long size) +{ + unsigned long end = start + size; + + /* Make sure we have a top level page table ready to use. */ + if (!level4p) + prepare_level4(); + + /* Align boundary to 2M. */ + start = round_down(start, PMD_SIZE); + end = round_up(end, PMD_SIZE); + if (start >= end) + return; + + /* Build the mapping. */ + kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p, + start, end); +} + +/* + * This switches the page tables to the new level4 that has been built + * via calls to add_identity_map() above. If booted via startup_32(), + * this is effectively a no-op. + */ +void finalize_identity_maps(void) +{ + write_cr3(level4p); +} -- cgit v1.2.3 From 36a39ac967a548154a0fe44d71cb0063fa05010f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 7 May 2016 11:59:40 +0200 Subject: x86/boot: Comment what finalize_identity_maps() does So it is not really obvious that finalize_identity_maps() doesn't do any finalization but it *actually* writes CR3 with the ident PGD. Comment that at the call site. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: bhe@redhat.com Cc: dyoung@redhat.com Cc: jkosina@suse.cz Cc: linux-tip-commits@vger.kernel.org Cc: luto@kernel.org Cc: vgoyal@redhat.com Cc: yinghai@kernel.org Link: http://lkml.kernel.org/r/20160507100541.GA24613@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index f82975b0f9d6..f5a138c3fe96 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -437,6 +437,8 @@ unsigned char *choose_random_location(unsigned long input, choice = random_addr; add_identity_map(choice, output_size); + + /* This actually loads the identity pagetable on x86_64. */ finalize_identity_maps(); out: return (unsigned char *)choice; -- cgit v1.2.3 From 434a6c9f90f7ab5ade619455df01ef5ebea533ee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 May 2016 13:22:04 -0700 Subject: x86/KASLR: Initialize mapping_info every time As it turns out, mapping_info DOES need to be initialized every time, because pgt_data address could be changed during kernel relocation. So it can not be build time assigned. Without this, page tables were not being corrected updated, which could cause reboots when a physical address beyond 2G was chosen. Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/pagetable.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 3c99051566a9..34b95df14e69 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -89,17 +89,6 @@ static void prepare_level4(void) } } -/* - * Mapping information structure passed to kernel_ident_mapping_init(). - * Since this never changes, there's no reason to repeatedly fill it - * in on the stack when calling add_identity_map(). - */ -static struct x86_mapping_info mapping_info = { - .alloc_pgt_page = alloc_pgt_page, - .context = &pgt_data, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, -}; - /* * Adds the specified range to what will become the new identity mappings. * Once all ranges have been added, the new mapping is activated by calling @@ -107,6 +96,11 @@ static struct x86_mapping_info mapping_info = { */ void add_identity_map(unsigned long start, unsigned long size) { + struct x86_mapping_info mapping_info = { + .alloc_pgt_page = alloc_pgt_page, + .context = &pgt_data, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + }; unsigned long end = start + size; /* Make sure we have a top level page table ready to use. */ -- cgit v1.2.3 From cb18ef0da259db611fbf52806592fde5f469ae67 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 May 2016 13:22:05 -0700 Subject: x86/boot: Add missing file header comments There were some files with missing header comments. Since they are included from both compressed and regular kernels, make note of that. Also corrects a typo in the mem_avoid comments. Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 +- arch/x86/boot/early_serial_console.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index f5a138c3fe96..f15d7b8d1b16 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -163,7 +163,7 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) * memory ranges lead to really hard to debug boot failures. * * The initrd, cmdline, and boot_params are trivial to identify for - * avoiding. The are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and + * avoiding. They are MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, and * MEM_AVOID_BOOTPARAMS respectively below. * * What is not obvious how to avoid is the range of memory that is used diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 45a07684bbab..f0b8d6d93164 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -1,3 +1,7 @@ +/* + * Serial port routines for use during early boot reporting. This code is + * included from both the compressed kernel and the regular kernel. + */ #include "boot.h" #define DEFAULT_SERIAL_PORT 0x3f8 /* ttyS0 */ -- cgit v1.2.3 From c401cf1524153f9c2ede7ab8ece403513925770a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 9 May 2016 13:22:06 -0700 Subject: x86/KASLR: Add 'struct slot_area' to manage random_addr slots In order to support KASLR moving the kernel anywhere in physical memory (which could be up to 64TB), we need to handle counting the potential randomization locations in a more efficient manner. In the worst case with 64TB, there could be roughly 32 * 1024 * 1024 randomization slots if CONFIG_PHYSICAL_ALIGN is 0x1000000. Currently the starting address of candidate positions is stored into the slots[] array, one at a time. This method would cost too much memory and it's also very inefficient to get and save the slot information into the slot array one by one. This patch introduces 'struct slot_area' to manage each contiguous region of randomization slots. Each slot_area will contain the starting address and how many available slots are in this area. As with the original code, the slot_areas[] will avoid the mem_avoid[] regions. Since setup_data is a linked list, it could contain an unknown number of memory regions to be avoided, which could cause us to fragment the contiguous memory that the slot_area array is tracking. In normal operation this level of fragmentation will be extremely rare, but we choose a suitably large value (100) for the array. If setup_data forces the slot_area array to become highly fragmented and there are more slots available beyond the first 100 found, the rest will be ignored for KASLR selection. The function store_slot_info() is used to calculate the number of slots available in the passed-in memory region and stores it into slot_areas[] after adjusting for alignment and size requirements. Signed-off-by: Baoquan He [ Rewrote changelog, squashed with new functions. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index f15d7b8d1b16..81edf992277a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -308,8 +308,37 @@ static bool mem_avoid_overlap(struct mem_vector *img) } static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; + +struct slot_area { + unsigned long addr; + int num; +}; + +#define MAX_SLOT_AREA 100 + +static struct slot_area slot_areas[MAX_SLOT_AREA]; + static unsigned long slot_max; +static unsigned long slot_area_index; + +static void store_slot_info(struct mem_vector *region, unsigned long image_size) +{ + struct slot_area slot_area; + + if (slot_area_index == MAX_SLOT_AREA) + return; + + slot_area.addr = region->start; + slot_area.num = (region->size - image_size) / + CONFIG_PHYSICAL_ALIGN + 1; + + if (slot_area.num > 0) { + slot_areas[slot_area_index++] = slot_area; + slot_max += slot_area.num; + } +} + static void slots_append(unsigned long addr) { /* Overflowing the slots list should be impossible. */ -- cgit v1.2.3 From 06486d6c97cebc2433a40a979f3849cd68184de9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 May 2016 13:22:07 -0700 Subject: x86/KASLR: Return earliest overlap when avoiding regions In preparation for being able to detect where to split up contiguous memory regions that overlap with memory regions to avoid, we need to pass back what the earliest overlapping region was. This modifies the overlap checker to return that information. Based on a separate mem_min_overlap() implementation by Baoquan He. Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 81edf992277a..e55ebcbfa290 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -279,15 +279,24 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, #endif } -/* Does this memory vector overlap a known avoided area? */ -static bool mem_avoid_overlap(struct mem_vector *img) +/* + * Does this memory vector overlap a known avoided area? If so, record the + * overlap region with the lowest address. + */ +static bool mem_avoid_overlap(struct mem_vector *img, + struct mem_vector *overlap) { int i; struct setup_data *ptr; + unsigned long earliest = img->start + img->size; + bool is_overlapping = false; for (i = 0; i < MEM_AVOID_MAX; i++) { - if (mem_overlaps(img, &mem_avoid[i])) - return true; + if (mem_overlaps(img, &mem_avoid[i]) && + mem_avoid[i].start < earliest) { + *overlap = mem_avoid[i]; + is_overlapping = true; + } } /* Avoid all entries in the setup_data linked list. */ @@ -298,13 +307,15 @@ static bool mem_avoid_overlap(struct mem_vector *img) avoid.start = (unsigned long)ptr; avoid.size = sizeof(*ptr) + ptr->len; - if (mem_overlaps(img, &avoid)) - return true; + if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { + *overlap = avoid; + is_overlapping = true; + } ptr = (struct setup_data *)(unsigned long)ptr->next; } - return false; + return is_overlapping; } static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; @@ -361,7 +372,7 @@ static void process_e820_entry(struct e820entry *entry, unsigned long minimum, unsigned long image_size) { - struct mem_vector region, img; + struct mem_vector region, img, overlap; /* Skip non-RAM entries. */ if (entry->type != E820_RAM) @@ -400,7 +411,7 @@ static void process_e820_entry(struct e820entry *entry, for (img.start = region.start, img.size = image_size ; mem_contains(®ion, &img) ; img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img)) + if (mem_avoid_overlap(&img, &overlap)) continue; slots_append(img.start); } -- cgit v1.2.3 From 071a74930e60d1fa51207d71f00a35b4f9d4d179 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 9 May 2016 13:22:08 -0700 Subject: x86/KASLR: Add virtual address choosing function To support randomizing the kernel virtual address separately from the physical address, this patch adds find_random_virt_addr() to choose a slot anywhere between LOAD_PHYSICAL_ADDR and KERNEL_IMAGE_SIZE. Since this address is virtual, not physical, we can place the kernel anywhere in this region, as long as it is aligned and (in the case of kernel being larger than the slot size) placed with enough room to load the entire kernel image. For clarity and readability, find_random_addr() is renamed to find_random_phys_addr() and has "size" renamed to "image_size" to match find_random_virt_addr(). Signed-off-by: Baoquan He [ Rewrote changelog, refactored slot calculation for readability. ] [ Renamed find_random_phys_addr() and size argument. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index e55ebcbfa290..016a4f48b19e 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -417,8 +417,8 @@ static void process_e820_entry(struct e820entry *entry, } } -static unsigned long find_random_addr(unsigned long minimum, - unsigned long size) +static unsigned long find_random_phys_addr(unsigned long minimum, + unsigned long image_size) { int i; unsigned long addr; @@ -428,12 +428,36 @@ static unsigned long find_random_addr(unsigned long minimum, /* Verify potential e820 positions, appending to slots list. */ for (i = 0; i < boot_params->e820_entries; i++) { - process_e820_entry(&boot_params->e820_map[i], minimum, size); + process_e820_entry(&boot_params->e820_map[i], minimum, + image_size); } return slots_fetch_random(); } +static unsigned long find_random_virt_addr(unsigned long minimum, + unsigned long image_size) +{ + unsigned long slots, random_addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + /* Align image_size for easy slot calculations. */ + image_size = ALIGN(image_size, CONFIG_PHYSICAL_ALIGN); + + /* + * There are how many CONFIG_PHYSICAL_ALIGN-sized slots + * that can hold image_size within the range of minimum to + * KERNEL_IMAGE_SIZE? + */ + slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / + CONFIG_PHYSICAL_ALIGN + 1; + + random_addr = get_random_long() % slots; + + return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; +} + /* * Since this function examines addresses much more numerically, * it takes the input and output pointers as 'unsigned long'. @@ -464,7 +488,7 @@ unsigned char *choose_random_location(unsigned long input, mem_avoid_init(input, input_size, output); /* Walk e820 and find a random address. */ - random_addr = find_random_addr(output, output_size); + random_addr = find_random_phys_addr(output, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); goto out; -- cgit v1.2.3 From d2d3462f9f08da364c8fbd41e8e32229d610d49d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 May 2016 13:22:09 -0700 Subject: x86/KASLR: Clarify purpose of each get_random_long() KASLR will be calling get_random_long() twice, but the debug output won't distinguishing between them. This patch adds a report on when it is fetching the physical vs virtual address. With this, once the virtual offset is separate, the report changes from: KASLR using RDTSC... KASLR using RDTSC... into: Physical KASLR using RDTSC... Virtual KASLR using RDTSC... Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1462825332-10505-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 016a4f48b19e..cfeb0259ed81 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -72,7 +72,7 @@ static unsigned long get_random_boot(void) return hash; } -static unsigned long get_random_long(void) +static unsigned long get_random_long(const char *purpose) { #ifdef CONFIG_X86_64 const unsigned long mix_const = 0x5d6008cbf3848dd3UL; @@ -82,7 +82,8 @@ static unsigned long get_random_long(void) unsigned long raw, random = get_random_boot(); bool use_i8254 = true; - debug_putstr("KASLR using"); + debug_putstr(purpose); + debug_putstr(" KASLR using"); if (has_cpuflag(X86_FEATURE_RDRAND)) { debug_putstr(" RDRAND"); @@ -365,7 +366,7 @@ static unsigned long slots_fetch_random(void) if (slot_max == 0) return 0; - return slots[get_random_long() % slot_max]; + return slots[get_random_long("Physical") % slot_max]; } static void process_e820_entry(struct e820entry *entry, @@ -453,7 +454,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum, slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN + 1; - random_addr = get_random_long() % slots; + random_addr = get_random_long("Virtual") % slots; return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; } -- cgit v1.2.3