// SPDX-License-Identifier: GPL-2.0-only /* * powerpc code to implement the kexec_file_load syscall * * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) * Copyright (C) 2004 IBM Corp. * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) * Copyright (C) 2020 IBM Corporation * * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. * Heavily modified for the kernel by * Hari Bathini, IBM Corporation. */ #define pr_fmt(fmt) "kexec ranges: " fmt #include #include #include #include #include #include #include #include #include #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) /** * get_max_nr_ranges - Get the max no. of ranges crash_mem structure * could hold, given the size allocated for it. * @size: Allocation size of crash_mem structure. * * Returns the maximum no. of ranges. */ static inline unsigned int get_max_nr_ranges(size_t size) { return ((size - sizeof(struct crash_mem)) / sizeof(struct range)); } /** * get_mem_rngs_size - Get the allocated size of mem_rngs based on * max_nr_ranges and chunk size. * @mem_rngs: Memory ranges. * * Returns the maximum size of @mem_rngs. */ static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) { size_t size; if (!mem_rngs) return 0; size = (sizeof(struct crash_mem) + (mem_rngs->max_nr_ranges * sizeof(struct range))); /* * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. * So, align to get the actual length. */ return ALIGN(size, MEM_RANGE_CHUNK_SZ); } /** * __add_mem_range - add a memory range to memory ranges list. * @mem_ranges: Range list to add the memory range to. * @base: Base address of the range to add. * @size: Size of the memory range to add. * * (Re)allocates memory, if needed. * * Returns 0 on success, negative errno on error. */ static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) { struct crash_mem *mem_rngs = *mem_ranges; if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { mem_rngs = realloc_mem_ranges(mem_ranges); if (!mem_rngs) return -ENOMEM; } mem_rngs->ranges[mem_rngs->nr_ranges].start = base; mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", base, base + size - 1, mem_rngs->nr_ranges); mem_rngs->nr_ranges++; return 0; } /** * __merge_memory_ranges - Merges the given memory ranges list. * @mem_rngs: Range list to merge. * * Assumes a sorted range list. * * Returns nothing. */ static void __merge_memory_ranges(struct crash_mem *mem_rngs) { struct range *ranges; int i, idx; if (!mem_rngs) return; idx = 0; ranges = &(mem_rngs->ranges[0]); for (i = 1; i < mem_rngs->nr_ranges; i++) { if (ranges[i].start <= (ranges[i-1].end + 1)) ranges[idx].end = ranges[i].end; else { idx++; if (i == idx) continue; ranges[idx] = ranges[i]; } } mem_rngs->nr_ranges = idx + 1; } /* cmp_func_t callback to sort ranges with sort() */ static int rngcmp(const void *_x, const void *_y) { const struct range *x = _x, *y = _y; if (x->start > y->start) return 1; if (x->start < y->start) return -1; return 0; } /** * sort_memory_ranges - Sorts the given memory ranges list. * @mem_rngs: Range list to sort. * @merge: If true, merge the list after sorting. * * Returns nothing. */ void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) { int i; if (!mem_rngs) return; /* Sort the ranges in-place */ sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, sizeof(mem_rngs->ranges[0]), rngcmp, NULL); if (merge) __merge_memory_ranges(mem_rngs); /* For debugging purpose */ pr_debug("Memory ranges:\n"); for (i = 0; i < mem_rngs->nr_ranges; i++) { pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, mem_rngs->ranges[i].start, mem_rngs->ranges[i].end); } } /** * realloc_mem_ranges - reallocate mem_ranges with size incremented * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, * if memory allocation fails. * @mem_ranges: Memory ranges to reallocate. * * Returns pointer to reallocated memory on success, NULL otherwise. */ struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) { struct crash_mem *mem_rngs = *mem_ranges; unsigned int nr_ranges; size_t size; size = get_mem_rngs_size(mem_rngs); nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; size += MEM_RANGE_CHUNK_SZ; mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); if (!mem_rngs) { kfree(*mem_ranges); *mem_ranges = NULL; return NULL; } mem_rngs->nr_ranges = nr_ranges; mem_rngs->max_nr_ranges = get_max_nr_ranges(size); *mem_ranges = mem_rngs; return mem_rngs; } /** * add_mem_range - Updates existing memory range, if there is an overlap. * Else, adds a new memory range. * @mem_ranges: Range list to add the memory range to. * @base: Base address of the range to add. * @size: Size of the memory range to add. * * (Re)allocates memory, if needed. * * Returns 0 on success, negative errno on error. */ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) { struct crash_mem *mem_rngs = *mem_ranges; u64 mstart, mend, end; unsigned int i; if (!size) return 0; end = base + size - 1; if (!mem_rngs || !(mem_rngs->nr_ranges)) return __add_mem_range(mem_ranges, base, size); for (i = 0; i < mem_rngs->nr_ranges; i++) { mstart = mem_rngs->ranges[i].start; mend = mem_rngs->ranges[i].end; if (base < mend && end > mstart) { if (base < mstart) mem_rngs->ranges[i].start = base; if (end > mend) mem_rngs->ranges[i].end = end; return 0; } } return __add_mem_range(mem_ranges, base, size); } #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ #ifdef CONFIG_KEXEC_FILE /** * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. * @mem_ranges: Range list to add the memory range(s) to. * * Returns 0 on success, negative errno on error. */ static int add_tce_mem_ranges(struct crash_mem **mem_ranges) { struct device_node *dn = NULL; int ret = 0; for_each_node_by_type(dn, "pci") { u64 base; u32 size; ret = of_property_read_u64(dn, "linux,tce-base", &base); ret |= of_property_read_u32(dn, "linux,tce-size", &size); if (ret) { /* * It is ok to have pci nodes without tce. So, ignore * property does not exist error. */ if (ret == -EINVAL) { ret = 0; continue; } break; } ret = add_mem_range(mem_ranges, base, size); if (ret) break; } of_node_put(dn); return ret; } /** * add_initrd_mem_range - Adds initrd range to the given memory ranges list, * if the initrd was retained. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ static int add_initrd_mem_range(struct crash_mem **mem_ranges) { u64 base, end; int ret; /* This range means something, only if initrd was retained */ if (!strstr(saved_command_line, "retain_initrd")) return 0; ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); if (!ret) ret = add_mem_range(mem_ranges, base, end - base + 1); return ret; } /** * add_htab_mem_range - Adds htab range to the given memory ranges list, * if it exists * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ static int add_htab_mem_range(struct crash_mem **mem_ranges) { #ifdef CONFIG_PPC_64S_HASH_MMU if (!htab_address) return 0; return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); #else return 0; #endif } /** * add_kernel_mem_range - Adds kernel text region to the given * memory ranges list. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ static int add_kernel_mem_range(struct crash_mem **mem_ranges) { return add_mem_range(mem_ranges, 0, __pa(_end)); } #endif /* CONFIG_KEXEC_FILE */ #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) /** * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ static int add_rtas_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u32 base, size; int ret = 0; dn = of_find_node_by_path("/rtas"); if (!dn) return 0; ret = of_property_read_u32(dn, "linux,rtas-base", &base); ret |= of_property_read_u32(dn, "rtas-size", &size); if (!ret) ret = add_mem_range(mem_ranges, base, size); of_node_put(dn); return ret; } /** * add_opal_mem_range - Adds OPAL region to the given memory ranges list. * @mem_ranges: Range list to add the memory range to. * * Returns 0 on success, negative errno on error. */ static int add_opal_mem_range(struct crash_mem **mem_ranges) { struct device_node *dn; u64 base, size; int ret; dn = of_find_node_by_path("/ibm,opal"); if (!dn) return 0; ret = of_property_read_u64(dn, "opal-base-address", &base); ret |= of_property_read_u64(dn, "opal-runtime-size", &size); if (!ret) ret = add_mem_range(mem_ranges, base, size); of_node_put(dn); return ret; } #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ #ifdef CONFIG_KEXEC_FILE /** * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w * to the given memory ranges list. * @mem_ranges: Range list to add the memory ranges to. * * Returns 0 on success, negative errno on error. */ static int add_reserved_mem_ranges(struct crash_mem **mem_ranges) { int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; struct device_node *root = of_find_node_by_path("/"); const __be32 *prop; prop = of_get_property(root, "reserved-ranges", &len); n_mem_addr_cells = of_n_addr_cells(root); n_mem_size_cells = of_n_size_cells(root); of_node_put(root); if (!prop) return 0; cells = n_mem_addr_cells + n_mem_size_cells; /* Each reserved range is an (address,size) pair */ for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { u64 base, size; base = of_read_number(prop + (i * cells), n_mem_addr_cells); size = of_read_number(prop + (i * cells) + n_mem_addr_cells, n_mem_size_cells); ret = add_mem_range(mem_ranges, base, size); if (ret) break; } return ret; } /** * get_reserved_memory_ranges - Get reserve memory ranges. This list includes * memory regions that should be added to the * memory reserve map to ensure the region is * protected from any mischief. * @mem_ranges: Range list to add the memory ranges to. * * Returns 0 on success, negative errno on error. */ int get_reserved_memory_ranges(struct crash_mem **mem_ranges) { int ret; ret = add_rtas_mem_range(mem_ranges); if (ret) goto out; ret = add_tce_mem_ranges(mem_ranges); if (ret) goto out; ret = add_reserved_mem_ranges(mem_ranges); out: if (ret) pr_err("Failed to setup reserved memory ranges\n"); return ret; } /** * get_exclude_memory_ranges - Get exclude memory ranges. This list includes * regions like opal/rtas, tce-table, initrd, * kernel, htab which should be avoided while * setting up kexec load segments. * @mem_ranges: Range list to add the memory ranges to. * * Returns 0 on success, negative errno on error. */ int get_exclude_memory_ranges(struct crash_mem **mem_ranges) { int ret; ret = add_tce_mem_ranges(mem_ranges); if (ret) goto out; ret = add_initrd_mem_range(mem_ranges); if (ret) goto out; ret = add_htab_mem_range(mem_ranges); if (ret) goto out; ret = add_kernel_mem_range(mem_ranges); if (ret) goto out; ret = add_rtas_mem_range(mem_ranges); if (ret) goto out; ret = add_opal_mem_range(mem_ranges); if (ret) goto out; ret = add_reserved_mem_ranges(mem_ranges); if (ret) goto out; /* exclude memory ranges should be sorted for easy lookup */ sort_memory_ranges(*mem_ranges, true); out: if (ret) pr_err("Failed to setup exclude memory ranges\n"); return ret; } #ifdef CONFIG_CRASH_DUMP /** * get_usable_memory_ranges - Get usable memory ranges. This list includes * regions like crashkernel, opal/rtas & tce-table, * that kdump kernel could use. * @mem_ranges: Range list to add the memory ranges to. * * Returns 0 on success, negative errno on error. */ int get_usable_memory_ranges(struct crash_mem **mem_ranges) { int ret; /* * Early boot failure observed on guests when low memory (first memory * block?) is not added to usable memory. So, add [0, crashk_res.end] * instead of [crashk_res.start, crashk_res.end] to workaround it. * Also, crashed kernel's memory must be added to reserve map to * avoid kdump kernel from using it. */ ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); if (ret) goto out; ret = add_rtas_mem_range(mem_ranges); if (ret) goto out; ret = add_opal_mem_range(mem_ranges); if (ret) goto out; ret = add_tce_mem_ranges(mem_ranges); out: if (ret) pr_err("Failed to setup usable memory ranges\n"); return ret; } #endif /* CONFIG_CRASH_DUMP */ #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP /** * get_crash_memory_ranges - Get crash memory ranges. This list includes * first/crashing kernel's memory regions that * would be exported via an elfcore. * @mem_ranges: Range list to add the memory ranges to. * * Returns 0 on success, negative errno on error. */ int get_crash_memory_ranges(struct crash_mem **mem_ranges) { phys_addr_t base, end; struct crash_mem *tmem; u64 i; int ret; for_each_mem_range(i, &base, &end) { u64 size = end - base; /* Skip backup memory region, which needs a separate entry */ if (base == BACKUP_SRC_START) { if (size > BACKUP_SRC_SIZE) { base = BACKUP_SRC_END + 1; size -= BACKUP_SRC_SIZE; } else continue; } ret = add_mem_range(mem_ranges, base, size); if (ret) goto out; /* Try merging adjacent ranges before reallocation attempt */ if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) sort_memory_ranges(*mem_ranges, true); } /* Reallocate memory ranges if there is no space to split ranges */ tmem = *mem_ranges; if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { tmem = realloc_mem_ranges(mem_ranges); if (!tmem) goto out; } /* Exclude crashkernel region */ ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); if (ret) goto out; /* * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL * regions are exported to save their context at the time of * crash, they should actually be backed up just like the * first 64K bytes of memory. */ ret = add_rtas_mem_range(mem_ranges); if (ret) goto out; ret = add_opal_mem_range(mem_ranges); if (ret) goto out; /* create a separate program header for the backup region */ ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); if (ret) goto out; sort_memory_ranges(*mem_ranges, false); out: if (ret) pr_err("Failed to setup crash memory ranges\n"); return ret; } /** * remove_mem_range - Removes the given memory range from the range list. * @mem_ranges: Range list to remove the memory range to. * @base: Base address of the range to remove. * @size: Size of the memory range to remove. * * (Re)allocates memory, if needed. * * Returns 0 on success, negative errno on error. */ int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) { u64 end; int ret = 0; unsigned int i; u64 mstart, mend; struct crash_mem *mem_rngs = *mem_ranges; if (!size) return 0; /* * Memory range are stored as start and end address, use * the same format to do remove operation. */ end = base + size - 1; for (i = 0; i < mem_rngs->nr_ranges; i++) { mstart = mem_rngs->ranges[i].start; mend = mem_rngs->ranges[i].end; /* * Memory range to remove is not part of this range entry * in the memory range list */ if (!(base >= mstart && end <= mend)) continue; /* * Memory range to remove is equivalent to this entry in the * memory range list. Remove the range entry from the list. */ if (base == mstart && end == mend) { for (; i < mem_rngs->nr_ranges - 1; i++) { mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; } mem_rngs->nr_ranges--; goto out; } /* * Start address of the memory range to remove and the * current memory range entry in the list is same. Just * move the start address of the current memory range * entry in the list to end + 1. */ else if (base == mstart) { mem_rngs->ranges[i].start = end + 1; goto out; } /* * End address of the memory range to remove and the * current memory range entry in the list is same. * Just move the end address of the current memory * range entry in the list to base - 1. */ else if (end == mend) { mem_rngs->ranges[i].end = base - 1; goto out; } /* * Memory range to remove is not at the edge of current * memory range entry. Split the current memory entry into * two half. */ else { mem_rngs->ranges[i].end = base - 1; size = mem_rngs->ranges[i].end - end; ret = add_mem_range(mem_ranges, end + 1, size); } } out: return ret; } #endif /* CONFIG_CRASH_DUMP */