diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 17:29:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 17:29:11 -0700 |
commit | 65090f30ab791810a3dc840317e57df05018559c (patch) | |
tree | f417526656da37109777e89613e140ffc59228bc /mm/memory-failure.c | |
parent | 349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff) | |
parent | 0ed950d1f28142ccd9a9453c60df87853530d778 (diff) | |
download | linux-65090f30ab791810a3dc840317e57df05018559c.tar.gz linux-65090f30ab791810a3dc840317e57df05018559c.tar.bz2 linux-65090f30ab791810a3dc840317e57df05018559c.zip |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"191 patches.
Subsystems affected by this patch series: kthread, ia64, scripts,
ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab,
slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap,
mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization,
pagealloc, and memory-failure)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits)
mm,hwpoison: make get_hwpoison_page() call get_any_page()
mm,hwpoison: send SIGBUS with error virutal address
mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes
mm/page_alloc: allow high-order pages to be stored on the per-cpu lists
mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM
mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA
docs: remove description of DISCONTIGMEM
arch, mm: remove stale mentions of DISCONIGMEM
mm: remove CONFIG_DISCONTIGMEM
m68k: remove support for DISCONTIGMEM
arc: remove support for DISCONTIGMEM
arc: update comment about HIGHMEM implementation
alpha: remove DISCONTIGMEM and NUMA
mm/page_alloc: move free_the_page
mm/page_alloc: fix counting of managed_pages
mm/page_alloc: improve memmap_pages dbg msg
mm: drop SECTION_SHIFT in code comments
mm/page_alloc: introduce vm.percpu_pagelist_high_fraction
mm/page_alloc: limit the number of pages on PCP lists when reclaim is active
mm/page_alloc: scale the number of pages that are batch freed
...
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 344 |
1 files changed, 258 insertions, 86 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6f5f78885ab4..e5a1531f7f4e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -56,6 +56,7 @@ #include <linux/kfifo.h> #include <linux/ratelimit.h> #include <linux/page-isolation.h> +#include <linux/pagewalk.h> #include "internal.h" #include "ras/ras_event.h" @@ -554,6 +555,148 @@ static void collect_procs(struct page *page, struct list_head *tokill, collect_procs_file(page, tokill, force_early); } +struct hwp_walk { + struct to_kill tk; + unsigned long pfn; + int flags; +}; + +static void set_to_kill(struct to_kill *tk, unsigned long addr, short shift) +{ + tk->addr = addr; + tk->size_shift = shift; +} + +static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, + unsigned long poisoned_pfn, struct to_kill *tk) +{ + unsigned long pfn = 0; + + if (pte_present(pte)) { + pfn = pte_pfn(pte); + } else { + swp_entry_t swp = pte_to_swp_entry(pte); + + if (is_hwpoison_entry(swp)) + pfn = hwpoison_entry_to_pfn(swp); + } + + if (!pfn || pfn != poisoned_pfn) + return 0; + + set_to_kill(tk, addr, shift); + return 1; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + pmd_t pmd = *pmdp; + unsigned long pfn; + unsigned long hwpoison_vaddr; + + if (!pmd_present(pmd)) + return 0; + pfn = pmd_pfn(pmd); + if (pfn <= hwp->pfn && hwp->pfn < pfn + HPAGE_PMD_NR) { + hwpoison_vaddr = addr + ((hwp->pfn - pfn) << PAGE_SHIFT); + set_to_kill(&hwp->tk, hwpoison_vaddr, PAGE_SHIFT); + return 1; + } + return 0; +} +#else +static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, + struct hwp_walk *hwp) +{ + return 0; +} +#endif + +static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + int ret = 0; + pte_t *ptep; + spinlock_t *ptl; + + ptl = pmd_trans_huge_lock(pmdp, walk->vma); + if (ptl) { + ret = check_hwpoisoned_pmd_entry(pmdp, addr, hwp); + spin_unlock(ptl); + goto out; + } + + if (pmd_trans_unstable(pmdp)) + goto out; + + ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl); + for (; addr != end; ptep++, addr += PAGE_SIZE) { + ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT, + hwp->pfn, &hwp->tk); + if (ret == 1) + break; + } + pte_unmap_unlock(ptep - 1, ptl); +out: + cond_resched(); + return ret; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hwp_walk *hwp = (struct hwp_walk *)walk->private; + pte_t pte = huge_ptep_get(ptep); + struct hstate *h = hstate_vma(walk->vma); + + return check_hwpoisoned_entry(pte, addr, huge_page_shift(h), + hwp->pfn, &hwp->tk); +} +#else +#define hwpoison_hugetlb_range NULL +#endif + +static struct mm_walk_ops hwp_walk_ops = { + .pmd_entry = hwpoison_pte_range, + .hugetlb_entry = hwpoison_hugetlb_range, +}; + +/* + * Sends SIGBUS to the current process with error info. + * + * This function is intended to handle "Action Required" MCEs on already + * hardware poisoned pages. They could happen, for example, when + * memory_failure() failed to unmap the error page at the first call, or + * when multiple local machine checks happened on different CPUs. + * + * MCE handler currently has no easy access to the error virtual address, + * so this function walks page table to find it. The returned virtual address + * is proper in most cases, but it could be wrong when the application + * process has multiple entries mapping the error page. + */ +static int kill_accessing_process(struct task_struct *p, unsigned long pfn, + int flags) +{ + int ret; + struct hwp_walk priv = { + .pfn = pfn, + }; + priv.tk.tsk = p; + + mmap_read_lock(p->mm); + ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops, + (void *)&priv); + if (ret == 1 && priv.tk.addr) + kill_proc(&priv.tk, pfn, flags); + mmap_read_unlock(p->mm); + return ret ? -EFAULT : -EHWPOISON; +} + static const char *action_name[] = { [MF_IGNORED] = "Ignored", [MF_FAILED] = "Failed", @@ -974,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page) return PageLRU(page) || __PageMovable(page); } -/** - * __get_hwpoison_page() - Get refcount for memory error handling: - * @page: raw error page (hit by memory error) - * - * Return: return 0 if failed to grab the refcount, otherwise true (some - * non-zero value.) - */ static int __get_hwpoison_page(struct page *page) { struct page *head = compound_head(page); @@ -1025,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page) return 0; } -/* - * Safely get reference count of an arbitrary page. - * - * Returns 0 for a free page, 1 for an in-use page, - * -EIO for a page-type we cannot handle and -EBUSY if we raced with an - * allocation. - * We only incremented refcount in case the page was already in-use and it - * is a known type we can handle. - */ static int get_any_page(struct page *p, unsigned long flags) { int ret = 0, pass = 0; @@ -1043,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags) count_increased = true; try_again: - if (!count_increased && !__get_hwpoison_page(p)) { - if (page_count(p)) { - /* We raced with an allocation, retry. */ - if (pass++ < 3) - goto try_again; - ret = -EBUSY; - } else if (!PageHuge(p) && !is_free_buddy_page(p)) { - /* We raced with put_page, retry. */ + if (!count_increased) { + ret = __get_hwpoison_page(p); + if (!ret) { + if (page_count(p)) { + /* We raced with an allocation, retry. */ + if (pass++ < 3) + goto try_again; + ret = -EBUSY; + } else if (!PageHuge(p) && !is_free_buddy_page(p)) { + /* We raced with put_page, retry. */ + if (pass++ < 3) + goto try_again; + ret = -EIO; + } + goto out; + } else if (ret == -EBUSY) { + /* We raced with freeing huge page to buddy, retry. */ if (pass++ < 3) goto try_again; - ret = -EIO; + goto out; } + } + + if (PageHuge(p) || HWPoisonHandlable(p)) { + ret = 1; } else { - if (PageHuge(p) || HWPoisonHandlable(p)) { - ret = 1; - } else { - /* - * A page we cannot handle. Check whether we can turn - * it into something we can handle. - */ - if (pass++ < 3) { - put_page(p); - shake_page(p, 1); - count_increased = false; - goto try_again; - } + /* + * A page we cannot handle. Check whether we can turn + * it into something we can handle. + */ + if (pass++ < 3) { put_page(p); - ret = -EIO; + shake_page(p, 1); + count_increased = false; + goto try_again; } + put_page(p); + ret = -EIO; } - +out: return ret; } -static int get_hwpoison_page(struct page *p, unsigned long flags, - enum mf_flags ctxt) +/** + * get_hwpoison_page() - Get refcount for memory error handling + * @p: Raw error page (hit by memory error) + * @flags: Flags controlling behavior of error handling + * + * get_hwpoison_page() takes a page refcount of an error page to handle memory + * error on it, after checking that the error page is in a well-defined state + * (defined as a page-type we can successfully handle the memor error on it, + * such as LRU page and hugetlb page). + * + * Memory error handling could be triggered at any time on any type of page, + * so it's prone to race with typical memory management lifecycle (like + * allocation and free). So to avoid such races, get_hwpoison_page() takes + * extra care for the error page's state (as done in __get_hwpoison_page()), + * and has some retry logic in get_any_page(). + * + * Return: 0 on failure, + * 1 on success for in-use pages in a well-defined state, + * -EIO for pages on which we can not handle memory errors, + * -EBUSY when get_hwpoison_page() has raced with page lifecycle + * operations like allocation and free. + */ +static int get_hwpoison_page(struct page *p, unsigned long flags) { int ret; zone_pcp_disable(page_zone(p)); - if (ctxt == MF_SOFT_OFFLINE) - ret = get_any_page(p, flags); - else - ret = __get_hwpoison_page(p); + ret = get_any_page(p, flags); zone_pcp_enable(page_zone(p)); return ret; @@ -1267,32 +1421,41 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (TestSetPageHWPoison(head)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return -EHWPOISON; + res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, page_to_pfn(head), flags); + return res; } num_poisoned_pages_inc(); - if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) { - /* - * Check "filter hit" and "race with other subpage." - */ - lock_page(head); - if (PageHWPoison(head)) { - if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) - || (p != head && TestSetPageHWPoison(head))) { - num_poisoned_pages_dec(); - unlock_page(head); - return 0; + if (!(flags & MF_COUNT_INCREASED)) { + res = get_hwpoison_page(p, flags); + if (!res) { + /* + * Check "filter hit" and "race with other subpage." + */ + lock_page(head); + if (PageHWPoison(head)) { + if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) + || (p != head && TestSetPageHWPoison(head))) { + num_poisoned_pages_dec(); + unlock_page(head); + return 0; + } } + unlock_page(head); + res = MF_FAILED; + if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) { + page_ref_inc(p); + res = MF_RECOVERED; + } + action_result(pfn, MF_MSG_FREE_HUGE, res); + return res == MF_RECOVERED ? 0 : -EBUSY; + } else if (res < 0) { + action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); + return -EBUSY; } - unlock_page(head); - res = MF_FAILED; - if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) { - page_ref_inc(p); - res = MF_RECOVERED; - } - action_result(pfn, MF_MSG_FREE_HUGE, res); - return res == MF_RECOVERED ? 0 : -EBUSY; } lock_page(head); @@ -1476,6 +1639,8 @@ try_again: pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); res = -EHWPOISON; + if (flags & MF_ACTION_REQUIRED) + res = kill_accessing_process(current, pfn, flags); goto unlock_mutex; } @@ -1493,28 +1658,35 @@ try_again: * In fact it's dangerous to directly bump up page count from 0, * that may make page_ref_freeze()/page_ref_unfreeze() mismatch. */ - if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) { - if (is_free_buddy_page(p)) { - if (take_page_off_buddy(p)) { - page_ref_inc(p); - res = MF_RECOVERED; - } else { - /* We lost the race, try again */ - if (retry) { - ClearPageHWPoison(p); - num_poisoned_pages_dec(); - retry = false; - goto try_again; + if (!(flags & MF_COUNT_INCREASED)) { + res = get_hwpoison_page(p, flags); + if (!res) { + if (is_free_buddy_page(p)) { + if (take_page_off_buddy(p)) { + page_ref_inc(p); + res = MF_RECOVERED; + } else { + /* We lost the race, try again */ + if (retry) { + ClearPageHWPoison(p); + num_poisoned_pages_dec(); + retry = false; + goto try_again; + } + res = MF_FAILED; } - res = MF_FAILED; + action_result(pfn, MF_MSG_BUDDY, res); + res = res == MF_RECOVERED ? 0 : -EBUSY; + } else { + action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); + res = -EBUSY; } - action_result(pfn, MF_MSG_BUDDY, res); - res = res == MF_RECOVERED ? 0 : -EBUSY; - } else { - action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); + goto unlock_mutex; + } else if (res < 0) { + action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); res = -EBUSY; + goto unlock_mutex; } - goto unlock_mutex; } if (PageTransHuge(hpage)) { @@ -1792,7 +1964,7 @@ int unpoison_memory(unsigned long pfn) return 0; } - if (!get_hwpoison_page(p, flags, 0)) { + if (!get_hwpoison_page(p, flags)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", @@ -2008,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags) retry: get_online_mems(); - ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE); + ret = get_hwpoison_page(page, flags); put_online_mems(); if (ret > 0) { |