From 5898b43af954b83c4a4ee4ab85c4dbafa395822a Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 16 May 2022 11:38:10 -0700 Subject: mce: fix set_mce_nospec to always unmap the whole page The set_memory_uc() approach doesn't work well in all cases. As Dan pointed out when "The VMM unmapped the bad page from guest physical space and passed the machine check to the guest." "The guest gets virtual #MC on an access to that page. When the guest tries to do set_memory_uc() and instructs cpa_flush() to do clean caches that results in taking another fault / exception perhaps because the VMM unmapped the page from the guest." Since the driver has special knowledge to handle NP or UC, mark the poisoned page with NP and let driver handle it when it comes down to repair. Please refer to discussions here for more details. https://lore.kernel.org/all/CAPcyv4hrXPb1tASBZUg-GgdVs0OOFKXMXLiHmktg_kFi7YBMyQ@mail.gmail.com/ Now since poisoned page is marked as not-present, in order to avoid writing to a not-present page and trigger kernel Oops, also fix pmem_do_write(). Fixes: 284ce4011ba6 ("x86/memory_failure: Introduce {set, clear}_mce_nospec()") Reviewed-by: Christoph Hellwig Reviewed-by: Dan Williams Signed-off-by: Jane Chu Acked-by: Tony Luck Link: https://lore.kernel.org/r/165272615484.103830.2563950688772226611.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- arch/x86/kernel/cpu/mce/core.c | 6 +++--- arch/x86/mm/pat/set_memory.c | 23 +++++++++++------------ 2 files changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 981496e6bc0e..fa67bb9d1afe 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -579,7 +579,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) { - set_mce_nospec(pfn, whole_page(mce)); + set_mce_nospec(pfn); mce->kflags |= MCE_HANDLED_UC; } @@ -1316,7 +1316,7 @@ static void kill_me_maybe(struct callback_head *cb) ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); if (!ret) { - set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT); sync_core(); return; } @@ -1342,7 +1342,7 @@ static void kill_me_never(struct callback_head *cb) p->mce_count = 0; pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) - set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT); } static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 0caf4b0edcbc..44f0d4260bd8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1925,14 +1925,9 @@ int set_memory_wb(unsigned long addr, int numpages) } EXPORT_SYMBOL(set_memory_wb); -/* - * Prevent speculative access to the page by either unmapping - * it (if we do not require access to any part of the page) or - * marking it uncacheable (if we want to try to retrieve data - * from non-poisoned lines in the page). - */ +/* Prevent speculative access to a page by marking it not-present */ #ifdef CONFIG_X86_64 -int set_mce_nospec(unsigned long pfn, bool unmap) +int set_mce_nospec(unsigned long pfn) { unsigned long decoy_addr; int rc; @@ -1954,19 +1949,23 @@ int set_mce_nospec(unsigned long pfn, bool unmap) */ decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - if (unmap) - rc = set_memory_np(decoy_addr, 1); - else - rc = set_memory_uc(decoy_addr, 1); + rc = set_memory_np(decoy_addr, 1); if (rc) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); return rc; } +static int set_memory_present(unsigned long *addr, int numpages) +{ + return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); +} + /* Restore full speculative operation to the pfn. */ int clear_mce_nospec(unsigned long pfn) { - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); + unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); + + return set_memory_present(&addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ -- cgit v1.2.3