summaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
author: Marc Zyngier <maz@kernel.org> 2022-12-05 14:38:24 +0000
committer: Marc Zyngier <maz@kernel.org> 2022-12-05 14:38:24 +0000
commit: 382b5b87a97d0958d0ee7d1f2a56df3c6e431770 (patch)
tree: 2499fab7d5a7ed825b302f42010b777b06241ec5 /arch/arm64
parent: cfa72993d13302fe958a1a58234a1a8efa72a5b8 (diff)
parent: a4baf8d2639f24d4d31983ff67c01878e7a5393f (diff)
download: linux-stable-382b5b87a97d0958d0ee7d1f2a56df3c6e431770.tar.gz
download: linux-stable-382b5b87a97d0958d0ee7d1f2a56df3c6e431770.tar.bz2
download: linux-stable-382b5b87a97d0958d0ee7d1f2a56df3c6e431770.zip
Merge branch kvm-arm64/mte-map-shared into kvmarm-master/next
* kvm-arm64/mte-map-shared:
  : .
  : Update the MTE support to allow the VMM to use shared mappings
  : to back the memslots exposed to MTE-enabled guests.
  :
  : Patches courtesy of Catalin Marinas and Peter Collingbourne.
  : .
  : Fix a number of issues with MTE, such as races on the tags
  : being initialised vs the PG_mte_tagged flag as well as the
  : lack of support for VM_SHARED when KVM is involved.
  :
  : Patches from Catalin Marinas and Peter Collingbourne.
  : .
  Documentation: document the ABI changes for KVM_CAP_ARM_MTE
  KVM: arm64: permit all VM_MTE_ALLOWED mappings with MTE enabled
  KVM: arm64: unify the tests for VMAs in memslots when MTE is enabled
  arm64: mte: Lock a page for MTE tag initialisation
  mm: Add PG_arch_3 page flag
  KVM: arm64: Simplify the sanitise_mte_tags() logic
  arm64: mte: Fix/clarify the PG_mte_tagged semantics
  mm: Do not enable PG_arch_2 for all 64-bit architectures

Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64')
-rw-r--r-- arch/arm64/Kconfig | 1
-rw-r--r-- arch/arm64/include/asm/mte.h | 65
-rw-r--r-- arch/arm64/include/asm/pgtable.h | 4
-rw-r--r-- arch/arm64/kernel/cpufeature.c | 4
-rw-r--r-- arch/arm64/kernel/elfcore.c | 2
-rw-r--r-- arch/arm64/kernel/hibernate.c | 2
-rw-r--r-- arch/arm64/kernel/mte.c | 21
-rw-r--r-- arch/arm64/kvm/guest.c | 18
-rw-r--r-- arch/arm64/kvm/mmu.c | 55
-rw-r--r-- arch/arm64/mm/copypage.c | 7
-rw-r--r-- arch/arm64/mm/fault.c | 4
-rw-r--r-- arch/arm64/mm/mteswap.c | 16
12 files changed, 129 insertions, 70 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0..cd93d0738425 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1965,6 +1965,7 @@ config ARM64_MTE
depends on ARM64_PAN
select ARCH_HAS_SUBPAGE_FAULTS
select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_USES_PG_ARCH_X
help
Memory Tagging (part of the ARMv8.5 Extensions) provides
architectural support for run-time, always-on detection of
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 760c62f8e22f..20dd06d70af5 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
unsigned long n);
int mte_save_tags(struct page *page);
void mte_save_page_tags(const void *page_addr, void *tag_storage);
-bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
void mte_restore_page_tags(void *page_addr, const void *tag_storage);
void mte_invalidate_tags(int type, pgoff_t offset);
void mte_invalidate_tags_area(int type);
@@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock PG_arch_3
+
+static inline void set_page_mte_tagged(struct page *page)
+{
+ /*
+ * Ensure that the tags written prior to this function are visible
+ * before the page flags update.
+ */
+ smp_wmb();
+ set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+ bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+ /*
+ * If the page is tagged, ensure ordering with a likely subsequent
+ * read of the tags.
+ */
+ if (ret)
+ smp_rmb();
+ return ret;
+}
+
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ * Locked: PG_mte_lock && !PG_mte_tagged
+ * Unlocked: !PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ if (!test_and_set_bit(PG_mte_lock, &page->flags))
+ return true;
+
+ /*
+ * The tags are either being initialised or may have been initialised
+ * already. Check if the PG_mte_tagged flag has been set or wait
+ * otherwise.
+ */
+ smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+ return false;
+}
void mte_zero_clear_page_tags(void *addr);
void mte_sync_tags(pte_t old_pte, pte_t pte);
@@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+ return false;
+}
+static inline bool try_page_mte_tagging(struct page *page)
+{
+ return false;
+}
static inline void mte_zero_clear_page_tags(void *addr)
{
}
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 71a1af42f0e8..8735ac1a1e32 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type)
#define __HAVE_ARCH_SWAP_RESTORE
static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
- if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
- set_bit(PG_mte_tagged, &folio->flags);
+ if (system_supports_mte())
+ mte_restore_tags(entry, &folio->page);
}
#endif /* CONFIG_ARM64_MTE */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b3f37e2209ad..79d153d34206 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2074,8 +2074,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
* Clear the tags in the zero page. This needs to be done via the
* linear map which has the Tagged attribute.
*/
- if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
+ if (try_page_mte_tagging(ZERO_PAGE(0))) {
mte_clear_page_tags(lm_alias(empty_zero_page));
+ set_page_mte_tagged(ZERO_PAGE(0));
+ }
kasan_init_hw_tags_cpu();
}
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 27ef7ad3ffd2..353009d7f307 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
* Pages mapped in user space as !pte_access_permitted() (e.g.
* PROT_EXEC only) may not have the PG_mte_tagged flag set.
*/
- if (!test_bit(PG_mte_tagged, &page->flags)) {
+ if (!page_mte_tagged(page)) {
put_page(page);
dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
continue;
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index af5df48ba915..788597a6b6a2 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void)
if (!page)
continue;
- if (!test_bit(PG_mte_tagged, &page->flags))
+ if (!page_mte_tagged(page))
continue;
ret = save_tags(page, pfn);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 7467217c1eaf..f5bcb0dc6267 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -41,19 +41,17 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
if (check_swap && is_swap_pte(old_pte)) {
swp_entry_t entry = pte_to_swp_entry(old_pte);
- if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
- return;
+ if (!non_swap_entry(entry))
+ mte_restore_tags(entry, page);
}
if (!pte_is_tagged)
return;
- /*
- * Test PG_mte_tagged again in case it was racing with another
- * set_pte_at().
- */
- if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+ if (try_page_mte_tagging(page)) {
mte_clear_page_tags(page_address(page));
+ set_page_mte_tagged(page);
+ }
}
void mte_sync_tags(pte_t old_pte, pte_t pte)
@@ -69,9 +67,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
- if (!test_bit(PG_mte_tagged, &page->flags))
+ if (!page_mte_tagged(page)) {
mte_sync_page_tags(page, old_pte, check_swap,
pte_is_tagged);
+ set_page_mte_tagged(page);
+ }
}
/* ensure the tags are visible before the PTE is set */
@@ -96,8 +96,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
* pages is tagged, set_pte_at() may zero or change the tags of the
* other page via mte_sync_tags().
*/
- if (test_bit(PG_mte_tagged, &page1->flags) ||
- test_bit(PG_mte_tagged, &page2->flags))
+ if (page_mte_tagged(page1) || page_mte_tagged(page2))
return addr1 != addr2;
return ret;
@@ -454,7 +453,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
put_page(page);
break;
}
- WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
+ WARN_ON_ONCE(!page_mte_tagged(page));
/* limit access to the end of the page */
offset = offset_in_page(addr);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2ff13a3f8479..5626ddb540ce 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
maddr = page_address(page);
if (!write) {
- if (test_bit(PG_mte_tagged, &page->flags))
+ if (page_mte_tagged(page))
num_tags = mte_copy_tags_to_user(tags, maddr,
MTE_GRANULES_PER_PAGE);
else
@@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
clear_user(tags, MTE_GRANULES_PER_PAGE);
kvm_release_pfn_clean(pfn);
} else {
+ /*
+ * Only locking to serialise with a concurrent
+ * set_pte_at() in the VMM but still overriding the
+ * tags, hence ignoring the return value.
+ */
+ try_page_mte_tagging(page);
num_tags = mte_copy_tags_from_user(maddr, tags,
MTE_GRANULES_PER_PAGE);
- /*
- * Set the flag after checking the write
- * completed fully
- */
- if (num_tags == MTE_GRANULES_PER_PAGE)
- set_bit(PG_mte_tagged, &page->flags);
+ /* uaccess failed, don't leave stale tags */
+ if (num_tags != MTE_GRANULES_PER_PAGE)
+ mte_clear_page_tags(page);
+ set_page_mte_tagged(page);
kvm_release_pfn_dirty(pfn);
}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index a1b05e60aebe..39d9a334efb5 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1164,32 +1164,26 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
* - mmap_lock protects between a VM faulting a page in and the VMM performing
* an mprotect() to add VM_MTE
*/
-static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
- unsigned long size)
+static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+ unsigned long size)
{
unsigned long i, nr_pages = size >> PAGE_SHIFT;
- struct page *page;
+ struct page *page = pfn_to_page(pfn);
if (!kvm_has_mte(kvm))
- return 0;
-
- /*
- * pfn_to_online_page() is used to reject ZONE_DEVICE pages
- * that may not support tags.
- */
- page = pfn_to_online_page(pfn);
-
- if (!page)
- return -EFAULT;
+ return;
for (i = 0; i < nr_pages; i++, page++) {
- if (!test_bit(PG_mte_tagged, &page->flags)) {
+ if (try_page_mte_tagging(page)) {
mte_clear_page_tags(page_address(page));
- set_bit(PG_mte_tagged, &page->flags);
+ set_page_mte_tagged(page);
}
}
+}
- return 0;
+static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_MTE_ALLOWED;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1200,7 +1194,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
- bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1247,8 +1240,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_shift = get_vma_page_shift(vma, hva);
}
- shared = (vma->vm_flags & VM_SHARED);
-
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
@@ -1360,13 +1351,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
- /* Check the VMM hasn't introduced a new VM_SHARED VMA */
- if (!shared)
- ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
- else
+ /* Check the VMM hasn't introduced a new disallowed VMA */
+ if (kvm_vma_mte_allowed(vma)) {
+ sanitise_mte_tags(kvm, pfn, vma_pagesize);
+ } else {
ret = -EFAULT;
- if (ret)
goto out_unlock;
+ }
}
if (writable)
@@ -1582,15 +1573,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
- int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
- ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
- if (ret)
+ /*
+ * If the page isn't tagged, defer to user_mem_abort() for sanitising
+ * the MTE tags. The S2 pte should have been unmapped by
+ * mmu_notifier_invalidate_range_end().
+ */
+ if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
return false;
/*
@@ -1822,12 +1816,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (!vma)
break;
- /*
- * VM_SHARED mappings are not allowed with MTE to avoid races
- * when updating the PG_mte_tagged page flag, see
- * sanitise_mte_tags for more details.
- */
- if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+ if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
ret = -EINVAL;
break;
}
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 24913271e898..8dd5a8fe64b4 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -21,9 +21,12 @@ void copy_highpage(struct page *to, struct page *from)
copy_page(kto, kfrom);
- if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
- set_bit(PG_mte_tagged, &to->flags);
+ if (system_supports_mte() && page_mte_tagged(from)) {
+ page_kasan_tag_reset(to);
+ /* It's a new page, shouldn't have been tagged yet */
+ WARN_ON_ONCE(!try_page_mte_tagging(to));
mte_copy_page_tags(kto, kfrom);
+ set_page_mte_tagged(to);
}
}
EXPORT_SYMBOL(copy_highpage);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 3e9cf9826417..0b1c102b89c9 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -937,6 +937,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
void tag_clear_highpage(struct page *page)
{
+ /* Newly allocated page, shouldn't have been tagged yet */
+ WARN_ON_ONCE(!try_page_mte_tagging(page));
mte_zero_clear_page_tags(page_address(page));
- set_bit(PG_mte_tagged, &page->flags);
+ set_page_mte_tagged(page);
}
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index bed803d8e158..cd508ba80ab1 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -24,7 +24,7 @@ int mte_save_tags(struct page *page)
{
void *tag_storage, *ret;
- if (!test_bit(PG_mte_tagged, &page->flags))
+ if (!page_mte_tagged(page))
return 0;
tag_storage = mte_allocate_tag_storage();
@@ -46,21 +46,17 @@ int mte_save_tags(struct page *page)
return 0;
}
-bool mte_restore_tags(swp_entry_t entry, struct page *page)
+void mte_restore_tags(swp_entry_t entry, struct page *page)
{
void *tags = xa_load(&mte_pages, entry.val);
if (!tags)
- return false;
+ return;
- /*
- * Test PG_mte_tagged again in case it was racing with another
- * set_pte_at().
- */
- if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+ if (try_page_mte_tagging(page)) {
mte_restore_page_tags(page_address(page), tags);
-
- return true;
+ set_page_mte_tagged(page);
+ }
}
void mte_invalidate_tags(int type, pgoff_t offset)