Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r--  arch/x86/kvm/mmu/mmu.c            21
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h   15
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c       152
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.h         5
4 files changed, 81 insertions(+), 112 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e1d011c67cc6..f7901cb4d2fa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6167,20 +6167,15 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
-int kvm_mmu_init_vm(struct kvm *kvm)
+void kvm_mmu_init_vm(struct kvm *kvm)
{
- int r;
-
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
- if (tdp_mmu_enabled) {
- r = kvm_mmu_init_tdp_mmu(kvm);
- if (r < 0)
- return r;
- }
+ if (tdp_mmu_enabled)
+ kvm_mmu_init_tdp_mmu(kvm);
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
@@ -6189,8 +6184,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
-
- return 0;
}
static void mmu_free_vm_memory_caches(struct kvm *kvm)
@@ -6246,7 +6239,6 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
{
bool flush;
- int i;
if (WARN_ON_ONCE(gfn_end <= gfn_start))
return;
@@ -6257,11 +6249,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
- if (tdp_mmu_enabled) {
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
- gfn_end, true, flush);
- }
+ if (tdp_mmu_enabled)
+ flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
if (flush)
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
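
The hunk above keeps the existing caller pattern: each zap helper ORs its result into a running "flush" flag, and a single remote TLB flush is issued at the end only if anything was zapped. A minimal userspace sketch of that pattern follows; zap_shadow_range(), zap_tdp_leafs() and the printf stand in for kvm_rmap_zap_gfn_range(), kvm_tdp_mmu_zap_leafs() and kvm_flush_remote_tlbs_range() and are purely hypothetical.

#include <stdbool.h>
#include <stdio.h>

/* Pretend something was zapped whenever the range is non-empty. */
static bool zap_shadow_range(unsigned long start, unsigned long end)
{
	return end > start;
}

/* Accumulate into the flush state handed in by the caller. */
static bool zap_tdp_leafs(unsigned long start, unsigned long end, bool flush)
{
	return flush || (end > start);
}

static void zap_gfn_range(unsigned long start, unsigned long end)
{
	bool flush;

	flush = zap_shadow_range(start, end);
	flush = zap_tdp_leafs(start, end, flush);

	/* One remote TLB flush at the end, only if any SPTE was zapped. */
	if (flush)
		printf("flush remote TLBs for [%lx, %lx)\n", start, end);
}

int main(void)
{
	zap_gfn_range(0x1000, 0x2000);
	return 0;
}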
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index b102014e2c60..decc1f153669 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -58,7 +58,12 @@ struct kvm_mmu_page {
bool tdp_mmu_page;
bool unsync;
- u8 mmu_valid_gen;
+ union {
+ u8 mmu_valid_gen;
+
+ /* Only accessed under slots_lock. */
+ bool tdp_mmu_scheduled_root_to_zap;
+ };
/*
* The shadow page can't be replaced by an equivalent huge page
@@ -100,13 +105,7 @@ struct kvm_mmu_page {
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
tdp_ptep_t ptep;
};
- union {
- DECLARE_BITMAP(unsync_child_bitmap, 512);
- struct {
- struct work_struct tdp_mmu_async_work;
- void *tdp_mmu_async_data;
- };
- };
+ DECLARE_BITMAP(unsync_child_bitmap, 512);
/*
* Tracks shadow pages that, if zapped, would allow KVM to create an NX
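
The mmu_internal.h change aliases the new tdp_mmu_scheduled_root_to_zap flag with mmu_valid_gen and drops the per-root work_struct now that zapping is no longer done from a workqueue. The aliasing works because mmu_valid_gen belongs to the shadow MMU's obsolete-page tracking and TDP MMU roots don't use it, and the new flag is only touched under slots_lock. A simplified, hypothetical userspace rendering of just those fields (the real kvm_mmu_page has many more):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mmu_page_model {
	bool tdp_mmu_page;
	bool unsync;

	union {
		uint8_t mmu_valid_gen;              /* shadow-MMU pages only */
		bool tdp_mmu_scheduled_root_to_zap; /* TDP MMU roots only; slots_lock */
	};
};

int main(void)
{
	struct mmu_page_model root = { .tdp_mmu_page = true };

	/* A TDP MMU root never touches mmu_valid_gen, so the flag can
	 * reuse its storage without clobbering shadow-MMU state.
	 */
	root.tdp_mmu_scheduled_root_to_zap = true;
	printf("scheduled to zap: %d\n", root.tdp_mmu_scheduled_root_to_zap);
	return 0;
}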
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6c63f2d1675f..6cd4dd631a2f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -12,18 +12,10 @@
#include <trace/events/kvm.h>
/* Initializes the TDP MMU for the VM, if enabled. */
-int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
- struct workqueue_struct *wq;
-
- wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
- if (!wq)
- return -ENOMEM;
-
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
- kvm->arch.tdp_mmu_zap_wq = wq;
- return 1;
}
/* Arbitrarily returns true so that this may be used in if statements. */
@@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
* ultimately frees all roots.
*/
kvm_tdp_mmu_invalidate_all_roots(kvm);
-
- /*
- * Destroying a workqueue also first flushes the workqueue, i.e. no
- * need to invoke kvm_tdp_mmu_zap_invalidated_roots().
- */
- destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
+ kvm_tdp_mmu_zap_invalidated_roots(kvm);
WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
* Ensure that all the outstanding RCU callbacks to free shadow pages
- * can run before the VM is torn down. Work items on tdp_mmu_zap_wq
- * can call kvm_tdp_mmu_put_root and create new callbacks.
+ * can run before the VM is torn down. Putting the last reference to
+ * zapped roots will create new callbacks.
*/
rcu_barrier();
}
@@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
tdp_mmu_free_sp(sp);
}
-static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared);
-
-static void tdp_mmu_zap_root_work(struct work_struct *work)
-{
- struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
- tdp_mmu_async_work);
- struct kvm *kvm = root->tdp_mmu_async_data;
-
- read_lock(&kvm->mmu_lock);
-
- /*
- * A TLB flush is not necessary as KVM performs a local TLB flush when
- * allocating a new root (see kvm_mmu_load()), and when migrating vCPU
- * to a different pCPU. Note, the local TLB flush on reuse also
- * invalidates any paging-structure-cache entries, i.e. TLB entries for
- * intermediate paging structures, that may be zapped, as such entries
- * are associated with the ASID on both VMX and SVM.
- */
- tdp_mmu_zap_root(kvm, root, true);
-
- /*
- * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
- * avoiding an infinite loop. By design, the root is reachable while
- * it's being asynchronously zapped, thus a different task can put its
- * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
- * asynchronously zapped root is unavoidable.
- */
- kvm_tdp_mmu_put_root(kvm, root, true);
-
- read_unlock(&kvm->mmu_lock);
-}
-
-static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
-{
- root->tdp_mmu_async_data = kvm;
- INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
- queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
-}
-
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
@@ -211,8 +158,12 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \
- __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false)
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \
+ for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \
+ _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \
+ if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \
+ } else
/*
* Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
@@ -292,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
* by a memslot update or by the destruction of the VM. Initialize the
* refcount to two; one reference for the vCPU, and one reference for
* the TDP MMU itself, which is held until the root is invalidated and
- * is ultimately put by tdp_mmu_zap_root_work().
+ * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
*/
refcount_set(&root->tdp_mmu_root_count, 2);
@@ -877,13 +828,12 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
* true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
* more SPTEs were zapped since the MMU lock was last acquired.
*/
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
- bool can_yield, bool flush)
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
{
struct kvm_mmu_page *root;
- for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
- flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush);
+ for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
}
@@ -891,7 +841,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *root;
- int i;
/*
* Zap all roots, including invalid roots, as all SPTEs must be dropped
@@ -905,10 +854,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
* is being destroyed or the userspace VMM has exited. In both cases,
* KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
*/
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- for_each_tdp_mmu_root_yield_safe(kvm, root, i)
- tdp_mmu_zap_root(kvm, root, false);
- }
+ for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ tdp_mmu_zap_root(kvm, root, false);
}
/*
@@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
- flush_workqueue(kvm->arch.tdp_mmu_zap_wq);
+ struct kvm_mmu_page *root;
+
+ read_lock(&kvm->mmu_lock);
+
+ for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+ if (!root->tdp_mmu_scheduled_root_to_zap)
+ continue;
+
+ root->tdp_mmu_scheduled_root_to_zap = false;
+ KVM_BUG_ON(!root->role.invalid, kvm);
+
+ /*
+ * A TLB flush is not necessary as KVM performs a local TLB
+ * flush when allocating a new root (see kvm_mmu_load()), and
+ * when migrating a vCPU to a different pCPU. Note, the local
+ * TLB flush on reuse also invalidates paging-structure-cache
+ * entries, i.e. TLB entries for intermediate paging structures,
+ * that may be zapped, as such entries are associated with the
+ * ASID on both VMX and SVM.
+ */
+ tdp_mmu_zap_root(kvm, root, true);
+
+ /*
+ * The reference needs to be put *after* zapping the root, as
+ * the root must be reachable by mmu_notifiers while it's being
+ * zapped.
+ */
+ kvm_tdp_mmu_put_root(kvm, root, true);
+ }
+
+ read_unlock(&kvm->mmu_lock);
}
/*
* Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
* is about to be zapped, e.g. in response to a memslots update. The actual
- * zapping is performed asynchronously. Using a separate workqueue makes it
- * easy to ensure that the destruction is performed before the "fast zap"
- * completes, without keeping a separate list of invalidated roots; the list is
- * effectively the list of work items in the workqueue.
+ * zapping is done separately so that it happens with mmu_lock held for read,
+ * whereas invalidating roots must be done with mmu_lock held for write (unless
+ * the VM is being destroyed).
*
- * Note, the asynchronous worker is gifted the TDP MMU's reference.
+ * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
* See kvm_tdp_mmu_get_vcpu_root_hpa().
*/
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
@@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
/*
* As above, mmu_lock isn't held when destroying the VM! There can't
* be other references to @kvm, i.e. nothing else can invalidate roots
- * or be consuming roots, but walking the list of roots does need to be
- * guarded against roots being deleted by the asynchronous zap worker.
+ * or get/put references to roots.
*/
- rcu_read_lock();
-
- list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
+ list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
+ /*
+ * Note, invalid roots can outlive a memslot update! Invalid
+ * roots must be *zapped* before the memslot update completes,
+ * but a different task can acquire a reference and keep the
+ * root alive after it's been zapped.
+ */
if (!root->role.invalid) {
+ root->tdp_mmu_scheduled_root_to_zap = true;
root->role.invalid = true;
- tdp_mmu_schedule_zap_root(kvm, root);
}
}
-
- rcu_read_unlock();
}
/*
@@ -1146,8 +1123,13 @@ retry:
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush)
{
- return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start,
- range->end, range->may_block, flush);
+ struct kvm_mmu_page *root;
+
+ __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+ flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
+ range->may_block, flush);
+
+ return flush;
}
typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
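
The comments in the new kvm_tdp_mmu_zap_invalidated_roots() and kvm_tdp_mmu_invalidate_all_roots() describe a two-phase scheme that replaces the workqueue: roots are first marked invalid and flagged for zapping (under mmu_lock for write, or with no lock at VM destruction), and the flagged roots are later zapped under mmu_lock held for read, with the TDP MMU's reference put only after the zap. Below is a minimal, hypothetical userspace model of that lifecycle; struct root, invalidate_all_roots(), zap_invalidated_roots() and put_root() are simplified stand-ins, locking and RCU-deferred freeing are omitted, and the refcount of two mirrors the comment in kvm_tdp_mmu_get_vcpu_root_hpa().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct root {
	atomic_int refcount;     /* models tdp_mmu_root_count */
	bool invalid;            /* models role.invalid */
	bool scheduled_to_zap;   /* models tdp_mmu_scheduled_root_to_zap */
	struct root *next;
};

static struct root *roots;

static struct root *alloc_root(void)
{
	struct root *r = calloc(1, sizeof(*r));

	/* One reference for the vCPU, one for the TDP MMU itself. */
	atomic_store(&r->refcount, 2);
	r->next = roots;
	roots = r;
	return r;
}

static void put_root(struct root *r)
{
	/* In KVM the final put frees the root via an RCU callback. */
	if (atomic_fetch_sub(&r->refcount, 1) == 1)
		printf("root %p: last reference dropped\n", (void *)r);
}

/* Phase 1: mark roots invalid so vCPUs stop reusing them. */
static void invalidate_all_roots(void)
{
	for (struct root *r = roots; r; r = r->next) {
		if (!r->invalid) {
			r->scheduled_to_zap = true;
			r->invalid = true;
		}
	}
}

/* Phase 2: zap flagged roots and drop the TDP MMU's reference. */
static void zap_invalidated_roots(void)
{
	for (struct root *r = roots; r; r = r->next) {
		if (!r->scheduled_to_zap)
			continue;

		r->scheduled_to_zap = false;
		printf("root %p: zapping\n", (void *)r);

		/* Put *after* zapping: the root stays reachable while
		 * it is being torn down.
		 */
		put_root(r);
	}
}

int main(void)
{
	struct root *r = alloc_root();

	invalidate_all_roots();
	zap_invalidated_roots();
	put_root(r);    /* the vCPU's reference */
	return 0;
}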
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 0a63b1afabd3..733a3aef3a96 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -7,7 +7,7 @@
#include "spte.h"
-int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
@@ -20,8 +20,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared);
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start,
- gfn_t end, bool can_yield, bool flush);
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
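
The reworked for_each_tdp_mmu_root_yield_safe() macro in tdp_mmu.c appends an "if (!assert) { } else" to the for-loop so a lockdep assertion runs on every iteration while the macro still binds cleanly to the single statement that follows it. The snippet below is a generic, hypothetical illustration of that C idiom; for_each_element() and assert_lock_held() are made-up names that play the roles of the KVM macro and kvm_lockdep_assert_mmu_lock_held().

#include <stdbool.h>
#include <stdio.h>

/* Always true here, mirroring a lockdep helper that asserts and then
 * returns true so it can live inside an if ().
 */
static bool assert_lock_held(void)
{
	return true;
}

/*
 * The trailing "if (!cond) { } else" swallows the caller's loop body as
 * the else-branch: the guard runs each iteration, the body runs only
 * when it passes, and a dangling "else" in caller code can't pair with
 * the hidden "if".
 */
#define for_each_element(_i, _n)			\
	for ((_i) = 0; (_i) < (_n); (_i)++)		\
		if (!assert_lock_held()) {		\
		} else

int main(void)
{
	int i;

	for_each_element(i, 3)
		printf("visiting %d\n", i);
	return 0;
}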