kvm: x86: Add fast CR3 switch code path
author: Junaid Shahid <junaids@google.com>
Wed, 27 Jun 2018 21:59:06 +0000 (14:59 -0700)
committer: Paolo Bonzini <pbonzini@redhat.com>
Mon, 6 Aug 2018 15:58:51 +0000 (17:58 +0200)
When using shadow paging, a CR3 switch in the guest results in a VM Exit.
In the common case, that VM exit doesn't require much processing by KVM.
However, it does acquire the MMU lock, which can start showing signs of
contention under some workloads even on a 2 VCPU VM when the guest is
using KPTI. Therefore, we add a fast path that avoids acquiring the MMU
lock in the most common cases e.g. when switching back and forth between
the kernel and user mode CR3s used by KPTI with no guest page table
changes in between.

For now, this fast path is implemented only for 64-bit guests and hosts
to avoid the handling of PDPTEs, but it can be extended later to 32-bit
guests and/or hosts as well.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu.c
arch/x86/kvm/x86.c

index bd287b348751a498e304d9448d7ba6bbdb049822..290b7d05790a02346b8863c1e77180408841208a 100644 (file)
@@ -326,6 +326,14 @@ struct rsvd_bits_validate {
        u64 bad_mt_xwr;
 };
 
+struct kvm_mmu_root_info {
+       gpa_t cr3;      /* guest CR3 value recorded for this cached root */
+       hpa_t hpa;      /* root page address; INVALID_PAGE when nothing is cached */
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+       ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -354,6 +362,7 @@ struct kvm_mmu {
        u8 shadow_root_level;
        u8 ept_ad;
        bool direct_map;
+       struct kvm_mmu_root_info prev_root;
 
        /*
         * Bitmap; bit set = permission fault
@@ -1288,7 +1297,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1307,7 +1316,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
                       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
index 4b4452d0022e843ec68235a1c4b2d9b8e759b737..7f490298c635f11b661c58fcdf0c4ae01f46b3bd 100644 (file)
@@ -3405,17 +3405,22 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
        *root_hpa = INVALID_PAGE;
 }
 
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root)
 {
        int i;
        LIST_HEAD(invalid_list);
        struct kvm_mmu *mmu = &vcpu->arch.mmu;
 
-       if (!VALID_PAGE(mmu->root_hpa))
+       if (!VALID_PAGE(mmu->root_hpa) &&
+           (!VALID_PAGE(mmu->prev_root.hpa) || !free_prev_root))
                return;
 
        spin_lock(&vcpu->kvm->mmu_lock);
 
+       if (free_prev_root)
+               mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
+                                  &invalid_list);
+
        if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
            (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
                mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
@@ -4015,13 +4020,56 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
        context->root_level = 0;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
+       context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
        context->direct_map = true;
        context->nx = false;
 }
 
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
+static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3)
+{
+       struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+       /*
+        * Try to switch to new_cr3 by reusing the root cached in
+        * mmu->prev_root. Returns true if the cached root was installed
+        * through mmu->set_cr3(), false if the caller has to handle the
+        * switch itself.
+        *
+        * Once new_cr3 passes mmu_check_root(), the current root and the
+        * cached root are swapped unconditionally, and prev_root.cr3 is set
+        * to the CR3 read from the vcpu (presumably still the outgoing CR3,
+        * since kvm_set_cr3() calls kvm_mmu_new_cr3() before updating
+        * vcpu->arch.cr3 -- confirm against kvm_read_cr3()). So on a false
+        * return after the swap, mmu->root_hpa holds what used to be the
+        * cached root and mmu->prev_root describes the outgoing root.
+        *
+        * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
+        * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
+        * later if necessary.
+        */
+       if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+           mmu->root_level >= PT64_ROOT_4LEVEL) {
+               gpa_t prev_cr3 = mmu->prev_root.cr3;
+
+               if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
+                       return false;
+
+               swap(mmu->root_hpa, mmu->prev_root.hpa);
+               mmu->prev_root.cr3 = kvm_read_cr3(vcpu);
+
+               if (new_cr3 == prev_cr3 && VALID_PAGE(mmu->root_hpa)) {
+                       /*
+                        * It is possible that the cached previous root page is
+                        * obsolete because of a change in the MMU
+                        * generation number. However, that is accompanied by
+                        * KVM_REQ_MMU_RELOAD, which will free the root that we
+                        * have set here and allocate a new one.
+                        *
+                        * KVM_REQ_MMU_SYNC is requested below, presumably
+                        * because the guest page tables may have changed
+                        * while this root was not the active one.
+                        */
+
+                       kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+                       __clear_sp_write_flooding_count(
+                               page_header(mmu->root_hpa));
+
+                       mmu->set_cr3(vcpu, mmu->root_hpa);
+
+                       return true;
+               }
+       }
+
+       return false;
+}
+
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3)
 {
-       kvm_mmu_free_roots(vcpu);
+       if (!fast_cr3_switch(vcpu, new_cr3))
+               kvm_mmu_free_roots(vcpu, false); /* slow path; prev_root stays cached */
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4499,6 +4547,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
        context->update_pte = paging64_update_pte;
        context->shadow_root_level = level;
        context->root_hpa = INVALID_PAGE;
+       context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
        context->direct_map = false;
 }
 
@@ -4529,6 +4578,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
        context->update_pte = paging32_update_pte;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = INVALID_PAGE;
+       context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
        context->direct_map = false;
 }
 
@@ -4552,6 +4602,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
        context->update_pte = nonpaging_update_pte;
        context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
        context->root_hpa = INVALID_PAGE;
+       context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
        context->direct_map = true;
        context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
        context->get_cr3 = get_cr3;
@@ -4634,6 +4685,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
        context->update_pte = ept_update_pte;
        context->root_level = PT64_ROOT_4LEVEL;
        context->root_hpa = INVALID_PAGE;
+       context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
        context->direct_map = false;
        context->base_role.ad_disabled = !accessed_dirty;
        context->base_role.guest_mode = 1;
@@ -4736,7 +4788,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-       kvm_mmu_free_roots(vcpu);
+       kvm_mmu_free_roots(vcpu, true); /* also release the cached prev_root */
        WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -5116,6 +5168,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.walk_mmu = &vcpu->arch.mmu;
        vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+       vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
        vcpu->arch.mmu.translate_gpa = translate_gpa;
        vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
 
index 1b14c4a654c32baf770f13307e698407bb0c2989..5a1e4f79398fa41971951967335599462f32307f 100644 (file)
@@ -867,9 +867,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
                return 1;
 
+       kvm_mmu_new_cr3(vcpu, cr3);
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-       kvm_mmu_new_cr3(vcpu);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);