KVM: L1 TSC handling
author Nadav Har'El <nyh@il.ibm.com>
Tue, 2 Aug 2011 12:54:20 +0000 (15:54 +0300)
committer Avi Kivity <avi@redhat.com>
Sun, 25 Sep 2011 16:18:02 +0000 (19:18 +0300)
KVM assumed in several places that reading the TSC MSR returns the value for
L1. This is incorrect, because when L2 is running, the correct TSC read exit
emulation is to return L2's value.

We therefore add a new x86_ops function, read_l1_tsc, to use in places that
specifically need to read the L1 TSC, NOT the TSC of the current level of
guest.

Note that one change, a single line in kvm_arch_vcpu_load(), is made redundant
by a different patch sent by Zachary Amsden (and not yet applied):
kvm_arch_vcpu_load() should not read the guest TSC at all, and if it did not,
we would not have had to change its kvm_get_msr() call to read_l1_tsc().

[avi: moved callback to kvm_x86_ops tsc block]

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Acked-by: Zachary Amsden <zamsden@gmail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c

index b31a3417a405839e0c511cd6ee274acc318bfad0..6ab4241c27cbe37cc124f497028b0ff110bbe507 100644 (file)
@@ -630,6 +630,7 @@ struct kvm_x86_ops {
        void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
        u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
+       u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
 
        void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
index f043168a5ab1a7504bd81908d3277dc996c56afe..590d1d2d620b412891064e71ab07ab1d7ae508e0 100644 (file)
@@ -2910,6 +2910,13 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
+u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
+{
+       struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
+       return vmcb->control.tsc_offset +
+               svm_scale_tsc(vcpu, native_read_tsc());
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -4201,6 +4208,7 @@ static struct kvm_x86_ops svm_x86_ops = {
        .write_tsc_offset = svm_write_tsc_offset,
        .adjust_tsc_offset = svm_adjust_tsc_offset,
        .compute_tsc_offset = svm_compute_tsc_offset,
+       .read_l1_tsc = svm_read_l1_tsc,
 
        .set_tdp_cr3 = set_tdp_cr3,
 
index 03df703c8f207b58766f8927f2ff8e6a96eebf23..97b64543d4ed639aeec08c7e325c08e6623ba422 100644 (file)
@@ -1747,6 +1747,21 @@ static u64 guest_read_tsc(void)
        return host_tsc + tsc_offset;
 }
 
+/*
+ * Like guest_read_tsc, but always returns L1's notion of the timestamp
+ * counter, even if a nested guest (L2) is currently running.
+ */
+u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
+{
+       u64 host_tsc, tsc_offset;
+
+       rdtscll(host_tsc);
+       tsc_offset = is_guest_mode(vcpu) ?
+               to_vmx(vcpu)->nested.vmcs01_tsc_offset :
+               vmcs_read64(TSC_OFFSET);
+       return host_tsc + tsc_offset;
+}
+
 /*
  * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
  * ioctl. In this case the call-back should update internal vmx state to make
@@ -7010,6 +7025,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .write_tsc_offset = vmx_write_tsc_offset,
        .adjust_tsc_offset = vmx_adjust_tsc_offset,
        .compute_tsc_offset = vmx_compute_tsc_offset,
+       .read_l1_tsc = vmx_read_l1_tsc,
 
        .set_tdp_cr3 = vmx_set_cr3,
 
index ea8f9f03e92392a0a7cf2ae67064e025d5889990..6b37f18a1663c355ae718c68bb05b476d9527521 100644 (file)
@@ -1098,7 +1098,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
-       kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp);
+       tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
        kernel_ns = get_kernel_ns();
        this_tsc_khz = vcpu_tsc_khz(v);
        if (unlikely(this_tsc_khz == 0)) {
@@ -2218,7 +2218,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                s64 tsc_delta;
                u64 tsc;
 
-               kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc);
+               tsc = kvm_x86_ops->read_l1_tsc(vcpu);
                tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
                             tsc - vcpu->arch.last_guest_tsc;
 
@@ -2242,7 +2242,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
        kvm_x86_ops->vcpu_put(vcpu);
        kvm_put_guest_fpu(vcpu);
-       kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+       vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 }
 
 static int is_efer_nx(void)
@@ -5729,7 +5729,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        if (hw_breakpoint_active())
                hw_breakpoint_restore();
 
-       kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc);
+       vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
 
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();