From f99e3daf94ff35dd4a878d32ff66e1fd35223ad6 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 24 Oct 2018 16:05:10 +0800 Subject: [PATCH] KVM: x86: Add Intel PT virtualization work mode Intel Processor Trace virtualization can be work in one of 2 possible modes: a. System-Wide mode (default): When the host configures Intel PT to collect trace packets of the entire system, it can leave the relevant VMX controls clear to allow VMX-specific packets to provide information across VMX transitions. KVM guest will not aware this feature in this mode and both host and KVM guest trace will output to host buffer. b. Host-Guest mode: Host can configure trace-packet generation while in VMX non-root operation for guests and root operation for native executing normally. Intel PT will be exposed to KVM guest in this mode, and the trace output to respective buffer of host and guest. In this mode, tht status of PT will be saved and disabled before VM-entry and restored after VM-exit if trace a virtual machine. Signed-off-by: Chao Peng Signed-off-by: Luwei Kang Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/msr-index.h | 1 + arch/x86/include/asm/vmx.h | 8 ++++++++ arch/x86/kvm/vmx/capabilities.h | 15 +++++++++++++++ arch/x86/kvm/vmx/vmx.c | 21 +++++++++++++++++++-- arch/x86/kvm/vmx/vmx.h | 9 ++++++++- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 6a900150184b..8f95297371af 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -807,6 +807,7 @@ #define VMX_BASIC_INOUT 0x0040000000000000LLU /* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) #define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F /* AMD-V MSRs */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 898c443eeed1..4e4133e86484 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -77,7 +77,9 @@ #define SECONDARY_EXEC_ENCLS_EXITING 0x00008000 #define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EXEC_PT_CONCEAL_VMX 0x00080000 #define SECONDARY_EXEC_XSAVES 0x00100000 +#define SECONDARY_EXEC_PT_USE_GPA 0x01000000 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 @@ -99,6 +101,8 @@ #define VM_EXIT_LOAD_IA32_EFER 0x00200000 #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 +#define VM_EXIT_PT_CONCEAL_PIP 0x01000000 +#define VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff @@ -110,6 +114,8 @@ #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 +#define VM_ENTRY_PT_CONCEAL_PIP 0x00020000 +#define VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff @@ -241,6 +247,8 @@ enum vmcs_field { GUEST_PDPTR3_HIGH = 0x00002811, GUEST_BNDCFGS = 0x00002812, GUEST_BNDCFGS_HIGH = 0x00002813, + GUEST_IA32_RTIT_CTL = 0x00002814, + GUEST_IA32_RTIT_CTL_HIGH = 0x00002815, HOST_IA32_PAT = 0x00002c00, HOST_IA32_PAT_HIGH = 0x00002c01, HOST_IA32_EFER = 0x00002c02, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 366b9dd2e4ae..854e144131c6 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -10,6 +10,10 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; +extern int __read_mostly pt_mode; + +#define PT_MODE_SYSTEM 0 +#define PT_MODE_HOST_GUEST 1 struct nested_vmx_msrs { /* @@ -325,4 +329,15 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_intel_pt(void) +{ + u64 vmx_msr; + + rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); + return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) && + (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && + (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) && + (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); +} + #endif /* __KVM_X86_VMX_CAPS_H */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9b58921f5cd3..338977e6f552 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -169,6 +169,10 @@ module_param(ple_window_shrink, uint, 0444); static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; module_param(ple_window_max, uint, 0444); +/* Default is SYSTEM mode, 1 for host-guest mode */ +int __read_mostly pt_mode = PT_MODE_SYSTEM; +module_param(pt_mode, int, S_IRUGO); + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); @@ -1975,6 +1979,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_PT_USE_GPA | + SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, @@ -2023,7 +2029,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER | - VM_EXIT_CLEAR_BNDCFGS; + VM_EXIT_CLEAR_BNDCFGS | + VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) return -EIO; @@ -2045,7 +2053,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER | - VM_ENTRY_LOAD_BNDCFGS; + VM_ENTRY_LOAD_BNDCFGS | + VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) return -EIO; @@ -3567,6 +3577,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; + if (pt_mode == PT_MODE_SYSTEM) + exec_control &= ~(SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX); if (!cpu_need_virtualize_apic_accesses(vcpu)) exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) @@ -7248,6 +7260,11 @@ static __init int hardware_setup(void) kvm_mce_cap_supported |= MCG_LMCE_P; + if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) + return -EINVAL; + if (!enable_ept || !cpu_has_vmx_intel_pt()) + pt_mode = PT_MODE_SYSTEM; + if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config.nested, vmx_capability.ept, enable_apicv); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f932d7c971e9..86eb9c887386 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -5,6 +5,7 @@ #include #include +#include #include "capabilities.h" #include "ops.h" @@ -421,13 +422,19 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static inline u32 vmx_vmentry_ctrl(void) { + u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; + if (pt_mode == PT_MODE_SYSTEM) + vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmcs_config.vmentry_ctrl & + return vmentry_ctrl & ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); } static inline u32 vmx_vmexit_ctrl(void) { + u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; + if (pt_mode == PT_MODE_SYSTEM) + vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ return vmcs_config.vmexit_ctrl & ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); -- 2.30.2