x86/kvm: Use __bss_decrypted attribute in shared variables
author Brijesh Singh <brijesh.singh@amd.com>
Fri, 14 Sep 2018 13:45:59 +0000 (08:45 -0500)
committer Thomas Gleixner <tglx@linutronix.de>
Sat, 15 Sep 2018 18:48:46 +0000 (20:48 +0200)
The recent removal of the memblock dependency from kvmclock caused a SEV
guest regression because the wall_clock and hv_clock_boot variables are
no longer mapped decrypted when SEV is active.

Use the __bss_decrypted attribute to put the static wall_clock and
hv_clock_boot in the .bss..decrypted section so that they are mapped
decrypted during boot.

In the preparatory stage of CPU hotplug, the per-cpu pvclock data pointer
is assigned either an element of the static array or dynamically allocated
memory. The static array is now mapped decrypted, but the dynamically
allocated memory is not. However, when SEV is active this memory range
must be mapped decrypted as well.

Add a function which is called after the page allocator is up, and
allocate memory for the pvclock data pointers for all possible CPUs.
Map this memory range as decrypted when SEV is active.

Fixes: 368a540e0232 ("x86/kvmclock: Remove memblock dependency")
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Link: https://lkml.kernel.org/r/1536932759-12905-3-git-send-email-brijesh.singh@amd.com
arch/x86/kernel/kvmclock.c

index 1e6764648af3ed9d49c0714002587031b2555563..013fe3d21dbb3f4d5f834f74e0ca3d791a6f0b06 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/sched/clock.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/set_memory.h>
 
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
        (PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
 
 static struct pvclock_vsyscall_time_info
-                       hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+                       hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
 static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
 
 static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
        native_machine_shutdown();
 }
 
+/*
+ * Allocate the pvclock areas for the CPUs which do not fit into the static
+ * hv_clock_boot[] array and publish them through hvclock_mem.
+ *
+ * Nothing is allocated when HVC_BOOT_ARRAY_SIZE already covers
+ * num_possible_cpus(). If the page allocation or set_memory_decrypted()
+ * fails, a warning is logged and hvclock_mem stays NULL.
+ */
+static void __init kvmclock_init_mem(void)
+{
+       unsigned long ncpus;
+       unsigned int order;
+       struct page *p;
+       int r;
+
+       /* The static boot array is large enough for every possible CPU. */
+       if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
+               return;
+
+       /* Only the CPUs beyond the static array need extra memory. */
+       ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+       order = get_order(ncpus * sizeof(*hvclock_mem));
+
+       p = alloc_pages(GFP_KERNEL, order);
+       if (!p) {
+               /* %u matches the unsigned operand; '\n' terminates the record. */
+               pr_warn("%s: failed to alloc %u pages\n", __func__, (1U << order));
+               return;
+       }
+
+       hvclock_mem = page_address(p);
+
+       /*
+        * hvclock is shared between the guest and the hypervisor, must
+        * be mapped decrypted.
+        */
+       if (sev_active()) {
+               r = set_memory_decrypted((unsigned long) hvclock_mem,
+                                        1UL << order);
+               if (r) {
+                       /* Undo the allocation so hvclock_mem is never used encrypted. */
+                       __free_pages(p, order);
+                       hvclock_mem = NULL;
+                       pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
+                       return;
+               }
+       }
+
+       /* Clear the whole allocation only after its final mapping is in place. */
+       memset(hvclock_mem, 0, PAGE_SIZE << order);
+}
+
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
 #ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
 
        kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif
+
+       kvmclock_init_mem();
+
        return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
        /* Use the static page for the first CPUs, allocate otherwise */
        if (cpu < HVC_BOOT_ARRAY_SIZE)
                p = &hv_clock_boot[cpu];
+       else if (hvclock_mem)
+               p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
        else
-               p = kzalloc(sizeof(*p), GFP_KERNEL);
+               return -ENOMEM;
 
        per_cpu(hv_clock_per_cpu, cpu) = p;
        return p ? 0 : -ENOMEM;