x86/mm: Add .bss..decrypted section to hold shared variables
author Brijesh Singh <brijesh.singh@amd.com>
Fri, 14 Sep 2018 13:45:58 +0000 (08:45 -0500)
committer Thomas Gleixner <tglx@linutronix.de>
Sat, 15 Sep 2018 18:48:45 +0000 (20:48 +0200)
kvmclock defines a few static variables which are shared with the
hypervisor during the kvmclock initialization.

When SEV is active, memory is encrypted with a guest-specific key, and
if the guest OS wants to share a memory region with the hypervisor
then it must clear the C-bit before sharing it.
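
Conceptually, sharing a region boils down to stripping the encryption
mask from the page table entries that map it. A minimal sketch using the
kernel's SME helpers (the head64.c hunk below does this for the early 2M
mappings; sketch_clear_c_bit is a made-up name for illustration):

	/*
	 * Sketch only, not part of this patch: drop the C-bit from the
	 * PMD entries covering [vaddr, vaddr_end). sme_get_me_mask()
	 * returns 0 when memory encryption is inactive, making this a
	 * no-op on unencrypted guests.
	 */
	static void __init sketch_clear_c_bit(pmdval_t *pmd,
					      unsigned long vaddr,
					      unsigned long vaddr_end)
	{
		for (; vaddr < vaddr_end; vaddr += PMD_SIZE)
			pmd[pmd_index(vaddr)] -= sme_get_me_mask();
	}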

Currently, we use kernel_physical_mapping_init() to split large pages
before clearing the C-bit on shared pages. But it fails when called from
the kvmclock initialization (mainly because the memblock allocator is
not ready that early during boot).

Add a __bss_decrypted section attribute which can be used when defining
such shared variables. The so-defined variables will be placed in the
.bss..decrypted section. This section will be mapped with C=0 early
during boot.
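
A shared variable is then declared with the attribute, for example (a
sketch modeled on the kvmclock conversion in a follow-up patch):

	/* Lands in .bss..decrypted, i.e. mapped with C=0 early in boot. */
	static struct pvclock_wall_clock wall_clock __bss_decrypted;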

The .bss..decrypted section occupies a big chunk of memory that may go
unused when memory encryption is not active; free it in that case.
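
As a rough illustration, assuming 4K pages and 2M PMDs: if only a single
page of shared data is defined, the PMD alignment still reserves a full
2M, so almost all of it (2M - 4K) lies between
__start_bss_decrypted_unused and __end_bss_decrypted and can be handed
back to the page allocator.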

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Link: https://lkml.kernel.org/r/1536932759-12905-2-git-send-email-brijesh.singh@amd.com
arch/x86/include/asm/mem_encrypt.h
arch/x86/kernel/head64.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/mm/init.c
arch/x86/mm/mem_encrypt.c

diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e12aaf490c709dbedc428d2bc75f4..616f8e637bc32c2bd48dfc9964c6e94cb6cbc2cb 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
 
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
 
 bool sme_active(void);
 bool sev_active(void);
 
+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
 #else  /* !CONFIG_AMD_MEM_ENCRYPT */
 
 #define sme_me_mask    0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 
+#define __bss_decrypted
+
 #endif /* CONFIG_AMD_MEM_ENCRYPT */
 
 /*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
 #define __sme_pa(x)            (__pa(x) | sme_me_mask)
 #define __sme_pa_nodebug(x)    (__pa_nodebug(x) | sme_me_mask)
 
+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575ad39cb47cdbb055131e9bb094bb4d..c16af27eb23fe01b0597b5d8b313885f2ee1ff85 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -112,6 +112,7 @@ static bool __head check_la57_support(unsigned long physaddr)
 unsigned long __head __startup_64(unsigned long physaddr,
                                  struct boot_params *bp)
 {
+       unsigned long vaddr, vaddr_end;
        unsigned long load_delta, *p;
        unsigned long pgtable_flags;
        pgdval_t *pgd;
@@ -234,6 +235,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
        /* Encrypt the kernel and related (if SME is active) */
        sme_encrypt_kernel(bp);
 
+       /*
+        * Clear the memory encryption mask from the .bss..decrypted section.
+        * The bss section will be memset to zero later in the initialization so
+        * there is no need to zero it after changing the memory encryption
+        * attribute.
+        */
+       if (mem_encrypt_active()) {
+               vaddr = (unsigned long)__start_bss_decrypted;
+               vaddr_end = (unsigned long)__end_bss_decrypted;
+               for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+                       i = pmd_index(vaddr);
+                       pmd[i] -= sme_get_me_mask();
+               }
+       }
+
        /*
         * Return the SME encryption mask (if SME is active) to be used as a
         * modifier for the initial pgdir entry programmed into CR3.
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f8689620db0a34cc9df0eaa135132c6..5dd3317d761f4065b0fc8f7cdcac4e08be600f0e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
 #define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
 #define ALIGN_ENTRY_TEXT_END   . = ALIGN(PMD_SIZE);
 
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED                                          \
+       . = ALIGN(PMD_SIZE);                                    \
+       __start_bss_decrypted = .;                              \
+       *(.bss..decrypted);                                     \
+       . = ALIGN(PAGE_SIZE);                                   \
+       __start_bss_decrypted_unused = .;                       \
+       . = ALIGN(PMD_SIZE);                                    \
+       __end_bss_decrypted = .;                                \
+
 #else
 
 #define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
 
 #define ALIGN_ENTRY_TEXT_BEGIN
 #define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED
 
 #endif
 
@@ -355,6 +373,7 @@ SECTIONS
                __bss_start = .;
                *(.bss..page_aligned)
                *(.bss)
+               BSS_DECRYPTED
                . = ALIGN(PAGE_SIZE);
                __bss_stop = .;
        }
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c11155edac84372e2af7133e0bea07c36..faca978ebf9d8b46b6437f908b9ae506d3489da2 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
                set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
        e820__reallocate_tables();
 
+       mem_encrypt_free_decrypted_mem();
+
        free_kernel_image_pages(&__init_begin, &__init_end);
 }
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3380003860215a22686cde5bdcada..006f373f54aba1c06f352886034a894216abb082 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+       unsigned long vaddr, vaddr_end, npages;
+       int r;
+
+       vaddr = (unsigned long)__start_bss_decrypted_unused;
+       vaddr_end = (unsigned long)__end_bss_decrypted;
+       npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+       /*
+        * The unused memory range was mapped decrypted, change the encryption
+        * attribute from decrypted to encrypted before freeing it.
+        */
+       if (mem_encrypt_active()) {
+               r = set_memory_encrypted(vaddr, npages);
+               if (r) {
+                       pr_warn("failed to free unused decrypted pages\n");
+                       return;
+               }
+       }
+
+       free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
 void __init mem_encrypt_init(void)
 {
        if (!sme_me_mask)