KVM: PPC: Book3S HV: Implement functions to access quadrants 1 & 2
author Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Fri, 14 Dec 2018 05:29:05 +0000 (16:29 +1100)
committer Paul Mackerras <paulus@ozlabs.org>
Mon, 17 Dec 2018 00:33:50 +0000 (11:33 +1100)
The POWER9 radix mmu has the concept of quadrants. The quadrant number
is the two high bits of the effective address and determines the fully
qualified address to be used for the translation. The fully qualified
address consists of the effective lpid, the effective pid and the
effective address. This then gives 4 possible quadrants: 0, 1, 2, and 3.

When accessing these quadrants the fully qualified address is obtained
as follows:

Quadrant | Hypervisor      | Guest
--------------------------------------------------------------------------
         | EA[0:1] = 0b00  | EA[0:1] = 0b00
0        | effLPID = 0     | effLPID = LPIDR
         | effPID  = PIDR  | effPID  = PIDR
--------------------------------------------------------------------------
         | EA[0:1] = 0b01  |
1        | effLPID = LPIDR | Invalid Access
         | effPID  = PIDR  |
--------------------------------------------------------------------------
         | EA[0:1] = 0b10  |
2        | effLPID = LPIDR | Invalid Access
         | effPID  = 0     |
--------------------------------------------------------------------------
         | EA[0:1] = 0b11  | EA[0:1] = 0b11
3        | effLPID = 0     | effLPID = LPIDR
         | effPID  = 0     | effPID  = 0
--------------------------------------------------------------------------

In the Guest:
Quadrant 3 is normally used to address the operating system since this
uses effPID=0 and effLPID=LPIDR, meaning the PID register doesn't need to
be switched.
Quadrant 0 is normally used to address user space since the effLPID and
effPID are taken from the corresponding registers.

In the Host:
Quadrants 0 and 3 are used as above, however the effLPID is always 0 to
address the host.

Quadrants 1 and 2 can be used by the host to address guest memory using
a guest effective address. Since the effLPID comes from the LPID register,
the host loads the LPID of the guest it would like to access (and the
PID of the process) and can perform accesses to a guest effective
address.

This means quadrant 1 can be used to address the guest user space and
quadrant 2 can be used to address the guest operating system from the
hypervisor, using a guest effective address.

Access to the quadrants can cause a Hypervisor Data Storage Interrupt
(HDSI) due to being unable to perform partition scoped translation.
Previously this could only be generated from a guest and so the code
path expects us to take the KVM trampoline in the interrupt handler.
This is no longer the case so we modify the handler to call
bad_page_fault() to check if we were expecting this fault so we can
handle it gracefully and just return with an error code. In the hash mmu
case we still raise an unknown exception since quadrants aren't defined
for the hash mmu.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/mm/fault.c

index f8a5ac85a7df50523d40565fb5bd25afca10a1d0..b25a3f18b301cfbaf57e4c54b79d0c86609efb00 100644 (file)
@@ -188,6 +188,10 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
 extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
                        struct kvm_vcpu *vcpu,
                        unsigned long ea, unsigned long dsisr);
+extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                       void *to, unsigned long n);
+extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                     void *from, unsigned long n);
 extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
                                      struct kvmppc_pte *gpte, u64 root,
                                      u64 *pte_ret_p);
index 89d32bb79d5eeb91520e78364bca1590f7b265b1..db2691ff4c0bf67b6f907adaceb3c87c42bf9f89 100644 (file)
@@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common)
        bl      save_nvgprs
        RECONCILE_IRQ_STATE(r10, r11)
        addi    r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+       ld      r4,PACA_EXGEN+EX_DAR(r13)
+       lwz     r5,PACA_EXGEN+EX_DSISR(r13)
+       std     r4,_DAR(r1)
+       std     r5,_DSISR(r1)
+       li      r5,SIGSEGV
+       bl      bad_page_fault
+MMU_FTR_SECTION_ELSE
        bl      unknown_exception
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
        b       ret_from_except
 
 
index d675ad92c7adb5d1e253219db1f30dab89bdc9c7..c3f85c1b60d65c07d8e6f6a5d2911104274ac4b0 100644 (file)
  */
 static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
 
+static unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+                                       gva_t eaddr, void *to, void *from,
+                                       unsigned long n)
+{      /*
+        * Copy n bytes between a hypervisor buffer and the guest effective
+        * address eaddr, reached through radix quadrant 1 or 2 while the given
+        * lpid (and, for quadrant 1, pid) is loaded into the LPID/PID SPRs.
+        *
+        * A non-NULL 'to' selects a load from the guest: 'from' is replaced
+        * by eaddr with the quadrant number OR-ed into bits 62-63. Otherwise
+        * this is a store to the guest and 'to' is replaced the same way.
+        *
+        * Returns -EPERM (converted to unsigned long) when kvmhv_on_pseries()
+        * is true, else whatever raw_copy_{from,to}_user() returned -
+        * presumably the number of bytes left uncopied, which is how
+        * kvmhv_copy_from_guest_radix() treats a positive result.
+        */
+       unsigned long quadrant, ret = n;
+       int old_pid, old_lpid;  /* old_pid is only set and read for quadrant 1 */
+       bool is_load = !!to;
+
+       /* Can't access quadrants 1 or 2 in non-HV mode */
+       if (kvmhv_on_pseries()) {
+               /* TODO h-call */
+               return -EPERM;
+       }
+
+       quadrant = 1;           /* default: non-zero pid -> quadrant 1 */
+       if (!pid)
+               quadrant = 2;   /* pid 0 -> quadrant 2 */
+       if (is_load)
+               from = (void *) (eaddr | (quadrant << 62));
+       else
+               to = (void *) (eaddr | (quadrant << 62));
+
+       preempt_disable();      /* no preemption while the SPRs hold guest LPID/PID */
+
+       /* switch the lpid first to avoid running host with unallocated pid */
+       old_lpid = mfspr(SPRN_LPID);
+       if (old_lpid != lpid)
+               mtspr(SPRN_LPID, lpid);
+       if (quadrant == 1) {
+               old_pid = mfspr(SPRN_PID);
+               if (old_pid != pid)
+                       mtspr(SPRN_PID, pid);
+       }
+       isync();
+
+       /*
+        * NOTE(review): page faults are disabled around the copy, presumably
+        * so a faulting access makes the copy return short instead of being
+        * serviced - confirm against the fault handler.
+        */
+       pagefault_disable();
+       if (is_load)
+               ret = raw_copy_from_user(to, from, n);
+       else
+               ret = raw_copy_to_user(to, from, n);
+       pagefault_enable();
+
+       /* switch the pid first to avoid running host with unallocated pid */
+       if (quadrant == 1 && pid != old_pid)
+               mtspr(SPRN_PID, old_pid);
+       if (lpid != old_lpid)
+               mtspr(SPRN_LPID, old_lpid);
+       isync();
+
+       preempt_enable();
+
+       return ret;
+}
+
+static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                         void *to, void *from, unsigned long n)
+{      /*
+        * Validate eaddr, choose the lpid/pid to use for this vcpu and hand
+        * the copy off to __kvmhv_copy_tofrom_guest_radix().
+        *
+        * Bit numbers below count from the least significant bit (C shifts).
+        *
+        * Returns -EINVAL if any of eaddr bits 52-61 is set, otherwise the
+        * helper's return value converted to long.
+        */
+       int lpid = vcpu->kvm->arch.lpid;
+       int pid = vcpu->arch.pid;
+
+       /* This would cause a data segment intr so don't allow the access */
+       if (eaddr & (0x3FFUL << 52))
+               return -EINVAL;
+
+       /* If vcpu->arch.nested is set, use its shadow_lpid instead */
+       if (vcpu->arch.nested)
+               lpid = vcpu->arch.nested->shadow_lpid;
+
+       /* If accessing quadrant 3 then pid is expected to be 0 */
+       if (((eaddr >> 62) & 0x3) == 0x3)
+               pid = 0;
+
+       eaddr &= ~(0xFFFUL << 52);      /* clear bits 52-63; the helper ORs in its own quadrant */
+
+       return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
+}
+
+long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
+                                unsigned long n)
+{      /*
+        * Copy n bytes from guest effective address eaddr into the buffer
+        * 'to'. Passing from == NULL makes the helper take its load path.
+        *
+        * Returns the helper's result unchanged: negative on rejection, and
+        * a positive value is treated here as the count of trailing bytes
+        * that were not copied.
+        */
+       long ret;
+
+       ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
+       if (ret > 0)    /* zero the last 'ret' bytes of the destination */
+               memset(to + (n - ret), 0, ret);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
+
+long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
+                              unsigned long n)
+{      /*
+        * Copy n bytes from the buffer 'from' to guest effective address
+        * eaddr. Passing to == NULL makes the helper take its store path.
+        * The helper's return value is passed back unchanged.
+        */
+       return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
+}
+EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);
+
 int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
                               struct kvmppc_pte *gpte, u64 root,
                               u64 *pte_ret_p)
index 1697e903bbf28b944d3f6538875230825ab262df..2e6fb1d758c33c35de05720606c777ba3eb88482 100644 (file)
@@ -636,6 +636,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
        switch (TRAP(regs)) {
        case 0x300:
        case 0x380:
+       case 0xe00:
                printk(KERN_ALERT "Unable to handle kernel paging request for "
                        "data at address 0x%08lx\n", regs->dar);
                break;