From 7313d5217e6b9817897172d6a6ff477bdc415ed6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 11 Feb 2015 15:15:10 +1030 Subject: [PATCH] lguest: add iomem region, where guest page faults get sent to userspace. This lets us implement PCI. Signed-off-by: Rusty Russell --- drivers/lguest/lg.h | 7 ++++++- drivers/lguest/lguest_user.c | 3 ++- drivers/lguest/page_tables.c | 33 ++++++++++++++++++++++++++++++--- drivers/lguest/x86/core.c | 19 ++++++++++++++++++- tools/lguest/lguest.c | 3 ++- 5 files changed, 58 insertions(+), 7 deletions(-) diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 9da4f351e077..eb81abc05995 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -97,8 +97,12 @@ struct lguest { struct lg_cpu cpus[NR_CPUS]; unsigned int nr_cpus; + /* Valid guest memory pages must be < this. */ u32 pfn_limit; + /* Device memory is >= pfn_limit and < device_limit. */ + u32 device_limit; + /* * This provides the offset to the base of guest-physical memory in the * Launcher. @@ -200,7 +204,8 @@ void guest_pagetable_flush_user(struct lg_cpu *cpu); void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t val); void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); -bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode); +bool demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode, + unsigned long *iomem); void pin_page(struct lg_cpu *cpu, unsigned long vaddr); bool __guest_pa(struct lg_cpu *cpu, unsigned long vaddr, unsigned long *paddr); unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index be996d173615..c8b0e8575b44 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -385,7 +385,7 @@ static int initialize(struct file *file, const unsigned long __user *input) /* "struct lguest" contains all we (the Host) know about a Guest. */ struct lguest *lg; int err; - unsigned long args[3]; + unsigned long args[4]; /* * We grab the Big Lguest lock, which protects against multiple @@ -419,6 +419,7 @@ static int initialize(struct file *file, const unsigned long __user *input) /* Populate the easy fields of our "struct lguest" */ lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; + lg->device_limit = args[3]; /* This is the first cpu (cpu 0) and it will start booting at args[2] */ err = lg_cpu_start(&lg->cpus[0], 0, args[2]); diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 69c35caa955a..e3abebc912c0 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -250,6 +250,16 @@ static void release_pte(pte_t pte) } /*:*/ +static bool gpte_in_iomem(struct lg_cpu *cpu, pte_t gpte) +{ + /* We don't handle large pages. */ + if (pte_flags(gpte) & _PAGE_PSE) + return false; + + return (pte_pfn(gpte) >= cpu->lg->pfn_limit + && pte_pfn(gpte) < cpu->lg->device_limit); +} + static bool check_gpte(struct lg_cpu *cpu, pte_t gpte) { if ((pte_flags(gpte) & _PAGE_PSE) || @@ -374,8 +384,14 @@ static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate, * * If we fixed up the fault (ie. we mapped the address), this routine returns * true. Otherwise, it was a real fault and we need to tell the Guest. + * + * There's a corner case: they're trying to access memory between + * pfn_limit and device_limit, which is I/O memory. In this case, we + * return false and set @iomem to the physical address, so the the + * Launcher can handle the instruction manually. */ -bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) +bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode, + unsigned long *iomem) { unsigned long gpte_ptr; pte_t gpte; @@ -383,6 +399,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) pmd_t gpmd; pgd_t gpgd; + *iomem = 0; + /* We never demand page the Switcher, so trying is a mistake. */ if (vaddr >= switcher_addr) return false; @@ -459,6 +477,12 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) return false; + /* If they're accessing io memory, we expect a fault. */ + if (gpte_in_iomem(cpu, gpte)) { + *iomem = (pte_pfn(gpte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); + return false; + } + /* * Check that the Guest PTE flags are OK, and the page number is below * the pfn_limit (ie. not mapping the Launcher binary). @@ -553,7 +577,9 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) */ void pin_page(struct lg_cpu *cpu, unsigned long vaddr) { - if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) + unsigned long iomem; + + if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2, &iomem)) kill_guest(cpu, "bad stack page %#lx", vaddr); } /*:*/ @@ -928,7 +954,8 @@ static void __guest_set_pte(struct lg_cpu *cpu, int idx, * now. This shaves 10% off a copy-on-write * micro-benchmark. */ - if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { + if ((pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) + && !gpte_in_iomem(cpu, gpte)) { if (!check_gpte(cpu, gpte)) return; set_pte(spte, diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 42e87bf14113..18d841e738bc 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -362,9 +362,19 @@ static void setup_emulate_insn(struct lg_cpu *cpu) sizeof(cpu->pending.insn)); } +static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr) +{ + cpu->pending.trap = 14; + cpu->pending.addr = iomem_addr; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); +} + /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ void lguest_arch_handle_trap(struct lg_cpu *cpu) { + unsigned long iomem_addr; + switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ /* Hand to Launcher to emulate those pesky IN and OUT insns */ @@ -385,8 +395,15 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * whether kernel or userspace code. */ if (demand_page(cpu, cpu->arch.last_pagefault, - cpu->regs->errcode)) + cpu->regs->errcode, &iomem_addr)) + return; + + /* Was this an access to memory mapped IO? */ + if (iomem_addr) { + /* Tell Launcher, let it handle it. */ + setup_iomem_insn(cpu, iomem_addr); return; + } /* * OK, it's really not there (or not OK): the Guest needs to diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 485fe13db12e..02f353989e6c 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -548,7 +548,8 @@ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), start }; + guest_limit / getpagesize(), start, + guest_limit / getpagesize() }; verbose("Guest: %p - %p (%#lx)\n", guest_base, guest_base + guest_limit, guest_limit); lguest_fd = open_or_die("/dev/lguest", O_RDWR); -- 2.30.2