return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
}
+/*
+ * Report whether the protection field of an HPTE's second word (ptel)
+ * allows the page to be written.
+ *
+ * Only the HPTE_R_PP0 and HPTE_R_PP bits are examined.  Returns 0 when
+ * they encode PP_RXRX or PP_RXXX, and 1 for every other encoding.
+ */
+static inline int hpte_is_writable(unsigned long ptel)
+{
+	unsigned long prot = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+	/* The two read-only encodings are the only non-writable ones. */
+	if (prot == PP_RXRX || prot == PP_RXXX)
+		return 0;
+	return 1;
+}
+
+/*
+ * Return a copy of ptel (an HPTE's second word) whose protection bits
+ * have been switched to a read-only encoding.
+ *
+ * If HPTE_R_PP0 is set, or the HPTE_R_PP field equals PP_RWXX, the
+ * HPTE_R_PP field is cleared and PP_RXXX is OR-ed in.  Otherwise
+ * PP_RXRX is OR-ed into the existing value.  All other bits of ptel
+ * are passed through unchanged.
+ */
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+	int use_rxxx = (ptel & HPTE_R_PP0) != 0 ||
+		       (ptel & HPTE_R_PP) == PP_RWXX;
+
+	if (!use_rxxx)
+		return ptel | PP_RXRX;
+
+	return (ptel & ~HPTE_R_PP) | PP_RXXX;
+}
+
static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
{
unsigned int wimg = ptel & HPTE_R_WIMG;
* Lock and read a linux PTE. If it's present and writable, atomically
* set dirty and referenced bits and return the PTE, otherwise return 0.
*/
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
{
pte_t pte, tmp;
if (pte_present(pte)) {
pte = pte_mkyoung(pte);
- if (pte_write(pte))
+ if (writing && pte_write(pte))
pte = pte_mkdirty(pte);
}
struct page *page, *pages[1];
long index, ret, npages;
unsigned long is_io;
+ unsigned int writing, write_ok;
struct vm_area_struct *vma;
/*
pfn = 0;
page = NULL;
pte_size = PAGE_SIZE;
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+ /* If writing != 0, then the HPTE must allow writing, if we get here */
+ write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, 1, pages);
+ npages = get_user_pages_fast(hva, 1, writing, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
down_read(&current->mm->mmap_sem);
((hva - vma->vm_start) >> PAGE_SHIFT);
pte_size = psize;
is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ write_ok = vma->vm_flags & VM_WRITE;
}
up_read(&current->mm->mmap_sem);
if (!pfn)
page = compound_head(page);
pte_size <<= compound_order(page);
}
+ /* if the guest wants write access, see if that is OK */
+ if (!writing && hpte_is_writable(r)) {
+ pte_t *ptep, pte;
+
+ /*
+ * We need to protect against page table destruction
+ * while looking up and updating the pte.
+ */
+ rcu_read_lock_sched();
+ ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+ hva, NULL);
+ if (ptep && pte_present(*ptep)) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1);
+ if (pte_write(pte))
+ write_ok = 1;
+ }
+ rcu_read_unlock_sched();
+ }
pfn = page_to_pfn(page);
}
/* Set the HPTE to point to pfn */
r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+ if (hpte_is_writable(r) && !write_ok)
+ r = hpte_make_readonly(r);
ret = RESUME_GUEST;
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
unlock_rmap(rmap);
goto out_unlock;
}
- kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+ if (hptep[0] & HPTE_V_VALID) {
+ /* HPTE was previously valid, so we need to invalidate it */
+ unlock_rmap(rmap);
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, index);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+ }
hptep[1] = r;
eieio();
hptep[0] = hpte[0];
asm volatile("ptesync" : : : "memory");
preempt_enable();
- if (page)
+ if (page && hpte_is_writable(r))
SetPageDirty(page);
out_put:
}
static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
- unsigned long *pte_sizep)
+ int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
return __pte(0);
if (!pte_present(*ptep))
return __pte(0);
- return kvmppc_read_update_linux_pte(ptep);
+ return kvmppc_read_update_linux_pte(ptep, writing);
}
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long is_io;
unsigned long *rmap;
pte_t pte;
+ unsigned int writing;
unsigned long mmu_seq;
bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel);
if (!psize)
return H_PARAMETER;
+ writing = hpte_is_writable(ptel);
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
/* used later to detect if we might have been invalidated */
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(vcpu, hva, &pte_size);
+ pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
is_io = hpte_cache_bits(pte_val(pte));
pa = pte_pfn(pte) << PAGE_SHIFT;
}
/*
* Called in real mode to check whether an HPTE not found fault
- * is due to accessing a paged-out page or an emulated MMIO page.
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
* Returns a possibly modified status (DSISR) value if not
* (i.e. pass the interrupt to the guest),
* -1 to pass the fault up to host kernel mode code, -2 to do that
struct revmap_entry *rev;
unsigned long pp, key;
- valid = HPTE_V_VALID | HPTE_V_ABSENT;
+ /* For protection fault, expect to find a valid HPTE */
+ valid = HPTE_V_VALID;
+ if (status & DSISR_NOHPTE)
+ valid |= HPTE_V_ABSENT;
index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
- if (index < 0)
- return status; /* there really was no HPTE */
-
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
v = hpte[0] & ~HPTE_V_HVLOCK;
r = hpte[1];
asm volatile("lwsync" : : : "memory");
hpte[0] = v;
- /* If the HPTE is valid by now, retry the instruction */
- if (v & HPTE_V_VALID)
+ /* For not found, if the HPTE is valid by now, retry the instruction */
+ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
return 0;
/* Check access permissions to the page */
kvmppc_hdsi:
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
- /* HPTE not found fault? */
- andis. r0, r6, DSISR_NOHPTE@h
+ /* HPTE not found fault or protection fault? */
+ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f