u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
- u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
--- /dev/null
+ /*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification. This includes the 6xx, 7xx, 7xxx,
+ * and 8260 implementations but excludes the 8xx and 4xx.
+ * -- paulus
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/memblock.h>
+
+ #include <asm/prom.h>
+ #include <asm/mmu.h>
+ #include <asm/machdep.h>
+ #include <asm/code-patching.h>
+ #include <asm/sections.h>
+
+ #include <mm/mmu_decl.h>
+
+ struct hash_pte *Hash;
+ static unsigned long Hash_size, Hash_mask;
+ unsigned long _SDR1;
+ static unsigned int hash_mb, hash_mb2;
+
+ struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
+
+ struct batrange { /* stores address ranges mapped by BATs */
+ unsigned long start;
+ unsigned long limit;
+ phys_addr_t phys;
+ } bat_addrs[8];
+
+ /*
+ * Return PA for this VA if it is mapped by a BAT, or 0
+ */
+ phys_addr_t v_block_mapped(unsigned long va)
+ {
+ int b;
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+ if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
+ return bat_addrs[b].phys + (va - bat_addrs[b].start);
+ return 0;
+ }
+
+ /*
+ * Return VA for a given PA or 0 if not mapped
+ */
+ unsigned long p_block_mapped(phys_addr_t pa)
+ {
+ int b;
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+ if (pa >= bat_addrs[b].phys
+ && pa < (bat_addrs[b].limit-bat_addrs[b].start)
+ +bat_addrs[b].phys)
+ return bat_addrs[b].start+(pa-bat_addrs[b].phys);
+ return 0;
+ }
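
For a rough illustration of what the two lookups above compute, here is a standalone userspace sketch using a mocked-up bat_addrs[]-style table; the demo_* names, the struct layout and the sample addresses are invented for the example and are not part of the patch:

#include <stdio.h>

struct demo_batrange {                  /* mirrors struct batrange above */
        unsigned long start, limit;     /* virtual range covered by one BAT */
        unsigned long phys;             /* physical base it maps to */
};

static struct demo_batrange demo_bats[] = {
        { 0xc0000000UL, 0xc8000000UL, 0x00000000UL },   /* hypothetical 128M BAT */
};

static unsigned long demo_v_block_mapped(unsigned long va)
{
        for (unsigned int b = 0; b < sizeof(demo_bats) / sizeof(demo_bats[0]); b++)
                if (va >= demo_bats[b].start && va < demo_bats[b].limit)
                        return demo_bats[b].phys + (va - demo_bats[b].start);
        return 0;
}

int main(void)
{
        printf("%#lx\n", demo_v_block_mapped(0xc1234000UL)); /* 0x1234000 */
        printf("%#lx\n", demo_v_block_mapped(0xd0000000UL)); /* 0: not BAT-mapped */
        return 0;
}

p_block_mapped() is the same walk in the other direction, keyed on the physical range instead of the virtual one.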
+
+ static int find_free_bat(void)
+ {
+ int b;
+
+ if (cpu_has_feature(CPU_FTR_601)) {
+ for (b = 0; b < 4; b++) {
+ struct ppc_bat *bat = BATS[b];
+
+ if (!(bat[0].batl & 0x40))
+ return b;
+ }
+ } else {
+ int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+
+ for (b = 0; b < n; b++) {
+ struct ppc_bat *bat = BATS[b];
+
+ if (!(bat[1].batu & 3))
+ return b;
+ }
+ }
+ return -1;
+ }
+
++/*
++ * This function calculates the size of the larger block usable to map the
++ * beginning of an area based on the start address and size of that area:
++ * - max block size is 8M on 601 and 256M on other 6xx.
++ * - base address must be aligned to the block size. So the maximum block size
++ * is identified by the lowest bit set to 1 in the base address (for instance
++ * if base is 0x16000000, max size is 0x02000000).
++ * - block size has to be a power of two. This is calculated by finding the
++ * highest bit set to 1.
++ */
+ static unsigned int block_size(unsigned long base, unsigned long top)
+ {
+ unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- unsigned int base_shift = (fls(base) - 1) & 31;
++ unsigned int base_shift = (ffs(base) - 1) & 31;
+ unsigned int block_shift = (fls(top - base) - 1) & 31;
+
+ return min3(max_size, 1U << base_shift, 1U << block_shift);
+ }
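
To make the comment above concrete, a minimal userspace sketch of the same arithmetic, assuming a non-601 CPU (256M cap); demo_ffs()/demo_fls() are stand-ins for the kernel helpers and the sample addresses are just examples:

#include <stdio.h>

/* userspace stand-ins for the kernel's ffs()/fls() (1-based bit positions) */
static unsigned int demo_ffs(unsigned int x) { return x ? (unsigned int)__builtin_ctz(x) + 1 : 0; }
static unsigned int demo_fls(unsigned int x) { return x ? 32 - (unsigned int)__builtin_clz(x) : 0; }

static unsigned int demo_block_size(unsigned int base, unsigned int top)
{
        unsigned int max_size = 256U << 20;     /* non-601: 256M max BAT size */
        unsigned int base_shift = (demo_ffs(base) - 1) & 31;
        unsigned int block_shift = (demo_fls(top - base) - 1) & 31;
        unsigned int size = max_size;

        if ((1U << base_shift) < size)
                size = 1U << base_shift;
        if ((1U << block_shift) < size)
                size = 1U << block_shift;
        return size;
}

int main(void)
{
        /* base 0x16000000: lowest set bit is 0x02000000, so alignment wins */
        printf("%#x\n", demo_block_size(0x16000000, 0x20000000)); /* 0x2000000 */
        /* base 0: fully aligned, so the 256M cap and the area size decide */
        printf("%#x\n", demo_block_size(0, 0x0c000000));          /* 0x8000000 (128M) */
        return 0;
}

This is the block size __mmu_mapin_ram() below uses greedily, one free BAT at a time, until the area is covered or the block would drop under 128K.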
+
+ /*
+ * Set up one of the IBAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ * Only for 603+ ...
+ */
+ static void setibat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+ {
+ unsigned int bl = (size >> 17) - 1;
+ int wimgxpp;
+ struct ppc_bat *bat = BATS[index];
+ unsigned long flags = pgprot_val(prot);
+
+ if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+ flags &= ~_PAGE_COHERENT;
+
+ wimgxpp = (flags & _PAGE_COHERENT) | (_PAGE_EXEC ? BPP_RX : BPP_XX);
+ bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (flags & _PAGE_USER)
+ bat[0].batu |= 1; /* Vp = 1 */
+ }
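
As a quick sanity check on the BL field used above for the 603+ path (bl = (size >> 17) - 1, i.e. the size expressed in 128K units minus one), a standalone sketch; the values printed follow directly from that formula and the demo_bl() name is invented:

#include <stdio.h>

/* BL field as computed in setibat()/setbat(): size in 128K units, minus one */
static unsigned int demo_bl(unsigned int size)
{
        return (size >> 17) - 1;
}

int main(void)
{
        printf("128K -> bl=%#x\n", demo_bl(128 << 10));   /* 0x0   */
        printf("8M   -> bl=%#x\n", demo_bl(8 << 20));     /* 0x3f  */
        printf("256M -> bl=%#x\n", demo_bl(256 << 20));   /* 0x7ff */
        return 0;
}

On 601 the field lives in the lower BAT word and is capped at BL_8M, as the setbat() code further down shows.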
+
+ static void clearibat(int index)
+ {
+ struct ppc_bat *bat = BATS[index];
+
+ bat[0].batu = 0;
+ bat[0].batl = 0;
+ }
+
+ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+ {
+ int idx;
+
+ while ((idx = find_free_bat()) != -1 && base != top) {
+ unsigned int size = block_size(base, top);
+
+ if (size < 128 << 10)
+ break;
+ setbat(idx, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+ base += size;
+ }
+
+ return base;
+ }
+
+ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+ {
- int done;
++ unsigned long done;
+ unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+
+ if (__map_without_bats) {
+ pr_debug("RAM mapped without BATs\n");
+ return base;
+ }
+
+ if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+ return __mmu_mapin_ram(base, top);
+
+ done = __mmu_mapin_ram(base, border);
- if (done != border - base)
++ if (done != border)
+ return done;
+
- return done + __mmu_mapin_ram(border, top);
++ return __mmu_mapin_ram(border, top);
+ }
+
+ void mmu_mark_initmem_nx(void)
+ {
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+ unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+ unsigned long top = (unsigned long)_etext - PAGE_OFFSET;
+ unsigned long size;
+
+ if (cpu_has_feature(CPU_FTR_601))
+ return;
+
+ for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) {
+ size = block_size(base, top);
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+ base += size;
+ }
+ if (base < top) {
+ size = block_size(base, top);
+ size = max(size, 128UL << 10);
+ if ((top - base) > size) {
+ if (strict_kernel_rwx_enabled())
+ pr_warn("Kernel _etext not properly aligned\n");
+ size <<= 1;
+ }
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT);
+ base += size;
+ }
+ for (; i < nb; i++)
+ clearibat(i);
+
+ update_bats();
+
+ for (i = TASK_SIZE >> 28; i < 16; i++) {
+ /* Do not set NX on VM space for modules */
+ if (IS_ENABLED(CONFIG_MODULES) &&
+ (VMALLOC_START & 0xf0000000) == i << 28)
+ break;
+ mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
+ }
+ }
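
The loop at the end of mmu_mark_initmem_nx() sets the 0x10000000 (no-execute) bit in every segment register above user space, but stops at the vmalloc segment when modules are enabled so module text stays executable. A toy walk-through of that loop with made-up TASK_SIZE/VMALLOC_START values (both are assumptions, not the real configuration):

#include <stdio.h>

int main(void)
{
        unsigned long task_size = 0xb0000000UL;      /* assumed user space top */
        unsigned long vmalloc_start = 0xd0000000UL;  /* assumed vmalloc/module area */
        int modules = 1;

        for (unsigned long i = task_size >> 28; i < 16; i++) {
                if (modules && (vmalloc_start & 0xf0000000UL) == i << 28) {
                        printf("stop at segment %#lx0000000 (modules need exec)\n", i);
                        break;
                }
                printf("segment %#lx0000000: set N (no-execute) bit\n", i);
        }
        return 0;
}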
+
+ void mmu_mark_rodata_ro(void)
+ {
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+
+ if (cpu_has_feature(CPU_FTR_601))
+ return;
+
+ for (i = 0; i < nb; i++) {
+ struct ppc_bat *bat = BATS[i];
+
+ if (bat_addrs[i].start < (unsigned long)__init_begin)
+ bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
+ }
+
+ update_bats();
+ }
+
+ /*
+ * Set up one of the I/D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ * On 603+, only set IBAT when _PAGE_EXEC is set
+ */
+ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+ {
+ unsigned int bl;
+ int wimgxpp;
+ struct ppc_bat *bat = BATS[index];
+ unsigned long flags = pgprot_val(prot);
+
+ if ((flags & _PAGE_NO_CACHE) ||
+ (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
+ flags &= ~_PAGE_COHERENT;
+
+ bl = (size >> 17) - 1;
+ if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+ /* 603, 604, etc. */
+ /* Do DBAT first */
+ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+ | _PAGE_COHERENT | _PAGE_GUARDED);
+ wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+ bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (flags & _PAGE_USER)
+ bat[1].batu |= 1; /* Vp = 1 */
+ if (flags & _PAGE_GUARDED) {
+ /* G bit must be zero in IBATs */
+ flags &= ~_PAGE_EXEC;
+ }
+ if (flags & _PAGE_EXEC)
+ bat[0] = bat[1];
+ else
+ bat[0].batu = bat[0].batl = 0;
+ } else {
+ /* 601 cpu */
+ if (bl > BL_8M)
+ bl = BL_8M;
+ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+ | _PAGE_COHERENT);
+ wimgxpp |= (flags & _PAGE_RW)?
+ ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
+ bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
+ bat->batl = phys | bl | 0x40; /* V=1 */
+ }
+
+ bat_addrs[index].start = virt;
+ bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+ bat_addrs[index].phys = phys;
+ }
+
+ /*
+ * Preload a translation in the hash table
+ */
+ void hash_preload(struct mm_struct *mm, unsigned long ea,
+ bool is_exec, unsigned long trap)
+ {
+ pmd_t *pmd;
+
+ if (!Hash)
+ return;
+ pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
+ if (!pmd_none(*pmd))
+ add_hash_page(mm->context.id, ea, pmd_val(*pmd));
+ }
+
+ /*
+ * Initialize the hash table and patch the instructions in hashtable.S.
+ */
+ void __init MMU_init_hw(void)
+ {
+ unsigned int n_hpteg, lg_n_hpteg;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+
+ if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+
+ #define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
+ #define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
+ #define MIN_N_HPTEG 1024 /* min 64kB hash table */
+
+ /*
+ * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+ * This is less than the recommended amount, but then
+ * Linux ain't AIX.
+ */
+ n_hpteg = total_memory / (PAGE_SIZE * 8);
+ if (n_hpteg < MIN_N_HPTEG)
+ n_hpteg = MIN_N_HPTEG;
+ lg_n_hpteg = __ilog2(n_hpteg);
+ if (n_hpteg & (n_hpteg - 1)) {
+ ++lg_n_hpteg; /* round up if not power of 2 */
+ n_hpteg = 1 << lg_n_hpteg;
+ }
+ Hash_size = n_hpteg << LG_HPTEG_SIZE;
+
+ /*
+ * Find some memory for the hash table.
+ */
+ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+ Hash = memblock_alloc(Hash_size, Hash_size);
+ if (!Hash)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, Hash_size, Hash_size);
+ _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+
+ pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+ (unsigned long long)(total_memory >> 20), Hash_size >> 10);
+
+
+ Hash_mask = n_hpteg - 1;
+ hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+ if (lg_n_hpteg > 16)
+ hash_mb2 = 16 - LG_HPTEG_SIZE;
+ }
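
To see the sizing rule above in numbers ("1 HPTE per page", i.e. one 64-byte HPTEG per 8 pages, rounded up to a power of two, minimum 64kB), a userspace sketch of the same arithmetic; the 512MB total_memory figure is only an example:

#include <stdio.h>

#define LG_HPTEG_SIZE 6        /* 64 bytes per HPTEG, as above */
#define MIN_N_HPTEG   1024     /* min 64kB hash table */

int main(void)
{
        unsigned long long total_memory = 512ULL << 20;  /* example: 512MB of RAM */
        unsigned long page_size = 4096;

        unsigned long n_hpteg = total_memory / (page_size * 8);
        if (n_hpteg < MIN_N_HPTEG)
                n_hpteg = MIN_N_HPTEG;

        /* round up to a power of two, like the __ilog2() dance above */
        unsigned int lg = 0;
        while ((1UL << lg) < n_hpteg)
                lg++;
        n_hpteg = 1UL << lg;

        printf("n_hpteg = %lu, Hash_size = %lukB, hash_mb = %u\n",
               n_hpteg, (n_hpteg << LG_HPTEG_SIZE) >> 10, 32 - LG_HPTEG_SIZE - lg);
        return 0;
}

For 512MB this gives 16384 HPTEGs, a 1MB hash table and hash_mb = 12, which is what ends up patched into hashtable.S by MMU_init_hw_patch().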
+
+ void __init MMU_init_hw_patch(void)
+ {
+ unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+
+ if (ppc_md.progress)
+ ppc_md.progress("hash:patch", 0x345);
+ if (ppc_md.progress)
+ ppc_md.progress("hash:done", 0x205);
+
+ /* WARNING: Make sure nothing can trigger a KASAN check past this point */
+
+ /*
+ * Patch up the instructions in hashtable.S:create_hpte
+ */
+ modify_instruction_site(&patch__hash_page_A0, 0xffff,
+ ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+ modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
+ modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
+ modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
+
+ /*
+ * Patch up the instructions in hashtable.S:flush_hash_page
+ */
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff,
+ ((unsigned int)Hash - PAGE_OFFSET) >> 16);
+ modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
+ modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
+ }
+
+ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+ {
+ /* We don't currently support the first MEMBLOCK not mapping 0
+ * physical on those processors
+ */
+ BUG_ON(first_memblock_base != 0);
+
+ /* 601 can only access 16MB at the moment */
+ if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
+ else /* Anything else has 256M mapped */
+ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
+ }
+
+ void __init print_system_hash_info(void)
+ {
+ pr_info("Hash_size = 0x%lx\n", Hash_size);
+ if (Hash_mask)
+ pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+ }
+
+ #ifdef CONFIG_PPC_KUEP
+ void __init setup_kuep(bool disabled)
+ {
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (cpu_has_feature(CPU_FTR_601))
+ pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+
+ if (disabled)
+ pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+ }
+ #endif
+
+ #ifdef CONFIG_PPC_KUAP
+ void __init setup_kuap(bool disabled)
+ {
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+ }
+ #endif
--- /dev/null
+ /*
+ * IOMMU helpers in MMU context.
+ *
+ * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+ #include <linux/sched/signal.h>
+ #include <linux/slab.h>
+ #include <linux/rculist.h>
+ #include <linux/vmalloc.h>
+ #include <linux/mutex.h>
+ #include <linux/migrate.h>
+ #include <linux/hugetlb.h>
+ #include <linux/swap.h>
+ #include <linux/sizes.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pte-walk.h>
+ #include <linux/mm_inline.h>
+
+ static DEFINE_MUTEX(mem_list_mutex);
+
+ #define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
+ #define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
+
+ struct mm_iommu_table_group_mem_t {
+ struct list_head next;
+ struct rcu_head rcu;
+ unsigned long used;
+ atomic64_t mapped;
+ unsigned int pageshift;
+ u64 ua; /* userspace address */
+ u64 entries; /* number of entries in hpas/hpages[] */
+ /*
+ * in mm_iommu_get we temporarily use this to store
+ * struct page address.
+ *
+ * We need to convert ua to hpa in real mode. Make it
+ * simpler by storing physical address.
+ */
+ union {
+ struct page **hpages; /* vmalloc'ed */
+ phys_addr_t *hpas;
+ };
+ #define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
+ u64 dev_hpa; /* Device memory base address */
+ };
+
+ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
+ unsigned long npages, bool incr)
+ {
+ long ret = 0, locked, lock_limit;
+
+ if (!npages)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ if (incr) {
+ locked = mm->locked_vm + npages;
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+ ret = -ENOMEM;
+ else
+ mm->locked_vm += npages;
+ } else {
+ if (WARN_ON_ONCE(npages > mm->locked_vm))
+ npages = mm->locked_vm;
+ mm->locked_vm -= npages;
+ }
+
+ pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n",
+ current ? current->pid : 0,
+ incr ? '+' : '-',
+ npages << PAGE_SHIFT,
+ mm->locked_vm << PAGE_SHIFT,
+ rlimit(RLIMIT_MEMLOCK));
+ up_write(&mm->mmap_sem);
+
+ return ret;
+ }
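
The helper above only accounts pinned pages against RLIMIT_MEMLOCK. A minimal standalone sketch of that check with a made-up limit and counter (locking, mm handling and the CAP_IPC_LOCK lookup are simplified away; demo_adjust_locked_vm() is an invented name):

#include <stdio.h>
#include <errno.h>

/* toy model: all values are page counts */
static long demo_adjust_locked_vm(unsigned long *locked_vm, unsigned long npages,
                                  int incr, unsigned long lock_limit, int cap_ipc_lock)
{
        if (!npages)
                return 0;
        if (incr) {
                if (*locked_vm + npages > lock_limit && !cap_ipc_lock)
                        return -ENOMEM;
                *locked_vm += npages;
        } else {
                if (npages > *locked_vm)        /* mirrors the WARN_ON_ONCE clamp */
                        npages = *locked_vm;
                *locked_vm -= npages;
        }
        return 0;
}

int main(void)
{
        unsigned long locked_vm = 0, limit = 16384;   /* e.g. 64MB of 4K pages */

        printf("pin 10000: %ld\n", demo_adjust_locked_vm(&locked_vm, 10000, 1, limit, 0)); /* 0 */
        printf("pin 10000: %ld\n", demo_adjust_locked_vm(&locked_vm, 10000, 1, limit, 0)); /* -ENOMEM */
        printf("locked_vm = %lu pages\n", locked_vm);
        return 0;
}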
+
+ bool mm_iommu_preregistered(struct mm_struct *mm)
+ {
+ return !list_empty(&mm->context.iommu_group_mem_list);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+ {
- struct mm_iommu_table_group_mem_t *mem;
- long i, ret, locked_entries = 0;
++ struct mm_iommu_table_group_mem_t *mem, *mem2;
++ long i, ret, locked_entries = 0, pinned = 0;
+ unsigned int pageshift;
++ unsigned long entry, chunk;
+
- mutex_lock(&mem_list_mutex);
-
- list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
- next) {
- /* Overlap? */
- if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
- (ua < (mem->ua +
- (mem->entries << PAGE_SHIFT)))) {
- ret = -EINVAL;
- goto unlock_exit;
- }
-
- }
-
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ if (ret)
- goto unlock_exit;
++ return ret;
+
+ locked_entries = entries;
+ }
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem) {
+ ret = -ENOMEM;
+ goto unlock_exit;
+ }
+
+ if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+ mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+ mem->dev_hpa = dev_hpa;
+ goto good_exit;
+ }
+ mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
+ /*
+ * For a starting point for a maximum page size calculation
+ * we use @ua and @entries natural alignment to allow IOMMU pages
+ * smaller than huge pages but still bigger than PAGE_SIZE.
+ */
+ mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
+ mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
+ if (!mem->hpas) {
+ kfree(mem);
+ ret = -ENOMEM;
+ goto unlock_exit;
+ }
+
+ down_read(&mm->mmap_sem);
- ret = get_user_pages_longterm(ua, entries, FOLL_WRITE, mem->hpages, NULL);
++ chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
++ sizeof(struct vm_area_struct *);
++ chunk = min(chunk, entries);
++ for (entry = 0; entry < entries; entry += chunk) {
++ unsigned long n = min(entries - entry, chunk);
++
++ ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n,
++ FOLL_WRITE, mem->hpages + entry, NULL);
++ if (ret == n) {
++ pinned += n;
++ continue;
++ }
++ if (ret > 0)
++ pinned += ret;
++ break;
++ }
+ up_read(&mm->mmap_sem);
- if (ret != entries) {
- /* free the reference taken */
- for (i = 0; i < ret; i++)
- put_page(mem->hpages[i]);
-
- vfree(mem->hpas);
- kfree(mem);
- ret = -EFAULT;
- goto unlock_exit;
++ if (pinned != entries) {
++ if (!ret)
++ ret = -EFAULT;
++ goto free_exit;
+ }
+
+ pageshift = PAGE_SHIFT;
+ for (i = 0; i < entries; ++i) {
+ struct page *page = mem->hpages[i];
+
+ /*
+ * Allow to use larger than 64k IOMMU pages. Only do that
+ * if we are backed by hugetlb.
+ */
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
+ struct page *head = compound_head(page);
+
+ pageshift = compound_order(head) + PAGE_SHIFT;
+ }
+ mem->pageshift = min(mem->pageshift, pageshift);
+ /*
+ * We don't need struct page reference any more, switch
+ * to physical address.
+ */
+ mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+ }
+
+ good_exit:
- ret = 0;
+ atomic64_set(&mem->mapped, 1);
+ mem->used = 1;
+ mem->ua = ua;
+ mem->entries = entries;
- *pmem = mem;
-
- list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
-
-unlock_exit:
- if (locked_entries && ret)
- mm_iommu_adjust_locked_vm(mm, locked_entries, false);
+
++ mutex_lock(&mem_list_mutex);
+
++ list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) {
++ /* Overlap? */
++ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
++ (ua < (mem2->ua +
++ (mem2->entries << PAGE_SHIFT)))) {
++ ret = -EINVAL;
++ mutex_unlock(&mem_list_mutex);
++ goto free_exit;
++ }
++ }
++
++ list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+
+ mutex_unlock(&mem_list_mutex);
+
++ *pmem = mem;
++
++ return 0;
++
++free_exit:
++ /* free the reference taken */
++ for (i = 0; i < pinned; i++)
++ put_page(mem->hpages[i]);
++
++ vfree(mem->hpas);
++ kfree(mem);
++
++unlock_exit:
++ mm_iommu_adjust_locked_vm(mm, locked_entries, false);
++
+ return ret;
+ }
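
Two bits of arithmetic in mm_iommu_do_alloc() are easy to check standalone: the chunk size that bounds each get_user_pages_longterm() call, and the starting pageshift derived from the natural alignment of @ua and the region size. A sketch with assumed values (MAX_ORDER = 11, 4K pages, the address and size are made up; sizeof(void *) is the same pointer size the real division by sizeof(struct vm_area_struct *) uses, 64-bit build assumed):

#include <stdio.h>

int main(void)
{
        unsigned long page_shift = 12, max_order = 11;
        unsigned long ptr_size = sizeof(void *);
        unsigned long ua = 0x7f0123400000UL;      /* made-up preregistered user address */
        unsigned long entries = 0x100000;         /* 1M pages = 4GB */

        /* largest number of pages one get_user_pages_longterm() call is asked to pin */
        unsigned long chunk = (1UL << (page_shift + max_order - 1)) / ptr_size;
        if (chunk > entries)
                chunk = entries;

        /* starting pageshift: natural alignment of the region's address and size */
        unsigned long v = ua | (entries << page_shift);
        unsigned int pageshift = 0;
        while (!(v & 1)) {              /* open-coded __ffs() */
                v >>= 1;
                pageshift++;
        }

        printf("chunk = %lu pages, %lu pinning calls, starting pageshift = %u\n",
               chunk, (entries + chunk - 1) / chunk, pageshift);
        return 0;
}

With these numbers the 4GB region is pinned in two calls of 512K pages each, and the starting pageshift is 22 (4MB), later clamped per page by the hugetlb check in the loop above.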
+
+ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+ {
+ return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+ pmem);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_new);
+
+ long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+ {
+ return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
+ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+ {
+ long i;
+ struct page *page = NULL;
+
+ if (!mem->hpas)
+ return;
+
+ for (i = 0; i < mem->entries; ++i) {
+ if (!mem->hpas[i])
+ continue;
+
+ page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
+ if (!page)
+ continue;
+
+ if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+ SetPageDirty(page);
+
+ put_page(page);
+ mem->hpas[i] = 0;
+ }
+ }
+
+ static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+ {
+
+ mm_iommu_unpin(mem);
+ vfree(mem->hpas);
+ kfree(mem);
+ }
+
+ static void mm_iommu_free(struct rcu_head *head)
+ {
+ struct mm_iommu_table_group_mem_t *mem = container_of(head,
+ struct mm_iommu_table_group_mem_t, rcu);
+
+ mm_iommu_do_free(mem);
+ }
+
+ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+ {
+ list_del_rcu(&mem->next);
+ call_rcu(&mem->rcu, mm_iommu_free);
+ }
+
+ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
+ {
+ long ret = 0;
- unsigned long entries, dev_hpa;
++ unsigned long unlock_entries = 0;
+
+ mutex_lock(&mem_list_mutex);
+
+ if (mem->used == 0) {
+ ret = -ENOENT;
+ goto unlock_exit;
+ }
+
+ --mem->used;
+ /* There are still users, exit */
+ if (mem->used)
+ goto unlock_exit;
+
+ /* Are there still mappings? */
+ if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) {
+ ++mem->used;
+ ret = -EBUSY;
+ goto unlock_exit;
+ }
+
++ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
++ unlock_entries = mem->entries;
++
+ /* @mapped became 0 so now mappings are disabled, release the region */
- entries = mem->entries;
- dev_hpa = mem->dev_hpa;
+ mm_iommu_release(mem);
+
- if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
- mm_iommu_adjust_locked_vm(mm, entries, false);
-
+ unlock_exit:
+ mutex_unlock(&mem_list_mutex);
+
++ mm_iommu_adjust_locked_vm(mm, unlock_entries, false);
++
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ unsigned long ua, unsigned long size)
+ {
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if ((mem->ua <= ua) &&
+ (ua + size <= mem->ua +
+ (mem->entries << PAGE_SHIFT))) {
+ ret = mem;
+ break;
+ }
+ }
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+ unsigned long ua, unsigned long size)
+ {
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
+ next) {
+ if ((mem->ua <= ua) &&
+ (ua + size <= mem->ua +
+ (mem->entries << PAGE_SHIFT))) {
+ ret = mem;
+ break;
+ }
+ }
+
+ return ret;
+ }
+
+ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
+ {
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ mutex_lock(&mem_list_mutex);
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if ((mem->ua == ua) && (mem->entries == entries)) {
+ ret = mem;
+ ++mem->used;
+ break;
+ }
+ }
+
+ mutex_unlock(&mem_list_mutex);
+
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+ {
+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+ u64 *va;
+
+ if (entry >= mem->entries)
+ return -EFAULT;
+
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ va = &mem->hpas[entry];
+ *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
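
The translation above is plain arithmetic: index hpas[] by page, mask off the low bits (which may carry the DIRTY flag), then splice the page offset back in. A sketch with made-up addresses; DEMO_PAGE_MASK plays the role of both MM_IOMMU_TABLE_GROUP_PAGE_MASK and PAGE_MASK for a 4K page:

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_MASK  (~((1UL << DEMO_PAGE_SHIFT) - 1))

int main(void)
{
        /* made-up preregistered region: two 4K pages at user address 0x70000000 */
        unsigned long mem_ua = 0x70000000UL;
        unsigned long hpas[] = { 0x20000000UL, 0x20001000UL | 0x1 };  /* entry 1 carries the DIRTY bit */

        unsigned long ua = mem_ua + 0x1234;                  /* falls into entry 1 */
        unsigned long entry = (ua - mem_ua) >> DEMO_PAGE_SHIFT;
        unsigned long hpa = (hpas[entry] & DEMO_PAGE_MASK) | (ua & ~DEMO_PAGE_MASK);

        printf("ua %#lx -> entry %lu, hpa %#lx\n", ua, entry, hpa);  /* hpa 0x20001234 */
        return 0;
}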
+
+ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+ {
+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+ unsigned long *pa;
+
+ if (entry >= mem->entries)
+ return -EFAULT;
+
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
+ if (!pa)
+ return -EFAULT;
+
+ *hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+
+ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
+ {
+ struct mm_iommu_table_group_mem_t *mem;
+ long entry;
+ void *va;
+ unsigned long *pa;
+
+ mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
+ if (!mem)
+ return;
+
+ if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+ return;
+
+ entry = (ua - mem->ua) >> PAGE_SHIFT;
+ va = &mem->hpas[entry];
+
+ pa = (void *) vmalloc_to_phys(va);
+ if (!pa)
+ return;
+
+ *pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
+ }
+
+ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size)
+ {
+ struct mm_iommu_table_group_mem_t *mem;
+ unsigned long end;
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ continue;
+
+ end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+ if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+ /*
+ * Since the IOMMU page size might be bigger than
+ * PAGE_SIZE, the amount of preregistered memory
+ * starting from @hpa might be smaller than 1<<pageshift
+ * and the caller needs to distinguish this situation.
+ */
+ *size = min(1UL << pageshift, end - hpa);
+ return true;
+ }
+ }
+
+ return false;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
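
The size capping described in the comment inside mm_iommu_is_devmem() is worth a tiny numeric example (all values invented): with 16 preregistered 4K pages of device memory and a caller asking about a 64K IOMMU page that starts 16K before the end of the region, only 16K is reported back.

#include <stdio.h>

int main(void)
{
        unsigned long dev_hpa = 0x80000000UL;            /* device memory base (example) */
        unsigned long entries = 16, page_shift = 12;     /* 16 x 4K preregistered */
        unsigned long end = dev_hpa + (entries << page_shift);

        unsigned long hpa = 0x8000c000UL;                /* 16K before the end */
        unsigned int pageshift = 16;                     /* caller asks about a 64K IOMMU page */

        unsigned long size = 1UL << pageshift;
        if (size > end - hpa)
                size = end - hpa;
        printf("preregistered bytes from hpa: %#lx\n", size);   /* 0x4000, not 0x10000 */
        return 0;
}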
+
+ long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+ {
+ if (atomic64_inc_not_zero(&mem->mapped))
+ return 0;
+
+ /* Last mm_iommu_put() has been called, no more mappings allowed() */
+ return -ENXIO;
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+
+ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+ {
+ atomic64_add_unless(&mem->mapped, -1, 1);
+ }
+ EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+
+ void mm_iommu_init(struct mm_struct *mm)
+ {
+ INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
+ }
config PPC_RADIX_MMU
bool "Radix MMU Support"
- depends on PPC_BOOK3S_64
+ depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
DUMP(p, trap_save, "%#-*x");
DUMP(p, irq_soft_mask, "%#-*x");
DUMP(p, irq_happened, "%#-*x");
- DUMP(p, io_sync, "%#-*x");
+#ifdef CONFIG_MMIOWB
+ DUMP(p, mmiowb_state.nesting_count, "%#-*x");
+ DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
+#endif
DUMP(p, irq_work_pending, "%#-*x");
- DUMP(p, nap_state_lost, "%#-*x");
DUMP(p, sprg_vdso, "%#-*llx");
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM