mm: Update generic gup implementation to handle hugepage directory
author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Wed, 5 Nov 2014 16:27:40 +0000 (21:57 +0530)
committer Michael Ellerman <mpe@ellerman.id.au>
Fri, 14 Nov 2014 06:24:21 +0000 (17:24 +1100)
Update the generic gup implementation to handle the powerpc specific details.
On powerpc, an entry at the pmd level can be a hugepte, a normal pmd pointer,
or a pointer to a hugepage directory.
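
As a rough sketch, the generic fast gup pmd walk ends up dispatching three
ways (simplified from the gup_pmd_range() change below; this is not the
exact in-tree code, and the surrounding loop is omitted):

	pmd_t pmd = ACCESS_ONCE(*pmdp);

	if (pmd_huge(pmd) || pmd_trans_huge(pmd)) {
		/* the pmd entry itself maps a huge page */
		if (!gup_huge_pmd(pmd, pmdp, addr, next, write, pages, nr))
			return 0;
	} else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
		/* the pmd entry points to a hugepage directory (powerpc) */
		if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
				 PMD_SHIFT, next, write, pages, nr))
			return 0;
	} else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) {
		/* a normal pmd pointing to a page of ptes */
		return 0;
	}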

Tested-by: Steve Capper <steve.capper@linaro.org>
Acked-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
arch/powerpc/include/asm/page.h
include/linux/hugetlb.h
mm/gup.c

index 26fe1ae1521255b514d267acc0010833b5e93e8a..f973fce73a432f051996dba7d31090c32f5aa80f 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -380,6 +380,7 @@ static inline int hugepd_ok(hugepd_t hpd)
 #endif
 
 #define is_hugepd(pdep)               (hugepd_ok(*((hugepd_t *)(pdep))))
+#define pgd_huge pgd_huge
 int pgd_huge(pgd_t pgd);
 #else /* CONFIG_HUGETLB_PAGE */
 #define is_hugepd(pdep)                        0
index 6e6d338641fe7efc91641df82c494645526428a7..e6b62f30ab217ea30338c2bd58c5d52264c90c91 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -175,6 +175,52 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 }
 
 #endif /* !CONFIG_HUGETLB_PAGE */
+/*
+ * hugepages at page global directory. If an arch supports
+ * hugepages at the pgd level, it needs to define this.
+ */
+#ifndef pgd_huge
+#define pgd_huge(x)    0
+#endif
+
+#ifndef pgd_write
+static inline int pgd_write(pgd_t pgd)
+{
+       BUG();
+       return 0;
+}
+#endif
+
+#ifndef pud_write
+static inline int pud_write(pud_t pud)
+{
+       BUG();
+       return 0;
+}
+#endif
+
+#ifndef is_hugepd
+/*
+ * Some architectures require a hugepage directory format in order to
+ * support multiple hugepage sizes. For example, commit a4fe3ce76
+ * "powerpc/mm: Allow more flexible layouts for hugepage pagetables"
+ * introduced this on powerpc, allowing for a more flexible hugepage
+ * pagetable layout.
+ */
+typedef struct { unsigned long pd; } hugepd_t;
+#define is_hugepd(hugepd) (0)
+#define __hugepd(x) ((hugepd_t) { (x) })
+static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+                             unsigned pdshift, unsigned long end,
+                             int write, struct page **pages, int *nr)
+{
+       return 0;
+}
+#else
+extern int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
+                      unsigned pdshift, unsigned long end,
+                      int write, struct page **pages, int *nr);
+#endif
 
 #define HUGETLB_ANON_FILE "anon_hugepage"
 
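For an architecture that does use hugepage directories, the expectation
(judging from the arch/powerpc/include/asm/page.h hunk above) is that the
arch header defines is_hugepd()/__hugepd() and, where relevant, pgd_huge,
and that the arch supplies its own gup_huge_pd() walker. A minimal sketch,
assuming a hypothetical arch-provided hugepd_ok() helper; this is not the
exact powerpc code:

	/* arch header, sketch: advertise hugepd support to generic code */
	typedef struct { unsigned long pd; } hugepd_t;	/* arch-specific layout */
	#define __hugepd(x)	((hugepd_t) { (x) })
	#define is_hugepd(hpd)	(hugepd_ok(hpd))	/* assumed arch helper */
	#define pgd_huge	pgd_huge
	int pgd_huge(pgd_t pgd);

	/* arch mm code: walker matching the extern declaration above */
	int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
			unsigned pdshift, unsigned long end,
			int write, struct page **pages, int *nr);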
index cd62c8c90d4a3584fb087b2f2e949e7004ba615c..0ca1df9075ab7ff3cbe24c0ff06ec19a65701a93 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -3,7 +3,6 @@
 #include <linux/err.h>
 #include <linux/spinlock.h>
 
-#include <linux/hugetlb.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
@@ -12,6 +11,7 @@
 
 #include <linux/sched.h>
 #include <linux/rwsem.h>
+#include <linux/hugetlb.h>
 #include <asm/pgtable.h>
 
 #include "internal.h"
@@ -875,6 +875,49 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
        return 1;
 }
 
+static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
+                       unsigned long end, int write,
+                       struct page **pages, int *nr)
+{
+       int refs;
+       struct page *head, *page, *tail;
+
+       if (write && !pgd_write(orig))
+               return 0;
+
+       refs = 0;
+       head = pgd_page(orig);
+       page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+       tail = page;
+       do {
+               VM_BUG_ON_PAGE(compound_head(page) != head, page);
+               pages[*nr] = page;
+               (*nr)++;
+               page++;
+               refs++;
+       } while (addr += PAGE_SIZE, addr != end);
+
+       if (!page_cache_add_speculative(head, refs)) {
+               *nr -= refs;
+               return 0;
+       }
+
+       if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
+               *nr -= refs;
+               while (refs--)
+                       put_page(head);
+               return 0;
+       }
+
+       while (refs--) {
+               if (PageTail(tail))
+                       get_huge_page_tail(tail);
+               tail++;
+       }
+
+       return 1;
+}
+
 static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
 {
@@ -902,6 +945,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                                pages, nr))
                                return 0;
 
+               } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
+                       /*
+                        * Architectures may use a different format for a
+                        * hugetlbfs pmd than for a THP pmd.
+                        */
+                       if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
+                                        PMD_SHIFT, next, write, pages, nr))
+                               return 0;
                } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
                                return 0;
        } while (pmdp++, addr = next, addr != end);
@@ -909,22 +960,26 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
        return 1;
 }
 
-static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end,
-               int write, struct page **pages, int *nr)
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+                        int write, struct page **pages, int *nr)
 {
        unsigned long next;
        pud_t *pudp;
 
-       pudp = pud_offset(pgdp, addr);
+       pudp = pud_offset(&pgd, addr);
        do {
                pud_t pud = ACCESS_ONCE(*pudp);
 
                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
-               if (pud_huge(pud)) {
+               if (unlikely(pud_huge(pud))) {
                        if (!gup_huge_pud(pud, pudp, addr, next, write,
-                                       pages, nr))
+                                         pages, nr))
+                               return 0;
+               } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
+                       if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
+                                        PUD_SHIFT, next, write, pages, nr))
                                return 0;
                } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
                        return 0;
@@ -970,10 +1025,20 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
        local_irq_save(flags);
        pgdp = pgd_offset(mm, addr);
        do {
+               pgd_t pgd = ACCESS_ONCE(*pgdp);
+
                next = pgd_addr_end(addr, end);
-               if (pgd_none(*pgdp))
+               if (pgd_none(pgd))
                        break;
-               else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
+               if (unlikely(pgd_huge(pgd))) {
+                       if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+                                         pages, &nr))
+                               break;
+               } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+                       if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+                                        PGDIR_SHIFT, next, write, pages, &nr))
+                               break;
+               } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        break;
        } while (pgdp++, addr = next, addr != end);
        local_irq_restore(flags);
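
For reference, a minimal hypothetical caller of the fast path being extended
here; uaddr and the page count are made-up and error handling is elided.
__get_user_pages_fast() returns the number of pages it managed to pin without
taking mmap_sem, and each pinned page must later be released with put_page():

	struct page *pages[16];
	int i, nr;

	/* try to pin up to 16 user pages for writing, locklessly */
	nr = __get_user_pages_fast(uaddr, 16, 1, pages);

	/* ... access pages[0..nr-1] ... */

	for (i = 0; i < nr; i++)
		put_page(pages[i]);	/* drop the references taken by gup */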