mm: Only flush TLBs if a transhuge PMD is modified for NUMA pte scanning
authorMel Gorman <mgorman@suse.de>
Mon, 7 Oct 2013 10:28:49 +0000 (11:28 +0100)
committerIngo Molnar <mingo@kernel.org>
Wed, 9 Oct 2013 10:39:49 +0000 (12:39 +0200)
NUMA PTE scanning is expensive both in terms of the scanning itself and
the TLB flush if there are any updates. The TLB flush is avoided if no
PTEs are updated but there is a bug where transhuge PMDs are considered
to be updated even if they were already pmd_numa. This patch addresses
the problem and TLB flushes should be reduced.

Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1381141781-10992-12-git-send-email-mgorman@suse.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
mm/huge_memory.c
mm/mprotect.c

index d4928769680f082c8b08afacb3cf11046afc714f..de8d5cfc2bf2f9dffd873a4aef44fa69d4ba76a8 100644 (file)
@@ -1458,6 +1458,12 @@ out:
        return ret;
 }
 
+/*
+ * Returns
+ *  - 0 if PMD could not be locked
+ *  - 1 if PMD was locked but protections unchange and TLB flush unnecessary
+ *  - HPAGE_PMD_NR is protections changed and TLB flush necessary
+ */
 int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, pgprot_t newprot, int prot_numa)
 {
@@ -1466,9 +1472,11 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (__pmd_trans_huge_lock(pmd, vma) == 1) {
                pmd_t entry;
-               entry = pmdp_get_and_clear(mm, addr, pmd);
+               ret = 1;
                if (!prot_numa) {
+                       entry = pmdp_get_and_clear(mm, addr, pmd);
                        entry = pmd_modify(entry, newprot);
+                       ret = HPAGE_PMD_NR;
                        BUG_ON(pmd_write(entry));
                } else {
                        struct page *page = pmd_page(*pmd);
@@ -1476,12 +1484,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        /* only check non-shared pages */
                        if (page_mapcount(page) == 1 &&
                            !pmd_numa(*pmd)) {
+                               entry = pmdp_get_and_clear(mm, addr, pmd);
                                entry = pmd_mknuma(entry);
+                               ret = HPAGE_PMD_NR;
                        }
                }
-               set_pmd_at(mm, addr, pmd, entry);
+
+               /* Set PMD if cleared earlier */
+               if (ret == HPAGE_PMD_NR)
+                       set_pmd_at(mm, addr, pmd, entry);
+
                spin_unlock(&vma->vm_mm->page_table_lock);
-               ret = 1;
        }
 
        return ret;
index 7bdbd4b0f6d9306d7d38007b6c17eaeed4e98c25..2da33dca613426a817dd500ff48f1180aa0239a7 100644 (file)
@@ -144,10 +144,16 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                if (pmd_trans_huge(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_page_pmd(vma, addr, pmd);
-                       else if (change_huge_pmd(vma, pmd, addr, newprot,
-                                                prot_numa)) {
-                               pages++;
-                               continue;
+                       else {
+                               int nr_ptes = change_huge_pmd(vma, pmd, addr,
+                                               newprot, prot_numa);
+
+                               if (nr_ptes) {
+                                       if (nr_ptes == HPAGE_PMD_NR)
+                                               pages++;
+
+                                       continue;
+                               }
                        }
                        /* fall through */
                }