mm: numa: group related processes based on VMA flags instead of page table flags

author Mel Gorman <mgorman@suse.de>

Wed, 25 Mar 2015 22:55:37 +0000 (15:55 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 25 Mar 2015 23:20:31 +0000 (16:20 -0700)
author Mel Gorman <mgorman@suse.de>
Wed, 25 Mar 2015 22:55:37 +0000 (15:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Mar 2015 23:20:31 +0000 (16:20 -0700)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 626e93db28ba162d11e7d286985604bbc523981c..2f12e9fcf1a236665ae2cd3be9c79bd0b6f96e45 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1291,17 +1291,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 flags |= TNF_FAULT_LOCAL;
         }
  
-       /*
-        * Avoid grouping on DSO/COW pages in specific and RO pages
-        * in general, RO pages shouldn't hurt as much anyway since
-        * they can be in shared cache state.
-        *
-        * FIXME! This checks "pmd_dirty()" as an approximation of
-        * "is this a read-only page", since checking "pmd_write()"
-        * is even more broken. We haven't actually turned this into
-        * a writable page, so pmd_write() will always be false.
-        */
-       if (!pmd_dirty(pmd))
+       /* See similar comment in do_numa_page for explanation */
+       if (!(vma->vm_flags & VM_WRITE))
                 flags |= TNF_NO_GROUP;
  
         /*
diff --git a/mm/memory.c b/mm/memory.c

index 411144f977b10eab492410728784efe37c4ea54a..20beb6647dba22714c52c1f6f228b30a9987a5a7 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3069,16 +3069,19 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
  
         /*
-        * Avoid grouping on DSO/COW pages in specific and RO pages
-        * in general, RO pages shouldn't hurt as much anyway since
-        * they can be in shared cache state.
+        * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
+        * much anyway since they can be in shared cache state. This misses
+        * the case where a mapping is writable but the process never writes
+        * to it. However, pte_write gets cleared during protection updates
+        * and pte_dirty behaves unpredictably between PTE scan updates,
+        * background writeback, dirty balancing and application behaviour.
          *
-        * FIXME! This checks "pmd_dirty()" as an approximation of
-        * "is this a read-only page", since checking "pmd_write()"
-        * is even more broken. We haven't actually turned this into
-        * a writable page, so pmd_write() will always be false.
+        * TODO: Ideally we would avoid the situation where, in some cases, a
+        * NUMA fault is taken and immediately followed by a write fault.
+        * Avoiding it would lower overhead overall but would be invasive
+        * as the fault paths would need to be unified.
          */
-       if (!pte_dirty(pte))
+       if (!(vma->vm_flags & VM_WRITE))
                 flags |= TNF_NO_GROUP;
  
         /*
author	Mel Gorman <mgorman@suse.de>
	Wed, 25 Mar 2015 22:55:37 +0000 (15:55 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 25 Mar 2015 23:20:31 +0000 (16:20 -0700)
mm/huge_memory.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history