mm/page-writeback.c: fix range_cyclic writeback vs writepages deadlock

author Dave Chinner <dchinner@redhat.com>

Fri, 26 Oct 2018 22:09:45 +0000 (15:09 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 26 Oct 2018 23:38:14 +0000 (16:38 -0700)
author Dave Chinner <dchinner@redhat.com>
Fri, 26 Oct 2018 22:09:45 +0000 (15:09 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Oct 2018 23:38:14 +0000 (16:38 -0700)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index 84ae9bf5858ac9dcf7e1155e514a9b9062e0f10d..439a304a6c921742710e1758b6118fea9f89a52f 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2149,6 +2149,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback);
   * not miss some pages (e.g., because some other process has cleared TOWRITE
   * tag we set). The rule we follow is that TOWRITE tag can be cleared only
   * by the process clearing the DIRTY tag (and submitting the page for IO).
+ *
+ * To avoid deadlocks between range_cyclic writeback and callers that hold
+ * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
+ * we do not loop back to the start of the file. Doing so causes a page
+ * lock/page writeback access order inversion - we should only ever lock
+ * multiple pages in ascending page->index order, and looping back to the start
+ * of the file violates that rule and causes deadlocks.
   */
  int write_cache_pages(struct address_space *mapping,
                       struct writeback_control *wbc, writepage_t writepage,
@@ -2162,7 +2169,6 @@ int write_cache_pages(struct address_space *mapping,
         pgoff_t index;
         pgoff_t end;            /* Inclusive */
         pgoff_t done_index;
-       int cycled;
         int range_whole = 0;
         int tag;
  
@@ -2170,23 +2176,17 @@ int write_cache_pages(struct address_space *mapping,
         if (wbc->range_cyclic) {
                 writeback_index = mapping->writeback_index; /* prev offset */
                 index = writeback_index;
-               if (index == 0)
-                       cycled = 1;
-               else
-                       cycled = 0;
                 end = -1;
         } else {
                 index = wbc->range_start >> PAGE_SHIFT;
                 end = wbc->range_end >> PAGE_SHIFT;
                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                         range_whole = 1;
-               cycled = 1; /* ignore range_cyclic tests */
         }
         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                 tag = PAGECACHE_TAG_TOWRITE;
         else
                 tag = PAGECACHE_TAG_DIRTY;
-retry:
         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                 tag_pages_for_writeback(mapping, index, end);
         done_index = index;
@@ -2272,17 +2272,14 @@ continue_unlock:
                 pagevec_release(&pvec);
                 cond_resched();
         }
-       if (!cycled && !done) {
-               /*
-                * range_cyclic:
-                * We hit the last page and there is more work to be done: wrap
-                * back to the start of the file
-                */
-               cycled = 1;
-               index = 0;
-               end = writeback_index - 1;
-               goto retry;
-       }
+
+       /*
+        * If we hit the last page and there is more work to be done, wrap
+        * the index back to the start of the file for the next time we
+        * are called.
+        */
+       if (wbc->range_cyclic && !done)
+               done_index = 0;
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = done_index;
author	Dave Chinner <dchinner@redhat.com>
	Fri, 26 Oct 2018 22:09:45 +0000 (15:09 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 26 Oct 2018 23:38:14 +0000 (16:38 -0700)