[PATCH] reiserfs: reiserfs hang and performance fix for data=journal mode
author: Chris Mason <mason@suse.com>
Wed, 1 Feb 2006 11:06:49 +0000 (03:06 -0800)
committer: Linus Torvalds <torvalds@g5.osdl.org>
Wed, 1 Feb 2006 16:53:26 +0000 (08:53 -0800)
In data=journal mode, reiserfs writepage needs to make sure not to trigger
transactions while being run under PF_MEMALLOC.  This patch makes sure to
redirty the page instead of forcing a transaction start in this case.

Also, calling filemap_fdata* in order to trigger io on the block device can
cause lock inversions on the page lock.  Instead, do simple batching from
flush_commit_list.

Signed-off-by: Chris Mason <mason@suse.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
fs/reiserfs/inode.c
fs/reiserfs/journal.c

index 60e2f23447031b09a4a6fcbb34043016f4c55c46..b33d67bba2fdfd548f30fd47ea81191b042e3e08 100644 (file)
@@ -2363,6 +2363,13 @@ static int reiserfs_write_full_page(struct page *page,
        int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
        th.t_trans_id = 0;
 
+       /* no logging allowed when nonblocking or from PF_MEMALLOC */
+       if (checked && (current->flags & PF_MEMALLOC)) {
+               redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
+               return 0;
+       }
+
        /* The page dirty bit is cleared before writepage is called, which
         * means we have to tell create_empty_buffers to make dirty buffers
         * The page really should be up to date at this point, so tossing
index bc8fe963b3cc57ff671999b5858c4e07d38967d1..1b2402a9a8e1db402bf2579cc9baa30a4bb153ec 100644 (file)
@@ -988,6 +988,7 @@ static int flush_commit_list(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        int barrier = 0;
        int retval = 0;
+       int write_len;
 
        reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1037,16 +1038,24 @@ static int flush_commit_list(struct super_block *s,
        BUG_ON(!list_empty(&jl->j_bh_list));
        /*
         * for the description block and all the log blocks, submit any buffers
-        * that haven't already reached the disk
+        * that haven't already reached the disk.  Try to write at least 256
+        * log blocks. later on, we will only wait on blocks that correspond
+        * to this transaction, but while we're unplugging we might as well
+        * get a chunk of data on there.
         */
        atomic_inc(&journal->j_async_throttle);
-       for (i = 0; i < (jl->j_len + 1); i++) {
+       write_len = jl->j_len + 1;
+       if (write_len < 256)
+               write_len = 256;
+       for (i = 0 ; i < write_len ; i++) {
                bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
                    SB_ONDISK_JOURNAL_SIZE(s);
                tbh = journal_find_get_block(s, bn);
-               if (buffer_dirty(tbh))  /* redundant, ll_rw_block() checks */
-                       ll_rw_block(SWRITE, 1, &tbh);
-               put_bh(tbh);
+               if (tbh) {
+                       if (buffer_dirty(tbh))
+                           ll_rw_block(WRITE, 1, &tbh) ;
+                       put_bh(tbh) ;
+               }
        }
        atomic_dec(&journal->j_async_throttle);