ext4: use transaction reservation for extent conversion in ext4_end_io
authorJan Kara <jack@suse.cz>
Tue, 4 Jun 2013 17:21:11 +0000 (13:21 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 4 Jun 2013 17:21:11 +0000 (13:21 -0400)
Later we would like to clear PageWriteback bit only after extent
conversion from unwritten to written extents is performed.  However it
is not possible to start a transaction after PageWriteback is set
because that violates lock ordering (and is easy to deadlock).  So we
have to reserve a transaction before locking pages and sending them
for IO and later we use the transaction for extent conversion from
ext4_end_io().

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.h
fs/ext4/extents.c
fs/ext4/inode.c
fs/ext4/page-io.c

index 0a9b729f991bc31d714b559267b78b3c50080db2..8de219b758fb66873507c8a0295a9eb2aea3d9ca 100644 (file)
@@ -184,10 +184,13 @@ struct ext4_map_blocks {
 #define EXT4_IO_END_DIRECT     0x0004
 
 /*
- * For converting uninitialized extents on a work queue.
+ * For converting uninitialized extents on a work queue. 'handle' is used for
+ * buffered writeback.
  */
 typedef struct ext4_io_end {
        struct list_head        list;           /* per-file finished IO list */
+       handle_t                *handle;        /* handle reserved for extent
+                                                * conversion */
        struct inode            *inode;         /* file being written to */
        unsigned int            flag;           /* unwritten or not */
        loff_t                  offset;         /* offset in the file */
@@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
                                              struct ext4_io_end *io_end)
 {
        if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+               /* Writeback has to have coversion transaction reserved */
+               WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
+                       !(io_end->flag & EXT4_IO_END_DIRECT));
                io_end->flag |= EXT4_IO_END_UNWRITTEN;
                atomic_inc(&EXT4_I(inode)->i_unwritten);
        }
@@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
                          loff_t len);
-extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-                         ssize_t len);
+extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+                                         loff_t offset, ssize_t len);
 extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
                           struct ext4_map_blocks *map, int flags);
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
index fdd865eb1879b35cf0f3e2cd184c181aba3fa75e..2877258d94977d976f7497ef17bfd69ad0f2b781 100644 (file)
@@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 #define EXT4_HT_MIGRATE          8
 #define EXT4_HT_MOVE_EXTENTS     9
 #define EXT4_HT_XATTR           10
-#define EXT4_HT_MAX             11
+#define EXT4_HT_EXT_CONVERT     11
+#define EXT4_HT_MAX             12
 
 /**
  *   struct ext4_journal_cb_entry - Base structure for callback information.
@@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
 #define ext4_journal_stop(handle) \
        __ext4_journal_stop(__func__, __LINE__, (handle))
 
-#define ext4_journal_start_reserve(handle, type) \
+#define ext4_journal_start_reserved(handle, type) \
        __ext4_journal_start_reserved((handle), __LINE__, (type))
 
 handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
index 94283d06cace93a5cdc214c3c13f7af16b314ce5..208f664f9ee0efb1aca83ae1333ffd3b09198f2e 100644 (file)
@@ -4566,10 +4566,9 @@ retry:
  * function, to convert the fallocated extents after IO is completed.
  * Returns 0 on success.
  */
-int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-                                   ssize_t len)
+int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
+                                  loff_t offset, ssize_t len)
 {
-       handle_t *handle;
        unsigned int max_blocks;
        int ret = 0;
        int ret2 = 0;
@@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
        max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
                      map.m_lblk);
        /*
-        * credits to insert 1 extent into extent tree
+        * This is somewhat ugly but the idea is clear: When transaction is
+        * reserved, everything goes into it. Otherwise we rather start several
+        * smaller transactions for conversion of each extent separately.
         */
-       credits = ext4_chunk_trans_blocks(inode, max_blocks);
+       if (handle) {
+               handle = ext4_journal_start_reserved(handle,
+                                                    EXT4_HT_EXT_CONVERT);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               credits = 0;
+       } else {
+               /*
+                * credits to insert 1 extent into extent tree
+                */
+               credits = ext4_chunk_trans_blocks(inode, max_blocks);
+       }
        while (ret >= 0 && ret < max_blocks) {
                map.m_lblk += ret;
                map.m_len = (max_blocks -= ret);
-               handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       break;
+               if (credits) {
+                       handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
+                                                   credits);
+                       if (IS_ERR(handle)) {
+                               ret = PTR_ERR(handle);
+                               break;
+                       }
                }
                ret = ext4_map_blocks(handle, inode, &map,
                                      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
@@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                                     inode->i_ino, map.m_lblk,
                                     map.m_len, ret);
                ext4_mark_inode_dirty(handle, inode);
-               ret2 = ext4_journal_stop(handle);
-               if (ret <= 0 || ret2 )
+               if (credits)
+                       ret2 = ext4_journal_stop(handle);
+               if (ret <= 0 || ret2)
                        break;
        }
+       if (!credits)
+               ret2 = ext4_journal_stop(handle);
        return ret > 0 ? ret2 : ret;
 }
 
index 736d164dc2ba76d527e3cf7b85f14832313d9ed7..510dba785db45f376812ea5d044f40b19382b5da 100644 (file)
@@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
 struct mpage_da_data {
        struct inode *inode;
        struct writeback_control *wbc;
+
        pgoff_t first_page;     /* The first page to write */
        pgoff_t next_page;      /* Current page to examine */
        pgoff_t last_page;      /* Last page to examine */
@@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
        err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
        if (err < 0)
                return err;
-       if (map->m_flags & EXT4_MAP_UNINIT)
+       if (map->m_flags & EXT4_MAP_UNINIT) {
+               if (!mpd->io_submit.io_end->handle &&
+                   ext4_handle_valid(handle)) {
+                       mpd->io_submit.io_end->handle = handle->h_rsv_handle;
+                       handle->h_rsv_handle = NULL;
+               }
                ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
+       }
 
        BUG_ON(map->m_len == 0);
        if (map->m_flags & EXT4_MAP_NEW) {
@@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
        handle_t *handle = NULL;
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
-       int needed_blocks, ret = 0;
+       int needed_blocks, rsv_blocks = 0, ret = 0;
        struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        bool done;
        struct blk_plug plug;
@@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
        if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
                return -EROFS;
 
+       if (ext4_should_dioread_nolock(inode)) {
+               /*
+                * We may need to convert upto one extent per block in
+                * the page and we may dirty the inode.
+                */
+               rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
+       }
+
        /*
         * If we have inline data and arrive here, it means that
         * we will soon create the block for the 1st page, so
@@ -2438,8 +2453,8 @@ retry:
                needed_blocks = ext4_da_writepages_trans_blocks(inode);
 
                /* start a new transaction */
-               handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
-                                           needed_blocks);
+               handle = ext4_journal_start_with_reserve(inode,
+                               EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
                if (IS_ERR(handle)) {
                        ret = PTR_ERR(handle);
                        ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
@@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                 * for non AIO case, since the IO is already
                 * completed, we could do the conversion right here
                 */
-               err = ext4_convert_unwritten_extents(inode,
+               err = ext4_convert_unwritten_extents(NULL, inode,
                                                     offset, ret);
                if (err < 0)
                        ret = err;
index de6860c7836eea1fa56786fbeb5f9ed0f53d9991..5f20bc48104185e686727081a9debe855601182a 100644 (file)
@@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 {
        BUG_ON(!list_empty(&io_end->list));
        BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
+       WARN_ON(io_end->handle);
 
        if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
                wake_up_all(ext4_ioend_wq(io_end->inode));
@@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
        struct inode *inode = io->inode;
        loff_t offset = io->offset;
        ssize_t size = io->size;
+       handle_t *handle = io->handle;
        int ret = 0;
 
        ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
                   "list->prev 0x%p\n",
                   io, inode->i_ino, io->list.next, io->list.prev);
 
-       ret = ext4_convert_unwritten_extents(inode, offset, size);
+       io->handle = NULL;      /* Following call will use up the handle */
+       ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
        if (ret < 0) {
                ext4_msg(inode->i_sb, KERN_EMERG,
                         "failed to convert unwritten extents to written "
@@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
 
        if (atomic_dec_and_test(&io_end->count)) {
                if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-                       err = ext4_convert_unwritten_extents(io_end->inode,
-                                               io_end->offset, io_end->size);
+                       err = ext4_convert_unwritten_extents(io_end->handle,
+                                               io_end->inode, io_end->offset,
+                                               io_end->size);
+                       io_end->handle = NULL;
                        ext4_clear_io_unwritten_flag(io_end);
                }
                ext4_release_io_end(io_end);