Btrfs: split bio_readpage_error into several functions
authorMiao Xie <miaox@cn.fujitsu.com>
Fri, 12 Sep 2014 10:43:59 +0000 (18:43 +0800)
committerChris Mason <clm@fb.com>
Wed, 17 Sep 2014 20:38:56 +0000 (13:38 -0700)
The data repair function of direct read will be implemented later, and some code
in bio_readpage_error will be reused, so split bio_readpage_error into
several functions which will be used in direct read repair later.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h

index c7648f53f63dddd892d6fa4f64d0f6b5058a99c5..c191ea58750f7c28333489554e76e9c79466ac09 100644 (file)
@@ -1962,25 +1962,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
                SetPageUptodate(page);
 }
 
-/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data.  This
- * io_failure_record is used to record state as we go through all the
- * mirrors.  If another mirror has good data, the page is set up to date
- * and things continue.  If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
-       struct page *page;
-       u64 start;
-       u64 len;
-       u64 logical;
-       unsigned long bio_flags;
-       int this_mirror;
-       int failed_mirror;
-       int in_validation;
-};
-
 static int free_io_failure(struct inode *inode, struct io_failure_record *rec)
 {
        int ret;
@@ -2156,40 +2137,24 @@ out:
        return 0;
 }
 
-/*
- * this is a generic handler for readpage errors (default
- * readpage_io_failed_hook). if other copies exist, read those and write back
- * good data to the failed position. does not investigate in remapping the
- * failed extent elsewhere, hoping the device will be smart enough to do this as
- * needed
- */
-
-static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
-                             struct page *page, u64 start, u64 end,
-                             int failed_mirror)
+int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
+                               struct io_failure_record **failrec_ret)
 {
-       struct io_failure_record *failrec = NULL;
+       struct io_failure_record *failrec;
        u64 private;
        struct extent_map *em;
-       struct inode *inode = page->mapping->host;
        struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct bio *bio;
-       struct btrfs_io_bio *btrfs_failed_bio;
-       struct btrfs_io_bio *btrfs_bio;
-       int num_copies;
        int ret;
-       int read_mode;
        u64 logical;
 
-       BUG_ON(failed_bio->bi_rw & REQ_WRITE);
-
        ret = get_state_private(failure_tree, start, &private);
        if (ret) {
                failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
                if (!failrec)
                        return -ENOMEM;
+
                failrec->start = start;
                failrec->len = end - start + 1;
                failrec->this_mirror = 0;
@@ -2209,11 +2174,11 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                        em = NULL;
                }
                read_unlock(&em_tree->lock);
-
                if (!em) {
                        kfree(failrec);
                        return -EIO;
                }
+
                logical = start - em->start;
                logical = em->block_start + logical;
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
@@ -2222,8 +2187,10 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                        extent_set_compress_type(&failrec->bio_flags,
                                                 em->compress_type);
                }
-               pr_debug("bio_readpage_error: (new) logical=%llu, start=%llu, "
-                        "len=%llu\n", logical, start, failrec->len);
+
+               pr_debug("Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu\n",
+                        logical, start, failrec->len);
+
                failrec->logical = logical;
                free_extent_map(em);
 
@@ -2243,8 +2210,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                }
        } else {
                failrec = (struct io_failure_record *)(unsigned long)private;
-               pr_debug("bio_readpage_error: (found) logical=%llu, "
-                        "start=%llu, len=%llu, validation=%d\n",
+               pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
                         failrec->logical, failrec->start, failrec->len,
                         failrec->in_validation);
                /*
@@ -2253,6 +2219,17 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                 * clean_io_failure() clean all those errors at once.
                 */
        }
+
+       *failrec_ret = failrec;
+
+       return 0;
+}
+
+int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
+                          struct io_failure_record *failrec, int failed_mirror)
+{
+       int num_copies;
+
        num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
                                      failrec->logical, failrec->len);
        if (num_copies == 1) {
@@ -2261,10 +2238,9 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                 * all the retry and error correction code that follows. no
                 * matter what the error is, it is very likely to persist.
                 */
-               pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
+               pr_debug("Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
                         num_copies, failrec->this_mirror, failed_mirror);
-               free_io_failure(inode, failrec);
-               return -EIO;
+               return 0;
        }
 
        /*
@@ -2284,7 +2260,6 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                BUG_ON(failrec->in_validation);
                failrec->in_validation = 1;
                failrec->this_mirror = failed_mirror;
-               read_mode = READ_SYNC | REQ_FAILFAST_DEV;
        } else {
                /*
                 * we're ready to fulfill a) and b) alongside. get a good copy
@@ -2300,22 +2275,32 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
                failrec->this_mirror++;
                if (failrec->this_mirror == failed_mirror)
                        failrec->this_mirror++;
-               read_mode = READ_SYNC;
        }
 
        if (failrec->this_mirror > num_copies) {
-               pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
+               pr_debug("Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
                         num_copies, failrec->this_mirror, failed_mirror);
-               free_io_failure(inode, failrec);
-               return -EIO;
+               return 0;
        }
 
+       return 1;
+}
+
+
+struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
+                                   struct io_failure_record *failrec,
+                                   struct page *page, int pg_offset, int icsum,
+                                   bio_end_io_t *endio_func)
+{
+       struct bio *bio;
+       struct btrfs_io_bio *btrfs_failed_bio;
+       struct btrfs_io_bio *btrfs_bio;
+
        bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
-       if (!bio) {
-               free_io_failure(inode, failrec);
-               return -EIO;
-       }
-       bio->bi_end_io = failed_bio->bi_end_io;
+       if (!bio)
+               return NULL;
+
+       bio->bi_end_io = endio_func;
        bio->bi_iter.bi_sector = failrec->logical >> 9;
        bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
        bio->bi_iter.bi_size = 0;
@@ -2327,17 +2312,63 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 
                btrfs_bio = btrfs_io_bio(bio);
                btrfs_bio->csum = btrfs_bio->csum_inline;
-               phy_offset >>= inode->i_sb->s_blocksize_bits;
-               phy_offset *= csum_size;
-               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+               icsum *= csum_size;
+               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
                       csum_size);
        }
 
-       bio_add_page(bio, page, failrec->len, start - page_offset(page));
+       bio_add_page(bio, page, failrec->len, pg_offset);
+
+       return bio;
+}
+
+/*
+ * this is a generic handler for readpage errors (default
+ * readpage_io_failed_hook). if other copies exist, read those and write back
+ * good data to the failed position. does not investigate in remapping the
+ * failed extent elsewhere, hoping the device will be smart enough to do this as
+ * needed
+ */
+
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+                             struct page *page, u64 start, u64 end,
+                             int failed_mirror)
+{
+       struct io_failure_record *failrec;
+       struct inode *inode = page->mapping->host;
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+       struct bio *bio;
+       int read_mode;
+       int ret;
+
+       BUG_ON(failed_bio->bi_rw & REQ_WRITE);
+
+       ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
+       if (ret)
+               return ret;
+
+       ret = btrfs_check_repairable(inode, failed_bio, failrec, failed_mirror);
+       if (!ret) {
+               free_io_failure(inode, failrec);
+               return -EIO;
+       }
+
+       if (failed_bio->bi_vcnt > 1)
+               read_mode = READ_SYNC | REQ_FAILFAST_DEV;
+       else
+               read_mode = READ_SYNC;
+
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
+                                     start - page_offset(page),
+                                     (int)phy_offset, failed_bio->bi_end_io);
+       if (!bio) {
+               free_io_failure(inode, failrec);
+               return -EIO;
+       }
 
-       pr_debug("bio_readpage_error: submitting new read[%#x] to "
-                "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
-                failrec->this_mirror, num_copies, failrec->in_validation);
+       pr_debug("Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d\n",
+                read_mode, failrec->this_mirror, failrec->in_validation);
 
        ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
                                         failrec->this_mirror,
index 844b4c5029cdd99dfec2c5d14d9d569806666e59..75b621b7cd9fa67f9b32d158472edc820c00114b 100644 (file)
@@ -344,6 +344,34 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
 int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
                         int mirror_num);
+
+/*
+ * When IO fails, either with EIO or csum verification fails, we
+ * try other mirrors that might have a good copy of the data.  This
+ * io_failure_record is used to record state as we go through all the
+ * mirrors.  If another mirror has good data, the page is set up to date
+ * and things continue.  If a good mirror can't be found, the original
+ * bio end_io callback is called to indicate things have failed.
+ */
+struct io_failure_record {
+       struct page *page;
+       u64 start;
+       u64 len;
+       u64 logical;
+       unsigned long bio_flags;
+       int this_mirror;
+       int failed_mirror;
+       int in_validation;
+};
+
+int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
+                               struct io_failure_record **failrec_ret);
+int btrfs_check_repairable(struct inode *inode, struct bio *failed_bio,
+                          struct io_failure_record *failrec, int fail_mirror);
+struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
+                                   struct io_failure_record *failrec,
+                                   struct page *page, int pg_offset, int icsum,
+                                   bio_end_io_t *endio_func);
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 noinline u64 find_lock_delalloc_range(struct inode *inode,
                                      struct extent_io_tree *tree,