Btrfs: add fragment=* debug mount option
authorJosef Bacik <jbacik@fb.com>
Wed, 23 Sep 2015 18:54:14 +0000 (14:54 -0400)
committerChris Mason <clm@fb.com>
Thu, 22 Oct 2015 01:51:43 +0000 (18:51 -0700)
In tracking down these weird bitmap problems it was helpful to artificially
create an extremely fragmented file system.  These mount options let us either
fragment data or metadata or both.  With these options I could reproduce all
sorts of weird latencies and hangs that occur under extreme fragmentation and
get them fixed.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/super.c
fs/btrfs/tests/free-space-tests.c

index 49bc792108b9bc9e2024fa0a29e906ce8972e796..16384231db82d8fabf5f504a070238516c57420a 100644 (file)
@@ -2145,6 +2145,8 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR       (1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE   (1 << 23)
+#define BTRFS_MOUNT_FRAGMENT_DATA      (1 << 24)
+#define BTRFS_MOUNT_FRAGMENT_METADATA  (1 << 25)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 #define BTRFS_DEFAULT_MAX_INLINE       (8192)
@@ -2169,6 +2171,18 @@ struct btrfs_ioctl_defrag_range_args {
        btrfs_clear_opt(root->fs_info->mount_opt, opt);                 \
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static inline int
+btrfs_should_fragment_free_space(struct btrfs_root *root,
+                                struct btrfs_block_group_cache *block_group)
+{
+       return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+               block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+              (btrfs_test_opt(root, FRAGMENT_DATA) &&
+               block_group->flags &  BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
+
 /*
  * Requests for changes that need to be done during transaction commit.
  *
index 2df4bc77f5b477e6d0f403b4962256707462d6b7..0e32abf53b5bf2a7e6482271d8e9c00115d19ec6 100644 (file)
@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
                kfree(ctl);
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static void fragment_free_space(struct btrfs_root *root,
+                               struct btrfs_block_group_cache *block_group)
+{
+       u64 start = block_group->key.objectid;
+       u64 len = block_group->key.offset;
+       u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
+               root->nodesize : root->sectorsize;
+       u64 step = chunk << 1;
+
+       while (len > chunk) {
+               btrfs_remove_free_space(block_group, start, chunk);
+               start += step;
+               if (len < step)
+                       len = 0;
+               else
+                       len -= step;
+       }
+}
+#endif
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
        u64 last = 0;
        u32 nritems;
        int ret = -ENOMEM;
+       bool wakeup = true;
 
        caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       /*
+        * If we're fragmenting we don't want to make anybody think we can
+        * allocate from this block group until we've had a chance to fragment
+        * the free space.
+        */
+       if (btrfs_should_fragment_free_space(extent_root, block_group))
+               wakeup = false;
+#endif
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
@@ -441,7 +472,8 @@ next:
 
                        if (need_resched() ||
                            rwsem_is_contended(&fs_info->commit_root_sem)) {
-                               caching_ctl->progress = last;
+                               if (wakeup)
+                                       caching_ctl->progress = last;
                                btrfs_release_path(path);
                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
@@ -464,7 +496,8 @@ next:
                        key.offset = 0;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
 
-                       caching_ctl->progress = last;
+                       if (wakeup)
+                               caching_ctl->progress = last;
                        btrfs_release_path(path);
                        goto next;
                }
@@ -491,7 +524,8 @@ next:
 
                        if (total_found > (1024 * 1024 * 2)) {
                                total_found = 0;
-                               wake_up(&caching_ctl->wait);
+                               if (wakeup)
+                                       wake_up(&caching_ctl->wait);
                        }
                }
                path->slots[0]++;
@@ -501,13 +535,27 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
-       caching_ctl->progress = (u64)-1;
-
        spin_lock(&block_group->lock);
        block_group->caching_ctl = NULL;
        block_group->cached = BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(extent_root, block_group)) {
+               u64 bytes_used;
+
+               spin_lock(&block_group->space_info->lock);
+               spin_lock(&block_group->lock);
+               bytes_used = block_group->key.offset -
+                       btrfs_block_group_used(&block_group->item);
+               block_group->space_info->bytes_used += bytes_used >> 1;
+               spin_unlock(&block_group->lock);
+               spin_unlock(&block_group->space_info->lock);
+               fragment_free_space(extent_root, block_group);
+       }
+#endif
+
+       caching_ctl->progress = (u64)-1;
 err:
        btrfs_free_path(path);
        up_read(&fs_info->commit_root_sem);
@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                        }
                }
                spin_unlock(&cache->lock);
+#ifdef CONFIG_BTRFS_DEBUG
+               if (ret == 1 &&
+                   btrfs_should_fragment_free_space(fs_info->extent_root,
+                                                    cache)) {
+                       u64 bytes_used;
+
+                       spin_lock(&cache->space_info->lock);
+                       spin_lock(&cache->lock);
+                       bytes_used = cache->key.offset -
+                               btrfs_block_group_used(&cache->item);
+                       cache->space_info->bytes_used += bytes_used >> 1;
+                       spin_unlock(&cache->lock);
+                       spin_unlock(&cache->space_info->lock);
+                       fragment_free_space(fs_info->extent_root, cache);
+               }
+#endif
                mutex_unlock(&caching_ctl->mutex);
 
                wake_up(&caching_ctl->wait);
@@ -9624,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        free_excluded_extents(root, cache);
 
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(root, cache)) {
+               u64 new_bytes_used = size - bytes_used;
+
+               bytes_used += new_bytes_used >> 1;
+               fragment_free_space(root, cache);
+       }
+#endif
        /*
         * Call to ensure the corresponding space_info object is created and
         * assigned to our block group, but don't update its counters just yet.
index 5a9431dda07f9549e93cc7546f4ce5db016aa72b..c0eb84ecb78ff531e1ce4080211c392f5e713301 100644 (file)
@@ -1951,12 +1951,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
                      struct btrfs_free_space *info)
 {
        struct btrfs_block_group_cache *block_group = ctl->private;
+       bool forced = false;
+
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
+                                            block_group))
+               forced = true;
+#endif
 
        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
-       if (ctl->free_extents < ctl->extents_thresh) {
+       if (!forced && ctl->free_extents < ctl->extents_thresh) {
                /*
                 * If this block group has some small extents we don't want to
                 * use up all of our free slots in the cache with them, we want
index b23d49daa1a2154bc57f4c4f33c9eb73bb3b5059..24154e422945167f474557887c62acaf6ed0779c 100644 (file)
@@ -303,6 +303,9 @@ enum {
        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
        Opt_datasum, Opt_treelog, Opt_noinode_cache,
+#ifdef CONFIG_BTRFS_DEBUG
+       Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
+#endif
        Opt_err,
 };
 
@@ -355,6 +358,11 @@ static match_table_t tokens = {
        {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
        {Opt_fatal_errors, "fatal_errors=%s"},
        {Opt_commit_interval, "commit=%d"},
+#ifdef CONFIG_BTRFS_DEBUG
+       {Opt_fragment_data, "fragment=data"},
+       {Opt_fragment_metadata, "fragment=metadata"},
+       {Opt_fragment_all, "fragment=all"},
+#endif
        {Opt_err, NULL},
 };
 
@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
                        }
                        break;
+#ifdef CONFIG_BTRFS_DEBUG
+               case Opt_fragment_all:
+                       btrfs_info(root->fs_info, "fragmenting all space");
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
+                       break;
+               case Opt_fragment_metadata:
+                       btrfs_info(root->fs_info, "fragmenting metadata");
+                       btrfs_set_opt(info->mount_opt,
+                                     FRAGMENT_METADATA);
+                       break;
+               case Opt_fragment_data:
+                       btrfs_info(root->fs_info, "fragmenting data");
+                       btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+                       break;
+#endif
                case Opt_err:
                        btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
                        ret = -EINVAL;
@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",fatal_errors=panic");
        if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
                seq_printf(seq, ",commit=%d", info->commit_interval);
+#ifdef CONFIG_BTRFS_DEBUG
+       if (btrfs_test_opt(root, FRAGMENT_DATA))
+               seq_puts(seq, ",fragment=data");
+       if (btrfs_test_opt(root, FRAGMENT_METADATA))
+               seq_puts(seq, ",fragment=metadata");
+#endif
        seq_printf(seq, ",subvolid=%llu",
                  BTRFS_I(d_inode(dentry))->root->root_key.objectid);
        seq_puts(seq, ",subvol=");
index 2299bfde39eec666fe1b0876733746a76673f5a5..c8c3d70c31ffad4e02acd04e0f7dcaa54ad0fe2b 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../disk-io.h"
 #include "../free-space-cache.h"
 
 #define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
                kfree(cache);
                return NULL;
        }
+       cache->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!cache->fs_info) {
+               kfree(cache->free_space_ctl);
+               kfree(cache);
+               return NULL;
+       }
 
        cache->key.objectid = 0;
        cache->key.offset = 1024 * 1024 * 1024;
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 int btrfs_test_free_space_cache(void)
 {
        struct btrfs_block_group_cache *cache;
-       int ret;
+       struct btrfs_root *root = NULL;
+       int ret = -ENOMEM;
 
        test_msg("Running btrfs free space cache tests\n");
 
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
                return 0;
        }
 
+       root = btrfs_alloc_dummy_root();
+       if (!root)
+               goto out;
+
+       root->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!root->fs_info)
+               goto out;
+
+       root->fs_info->extent_root = root;
+       cache->fs_info = root->fs_info;
+
        ret = test_extents(cache);
        if (ret)
                goto out;
@@ -904,6 +923,7 @@ out:
        __btrfs_remove_free_space_cache(cache->free_space_ctl);
        kfree(cache->free_space_ctl);
        kfree(cache);
+       btrfs_free_dummy_root(root);
        test_msg("Free space cache tests finished\n");
        return ret;
 }