bcache: Track dirty data by stripe
author Kent Overstreet <koverstreet@google.com>
Wed, 5 Jun 2013 13:21:07 +0000 (06:21 -0700)
committer Kent Overstreet <koverstreet@google.com>
Thu, 27 Jun 2013 04:57:23 +0000 (21:57 -0700)
To make background writeback aware of raid5/6 stripes, we first need to
track the amount of dirty data within each stripe - we do this by
breaking up the existing sectors_dirty into per-stripe atomic_t counters.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c
drivers/md/bcache/writeback.c
drivers/md/bcache/writeback.h [new file with mode: 0644]

index d099d8894c2f9b0a8c202ead6918db2ef944cbaa..dbddef0cdb599171088e70ee3f21f7020d4b8bbc 100644 (file)
@@ -437,7 +437,10 @@ struct bcache_device {
        /* If nonzero, we're detaching/unregistering from cache set */
        atomic_t                detaching;
 
-       atomic_long_t           sectors_dirty;
+       uint64_t                nr_stripes;
+       unsigned                stripe_size_bits;
+       atomic_t                *stripe_sectors_dirty;
+
        unsigned long           sectors_dirty_last;
        long                    sectors_dirty_derivative;
 
@@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c)
 
 /* Forward declarations */
 
-void bch_writeback_queue(struct cached_dev *);
-void bch_writeback_add(struct cached_dev *, unsigned);
-
 void bch_count_io_errors(struct cache *, int, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
                              int, const char *);
@@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *);
 struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
-void bch_sectors_dirty_init(struct cached_dev *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
 int bch_cache_allocator_start(struct cache *ca);
index 230c3a6d9be27afc648f167fb4631c96e450602a..b93cf56260a47653c3a70fc1e065be7a90fbc777 100644 (file)
@@ -24,6 +24,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -1599,14 +1600,14 @@ static bool fix_overlapping_extents(struct btree *b,
                                    struct btree_iter *iter,
                                    struct btree_op *op)
 {
-       void subtract_dirty(struct bkey *k, int sectors)
+       void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
        {
-               struct bcache_device *d = b->c->devices[KEY_INODE(k)];
-
-               if (KEY_DIRTY(k) && d)
-                       atomic_long_sub(sectors, &d->sectors_dirty);
+               if (KEY_DIRTY(k))
+                       bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+                                                    offset, -sectors);
        }
 
+       uint64_t old_offset;
        unsigned old_size, sectors_found = 0;
 
        while (1) {
@@ -1618,6 +1619,7 @@ static bool fix_overlapping_extents(struct btree *b,
                if (bkey_cmp(k, &START_KEY(insert)) <= 0)
                        continue;
 
+               old_offset = KEY_START(k);
                old_size = KEY_SIZE(k);
 
                /*
@@ -1673,7 +1675,7 @@ static bool fix_overlapping_extents(struct btree *b,
 
                        struct bkey *top;
 
-                       subtract_dirty(k, KEY_SIZE(insert));
+                       subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
 
                        if (bkey_written(b, k)) {
                                /*
@@ -1720,7 +1722,7 @@ static bool fix_overlapping_extents(struct btree *b,
                        }
                }
 
-               subtract_dirty(k, old_size - KEY_SIZE(k));
+               subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
        }
 
 check_failed:
@@ -1796,6 +1798,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
 insert:        shift_keys(b, m, k);
 copy:  bkey_copy(m, k);
 merged:
+       if (KEY_DIRTY(k))
+               bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+                                            KEY_START(k), KEY_SIZE(k));
+
        bch_check_keys(b, "%u for %s", status, op_type(op));
 
        if (b->level && !KEY_OFFSET(k))
index 695469958c1eb69466e43077635bc162d578c603..017c95fced8e9bbfe3fbd8bdbbde5d8887ac331b 100644 (file)
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/cgroup.h>
 #include <linux/module.h>
@@ -1044,7 +1045,7 @@ static void request_write(struct cached_dev *dc, struct search *s)
                closure_bio_submit(bio, cl, s->d);
        } else {
                s->op.cache_bio = bio;
-               bch_writeback_add(dc, bio_sectors(bio));
+               bch_writeback_add(dc);
        }
 out:
        closure_call(&s->op.cl, bch_insert_data, NULL, cl);
index dbfa1c38e85e331a3ecd9e3d1f5985158c9b1abf..8c73f0c7f28a041a595fabf80703db68b8b6aed1 100644 (file)
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
@@ -744,13 +745,35 @@ static void bcache_device_free(struct bcache_device *d)
                mempool_destroy(d->unaligned_bvec);
        if (d->bio_split)
                bioset_free(d->bio_split);
+       if (is_vmalloc_addr(d->stripe_sectors_dirty))
+               vfree(d->stripe_sectors_dirty);
+       else
+               kfree(d->stripe_sectors_dirty);
 
        closure_debug_destroy(&d->cl);
 }
 
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+                             sector_t sectors)
 {
        struct request_queue *q;
+       size_t n;
+
+       if (!d->stripe_size_bits)
+               d->stripe_size_bits = 31;
+
+       d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+               d->stripe_size_bits;
+
+       if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+               return -ENOMEM;
+
+       n = d->nr_stripes * sizeof(atomic_t);
+       d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+               ? kzalloc(n, GFP_KERNEL)
+               : vzalloc(n);
+       if (!d->stripe_sectors_dirty)
+               return -ENOMEM;
 
        if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
            !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -760,6 +783,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
            !(q = blk_alloc_queue(GFP_KERNEL)))
                return -ENOMEM;
 
+       set_capacity(d->disk, sectors);
        snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
 
        d->disk->major          = bcache_major;
@@ -1047,7 +1071,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
                hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
        }
 
-       ret = bcache_device_init(&dc->disk, block_size);
+       ret = bcache_device_init(&dc->disk, block_size,
+                        dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
        if (ret)
                return ret;
 
@@ -1146,11 +1171,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
 
        kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
-       if (bcache_device_init(d, block_bytes(c)))
+       if (bcache_device_init(d, block_bytes(c), u->sectors))
                goto err;
 
        bcache_device_attach(d, c, u - c->uuids);
-       set_capacity(d->disk, u->sectors);
        bch_flash_dev_request_init(d);
        add_disk(d->disk);
 
index f5c2d86952300279e918f5acba491e60fed97ee6..cf8d91ec3238d1c5f04d0118bdf38085a9b5347a 100644 (file)
@@ -9,6 +9,7 @@
 #include "sysfs.h"
 #include "btree.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/blkdev.h>
 #include <linux/sort.h>
@@ -128,7 +129,7 @@ SHOW(__bch_cached_dev)
                char derivative[20];
                char target[20];
                bch_hprint(dirty,
-                      atomic_long_read(&dc->disk.sectors_dirty) << 9);
+                          bcache_dev_sectors_dirty(&dc->disk) << 9);
                bch_hprint(derivative,  dc->writeback_rate_derivative << 9);
                bch_hprint(target,      dc->writeback_rate_target << 9);
 
@@ -144,7 +145,7 @@ SHOW(__bch_cached_dev)
        }
 
        sysfs_hprint(dirty_data,
-                    atomic_long_read(&dc->disk.sectors_dirty) << 9);
+                    bcache_dev_sectors_dirty(&dc->disk) << 9);
 
        var_printf(sequential_merge,    "%i");
        var_hprint(sequential_cutoff);
index 553949eefd515bc36d38ca92c27400b3c8d87699..dd815475c524785c9244861170d3f6a78e957f12 100644 (file)
@@ -9,6 +9,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "writeback.h"
 
 #include <trace/events/bcache.h>
 
@@ -38,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc)
 
        int change = 0;
        int64_t error;
-       int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty);
+       int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
        int64_t derivative = dirty - dc->disk.sectors_dirty_last;
 
        dc->disk.sectors_dirty_last = dirty;
@@ -183,10 +184,8 @@ void bch_writeback_queue(struct cached_dev *dc)
        }
 }
 
-void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
+void bch_writeback_add(struct cached_dev *dc)
 {
-       atomic_long_add(sectors, &dc->disk.sectors_dirty);
-
        if (!atomic_read(&dc->has_dirty) &&
            !atomic_xchg(&dc->has_dirty, 1)) {
                atomic_inc(&dc->count);
@@ -205,6 +204,46 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
        }
 }
 
+/*
+ * Adjust the dirty-sector accounting of device @inode in cache set @c by
+ * @nr_sectors (negative to subtract) for the range starting at sector
+ * @offset.  The change is split across every per-stripe counter the range
+ * touches.  Does nothing if c->devices[inode] is NULL.
+ */
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+                                 uint64_t offset, int nr_sectors)
+{
+       struct bcache_device *d = c->devices[inode];
+       unsigned stripe_size, stripe_offset;
+       uint64_t stripe;
+
+       if (!d)
+               return;
+
+       /* unsigned shift: stripe_size_bits can be 31 */
+       stripe_size = 1U << d->stripe_size_bits;
+       stripe = offset >> d->stripe_size_bits;
+       stripe_offset = offset & (stripe_size - 1);
+
+       /*
+        * stripe_sectors_dirty holds nr_stripes counters; stop at the end of
+        * the array instead of updating memory past it.
+        */
+       while (nr_sectors && stripe < d->nr_stripes) {
+               /* sectors of this range that fall within the current stripe */
+               int s = min_t(unsigned, abs(nr_sectors),
+                             stripe_size - stripe_offset);
+
+               if (nr_sectors < 0)
+                       s = -s;
+
+               atomic_add(s, d->stripe_sectors_dirty + stripe);
+               nr_sectors -= s;
+               stripe_offset = 0;
+               stripe++;
+       }
+}
+
 /* Background writeback - IO loop */
 
 static void dirty_io_destructor(struct closure *cl)
@@ -392,8 +419,9 @@ static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
                                break;
 
                        if (KEY_DIRTY(k))
-                               atomic_long_add(KEY_SIZE(k),
-                                               &dc->disk.sectors_dirty);
+                               bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
+                                                            KEY_START(k),
+                                                            KEY_SIZE(k));
                } else {
                        btree(sectors_dirty_init, k, b, op, dc);
                        if (KEY_INODE(k) > dc->disk.id)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644 (file)
index 0000000..5ce9771
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _BCACHE_WRITEBACK_H
+#define _BCACHE_WRITEBACK_H
+
+/* Total dirty sectors on @d: the sum of all of its per-stripe counters. */
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+       uint64_t total = 0, stripe;
+
+       for (stripe = 0; stripe < d->nr_stripes; stripe++)
+               total += atomic_read(&d->stripe_sectors_dirty[stripe]);
+       return total;
+}
+
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bch_writeback_queue(struct cached_dev *);
+void bch_writeback_add(struct cached_dev *);
+
+void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+
+#endif