lightnvm: pblk: add padding distribution sysfs attribute
author Hans Holmberg <hans.holmberg@cnexlabs.com>
Thu, 29 Mar 2018 22:04:55 +0000 (00:04 +0200)
committer Jens Axboe <axboe@kernel.dk>
Thu, 29 Mar 2018 23:29:09 +0000 (17:29 -0600)
When pblk receives a sync, all data up to that point in the write buffer
must be committed to persistent storage, and as flash memory comes with a
minimal write size there is a significant cost involved both in terms
of time for completing the sync and in terms of write amplification
(sectors padded to fill up to the minimal write size).

In order to get a better understanding of the costs involved for syncs,
add a sysfs attribute to pblk: padding_dist, showing a normalized
distribution of sectors padded. In order to facilitate measurements of
specific workloads during the lifetime of the pblk instance, the
distribution can be reset by writing 0 to the attribute.

Do this by introducing counters for each possible padding:
{0..(minimal write size - 1)} and calculate the normalized distribution
when showing the attribute.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Javier González <javier@cnexlabs.com>
Rearranged total_buckets statement in pblk_sysfs_get_padding_dist
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-rb.c
drivers/lightnvm/pblk-sysfs.c
drivers/lightnvm/pblk.h

index 0ffc17ccf1ccaf2aead986fb0155b1e7b9a27fc8..8416910ee8bffef572e2d47c7368f3349ca433b3 100644 (file)
@@ -921,6 +921,7 @@ static void pblk_free(struct pblk *pblk)
 {
        pblk_luns_free(pblk);
        pblk_lines_free(pblk);
+       kfree(pblk->pad_dist);
        pblk_line_meta_free(pblk);
        pblk_core_free(pblk);
        pblk_l2p_free(pblk);
@@ -998,11 +999,13 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        pblk->pad_rst_wa = 0;
        pblk->gc_rst_wa = 0;
 
+       atomic64_set(&pblk->nr_flush, 0);
+       pblk->nr_flush_rst = 0;
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_set(&pblk->inflight_writes, 0);
        atomic_long_set(&pblk->padded_writes, 0);
        atomic_long_set(&pblk->padded_wb, 0);
-       atomic_long_set(&pblk->nr_flush, 0);
        atomic_long_set(&pblk->req_writes, 0);
        atomic_long_set(&pblk->sub_writes, 0);
        atomic_long_set(&pblk->sync_writes, 0);
@@ -1034,10 +1037,17 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
                goto fail_free_luns;
        }
 
+       pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t),
+                                GFP_KERNEL);
+       if (!pblk->pad_dist) {
+               ret = -ENOMEM;
+               goto fail_free_line_meta;
+       }
+
        ret = pblk_core_init(pblk);
        if (ret) {
                pr_err("pblk: could not initialize core\n");
-               goto fail_free_line_meta;
+               goto fail_free_pad_dist;
        }
 
        ret = pblk_l2p_init(pblk);
@@ -1097,6 +1107,8 @@ fail_free_l2p:
        pblk_l2p_free(pblk);
 fail_free_core:
        pblk_core_free(pblk);
+fail_free_pad_dist:
+       kfree(pblk->pad_dist);
 fail_free_line_meta:
        pblk_line_meta_free(pblk);
 fail_free_luns:
index 7044b5599cc4fbf60ac4d15f5e304c30ba9367e7..8b1434060fb3bdfb9a905cabf2a18afd20424a89 100644 (file)
@@ -437,9 +437,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
        if (bio->bi_opf & REQ_PREFLUSH) {
                struct pblk *pblk = container_of(rb, struct pblk, rwb);
 
-#ifdef CONFIG_NVM_DEBUG
-               atomic_long_inc(&pblk->nr_flush);
-#endif
+               atomic64_inc(&pblk->nr_flush);
                if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
                        *io_ret = NVM_IO_OK;
        }
@@ -620,14 +618,17 @@ try:
                        pr_err("pblk: could not pad page in write bio\n");
                        return NVM_IO_ERR;
                }
-       }
 
-       atomic64_add(pad, &((struct pblk *)
-                       (container_of(rb, struct pblk, rwb)))->pad_wa);
+               if (pad < pblk->min_write_pgs)
+                       atomic64_inc(&pblk->pad_dist[pad - 1]);
+               else
+                       pr_warn("pblk: padding more than min. sectors\n");
+
+               atomic64_add(pad, &pblk->pad_wa);
+       }
 
 #ifdef CONFIG_NVM_DEBUG
-       atomic_long_add(pad, &((struct pblk *)
-                       (container_of(rb, struct pblk, rwb)))->padded_writes);
+       atomic_long_add(pad, &pblk->padded_writes);
 #endif
 
        return NVM_IO_OK;
index beed99936c89f98c49efe24b6b92589b1b845879..c2cf6c939752d590e4bb83f6d041e8fdf257444d 100644 (file)
@@ -340,15 +340,62 @@ static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
                atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page);
 }
 
+/*
+ * Return @bucket as a whole-number percentage of @total (rounded down).
+ *
+ * The arithmetic is kept in 64 bits end to end: the counters feeding this
+ * helper are 64-bit, so narrowing the product to int or the divisor to
+ * 32 bits would give wrong percentages once the counts grow large.
+ * A zero @total yields 0 instead of dividing by zero.
+ */
+static long long bucket_percentage(unsigned long long bucket,
+                                  unsigned long long total)
+{
+       if (!total)
+               return 0;
+
+       /* div64_u64() takes a full 64-bit divisor, unlike div_u64() */
+       return div64_u64(bucket * 100, total);
+}
+
+/*
+ * Print the padding distribution into @page as space-separated
+ * "<padded sectors>:<percentage>%" entries followed by a newline.
+ *
+ * The number of flushes since the last reset is used as the total. Bucket 0
+ * is not stored; it is derived as the total minus the sum of the stored
+ * buckets 1..(min_write_pgs - 1). When no flush has been counted since the
+ * last reset, every bucket is printed as "<n>:0".
+ *
+ * Returns the accumulated snprintf() length.
+ */
+static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
+{
+       int nr_buckets = pblk->min_write_pgs - 1;
+       unsigned long long flushes;
+       unsigned long long padded = 0;
+       int len = 0;
+       int b;
+
+       flushes = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
+
+       if (flushes) {
+               /* Sum of all stored buckets, needed to derive bucket 0 */
+               for (b = 0; b < nr_buckets; b++)
+                       padded += atomic64_read(&pblk->pad_dist[b]);
+
+               len += snprintf(page + len, PAGE_SIZE - len, "0:%lld%% ",
+                       bucket_percentage(flushes - padded, flushes));
+
+               /* pad_dist[b - 1] holds the count for b padded sectors */
+               for (b = 1; b <= nr_buckets; b++) {
+                       unsigned long long pct;
+
+                       pct = bucket_percentage(
+                                       atomic64_read(&pblk->pad_dist[b - 1]),
+                                       flushes);
+                       len += snprintf(page + len, PAGE_SIZE - len,
+                                       "%d:%lld%% ", b, pct);
+               }
+       } else {
+               /* Nothing counted yet: report an all-zero distribution */
+               for (b = 0; b <= nr_buckets; b++)
+                       len += snprintf(page + len, PAGE_SIZE - len,
+                               "%d:0 ", b);
+       }
+
+       len += snprintf(page + len, PAGE_SIZE - len, "\n");
+
+       return len;
+}
+
 #ifdef CONFIG_NVM_DEBUG
 static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 {
        return snprintf(page, PAGE_SIZE,
-               "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
+               "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n",
                        atomic_long_read(&pblk->inflight_writes),
                        atomic_long_read(&pblk->inflight_reads),
                        atomic_long_read(&pblk->req_writes),
-                       atomic_long_read(&pblk->nr_flush),
+                       (u64)atomic64_read(&pblk->nr_flush),
                        atomic_long_read(&pblk->padded_writes),
                        atomic_long_read(&pblk->padded_wb),
                        atomic_long_read(&pblk->sub_writes),
@@ -426,6 +473,32 @@ static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
 }
 
 
+/*
+ * Reset the padding distribution.
+ *
+ * The only accepted input is the value 0, terminated by a newline within
+ * @len bytes. On success every padding bucket is cleared and the current
+ * flush count is recorded in nr_flush_rst as the new baseline.
+ *
+ * Returns @len on success, -EINVAL for any other input.
+ */
+static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk,
+                       const char *page, size_t len)
+{
+       size_t c_len;
+       unsigned int reset_value;       /* type must match kstrtouint() */
+       int buckets = pblk->min_write_pgs - 1;
+       int i;
+
+       /* Reject input that has no newline within the written length */
+       c_len = strcspn(page, "\n");
+       if (c_len >= len)
+               return -EINVAL;
+
+       if (kstrtouint(page, 0, &reset_value))
+               return -EINVAL;
+
+       if (reset_value != 0)
+               return -EINVAL;
+
+       for (i = 0; i < buckets; i++)
+               atomic64_set(&pblk->pad_dist[i], 0);
+
+       /* New baseline for the flush counter */
+       pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush);
+
+       return len;
+}
+
 static struct attribute sys_write_luns = {
        .name = "write_luns",
        .mode = 0444,
@@ -486,6 +559,11 @@ static struct attribute sys_write_amp_trip = {
        .mode = 0644,
 };
 
+/* padding_dist attribute, created with mode 0644 */
+static struct attribute sys_padding_dist = {
+       .mode = 0644,
+       .name = "padding_dist",
+};
+
 #ifdef CONFIG_NVM_DEBUG
 static struct attribute sys_stats_debug_attr = {
        .name = "stats",
@@ -506,6 +584,7 @@ static struct attribute *pblk_attrs[] = {
        &sys_lines_info_attr,
        &sys_write_amp_mileage,
        &sys_write_amp_trip,
+       &sys_padding_dist,
 #ifdef CONFIG_NVM_DEBUG
        &sys_stats_debug_attr,
 #endif
@@ -539,6 +618,8 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
                return pblk_sysfs_get_write_amp_mileage(pblk, buf);
        else if (strcmp(attr->name, "write_amp_trip") == 0)
                return pblk_sysfs_get_write_amp_trip(pblk, buf);
+       else if (strcmp(attr->name, "padding_dist") == 0)
+               return pblk_sysfs_get_padding_dist(pblk, buf);
 #ifdef CONFIG_NVM_DEBUG
        else if (strcmp(attr->name, "stats") == 0)
                return pblk_sysfs_stats_debug(pblk, buf);
@@ -557,6 +638,8 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
                return pblk_sysfs_set_sec_per_write(pblk, buf, len);
        else if (strcmp(attr->name, "write_amp_trip") == 0)
                return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
+       else if (strcmp(attr->name, "padding_dist") == 0)
+               return pblk_sysfs_set_padding_dist(pblk, buf, len);
        return 0;
 }
 
index 4b7d8618631f6460c158fed62e64e63bcf19dbc7..17e2f242f7da802e6d87a1caf9954deddf4f2c14 100644 (file)
@@ -626,12 +626,16 @@ struct pblk {
        u64 gc_rst_wa;
        u64 pad_rst_wa;
 
+       /* Counters used for calculating padding distribution */
+       atomic64_t *pad_dist;           /* Padding distribution buckets */
+       u64 nr_flush_rst;               /* Flushes reset value for pad dist.*/
+       atomic64_t nr_flush;            /* Number of flush/fua I/O */
+
 #ifdef CONFIG_NVM_DEBUG
        /* Non-persistent debug counters, 4kb sector I/Os */
        atomic_long_t inflight_writes;  /* Inflight writes (user and gc) */
        atomic_long_t padded_writes;    /* Sectors padded due to flush/fua */
        atomic_long_t padded_wb;        /* Sectors padded in write buffer */
-       atomic_long_t nr_flush;         /* Number of flush/fua I/O */
        atomic_long_t req_writes;       /* Sectors stored on write buffer */
        atomic_long_t sub_writes;       /* Sectors submitted from buffer */
        atomic_long_t sync_writes;      /* Sectors synced to media */