lightnvm: pblk: export write amplification counters to sysfs
author Hans Holmberg <hans.holmberg@cnexlabs.com>
Thu, 29 Mar 2018 22:04:52 +0000 (00:04 +0200)
committer Jens Axboe <axboe@kernel.dk>
Thu, 29 Mar 2018 23:29:09 +0000 (17:29 -0600)
In an SSD, write amplification, WA, is defined as the average
number of page writes per user page write. Write amplification
negatively affects write performance and decreases the lifetime
of the disk, so it's a useful metric to add to sysfs.

In pblk's case, the number of writes per user sector is the sum of:

    (1) number of user writes
    (2) number of sectors written by the garbage collector
    (3) number of sectors padded (i.e. due to syncs)

This patch adds persistent counters for 1-3 and two sysfs attributes
to export these along with WA calculated with five decimals:

    write_amp_mileage: the accumulated write amplification stats
                      for the lifetime of the pblk instance

    write_amp_trip: resettable stats to facilitate delta measurements,
                    values reset at creation and if 0 is written
                    to the attribute.

64-bit counters are used as a 32-bit counter would wrap around
already after about 17 TB worth of user data. It will take a
long long time before the 64-bit sector counters wrap around.

The counters are stored after the bad block bitmap in the first
emeta sector of each written line. There is plenty of space in the
first emeta sector, so we don't need to bump the major version of
the line data format.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/lightnvm/pblk-cache.c
drivers/lightnvm/pblk-core.c
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-map.c
drivers/lightnvm/pblk-rb.c
drivers/lightnvm/pblk-recovery.c
drivers/lightnvm/pblk-sysfs.c
drivers/lightnvm/pblk.h

index 000fcad381367a87109bdeb332249bc766842491..29a23111b31c4792128acb749b0b679c4ba0abcb 100644 (file)
@@ -63,6 +63,8 @@ retry:
                bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
        }
 
+       atomic64_add(nr_entries, &pblk->user_wa);
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(nr_entries, &pblk->inflight_writes);
        atomic_long_add(nr_entries, &pblk->req_writes);
@@ -117,6 +119,8 @@ retry:
        WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
                                        "pblk: inconsistent GC write\n");
 
+       atomic64_add(valid_entries, &pblk->gc_wa);
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(valid_entries, &pblk->inflight_writes);
        atomic_long_add(valid_entries, &pblk->recov_gc_writes);
index 155e42a26293cc525c00958c507964aa4d88360f..22e61cd4f80154b1d1c6924a4276932c1cada593 100644 (file)
@@ -1630,11 +1630,16 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_emeta *emeta = line->emeta;
        struct line_emeta *emeta_buf = emeta->buf;
+       struct wa_counters *wa = emeta_to_wa(lm, emeta_buf);
 
        /* No need for exact vsc value; avoid a big line lock and take aprox. */
        memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
        memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
 
+       wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa));
+       wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa));
+       wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa));
+
        emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
        emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
 
@@ -1837,6 +1842,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba,
 #endif
        /* Invalidate and discard padded entries */
        if (lba == ADDR_EMPTY) {
+               atomic64_inc(&pblk->pad_wa);
 #ifdef CONFIG_NVM_DEBUG
                atomic_long_inc(&pblk->padded_wb);
 #endif
index 5b46924ac66cf9ea02be433835835a2e9c275916..0ffc17ccf1ccaf2aead986fb0155b1e7b9a27fc8 100644 (file)
@@ -559,8 +559,8 @@ static unsigned int calc_emeta_len(struct pblk *pblk)
 
        /* Round to sector size so that lba_list starts on its own sector */
        lm->emeta_sec[1] = DIV_ROUND_UP(
-                       sizeof(struct line_emeta) + lm->blk_bitmap_len,
-                       geo->sec_size);
+                       sizeof(struct line_emeta) + lm->blk_bitmap_len +
+                       sizeof(struct wa_counters), geo->sec_size);
        lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;
 
        /* Round to sector size so that vsc_list starts on its own sector */
@@ -991,6 +991,13 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
        if (flags & NVM_TARGET_FACTORY)
                pblk_setup_uuid(pblk);
 
+       atomic64_set(&pblk->user_wa, 0);
+       atomic64_set(&pblk->pad_wa, 0);
+       atomic64_set(&pblk->gc_wa, 0);
+       pblk->user_rst_wa = 0;
+       pblk->pad_rst_wa = 0;
+       pblk->gc_rst_wa = 0;
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_set(&pblk->inflight_writes, 0);
        atomic_long_set(&pblk->padded_writes, 0);
index 7445e6430c52f11c6759cb83a31cd265dffbb83a..04e08d76ea5f9a581520e3313720fe8f8c1673f9 100644 (file)
@@ -65,6 +65,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
                        lba_list[paddr] = cpu_to_le64(w_ctx->lba);
                        if (lba_list[paddr] != addr_empty)
                                line->nr_valid_lbas++;
+                       else
+                               atomic64_inc(&pblk->pad_wa);
                } else {
                        lba_list[paddr] = meta_list[i].lba = addr_empty;
                        __pblk_map_invalidate(pblk, line, paddr);
index ec8fc314646ba5ffcf04855a81935f5185f9f4bf..7044b5599cc4fbf60ac4d15f5e304c30ba9367e7 100644 (file)
@@ -622,6 +622,9 @@ try:
                }
        }
 
+       atomic64_add(pad, &((struct pblk *)
+                       (container_of(rb, struct pblk, rwb)))->pad_wa);
+
 #ifdef CONFIG_NVM_DEBUG
        atomic_long_add(pad, &((struct pblk *)
                        (container_of(rb, struct pblk, rwb)))->padded_writes);
index a30fe203d45461701558292bfc82451e6e9fe56f..e75a1af2eebe64eae5e9f21142d2e0ca1845676e 100644 (file)
@@ -845,6 +845,29 @@ static int pblk_recov_check_line_version(struct pblk *pblk,
        return 0;
 }
 
+/*
+ * Load the write amplification counters stored in a recovered line's emeta
+ * and use the same values as the baselines for the trip statistics.
+ */
+static void pblk_recov_wa_counters(struct pblk *pblk,
+                                  struct line_emeta *emeta)
+{
+       struct line_header *hdr = &emeta->header;
+       struct wa_counters *stored = emeta_to_wa(&pblk->lm, emeta);
+       u64 user_secs, pad_secs, gc_secs;
+
+       /* Emeta written before version 0.2 carries no WA counters */
+       if (!(hdr->version_major > 0 || hdr->version_minor >= 2))
+               return;
+
+       user_secs = le64_to_cpu(stored->user);
+       pad_secs = le64_to_cpu(stored->pad);
+       gc_secs = le64_to_cpu(stored->gc);
+
+       atomic64_set(&pblk->user_wa, user_secs);
+       pblk->user_rst_wa = user_secs;
+
+       atomic64_set(&pblk->pad_wa, pad_secs);
+       pblk->pad_rst_wa = pad_secs;
+
+       atomic64_set(&pblk->gc_wa, gc_secs);
+       pblk->gc_rst_wa = gc_secs;
+}
+
 struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
 {
        struct pblk_line_meta *lm = &pblk->lm;
@@ -965,6 +988,8 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk)
                if (pblk_recov_check_line_version(pblk, line->emeta->buf))
                        return ERR_PTR(-EINVAL);
 
+               pblk_recov_wa_counters(pblk, line->emeta->buf);
+
                if (pblk_recov_l2p_from_emeta(pblk, line))
                        pblk_recov_l2p_from_oob(pblk, line);
 
index 620bab853579c884109c95429d6ad1b84f871517..beed99936c89f98c49efe24b6b92589b1b845879 100644 (file)
@@ -298,6 +298,48 @@ static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page)
        return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write);
 }
 
+/*
+ * Print the raw sector counters followed by the write amplification,
+ * (user + gc + pad) / user, with five decimals. WA is reported as "NaN"
+ * while no user sectors have been written.
+ *
+ * Returns the accumulated snprintf() length.
+ */
+static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
+                                 char *page)
+{
+       u64 wa_int, rem;
+       int sz;
+
+       /* The counters are unsigned, so print them as such */
+       sz = snprintf(page, PAGE_SIZE,
+                       "user:%llu gc:%llu pad:%llu WA:",
+                       user, gc, pad);
+
+       if (!user) {
+               sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
+               return sz;
+       }
+
+       /*
+        * @user is 64 bits wide: div_u64() takes a u32 divisor and would
+        * silently truncate it, so use the full 64-bit division helpers.
+        */
+       wa_int = div64_u64_rem(user + gc + pad, user, &rem);
+
+       /*
+        * Scale only the remainder (always < user) for the fraction; this
+        * gives the same digits as scaling the whole sum but overflows later.
+        */
+       sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05llu\n",
+                       wa_int, div64_u64(rem * 100000, user));
+
+       return sz;
+}
+
+/* Show the write amplification stats built from the raw persistent counters */
+static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page)
+{
+       u64 user = atomic64_read(&pblk->user_wa);
+       u64 gc = atomic64_read(&pblk->gc_wa);
+       u64 pad = atomic64_read(&pblk->pad_wa);
+
+       return pblk_get_write_amp(user, gc, pad, page);
+}
+
+/*
+ * Show the write amplification stats relative to the reset baselines, i.e.
+ * each counter minus the value recorded in its *_rst_wa field.
+ */
+static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page)
+{
+       u64 user = atomic64_read(&pblk->user_wa) - pblk->user_rst_wa;
+       u64 gc = atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa;
+       u64 pad = atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa;
+
+       return pblk_get_write_amp(user, gc, pad, page);
+}
+
 #ifdef CONFIG_NVM_DEBUG
 static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 {
@@ -360,6 +402,30 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
        return len;
 }
 
+/*
+ * Reset the trip statistics: writing 0 records the current counter values
+ * as the new baselines that write_amp_trip is reported against. Any other
+ * value, or input without a newline inside the first @len bytes, is
+ * rejected with -EINVAL. Returns @len on success.
+ */
+static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk,
+                       const char *page, size_t len)
+{
+       size_t c_len;
+       unsigned int reset_value;       /* kstrtouint() fills an unsigned int */
+
+       c_len = strcspn(page, "\n");
+       if (c_len >= len)
+               return -EINVAL;
+
+       if (kstrtouint(page, 0, &reset_value))
+               return -EINVAL;
+
+       /* Only a reset (0) is supported */
+       if (reset_value != 0)
+               return -EINVAL;
+
+       pblk->user_rst_wa = atomic64_read(&pblk->user_wa);
+       pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa);
+       pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa);
+
+       return len;
+}
+
+
 static struct attribute sys_write_luns = {
        .name = "write_luns",
        .mode = 0444,
@@ -410,6 +476,16 @@ static struct attribute sys_max_sec_per_write = {
        .mode = 0644,
 };
 
+/* Read-only attribute: write amplification stats from the raw counters */
+static struct attribute sys_write_amp_mileage = {
+       .name = "write_amp_mileage",
+       .mode = 0444,
+};
+
+/* Writable attribute: stats since the last reset; writing 0 resets them */
+static struct attribute sys_write_amp_trip = {
+       .name = "write_amp_trip",
+       .mode = 0644,
+};
+
 #ifdef CONFIG_NVM_DEBUG
 static struct attribute sys_stats_debug_attr = {
        .name = "stats",
@@ -428,6 +504,8 @@ static struct attribute *pblk_attrs[] = {
        &sys_stats_ppaf_attr,
        &sys_lines_attr,
        &sys_lines_info_attr,
+       &sys_write_amp_mileage,
+       &sys_write_amp_trip,
 #ifdef CONFIG_NVM_DEBUG
        &sys_stats_debug_attr,
 #endif
@@ -457,6 +535,10 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr,
                return pblk_sysfs_lines_info(pblk, buf);
        else if (strcmp(attr->name, "max_sec_per_write") == 0)
                return pblk_sysfs_get_sec_per_write(pblk, buf);
+       else if (strcmp(attr->name, "write_amp_mileage") == 0)
+               return pblk_sysfs_get_write_amp_mileage(pblk, buf);
+       else if (strcmp(attr->name, "write_amp_trip") == 0)
+               return pblk_sysfs_get_write_amp_trip(pblk, buf);
 #ifdef CONFIG_NVM_DEBUG
        else if (strcmp(attr->name, "stats") == 0)
                return pblk_sysfs_stats_debug(pblk, buf);
@@ -473,7 +555,8 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
                return pblk_sysfs_gc_force(pblk, buf, len);
        else if (strcmp(attr->name, "max_sec_per_write") == 0)
                return pblk_sysfs_set_sec_per_write(pblk, buf, len);
-
+       else if (strcmp(attr->name, "write_amp_trip") == 0)
+               return pblk_sysfs_set_write_amp_trip(pblk, buf, len);
        return 0;
 }
 
index fae2526f80b248ca9dc9c164c5bd15442cb5b527..4b7d8618631f6460c158fed62e64e63bcf19dbc7 100644 (file)
@@ -331,7 +331,7 @@ enum {
 #define SMETA_VERSION_MINOR (1)
 
 #define EMETA_VERSION_MAJOR (0)
-#define EMETA_VERSION_MINOR (1)
+#define EMETA_VERSION_MINOR (2)
 
 struct line_header {
        __le32 crc;
@@ -361,11 +361,13 @@ struct line_smeta {
        __le64 lun_bitmap[];
 };
 
+
 /*
  * Metadata layout in media:
  *     First sector:
  *             1. struct line_emeta
  *             2. bad block bitmap (u64 * window_wr_lun)
+ *             3. write amplification counters
  *     Mid sectors (start at lbas_sector):
  *             3. nr_lbas (u64) forming lba list
  *     Last sectors (start at vsc_sector):
@@ -389,7 +391,15 @@ struct line_emeta {
        __le32 next_id;         /* Line id for next line */
        __le64 nr_lbas;         /* Number of lbas mapped in line */
        __le64 nr_valid_lbas;   /* Number of valid lbas mapped in line */
-       __le64 bb_bitmap[];     /* Updated bad block bitmap for line */
+       __le64 bb_bitmap[];     /* Updated bad block bitmap for line */
+};
+
+
+/*
+ * Write amplification counters stored on media, after the bad block bitmap
+ * in the first emeta sector. All fields are little-endian sector counts.
+ */
+struct wa_counters {
+       __le64 user;            /* Number of user written sectors */
+       __le64 gc;              /* Number of sectors written by GC */
+       __le64 pad;             /* Number of padded sectors */
+};
 
 struct pblk_emeta {
@@ -519,10 +529,11 @@ struct pblk_line_meta {
        unsigned int smeta_sec;         /* Sectors needed for smeta */
 
        unsigned int emeta_len[4];      /* Lengths for emeta:
-                                        *  [0]: Total length
-                                        *  [1]: struct line_emeta length
-                                        *  [2]: L2P portion length
-                                        *  [3]: vsc list length
+                                        *  [0]: Total
+                                        *  [1]: struct line_emeta +
+                                        *       bb_bitmap + struct wa_counters
+                                        *  [2]: L2P portion
+                                        *  [3]: vsc
                                         */
        unsigned int emeta_sec[4];      /* Sectors needed for emeta. Same layout
                                         * as emeta_len
@@ -604,8 +615,19 @@ struct pblk {
        int sec_per_write;
 
        unsigned char instance_uuid[16];
+
+       /* Persistent write amplification counters, 4kb sector I/Os */
+       atomic64_t user_wa;             /* Sectors written by user */
+       atomic64_t gc_wa;               /* Sectors written by GC */
+       atomic64_t pad_wa;              /* Padded sectors written */
+
+       /* Reset values for delta write amplification measurements */
+       u64 user_rst_wa;
+       u64 gc_rst_wa;
+       u64 pad_rst_wa;
+
 #ifdef CONFIG_NVM_DEBUG
-       /* All debug counters apply to 4kb sector I/Os */
+       /* Non-persistent debug counters, 4kb sector I/Os */
        atomic_long_t inflight_writes;  /* Inflight writes (user and gc) */
        atomic_long_t padded_writes;    /* Sectors padded due to flush/fua */
        atomic_long_t padded_wb;        /* Sectors padded in write buffer */
@@ -900,6 +922,12 @@ static inline void *emeta_to_bb(struct line_emeta *emeta)
        return emeta->bb_bitmap;
 }
 
+/*
+ * Return the location of the write amplification counters in the first
+ * emeta sector, directly after the bad block bitmap.
+ */
+static inline void *emeta_to_wa(struct pblk_line_meta *lm,
+                               struct line_emeta *emeta)
+{
+       /*
+        * blk_bitmap_len is a byte count while bb_bitmap is a __le64 array,
+        * so the offset must be applied to a byte-granular pointer.
+        */
+       return (void *)emeta->bb_bitmap + lm->blk_bitmap_len;
+}
+
 static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta)
 {
        return ((void *)emeta + pblk->lm.emeta_len[1]);