lightnvm: pblk: support packed metadata
authorIgor Konopko <igor.j.konopko@intel.com>
Tue, 11 Dec 2018 19:16:26 +0000 (20:16 +0100)
committerJens Axboe <axboe@kernel.dk>
Tue, 11 Dec 2018 19:22:35 +0000 (12:22 -0700)
pblk performs recovery of open lines by storing the LBA in the per-LBA
metadata field. Recovery therefore only works for drives that have this
field.

This patch adds support for packed metadata, which stores the l2p mapping
for open lines in the last sector of every write unit and enables drives
without per-IO metadata to recover open lines.

After this patch, drives with an OOB size <16B will use packed metadata,
while drives with a metadata size larger than 16B will continue to use
the device per-IO metadata.

Reviewed-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
drivers/lightnvm/pblk-core.c
drivers/lightnvm/pblk-init.c
drivers/lightnvm/pblk-map.c
drivers/lightnvm/pblk-rb.c
drivers/lightnvm/pblk-read.c
drivers/lightnvm/pblk-recovery.c
drivers/lightnvm/pblk-sysfs.c
drivers/lightnvm/pblk-write.c
drivers/lightnvm/pblk.h

index 7e3397f8ead1cfb9afd0bbfefca9c1a463f97430..1ff1653511809d22b8ff18ac3c9a7f644d52470c 100644 (file)
@@ -376,7 +376,7 @@ void pblk_write_should_kick(struct pblk *pblk)
 {
        unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
 
-       if (secs_avail >= pblk->min_write_pgs)
+       if (secs_avail >= pblk->min_write_pgs_data)
                pblk_write_kick(pblk);
 }
 
@@ -407,7 +407,9 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
        struct pblk_line_meta *lm = &pblk->lm;
        struct pblk_line_mgmt *l_mg = &pblk->l_mg;
        struct list_head *move_list = NULL;
-       int vsc = le32_to_cpu(*line->vsc);
+       int packed_meta = (le32_to_cpu(*line->vsc) / pblk->min_write_pgs_data)
+                       * (pblk->min_write_pgs - pblk->min_write_pgs_data);
+       int vsc = le32_to_cpu(*line->vsc) + packed_meta;
 
        lockdep_assert_held(&line->lock);
 
@@ -620,12 +622,15 @@ out:
 }
 
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
-                  unsigned long secs_to_flush)
+                  unsigned long secs_to_flush, bool skip_meta)
 {
        int max = pblk->sec_per_write;
        int min = pblk->min_write_pgs;
        int secs_to_sync = 0;
 
+       if (skip_meta && pblk->min_write_pgs_data != pblk->min_write_pgs)
+               min = max = pblk->min_write_pgs_data;
+
        if (secs_avail >= max)
                secs_to_sync = max;
        else if (secs_avail >= min)
@@ -852,7 +857,7 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 next_rq:
        memset(&rqd, 0, sizeof(struct nvm_rq));
 
-       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
        rq_len = rq_ppas * geo->csecs;
 
        bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
@@ -2169,3 +2174,38 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
        }
        spin_unlock(&pblk->trans_lock);
 }
+
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       void *buffer;
+
+       if (pblk_is_oob_meta_supported(pblk)) {
+               /* Just use OOB metadata buffer as always */
+               buffer = rqd->meta_list;
+       } else {
+               /* We need to reuse last page of request (packed metadata)
+                * in similar way as traditional oob metadata
+                */
+               buffer = page_to_virt(
+                       rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+       }
+
+       return buffer;
+}
+
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd)
+{
+       void *meta_list = rqd->meta_list;
+       void *page;
+       int i = 0;
+
+       if (pblk_is_oob_meta_supported(pblk))
+               return;
+
+       page = page_to_virt(rqd->bio->bi_io_vec[rqd->bio->bi_vcnt - 1].bv_page);
+       /* We need to fill oob meta buffer with data from packed metadata */
+       for (; i < rqd->nr_ppas; i++)
+               memcpy(pblk_get_meta(pblk, meta_list, i),
+                       page + (i * sizeof(struct pblk_sec_meta)),
+                       sizeof(struct pblk_sec_meta));
+}
index e8055b7963812a00df96b1e514cb49267f8d2ffd..f9a3e47b6a93410e6d129aa8eb8458f89143e586 100644 (file)
@@ -399,6 +399,7 @@ static int pblk_core_init(struct pblk *pblk)
        pblk->nr_flush_rst = 0;
 
        pblk->min_write_pgs = geo->ws_opt;
+       pblk->min_write_pgs_data = pblk->min_write_pgs;
        max_write_ppas = pblk->min_write_pgs * geo->all_luns;
        pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
        pblk->max_write_pgs = min_t(int, pblk->max_write_pgs,
@@ -406,9 +407,35 @@ static int pblk_core_init(struct pblk *pblk)
        pblk_set_sec_per_write(pblk, pblk->min_write_pgs);
 
        pblk->oob_meta_size = geo->sos;
-       if (pblk->oob_meta_size < sizeof(struct pblk_sec_meta)) {
-               pblk_err(pblk, "Unsupported metadata size\n");
-               return -EINVAL;
+       if (!pblk_is_oob_meta_supported(pblk)) {
+               /* For drives which do not have the OOB metadata feature,
+                * in order to support recovery we need to use
+                * so-called packed metadata. Packed metadata will store
+                * the same information as OOB metadata (l2p table mapping),
+                * but in the form of a single page at the end of
+                * every write request.
+                */
+               if (pblk->min_write_pgs
+                       * sizeof(struct pblk_sec_meta) > PAGE_SIZE) {
+                       /* We want to keep all the packed metadata on single
+                        * page per write requests. So we need to ensure that
+                        * it will fit.
+                        *
+                        * This is more of a sanity check, since there is
+                        * no device with such a big minimal write size
+                        * (above 1 megabyte).
+                        */
+                       pblk_err(pblk, "Not supported min write size\n");
+                       return -EINVAL;
+               }
+               /* For packed meta approach we do some simplification.
+                * On read path we always issue requests which size
+                * equal to max_write_pgs, with all pages filled with
+                * user payload except of last one page which will be
+                * filled with packed metadata.
+                */
+               pblk->max_write_pgs = pblk->min_write_pgs;
+               pblk->min_write_pgs_data = pblk->min_write_pgs - 1;
        }
 
        pblk->pad_dist = kcalloc(pblk->min_write_pgs - 1, sizeof(atomic64_t),
@@ -641,7 +668,7 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
        struct pblk_line_meta *lm = &pblk->lm;
        struct nvm_geo *geo = &dev->geo;
        sector_t provisioned;
-       int sec_meta, blk_meta;
+       int sec_meta, blk_meta, clba;
        int minimum;
 
        if (geo->op == NVM_TARGET_DEFAULT_OP)
@@ -682,7 +709,8 @@ static int pblk_set_provision(struct pblk *pblk, int nr_free_chks)
        sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
        blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
 
-       pblk->capacity = (provisioned - blk_meta) * geo->clba;
+       clba = (geo->clba / pblk->min_write_pgs) * pblk->min_write_pgs_data;
+       pblk->capacity = (provisioned - blk_meta) * clba;
 
        atomic_set(&pblk->rl.free_blocks, nr_free_chks);
        atomic_set(&pblk->rl.free_user_blocks, nr_free_chks);
index 81e503ec384ea36fed4e6cc4e520e1cd91953d35..79df583ea709d0733712f8d0304b8301d248bb63 100644 (file)
@@ -96,7 +96,7 @@ int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
                 unsigned long *lun_bitmap, unsigned int valid_secs,
                 unsigned int off)
 {
-       void *meta_list = rqd->meta_list;
+       void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
        void *meta_buffer;
        struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
        unsigned int map_secs;
@@ -125,7 +125,7 @@ int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
        struct nvm_tgt_dev *dev = pblk->dev;
        struct nvm_geo *geo = &dev->geo;
        struct pblk_line_meta *lm = &pblk->lm;
-       void *meta_list = rqd->meta_list;
+       void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
        void *meta_buffer;
        struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
        struct pblk_line *e_line, *d_line;
index 9f7fa0fe9c779378926f752807c100dc3aa6063b..d4ca8c64ee0f87f0496bfbed346836319c4e1d85 100644 (file)
@@ -552,6 +552,9 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
                to_read = count;
        }
 
+       /* Add space for packed metadata if in use */
+       pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
+
        c_ctx->sentry = pos;
        c_ctx->nr_valid = to_read;
        c_ctx->nr_padded = pad;
index 6becd85ca4c68b71bc7868cfd792029ae1c1e33e..3789185144dae34241911d3c42308e6cbad650f2 100644 (file)
@@ -112,6 +112,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
        int nr_lbas = rqd->nr_ppas;
        int i;
 
+       if (!pblk_is_oob_meta_supported(pblk))
+               return;
+
        for (i = 0; i < nr_lbas; i++) {
                struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
                u64 lba = le64_to_cpu(meta->lba);
@@ -141,6 +144,9 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
        void *meta_lba_list = rqd->meta_list;
        int i, j;
 
+       if (!pblk_is_oob_meta_supported(pblk))
+               return;
+
        for (i = 0, j = 0; i < nr_lbas; i++) {
                struct pblk_sec_meta *meta = pblk_get_meta(pblk,
                                                           meta_lba_list, j);
index 3a775d10f616d654d374908051abba38dfc88c25..3fcf062d752cfe3cfed93102a97ba9f78bc5c2c4 100644 (file)
@@ -191,7 +191,7 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
        kref_init(&pad_rq->ref);
 
 next_pad_rq:
-       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
        if (rq_ppas < pblk->min_write_pgs) {
                pblk_err(pblk, "corrupted pad line %d\n", line->id);
                goto fail_free_pad;
@@ -371,17 +371,19 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 next_rq:
        memset(rqd, 0, pblk_g_rq_size);
 
-       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
+       rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
        if (!rq_ppas)
                rq_ppas = pblk->min_write_pgs;
        rq_len = rq_ppas * geo->csecs;
 
+retry_rq:
        bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
        if (IS_ERR(bio))
                return PTR_ERR(bio);
 
        bio->bi_iter.bi_sector = 0; /* internal bio */
        bio_set_op_attrs(bio, REQ_OP_READ, 0);
+       bio_get(bio);
 
        rqd->bio = bio;
        rqd->opcode = NVM_OP_PREAD;
@@ -394,7 +396,6 @@ next_rq:
        if (pblk_io_aligned(pblk, rq_ppas))
                rqd->is_seq = 1;
 
-retry_rq:
        for (i = 0; i < rqd->nr_ppas; ) {
                struct ppa_addr ppa;
                int pos;
@@ -417,6 +418,7 @@ retry_rq:
        if (ret) {
                pblk_err(pblk, "I/O submission failed: %d\n", ret);
                bio_put(bio);
+               bio_put(bio);
                return ret;
        }
 
@@ -428,18 +430,25 @@ retry_rq:
 
                if (padded) {
                        pblk_log_read_err(pblk, rqd);
+                       bio_put(bio);
                        return -EINTR;
                }
 
                pad_distance = pblk_pad_distance(pblk, line);
                ret = pblk_recov_pad_line(pblk, line, pad_distance);
-               if (ret)
+               if (ret) {
+                       bio_put(bio);
                        return ret;
+               }
 
                padded = true;
+               bio_put(bio);
                goto retry_rq;
        }
 
+       pblk_get_packed_meta(pblk, rqd);
+       bio_put(bio);
+
        for (i = 0; i < rqd->nr_ppas; i++) {
                struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
                u64 lba = le64_to_cpu(meta->lba);
index 2d2818155aa8a18e42d18de657af08b136659820..7d8958df94721a9949283a7522ac0c9ad8a57c63 100644 (file)
@@ -479,6 +479,13 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk,
        if (kstrtouint(page, 0, &sec_per_write))
                return -EINVAL;
 
+       if (!pblk_is_oob_meta_supported(pblk)) {
+               /* For packed metadata case it is
+                * not allowed to change sec_per_write.
+                */
+               return -EINVAL;
+       }
+
        if (sec_per_write < pblk->min_write_pgs
                                || sec_per_write > pblk->max_write_pgs
                                || sec_per_write % pblk->min_write_pgs != 0)
index 2bf78f81862de6b308c192b1ef211ea001f86834..06d56deb645d28049862f6cc04a814303b6ed3ad 100644 (file)
@@ -348,7 +348,7 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail,
 {
        int secs_to_sync;
 
-       secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush);
+       secs_to_sync = pblk_calc_secs(pblk, secs_avail, secs_to_flush, true);
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
        if ((!secs_to_sync && secs_to_flush)
@@ -569,7 +569,7 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
        struct bio *bio;
        struct nvm_rq *rqd;
        unsigned int secs_avail, secs_to_sync, secs_to_com;
-       unsigned int secs_to_flush;
+       unsigned int secs_to_flush, packed_meta_pgs;
        unsigned long pos;
        unsigned int resubmit;
 
@@ -607,7 +607,7 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
                        return 0;
 
                secs_to_flush = pblk_rb_flush_point_count(&pblk->rwb);
-               if (!secs_to_flush && secs_avail < pblk->min_write_pgs)
+               if (!secs_to_flush && secs_avail < pblk->min_write_pgs_data)
                        return 0;
 
                secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail,
@@ -622,7 +622,8 @@ static int pblk_submit_write(struct pblk *pblk, int *secs_left)
                pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com);
        }
 
-       bio = bio_alloc(GFP_KERNEL, secs_to_sync);
+       packed_meta_pgs = (pblk->min_write_pgs - pblk->min_write_pgs_data);
+       bio = bio_alloc(GFP_KERNEL, secs_to_sync + packed_meta_pgs);
 
        bio->bi_iter.bi_sector = 0; /* internal bio */
        bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
index 9087d53d5c2584994a13230651b6d2760194928e..bc40b1381ff6c4e1c955f14d4435e6abef840771 100644 (file)
@@ -632,6 +632,7 @@ struct pblk {
        int state;                      /* pblk line state */
 
        int min_write_pgs; /* Minimum amount of pages required by controller */
+       int min_write_pgs_data; /* Minimum amount of payload pages */
        int max_write_pgs; /* Maximum amount of pages supported by controller */
        int oob_meta_size; /* Size of OOB sector metadata */
 
@@ -838,7 +839,7 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs);
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
-                  unsigned long secs_to_flush);
+                  unsigned long secs_to_flush, bool skip_meta);
 void pblk_down_rq(struct pblk *pblk, struct ppa_addr ppa,
                  unsigned long *lun_bitmap);
 void pblk_down_chunk(struct pblk *pblk, struct ppa_addr ppa);
@@ -862,6 +863,8 @@ void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
                          u64 *lba_list, int nr_secs);
 void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
                         sector_t blba, int nr_secs);
+void *pblk_get_meta_for_writes(struct pblk *pblk, struct nvm_rq *rqd);
+void pblk_get_packed_meta(struct pblk *pblk, struct nvm_rq *rqd);
 
 /*
  * pblk user I/O write path
@@ -1392,4 +1395,9 @@ static inline int pblk_dma_meta_size(struct pblk *pblk)
 {
        return pblk->oob_meta_size * NVM_MAX_VLBA;
 }
+
+static inline int pblk_is_oob_meta_supported(struct pblk *pblk)
+{
+       return pblk->oob_meta_size >= sizeof(struct pblk_sec_meta);
+}
 #endif /* PBLK_H_ */