diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 15:32:19 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-14 15:32:19 -0800 |
commit | e2c5923c349c1738fe8fda980874d93f6fb2e5b6 (patch) | |
tree | b97a90170c45211bcc437761653aa8016c34afcd /drivers/lightnvm | |
parent | abc36be236358162202e86ad88616ff95a755101 (diff) | |
parent | a04b5de5050ab8b891128eb2c47a0916fe8622e1 (diff) | |
download | linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.gz linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.tar.bz2 linux-e2c5923c349c1738fe8fda980874d93f6fb2e5b6.zip |
Merge branch 'for-4.15/block' of git://git.kernel.dk/linux-block
Pull core block layer updates from Jens Axboe:
"This is the main pull request for block storage for 4.15-rc1.
Nothing out of the ordinary in here, and no API changes or anything
like that. Just various new features for drivers, core changes, etc.
In particular, this pull request contains:
- A patch series from Bart, closing the whole on blk/scsi-mq queue
quescing.
- A series from Christoph, building towards hidden gendisks (for
multipath) and ability to move bio chains around.
- NVMe
- Support for native multipath for NVMe (Christoph).
- Userspace notifications for AENs (Keith).
- Command side-effects support (Keith).
- SGL support (Chaitanya Kulkarni)
- FC fixes and improvements (James Smart)
- Lots of fixes and tweaks (Various)
- bcache
- New maintainer (Michael Lyle)
- Writeback control improvements (Michael)
- Various fixes (Coly, Elena, Eric, Liang, et al)
- lightnvm updates, mostly centered around the pblk interface
(Javier, Hans, and Rakesh).
- Removal of unused bio/bvec kmap atomic interfaces (me, Christoph)
- Writeback series that fix the much discussed hundreds of millions
of sync-all units. This goes all the way, as discussed previously
(me).
- Fix for missing wakeup on writeback timer adjustments (Yafang
Shao).
- Fix laptop mode on blk-mq (me).
- {mq,name} tupple lookup for IO schedulers, allowing us to have
alias names. This means you can use 'deadline' on both !mq and on
mq (where it's called mq-deadline). (me).
- blktrace race fix, oopsing on sg load (me).
- blk-mq optimizations (me).
- Obscure waitqueue race fix for kyber (Omar).
- NBD fixes (Josef).
- Disable writeback throttling by default on bfq, like we do on cfq
(Luca Miccio).
- Series from Ming that enable us to treat flush requests on blk-mq
like any other request. This is a really nice cleanup.
- Series from Ming that improves merging on blk-mq with schedulers,
getting us closer to flipping the switch on scsi-mq again.
- BFQ updates (Paolo).
- blk-mq atomic flags memory ordering fixes (Peter Z).
- Loop cgroup support (Shaohua).
- Lots of minor fixes from lots of different folks, both for core and
driver code"
* 'for-4.15/block' of git://git.kernel.dk/linux-block: (294 commits)
nvme: fix visibility of "uuid" ns attribute
blk-mq: fixup some comment typos and lengths
ide: ide-atapi: fix compile error with defining macro DEBUG
blk-mq: improve tag waiting setup for non-shared tags
brd: remove unused brd_mutex
blk-mq: only run the hardware queue if IO is pending
block: avoid null pointer dereference on null disk
fs: guard_bio_eod() needs to consider partitions
xtensa/simdisk: fix compile error
nvme: expose subsys attribute to sysfs
nvme: create 'slaves' and 'holders' entries for hidden controllers
block: create 'slaves' and 'holders' entries for hidden gendisks
nvme: also expose the namespace identification sysfs files for mpath nodes
nvme: implement multipath access to nvme subsystems
nvme: track shared namespaces
nvme: introduce a nvme_ns_ids structure
nvme: track subsystems
block, nvme: Introduce blk_mq_req_flags_t
block, scsi: Make SCSI quiesce and resume work reliably
block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag
...
Diffstat (limited to 'drivers/lightnvm')
-rw-r--r-- | drivers/lightnvm/Kconfig | 3 | ||||
-rw-r--r-- | drivers/lightnvm/core.c | 176 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-cache.c | 24 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-core.c | 512 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-gc.c | 289 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-init.c | 197 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-map.c | 28 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rb.c | 30 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-read.c | 274 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-recovery.c | 129 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-rl.c | 43 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-sysfs.c | 2 | ||||
-rw-r--r-- | drivers/lightnvm/pblk-write.c | 229 | ||||
-rw-r--r-- | drivers/lightnvm/pblk.h | 132 |
14 files changed, 1079 insertions, 989 deletions
diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index ead61a93cb4e..2a953efec4e1 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,7 +4,8 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && HAS_DMA + depends on BLOCK && HAS_DMA && PCI + select BLK_DEV_NVME help Say Y here to get to enable Open-channel SSDs. diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index ddae430b6eae..83249b43dd06 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -22,6 +22,7 @@ #include <linux/types.h> #include <linux/sem.h> #include <linux/bitmap.h> +#include <linux/module.h> #include <linux/moduleparam.h> #include <linux/miscdevice.h> #include <linux/lightnvm.h> @@ -138,7 +139,6 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, int prev_nr_luns; int i, j; - nr_chnls = nr_luns / dev->geo.luns_per_chnl; nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); @@ -226,6 +226,24 @@ static const struct block_device_operations nvm_fops = { .owner = THIS_MODULE, }; +static struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) +{ + struct nvm_tgt_type *tmp, *tt = NULL; + + if (lock) + down_write(&nvm_tgtt_lock); + + list_for_each_entry(tmp, &nvm_tgt_types, list) + if (!strcmp(name, tmp->name)) { + tt = tmp; + break; + } + + if (lock) + up_write(&nvm_tgtt_lock); + return tt; +} + static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) { struct nvm_ioctl_create_simple *s = &create->conf.s; @@ -316,6 +334,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) list_add_tail(&t->list, &dev->targets); mutex_unlock(&dev->mlock); + __module_get(tt->owner); + return 0; err_sysfs: if (tt->exit) @@ -351,6 +371,7 @@ static void __nvm_remove_target(struct nvm_target *t) nvm_remove_tgt_dev(t->dev, 1); put_disk(tdisk); + module_put(t->type->owner); list_del(&t->list); kfree(t); @@ -532,25 +553,6 @@ void nvm_part_to_tgt(struct nvm_dev *dev, sector_t *entries, } EXPORT_SYMBOL(nvm_part_to_tgt); -struct nvm_tgt_type *nvm_find_target_type(const char *name, int lock) -{ - struct nvm_tgt_type *tmp, *tt = NULL; - - if (lock) - down_write(&nvm_tgtt_lock); - - list_for_each_entry(tmp, &nvm_tgt_types, list) - if (!strcmp(name, tmp->name)) { - tt = tmp; - break; - } - - if (lock) - up_write(&nvm_tgtt_lock); - return tt; -} -EXPORT_SYMBOL(nvm_find_target_type); - int nvm_register_tgt_type(struct nvm_tgt_type *tt) { int ret = 0; @@ -571,9 +573,9 @@ void nvm_unregister_tgt_type(struct nvm_tgt_type *tt) if (!tt) return; - down_write(&nvm_lock); + down_write(&nvm_tgtt_lock); list_del(&tt->list); - up_write(&nvm_lock); + up_write(&nvm_tgtt_lock); } EXPORT_SYMBOL(nvm_unregister_tgt_type); @@ -602,6 +604,52 @@ static struct nvm_dev *nvm_find_nvm_dev(const char *name) return NULL; } +static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, + const struct ppa_addr *ppas, int nr_ppas) +{ + struct nvm_dev *dev = tgt_dev->parent; + struct nvm_geo *geo = &tgt_dev->geo; + int i, plane_cnt, pl_idx; + struct ppa_addr ppa; + + if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { + rqd->nr_ppas = nr_ppas; + rqd->ppa_addr = ppas[0]; + + return 0; + } + + rqd->nr_ppas = nr_ppas; + rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); + if (!rqd->ppa_list) { + pr_err("nvm: failed to allocate dma memory\n"); + return -ENOMEM; + } + + plane_cnt = geo->plane_mode; + rqd->nr_ppas *= plane_cnt; + + for (i = 0; i < nr_ppas; i++) { + for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { + ppa = ppas[i]; + ppa.g.pl = pl_idx; + rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; + } + } + + return 0; +} + +static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, + struct nvm_rq *rqd) +{ + if (!rqd->ppa_list) + return; + + nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); +} + + int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas, int type) { @@ -616,7 +664,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, memset(&rqd, 0, sizeof(struct nvm_rq)); - nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); + nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); nvm_rq_tgt_to_dev(tgt_dev, &rqd); ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type); @@ -658,12 +706,25 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) } EXPORT_SYMBOL(nvm_submit_io); -static void nvm_end_io_sync(struct nvm_rq *rqd) +int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { - struct completion *waiting = rqd->private; + struct nvm_dev *dev = tgt_dev->parent; + int ret; - complete(waiting); + if (!dev->ops->submit_io_sync) + return -ENODEV; + + nvm_rq_tgt_to_dev(tgt_dev, rqd); + + rqd->dev = tgt_dev; + + /* In case of error, fail with right address format */ + ret = dev->ops->submit_io_sync(dev, rqd); + nvm_rq_dev_to_tgt(tgt_dev, rqd); + + return ret; } +EXPORT_SYMBOL(nvm_submit_io_sync); int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas) @@ -671,25 +732,21 @@ int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, struct nvm_geo *geo = &tgt_dev->geo; struct nvm_rq rqd; int ret; - DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); rqd.opcode = NVM_OP_ERASE; - rqd.end_io = nvm_end_io_sync; - rqd.private = &wait; rqd.flags = geo->plane_mode >> 1; - ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas, 1); + ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas); if (ret) return ret; - ret = nvm_submit_io(tgt_dev, &rqd); + ret = nvm_submit_io_sync(tgt_dev, &rqd); if (ret) { pr_err("rrpr: erase I/O submission failed: %d\n", ret); goto free_ppa_list; } - wait_for_completion_io(&wait); free_ppa_list: nvm_free_rqd_ppalist(tgt_dev, &rqd); @@ -775,57 +832,6 @@ void nvm_put_area(struct nvm_tgt_dev *tgt_dev, sector_t begin) } EXPORT_SYMBOL(nvm_put_area); -int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, - const struct ppa_addr *ppas, int nr_ppas, int vblk) -{ - struct nvm_dev *dev = tgt_dev->parent; - struct nvm_geo *geo = &tgt_dev->geo; - int i, plane_cnt, pl_idx; - struct ppa_addr ppa; - - if ((!vblk || geo->plane_mode == NVM_PLANE_SINGLE) && nr_ppas == 1) { - rqd->nr_ppas = nr_ppas; - rqd->ppa_addr = ppas[0]; - - return 0; - } - - rqd->nr_ppas = nr_ppas; - rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list); - if (!rqd->ppa_list) { - pr_err("nvm: failed to allocate dma memory\n"); - return -ENOMEM; - } - - if (!vblk) { - for (i = 0; i < nr_ppas; i++) - rqd->ppa_list[i] = ppas[i]; - } else { - plane_cnt = geo->plane_mode; - rqd->nr_ppas *= plane_cnt; - - for (i = 0; i < nr_ppas; i++) { - for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { - ppa = ppas[i]; - ppa.g.pl = pl_idx; - rqd->ppa_list[(pl_idx * nr_ppas) + i] = ppa; - } - } - } - - return 0; -} -EXPORT_SYMBOL(nvm_set_rqd_ppalist); - -void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) -{ - if (!rqd->ppa_list) - return; - - nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); -} -EXPORT_SYMBOL(nvm_free_rqd_ppalist); - void nvm_end_io(struct nvm_rq *rqd) { struct nvm_tgt_dev *tgt_dev = rqd->dev; @@ -1177,7 +1183,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) info->version[1] = NVM_VERSION_MINOR; info->version[2] = NVM_VERSION_PATCH; - down_write(&nvm_lock); + down_write(&nvm_tgtt_lock); list_for_each_entry(tt, &nvm_tgt_types, list) { struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; @@ -1190,7 +1196,7 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) } info->tgtsize = tgt_iter; - up_write(&nvm_lock); + up_write(&nvm_tgtt_lock); if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) { kfree(info); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 024a8fc93069..0d227ef7d1b9 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -43,8 +43,10 @@ retry: if (unlikely(!bio_has_data(bio))) goto out; - w_ctx.flags = flags; pblk_ppa_set_empty(&w_ctx.ppa); + w_ctx.flags = flags; + if (bio->bi_opf & REQ_PREFLUSH) + w_ctx.flags |= PBLK_FLUSH_ENTRY; for (i = 0; i < nr_entries; i++) { void *data = bio_data(bio); @@ -73,12 +75,11 @@ out: * On GC the incoming lbas are not necessarily sequential. Also, some of the * lbas might not be valid entries, which are marked as empty by the GC thread */ -int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, - unsigned int nr_entries, unsigned int nr_rec_entries, - struct pblk_line *gc_line, unsigned long flags) +int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq) { struct pblk_w_ctx w_ctx; unsigned int bpos, pos; + void *data = gc_rq->data; int i, valid_entries; /* Update the write buffer head (mem) with the entries that we can @@ -86,28 +87,29 @@ int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, * rollback from here on. */ retry: - if (!pblk_rb_may_write_gc(&pblk->rwb, nr_rec_entries, &bpos)) { + if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) { io_schedule(); goto retry; } - w_ctx.flags = flags; + w_ctx.flags = PBLK_IOTYPE_GC; pblk_ppa_set_empty(&w_ctx.ppa); - for (i = 0, valid_entries = 0; i < nr_entries; i++) { - if (lba_list[i] == ADDR_EMPTY) + for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) { + if (gc_rq->lba_list[i] == ADDR_EMPTY) continue; - w_ctx.lba = lba_list[i]; + w_ctx.lba = gc_rq->lba_list[i]; pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries); - pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_line, pos); + pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line, + gc_rq->paddr_list[i], pos); data += PBLK_EXPOSED_PAGE_SIZE; valid_entries++; } - WARN_ONCE(nr_rec_entries != valid_entries, + WARN_ONCE(gc_rq->secs_to_gc != valid_entries, "pblk: inconsistent GC write\n"); #ifdef CONFIG_NVM_DEBUG diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 81501644fb15..ce90213a42fa 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -18,6 +18,31 @@ #include "pblk.h" +static void pblk_line_mark_bb(struct work_struct *work) +{ + struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, + ws); + struct pblk *pblk = line_ws->pblk; + struct nvm_tgt_dev *dev = pblk->dev; + struct ppa_addr *ppa = line_ws->priv; + int ret; + + ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); + if (ret) { + struct pblk_line *line; + int pos; + + line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; + pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); + + pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", + line->id, pos); + } + + kfree(ppa); + mempool_free(line_ws, pblk->gen_ws_pool); +} + static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, struct ppa_addr *ppa) { @@ -33,7 +58,8 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", line->id, pos); - pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, pblk->bb_wq); + pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, + GFP_ATOMIC, pblk->bb_wq); } static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) @@ -63,7 +89,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) struct pblk *pblk = rqd->private; __pblk_end_io_erase(pblk, rqd); - mempool_free(rqd, pblk->g_rq_pool); + mempool_free(rqd, pblk->e_rq_pool); } void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, @@ -77,11 +103,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, * that newer updates are not overwritten. */ spin_lock(&line->lock); - if (line->state == PBLK_LINESTATE_GC || - line->state == PBLK_LINESTATE_FREE) { - spin_unlock(&line->lock); - return; - } + WARN_ON(line->state == PBLK_LINESTATE_FREE); if (test_and_set_bit(paddr, line->invalid_bitmap)) { WARN_ONCE(1, "pblk: double invalidate\n"); @@ -98,8 +120,7 @@ void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, spin_lock(&l_mg->gc_lock); spin_lock(&line->lock); /* Prevent moving a line that has just been chosen for GC */ - if (line->state == PBLK_LINESTATE_GC || - line->state == PBLK_LINESTATE_FREE) { + if (line->state == PBLK_LINESTATE_GC) { spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); return; @@ -150,17 +171,25 @@ static void pblk_invalidate_range(struct pblk *pblk, sector_t slba, spin_unlock(&pblk->trans_lock); } -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) +/* Caller must guarantee that the request is a valid type */ +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type) { mempool_t *pool; struct nvm_rq *rqd; int rq_size; - if (rw == WRITE) { + switch (type) { + case PBLK_WRITE: + case PBLK_WRITE_INT: pool = pblk->w_rq_pool; rq_size = pblk_w_rq_size; - } else { - pool = pblk->g_rq_pool; + break; + case PBLK_READ: + pool = pblk->r_rq_pool; + rq_size = pblk_g_rq_size; + break; + default: + pool = pblk->e_rq_pool; rq_size = pblk_g_rq_size; } @@ -170,15 +199,30 @@ struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw) return rqd; } -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw) +/* Typically used on completion path. Cannot guarantee request consistency */ +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type) { + struct nvm_tgt_dev *dev = pblk->dev; mempool_t *pool; - if (rw == WRITE) + switch (type) { + case PBLK_WRITE: + kfree(((struct pblk_c_ctx *)nvm_rq_to_pdu(rqd))->lun_bitmap); + case PBLK_WRITE_INT: pool = pblk->w_rq_pool; - else - pool = pblk->g_rq_pool; + break; + case PBLK_READ: + pool = pblk->r_rq_pool; + break; + case PBLK_ERASE: + pool = pblk->e_rq_pool; + break; + default: + pr_err("pblk: trying to free unknown rqd type\n"); + return; + } + nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); mempool_free(rqd, pool); } @@ -190,10 +234,9 @@ void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off, WARN_ON(off + nr_pages != bio->bi_vcnt); - bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE); for (i = off; i < nr_pages + off; i++) { bv = bio->bi_io_vec[i]; - mempool_free(bv.bv_page, pblk->page_pool); + mempool_free(bv.bv_page, pblk->page_bio_pool); } } @@ -205,14 +248,12 @@ int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int i, ret; for (i = 0; i < nr_pages; i++) { - page = mempool_alloc(pblk->page_pool, flags); - if (!page) - goto err; + page = mempool_alloc(pblk->page_bio_pool, flags); ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0); if (ret != PBLK_EXPOSED_PAGE_SIZE) { pr_err("pblk: could not add page to bio\n"); - mempool_free(page, pblk->page_pool); + mempool_free(page, pblk->page_bio_pool); goto err; } } @@ -245,13 +286,6 @@ void pblk_write_should_kick(struct pblk *pblk) pblk_write_kick(pblk); } -void pblk_end_bio_sync(struct bio *bio) -{ - struct completion *waiting = bio->bi_private; - - complete(waiting); -} - void pblk_end_io_sync(struct nvm_rq *rqd) { struct completion *waiting = rqd->private; @@ -259,7 +293,7 @@ void pblk_end_io_sync(struct nvm_rq *rqd) complete(waiting); } -void pblk_wait_for_meta(struct pblk *pblk) +static void pblk_wait_for_meta(struct pblk *pblk) { do { if (!atomic_read(&pblk->inflight_io)) @@ -336,17 +370,6 @@ void pblk_discard(struct pblk *pblk, struct bio *bio) pblk_invalidate_range(pblk, slba, nr_secs); } -struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba) -{ - struct ppa_addr ppa; - - spin_lock(&pblk->trans_lock); - ppa = pblk_trans_map_get(pblk, lba); - spin_unlock(&pblk->trans_lock); - - return ppa; -} - void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd) { atomic_long_inc(&pblk->write_failed); @@ -389,39 +412,38 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd) struct nvm_tgt_dev *dev = pblk->dev; #ifdef CONFIG_NVM_DEBUG - struct ppa_addr *ppa_list; + int ret; - ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; - if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { - WARN_ON(1); - return -EINVAL; - } + ret = pblk_check_io(pblk, rqd); + if (ret) + return ret; +#endif - if (rqd->opcode == NVM_OP_PWRITE) { - struct pblk_line *line; - struct ppa_addr ppa; - int i; + atomic_inc(&pblk->inflight_io); - for (i = 0; i < rqd->nr_ppas; i++) { - ppa = ppa_list[i]; - line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + return nvm_submit_io(dev, rqd); +} - spin_lock(&line->lock); - if (line->state != PBLK_LINESTATE_OPEN) { - pr_err("pblk: bad ppa: line:%d,state:%d\n", - line->id, line->state); - WARN_ON(1); - spin_unlock(&line->lock); - return -EINVAL; - } - spin_unlock(&line->lock); - } - } +int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + +#ifdef CONFIG_NVM_DEBUG + int ret; + + ret = pblk_check_io(pblk, rqd); + if (ret) + return ret; #endif atomic_inc(&pblk->inflight_io); - return nvm_submit_io(dev, rqd); + return nvm_submit_io_sync(dev, rqd); +} + +static void pblk_bio_map_addr_endio(struct bio *bio) +{ + bio_put(bio); } struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, @@ -460,6 +482,8 @@ struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, kaddr += PAGE_SIZE; } + + bio->bi_end_io = pblk_bio_map_addr_endio; out: return bio; } @@ -486,12 +510,14 @@ void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) u64 addr; int i; + spin_lock(&line->lock); addr = find_next_zero_bit(line->map_bitmap, pblk->lm.sec_per_line, line->cur_sec); line->cur_sec = addr - nr_secs; for (i = 0; i < nr_secs; i++, line->cur_sec--) WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap)); + spin_unlock(&line->lock); } u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs) @@ -565,12 +591,11 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line, int cmd_op, bio_op; int i, j; int ret; - DECLARE_COMPLETION_ONSTACK(wait); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; cmd_op = NVM_OP_PWRITE; - } else if (dir == READ) { + } else if (dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; } else @@ -607,13 +632,11 @@ next_rq: rqd.dma_ppa_list = dma_ppa_list; rqd.opcode = cmd_op; rqd.nr_ppas = rq_ppas; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; - if (dir == WRITE) { + if (dir == PBLK_WRITE) { struct pblk_sec_meta *meta_list = rqd.meta_list; - rqd.flags = pblk_set_progr_mode(pblk, WRITE); + rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE); for (i = 0; i < rqd.nr_ppas; ) { spin_lock(&line->lock); paddr = __pblk_alloc_page(pblk, line, min); @@ -662,25 +685,17 @@ next_rq: } } - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { pr_err("pblk: emeta I/O submission failed: %d\n", ret); bio_put(bio); goto free_rqd_dma; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: emeta I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); - - if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META)) - bio_put(bio); if (rqd.error) { - if (dir == WRITE) + if (dir == PBLK_WRITE) pblk_log_write_err(pblk, &rqd); else pblk_log_read_err(pblk, &rqd); @@ -721,14 +736,13 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, int i, ret; int cmd_op, bio_op; int flags; - DECLARE_COMPLETION_ONSTACK(wait); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { bio_op = REQ_OP_WRITE; cmd_op = NVM_OP_PWRITE; - flags = pblk_set_progr_mode(pblk, WRITE); + flags = pblk_set_progr_mode(pblk, PBLK_WRITE); lba_list = emeta_to_lbas(pblk, line->emeta->buf); - } else if (dir == READ) { + } else if (dir == PBLK_READ) { bio_op = REQ_OP_READ; cmd_op = NVM_OP_PREAD; flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -758,15 +772,13 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, rqd.opcode = cmd_op; rqd.flags = flags; rqd.nr_ppas = lm->smeta_sec; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; for (i = 0; i < lm->smeta_sec; i++, paddr++) { struct pblk_sec_meta *meta_list = rqd.meta_list; rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); - if (dir == WRITE) { + if (dir == PBLK_WRITE) { __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); meta_list[i].lba = lba_list[paddr] = addr_empty; @@ -778,21 +790,17 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, * the write thread is the only one sending write and erase commands, * there is no need to take the LUN semaphore. */ - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { pr_err("pblk: smeta I/O submission failed: %d\n", ret); bio_put(bio); goto free_ppa_list; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: smeta I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); if (rqd.error) { - if (dir == WRITE) + if (dir == PBLK_WRITE) pblk_log_write_err(pblk, &rqd); else pblk_log_read_err(pblk, &rqd); @@ -808,14 +816,14 @@ int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line) { u64 bpaddr = pblk_line_smeta_start(pblk, line); - return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ); + return pblk_line_submit_smeta_io(pblk, line, bpaddr, PBLK_READ); } int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, void *emeta_buf) { return pblk_line_submit_emeta_io(pblk, line, emeta_buf, - line->emeta_ssec, READ); + line->emeta_ssec, PBLK_READ); } static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, @@ -824,7 +832,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd, rqd->opcode = NVM_OP_ERASE; rqd->ppa_addr = ppa; rqd->nr_ppas = 1; - rqd->flags = pblk_set_progr_mode(pblk, ERASE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE); rqd->bio = NULL; } @@ -832,19 +840,15 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_rq rqd; int ret = 0; - DECLARE_COMPLETION_ONSTACK(wait); memset(&rqd, 0, sizeof(struct nvm_rq)); pblk_setup_e_rq(pblk, &rqd, ppa); - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; - /* The write thread schedules erases so that it minimizes disturbances * with writes. Thus, there is no need to take the LUN semaphore. */ - ret = pblk_submit_io(pblk, &rqd); + ret = pblk_submit_io_sync(pblk, &rqd); if (ret) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; @@ -857,11 +861,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa) goto out; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: sync erase timed out\n"); - } - out: rqd.private = pblk; __pblk_end_io_erase(pblk, &rqd); @@ -976,7 +975,7 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16); smeta_buf->header.id = cpu_to_le32(line->id); smeta_buf->header.type = cpu_to_le16(line->type); - smeta_buf->header.version = cpu_to_le16(1); + smeta_buf->header.version = SMETA_VERSION; /* Start metadata */ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); @@ -1046,7 +1045,7 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, line->smeta_ssec = off; line->cur_sec = off + lm->smeta_sec; - if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) { + if (init && pblk_line_submit_smeta_io(pblk, line, off, PBLK_WRITE)) { pr_debug("pblk: line smeta I/O failed. Retry\n"); return 1; } @@ -1056,7 +1055,6 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, /* Mark emeta metadata sectors as bad sectors. We need to consider bad * blocks to make sure that there are enough sectors to store emeta */ - bit = lm->sec_per_line; off = lm->sec_per_line - lm->emeta_sec[0]; bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]); while (nr_bb) { @@ -1093,25 +1091,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) struct pblk_line_meta *lm = &pblk->lm; int blk_in_line = atomic_read(&line->blk_in_line); - line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); if (!line->map_bitmap) return -ENOMEM; - memset(line->map_bitmap, 0, lm->sec_bitmap_len); - /* invalid_bitmap is special since it is used when line is closed. No - * need to zeroized; it will be initialized using bb info form - * map_bitmap - */ - line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC); + /* will be initialized using bb info from map_bitmap */ + line->invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_ATOMIC); if (!line->invalid_bitmap) { - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); return -ENOMEM; } spin_lock(&line->lock); if (line->state != PBLK_LINESTATE_FREE) { - mempool_free(line->invalid_bitmap, pblk->line_meta_pool); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); spin_unlock(&line->lock); WARN(1, "pblk: corrupted line %d, state %d\n", line->id, line->state); @@ -1163,7 +1157,7 @@ int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line) void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line) { - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; @@ -1328,6 +1322,41 @@ static void pblk_stop_writes(struct pblk *pblk, struct pblk_line *line) pblk->state = PBLK_STATE_STOPPING; } +static void pblk_line_close_meta_sync(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + struct pblk_line *line, *tline; + LIST_HEAD(list); + + spin_lock(&l_mg->close_lock); + if (list_empty(&l_mg->emeta_list)) { + spin_unlock(&l_mg->close_lock); + return; + } + + list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); + spin_unlock(&l_mg->close_lock); + + list_for_each_entry_safe(line, tline, &list, list) { + struct pblk_emeta *emeta = line->emeta; + + while (emeta->mem < lm->emeta_len[0]) { + int ret; + + ret = pblk_submit_meta_io(pblk, line); + if (ret) { + pr_err("pblk: sync meta line %d failed (%d)\n", + line->id, ret); + return; + } + } + } + + pblk_wait_for_meta(pblk); + flush_workqueue(pblk->close_wq); +} + void pblk_pipeline_stop(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -1361,17 +1390,17 @@ void pblk_pipeline_stop(struct pblk *pblk) spin_unlock(&l_mg->free_lock); } -void pblk_line_replace_data(struct pblk *pblk) +struct pblk_line *pblk_line_replace_data(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line *cur, *new; + struct pblk_line *cur, *new = NULL; unsigned int left_seblks; int is_next = 0; cur = l_mg->data_line; new = l_mg->data_next; if (!new) - return; + goto out; l_mg->data_line = new; spin_lock(&l_mg->free_lock); @@ -1379,7 +1408,7 @@ void pblk_line_replace_data(struct pblk *pblk) l_mg->data_line = NULL; l_mg->data_next = NULL; spin_unlock(&l_mg->free_lock); - return; + goto out; } pblk_line_setup_metadata(new, l_mg, &pblk->lm); @@ -1391,7 +1420,7 @@ retry_erase: /* If line is not fully erased, erase it */ if (atomic_read(&new->left_eblks)) { if (pblk_line_erase(pblk, new)) - return; + goto out; } else { io_schedule(); } @@ -1402,7 +1431,7 @@ retry_setup: if (!pblk_line_init_metadata(pblk, new, cur)) { new = pblk_line_retry(pblk, new); if (!new) - return; + goto out; goto retry_setup; } @@ -1410,7 +1439,7 @@ retry_setup: if (!pblk_line_init_bb(pblk, new, 1)) { new = pblk_line_retry(pblk, new); if (!new) - return; + goto out; goto retry_setup; } @@ -1434,14 +1463,15 @@ retry_setup: if (is_next) pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); + +out: + return new; } void pblk_line_free(struct pblk *pblk, struct pblk_line *line) { - if (line->map_bitmap) - mempool_free(line->map_bitmap, pblk->line_meta_pool); - if (line->invalid_bitmap) - mempool_free(line->invalid_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); + kfree(line->invalid_bitmap); *line->vsc = cpu_to_le32(EMPTY_ENTRY); @@ -1451,11 +1481,10 @@ void pblk_line_free(struct pblk *pblk, struct pblk_line *line) line->emeta = NULL; } -void pblk_line_put(struct kref *ref) +static void __pblk_line_put(struct pblk *pblk, struct pblk_line *line) { - struct pblk_line *line = container_of(ref, struct pblk_line, ref); - struct pblk *pblk = line->pblk; struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; spin_lock(&line->lock); WARN_ON(line->state != PBLK_LINESTATE_GC); @@ -1464,6 +1493,8 @@ void pblk_line_put(struct kref *ref) pblk_line_free(pblk, line); spin_unlock(&line->lock); + atomic_dec(&gc->pipeline_gc); + spin_lock(&l_mg->free_lock); list_add_tail(&line->list, &l_mg->free_list); l_mg->nr_free_lines++; @@ -1472,13 +1503,49 @@ void pblk_line_put(struct kref *ref) pblk_rl_free_lines_inc(&pblk->rl, line); } +static void pblk_line_put_ws(struct work_struct *work) +{ + struct pblk_line_ws *line_put_ws = container_of(work, + struct pblk_line_ws, ws); + struct pblk *pblk = line_put_ws->pblk; + struct pblk_line *line = line_put_ws->line; + + __pblk_line_put(pblk, line); + mempool_free(line_put_ws, pblk->gen_ws_pool); +} + +void pblk_line_put(struct kref *ref) +{ + struct pblk_line *line = container_of(ref, struct pblk_line, ref); + struct pblk *pblk = line->pblk; + + __pblk_line_put(pblk, line); +} + +void pblk_line_put_wq(struct kref *ref) +{ + struct pblk_line *line = container_of(ref, struct pblk_line, ref); + struct pblk *pblk = line->pblk; + struct pblk_line_ws *line_put_ws; + + line_put_ws = mempool_alloc(pblk->gen_ws_pool, GFP_ATOMIC); + if (!line_put_ws) + return; + + line_put_ws->pblk = pblk; + line_put_ws->line = line; + line_put_ws->priv = NULL; + + INIT_WORK(&line_put_ws->ws, pblk_line_put_ws); + queue_work(pblk->r_end_wq, &line_put_ws->ws); +} + int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa) { struct nvm_rq *rqd; int err; - rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL); - memset(rqd, 0, pblk_g_rq_size); + rqd = pblk_alloc_rqd(pblk, PBLK_ERASE); pblk_setup_e_rq(pblk, rqd, ppa); @@ -1517,41 +1584,6 @@ int pblk_line_is_full(struct pblk_line *line) return (line->left_msecs == 0); } -void pblk_line_close_meta_sync(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line, *tline; - LIST_HEAD(list); - - spin_lock(&l_mg->close_lock); - if (list_empty(&l_mg->emeta_list)) { - spin_unlock(&l_mg->close_lock); - return; - } - - list_cut_position(&list, &l_mg->emeta_list, l_mg->emeta_list.prev); - spin_unlock(&l_mg->close_lock); - - list_for_each_entry_safe(line, tline, &list, list) { - struct pblk_emeta *emeta = line->emeta; - - while (emeta->mem < lm->emeta_len[0]) { - int ret; - - ret = pblk_submit_meta_io(pblk, line); - if (ret) { - pr_err("pblk: sync meta line %d failed (%d)\n", - line->id, ret); - return; - } - } - } - - pblk_wait_for_meta(pblk); - flush_workqueue(pblk->close_wq); -} - static void pblk_line_should_sync_meta(struct pblk *pblk) { if (pblk_rl_is_limit(&pblk->rl)) @@ -1582,15 +1614,13 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) list_add_tail(&line->list, move_list); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); - - pblk_gc_should_kick(pblk); } void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) @@ -1624,43 +1654,16 @@ void pblk_line_close_ws(struct work_struct *work) struct pblk_line *line = line_ws->line; pblk_line_close(pblk, line); - mempool_free(line_ws, pblk->line_ws_pool); -} - -void pblk_line_mark_bb(struct work_struct *work) -{ - struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws, - ws); - struct pblk *pblk = line_ws->pblk; - struct nvm_tgt_dev *dev = pblk->dev; - struct ppa_addr *ppa = line_ws->priv; - int ret; - - ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD); - if (ret) { - struct pblk_line *line; - int pos; - - line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)]; - pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa); - - pr_err("pblk: failed to mark bb, line:%d, pos:%d\n", - line->id, pos); - } - - kfree(ppa); - mempool_free(line_ws, pblk->line_ws_pool); + mempool_free(line_ws, pblk->gen_ws_pool); } -void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), +void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *), gfp_t gfp_mask, struct workqueue_struct *wq) { struct pblk_line_ws *line_ws; - line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC); - if (!line_ws) - return; + line_ws = mempool_alloc(pblk->gen_ws_pool, gfp_mask); line_ws->pblk = pblk; line_ws->line = line; @@ -1689,16 +1692,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, #endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); - if (ret) { - switch (ret) { - case -ETIME: - pr_err("pblk: lun semaphore timed out\n"); - break; - case -EINTR: - pr_err("pblk: lun semaphore timed out\n"); - break; - } - } + if (ret == -ETIME || ret == -EINTR) + pr_err("pblk: taking lun semaphore timed out: err %d\n", -ret); } void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) @@ -1758,13 +1753,11 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, rlun = &pblk->luns[bit]; up(&rlun->wr_sem); } - - kfree(lun_bitmap); } void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) { - struct ppa_addr l2p_ppa; + struct ppa_addr ppa_l2p; /* logic error: lba out-of-bounds. Ignore update */ if (!(lba < pblk->rl.nr_secs)) { @@ -1773,10 +1766,10 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) } spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); - if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa)) - pblk_map_invalidate(pblk, l2p_ppa); + if (!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)) + pblk_map_invalidate(pblk, ppa_l2p); pblk_trans_map_set(pblk, lba, ppa); spin_unlock(&pblk->trans_lock); @@ -1784,6 +1777,7 @@ void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) { + #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a cache address */ BUG_ON(!pblk_addr_in_cache(ppa)); @@ -1793,16 +1787,16 @@ void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) pblk_update_map(pblk, lba, ppa); } -int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct pblk_line *gc_line) +int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa_new, + struct pblk_line *gc_line, u64 paddr_gc) { - struct ppa_addr l2p_ppa; + struct ppa_addr ppa_l2p, ppa_gc; int ret = 1; #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a cache address */ - BUG_ON(!pblk_addr_in_cache(ppa)); - BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa))); + BUG_ON(!pblk_addr_in_cache(ppa_new)); + BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa_new))); #endif /* logic error: lba out-of-bounds. Ignore update */ @@ -1812,36 +1806,41 @@ int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, } spin_lock(&pblk->trans_lock); - l2p_ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); + ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, gc_line->id); + + if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) { + spin_lock(&gc_line->lock); + WARN(!test_bit(paddr_gc, gc_line->invalid_bitmap), + "pblk: corrupted GC update"); + spin_unlock(&gc_line->lock); - /* Prevent updated entries to be overwritten by GC */ - if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) || - pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) { ret = 0; goto out; } - pblk_trans_map_set(pblk, lba, ppa); + pblk_trans_map_set(pblk, lba, ppa_new); out: spin_unlock(&pblk->trans_lock); return ret; } -void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct ppa_addr entry_line) +void pblk_update_map_dev(struct pblk *pblk, sector_t lba, + struct ppa_addr ppa_mapped, struct ppa_addr ppa_cache) { - struct ppa_addr l2p_line; + struct ppa_addr ppa_l2p; #ifdef CONFIG_NVM_DEBUG /* Callers must ensure that the ppa points to a device address */ - BUG_ON(pblk_addr_in_cache(ppa)); + BUG_ON(pblk_addr_in_cache(ppa_mapped)); #endif /* Invalidate and discard padded entries */ if (lba == ADDR_EMPTY) { #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->padded_wb); #endif - pblk_map_invalidate(pblk, ppa); + if (!pblk_ppa_empty(ppa_mapped)) + pblk_map_invalidate(pblk, ppa_mapped); return; } @@ -1852,22 +1851,22 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, } spin_lock(&pblk->trans_lock); - l2p_line = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); /* Do not update L2P if the cacheline has been updated. In this case, * the mapped ppa must be invalidated */ - if (l2p_line.ppa != entry_line.ppa) { - if (!pblk_ppa_empty(ppa)) - pblk_map_invalidate(pblk, ppa); + if (!pblk_ppa_comp(ppa_l2p, ppa_cache)) { + if (!pblk_ppa_empty(ppa_mapped)) + pblk_map_invalidate(pblk, ppa_mapped); goto out; } #ifdef CONFIG_NVM_DEBUG - WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line)); + WARN_ON(!pblk_addr_in_cache(ppa_l2p) && !pblk_ppa_empty(ppa_l2p)); #endif - pblk_trans_map_set(pblk, lba, ppa); + pblk_trans_map_set(pblk, lba, ppa_mapped); out: spin_unlock(&pblk->trans_lock); } @@ -1878,23 +1877,32 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, int i; spin_lock(&pblk->trans_lock); - for (i = 0; i < nr_secs; i++) - ppas[i] = pblk_trans_map_get(pblk, blba + i); + for (i = 0; i < nr_secs; i++) { + struct ppa_addr ppa; + + ppa = ppas[i] = pblk_trans_map_get(pblk, blba + i); + + /* If the L2P entry maps to a line, the reference is valid */ + if (!pblk_ppa_empty(ppa) && !pblk_addr_in_cache(ppa)) { + int line_id = pblk_dev_ppa_to_line(ppa); + struct pblk_line *line = &pblk->lines[line_id]; + + kref_get(&line->ref); + } + } spin_unlock(&pblk->trans_lock); } void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs) { - sector_t lba; + u64 lba; int i; spin_lock(&pblk->trans_lock); for (i = 0; i < nr_secs; i++) { lba = lba_list[i]; - if (lba == ADDR_EMPTY) { - ppas[i].ppa = ADDR_EMPTY; - } else { + if (lba != ADDR_EMPTY) { /* logic error: lba out-of-bounds. Ignore update */ if (!(lba < pblk->rl.nr_secs)) { WARN(1, "pblk: corrupted L2P map request\n"); diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 6090d28f7995..00d5698d64a9 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -20,7 +20,8 @@ static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq) { - vfree(gc_rq->data); + if (gc_rq->data) + vfree(gc_rq->data); kfree(gc_rq); } @@ -41,10 +42,7 @@ static int pblk_gc_write(struct pblk *pblk) spin_unlock(&gc->w_lock); list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) { - pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list, - gc_rq->nr_secs, gc_rq->secs_to_gc, - gc_rq->line, PBLK_IOTYPE_GC); - + pblk_write_gc_to_cache(pblk, gc_rq); list_del(&gc_rq->list); kref_put(&gc_rq->line->ref, pblk_line_put); pblk_gc_free_gc_rq(gc_rq); @@ -58,42 +56,59 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc) wake_up_process(gc->gc_writer_ts); } -/* - * Responsible for managing all memory related to a gc request. Also in case of - * failure - */ -static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq) +static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct list_head *move_list; + + spin_lock(&line->lock); + WARN_ON(line->state != PBLK_LINESTATE_GC); + line->state = PBLK_LINESTATE_CLOSED; + move_list = pblk_line_gc_list(pblk, line); + spin_unlock(&line->lock); + + if (move_list) { + spin_lock(&l_mg->gc_lock); + list_add_tail(&line->list, move_list); + spin_unlock(&l_mg->gc_lock); + } +} + +static void pblk_gc_line_ws(struct work_struct *work) { + struct pblk_line_ws *gc_rq_ws = container_of(work, + struct pblk_line_ws, ws); + struct pblk *pblk = gc_rq_ws->pblk; struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = gc_rq->line; - void *data; - unsigned int secs_to_gc; - int ret = 0; + struct pblk_line *line = gc_rq_ws->line; + struct pblk_gc_rq *gc_rq = gc_rq_ws->priv; + int ret; - data = vmalloc(gc_rq->nr_secs * geo->sec_size); - if (!data) { - ret = -ENOMEM; + up(&gc->gc_sem); + + gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size); + if (!gc_rq->data) { + pr_err("pblk: could not GC line:%d (%d/%d)\n", + line->id, *line->vsc, gc_rq->nr_secs); goto out; } /* Read from GC victim block */ - if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs, - &secs_to_gc, line)) { - ret = -EFAULT; - goto free_data; + ret = pblk_submit_read_gc(pblk, gc_rq); + if (ret) { + pr_err("pblk: failed GC read in line:%d (err:%d)\n", + line->id, ret); + goto out; } - if (!secs_to_gc) - goto free_rq; - - gc_rq->data = data; - gc_rq->secs_to_gc = secs_to_gc; + if (!gc_rq->secs_to_gc) + goto out; retry: spin_lock(&gc->w_lock); - if (gc->w_entries >= PBLK_GC_W_QD) { + if (gc->w_entries >= PBLK_GC_RQ_QD) { spin_unlock(&gc->w_lock); pblk_gc_writer_kick(&pblk->gc); usleep_range(128, 256); @@ -105,53 +120,13 @@ retry: pblk_gc_writer_kick(&pblk->gc); - return 0; + kfree(gc_rq_ws); + return; -free_rq: - kfree(gc_rq); -free_data: - vfree(data); out: + pblk_gc_free_gc_rq(gc_rq); kref_put(&line->ref, pblk_line_put); - return ret; -} - -static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - struct list_head *move_list; - - spin_lock(&line->lock); - WARN_ON(line->state != PBLK_LINESTATE_GC); - line->state = PBLK_LINESTATE_CLOSED; - move_list = pblk_line_gc_list(pblk, line); - spin_unlock(&line->lock); - - if (move_list) { - spin_lock(&l_mg->gc_lock); - list_add_tail(&line->list, move_list); - spin_unlock(&l_mg->gc_lock); - } -} - -static void pblk_gc_line_ws(struct work_struct *work) -{ - struct pblk_line_ws *line_rq_ws = container_of(work, - struct pblk_line_ws, ws); - struct pblk *pblk = line_rq_ws->pblk; - struct pblk_gc *gc = &pblk->gc; - struct pblk_line *line = line_rq_ws->line; - struct pblk_gc_rq *gc_rq = line_rq_ws->priv; - - up(&gc->gc_sem); - - if (pblk_gc_move_valid_secs(pblk, gc_rq)) { - pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n", - line->id, *line->vsc, - gc_rq->nr_secs); - } - - mempool_free(line_rq_ws, pblk->line_ws_pool); + kfree(gc_rq_ws); } static void pblk_gc_line_prepare_ws(struct work_struct *work) @@ -164,17 +139,24 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) struct pblk_line_meta *lm = &pblk->lm; struct pblk_gc *gc = &pblk->gc; struct line_emeta *emeta_buf; - struct pblk_line_ws *line_rq_ws; + struct pblk_line_ws *gc_rq_ws; struct pblk_gc_rq *gc_rq; __le64 *lba_list; + unsigned long *invalid_bitmap; int sec_left, nr_secs, bit; int ret; + invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!invalid_bitmap) { + pr_err("pblk: could not allocate GC invalid bitmap\n"); + goto fail_free_ws; + } + emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, GFP_KERNEL); if (!emeta_buf) { pr_err("pblk: cannot use GC emeta\n"); - return; + goto fail_free_bitmap; } ret = pblk_line_read_emeta(pblk, line, emeta_buf); @@ -193,7 +175,11 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) goto fail_free_emeta; } + spin_lock(&line->lock); + bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line); sec_left = pblk_line_vsc(line); + spin_unlock(&line->lock); + if (sec_left < 0) { pr_err("pblk: corrupted GC line (%d)\n", line->id); goto fail_free_emeta; @@ -207,11 +193,12 @@ next_rq: nr_secs = 0; do { - bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line, + bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line, bit + 1); if (bit > line->emeta_ssec) break; + gc_rq->paddr_list[nr_secs] = bit; gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]); } while (nr_secs < pblk->max_write_pgs); @@ -223,19 +210,25 @@ next_rq: gc_rq->nr_secs = nr_secs; gc_rq->line = line; - line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); - if (!line_rq_ws) + gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); + if (!gc_rq_ws) goto fail_free_gc_rq; - line_rq_ws->pblk = pblk; - line_rq_ws->line = line; - line_rq_ws->priv = gc_rq; + gc_rq_ws->pblk = pblk; + gc_rq_ws->line = line; + gc_rq_ws->priv = gc_rq; + + /* The write GC path can be much slower than the read GC one due to + * the budget imposed by the rate-limiter. Balance in case that we get + * back pressure from the write GC path. + */ + while (down_timeout(&gc->gc_sem, msecs_to_jiffies(30000))) + io_schedule(); - down(&gc->gc_sem); kref_get(&line->ref); - INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws); - queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws); + INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws); + queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws); sec_left -= nr_secs; if (sec_left > 0) @@ -243,10 +236,11 @@ next_rq: out: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); - mempool_free(line_ws, pblk->line_ws_pool); + kfree(line_ws); + kfree(invalid_bitmap); kref_put(&line->ref, pblk_line_put); - atomic_dec(&gc->inflight_gc); + atomic_dec(&gc->read_inflight_gc); return; @@ -254,10 +248,14 @@ fail_free_gc_rq: kfree(gc_rq); fail_free_emeta: pblk_mfree(emeta_buf, l_mg->emeta_alloc_type); +fail_free_bitmap: + kfree(invalid_bitmap); +fail_free_ws: + kfree(line_ws); + pblk_put_line_back(pblk, line); kref_put(&line->ref, pblk_line_put); - mempool_free(line_ws, pblk->line_ws_pool); - atomic_dec(&gc->inflight_gc); + atomic_dec(&gc->read_inflight_gc); pr_err("pblk: Failed to GC line %d\n", line->id); } @@ -269,19 +267,40 @@ static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line) pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id); - line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL); + line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL); if (!line_ws) return -ENOMEM; line_ws->pblk = pblk; line_ws->line = line; + atomic_inc(&gc->pipeline_gc); INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws); queue_work(gc->gc_reader_wq, &line_ws->ws); return 0; } +static void pblk_gc_reader_kick(struct pblk_gc *gc) +{ + wake_up_process(gc->gc_reader_ts); +} + +static void pblk_gc_kick(struct pblk *pblk) +{ + struct pblk_gc *gc = &pblk->gc; + + pblk_gc_writer_kick(gc); + pblk_gc_reader_kick(gc); + + /* If we're shutting down GC, let's not start it up again */ + if (gc->gc_enabled) { + wake_up_process(gc->gc_ts); + mod_timer(&gc->gc_timer, + jiffies + msecs_to_jiffies(GC_TIME_MSECS)); + } +} + static int pblk_gc_read(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; @@ -305,11 +324,6 @@ static int pblk_gc_read(struct pblk *pblk) return 0; } -static void pblk_gc_reader_kick(struct pblk_gc *gc) -{ - wake_up_process(gc->gc_reader_ts); -} - static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk, struct list_head *group_list) { @@ -338,26 +352,17 @@ static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl) return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free)); } -/* - * Lines with no valid sectors will be returned to the free list immediately. If - * GC is activated - either because the free block count is under the determined - * threshold, or because it is being forced from user space - only lines with a - * high count of invalid sectors will be recycled. - */ -static void pblk_gc_run(struct pblk *pblk) +void pblk_gc_free_full_lines(struct pblk *pblk) { struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_gc *gc = &pblk->gc; struct pblk_line *line; - struct list_head *group_list; - bool run_gc; - int inflight_gc, gc_group = 0, prev_group = 0; do { spin_lock(&l_mg->gc_lock); if (list_empty(&l_mg->gc_full_list)) { spin_unlock(&l_mg->gc_lock); - break; + return; } line = list_first_entry(&l_mg->gc_full_list, @@ -371,11 +376,30 @@ static void pblk_gc_run(struct pblk *pblk) list_del(&line->list); spin_unlock(&l_mg->gc_lock); + atomic_inc(&gc->pipeline_gc); kref_put(&line->ref, pblk_line_put); } while (1); +} + +/* + * Lines with no valid sectors will be returned to the free list immediately. If + * GC is activated - either because the free block count is under the determined + * threshold, or because it is being forced from user space - only lines with a + * high count of invalid sectors will be recycled. + */ +static void pblk_gc_run(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_gc *gc = &pblk->gc; + struct pblk_line *line; + struct list_head *group_list; + bool run_gc; + int read_inflight_gc, gc_group = 0, prev_group = 0; + + pblk_gc_free_full_lines(pblk); run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD)) + if (!run_gc || (atomic_read(&gc->read_inflight_gc) >= PBLK_GC_L_QD)) return; next_gc_group: @@ -402,14 +426,14 @@ next_gc_group: list_add_tail(&line->list, &gc->r_list); spin_unlock(&gc->r_lock); - inflight_gc = atomic_inc_return(&gc->inflight_gc); + read_inflight_gc = atomic_inc_return(&gc->read_inflight_gc); pblk_gc_reader_kick(gc); prev_group = 1; /* No need to queue up more GC lines than we can handle */ run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl); - if (!run_gc || inflight_gc >= PBLK_GC_L_QD) + if (!run_gc || read_inflight_gc >= PBLK_GC_L_QD) break; } while (1); @@ -418,16 +442,6 @@ next_gc_group: goto next_gc_group; } -void pblk_gc_kick(struct pblk *pblk) -{ - struct pblk_gc *gc = &pblk->gc; - - wake_up_process(gc->gc_ts); - pblk_gc_writer_kick(gc); - pblk_gc_reader_kick(gc); - mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS)); -} - static void pblk_gc_timer(unsigned long data) { struct pblk *pblk = (struct pblk *)data; @@ -465,6 +479,7 @@ static int pblk_gc_writer_ts(void *data) static int pblk_gc_reader_ts(void *data) { struct pblk *pblk = data; + struct pblk_gc *gc = &pblk->gc; while (!kthread_should_stop()) { if (!pblk_gc_read(pblk)) @@ -473,6 +488,18 @@ static int pblk_gc_reader_ts(void *data) io_schedule(); } +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk: flushing gc pipeline, %d lines left\n", + atomic_read(&gc->pipeline_gc)); +#endif + + do { + if (!atomic_read(&gc->pipeline_gc)) + break; + + schedule(); + } while (1); + return 0; } @@ -486,10 +513,10 @@ void pblk_gc_should_start(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; - if (gc->gc_enabled && !gc->gc_active) + if (gc->gc_enabled && !gc->gc_active) { pblk_gc_start(pblk); - - pblk_gc_kick(pblk); + pblk_gc_kick(pblk); + } } /* @@ -510,6 +537,11 @@ void pblk_gc_should_stop(struct pblk *pblk) pblk_gc_stop(pblk, 0); } +void pblk_gc_should_kick(struct pblk *pblk) +{ + pblk_rl_update_rates(&pblk->rl); +} + void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active) { @@ -576,7 +608,8 @@ int pblk_gc_init(struct pblk *pblk) gc->gc_forced = 0; gc->gc_enabled = 1; gc->w_entries = 0; - atomic_set(&gc->inflight_gc, 0); + atomic_set(&gc->read_inflight_gc, 0); + atomic_set(&gc->pipeline_gc, 0); /* Workqueue that reads valid sectors from a line and submit them to the * GC writer to be recycled. @@ -602,7 +635,7 @@ int pblk_gc_init(struct pblk *pblk) spin_lock_init(&gc->w_lock); spin_lock_init(&gc->r_lock); - sema_init(&gc->gc_sem, 128); + sema_init(&gc->gc_sem, PBLK_GC_RQ_QD); INIT_LIST_HEAD(&gc->w_list); INIT_LIST_HEAD(&gc->r_list); @@ -625,24 +658,24 @@ void pblk_gc_exit(struct pblk *pblk) { struct pblk_gc *gc = &pblk->gc; - flush_workqueue(gc->gc_reader_wq); - flush_workqueue(gc->gc_line_reader_wq); - - del_timer(&gc->gc_timer); + gc->gc_enabled = 0; + del_timer_sync(&gc->gc_timer); pblk_gc_stop(pblk, 1); if (gc->gc_ts) kthread_stop(gc->gc_ts); + if (gc->gc_reader_ts) + kthread_stop(gc->gc_reader_ts); + + flush_workqueue(gc->gc_reader_wq); if (gc->gc_reader_wq) destroy_workqueue(gc->gc_reader_wq); + flush_workqueue(gc->gc_line_reader_wq); if (gc->gc_line_reader_wq) destroy_workqueue(gc->gc_line_reader_wq); if (gc->gc_writer_ts) kthread_stop(gc->gc_writer_ts); - - if (gc->gc_reader_ts) - kthread_stop(gc->gc_reader_ts); } diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 1b0f61233c21..f62112ba5482 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -20,8 +20,8 @@ #include "pblk.h" -static struct kmem_cache *pblk_blk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, - *pblk_w_rq_cache, *pblk_line_meta_cache; +static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache, + *pblk_w_rq_cache; static DECLARE_RWSEM(pblk_lock); struct bio_set *pblk_bio_set; @@ -46,7 +46,7 @@ static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, * user I/Os. Unless stalled, the rate limiter leaves at least 256KB * available for user I/O. */ - if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) + if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl)) blk_queue_split(q, &bio); return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); @@ -76,6 +76,28 @@ static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } +static size_t pblk_trans_map_size(struct pblk *pblk) +{ + int entry_size = 8; + + if (pblk->ppaf_bitsize < 32) + entry_size = 4; + + return entry_size * pblk->rl.nr_secs; +} + +#ifdef CONFIG_NVM_DEBUG +static u32 pblk_l2p_crc(struct pblk *pblk) +{ + size_t map_size; + u32 crc = ~(u32)0; + + map_size = pblk_trans_map_size(pblk); + crc = crc32_le(crc, pblk->trans_map, map_size); + return crc; +} +#endif + static void pblk_l2p_free(struct pblk *pblk) { vfree(pblk->trans_map); @@ -85,12 +107,10 @@ static int pblk_l2p_init(struct pblk *pblk) { sector_t i; struct ppa_addr ppa; - int entry_size = 8; + size_t map_size; - if (pblk->ppaf_bitsize < 32) - entry_size = 4; - - pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); + map_size = pblk_trans_map_size(pblk); + pblk->trans_map = vmalloc(map_size); if (!pblk->trans_map) return -ENOMEM; @@ -132,7 +152,6 @@ static int pblk_rwb_init(struct pblk *pblk) } /* Minimum pages needed within a lun */ -#define PAGE_POOL_SIZE 16 #define ADDR_POOL_SIZE 64 static int pblk_set_ppaf(struct pblk *pblk) @@ -182,12 +201,10 @@ static int pblk_set_ppaf(struct pblk *pblk) static int pblk_init_global_caches(struct pblk *pblk) { - char cache_name[PBLK_CACHE_NAME_LEN]; - down_write(&pblk_lock); - pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", + pblk_ws_cache = kmem_cache_create("pblk_blk_ws", sizeof(struct pblk_line_ws), 0, 0, NULL); - if (!pblk_blk_ws_cache) { + if (!pblk_ws_cache) { up_write(&pblk_lock); return -ENOMEM; } @@ -195,7 +212,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_rec_cache = kmem_cache_create("pblk_rec", sizeof(struct pblk_rec_ctx), 0, 0, NULL); if (!pblk_rec_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); up_write(&pblk_lock); return -ENOMEM; } @@ -203,7 +220,7 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, 0, 0, NULL); if (!pblk_g_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); up_write(&pblk_lock); return -ENOMEM; @@ -212,30 +229,25 @@ static int pblk_init_global_caches(struct pblk *pblk) pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, 0, 0, NULL); if (!pblk_w_rq_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); + kmem_cache_destroy(pblk_ws_cache); kmem_cache_destroy(pblk_rec_cache); kmem_cache_destroy(pblk_g_rq_cache); up_write(&pblk_lock); return -ENOMEM; } - - snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", - pblk->disk->disk_name); - pblk_line_meta_cache = kmem_cache_create(cache_name, - pblk->lm.sec_bitmap_len, 0, 0, NULL); - if (!pblk_line_meta_cache) { - kmem_cache_destroy(pblk_blk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); - up_write(&pblk_lock); - return -ENOMEM; - } up_write(&pblk_lock); return 0; } +static void pblk_free_global_caches(struct pblk *pblk) +{ + kmem_cache_destroy(pblk_ws_cache); + kmem_cache_destroy(pblk_rec_cache); + kmem_cache_destroy(pblk_g_rq_cache); + kmem_cache_destroy(pblk_w_rq_cache); +} + static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; @@ -247,70 +259,80 @@ static int pblk_core_init(struct pblk *pblk) if (pblk_init_global_caches(pblk)) return -ENOMEM; - pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); - if (!pblk->page_pool) - return -ENOMEM; + /* Internal bios can be at most the sectors signaled by the device. */ + pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), + 0); + if (!pblk->page_bio_pool) + goto free_global_caches; - pblk->line_ws_pool = mempool_create_slab_pool(PBLK_WS_POOL_SIZE, - pblk_blk_ws_cache); - if (!pblk->line_ws_pool) - goto free_page_pool; + pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE, + pblk_ws_cache); + if (!pblk->gen_ws_pool) + goto free_page_bio_pool; pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); if (!pblk->rec_pool) - goto free_blk_ws_pool; + goto free_gen_ws_pool; - pblk->g_rq_pool = mempool_create_slab_pool(PBLK_READ_REQ_POOL_SIZE, + pblk->r_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_g_rq_cache); - if (!pblk->g_rq_pool) + if (!pblk->r_rq_pool) goto free_rec_pool; - pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns * 2, + pblk->e_rq_pool = mempool_create_slab_pool(geo->nr_luns, + pblk_g_rq_cache); + if (!pblk->e_rq_pool) + goto free_r_rq_pool; + + pblk->w_rq_pool = mempool_create_slab_pool(geo->nr_luns, pblk_w_rq_cache); if (!pblk->w_rq_pool) - goto free_g_rq_pool; - - pblk->line_meta_pool = - mempool_create_slab_pool(PBLK_META_POOL_SIZE, - pblk_line_meta_cache); - if (!pblk->line_meta_pool) - goto free_w_rq_pool; + goto free_e_rq_pool; pblk->close_wq = alloc_workqueue("pblk-close-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS); if (!pblk->close_wq) - goto free_line_meta_pool; + goto free_w_rq_pool; pblk->bb_wq = alloc_workqueue("pblk-bb-wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (!pblk->bb_wq) goto free_close_wq; - if (pblk_set_ppaf(pblk)) + pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq", + WQ_MEM_RECLAIM | WQ_UNBOUND, 0); + if (!pblk->r_end_wq) goto free_bb_wq; + if (pblk_set_ppaf(pblk)) + goto free_r_end_wq; + if (pblk_rwb_init(pblk)) - goto free_bb_wq; + goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); return 0; +free_r_end_wq: + destroy_workqueue(pblk->r_end_wq); free_bb_wq: destroy_workqueue(pblk->bb_wq); free_close_wq: destroy_workqueue(pblk->close_wq); -free_line_meta_pool: - mempool_destroy(pblk->line_meta_pool); free_w_rq_pool: mempool_destroy(pblk->w_rq_pool); -free_g_rq_pool: - mempool_destroy(pblk->g_rq_pool); +free_e_rq_pool: + mempool_destroy(pblk->e_rq_pool); +free_r_rq_pool: + mempool_destroy(pblk->r_rq_pool); free_rec_pool: mempool_destroy(pblk->rec_pool); -free_blk_ws_pool: - mempool_destroy(pblk->line_ws_pool); -free_page_pool: - mempool_destroy(pblk->page_pool); +free_gen_ws_pool: + mempool_destroy(pblk->gen_ws_pool); +free_page_bio_pool: + mempool_destroy(pblk->page_bio_pool); +free_global_caches: + pblk_free_global_caches(pblk); return -ENOMEM; } @@ -319,21 +341,20 @@ static void pblk_core_free(struct pblk *pblk) if (pblk->close_wq) destroy_workqueue(pblk->close_wq); + if (pblk->r_end_wq) + destroy_workqueue(pblk->r_end_wq); + if (pblk->bb_wq) destroy_workqueue(pblk->bb_wq); - mempool_destroy(pblk->page_pool); - mempool_destroy(pblk->line_ws_pool); + mempool_destroy(pblk->page_bio_pool); + mempool_destroy(pblk->gen_ws_pool); mempool_destroy(pblk->rec_pool); - mempool_destroy(pblk->g_rq_pool); + mempool_destroy(pblk->r_rq_pool); + mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - mempool_destroy(pblk->line_meta_pool); - kmem_cache_destroy(pblk_blk_ws_cache); - kmem_cache_destroy(pblk_rec_cache); - kmem_cache_destroy(pblk_g_rq_cache); - kmem_cache_destroy(pblk_w_rq_cache); - kmem_cache_destroy(pblk_line_meta_cache); + pblk_free_global_caches(pblk); } static void pblk_luns_free(struct pblk *pblk) @@ -372,13 +393,11 @@ static void pblk_line_meta_free(struct pblk *pblk) kfree(l_mg->bb_aux); kfree(l_mg->vsc_list); - spin_lock(&l_mg->free_lock); for (i = 0; i < PBLK_DATA_LINES; i++) { kfree(l_mg->sline_meta[i]); pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); kfree(l_mg->eline_meta[i]); } - spin_unlock(&l_mg->free_lock); kfree(pblk->lines); } @@ -507,6 +526,13 @@ static int pblk_lines_configure(struct pblk *pblk, int flags) } } +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + + /* Free full lines directly as GC has not been started yet */ + pblk_gc_free_full_lines(pblk); + if (!line) { /* Configure next line for user data */ line = pblk_line_get_first_data(pblk); @@ -630,7 +656,10 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk) fail_free_emeta: while (--i >= 0) { - vfree(l_mg->eline_meta[i]->buf); + if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META) + vfree(l_mg->eline_meta[i]->buf); + else + kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } @@ -681,8 +710,8 @@ static int pblk_lines_init(struct pblk *pblk) lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); - lm->high_thrs = lm->sec_per_line / 2; - lm->mid_thrs = lm->sec_per_line / 4; + lm->mid_thrs = lm->sec_per_line / 2; + lm->high_thrs = lm->sec_per_line / 4; lm->meta_distance = (geo->nr_luns / 2) * pblk->min_write_pgs; /* Calculate necessary pages for smeta. See comment over struct @@ -713,9 +742,13 @@ add_emeta_page: goto add_emeta_page; } - lm->emeta_bb = geo->nr_luns - i; - lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec[0], - geo->sec_per_blk); + lm->emeta_bb = geo->nr_luns > i ? geo->nr_luns - i : 0; + + lm->min_blk_line = 1; + if (geo->nr_luns > 1) + lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + + lm->emeta_sec[0], geo->sec_per_blk); + if (lm->min_blk_line > lm->blk_per_line) { pr_err("pblk: config. not supported. Min. LUN in line:%d\n", lm->blk_per_line); @@ -890,6 +923,11 @@ static void pblk_exit(void *private) down_write(&pblk_lock); pblk_gc_exit(pblk); pblk_tear_down(pblk); + +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + pblk_free(pblk); up_write(&pblk_lock); } @@ -911,7 +949,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, int ret; if (dev->identity.dom & NVM_RSP_L2P) { - pr_err("pblk: device-side L2P table not supported. (%x)\n", + pr_err("pblk: host-side L2P table not supported. (%x)\n", dev->identity.dom); return ERR_PTR(-EINVAL); } @@ -923,6 +961,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->dev = dev; pblk->disk = tdisk; pblk->state = PBLK_STATE_RUNNING; + pblk->gc.gc_enabled = 0; spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); @@ -944,6 +983,7 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_writes, 0); atomic_long_set(&pblk->recov_gc_writes, 0); + atomic_long_set(&pblk->recov_gc_reads, 0); #endif atomic_long_set(&pblk->read_failed, 0); @@ -1012,6 +1052,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, pblk->rwb.nr_entries); wake_up_process(pblk->writer_ts); + + /* Check if we need to start GC */ + pblk_gc_should_kick(pblk); + return pblk; fail_stop_writer: @@ -1044,6 +1088,7 @@ static struct nvm_tgt_type tt_pblk = { .sysfs_init = pblk_sysfs_init, .sysfs_exit = pblk_sysfs_exit, + .owner = THIS_MODULE, }; static int __init pblk_module_init(void) diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index fddb924f6dde..6f3ecde2140f 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -25,16 +25,28 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, unsigned int valid_secs) { struct pblk_line *line = pblk_line_get_data(pblk); - struct pblk_emeta *emeta = line->emeta; + struct pblk_emeta *emeta; struct pblk_w_ctx *w_ctx; - __le64 *lba_list = emeta_to_lbas(pblk, emeta->buf); + __le64 *lba_list; u64 paddr; int nr_secs = pblk->min_write_pgs; int i; + if (pblk_line_is_full(line)) { + struct pblk_line *prev_line = line; + + line = pblk_line_replace_data(pblk); + pblk_line_close_meta(pblk, prev_line); + } + + emeta = line->emeta; + lba_list = emeta_to_lbas(pblk, emeta->buf); + paddr = pblk_alloc_page(pblk, line, nr_secs); for (i = 0; i < nr_secs; i++, paddr++) { + __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); + /* ppa to be sent to the device */ ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id); @@ -51,22 +63,14 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, w_ctx->ppa = ppa_list[i]; meta_list[i].lba = cpu_to_le64(w_ctx->lba); lba_list[paddr] = cpu_to_le64(w_ctx->lba); - line->nr_valid_lbas++; + if (lba_list[paddr] != addr_empty) + line->nr_valid_lbas++; } else { - __le64 addr_empty = cpu_to_le64(ADDR_EMPTY); - lba_list[paddr] = meta_list[i].lba = addr_empty; __pblk_map_invalidate(pblk, line, paddr); } } - if (pblk_line_is_full(line)) { - struct pblk_line *prev_line = line; - - pblk_line_replace_data(pblk); - pblk_line_close_meta(pblk, prev_line); - } - pblk_down_rq(pblk, ppa_list, nr_secs, lun_bitmap); } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index 9bc32578a766..b8f78e401482 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -201,8 +201,7 @@ unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries) return subm; } -static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, - unsigned int to_update) +static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_line *line; @@ -213,7 +212,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, int flags; for (i = 0; i < to_update; i++) { - entry = &rb->entries[*l2p_upd]; + entry = &rb->entries[rb->l2p_update]; w_ctx = &entry->w_ctx; flags = READ_ONCE(entry->w_ctx.flags); @@ -230,7 +229,7 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd, line = &pblk->lines[pblk_tgt_ppa_to_line(w_ctx->ppa)]; kref_put(&line->ref, pblk_line_put); clean_wctx(w_ctx); - *l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1); + rb->l2p_update = (rb->l2p_update + 1) & (rb->nr_entries - 1); } pblk_rl_out(&pblk->rl, user_io, gc_io); @@ -258,7 +257,7 @@ static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries, count = nr_entries - space; /* l2p_update used exclusively under rb->w_lock */ - ret = __pblk_rb_update_l2p(rb, &rb->l2p_update, count); + ret = __pblk_rb_update_l2p(rb, count); out: return ret; @@ -280,7 +279,7 @@ void pblk_rb_sync_l2p(struct pblk_rb *rb) sync = smp_load_acquire(&rb->sync); to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries); - __pblk_rb_update_l2p(rb, &rb->l2p_update, to_update); + __pblk_rb_update_l2p(rb, to_update); spin_unlock(&rb->w_lock); } @@ -325,8 +324,8 @@ void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, } void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, - unsigned int ring_pos) + struct pblk_w_ctx w_ctx, struct pblk_line *line, + u64 paddr, unsigned int ring_pos) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct pblk_rb_entry *entry; @@ -341,7 +340,7 @@ void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, __pblk_rb_write_entry(rb, data, w_ctx, entry); - if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, gc_line)) + if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr)) entry->w_ctx.lba = ADDR_EMPTY; flags = w_ctx.flags | PBLK_WRITTEN_DATA; @@ -355,7 +354,6 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, { struct pblk_rb_entry *entry; unsigned int subm, sync_point; - int flags; subm = READ_ONCE(rb->subm); @@ -369,12 +367,6 @@ static int pblk_rb_sync_point_set(struct pblk_rb *rb, struct bio *bio, sync_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); entry = &rb->entries[sync_point]; - flags = READ_ONCE(entry->w_ctx.flags); - flags |= PBLK_FLUSH_ENTRY; - - /* Release flags on context. Protect from writes */ - smp_store_release(&entry->w_ctx.flags, flags); - /* Protect syncs */ smp_store_release(&rb->sync_point, sync_point); @@ -454,6 +446,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, /* Protect from read count */ smp_store_release(&rb->mem, mem); + return 1; } @@ -558,12 +551,13 @@ out: * persist data on the write buffer to the media. */ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - struct bio *bio, unsigned int pos, - unsigned int nr_entries, unsigned int count) + unsigned int pos, unsigned int nr_entries, + unsigned int count) { struct pblk *pblk = container_of(rb, struct pblk, rwb); struct request_queue *q = pblk->dev->q; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); + struct bio *bio = rqd->bio; struct pblk_rb_entry *entry; struct page *page; unsigned int pad = 0, to_read = nr_entries; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index d682e89e6493..ca79d8fb3e60 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -39,21 +39,15 @@ static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio, } static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned long *read_bitmap) + sector_t blba, unsigned long *read_bitmap) { + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio *bio = rqd->bio; struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; - sector_t blba = pblk_get_lba(bio); int nr_secs = rqd->nr_ppas; bool advanced_bio = false; int i, j = 0; - /* logic error: lba out-of-bounds. Ignore read request */ - if (blba + nr_secs >= pblk->rl.nr_secs) { - WARN(1, "pblk: read lbas out of bounds\n"); - return; - } - pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs); for (i = 0; i < nr_secs; i++) { @@ -63,6 +57,7 @@ static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd, retry: if (pblk_ppa_empty(p)) { WARN_ON(test_and_set_bit(i, read_bitmap)); + meta_list[i].lba = cpu_to_le64(ADDR_EMPTY); if (unlikely(!advanced_bio)) { bio_advance(bio, (i) * PBLK_EXPOSED_PAGE_SIZE); @@ -82,6 +77,7 @@ retry: goto retry; } WARN_ON(test_and_set_bit(i, read_bitmap)); + meta_list[i].lba = cpu_to_le64(lba); advanced_bio = true; #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->cache_reads); @@ -117,10 +113,51 @@ static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd) return NVM_IO_OK; } -static void pblk_end_io_read(struct nvm_rq *rqd) +static void pblk_read_check(struct pblk *pblk, struct nvm_rq *rqd, + sector_t blba) +{ + struct pblk_sec_meta *meta_list = rqd->meta_list; + int nr_lbas = rqd->nr_ppas; + int i; + + for (i = 0; i < nr_lbas; i++) { + u64 lba = le64_to_cpu(meta_list[i].lba); + + if (lba == ADDR_EMPTY) + continue; + + WARN(lba != blba + i, "pblk: corrupted read LBA\n"); + } +} + +static void pblk_read_put_rqd_kref(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct ppa_addr *ppa_list; + int i; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + for (i = 0; i < rqd->nr_ppas; i++) { + struct ppa_addr ppa = ppa_list[i]; + struct pblk_line *line; + + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + kref_put(&line->ref, pblk_line_put_wq); + } +} + +static void pblk_end_user_read(struct bio *bio) +{ +#ifdef CONFIG_NVM_DEBUG + WARN_ONCE(bio->bi_status, "pblk: corrupted read bio\n"); +#endif + bio_endio(bio); + bio_put(bio); +} + +static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd, + bool put_line) { - struct pblk *pblk = rqd->private; - struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); struct bio *bio = rqd->bio; @@ -131,47 +168,51 @@ static void pblk_end_io_read(struct nvm_rq *rqd) WARN_ONCE(bio->bi_status, "pblk: corrupted read error\n"); #endif - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); + pblk_read_check(pblk, rqd, r_ctx->lba); bio_put(bio); - if (r_ctx->private) { - struct bio *orig_bio = r_ctx->private; + if (r_ctx->private) + pblk_end_user_read((struct bio *)r_ctx->private); -#ifdef CONFIG_NVM_DEBUG - WARN_ONCE(orig_bio->bi_status, "pblk: corrupted read bio\n"); -#endif - bio_endio(orig_bio); - bio_put(orig_bio); - } + if (put_line) + pblk_read_put_rqd_kref(pblk, rqd); #ifdef CONFIG_NVM_DEBUG atomic_long_add(rqd->nr_ppas, &pblk->sync_reads); atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads); #endif - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_READ); atomic_dec(&pblk->inflight_io); } +static void pblk_end_io_read(struct nvm_rq *rqd) +{ + struct pblk *pblk = rqd->private; + + __pblk_end_io_read(pblk, rqd, true); +} + static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, unsigned int bio_init_idx, unsigned long *read_bitmap) { struct bio *new_bio, *bio = rqd->bio; + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio_vec src_bv, dst_bv; void *ppa_ptr = NULL; void *src_p, *dst_p; dma_addr_t dma_ppa_list = 0; + __le64 *lba_list_mem, *lba_list_media; int nr_secs = rqd->nr_ppas; int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs); int i, ret, hole; - DECLARE_COMPLETION_ONSTACK(wait); + + /* Re-use allocated memory for intermediate lbas */ + lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size); + lba_list_media = (((void *)rqd->ppa_list) + 2 * pblk_dma_ppa_size); new_bio = bio_alloc(GFP_KERNEL, nr_holes); - if (!new_bio) { - pr_err("pblk: could not alloc read bio\n"); - return NVM_IO_ERR; - } if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes)) goto err; @@ -181,34 +222,29 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, goto err; } + for (i = 0; i < nr_secs; i++) + lba_list_mem[i] = meta_list[i].lba; + new_bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(new_bio, REQ_OP_READ, 0); - new_bio->bi_private = &wait; - new_bio->bi_end_io = pblk_end_bio_sync; rqd->bio = new_bio; rqd->nr_ppas = nr_holes; rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); - rqd->end_io = NULL; - if (unlikely(nr_secs > 1 && nr_holes == 1)) { + if (unlikely(nr_holes == 1)) { ppa_ptr = rqd->ppa_list; dma_ppa_list = rqd->dma_ppa_list; rqd->ppa_addr = rqd->ppa_list[0]; } - ret = pblk_submit_read_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { bio_put(rqd->bio); - pr_err("pblk: read IO submission failed\n"); + pr_err("pblk: sync read IO submission failed\n"); goto err; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: partial read I/O timed out\n"); - } - if (rqd->error) { atomic_long_inc(&pblk->read_failed); #ifdef CONFIG_NVM_DEBUG @@ -216,15 +252,31 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, #endif } - if (unlikely(nr_secs > 1 && nr_holes == 1)) { + if (unlikely(nr_holes == 1)) { + struct ppa_addr ppa; + + ppa = rqd->ppa_addr; rqd->ppa_list = ppa_ptr; rqd->dma_ppa_list = dma_ppa_list; + rqd->ppa_list[0] = ppa; + } + + for (i = 0; i < nr_secs; i++) { + lba_list_media[i] = meta_list[i].lba; + meta_list[i].lba = lba_list_mem[i]; } /* Fill the holes in the original bio */ i = 0; hole = find_first_zero_bit(read_bitmap, nr_secs); do { + int line_id = pblk_dev_ppa_to_line(rqd->ppa_list[i]); + struct pblk_line *line = &pblk->lines[line_id]; + + kref_put(&line->ref, pblk_line_put); + + meta_list[hole].lba = lba_list_media[i]; + src_bv = new_bio->bi_io_vec[i++]; dst_bv = bio->bi_io_vec[bio_init_idx + hole]; @@ -238,7 +290,7 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, kunmap_atomic(src_p); kunmap_atomic(dst_p); - mempool_free(src_bv.bv_page, pblk->page_pool); + mempool_free(src_bv.bv_page, pblk->page_bio_pool); hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1); } while (hole < nr_secs); @@ -246,34 +298,26 @@ static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd, bio_put(new_bio); /* Complete the original bio and associated request */ + bio_endio(bio); rqd->bio = bio; rqd->nr_ppas = nr_secs; - rqd->private = pblk; - bio_endio(bio); - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_OK; err: /* Free allocated pages in new bio */ pblk_bio_free_pages(pblk, bio, 0, new_bio->bi_vcnt); - rqd->private = pblk; - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_ERR; } static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, - unsigned long *read_bitmap) + sector_t lba, unsigned long *read_bitmap) { + struct pblk_sec_meta *meta_list = rqd->meta_list; struct bio *bio = rqd->bio; struct ppa_addr ppa; - sector_t lba = pblk_get_lba(bio); - - /* logic error: lba out-of-bounds. Ignore read request */ - if (lba >= pblk->rl.nr_secs) { - WARN(1, "pblk: read lba out of bounds\n"); - return; - } pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); @@ -284,6 +328,7 @@ static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, retry: if (pblk_ppa_empty(ppa)) { WARN_ON(test_and_set_bit(0, read_bitmap)); + meta_list[0].lba = cpu_to_le64(ADDR_EMPTY); return; } @@ -295,9 +340,12 @@ retry: pblk_lookup_l2p_seq(pblk, &ppa, lba, 1); goto retry; } + WARN_ON(test_and_set_bit(0, read_bitmap)); + meta_list[0].lba = cpu_to_le64(lba); + #ifdef CONFIG_NVM_DEBUG - atomic_long_inc(&pblk->cache_reads); + atomic_long_inc(&pblk->cache_reads); #endif } else { rqd->ppa_addr = ppa; @@ -309,22 +357,24 @@ retry: int pblk_submit_read(struct pblk *pblk, struct bio *bio) { struct nvm_tgt_dev *dev = pblk->dev; + sector_t blba = pblk_get_lba(bio); unsigned int nr_secs = pblk_get_secs(bio); + struct pblk_g_ctx *r_ctx; struct nvm_rq *rqd; - unsigned long read_bitmap; /* Max 64 ppas per request */ unsigned int bio_init_idx; + unsigned long read_bitmap; /* Max 64 ppas per request */ int ret = NVM_IO_ERR; - if (nr_secs > PBLK_MAX_REQ_ADDRS) + /* logic error: lba out-of-bounds. Ignore read request */ + if (blba >= pblk->rl.nr_secs || nr_secs > PBLK_MAX_REQ_ADDRS) { + WARN(1, "pblk: read lba out of bounds (lba:%llu, nr:%d)\n", + (unsigned long long)blba, nr_secs); return NVM_IO_ERR; + } bitmap_zero(&read_bitmap, nr_secs); - rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) { - pr_err_ratelimited("pblk: not able to alloc rqd"); - return NVM_IO_ERR; - } + rqd = pblk_alloc_rqd(pblk, PBLK_READ); rqd->opcode = NVM_OP_PREAD; rqd->bio = bio; @@ -332,6 +382,9 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->private = pblk; rqd->end_io = pblk_end_io_read; + r_ctx = nvm_rq_to_pdu(rqd); + r_ctx->lba = blba; + /* Save the index for this bio's start. This is needed in case * we need to fill a partial read. */ @@ -348,23 +401,22 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) rqd->ppa_list = rqd->meta_list + pblk_dma_meta_size; rqd->dma_ppa_list = rqd->dma_meta_list + pblk_dma_meta_size; - pblk_read_ppalist_rq(pblk, rqd, &read_bitmap); + pblk_read_ppalist_rq(pblk, rqd, blba, &read_bitmap); } else { - pblk_read_rq(pblk, rqd, &read_bitmap); + pblk_read_rq(pblk, rqd, blba, &read_bitmap); } bio_get(bio); if (bitmap_full(&read_bitmap, nr_secs)) { bio_endio(bio); atomic_inc(&pblk->inflight_io); - pblk_end_io_read(rqd); + __pblk_end_io_read(pblk, rqd, false); return NVM_IO_OK; } /* All sectors are to be read from the device */ if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) { struct bio *int_bio = NULL; - struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd); /* Clone read bio to deal with read errors internally */ int_bio = bio_clone_fast(bio, GFP_KERNEL, pblk_bio_set); @@ -399,40 +451,46 @@ int pblk_submit_read(struct pblk *pblk, struct bio *bio) return NVM_IO_OK; fail_rqd_free: - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_READ); return ret; } static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_line *line, u64 *lba_list, - unsigned int nr_secs) + u64 *paddr_list_gc, unsigned int nr_secs) { - struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_list_l2p[PBLK_MAX_REQ_ADDRS]; + struct ppa_addr ppa_gc; int valid_secs = 0; int i; - pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs); + pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs); for (i = 0; i < nr_secs; i++) { - if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id || - pblk_ppa_empty(ppas[i])) { - lba_list[i] = ADDR_EMPTY; + if (lba_list[i] == ADDR_EMPTY) + continue; + + ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id); + if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) { + paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY; continue; } - rqd->ppa_list[valid_secs++] = ppas[i]; + rqd->ppa_list[valid_secs++] = ppa_list_l2p[i]; } #ifdef CONFIG_NVM_DEBUG atomic_long_add(valid_secs, &pblk->inflight_reads); #endif + return valid_secs; } static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_line *line, sector_t lba) + struct pblk_line *line, sector_t lba, + u64 paddr_gc) { - struct ppa_addr ppa; + struct ppa_addr ppa_l2p, ppa_gc; int valid_secs = 0; if (lba == ADDR_EMPTY) @@ -445,15 +503,14 @@ static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd, } spin_lock(&pblk->trans_lock); - ppa = pblk_trans_map_get(pblk, lba); + ppa_l2p = pblk_trans_map_get(pblk, lba); spin_unlock(&pblk->trans_lock); - /* Ignore updated values until the moment */ - if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id || - pblk_ppa_empty(ppa)) + ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id); + if (!pblk_ppa_comp(ppa_l2p, ppa_gc)) goto out; - rqd->ppa_addr = ppa; + rqd->ppa_addr = ppa_l2p; valid_secs = 1; #ifdef CONFIG_NVM_DEBUG @@ -464,42 +521,44 @@ out: return valid_secs; } -int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, - unsigned int nr_secs, unsigned int *secs_to_gc, - struct pblk_line *line) +int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct bio *bio; struct nvm_rq rqd; - int ret, data_len; - DECLARE_COMPLETION_ONSTACK(wait); + int data_len; + int ret = NVM_IO_OK; memset(&rqd, 0, sizeof(struct nvm_rq)); rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &rqd.dma_meta_list); if (!rqd.meta_list) - return NVM_IO_ERR; + return -ENOMEM; - if (nr_secs > 1) { + if (gc_rq->nr_secs > 1) { rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size; rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size; - *secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list, - nr_secs); - if (*secs_to_gc == 1) + gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line, + gc_rq->lba_list, + gc_rq->paddr_list, + gc_rq->nr_secs); + if (gc_rq->secs_to_gc == 1) rqd.ppa_addr = rqd.ppa_list[0]; } else { - *secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]); + gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line, + gc_rq->lba_list[0], + gc_rq->paddr_list[0]); } - if (!(*secs_to_gc)) + if (!(gc_rq->secs_to_gc)) goto out; - data_len = (*secs_to_gc) * geo->sec_size; - bio = pblk_bio_map_addr(pblk, data, *secs_to_gc, data_len, - PBLK_KMALLOC_META, GFP_KERNEL); + data_len = (gc_rq->secs_to_gc) * geo->sec_size; + bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len, + PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio)); goto err_free_dma; @@ -509,23 +568,16 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, bio_set_op_attrs(bio, REQ_OP_READ, 0); rqd.opcode = NVM_OP_PREAD; - rqd.end_io = pblk_end_io_sync; - rqd.private = &wait; - rqd.nr_ppas = *secs_to_gc; + rqd.nr_ppas = gc_rq->secs_to_gc; rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM); rqd.bio = bio; - ret = pblk_submit_read_io(pblk, &rqd); - if (ret) { - bio_endio(bio); + if (pblk_submit_io_sync(pblk, &rqd)) { + ret = -EIO; pr_err("pblk: GC read request failed\n"); - goto err_free_dma; + goto err_free_bio; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: GC read I/O timed out\n"); - } atomic_dec(&pblk->inflight_io); if (rqd.error) { @@ -536,16 +588,18 @@ int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, } #ifdef CONFIG_NVM_DEBUG - atomic_long_add(*secs_to_gc, &pblk->sync_reads); - atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads); - atomic_long_sub(*secs_to_gc, &pblk->inflight_reads); + atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads); + atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads); + atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads); #endif out: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return NVM_IO_OK; + return ret; +err_free_bio: + bio_put(bio); err_free_dma: nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list); - return NVM_IO_ERR; + return ret; } diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index cb556e06673e..eadb3eb5d4dc 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -34,10 +34,6 @@ void pblk_submit_rec(struct work_struct *work) max_secs); bio = bio_alloc(GFP_KERNEL, nr_rec_secs); - if (!bio) { - pr_err("pblk: not able to create recovery bio\n"); - return; - } bio->bi_iter.bi_sector = 0; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -71,7 +67,7 @@ void pblk_submit_rec(struct work_struct *work) err: bio_put(bio); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); } int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, @@ -84,12 +80,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, struct pblk_c_ctx *rec_ctx; int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; - rec_rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rec_rqd)) { - pr_err("pblk: could not create recovery req.\n"); - return -ENOMEM; - } - + rec_rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); rec_ctx = nvm_rq_to_pdu(rec_rqd); /* Copy completion bitmap, but exclude the first X completed entries */ @@ -142,19 +133,19 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) struct pblk_emeta *emeta = line->emeta; struct line_emeta *emeta_buf = emeta->buf; __le64 *lba_list; - int data_start; - int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; - int i; + u64 data_start, data_end; + u64 nr_valid_lbas, nr_lbas = 0; + u64 i; lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); if (!lba_list) return 1; data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; - nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0]; + data_end = line->emeta_ssec; nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); - for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { + for (i = data_start; i < data_end; i++) { struct ppa_addr ppa; int pos; @@ -181,8 +172,8 @@ static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) } if (nr_valid_lbas != nr_lbas) - pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", - line->id, emeta_buf->nr_valid_lbas, nr_lbas); + pr_err("pblk: line %d - inconsistent lba list(%llu/%llu)\n", + line->id, nr_valid_lbas, nr_lbas); line->left_msecs = 0; @@ -225,7 +216,6 @@ static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, int rq_ppas, rq_len; int i, j; int ret = 0; - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -262,8 +252,6 @@ next_read_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -289,19 +277,13 @@ next_read_rq: } /* If read fails, more padding is needed */ - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - return -EINTR; - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* At this point, the read should not fail. If it does, it is a problem * we cannot recover from here. Need FTL log. @@ -338,13 +320,10 @@ static void pblk_end_io_recov(struct nvm_rq *rqd) { struct pblk_pad_rq *pad_rq = rqd->private; struct pblk *pblk = pad_rq->pblk; - struct nvm_tgt_dev *dev = pblk->dev; pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas); - bio_put(rqd->bio); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); kref_put(&pad_rq->ref, pblk_recov_complete); @@ -404,25 +383,21 @@ next_pad_rq: ppa_list = (void *)(meta_list) + pblk_dma_meta_size; dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rqd)) { - ret = PTR_ERR(rqd); - goto fail_free_meta; - } - bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { ret = PTR_ERR(bio); - goto fail_free_rqd; + goto fail_free_meta; } bio->bi_iter.bi_sector = 0; /* internal bio */ bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); + rqd->bio = bio; rqd->opcode = NVM_OP_PWRITE; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); rqd->meta_list = meta_list; rqd->nr_ppas = rq_ppas; rqd->ppa_list = ppa_list; @@ -490,8 +465,6 @@ free_rq: fail_free_bio: bio_put(bio); -fail_free_rqd: - pblk_free_rqd(pblk, rqd, WRITE); fail_free_meta: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); fail_free_pad: @@ -522,7 +495,6 @@ static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, int ret = 0; int rec_round; int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -557,8 +529,6 @@ next_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -584,18 +554,13 @@ next_rq: addr_to_gen_ppa(pblk, w_ptr, line->id); } - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* This should not happen since the read failed during normal recovery, * but the media works funny sometimes... @@ -663,7 +628,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, int i, j; int ret = 0; int left_ppas = pblk_calc_sec_in_line(pblk, line); - DECLARE_COMPLETION_ONSTACK(wait); ppa_list = p.ppa_list; meta_list = p.meta_list; @@ -696,8 +660,6 @@ next_rq: rqd->ppa_list = ppa_list; rqd->dma_ppa_list = dma_ppa_list; rqd->dma_meta_list = dma_meta_list; - rqd->end_io = pblk_end_io_sync; - rqd->private = &wait; if (pblk_io_aligned(pblk, rq_ppas)) rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL); @@ -723,19 +685,14 @@ next_rq: addr_to_gen_ppa(pblk, paddr, line->id); } - ret = pblk_submit_io(pblk, rqd); + ret = pblk_submit_io_sync(pblk, rqd); if (ret) { pr_err("pblk: I/O submission failed: %d\n", ret); bio_put(bio); return ret; } - if (!wait_for_completion_io_timeout(&wait, - msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { - pr_err("pblk: L2P recovery read timed out\n"); - } atomic_dec(&pblk->inflight_io); - reinit_completion(&wait); /* Reached the end of the written line */ if (rqd->error) { @@ -785,15 +742,9 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) dma_addr_t dma_ppa_list, dma_meta_list; int done, ret = 0; - rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) - return PTR_ERR(rqd); - meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); - if (!meta_list) { - ret = -ENOMEM; - goto free_rqd; - } + if (!meta_list) + return -ENOMEM; ppa_list = (void *)(meta_list) + pblk_dma_meta_size; dma_ppa_list = dma_meta_list + pblk_dma_meta_size; @@ -804,6 +755,8 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) goto free_meta_list; } + rqd = pblk_alloc_rqd(pblk, PBLK_READ); + p.ppa_list = ppa_list; p.meta_list = meta_list; p.rqd = rqd; @@ -832,8 +785,6 @@ out: kfree(data); free_meta_list: nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); -free_rqd: - pblk_free_rqd(pblk, rqd, READ); return ret; } @@ -851,11 +802,33 @@ static void pblk_recov_line_add_ordered(struct list_head *head, __list_add(&line->list, t->list.prev, &t->list); } -struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; + unsigned int emeta_secs; + u64 emeta_start; + struct ppa_addr ppa; + int pos; + + emeta_secs = lm->emeta_sec[0]; + emeta_start = lm->sec_per_line; + + while (emeta_secs) { + emeta_start--; + ppa = addr_to_pblk_ppa(pblk, emeta_start, line->id); + pos = pblk_ppa_to_pos(geo, ppa); + if (!test_bit(pos, line->blk_bitmap)) + emeta_secs--; + } + + return emeta_start; +} + +struct pblk_line *pblk_recov_l2p(struct pblk *pblk) +{ + struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line *line, *tline, *data_line = NULL; struct pblk_smeta *smeta; @@ -900,9 +873,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) continue; - if (le16_to_cpu(smeta_buf->header.version) != 1) { + if (smeta_buf->header.version != SMETA_VERSION) { pr_err("pblk: found incompatible line version %u\n", - smeta_buf->header.version); + le16_to_cpu(smeta_buf->header.version)); return ERR_PTR(-EINVAL); } @@ -954,15 +927,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) /* Verify closed blocks and recover this portion of L2P table*/ list_for_each_entry_safe(line, tline, &recov_list, list) { - int off, nr_bb; - recovered_lines++; - /* Calculate where emeta starts based on the line bb */ - off = lm->sec_per_line - lm->emeta_sec[0]; - nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); - off -= nr_bb * geo->sec_per_pl; - line->emeta_ssec = off; + line->emeta_ssec = pblk_line_emeta_start(pblk, line); line->emeta = emeta; memset(line->emeta->buf, 0, lm->emeta_len[0]); @@ -987,7 +954,7 @@ next: list_move_tail(&line->list, move_list); spin_unlock(&l_mg->gc_lock); - mempool_free(line->map_bitmap, pblk->line_meta_pool); + kfree(line->map_bitmap); line->map_bitmap = NULL; line->smeta = NULL; line->emeta = NULL; diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 2e6a5361baf0..abae31fd434e 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -96,9 +96,11 @@ unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl) * * Only the total number of free blocks is used to configure the rate limiter. */ -static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) +void pblk_rl_update_rates(struct pblk_rl *rl) { + struct pblk *pblk = container_of(rl, struct pblk, rl); unsigned long free_blocks = pblk_rl_nr_free_blks(rl); + int max = rl->rb_budget; if (free_blocks >= rl->high) { rl->rb_user_max = max; @@ -124,23 +126,18 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max) rl->rb_state = PBLK_RL_LOW; } - return rl->rb_state; + if (rl->rb_state == (PBLK_RL_MID | PBLK_RL_LOW)) + pblk_gc_should_start(pblk); + else + pblk_gc_should_stop(pblk); } void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line) { - struct pblk *pblk = container_of(rl, struct pblk, rl); int blk_in_line = atomic_read(&line->blk_in_line); - int ret; atomic_add(blk_in_line, &rl->free_blocks); - /* Rates will not change that often - no need to lock update */ - ret = pblk_rl_update_rates(rl, rl->rb_budget); - - if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) - pblk_gc_should_start(pblk); - else - pblk_gc_should_stop(pblk); + pblk_rl_update_rates(rl); } void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) @@ -148,19 +145,7 @@ void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line) int blk_in_line = atomic_read(&line->blk_in_line); atomic_sub(blk_in_line, &rl->free_blocks); -} - -void pblk_gc_should_kick(struct pblk *pblk) -{ - struct pblk_rl *rl = &pblk->rl; - int ret; - - /* Rates will not change that often - no need to lock update */ - ret = pblk_rl_update_rates(rl, rl->rb_budget); - if (ret == (PBLK_RL_MID | PBLK_RL_LOW)) - pblk_gc_should_start(pblk); - else - pblk_gc_should_stop(pblk); + pblk_rl_update_rates(rl); } int pblk_rl_high_thrs(struct pblk_rl *rl) @@ -168,14 +153,9 @@ int pblk_rl_high_thrs(struct pblk_rl *rl) return rl->high; } -int pblk_rl_low_thrs(struct pblk_rl *rl) -{ - return rl->low; -} - -int pblk_rl_sysfs_rate_show(struct pblk_rl *rl) +int pblk_rl_max_io(struct pblk_rl *rl) { - return rl->rb_user_max; + return rl->rb_max_io; } static void pblk_rl_u_timer(unsigned long data) @@ -214,6 +194,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) /* To start with, all buffer is available to user I/O writers */ rl->rb_budget = budget; rl->rb_user_max = budget; + rl->rb_max_io = budget >> 1; rl->rb_gc_max = 0; rl->rb_state = PBLK_RL_HIGH; diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 95fb434e2f01..cd49e8875d4e 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -253,7 +253,7 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page) sz += snprintf(page + sz, PAGE_SIZE - sz, "GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, queue:%d\n", gc_full, gc_high, gc_mid, gc_low, gc_empty, - atomic_read(&pblk->gc.inflight_gc)); + atomic_read(&pblk->gc.read_inflight_gc)); sz += snprintf(page + sz, PAGE_SIZE - sz, "data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n", diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index 3ad9e56d2473..6c1cafafef53 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -20,7 +20,6 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx) { - struct nvm_tgt_dev *dev = pblk->dev; struct bio *original_bio; unsigned long ret; int i; @@ -33,16 +32,18 @@ static unsigned long pblk_end_w_bio(struct pblk *pblk, struct nvm_rq *rqd, bio_endio(original_bio); } + if (c_ctx->nr_padded) + pblk_bio_free_pages(pblk, rqd->bio, c_ctx->nr_valid, + c_ctx->nr_padded); + #ifdef CONFIG_NVM_DEBUG - atomic_long_add(c_ctx->nr_valid, &pblk->sync_writes); + atomic_long_add(rqd->nr_ppas, &pblk->sync_writes); #endif ret = pblk_rb_sync_advance(&pblk->rwb, c_ctx->nr_valid); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - bio_put(rqd->bio); - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); return ret; } @@ -107,10 +108,7 @@ static void pblk_end_w_fail(struct pblk *pblk, struct nvm_rq *rqd) ppa_list = &rqd->ppa_addr; recovery = mempool_alloc(pblk->rec_pool, GFP_ATOMIC); - if (!recovery) { - pr_err("pblk: could not allocate recovery context\n"); - return; - } + INIT_LIST_HEAD(&recovery->failed); bit = -1; @@ -175,7 +173,6 @@ static void pblk_end_io_write(struct nvm_rq *rqd) static void pblk_end_io_write_meta(struct nvm_rq *rqd) { struct pblk *pblk = rqd->private; - struct nvm_tgt_dev *dev = pblk->dev; struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd); struct pblk_line *line = m_ctx->private; struct pblk_emeta *emeta = line->emeta; @@ -187,19 +184,13 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd) pblk_log_write_err(pblk, rqd); pr_err("pblk: metadata I/O failed. Line %d\n", line->id); } -#ifdef CONFIG_NVM_DEBUG - else - WARN_ONCE(rqd->bio->bi_status, "pblk: corrupted write error\n"); -#endif sync = atomic_add_return(rqd->nr_ppas, &emeta->sync); if (sync == emeta->nr_entries) - pblk_line_run_ws(pblk, line, NULL, pblk_line_close_ws, - pblk->close_wq); + pblk_gen_run_ws(pblk, line, NULL, pblk_line_close_ws, + GFP_ATOMIC, pblk->close_wq); - bio_put(rqd->bio); - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); atomic_dec(&pblk->inflight_io); } @@ -213,7 +204,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, /* Setup write request */ rqd->opcode = NVM_OP_PWRITE; rqd->nr_ppas = nr_secs; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); rqd->private = pblk; rqd->end_io = end_io; @@ -229,15 +220,16 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd, } static int pblk_setup_w_rq(struct pblk *pblk, struct nvm_rq *rqd, - struct pblk_c_ctx *c_ctx, struct ppa_addr *erase_ppa) + struct ppa_addr *erase_ppa) { struct pblk_line_meta *lm = &pblk->lm; struct pblk_line *e_line = pblk_line_get_erase(pblk); + struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); unsigned int valid = c_ctx->nr_valid; unsigned int padded = c_ctx->nr_padded; unsigned int nr_secs = valid + padded; unsigned long *lun_bitmap; - int ret = 0; + int ret; lun_bitmap = kzalloc(lm->lun_bitmap_len, GFP_KERNEL); if (!lun_bitmap) @@ -279,7 +271,7 @@ int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, pblk_map_rq(pblk, rqd, c_ctx->sentry, lun_bitmap, c_ctx->nr_valid, 0); rqd->ppa_status = (u64)0; - rqd->flags = pblk_set_progr_mode(pblk, WRITE); + rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE); return ret; } @@ -303,55 +295,6 @@ static int pblk_calc_secs_to_sync(struct pblk *pblk, unsigned int secs_avail, return secs_to_sync; } -static inline int pblk_valid_meta_ppa(struct pblk *pblk, - struct pblk_line *meta_line, - struct ppa_addr *ppa_list, int nr_ppas) -{ - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct pblk_line *data_line; - struct ppa_addr ppa, ppa_opt; - u64 paddr; - int i; - - data_line = &pblk->lines[pblk_dev_ppa_to_line(ppa_list[0])]; - paddr = pblk_lookup_page(pblk, meta_line); - ppa = addr_to_gen_ppa(pblk, paddr, 0); - - if (test_bit(pblk_ppa_to_pos(geo, ppa), data_line->blk_bitmap)) - return 1; - - /* Schedule a metadata I/O that is half the distance from the data I/O - * with regards to the number of LUNs forming the pblk instance. This - * balances LUN conflicts across every I/O. - * - * When the LUN configuration changes (e.g., due to GC), this distance - * can align, which would result on a LUN deadlock. In this case, modify - * the distance to not be optimal, but allow metadata I/Os to succeed. - */ - ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); - if (unlikely(ppa_opt.ppa == ppa.ppa)) { - data_line->meta_distance--; - return 0; - } - - for (i = 0; i < nr_ppas; i += pblk->min_write_pgs) - if (ppa_list[i].g.ch == ppa_opt.g.ch && - ppa_list[i].g.lun == ppa_opt.g.lun) - return 1; - - if (test_bit(pblk_ppa_to_pos(geo, ppa_opt), data_line->blk_bitmap)) { - for (i = 0; i < nr_ppas; i += pblk->min_write_pgs) - if (ppa_list[i].g.ch == ppa.g.ch && - ppa_list[i].g.lun == ppa.g.lun) - return 0; - - return 1; - } - - return 0; -} - int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) { struct nvm_tgt_dev *dev = pblk->dev; @@ -370,11 +313,8 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) int i, j; int ret; - rqd = pblk_alloc_rqd(pblk, READ); - if (IS_ERR(rqd)) { - pr_err("pblk: cannot allocate write req.\n"); - return PTR_ERR(rqd); - } + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT); + m_ctx = nvm_rq_to_pdu(rqd); m_ctx->private = meta_line; @@ -407,8 +347,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) if (emeta->mem >= lm->emeta_len[0]) { spin_lock(&l_mg->close_lock); list_del(&meta_line->list); - WARN(!bitmap_full(meta_line->map_bitmap, lm->sec_per_line), - "pblk: corrupt meta line %d\n", meta_line->id); spin_unlock(&l_mg->close_lock); } @@ -428,18 +366,51 @@ fail_rollback: pblk_dealloc_page(pblk, meta_line, rq_ppas); list_add(&meta_line->list, &meta_line->list); spin_unlock(&l_mg->close_lock); - - nvm_dev_dma_free(dev->parent, rqd->meta_list, rqd->dma_meta_list); fail_free_bio: - if (likely(l_mg->emeta_alloc_type == PBLK_VMALLOC_META)) - bio_put(bio); + bio_put(bio); fail_free_rqd: - pblk_free_rqd(pblk, rqd, READ); + pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT); return ret; } -static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list, - int prev_n) +static inline bool pblk_valid_meta_ppa(struct pblk *pblk, + struct pblk_line *meta_line, + struct nvm_rq *data_rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_c_ctx *data_c_ctx = nvm_rq_to_pdu(data_rqd); + struct pblk_line *data_line = pblk_line_get_data(pblk); + struct ppa_addr ppa, ppa_opt; + u64 paddr; + int pos_opt; + + /* Schedule a metadata I/O that is half the distance from the data I/O + * with regards to the number of LUNs forming the pblk instance. This + * balances LUN conflicts across every I/O. + * + * When the LUN configuration changes (e.g., due to GC), this distance + * can align, which would result on metadata and data I/Os colliding. In + * this case, modify the distance to not be optimal, but move the + * optimal in the right direction. + */ + paddr = pblk_lookup_page(pblk, meta_line); + ppa = addr_to_gen_ppa(pblk, paddr, 0); + ppa_opt = addr_to_gen_ppa(pblk, paddr + data_line->meta_distance, 0); + pos_opt = pblk_ppa_to_pos(geo, ppa_opt); + + if (test_bit(pos_opt, data_c_ctx->lun_bitmap) || + test_bit(pos_opt, data_line->blk_bitmap)) + return true; + + if (unlikely(pblk_ppa_comp(ppa_opt, ppa))) + data_line->meta_distance--; + + return false; +} + +static struct pblk_line *pblk_should_submit_meta_io(struct pblk *pblk, + struct nvm_rq *data_rqd) { struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; @@ -449,57 +420,45 @@ static int pblk_sched_meta_io(struct pblk *pblk, struct ppa_addr *prev_list, retry: if (list_empty(&l_mg->emeta_list)) { spin_unlock(&l_mg->close_lock); - return 0; + return NULL; } meta_line = list_first_entry(&l_mg->emeta_list, struct pblk_line, list); - if (bitmap_full(meta_line->map_bitmap, lm->sec_per_line)) + if (meta_line->emeta->mem >= lm->emeta_len[0]) goto retry; spin_unlock(&l_mg->close_lock); - if (!pblk_valid_meta_ppa(pblk, meta_line, prev_list, prev_n)) - return 0; + if (!pblk_valid_meta_ppa(pblk, meta_line, data_rqd)) + return NULL; - return pblk_submit_meta_io(pblk, meta_line); + return meta_line; } static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) { - struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); struct ppa_addr erase_ppa; + struct pblk_line *meta_line; int err; ppa_set_empty(&erase_ppa); /* Assign lbas to ppas and populate request structure */ - err = pblk_setup_w_rq(pblk, rqd, c_ctx, &erase_ppa); + err = pblk_setup_w_rq(pblk, rqd, &erase_ppa); if (err) { pr_err("pblk: could not setup write request: %d\n", err); return NVM_IO_ERR; } - if (likely(ppa_empty(erase_ppa))) { - /* Submit metadata write for previous data line */ - err = pblk_sched_meta_io(pblk, rqd->ppa_list, rqd->nr_ppas); - if (err) { - pr_err("pblk: metadata I/O submission failed: %d", err); - return NVM_IO_ERR; - } + meta_line = pblk_should_submit_meta_io(pblk, rqd); - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd); - if (err) { - pr_err("pblk: data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } - } else { - /* Submit data write for current data line */ - err = pblk_submit_io(pblk, rqd); - if (err) { - pr_err("pblk: data I/O submission failed: %d\n", err); - return NVM_IO_ERR; - } + /* Submit data write for current data line */ + err = pblk_submit_io(pblk, rqd); + if (err) { + pr_err("pblk: data I/O submission failed: %d\n", err); + return NVM_IO_ERR; + } - /* Submit available erase for next data line */ + if (!ppa_empty(erase_ppa)) { + /* Submit erase for next data line */ if (pblk_blk_erase_async(pblk, erase_ppa)) { struct pblk_line *e_line = pblk_line_get_erase(pblk); struct nvm_tgt_dev *dev = pblk->dev; @@ -512,6 +471,15 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd) } } + if (meta_line) { + /* Submit metadata write for previous data line */ + err = pblk_submit_meta_io(pblk, meta_line); + if (err) { + pr_err("pblk: metadata I/O submission failed: %d", err); + return NVM_IO_ERR; + } + } + return NVM_IO_OK; } @@ -521,7 +489,8 @@ static void pblk_free_write_rqd(struct pblk *pblk, struct nvm_rq *rqd) struct bio *bio = rqd->bio; if (c_ctx->nr_padded) - pblk_bio_free_pages(pblk, bio, rqd->nr_ppas, c_ctx->nr_padded); + pblk_bio_free_pages(pblk, bio, c_ctx->nr_valid, + c_ctx->nr_padded); } static int pblk_submit_write(struct pblk *pblk) @@ -543,31 +512,24 @@ static int pblk_submit_write(struct pblk *pblk) if (!secs_to_flush && secs_avail < pblk->min_write_pgs) return 1; - rqd = pblk_alloc_rqd(pblk, WRITE); - if (IS_ERR(rqd)) { - pr_err("pblk: cannot allocate write req.\n"); - return 1; - } - - bio = bio_alloc(GFP_KERNEL, pblk->max_write_pgs); - if (!bio) { - pr_err("pblk: cannot allocate write bio\n"); - goto fail_free_rqd; - } - bio->bi_iter.bi_sector = 0; /* internal bio */ - bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - rqd->bio = bio; - secs_to_sync = pblk_calc_secs_to_sync(pblk, secs_avail, secs_to_flush); if (secs_to_sync > pblk->max_write_pgs) { pr_err("pblk: bad buffer sync calculation\n"); - goto fail_put_bio; + return 1; } secs_to_com = (secs_to_sync > secs_avail) ? secs_avail : secs_to_sync; pos = pblk_rb_read_commit(&pblk->rwb, secs_to_com); - if (pblk_rb_read_to_bio(&pblk->rwb, rqd, bio, pos, secs_to_sync, + bio = bio_alloc(GFP_KERNEL, secs_to_sync); + + bio->bi_iter.bi_sector = 0; /* internal bio */ + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + rqd = pblk_alloc_rqd(pblk, PBLK_WRITE); + rqd->bio = bio; + + if (pblk_rb_read_to_bio(&pblk->rwb, rqd, pos, secs_to_sync, secs_avail)) { pr_err("pblk: corrupted write bio\n"); goto fail_put_bio; @@ -586,8 +548,7 @@ fail_free_bio: pblk_free_write_rqd(pblk, rqd); fail_put_bio: bio_put(bio); -fail_free_rqd: - pblk_free_rqd(pblk, rqd, WRITE); + pblk_free_rqd(pblk, rqd, PBLK_WRITE); return 1; } diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 67e623bd5c2d..90961033a79f 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -40,10 +40,6 @@ #define PBLK_MAX_REQ_ADDRS (64) #define PBLK_MAX_REQ_ADDRS_PW (6) -#define PBLK_WS_POOL_SIZE (128) -#define PBLK_META_POOL_SIZE (128) -#define PBLK_READ_REQ_POOL_SIZE (1024) - #define PBLK_NR_CLOSE_JOBS (4) #define PBLK_CACHE_NAME_LEN (DISK_NAME_LEN + 16) @@ -59,7 +55,15 @@ for ((i) = 0, rlun = &(pblk)->luns[0]; \ (i) < (pblk)->nr_luns; (i)++, rlun = &(pblk)->luns[(i)]) -#define ERASE 2 /* READ = 0, WRITE = 1 */ +/* Static pool sizes */ +#define PBLK_GEN_WS_POOL_SIZE (2) + +enum { + PBLK_READ = READ, + PBLK_WRITE = WRITE,/* Write from write buffer */ + PBLK_WRITE_INT, /* Internal write - no write buffer */ + PBLK_ERASE, +}; enum { /* IO Types */ @@ -95,6 +99,7 @@ enum { }; #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS) +#define pblk_dma_ppa_size (sizeof(u64) * PBLK_MAX_REQ_ADDRS) /* write buffer completion context */ struct pblk_c_ctx { @@ -106,9 +111,10 @@ struct pblk_c_ctx { unsigned int nr_padded; }; -/* generic context */ +/* read context */ struct pblk_g_ctx { void *private; + u64 lba; }; /* Pad context */ @@ -207,6 +213,7 @@ struct pblk_lun { struct pblk_gc_rq { struct pblk_line *line; void *data; + u64 paddr_list[PBLK_MAX_REQ_ADDRS]; u64 lba_list[PBLK_MAX_REQ_ADDRS]; int nr_secs; int secs_to_gc; @@ -231,7 +238,10 @@ struct pblk_gc { struct timer_list gc_timer; struct semaphore gc_sem; - atomic_t inflight_gc; + atomic_t read_inflight_gc; /* Number of lines with inflight GC reads */ + atomic_t pipeline_gc; /* Number of lines in the GC pipeline - + * started reads to finished writes + */ int w_entries; struct list_head w_list; @@ -267,6 +277,7 @@ struct pblk_rl { int rb_gc_max; /* Max buffer entries available for GC I/O */ int rb_gc_rsv; /* Reserved buffer entries for GC I/O */ int rb_state; /* Rate-limiter current state */ + int rb_max_io; /* Maximum size for an I/O giving the config */ atomic_t rb_user_cnt; /* User I/O buffer counter */ atomic_t rb_gc_cnt; /* GC I/O buffer counter */ @@ -310,6 +321,7 @@ enum { }; #define PBLK_MAGIC 0x70626c6b /*pblk*/ +#define SMETA_VERSION cpu_to_le16(1) struct line_header { __le32 crc; @@ -618,15 +630,16 @@ struct pblk { struct list_head compl_list; - mempool_t *page_pool; - mempool_t *line_ws_pool; + mempool_t *page_bio_pool; + mempool_t *gen_ws_pool; mempool_t *rec_pool; - mempool_t *g_rq_pool; + mempool_t *r_rq_pool; mempool_t *w_rq_pool; - mempool_t *line_meta_pool; + mempool_t *e_rq_pool; struct workqueue_struct *close_wq; struct workqueue_struct *bb_wq; + struct workqueue_struct *r_end_wq; struct timer_list wtimer; @@ -657,15 +670,15 @@ int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries, void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data, struct pblk_w_ctx w_ctx, unsigned int pos); void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data, - struct pblk_w_ctx w_ctx, struct pblk_line *gc_line, - unsigned int pos); + struct pblk_w_ctx w_ctx, struct pblk_line *line, + u64 paddr, unsigned int pos); struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos); void pblk_rb_flush(struct pblk_rb *rb); void pblk_rb_sync_l2p(struct pblk_rb *rb); unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd, - struct bio *bio, unsigned int pos, - unsigned int nr_entries, unsigned int count); + unsigned int pos, unsigned int nr_entries, + unsigned int count); unsigned int pblk_rb_read_to_bio_list(struct pblk_rb *rb, struct bio *bio, struct list_head *list, unsigned int max); @@ -692,24 +705,23 @@ ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf); /* * pblk core */ -struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw); +struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int type); +void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int type); void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); -void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw); -void pblk_wait_for_meta(struct pblk *pblk); -struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba); void pblk_discard(struct pblk *pblk, struct bio *bio); void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); +int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line); struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data, unsigned int nr_secs, unsigned int len, int alloc_type, gfp_t gfp_mask); struct pblk_line *pblk_line_get(struct pblk *pblk); struct pblk_line *pblk_line_get_first_data(struct pblk *pblk); -void pblk_line_replace_data(struct pblk *pblk); +struct pblk_line *pblk_line_replace_data(struct pblk *pblk); int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line); void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line); struct pblk_line *pblk_line_get_data(struct pblk *pblk); @@ -719,19 +731,18 @@ int pblk_line_is_full(struct pblk_line *line); void pblk_line_free(struct pblk *pblk, struct pblk_line *line); void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line); void pblk_line_close(struct pblk *pblk, struct pblk_line *line); -void pblk_line_close_meta_sync(struct pblk *pblk); void pblk_line_close_ws(struct work_struct *work); void pblk_pipeline_stop(struct pblk *pblk); -void pblk_line_mark_bb(struct work_struct *work); -void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, - void (*work)(struct work_struct *), - struct workqueue_struct *wq); +void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, + void (*work)(struct work_struct *), gfp_t gfp_mask, + struct workqueue_struct *wq); u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line); int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line); int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line, void *emeta_buf); int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr erase_ppa); void pblk_line_put(struct kref *ref); +void pblk_line_put_wq(struct kref *ref); struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line); u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line); void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs); @@ -745,7 +756,6 @@ void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, void pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas); void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, unsigned long *lun_bitmap); -void pblk_end_bio_sync(struct bio *bio); void pblk_end_io_sync(struct nvm_rq *rqd); int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags, int nr_pages); @@ -760,7 +770,7 @@ void pblk_update_map_cache(struct pblk *pblk, sector_t lba, void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, struct ppa_addr entry_line); int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa, - struct pblk_line *gc_line); + struct pblk_line *gc_line, u64 paddr); void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas, u64 *lba_list, int nr_secs); void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, @@ -771,9 +781,7 @@ void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas, */ int pblk_write_to_cache(struct pblk *pblk, struct bio *bio, unsigned long flags); -int pblk_write_gc_to_cache(struct pblk *pblk, void *data, u64 *lba_list, - unsigned int nr_entries, unsigned int nr_rec_entries, - struct pblk_line *gc_line, unsigned long flags); +int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk map @@ -797,9 +805,7 @@ void pblk_write_should_kick(struct pblk *pblk); */ extern struct bio_set *pblk_bio_set; int pblk_submit_read(struct pblk *pblk, struct bio *bio); -int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data, - unsigned int nr_secs, unsigned int *secs_to_gc, - struct pblk_line *line); +int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq); /* * pblk recovery */ @@ -815,7 +821,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, * pblk gc */ #define PBLK_GC_MAX_READERS 8 /* Max number of outstanding GC reader jobs */ -#define PBLK_GC_W_QD 128 /* Queue depth for inflight GC write I/Os */ +#define PBLK_GC_RQ_QD 128 /* Queue depth for inflight GC requests */ #define PBLK_GC_L_QD 4 /* Queue depth for inflight GC lines */ #define PBLK_GC_RSV_LINE 1 /* Reserved lines for GC */ @@ -824,7 +830,7 @@ void pblk_gc_exit(struct pblk *pblk); void pblk_gc_should_start(struct pblk *pblk); void pblk_gc_should_stop(struct pblk *pblk); void pblk_gc_should_kick(struct pblk *pblk); -void pblk_gc_kick(struct pblk *pblk); +void pblk_gc_free_full_lines(struct pblk *pblk); void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled, int *gc_active); int pblk_gc_sysfs_force(struct pblk *pblk, int force); @@ -834,8 +840,8 @@ int pblk_gc_sysfs_force(struct pblk *pblk, int force); */ void pblk_rl_init(struct pblk_rl *rl, int budget); void pblk_rl_free(struct pblk_rl *rl); +void pblk_rl_update_rates(struct pblk_rl *rl); int pblk_rl_high_thrs(struct pblk_rl *rl); -int pblk_rl_low_thrs(struct pblk_rl *rl); unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl); int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries); @@ -843,10 +849,9 @@ void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries); int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries); void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries); void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc); -int pblk_rl_sysfs_rate_show(struct pblk_rl *rl); +int pblk_rl_max_io(struct pblk_rl *rl); void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line); void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line); -void pblk_rl_set_space_limit(struct pblk_rl *rl, int entries_left); int pblk_rl_is_limit(struct pblk_rl *rl); /* @@ -892,13 +897,7 @@ static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta) static inline int pblk_line_vsc(struct pblk_line *line) { - int vsc; - - spin_lock(&line->lock); - vsc = le32_to_cpu(*line->vsc); - spin_unlock(&line->lock); - - return vsc; + return le32_to_cpu(*line->vsc); } #define NVM_MEM_PAGE_WRITE (8) @@ -1140,7 +1139,7 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type) flags = geo->plane_mode >> 1; - if (type == WRITE) + if (type == PBLK_WRITE) flags |= NVM_IO_SCRAMBLE_ENABLE; return flags; @@ -1200,7 +1199,6 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd, pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); } -#endif static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas) @@ -1221,14 +1219,50 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, ppa->g.sec < geo->sec_per_pg) continue; -#ifdef CONFIG_NVM_DEBUG print_ppa(ppa, "boundary", i); -#endif + return 1; } return 0; } +static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct ppa_addr *ppa_list; + + ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr; + + if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) { + WARN_ON(1); + return -EINVAL; + } + + if (rqd->opcode == NVM_OP_PWRITE) { + struct pblk_line *line; + struct ppa_addr ppa; + int i; + + for (i = 0; i < rqd->nr_ppas; i++) { + ppa = ppa_list[i]; + line = &pblk->lines[pblk_dev_ppa_to_line(ppa)]; + + spin_lock(&line->lock); + if (line->state != PBLK_LINESTATE_OPEN) { + pr_err("pblk: bad ppa: line:%d,state:%d\n", + line->id, line->state); + WARN_ON(1); + spin_unlock(&line->lock); + return -EINVAL; + } + spin_unlock(&line->lock); + } + } + + return 0; +} +#endif + static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr) { struct pblk_line_meta *lm = &pblk->lm; |