Diffstat (limited to 'drivers/lightnvm')
-rw-r--r--  drivers/lightnvm/pblk-core.c  |   7
-rw-r--r--  drivers/lightnvm/pblk-gc.c    | 439
-rw-r--r--  drivers/lightnvm/pblk-rb.c    |  21
-rw-r--r--  drivers/lightnvm/pblk-rl.c    |  62
-rw-r--r--  drivers/lightnvm/pblk-sysfs.c |  51
-rw-r--r--  drivers/lightnvm/pblk.h       |  66
6 files changed, 368 insertions, 278 deletions
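The core of the rework is visible in pblk-gc.c below: victim selection (pblk_gc_run), a new reader kthread plus per-line reader workqueue, and the existing writer kthread now form a bounded pipeline. As a minimal orientation sketch — standalone C with reduced stand-in structs, not the kernel types — the gating that decides when GC runs and when another victim line may be queued looks like this:

```c
/* Illustrative user-space sketch, not part of the patch. The structs are
 * reduced stand-ins for struct pblk_gc / struct pblk_rl; only the fields
 * the gating logic touches are kept.
 */
#include <stdatomic.h>
#include <stdbool.h>

#define PBLK_GC_L_QD 4	/* queue depth for inflight GC lines (from pblk.h) */

struct rl_sketch {
	unsigned int high;		/* free-block high watermark */
	atomic_uint free_blocks;	/* free blocks left on the media */
};

struct gc_sketch {
	int gc_active;
	atomic_int inflight_gc;		/* victim lines queued for reading */
};

/* Mirrors pblk_gc_should_run(): GC runs while it is active and the
 * free-block count sits below the high threshold. No lock is taken;
 * as the in-tree comment notes, the values are advisory.
 */
static bool gc_should_run(struct gc_sketch *gc, struct rl_sketch *rl)
{
	return gc->gc_active &&
	       rl->high > atomic_load(&rl->free_blocks);
}

/* Mirrors the exit condition in pblk_gc_run(): stop queueing victim
 * lines once PBLK_GC_L_QD lines are already in flight, so line
 * selection cannot outrun the reader/writer stages.
 */
static bool gc_may_queue_line(struct gc_sketch *gc, struct rl_sketch *rl)
{
	return gc_should_run(gc, rl) &&
	       atomic_load(&gc->inflight_gc) < PBLK_GC_L_QD;
}
```

Both checks are deliberately lock-free; a stale read only delays GC by one pass or briefly over-admits it, which the queue-depth bound absorbs.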
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index ed41cd7700b3..ba3b88f0e1f7 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -302,12 +302,12 @@ struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
 			line->gc_group = PBLK_LINEGC_FULL;
 			move_list = &l_mg->gc_full_list;
 		}
-	} else if (vsc < lm->mid_thrs) {
+	} else if (vsc < lm->high_thrs) {
 		if (line->gc_group != PBLK_LINEGC_HIGH) {
 			line->gc_group = PBLK_LINEGC_HIGH;
 			move_list = &l_mg->gc_high_list;
 		}
-	} else if (vsc < lm->high_thrs) {
+	} else if (vsc < lm->mid_thrs) {
 		if (line->gc_group != PBLK_LINEGC_MID) {
 			line->gc_group = PBLK_LINEGC_MID;
 			move_list = &l_mg->gc_mid_list;
@@ -1199,6 +1199,7 @@ retry_get:
 	if (pblk_line_prepare(pblk, line)) {
 		pr_err("pblk: failed to prepare line %d\n", line->id);
 		list_add(&line->list, &l_mg->free_list);
+		l_mg->nr_free_lines++;
 		return NULL;
 	}
 
@@ -1465,6 +1466,8 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
 
 	spin_unlock(&line->lock);
 	spin_unlock(&l_mg->gc_lock);
+
+	pblk_gc_should_kick(pblk);
 }
 
 void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c
index f811e4ca63f4..1d289242ab92 100644
--- a/drivers/lightnvm/pblk-gc.c
+++ b/drivers/lightnvm/pblk-gc.c
@@ -21,7 +21,6 @@
 static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
 {
 	kfree(gc_rq->data);
-	kfree(gc_rq->lba_list);
 	kfree(gc_rq);
 }
 
@@ -37,10 +36,8 @@ static int pblk_gc_write(struct pblk *pblk)
 		return 1;
 	}
 
-	list_for_each_entry_safe(gc_rq, tgc_rq, &gc->w_list, list) {
-		list_move_tail(&gc_rq->list, &w_list);
-		gc->w_entries--;
-	}
+	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
+	gc->w_entries = 0;
 	spin_unlock(&gc->w_lock);
 
 	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
@@ -48,9 +45,8 @@ static int pblk_gc_write(struct pblk *pblk)
 					gc_rq->nr_secs, gc_rq->secs_to_gc,
 					gc_rq->line, PBLK_IOTYPE_GC);
 
-		kref_put(&gc_rq->line->ref, pblk_line_put);
-
 		list_del(&gc_rq->list);
+		kref_put(&gc_rq->line->ref, pblk_line_put);
 		pblk_gc_free_gc_rq(gc_rq);
 	}
 
@@ -66,52 +62,41 @@ static void pblk_gc_writer_kick(struct pblk_gc *gc)
 /*
  * Responsible for managing all memory related to a gc request. Also in case of
  * failure
  */
-static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_line *line,
-				   u64 *lba_list, unsigned int nr_secs)
+static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_gc_rq *gc_rq;
+	struct pblk_line *line = gc_rq->line;
 	void *data;
 	unsigned int secs_to_gc;
-	int ret = NVM_IO_OK;
+	int ret = 0;
 
-	data = kmalloc(nr_secs * geo->sec_size, GFP_KERNEL);
+	data = kmalloc(gc_rq->nr_secs * geo->sec_size, GFP_KERNEL);
 	if (!data) {
-		ret = NVM_IO_ERR;
-		goto free_lba_list;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	/* Read from GC victim block */
-	if (pblk_submit_read_gc(pblk, lba_list, data, nr_secs,
+	if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
 							&secs_to_gc, line)) {
-		ret = NVM_IO_ERR;
+		ret = -EFAULT;
 		goto free_data;
 	}
 
 	if (!secs_to_gc)
-		goto free_data;
-
-	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
-	if (!gc_rq) {
-		ret = NVM_IO_ERR;
-		goto free_data;
-	}
+		goto free_rq;
 
-	gc_rq->line = line;
 	gc_rq->data = data;
-	gc_rq->lba_list = lba_list;
-	gc_rq->nr_secs = nr_secs;
 	gc_rq->secs_to_gc = secs_to_gc;
 
-	kref_get(&line->ref);
-
retry:
 	spin_lock(&gc->w_lock);
-	if (gc->w_entries > 256) {
+	if (gc->w_entries >= PBLK_GC_W_QD) {
 		spin_unlock(&gc->w_lock);
-		usleep_range(256, 1024);
+		pblk_gc_writer_kick(&pblk->gc);
+		usleep_range(128, 256);
 		goto retry;
 	}
 	gc->w_entries++;
@@ -120,13 +105,14 @@ retry:
 
 	pblk_gc_writer_kick(&pblk->gc);
 
-	return NVM_IO_OK;
+	return 0;
 
+free_rq:
+	kfree(gc_rq);
free_data:
 	kfree(data);
-free_lba_list:
-	kfree(lba_list);
-
+out:
+	kref_put(&line->ref, pblk_line_put);
 	return ret;
 }
 
@@ -150,20 +136,52 @@ static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
 
 static void pblk_gc_line_ws(struct work_struct *work)
 {
+	struct pblk_line_ws *line_rq_ws = container_of(work,
+						struct pblk_line_ws, ws);
+	struct pblk *pblk = line_rq_ws->pblk;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line = line_rq_ws->line;
+	struct pblk_gc_rq *gc_rq = line_rq_ws->priv;
+
+	up(&gc->gc_sem);
+
+	if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
+		pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
+						line->id, *line->vsc,
+						gc_rq->nr_secs);
+	}
+
+	mempool_free(line_rq_ws, pblk->line_ws_pool);
+}
+
+static void pblk_gc_line_prepare_ws(struct work_struct *work)
+{
 	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
 									ws);
 	struct pblk *pblk = line_ws->pblk;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line *line = line_ws->line;
+	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct line_emeta *emeta_buf = line_ws->priv;
+	struct pblk_gc *gc = &pblk->gc;
+	struct line_emeta *emeta_buf;
+	struct pblk_line_ws *line_rq_ws;
+	struct pblk_gc_rq *gc_rq;
 	__le64 *lba_list;
-	u64 *gc_list;
-	int sec_left;
-	int nr_ppas, bit;
-	int put_line = 1;
+	int sec_left, nr_secs, bit;
+	int ret;
 
-	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
+	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
+								GFP_KERNEL);
+	if (!emeta_buf) {
+		pr_err("pblk: cannot use GC emeta\n");
+		return;
+	}
+
+	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
+	if (ret) {
+		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
+		goto fail_free_emeta;
+	}
 
 	/* If this read fails, it means that emeta is corrupted. For now, leave
 	 * the line untouched. TODO: Implement a recovery routine that scans and
@@ -172,119 +190,124 @@ static void pblk_gc_line_ws(struct work_struct *work)
 	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
 	if (!lba_list) {
 		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
-		goto out;
-	}
-
-	spin_lock(&line->lock);
-	sec_left = le32_to_cpu(*line->vsc);
-	if (!sec_left) {
-		/* Lines are erased before being used (l_mg->data_/log_next) */
-		spin_unlock(&line->lock);
-		goto out;
+		goto fail_free_emeta;
 	}
-	spin_unlock(&line->lock);
 
+	sec_left = pblk_line_vsc(line);
 	if (sec_left < 0) {
 		pr_err("pblk: corrupted GC line (%d)\n", line->id);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
+		goto fail_free_emeta;
 	}
 
 	bit = -1;
next_rq:
-	gc_list = kmalloc_array(pblk->max_write_pgs, sizeof(u64), GFP_KERNEL);
-	if (!gc_list) {
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
+	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
+	if (!gc_rq)
+		goto fail_free_emeta;
 
-	nr_ppas = 0;
+	nr_secs = 0;
 	do {
 		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
 								bit + 1);
 		if (bit > line->emeta_ssec)
 			break;
 
-		gc_list[nr_ppas++] = le64_to_cpu(lba_list[bit]);
-	} while (nr_ppas < pblk->max_write_pgs);
+		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
+	} while (nr_secs < pblk->max_write_pgs);
 
-	if (unlikely(!nr_ppas)) {
-		kfree(gc_list);
+	if (unlikely(!nr_secs)) {
+		kfree(gc_rq);
 		goto out;
 	}
 
-	if (pblk_gc_move_valid_secs(pblk, line, gc_list, nr_ppas)) {
-		pr_err("pblk: could not GC all sectors: line:%d (%d/%d/%d)\n",
-				line->id, *line->vsc,
-				nr_ppas, nr_ppas);
-		put_line = 0;
-		pblk_put_line_back(pblk, line);
-		goto out;
-	}
+	gc_rq->nr_secs = nr_secs;
+	gc_rq->line = line;
+
+	line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_rq_ws)
+		goto fail_free_gc_rq;
+
+	line_rq_ws->pblk = pblk;
+	line_rq_ws->line = line;
+	line_rq_ws->priv = gc_rq;
+
+	down(&gc->gc_sem);
+	kref_get(&line->ref);
+
+	INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
+	queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);
 
-	sec_left -= nr_ppas;
+	sec_left -= nr_secs;
 	if (sec_left > 0)
 		goto next_rq;
 
out:
 	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
 	mempool_free(line_ws, pblk->line_ws_pool);
-	atomic_dec(&pblk->gc.inflight_gc);
-	if (put_line)
-		kref_put(&line->ref, pblk_line_put);
+
+	kref_put(&line->ref, pblk_line_put);
+	atomic_dec(&gc->inflight_gc);
+
+	return;
+
+fail_free_gc_rq:
+	kfree(gc_rq);
+fail_free_emeta:
+	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
+	pblk_put_line_back(pblk, line);
+	kref_put(&line->ref, pblk_line_put);
+	mempool_free(line_ws, pblk->line_ws_pool);
+	atomic_dec(&gc->inflight_gc);
+
+	pr_err("pblk: Failed to GC line %d\n", line->id);
 }
 
 static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
 {
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
-	struct pblk_line_meta *lm = &pblk->lm;
-	struct line_emeta *emeta_buf;
+	struct pblk_gc *gc = &pblk->gc;
 	struct pblk_line_ws *line_ws;
-	int ret;
 
-	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
-	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
-								GFP_KERNEL);
-	if (!emeta_buf) {
-		pr_err("pblk: cannot use GC emeta\n");
-		goto fail_free_ws;
-	}
+	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);
 
-	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
-	if (ret) {
-		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
-		goto fail_free_emeta;
-	}
+	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
+	if (!line_ws)
+		return -ENOMEM;
 
 	line_ws->pblk = pblk;
 	line_ws->line = line;
-	line_ws->priv = emeta_buf;
 
-	INIT_WORK(&line_ws->ws, pblk_gc_line_ws);
-	queue_work(pblk->gc.gc_reader_wq, &line_ws->ws);
+	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
+	queue_work(gc->gc_reader_wq, &line_ws->ws);
 
 	return 0;
-
-fail_free_emeta:
-	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
-fail_free_ws:
-	mempool_free(line_ws, pblk->line_ws_pool);
-	pblk_put_line_back(pblk, line);
-
-	return 1;
 }
 
-static void pblk_gc_lines(struct pblk *pblk, struct list_head *gc_list)
+static int pblk_gc_read(struct pblk *pblk)
 {
-	struct pblk_line *line, *tline;
+	struct pblk_gc *gc = &pblk->gc;
+	struct pblk_line *line;
 
-	list_for_each_entry_safe(line, tline, gc_list, list) {
-		if (pblk_gc_line(pblk, line))
-			pr_err("pblk: failed to GC line %d\n", line->id);
-		list_del(&line->list);
+	spin_lock(&gc->r_lock);
+	if (list_empty(&gc->r_list)) {
+		spin_unlock(&gc->r_lock);
+		return 1;
 	}
+
+	line = list_first_entry(&gc->r_list, struct pblk_line, list);
+	list_del(&line->list);
+	spin_unlock(&gc->r_lock);
+
+	pblk_gc_kick(pblk);
+
+	if (pblk_gc_line(pblk, line))
+		pr_err("pblk: failed to GC line %d\n", line->id);
+
+	return 0;
+}
+
+static void pblk_gc_reader_kick(struct pblk_gc *gc)
+{
+	wake_up_process(gc->gc_reader_ts);
 }
 
 static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
@@ -301,6 +324,17 @@ static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
 	return victim;
 }
 
+static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
+{
+	unsigned int nr_blocks_free, nr_blocks_need;
+
+	nr_blocks_need = pblk_rl_high_thrs(rl);
+	nr_blocks_free = pblk_rl_nr_free_blks(rl);
+
+	/* This is not critical, no need to take lock here */
+	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
+}
+
 /*
  * Lines with no valid sectors will be returned to the free list immediately. If
  * GC is activated - either because the free block count is under the determined
@@ -311,71 +345,83 @@ static void pblk_gc_run(struct pblk *pblk)
 {
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_gc *gc = &pblk->gc;
-	struct pblk_line *line, *tline;
-	unsigned int nr_blocks_free, nr_blocks_need;
+	struct pblk_line *line;
 	struct list_head *group_list;
-	int run_gc, gc_group = 0;
-	int prev_gc = 0;
-	int inflight_gc = atomic_read(&gc->inflight_gc);
-	LIST_HEAD(gc_list);
+	bool run_gc;
+	int inflight_gc, gc_group = 0, prev_group = 0;
+
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(&l_mg->gc_full_list)) {
+			spin_unlock(&l_mg->gc_lock);
+			break;
+		}
+
+		line = list_first_entry(&l_mg->gc_full_list,
+							struct pblk_line, list);
 
-	spin_lock(&l_mg->gc_lock);
-	list_for_each_entry_safe(line, tline, &l_mg->gc_full_list, list) {
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
 		spin_unlock(&line->lock);
 
 		list_del(&line->list);
+		spin_unlock(&l_mg->gc_lock);
+
 		kref_put(&line->ref, pblk_line_put);
-	}
-	spin_unlock(&l_mg->gc_lock);
+	} while (1);
 
-	nr_blocks_need = pblk_rl_gc_thrs(&pblk->rl);
-	nr_blocks_free = pblk_rl_nr_free_blks(&pblk->rl);
-	run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
+	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+	if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
+		return;
 
next_gc_group:
 	group_list = l_mg->gc_lists[gc_group++];
-	spin_lock(&l_mg->gc_lock);
-	while (run_gc && !list_empty(group_list)) {
-		/* No need to queue up more GC lines than we can handle */
-		if (!run_gc || inflight_gc > gc->gc_jobs_active) {
+
+	do {
+		spin_lock(&l_mg->gc_lock);
+		if (list_empty(group_list)) {
 			spin_unlock(&l_mg->gc_lock);
-			pblk_gc_lines(pblk, &gc_list);
-			return;
+			break;
 		}
 
 		line = pblk_gc_get_victim_line(pblk, group_list);
-		nr_blocks_free += atomic_read(&line->blk_in_line);
 
 		spin_lock(&line->lock);
 		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
 		line->state = PBLK_LINESTATE_GC;
-		list_move_tail(&line->list, &gc_list);
-		atomic_inc(&gc->inflight_gc);
-		inflight_gc++;
 		spin_unlock(&line->lock);
 
-		prev_gc = 1;
-		run_gc = (nr_blocks_need > nr_blocks_free || gc->gc_forced);
-	}
-	spin_unlock(&l_mg->gc_lock);
+		list_del(&line->list);
+		spin_unlock(&l_mg->gc_lock);
+
+		spin_lock(&gc->r_lock);
+		list_add_tail(&line->list, &gc->r_list);
+		spin_unlock(&gc->r_lock);
+
+		inflight_gc = atomic_inc_return(&gc->inflight_gc);
+		pblk_gc_reader_kick(gc);
 
-	pblk_gc_lines(pblk, &gc_list);
+		prev_group = 1;
 
-	if (!prev_gc && pblk->rl.rb_state > gc_group &&
-	    gc_group < PBLK_NR_GC_LISTS)
+		/* No need to queue up more GC lines than we can handle */
+		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
+		if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
+			break;
+	} while (1);
+
+	if (!prev_group && pblk->rl.rb_state > gc_group &&
+	    gc_group < PBLK_GC_NR_LISTS)
 		goto next_gc_group;
 }
 
-static void pblk_gc_kick(struct pblk *pblk)
+void pblk_gc_kick(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
 	wake_up_process(gc->gc_ts);
 	pblk_gc_writer_kick(gc);
+	pblk_gc_reader_kick(gc);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 }
@@ -413,42 +459,34 @@ static int pblk_gc_writer_ts(void *data)
 	return 0;
 }
 
-static void pblk_gc_start(struct pblk *pblk)
+static int pblk_gc_reader_ts(void *data)
 {
-	pblk->gc.gc_active = 1;
+	struct pblk *pblk = data;
 
-	pr_debug("pblk: gc start\n");
+	while (!kthread_should_stop()) {
+		if (!pblk_gc_read(pblk))
+			continue;
+		set_current_state(TASK_INTERRUPTIBLE);
+		io_schedule();
+	}
+
+	return 0;
 }
 
-int pblk_gc_status(struct pblk *pblk)
+static void pblk_gc_start(struct pblk *pblk)
 {
-	struct pblk_gc *gc = &pblk->gc;
-	int ret;
-
-	spin_lock(&gc->lock);
-	ret = gc->gc_active;
-	spin_unlock(&gc->lock);
-
-	return ret;
+	pblk->gc.gc_active = 1;
+	pr_debug("pblk: gc start\n");
 }
 
-static void __pblk_gc_should_start(struct pblk *pblk)
+void pblk_gc_should_start(struct pblk *pblk)
 {
 	struct pblk_gc *gc = &pblk->gc;
 
-	lockdep_assert_held(&gc->lock);
-
 	if (gc->gc_enabled && !gc->gc_active)
 		pblk_gc_start(pblk);
-}
-
-void pblk_gc_should_start(struct pblk *pblk)
-{
-	struct pblk_gc *gc = &pblk->gc;
 
-	spin_lock(&gc->lock);
-	__pblk_gc_should_start(pblk);
-	spin_unlock(&gc->lock);
+	pblk_gc_kick(pblk);
 }
 
 /*
@@ -457,10 +495,7 @@ void pblk_gc_should_start(struct pblk *pblk)
  */
 static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
 {
-	spin_lock(&pblk->gc.lock);
 	pblk->gc.gc_active = 0;
-	spin_unlock(&pblk->gc.lock);
-
 	pr_debug("pblk: gc stop\n");
 }
 
@@ -483,20 +518,25 @@ void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 	spin_unlock(&gc->lock);
 }
 
-void pblk_gc_sysfs_force(struct pblk *pblk, int force)
+int pblk_gc_sysfs_force(struct pblk *pblk, int force)
 {
 	struct pblk_gc *gc = &pblk->gc;
-	int rsv = 0;
+
+	if (force < 0 || force > 1)
+		return -EINVAL;
 
 	spin_lock(&gc->lock);
-	if (force) {
-		gc->gc_enabled = 1;
-		rsv = 64;
-	}
-	pblk_rl_set_gc_rsc(&pblk->rl, rsv);
 	gc->gc_forced = force;
-	__pblk_gc_should_start(pblk);
+
+	if (force)
+		gc->gc_enabled = 1;
+	else
+		gc->gc_enabled = 0;
 	spin_unlock(&gc->lock);
+
+	pblk_gc_should_start(pblk);
+
+	return 0;
 }
 
 int pblk_gc_init(struct pblk *pblk)
@@ -518,30 +558,58 @@ int pblk_gc_init(struct pblk *pblk)
 		goto fail_free_main_kthread;
 	}
 
+	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
+							"pblk-gc-reader-ts");
+	if (IS_ERR(gc->gc_reader_ts)) {
+		pr_err("pblk: could not allocate GC reader kthread\n");
+		ret = PTR_ERR(gc->gc_reader_ts);
+		goto fail_free_writer_kthread;
+	}
+
 	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
 	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
 
 	gc->gc_active = 0;
 	gc->gc_forced = 0;
 	gc->gc_enabled = 1;
-	gc->gc_jobs_active = 8;
 	gc->w_entries = 0;
 	atomic_set(&gc->inflight_gc, 0);
 
-	gc->gc_reader_wq = alloc_workqueue("pblk-gc-reader-wq",
-			WQ_MEM_RECLAIM | WQ_UNBOUND, gc->gc_jobs_active);
+	/* Workqueue that reads valid sectors from a line and submit them to the
+	 * GC writer to be recycled.
+	 */
+	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
+			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
+	if (!gc->gc_line_reader_wq) {
+		pr_err("pblk: could not allocate GC line reader workqueue\n");
+		ret = -ENOMEM;
+		goto fail_free_reader_kthread;
+	}
+
+	/* Workqueue that prepare lines for GC */
+	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
+					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
 	if (!gc->gc_reader_wq) {
 		pr_err("pblk: could not allocate GC reader workqueue\n");
 		ret = -ENOMEM;
-		goto fail_free_writer_kthread;
+		goto fail_free_reader_line_wq;
 	}
 
 	spin_lock_init(&gc->lock);
 	spin_lock_init(&gc->w_lock);
+	spin_lock_init(&gc->r_lock);
+
+	sema_init(&gc->gc_sem, 128);
+
 	INIT_LIST_HEAD(&gc->w_list);
+	INIT_LIST_HEAD(&gc->r_list);
 
 	return 0;
 
+fail_free_reader_line_wq:
+	destroy_workqueue(gc->gc_line_reader_wq);
+fail_free_reader_kthread:
+	kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
 	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
@@ -555,6 +623,7 @@ void pblk_gc_exit(struct pblk *pblk)
 	struct pblk_gc *gc = &pblk->gc;
 
 	flush_workqueue(gc->gc_reader_wq);
+	flush_workqueue(gc->gc_line_reader_wq);
 
 	del_timer(&gc->gc_timer);
 	pblk_gc_stop(pblk, 1);
@@ -562,9 +631,15 @@ void pblk_gc_exit(struct pblk *pblk)
 	if (gc->gc_ts)
 		kthread_stop(gc->gc_ts);
 
-	if (pblk->gc.gc_reader_wq)
-		destroy_workqueue(pblk->gc.gc_reader_wq);
+	if (gc->gc_reader_wq)
+		destroy_workqueue(gc->gc_reader_wq);
+
+	if (gc->gc_line_reader_wq)
+		destroy_workqueue(gc->gc_line_reader_wq);
 
 	if (gc->gc_writer_ts)
 		kthread_stop(gc->gc_writer_ts);
+
+	if (gc->gc_reader_ts)
+		kthread_stop(gc->gc_reader_ts);
 }
diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c
index d293af12aa7a..50886878568b 100644
--- a/drivers/lightnvm/pblk-rb.c
+++ b/drivers/lightnvm/pblk-rb.c
@@ -199,12 +199,22 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 	struct pblk_line *line;
 	struct pblk_rb_entry *entry;
 	struct pblk_w_ctx *w_ctx;
+	unsigned int user_io = 0, gc_io = 0;
 	unsigned int i;
+	int flags;
 
 	for (i = 0; i < to_update; i++) {
 		entry = &rb->entries[*l2p_upd];
 		w_ctx = &entry->w_ctx;
 
+		flags = READ_ONCE(entry->w_ctx.flags);
+		if (flags & PBLK_IOTYPE_USER)
+			user_io++;
+		else if (flags & PBLK_IOTYPE_GC)
+			gc_io++;
+		else
+			WARN(1, "pblk: unknown IO type\n");
+
 		pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
 							entry->cacheline);
 
@@ -214,6 +224,8 @@ static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int *l2p_upd,
 		*l2p_upd = (*l2p_upd + 1) & (rb->nr_entries - 1);
 	}
 
+	pblk_rl_out(&pblk->rl, user_io, gc_io);
+
 	return 0;
 }
 
@@ -531,7 +543,6 @@ unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
 	struct pblk_rb_entry *entry;
 	struct page *page;
 	unsigned int pad = 0, to_read = nr_entries;
-	unsigned int user_io = 0, gc_io = 0;
 	unsigned int i;
 	int flags;
 
@@ -555,13 +566,6 @@ try:
 		if (!(flags & PBLK_WRITTEN_DATA))
 			goto try;
 
-		if (flags & PBLK_IOTYPE_USER)
-			user_io++;
-		else if (flags & PBLK_IOTYPE_GC)
-			gc_io++;
-		else
-			WARN(1, "pblk: unknown IO type\n");
-
 		page = virt_to_page(entry->data);
 		if (!page) {
 			pr_err("pblk: could not allocate write bio page\n");
@@ -613,7 +617,6 @@ try:
 		}
 	}
 
-	pblk_rl_out(&pblk->rl, user_io, gc_io);
 #ifdef CONFIG_NVM_DEBUG
 	atomic_long_add(pad, &((struct pblk *)
 			(container_of(rb, struct pblk, rwb)))->padded_writes);
diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c
index ab7cbb144f3f..52068a1807a8 100644
--- a/drivers/lightnvm/pblk-rl.c
+++ b/drivers/lightnvm/pblk-rl.c
@@ -27,7 +27,7 @@ int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
 {
 	int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
 
-	return (!(rb_user_cnt + nr_entries > rl->rb_user_max));
+	return (!(rb_user_cnt >= rl->rb_user_max));
 }
 
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
@@ -37,7 +37,7 @@ int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
 	/* If there is no user I/O let GC take over space on the write buffer */
 	rb_user_active = READ_ONCE(rl->rb_user_active);
 
-	return (!(rb_gc_cnt + nr_entries > rl->rb_gc_max && rb_user_active));
+	return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
 }
 
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
@@ -77,33 +77,32 @@ static int pblk_rl_update_rates(struct pblk_rl *rl, unsigned long max)
 	unsigned long free_blocks = pblk_rl_nr_free_blks(rl);
 
 	if (free_blocks >= rl->high) {
-		rl->rb_user_max = max - rl->rb_gc_rsv;
-		rl->rb_gc_max = rl->rb_gc_rsv;
+		rl->rb_user_max = max;
+		rl->rb_gc_max = 0;
 		rl->rb_state = PBLK_RL_HIGH;
 	} else if (free_blocks < rl->high) {
 		int shift = rl->high_pw - rl->rb_windows_pw;
 		int user_windows = free_blocks >> shift;
 		int user_max = user_windows << PBLK_MAX_REQ_ADDRS_PW;
-		int gc_max;
 
 		rl->rb_user_max = user_max;
-		gc_max = max - rl->rb_user_max;
-		rl->rb_gc_max = max(gc_max, rl->rb_gc_rsv);
-
-		if (free_blocks > rl->low)
-			rl->rb_state = PBLK_RL_MID;
-		else
-			rl->rb_state = PBLK_RL_LOW;
+		rl->rb_gc_max = max - user_max;
+
+		if (free_blocks <= rl->rsv_blocks) {
+			rl->rb_user_max = 0;
+			rl->rb_gc_max = max;
+		}
+
+		/* In the worst case, we will need to GC lines in the low list
+		 * (high valid sector count). If there are lines to GC on high
+		 * or mid lists, these will be prioritized
+		 */
+		rl->rb_state = PBLK_RL_LOW;
 	}
 
 	return rl->rb_state;
 }
 
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv)
-{
-	rl->rb_gc_rsv = rl->rb_gc_max = rsv;
-}
-
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 {
 	struct pblk *pblk = container_of(rl, struct pblk, rl);
@@ -122,11 +121,15 @@ void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
 
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line)
 {
-	struct pblk *pblk = container_of(rl, struct pblk, rl);
 	int blk_in_line = atomic_read(&line->blk_in_line);
-	int ret;
 
 	atomic_sub(blk_in_line, &rl->free_blocks);
+}
+
+void pblk_gc_should_kick(struct pblk *pblk)
+{
+	struct pblk_rl *rl = &pblk->rl;
+	int ret;
 
 	/* Rates will not change that often - no need to lock update */
 	ret = pblk_rl_update_rates(rl, rl->rb_budget);
@@ -136,11 +139,16 @@ void pblk_gc_should_kick(struct pblk *pblk)
 		pblk_gc_should_stop(pblk);
 }
 
-int pblk_rl_gc_thrs(struct pblk_rl *rl)
+int pblk_rl_high_thrs(struct pblk_rl *rl)
 {
 	return rl->high;
 }
 
+int pblk_rl_low_thrs(struct pblk_rl *rl)
+{
+	return rl->low;
+}
+
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl)
 {
 	return rl->rb_user_max;
@@ -161,15 +169,23 @@ void pblk_rl_free(struct pblk_rl *rl)
 
 void pblk_rl_init(struct pblk_rl *rl, int budget)
 {
+	struct pblk *pblk = container_of(rl, struct pblk, rl);
+	struct pblk_line_meta *lm = &pblk->lm;
+	int min_blocks = lm->blk_per_line * PBLK_GC_RSV_LINE;
 	unsigned int rb_windows;
 
 	rl->high = rl->total_blocks / PBLK_USER_HIGH_THRS;
-	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
 	rl->high_pw = get_count_order(rl->high);
 
+	rl->low = rl->total_blocks / PBLK_USER_LOW_THRS;
+	if (rl->low < min_blocks)
+		rl->low = min_blocks;
+
+	rl->rsv_blocks = min_blocks;
+
 	/* This will always be a power-of-2 */
 	rb_windows = budget / PBLK_MAX_REQ_ADDRS;
-	rl->rb_windows_pw = get_count_order(rb_windows) + 1;
+	rl->rb_windows_pw = get_count_order(rb_windows);
 
 	/* To start with, all buffer is available to user I/O writers */
 	rl->rb_budget = budget;
@@ -180,5 +196,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget)
 	atomic_set(&rl->rb_gc_cnt, 0);
 
 	setup_timer(&rl->u_timer, pblk_rl_u_timer, (unsigned long)rl);
 
+	rl->rb_user_active = 0;
+	rl->rb_gc_active = 0;
 }
diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c
index e1e92c9498a9..d9f7f13a38cc 100644
--- a/drivers/lightnvm/pblk-sysfs.c
+++ b/drivers/lightnvm/pblk-sysfs.c
@@ -49,30 +49,26 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
 
 static ssize_t pblk_sysfs_rate_limiter(struct pblk *pblk, char *page)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	int free_blocks, total_blocks;
 	int rb_user_max, rb_user_cnt;
-	int rb_gc_max, rb_gc_rsv, rb_gc_cnt, rb_budget, rb_state;
+	int rb_gc_max, rb_gc_cnt, rb_budget, rb_state;
 
 	free_blocks = atomic_read(&pblk->rl.free_blocks);
 	rb_user_max = pblk->rl.rb_user_max;
 	rb_user_cnt = atomic_read(&pblk->rl.rb_user_cnt);
 	rb_gc_max = pblk->rl.rb_gc_max;
-	rb_gc_rsv = pblk->rl.rb_gc_rsv;
 	rb_gc_cnt = atomic_read(&pblk->rl.rb_gc_cnt);
 	rb_budget = pblk->rl.rb_budget;
 	rb_state = pblk->rl.rb_state;
 
-	total_blocks = geo->blks_per_lun * geo->nr_luns;
+	total_blocks = pblk->rl.total_blocks;
 
 	return snprintf(page, PAGE_SIZE,
-		"u:%u/%u,gc:%u/%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
+		"u:%u/%u,gc:%u/%u(%u/%u)(stop:<%u,full:>%u,free:%d/%d)-%d\n",
 				rb_user_cnt,
 				rb_user_max,
 				rb_gc_cnt,
 				rb_gc_max,
-				rb_gc_rsv,
 				rb_state,
 				rb_budget,
 				pblk->rl.low,
@@ -237,7 +233,8 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
 	spin_unlock(&l_mg->free_lock);
 
 	if (nr_free_lines != free_line_cnt)
-		pr_err("pblk: corrupted free line list\n");
+		pr_err("pblk: corrupted free line list:%d/%d\n",
+						nr_free_lines, free_line_cnt);
 
 	sz = snprintf(page, PAGE_SIZE - sz,
 		"line: nluns:%d, nblks:%d, nsecs:%d\n",
@@ -319,32 +316,11 @@ static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page)
 }
 #endif
 
-static ssize_t pblk_sysfs_rate_store(struct pblk *pblk, const char *page,
-				     size_t len)
-{
-	struct pblk_gc *gc = &pblk->gc;
-	size_t c_len;
-	int value;
-
-	c_len = strcspn(page, "\n");
-	if (c_len >= len)
-		return -EINVAL;
-
-	if (kstrtouint(page, 0, &value))
-		return -EINVAL;
-
-	spin_lock(&gc->lock);
-	pblk_rl_set_gc_rsc(&pblk->rl, value);
-	spin_unlock(&gc->lock);
-
-	return len;
-}
-
 static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
 				   size_t len)
 {
 	size_t c_len;
-	int force;
+	int ret, force;
 
 	c_len = strcspn(page, "\n");
 	if (c_len >= len)
@@ -353,10 +329,7 @@ static ssize_t pblk_sysfs_gc_force(struct pblk *pblk, const char *page,
 	if (kstrtouint(page, 0, &force))
 		return -EINVAL;
 
-	if (force < 0 || force > 1)
-		return -EINVAL;
-
-	pblk_gc_sysfs_force(pblk, force);
+	ret = pblk_gc_sysfs_force(pblk, force);
 
 	return len;
 }
@@ -434,11 +407,6 @@ static struct attribute sys_max_sec_per_write = {
 	.mode = 0644,
 };
 
-static struct attribute sys_gc_rl_max = {
-	.name = "gc_rl_max",
-	.mode = 0200,
-};
-
 #ifdef CONFIG_NVM_DEBUG
 static struct attribute sys_stats_debug_attr = {
 	.name = "stats",
@@ -453,7 +421,6 @@ static struct attribute *pblk_attrs[] = {
 	&sys_gc_state,
 	&sys_gc_force,
 	&sys_max_sec_per_write,
-	&sys_gc_rl_max,
 	&sys_rb_attr,
 	&sys_stats_ppaf_attr,
 	&sys_lines_attr,
@@ -499,9 +466,7 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr,
 {
 	struct pblk *pblk = container_of(kobj, struct pblk, kobj);
 
-	if (strcmp(attr->name, "gc_rl_max") == 0)
-		return pblk_sysfs_rate_store(pblk, buf, len);
-	else if (strcmp(attr->name, "gc_force") == 0)
+	if (strcmp(attr->name, "gc_force") == 0)
 		return pblk_sysfs_gc_force(pblk, buf, len);
 	else if (strcmp(attr->name, "max_sec_per_write") == 0)
 		return pblk_sysfs_set_sec_per_write(pblk, buf, len);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 3fe8b05e3de0..596c1914a13a 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -72,11 +72,15 @@ enum {
 	PBLK_BLK_ST_CLOSED = 0x2,
 };
 
+struct pblk_sec_meta {
+	u64 reserved;
+	__le64 lba;
+};
+
 /* The number of GC lists and the rate-limiter states go together. This way the
  * rate-limiter can dictate how much GC is needed based on resource utilization.
  */
-#define PBLK_NR_GC_LISTS 3
-#define PBLK_MAX_GC_JOBS 32
+#define PBLK_GC_NR_LISTS 3
 
 enum {
 	PBLK_RL_HIGH = 1,
@@ -84,11 +88,6 @@ enum {
 	PBLK_RL_LOW = 3,
 };
 
-struct pblk_sec_meta {
-	u64 reserved;
-	__le64 lba;
-};
-
 #define pblk_dma_meta_size (sizeof(struct pblk_sec_meta) * PBLK_MAX_REQ_ADDRS)
 
 /* write buffer completion context */
@@ -195,29 +194,39 @@ struct pblk_lun {
 struct pblk_gc_rq {
 	struct pblk_line *line;
 	void *data;
-	u64 *lba_list;
+	u64 lba_list[PBLK_MAX_REQ_ADDRS];
 	int nr_secs;
 	int secs_to_gc;
 	struct list_head list;
 };
 
 struct pblk_gc {
+	/* These states are not protected by a lock since (i) they are in the
+	 * fast path, and (ii) they are not critical.
+	 */
 	int gc_active;
 	int gc_enabled;
 	int gc_forced;
-	int gc_jobs_active;
-	atomic_t inflight_gc;
 
 	struct task_struct *gc_ts;
 	struct task_struct *gc_writer_ts;
+	struct task_struct *gc_reader_ts;
+
+	struct workqueue_struct *gc_line_reader_wq;
 	struct workqueue_struct *gc_reader_wq;
+
 	struct timer_list gc_timer;
 
+	struct semaphore gc_sem;
+	atomic_t inflight_gc;
 	int w_entries;
+
 	struct list_head w_list;
+	struct list_head r_list;
 
 	spinlock_t lock;
 	spinlock_t w_lock;
+	spinlock_t r_lock;
 };
 
 struct pblk_rl {
@@ -229,10 +238,8 @@ struct pblk_rl {
 	 */
 	unsigned int high_pw;	/* High rounded up as a power of 2 */
 
-#define PBLK_USER_HIGH_THRS 2	/* Begin write limit at 50 percent
-				 * available blks
-				 */
-#define PBLK_USER_LOW_THRS 20	/* Aggressive GC at 5% available blocks */
+#define PBLK_USER_HIGH_THRS 8	/* Begin write limit at 12% available blks */
+#define PBLK_USER_LOW_THRS 10	/* Aggressive GC at 10% available blocks */
 
 	int rb_windows_pw;	/* Number of rate windows in the write buffer
 				 * given as a power-of-2. This guarantees that
@@ -250,7 +257,11 @@ struct pblk_rl {
 	int rb_state;		/* Rate-limiter current state */
 	atomic_t rb_gc_cnt;	/* GC I/O buffer counter */
 
+	int rsv_blocks;		/* Reserved blocks for GC */
+
 	int rb_user_active;
+	int rb_gc_active;
+
 	struct timer_list u_timer;
 
 	unsigned long long nr_secs;
@@ -428,7 +439,7 @@ struct pblk_line_mgmt {
 	struct list_head bad_list;	/* Full lines bad */
 
 	/* GC lists - use gc_lock */
-	struct list_head *gc_lists[PBLK_NR_GC_LISTS];
+	struct list_head *gc_lists[PBLK_GC_NR_LISTS];
 	struct list_head gc_high_list;	/* Full lines ready to GC, high isc */
 	struct list_head gc_mid_list;	/* Full lines ready to GC, mid isc */
 	struct list_head gc_low_list;	/* Full lines ready to GC, low isc */
@@ -768,30 +779,34 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx,
 /*
  * pblk gc
  */
-#define PBLK_GC_TRIES 3
+#define PBLK_GC_MAX_READERS 8	/* Max number of outstanding GC reader jobs */
+#define PBLK_GC_W_QD 1024	/* Queue depth for inflight GC write I/Os */
+#define PBLK_GC_L_QD 4		/* Queue depth for inflight GC lines */
+#define PBLK_GC_RSV_LINE 1	/* Reserved lines for GC */
 
 int pblk_gc_init(struct pblk *pblk);
 void pblk_gc_exit(struct pblk *pblk);
 void pblk_gc_should_start(struct pblk *pblk);
 void pblk_gc_should_stop(struct pblk *pblk);
-int pblk_gc_status(struct pblk *pblk);
+void pblk_gc_should_kick(struct pblk *pblk);
+void pblk_gc_kick(struct pblk *pblk);
 void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
 			      int *gc_active);
-void pblk_gc_sysfs_force(struct pblk *pblk, int force);
+int pblk_gc_sysfs_force(struct pblk *pblk, int force);
 
 /*
  * pblk rate limiter
  */
 void pblk_rl_init(struct pblk_rl *rl, int budget);
 void pblk_rl_free(struct pblk_rl *rl);
-int pblk_rl_gc_thrs(struct pblk_rl *rl);
+int pblk_rl_high_thrs(struct pblk_rl *rl);
+int pblk_rl_low_thrs(struct pblk_rl *rl);
 unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl);
 int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries);
 int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries);
 void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc);
-void pblk_rl_set_gc_rsc(struct pblk_rl *rl, int rsv);
 int pblk_rl_sysfs_rate_show(struct pblk_rl *rl);
 void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line);
 void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line);
@@ -837,6 +852,17 @@ static inline void *emeta_to_vsc(struct pblk *pblk, struct line_emeta *emeta)
 	return (emeta_to_lbas(pblk, emeta) + pblk->lm.emeta_len[2]);
 }
 
+static inline int pblk_line_vsc(struct pblk_line *line)
+{
+	int vsc;
+
+	spin_lock(&line->lock);
+	vsc = le32_to_cpu(*line->vsc);
+	spin_unlock(&line->lock);
+
+	return vsc;
+}
+
 #define NVM_MEM_PAGE_WRITE (8)
 
 static inline int pblk_pad_distance(struct pblk *pblk)
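Apart from the GC redesign, the first pblk-core.c hunk fixes an inverted pair of threshold checks when a closed line is bucketed by valid sector count (vsc). A reduced sketch of the corrected ordering — plain ints stand in for the kernel structs, and the concrete thresholds are assumptions (in pblk, high_thrs is the smaller of the two, on the order of 1/4 vs 1/2 of sec_per_line):

```c
/* Reduced sketch of the corrected bucketing in pblk_line_gc_list():
 * a line with FEW valid sectors is cheap to reclaim and belongs in the
 * high-priority GC list. Before the patch the two middle comparisons
 * were swapped, so mid-cost lines landed in the high list and vice versa.
 */
enum gc_group { GC_FULL, GC_HIGH, GC_MID, GC_LOW };

static enum gc_group gc_bucket(int vsc, int high_thrs, int mid_thrs)
{
	if (vsc == 0)
		return GC_FULL;		/* no valid data left: free it */
	else if (vsc < high_thrs)
		return GC_HIGH;		/* few valid sectors: cheap GC */
	else if (vsc < mid_thrs)
		return GC_MID;
	else
		return GC_LOW;		/* mostly valid: expensive GC */
}
```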