diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-05 14:27:02 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-05 14:27:02 -0700 |
commit | 3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch) | |
tree | 22fbac64eb40a0b29bfa4c029695f39b2f591e62 /drivers | |
parent | dd972f924df6bdbc0ab185a38d5d2361dbc26311 (diff) | |
parent | bc6d65e6dc89c3b7ff78e4ad797117c122ffde8e (diff) | |
download | linux-3526dd0c7832f1011a0477cc6d903662bae05ea8.tar.gz linux-3526dd0c7832f1011a0477cc6d903662bae05ea8.tar.bz2 linux-3526dd0c7832f1011a0477cc6d903662bae05ea8.zip |
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe:
"It's a pretty quiet round this time, which is nice. This contains:
- series from Bart, cleaning up the way we set/test/clear atomic
queue flags.
- series from Bart, fixing races between gendisk and queue
registration and removal.
- set of bcache fixes and improvements from various folks, by way of
Michael Lyle.
- set of lightnvm updates from Matias, most of it being the 1.2 to
2.0 transition.
- removal of unused DIO flags from Nikolay.
- blk-mq/sbitmap memory ordering fixes from Omar.
- divide-by-zero fix for BFQ from Paolo.
- minor documentation patches from Randy.
- timeout fix from Tejun.
- Alpha "can't write a char atomically" fix from Mikulas.
- set of NVMe fixes by way of Keith.
- bsg and bsg-lib improvements from Christoph.
- a few sed-opal fixes from Jonas.
- cdrom check-disk-change deadlock fix from Maurizio.
- various little fixes, comment fixes, etc from various folks"
* tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits)
blk-mq: Directly schedule q->timeout_work when aborting a request
blktrace: fix comment in blktrace_api.h
lightnvm: remove function name in strings
lightnvm: pblk: remove some unnecessary NULL checks
lightnvm: pblk: don't recover unwritten lines
lightnvm: pblk: implement 2.0 support
lightnvm: pblk: implement get log report chunk
lightnvm: pblk: rename ppaf* to addrf*
lightnvm: pblk: check for supported version
lightnvm: implement get log report chunk helpers
lightnvm: make address conversions depend on generic device
lightnvm: add support for 2.0 address format
lightnvm: normalize geometry nomenclature
lightnvm: complete geo structure with maxoc*
lightnvm: add shorten OCSSD version in geo
lightnvm: add minor version to generic geometry
lightnvm: simplify geometry structure
lightnvm: pblk: refactor init/exit sequences
lightnvm: Avoid validation of default op value
lightnvm: centralize permission check for lightnvm ioctl
...
Diffstat (limited to 'drivers')
106 files changed, 3072 insertions, 1369 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index deea78e485da..66cb0f857f64 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -24,7 +24,6 @@ #include <linux/uaccess.h> -#define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 0a0394aa1b9c..185f1ef00a7c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2816,7 +2816,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_init_set_defaults(device); - q = blk_alloc_queue(GFP_KERNEL); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock); if (!q) goto out_no_q; device->rq_queue = q; @@ -2848,7 +2848,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); - q->queue_lock = &resource->req_lock; device->md_io.page = alloc_page(GFP_KERNEL); if (!device->md_io.page) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index a12f77e6891e..b4f02768ba47 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1212,10 +1212,10 @@ static void decide_on_discard_support(struct drbd_device *device, * topology on all peers. */ blk_queue_discard_granularity(q, 512); q->limits.max_discard_sectors = drbd_max_discard_sectors(connection); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.max_write_zeroes_sectors = drbd_max_discard_sectors(connection); } else { - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); blk_queue_discard_granularity(q, 0); q->limits.max_discard_sectors = 0; q->limits.max_write_zeroes_sectors = 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ee62d2d517bf..264abaaff662 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -214,10 +214,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_freeze_queue(lo->lo_queue); lo->use_dio = use_dio; if (use_dio) { - queue_flag_clear_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags |= LO_FLAGS_DIRECT_IO; } else { - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } blk_mq_unfreeze_queue(lo->lo_queue); @@ -817,7 +817,7 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_alignment = 0; blk_queue_max_discard_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; } @@ -826,7 +826,7 @@ static void loop_config_discard(struct loop_device *lo) blk_queue_max_discard_sectors(q, UINT_MAX >> 9); blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static void loop_unprepare_queue(struct loop_device *lo) @@ -1167,21 +1167,17 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { - struct file *file = lo->lo_backing_file; + struct file *file; struct kstat stat; - int error; + int ret; - if (lo->lo_state != Lo_bound) + if (lo->lo_state != Lo_bound) { + mutex_unlock(&lo->lo_ctl_mutex); return -ENXIO; - error = vfs_getattr(&file->f_path, &stat, - STATX_INO, AT_STATX_SYNC_AS_STAT); - if (error) - return error; + } + memset(info, 0, sizeof(*info)); info->lo_number = lo->lo_number; - info->lo_device = huge_encode_dev(stat.dev); - info->lo_inode = stat.ino; - info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev); info->lo_offset = lo->lo_offset; info->lo_sizelimit = lo->lo_sizelimit; info->lo_flags = lo->lo_flags; @@ -1194,7 +1190,19 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) memcpy(info->lo_encrypt_key, lo->lo_encrypt_key, lo->lo_encrypt_key_size); } - return 0; + + /* Drop lo_ctl_mutex while we call into the filesystem. */ + file = get_file(lo->lo_backing_file); + mutex_unlock(&lo->lo_ctl_mutex); + ret = vfs_getattr(&file->f_path, &stat, STATX_INO, + AT_STATX_SYNC_AS_STAT); + if (!ret) { + info->lo_device = huge_encode_dev(stat.dev); + info->lo_inode = stat.ino; + info->lo_rdevice = huge_encode_dev(stat.rdev); + } + fput(file); + return ret; } static void @@ -1352,7 +1360,10 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, struct loop_device *lo = bdev->bd_disk->private_data; int err; - mutex_lock_nested(&lo->lo_ctl_mutex, 1); + err = mutex_lock_killable_nested(&lo->lo_ctl_mutex, 1); + if (err) + goto out_unlocked; + switch (cmd) { case LOOP_SET_FD: err = loop_set_fd(lo, mode, bdev, arg); @@ -1374,7 +1385,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS: err = loop_get_status_old(lo, (struct loop_info __user *) arg); - break; + /* loop_get_status() unlocks lo_ctl_mutex */ + goto out_unlocked; case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) @@ -1383,7 +1395,8 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_GET_STATUS64: err = loop_get_status64(lo, (struct loop_info64 __user *) arg); - break; + /* loop_get_status() unlocks lo_ctl_mutex */ + goto out_unlocked; case LOOP_SET_CAPACITY: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) @@ -1535,16 +1548,20 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, switch(cmd) { case LOOP_SET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); - err = loop_set_status_compat( - lo, (const struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + err = mutex_lock_killable(&lo->lo_ctl_mutex); + if (!err) { + err = loop_set_status_compat(lo, + (const struct compat_loop_info __user *)arg); + mutex_unlock(&lo->lo_ctl_mutex); + } break; case LOOP_GET_STATUS: - mutex_lock(&lo->lo_ctl_mutex); - err = loop_get_status_compat( - lo, (struct compat_loop_info __user *) arg); - mutex_unlock(&lo->lo_ctl_mutex); + err = mutex_lock_killable(&lo->lo_ctl_mutex); + if (!err) { + err = loop_get_status_compat(lo, + (struct compat_loop_info __user *)arg); + /* loop_get_status() unlocks lo_ctl_mutex */ + } break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: @@ -1808,7 +1825,7 @@ static int loop_add(struct loop_device **l, int i) * page. For directio mode, merge does help to dispatch bigger request * to underlayer disk. We will enable merge once directio is enabled. */ - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, lo->lo_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue); err = -ENOMEM; disk = lo->lo_disk = alloc_disk(1 << part_shift); @@ -1864,8 +1881,8 @@ out: static void loop_remove(struct loop_device *lo) { - blk_cleanup_queue(lo->lo_queue); del_gendisk(lo->lo_disk); + blk_cleanup_queue(lo->lo_queue); blk_mq_free_tag_set(&lo->tag_set); put_disk(lo->lo_disk); kfree(lo); @@ -1949,7 +1966,9 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ret = loop_lookup(&lo, parm); if (ret < 0) break; - mutex_lock(&lo->lo_ctl_mutex); + ret = mutex_lock_killable(&lo->lo_ctl_mutex); + if (ret) + break; if (lo->lo_state != Lo_unbound) { ret = -EBUSY; mutex_unlock(&lo->lo_ctl_mutex); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index b8af7352a18f..769c551e3d71 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -159,7 +159,7 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) if (vendor_id == 0xFFFF) { dd->sr = true; if (dd->queue) - set_bit(QUEUE_FLAG_DEAD, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue); else dev_warn(&dd->pdev->dev, "%s: dd->queue is NULL\n", __func__); @@ -3855,8 +3855,8 @@ skip_create_disk: goto start_service_thread; /* Set device limits. */ - set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); - clear_bit(QUEUE_FLAG_ADD_RANDOM, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue); blk_queue_max_segments(dd->queue, MTIP_MAX_SG); blk_queue_physical_block_size(dd->queue, 4096); blk_queue_max_hw_sectors(dd->queue, 0xffff); @@ -3866,7 +3866,7 @@ skip_create_disk: /* Signal trim support */ if (dd->trim_supp == true) { - set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, dd->queue); dd->queue->limits.discard_granularity = 4096; blk_queue_max_discard_sectors(dd->queue, MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 86258b00a1d4..afbc202ca6fd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -964,7 +964,7 @@ static void nbd_parse_flags(struct nbd_device *nbd) else set_disk_ro(nbd->disk, false); if (config->flags & NBD_FLAG_SEND_TRIM) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nbd->disk->queue); if (config->flags & NBD_FLAG_SEND_FLUSH) { if (config->flags & NBD_FLAG_SEND_FUA) blk_queue_write_cache(nbd->disk->queue, true, true); @@ -1040,7 +1040,7 @@ static void nbd_config_put(struct nbd_device *nbd) nbd->config = NULL; nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1488,8 +1488,8 @@ static int nbd_dev_add(int index) /* * Tell the block layer that we are not a rotational device */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); disk->queue->limits.discard_granularity = 512; blk_queue_max_discard_sectors(disk->queue, UINT_MAX); blk_queue_max_segment_size(disk->queue, UINT_MAX); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 287a09611c0f..a76553293a31 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -16,10 +16,8 @@ #include <linux/badblocks.h> #include <linux/fault-inject.h> -#define SECTOR_SHIFT 9 #define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) -#define SECTOR_SIZE (1 << SECTOR_SHIFT) #define SECTOR_MASK (PAGE_SECTORS - 1) #define FREE_BATCH 16 @@ -29,6 +27,7 @@ #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static DECLARE_FAULT_ATTR(null_timeout_attr); +static DECLARE_FAULT_ATTR(null_requeue_attr); #endif static inline u64 mb_per_tick(int mbps) @@ -53,6 +52,7 @@ struct nullb_queue { wait_queue_head_t wait; unsigned int queue_depth; struct nullb_device *dev; + unsigned int requeue_selection; struct nullb_cmd *cmds; }; @@ -72,6 +72,7 @@ enum nullb_device_flags { NULLB_DEV_FL_CACHE = 3, }; +#define MAP_SZ ((PAGE_SIZE >> SECTOR_SHIFT) + 2) /* * nullb_page is a page in memory for nullb devices. * @@ -86,10 +87,10 @@ enum nullb_device_flags { */ struct nullb_page { struct page *page; - unsigned long bitmap; + DECLARE_BITMAP(bitmap, MAP_SZ); }; -#define NULLB_PAGE_LOCK (sizeof(unsigned long) * 8 - 1) -#define NULLB_PAGE_FREE (sizeof(unsigned long) * 8 - 2) +#define NULLB_PAGE_LOCK (MAP_SZ - 1) +#define NULLB_PAGE_FREE (MAP_SZ - 2) struct nullb_device { struct nullb *nullb; @@ -170,6 +171,9 @@ MODULE_PARM_DESC(home_node, "Home node for the device"); #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION static char g_timeout_str[80]; module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO); + +static char g_requeue_str[80]; +module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO); #endif static int g_queue_mode = NULL_Q_MQ; @@ -728,7 +732,7 @@ static struct nullb_page *null_alloc_page(gfp_t gfp_flags) if (!t_page->page) goto out_freepage; - t_page->bitmap = 0; + memset(t_page->bitmap, 0, sizeof(t_page->bitmap)); return t_page; out_freepage: kfree(t_page); @@ -738,13 +742,20 @@ out: static void null_free_page(struct nullb_page *t_page) { - __set_bit(NULLB_PAGE_FREE, &t_page->bitmap); - if (test_bit(NULLB_PAGE_LOCK, &t_page->bitmap)) + __set_bit(NULLB_PAGE_FREE, t_page->bitmap); + if (test_bit(NULLB_PAGE_LOCK, t_page->bitmap)) return; __free_page(t_page->page); kfree(t_page); } +static bool null_page_empty(struct nullb_page *page) +{ + int size = MAP_SZ - 2; + + return find_first_bit(page->bitmap, size) == size; +} + static void null_free_sector(struct nullb *nullb, sector_t sector, bool is_cache) { @@ -759,9 +770,9 @@ static void null_free_sector(struct nullb *nullb, sector_t sector, t_page = radix_tree_lookup(root, idx); if (t_page) { - __clear_bit(sector_bit, &t_page->bitmap); + __clear_bit(sector_bit, t_page->bitmap); - if (!t_page->bitmap) { + if (null_page_empty(t_page)) { ret = radix_tree_delete_item(root, idx, t_page); WARN_ON(ret != t_page); null_free_page(ret); @@ -832,7 +843,7 @@ static struct nullb_page *__null_lookup_page(struct nullb *nullb, t_page = radix_tree_lookup(root, idx); WARN_ON(t_page && t_page->page->index != idx); - if (t_page && (for_write || test_bit(sector_bit, &t_page->bitmap))) + if (t_page && (for_write || test_bit(sector_bit, t_page->bitmap))) return t_page; return NULL; @@ -895,10 +906,10 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) t_page = null_insert_page(nullb, idx << PAGE_SECTORS_SHIFT, true); - __clear_bit(NULLB_PAGE_LOCK, &c_page->bitmap); - if (test_bit(NULLB_PAGE_FREE, &c_page->bitmap)) { + __clear_bit(NULLB_PAGE_LOCK, c_page->bitmap); + if (test_bit(NULLB_PAGE_FREE, c_page->bitmap)) { null_free_page(c_page); - if (t_page && t_page->bitmap == 0) { + if (t_page && null_page_empty(t_page)) { ret = radix_tree_delete_item(&nullb->dev->data, idx, t_page); null_free_page(t_page); @@ -914,11 +925,11 @@ static int null_flush_cache_page(struct nullb *nullb, struct nullb_page *c_page) for (i = 0; i < PAGE_SECTORS; i += (nullb->dev->blocksize >> SECTOR_SHIFT)) { - if (test_bit(i, &c_page->bitmap)) { + if (test_bit(i, c_page->bitmap)) { offset = (i << SECTOR_SHIFT); memcpy(dst + offset, src + offset, nullb->dev->blocksize); - __set_bit(i, &t_page->bitmap); + __set_bit(i, t_page->bitmap); } } @@ -955,10 +966,10 @@ again: * We found the page which is being flushed to disk by other * threads */ - if (test_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap)) + if (test_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap)) c_pages[i] = NULL; else - __set_bit(NULLB_PAGE_LOCK, &c_pages[i]->bitmap); + __set_bit(NULLB_PAGE_LOCK, c_pages[i]->bitmap); } one_round = 0; @@ -1011,7 +1022,7 @@ static int copy_to_nullb(struct nullb *nullb, struct page *source, kunmap_atomic(dst); kunmap_atomic(src); - __set_bit(sector & SECTOR_MASK, &t_page->bitmap); + __set_bit(sector & SECTOR_MASK, t_page->bitmap); if (is_fua) null_free_sector(nullb, sector, true); @@ -1380,7 +1391,15 @@ static bool should_timeout_request(struct request *rq) if (g_timeout_str[0]) return should_fail(&null_timeout_attr, 1); #endif + return false; +} +static bool should_requeue_request(struct request *rq) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (g_requeue_str[0]) + return should_fail(&null_requeue_attr, 1); +#endif return false; } @@ -1391,11 +1410,17 @@ static void null_request_fn(struct request_queue *q) while ((rq = blk_fetch_request(q)) != NULL) { struct nullb_cmd *cmd = rq->special; - if (!should_timeout_request(rq)) { - spin_unlock_irq(q->queue_lock); - null_handle_cmd(cmd); - spin_lock_irq(q->queue_lock); + /* just ignore the request */ + if (should_timeout_request(rq)) + continue; + if (should_requeue_request(rq)) { + blk_requeue_request(q, rq); + continue; } + + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); } } @@ -1422,10 +1447,23 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); - if (!should_timeout_request(bd->rq)) - return null_handle_cmd(cmd); + if (should_requeue_request(bd->rq)) { + /* + * Alternate between hitting the core BUSY path, and the + * driver driven requeue path + */ + nq->requeue_selection++; + if (nq->requeue_selection & 1) + return BLK_STS_RESOURCE; + else { + blk_mq_requeue_request(bd->rq, true); + return BLK_STS_OK; + } + } + if (should_timeout_request(bd->rq)) + return BLK_STS_OK; - return BLK_STS_OK; + return null_handle_cmd(cmd); } static const struct blk_mq_ops null_mq_ops = { @@ -1485,7 +1523,7 @@ static void null_config_discard(struct nullb *nullb) nullb->q->limits.discard_granularity = nullb->dev->blocksize; nullb->q->limits.discard_alignment = nullb->dev->blocksize; blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q); } static int null_open(struct block_device *bdev, fmode_t mode) @@ -1659,16 +1697,27 @@ static void null_validate_conf(struct nullb_device *dev) dev->mbps = 0; } -static bool null_setup_fault(void) -{ #ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION - if (!g_timeout_str[0]) +static bool __null_setup_fault(struct fault_attr *attr, char *str) +{ + if (!str[0]) return true; - if (!setup_fault_attr(&null_timeout_attr, g_timeout_str)) + if (!setup_fault_attr(attr, str)) return false; - null_timeout_attr.verbose = 0; + attr->verbose = 0; + return true; +} +#endif + +static bool null_setup_fault(void) +{ +#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION + if (!__null_setup_fault(&null_timeout_attr, g_timeout_str)) + return false; + if (!__null_setup_fault(&null_requeue_attr, g_requeue_str)) + return false; #endif return true; } @@ -1717,7 +1766,8 @@ static int null_add_dev(struct nullb_device *dev) } null_init_queues(nullb); } else if (dev->queue_mode == NULL_Q_BIO) { - nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node); + nullb->q = blk_alloc_queue_node(GFP_KERNEL, dev->home_node, + NULL); if (!nullb->q) { rv = -ENOMEM; goto out_cleanup_queues; @@ -1758,8 +1808,8 @@ static int null_add_dev(struct nullb_device *dev) } nullb->q->queuedata = nullb; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); @@ -1802,10 +1852,6 @@ static int __init null_init(void) struct nullb *nullb; struct nullb_device *dev; - /* check for nullb_page.bitmap */ - if (sizeof(unsigned long) * 8 - 2 < (PAGE_SIZE >> SECTOR_SHIFT)) - return -EINVAL; - if (g_bs > PAGE_SIZE) { pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 7b8c6368beb7..a026211afb51 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -230,6 +230,8 @@ static int pcd_block_open(struct block_device *bdev, fmode_t mode) struct pcd_unit *cd = bdev->bd_disk->private_data; int ret; + check_disk_change(bdev); + mutex_lock(&pcd_mutex); ret = cdrom_open(&cd->info, bdev, mode); mutex_unlock(&pcd_mutex); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e40da093766..1e03b04819c8 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -51,15 +51,6 @@ #define RBD_DEBUG /* Activate rbd_assert() calls */ /* - * The basic unit of block I/O is a sector. It is interpreted in a - * number of contexts in Linux (blk, bio, genhd), but the default is - * universally 512 bytes. These symbols are just slightly more - * meaningful than the bare numbers they represent. - */ -#define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1ULL << SECTOR_SHIFT) - -/* * Increment the given counter and return its updated value. * If the counter is already 0 it will not be incremented. * If the counter is already at its maximum value returns @@ -4370,7 +4361,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) goto out_tag_set; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ /* set io sizes to object size */ @@ -4383,7 +4374,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_io_opt(q, segment_size); /* enable the discard support */ - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); q->limits.discard_granularity = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index e397d3ee7308..dddb3f2490b6 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -287,10 +287,10 @@ int rsxx_setup_dev(struct rsxx_cardinfo *card) blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, card->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, card->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, card->queue); if (rsxx_discard_supported(card)) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, card->queue); blk_queue_max_discard_sectors(card->queue, RSXX_HW_BLK_SIZE >> 9); card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index e41935ab41ef..bc7aea6d7b7c 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2858,8 +2858,8 @@ static int skd_cons_disk(struct skd_device *skdev) /* set optimal I/O size to 8KB */ blk_queue_io_opt(q, 8192); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); blk_queue_rq_timeout(q, 8 * HZ); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 8077123678ad..5c7fb8cc4149 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -888,13 +888,14 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) card->Active = -1; /* no page is active */ card->bio = NULL; card->biotail = &card->bio; + spin_lock_init(&card->lock); - card->queue = blk_alloc_queue(GFP_KERNEL); + card->queue = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, + &card->lock); if (!card->queue) goto failed_alloc; blk_queue_make_request(card->queue, mm_make_request); - card->queue->queue_lock = &card->lock; card->queue->queuedata = card; tasklet_init(&card->tasklet, process_page, (unsigned long)card); @@ -968,8 +969,6 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) dev_printk(KERN_INFO, &card->dev->dev, "Window size %d bytes, IRQ %d\n", data, dev->irq); - spin_lock_init(&card->lock); - pci_set_drvdata(dev, card); if (pci_write_cmd != 0x0F) /* If not Memory Write & Invalidate */ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 92ec1bbece51..2a8e7813bd1a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -932,15 +932,15 @@ static void blkif_set_queue_limits(struct blkfront_info *info) unsigned int segments = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + blk_queue_flag_set(QUEUE_FLAG_VIRT, rq); if (info->feature_discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, rq); blk_queue_max_discard_sectors(rq, get_capacity(gd)); rq->limits.discard_granularity = info->discard_granularity; rq->limits.discard_alignment = info->discard_alignment; if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_set(QUEUE_FLAG_SECERASE, rq); } /* Hard sector size and max sectors impersonate the equiv. hardware. */ @@ -1611,8 +1611,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - queue_flag_clear(QUEUE_FLAG_DISCARD, rq); - queue_flag_clear(QUEUE_FLAG_SECERASE, rq); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, rq); + blk_queue_flag_clear(QUEUE_FLAG_SECERASE, rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0afa6c8c3857..71b449613cfa 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1530,8 +1530,8 @@ static int zram_add(void) /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ set_capacity(zram->disk, 0); /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned @@ -1544,7 +1544,7 @@ static int zram_add(void) blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); zram->disk->queue->limits.discard_granularity = PAGE_SIZE; blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); /* * zram_bio_discard() will clear all logical blocks if logical block @@ -1620,8 +1620,8 @@ static int zram_remove(struct zram *zram) pr_info("Removed device: %s\n", zram->disk->disk_name); - blk_cleanup_queue(zram->disk->queue); del_gendisk(zram->disk); + blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); kfree(zram); return 0; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 31762db861e3..1e9bf65c0bfb 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -37,7 +37,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /*-- End of configurable params */ -#define SECTOR_SHIFT 9 #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index e36d160c458f..8327478effd0 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1152,9 +1152,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, cd_dbg(CD_OPEN, "entering cdrom_open\n"); - /* open is event synchronization point, check events first */ - check_disk_change(bdev); - /* if this was a O_NONBLOCK open and we should honor the flags, * do a quick open without drive/disc integrity checks. */ cdi->use_count++; diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 6495b03f576c..ae3a7537cf0f 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -497,6 +497,9 @@ static const struct cdrom_device_ops gdrom_ops = { static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode) { int ret; + + check_disk_change(bdev); + mutex_lock(&gdrom_mutex); ret = cdrom_open(gd.cd_info, bdev, mode); mutex_unlock(&gdrom_mutex); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 7c3ed7c9af77..5a8e8e3c22cd 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -712,7 +712,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) struct request_queue *q = drive->queue; int write = rq_data_dir(rq) == WRITE; unsigned short sectors_per_frame = - queue_logical_block_size(q) >> SECTOR_BITS; + queue_logical_block_size(q) >> SECTOR_SHIFT; ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, " "secs_per_frame: %u", @@ -919,7 +919,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, * end up being bogus. */ blocklen = be32_to_cpu(capbuf.blocklen); - blocklen = (blocklen >> SECTOR_BITS) << SECTOR_BITS; + blocklen = (blocklen >> SECTOR_SHIFT) << SECTOR_SHIFT; switch (blocklen) { case 512: case 1024: @@ -935,7 +935,7 @@ static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity, } *capacity = 1 + be32_to_cpu(capbuf.lba); - *sectors_per_frame = blocklen >> SECTOR_BITS; + *sectors_per_frame = blocklen >> SECTOR_SHIFT; ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu", *capacity, *sectors_per_frame); @@ -1012,7 +1012,7 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense) drive->probed_capacity = toc->capacity * sectors_per_frame; blk_queue_logical_block_size(drive->queue, - sectors_per_frame << SECTOR_BITS); + sectors_per_frame << SECTOR_SHIFT); /* first read just the header, so we know how long the TOC is */ stat = cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr, @@ -1613,6 +1613,8 @@ static int idecd_open(struct block_device *bdev, fmode_t mode) struct cdrom_info *info; int rc = -ENXIO; + check_disk_change(bdev); + mutex_lock(&ide_cd_mutex); info = ide_cd_get(bdev->bd_disk); if (!info) diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 264e822eba58..04f0f310a856 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -21,11 +21,7 @@ /************************************************************************/ -#define SECTOR_BITS 9 -#ifndef SECTOR_SIZE -#define SECTOR_SIZE (1 << SECTOR_BITS) -#endif -#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_BITS) +#define SECTORS_PER_FRAME (CD_FRAMESIZE >> SECTOR_SHIFT) #define SECTOR_BUFFER_SIZE (CD_FRAMESIZE * 32) /* Capabilities Page size including 8 bytes of Mode Page Header */ diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 67bc72d78fbf..f1a7c58fe418 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -687,8 +687,8 @@ static void ide_disk_setup(ide_drive_t *drive) queue_max_sectors(q) / 2); if (ata_id_is_ssd(id)) { - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } /* calculate drive capacity, and select LBA if possible */ diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index caa20eb5f26b..2019e66eada7 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -766,14 +766,14 @@ static int ide_init_queue(ide_drive_t *drive) * limits and LBA48 we could raise it but as yet * do not. */ - q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif)); + q = blk_alloc_queue_node(GFP_KERNEL, hwif_to_node(hwif), NULL); if (!q) return 1; q->request_fn = do_ide_request; q->initialize_rq_fn = ide_initialize_rq; q->cmd_size = sizeof(struct ide_request); - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); if (blk_init_allocated_queue(q) < 0) { blk_cleanup_queue(q); return 1; diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c index dcc9e621e651..63171cdce270 100644 --- a/drivers/lightnvm/core.c +++ b/drivers/lightnvm/core.c @@ -36,13 +36,13 @@ static DECLARE_RWSEM(nvm_lock); /* Map between virtual and physical channel and lun */ struct nvm_ch_map { int ch_off; - int nr_luns; + int num_lun; int *lun_offs; }; struct nvm_dev_map { struct nvm_ch_map *chnls; - int nr_chnls; + int num_ch; }; static struct nvm_target *nvm_find_target(struct nvm_dev *dev, const char *name) @@ -114,15 +114,15 @@ static void nvm_remove_tgt_dev(struct nvm_tgt_dev *tgt_dev, int clear) struct nvm_dev_map *dev_map = tgt_dev->map; int i, j; - for (i = 0; i < dev_map->nr_chnls; i++) { + for (i = 0; i < dev_map->num_ch; i++) { struct nvm_ch_map *ch_map = &dev_map->chnls[i]; int *lun_offs = ch_map->lun_offs; int ch = i + ch_map->ch_off; if (clear) { - for (j = 0; j < ch_map->nr_luns; j++) { + for (j = 0; j < ch_map->num_lun; j++) { int lun = j + lun_offs[j]; - int lunid = (ch * dev->geo.nr_luns) + lun; + int lunid = (ch * dev->geo.num_lun) + lun; WARN_ON(!test_and_clear_bit(lunid, dev->lun_map)); @@ -147,47 +147,46 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, struct nvm_dev_map *dev_rmap = dev->rmap; struct nvm_dev_map *dev_map; struct ppa_addr *luns; - int nr_luns = lun_end - lun_begin + 1; - int luns_left = nr_luns; - int nr_chnls = nr_luns / dev->geo.nr_luns; - int nr_chnls_mod = nr_luns % dev->geo.nr_luns; - int bch = lun_begin / dev->geo.nr_luns; - int blun = lun_begin % dev->geo.nr_luns; + int num_lun = lun_end - lun_begin + 1; + int luns_left = num_lun; + int num_ch = num_lun / dev->geo.num_lun; + int num_ch_mod = num_lun % dev->geo.num_lun; + int bch = lun_begin / dev->geo.num_lun; + int blun = lun_begin % dev->geo.num_lun; int lunid = 0; int lun_balanced = 1; - int prev_nr_luns; + int sec_per_lun, prev_num_lun; int i, j; - nr_chnls = (nr_chnls_mod == 0) ? nr_chnls : nr_chnls + 1; + num_ch = (num_ch_mod == 0) ? num_ch : num_ch + 1; dev_map = kmalloc(sizeof(struct nvm_dev_map), GFP_KERNEL); if (!dev_map) goto err_dev; - dev_map->chnls = kcalloc(nr_chnls, sizeof(struct nvm_ch_map), - GFP_KERNEL); + dev_map->chnls = kcalloc(num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL); if (!dev_map->chnls) goto err_chnls; - luns = kcalloc(nr_luns, sizeof(struct ppa_addr), GFP_KERNEL); + luns = kcalloc(num_lun, sizeof(struct ppa_addr), GFP_KERNEL); if (!luns) goto err_luns; - prev_nr_luns = (luns_left > dev->geo.nr_luns) ? - dev->geo.nr_luns : luns_left; - for (i = 0; i < nr_chnls; i++) { + prev_num_lun = (luns_left > dev->geo.num_lun) ? + dev->geo.num_lun : luns_left; + for (i = 0; i < num_ch; i++) { struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[i + bch]; int *lun_roffs = ch_rmap->lun_offs; struct nvm_ch_map *ch_map = &dev_map->chnls[i]; int *lun_offs; - int luns_in_chnl = (luns_left > dev->geo.nr_luns) ? - dev->geo.nr_luns : luns_left; + int luns_in_chnl = (luns_left > dev->geo.num_lun) ? + dev->geo.num_lun : luns_left; - if (lun_balanced && prev_nr_luns != luns_in_chnl) + if (lun_balanced && prev_num_lun != luns_in_chnl) lun_balanced = 0; ch_map->ch_off = ch_rmap->ch_off = bch; - ch_map->nr_luns = luns_in_chnl; + ch_map->num_lun = luns_in_chnl; lun_offs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); if (!lun_offs) @@ -195,8 +194,8 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, for (j = 0; j < luns_in_chnl; j++) { luns[lunid].ppa = 0; - luns[lunid].g.ch = i; - luns[lunid++].g.lun = j; + luns[lunid].a.ch = i; + luns[lunid++].a.lun = j; lun_offs[j] = blun; lun_roffs[j + blun] = blun; @@ -209,24 +208,29 @@ static struct nvm_tgt_dev *nvm_create_tgt_dev(struct nvm_dev *dev, luns_left -= luns_in_chnl; } - dev_map->nr_chnls = nr_chnls; + dev_map->num_ch = num_ch; tgt_dev = kmalloc(sizeof(struct nvm_tgt_dev), GFP_KERNEL); if (!tgt_dev) goto err_ch; + /* Inherit device geometry from parent */ memcpy(&tgt_dev->geo, &dev->geo, sizeof(struct nvm_geo)); + /* Target device only owns a portion of the physical device */ - tgt_dev->geo.nr_chnls = nr_chnls; - tgt_dev->geo.all_luns = nr_luns; - tgt_dev->geo.nr_luns = (lun_balanced) ? prev_nr_luns : -1; + tgt_dev->geo.num_ch = num_ch; + tgt_dev->geo.num_lun = (lun_balanced) ? prev_num_lun : -1; + tgt_dev->geo.all_luns = num_lun; + tgt_dev->geo.all_chunks = num_lun * dev->geo.num_chk; + tgt_dev->geo.op = op; - tgt_dev->total_secs = nr_luns * tgt_dev->geo.sec_per_lun; + + sec_per_lun = dev->geo.clba * dev->geo.num_chk; + tgt_dev->geo.total_secs = num_lun * sec_per_lun; + tgt_dev->q = dev->q; tgt_dev->map = dev_map; tgt_dev->luns = luns; - memcpy(&tgt_dev->identity, &dev->identity, sizeof(struct nvm_id)); - tgt_dev->parent = dev; return tgt_dev; @@ -296,24 +300,20 @@ static int __nvm_config_simple(struct nvm_dev *dev, static int __nvm_config_extended(struct nvm_dev *dev, struct nvm_ioctl_create_extended *e) { - struct nvm_geo *geo = &dev->geo; - if (e->lun_begin == 0xFFFF && e->lun_end == 0xFFFF) { e->lun_begin = 0; e->lun_end = dev->geo.all_luns - 1; } /* op not set falls into target's default */ - if (e->op == 0xFFFF) + if (e->op == 0xFFFF) { e->op = NVM_TARGET_DEFAULT_OP; - - if (e->op < NVM_TARGET_MIN_OP || - e->op > NVM_TARGET_MAX_OP) { + } else if (e->op < NVM_TARGET_MIN_OP || e->op > NVM_TARGET_MAX_OP) { pr_err("nvm: invalid over provisioning value\n"); return -EINVAL; } - return nvm_config_check_luns(geo, e->lun_begin, e->lun_end); + return nvm_config_check_luns(&dev->geo, e->lun_begin, e->lun_end); } static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) @@ -384,7 +384,7 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) goto err_dev; } - tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); + tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node, NULL); if (!tqueue) { ret = -ENOMEM; goto err_disk; @@ -407,7 +407,8 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create) tdisk->private_data = targetdata; tqueue->queuedata = targetdata; - blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + blk_queue_max_hw_sectors(tqueue, + (dev->geo.csecs >> 9) * NVM_MAX_VLBA); set_capacity(tdisk, tt->capacity(targetdata)); add_disk(tdisk); @@ -503,20 +504,20 @@ static int nvm_register_map(struct nvm_dev *dev) if (!rmap) goto err_rmap; - rmap->chnls = kcalloc(dev->geo.nr_chnls, sizeof(struct nvm_ch_map), + rmap->chnls = kcalloc(dev->geo.num_ch, sizeof(struct nvm_ch_map), GFP_KERNEL); if (!rmap->chnls) goto err_chnls; - for (i = 0; i < dev->geo.nr_chnls; i++) { + for (i = 0; i < dev->geo.num_ch; i++) { struct nvm_ch_map *ch_rmap; int *lun_roffs; - int luns_in_chnl = dev->geo.nr_luns; + int luns_in_chnl = dev->geo.num_lun; ch_rmap = &rmap->chnls[i]; ch_rmap->ch_off = -1; - ch_rmap->nr_luns = luns_in_chnl; + ch_rmap->num_lun = luns_in_chnl; lun_roffs = kcalloc(luns_in_chnl, sizeof(int), GFP_KERNEL); if (!lun_roffs) @@ -545,7 +546,7 @@ static void nvm_unregister_map(struct nvm_dev *dev) struct nvm_dev_map *rmap = dev->rmap; int i; - for (i = 0; i < dev->geo.nr_chnls; i++) + for (i = 0; i < dev->geo.num_ch; i++) kfree(rmap->chnls[i].lun_offs); kfree(rmap->chnls); @@ -555,22 +556,22 @@ static void nvm_unregister_map(struct nvm_dev *dev) static void nvm_map_to_dev(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) { struct nvm_dev_map *dev_map = tgt_dev->map; - struct nvm_ch_map *ch_map = &dev_map->chnls[p->g.ch]; - int lun_off = ch_map->lun_offs[p->g.lun]; + struct nvm_ch_map *ch_map = &dev_map->chnls[p->a.ch]; + int lun_off = ch_map->lun_offs[p->a.lun]; - p->g.ch += ch_map->ch_off; - p->g.lun += lun_off; + p->a.ch += ch_map->ch_off; + p->a.lun += lun_off; } static void nvm_map_to_tgt(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *p) { struct nvm_dev *dev = tgt_dev->parent; struct nvm_dev_map *dev_rmap = dev->rmap; - struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->g.ch]; - int lun_roff = ch_rmap->lun_offs[p->g.lun]; + struct nvm_ch_map *ch_rmap = &dev_rmap->chnls[p->a.ch]; + int lun_roff = ch_rmap->lun_offs[p->a.lun]; - p->g.ch -= ch_rmap->ch_off; - p->g.lun -= lun_roff; + p->a.ch -= ch_rmap->ch_off; + p->a.lun -= lun_roff; } static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, @@ -580,7 +581,7 @@ static void nvm_ppa_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, for (i = 0; i < nr_ppas; i++) { nvm_map_to_dev(tgt_dev, &ppa_list[i]); - ppa_list[i] = generic_to_dev_addr(tgt_dev, ppa_list[i]); + ppa_list[i] = generic_to_dev_addr(tgt_dev->parent, ppa_list[i]); } } @@ -590,7 +591,7 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, int i; for (i = 0; i < nr_ppas; i++) { - ppa_list[i] = dev_to_generic_addr(tgt_dev, ppa_list[i]); + ppa_list[i] = dev_to_generic_addr(tgt_dev->parent, ppa_list[i]); nvm_map_to_tgt(tgt_dev, &ppa_list[i]); } } @@ -674,7 +675,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, int i, plane_cnt, pl_idx; struct ppa_addr ppa; - if (geo->plane_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { + if (geo->pln_mode == NVM_PLANE_SINGLE && nr_ppas == 1) { rqd->nr_ppas = nr_ppas; rqd->ppa_addr = ppas[0]; @@ -688,7 +689,7 @@ static int nvm_set_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, return -ENOMEM; } - plane_cnt = geo->plane_mode; + plane_cnt = geo->pln_mode; rqd->nr_ppas *= plane_cnt; for (i = 0; i < nr_ppas; i++) { @@ -711,6 +712,17 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev, nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list); } +int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta, + struct ppa_addr ppa, int nchks) +{ + struct nvm_dev *dev = tgt_dev->parent; + + nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1); + + return dev->ops->get_chk_meta(tgt_dev->parent, meta, + (sector_t)ppa.ppa, nchks); +} +EXPORT_SYMBOL(nvm_get_chunk_meta); int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, int nr_ppas, int type) @@ -719,7 +731,7 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, struct nvm_rq rqd; int ret; - if (nr_ppas > dev->ops->max_phys_sect) { + if (nr_ppas > NVM_MAX_VLBA) { pr_err("nvm: unable to update all blocks atomically\n"); return -EINVAL; } @@ -740,14 +752,6 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas, } EXPORT_SYMBOL(nvm_set_tgt_bb_tbl); -int nvm_max_phys_sects(struct nvm_tgt_dev *tgt_dev) -{ - struct nvm_dev *dev = tgt_dev->parent; - - return dev->ops->max_phys_sect; -} -EXPORT_SYMBOL(nvm_max_phys_sects); - int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd) { struct nvm_dev *dev = tgt_dev->parent; @@ -814,15 +818,15 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) struct nvm_geo *geo = &dev->geo; int blk, offset, pl, blktype; - if (nr_blks != geo->nr_chks * geo->plane_mode) + if (nr_blks != geo->num_chk * geo->pln_mode) return -EINVAL; - for (blk = 0; blk < geo->nr_chks; blk++) { - offset = blk * geo->plane_mode; + for (blk = 0; blk < geo->num_chk; blk++) { + offset = blk * geo->pln_mode; blktype = blks[offset]; /* Bad blocks on any planes take precedence over other types */ - for (pl = 0; pl < geo->plane_mode; pl++) { + for (pl = 0; pl < geo->pln_mode; pl++) { if (blks[offset + pl] & (NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) { blktype = blks[offset + pl]; @@ -833,7 +837,7 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks) blks[blk] = blktype; } - return geo->nr_chks; + return geo->num_chk; } EXPORT_SYMBOL(nvm_bb_tbl_fold); @@ -850,44 +854,9 @@ EXPORT_SYMBOL(nvm_get_tgt_bb_tbl); static int nvm_core_init(struct nvm_dev *dev) { - struct nvm_id *id = &dev->identity; - struct nvm_id_group *grp = &id->grp; struct nvm_geo *geo = &dev->geo; int ret; - memcpy(&geo->ppaf, &id->ppaf, sizeof(struct nvm_addr_format)); - - if (grp->mtype != 0) { - pr_err("nvm: memory type not supported\n"); - return -EINVAL; - } - - /* Whole device values */ - geo->nr_chnls = grp->num_ch; - geo->nr_luns = grp->num_lun; - - /* Generic device geometry values */ - geo->ws_min = grp->ws_min; - geo->ws_opt = grp->ws_opt; - geo->ws_seq = grp->ws_seq; - geo->ws_per_chk = grp->ws_per_chk; - geo->nr_chks = grp->num_chk; - geo->sec_size = grp->csecs; - geo->oob_size = grp->sos; - geo->mccap = grp->mccap; - geo->max_rq_size = dev->ops->max_phys_sect * geo->sec_size; - - geo->sec_per_chk = grp->clba; - geo->sec_per_lun = geo->sec_per_chk * geo->nr_chks; - geo->all_luns = geo->nr_luns * geo->nr_chnls; - - /* 1.2 spec device geometry values */ - geo->plane_mode = 1 << geo->ws_seq; - geo->nr_planes = geo->ws_opt / geo->ws_min; - geo->sec_per_pg = geo->ws_min; - geo->sec_per_pl = geo->sec_per_pg * geo->nr_planes; - - dev->total_secs = geo->all_luns * geo->sec_per_lun; dev->lun_map = kcalloc(BITS_TO_LONGS(geo->all_luns), sizeof(unsigned long), GFP_KERNEL); if (!dev->lun_map) @@ -902,7 +871,6 @@ static int nvm_core_init(struct nvm_dev *dev) if (ret) goto err_fmtype; - blk_queue_logical_block_size(dev->q, geo->sec_size); return 0; err_fmtype: kfree(dev->lun_map); @@ -927,18 +895,14 @@ static int nvm_init(struct nvm_dev *dev) struct nvm_geo *geo = &dev->geo; int ret = -EINVAL; - if (dev->ops->identity(dev, &dev->identity)) { + if (dev->ops->identity(dev)) { pr_err("nvm: device could not be identified\n"); goto err; } - pr_debug("nvm: ver:%x nvm_vendor:%x\n", - dev->identity.ver_id, dev->identity.vmnt); - - if (dev->identity.ver_id != 1) { - pr_err("nvm: device not supported by kernel."); - goto err; - } + pr_debug("nvm: ver:%u.%u nvm_vendor:%x\n", + geo->major_ver_id, geo->minor_ver_id, + geo->vmnt); ret = nvm_core_init(dev); if (ret) { @@ -946,10 +910,10 @@ static int nvm_init(struct nvm_dev *dev) goto err; } - pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", - dev->name, geo->sec_per_pg, geo->nr_planes, - geo->ws_per_chk, geo->nr_chks, - geo->all_luns, geo->nr_chnls); + pr_info("nvm: registered %s [%u/%u/%u/%u/%u]\n", + dev->name, dev->geo.ws_min, dev->geo.ws_opt, + dev->geo.num_chk, dev->geo.all_luns, + dev->geo.num_ch); return 0; err: pr_err("nvm: failed to initialize nvm\n"); @@ -969,17 +933,10 @@ int nvm_register(struct nvm_dev *dev) if (!dev->q || !dev->ops) return -EINVAL; - if (dev->ops->max_phys_sect > 256) { - pr_info("nvm: max sectors supported is 256.\n"); - return -EINVAL; - } - - if (dev->ops->max_phys_sect > 1) { - dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); - if (!dev->dma_pool) { - pr_err("nvm: could not create dma pool\n"); - return -ENOMEM; - } + dev->dma_pool = dev->ops->create_dma_pool(dev, "ppalist"); + if (!dev->dma_pool) { + pr_err("nvm: could not create dma pool\n"); + return -ENOMEM; } ret = nvm_init(dev); @@ -1040,9 +997,6 @@ static long nvm_ioctl_info(struct file *file, void __user *arg) struct nvm_tgt_type *tt; int tgt_iter = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); if (IS_ERR(info)) return -EFAULT; @@ -1081,9 +1035,6 @@ static long nvm_ioctl_get_devices(struct file *file, void __user *arg) struct nvm_dev *dev; int i = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); if (!devices) return -ENOMEM; @@ -1124,9 +1075,6 @@ static long nvm_ioctl_dev_create(struct file *file, void __user *arg) { struct nvm_ioctl_create create; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) return -EFAULT; @@ -1162,9 +1110,6 @@ static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) struct nvm_dev *dev; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) return -EFAULT; @@ -1189,9 +1134,6 @@ static long nvm_ioctl_dev_init(struct file *file, void __user *arg) { struct nvm_ioctl_dev_init init; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&init, arg, sizeof(struct nvm_ioctl_dev_init))) return -EFAULT; @@ -1208,9 +1150,6 @@ static long nvm_ioctl_dev_factory(struct file *file, void __user *arg) { struct nvm_ioctl_dev_factory fact; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&fact, arg, sizeof(struct nvm_ioctl_dev_factory))) return -EFAULT; @@ -1226,6 +1165,9 @@ static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) { void __user *argp = (void __user *)arg; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + switch (cmd) { case NVM_INFO: return nvm_ioctl_info(file, argp); diff --git a/drivers/lightnvm/pblk-cache.c b/drivers/lightnvm/pblk-cache.c index 000fcad38136..29a23111b31c 100644 --- a/drivers/lightnvm/pblk-cache.c +++ b/drivers/lightnvm/pblk-cache.c @@ -63,6 +63,8 @@ retry: bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE); } + atomic64_add(nr_entries, &pblk->user_wa); + #ifdef CONFIG_NVM_DEBUG atomic_long_add(nr_entries, &pblk->inflight_writes); atomic_long_add(nr_entries, &pblk->req_writes); @@ -117,6 +119,8 @@ retry: WARN_ONCE(gc_rq->secs_to_gc != valid_entries, "pblk: inconsistent GC write\n"); + atomic64_add(valid_entries, &pblk->gc_wa); + #ifdef CONFIG_NVM_DEBUG atomic_long_add(valid_entries, &pblk->inflight_writes); atomic_long_add(valid_entries, &pblk->recov_gc_writes); diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 0487b9340c1d..94d5d97c9d8a 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -44,11 +44,12 @@ static void pblk_line_mark_bb(struct work_struct *work) } static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, - struct ppa_addr *ppa) + struct ppa_addr ppa_addr) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - int pos = pblk_ppa_to_pos(geo, *ppa); + struct ppa_addr *ppa; + int pos = pblk_ppa_to_pos(geo, ppa_addr); pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos); atomic_long_inc(&pblk->erase_failed); @@ -58,26 +59,38 @@ static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line, pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n", line->id, pos); + /* Not necessary to mark bad blocks on 2.0 spec. */ + if (geo->version == NVM_OCSSD_SPEC_20) + return; + + ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); + if (!ppa) + return; + + *ppa = ppa_addr; pblk_gen_run_ws(pblk, NULL, ppa, pblk_line_mark_bb, GFP_ATOMIC, pblk->bb_wq); } static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_chk_meta *chunk; struct pblk_line *line; + int pos; line = &pblk->lines[pblk_ppa_to_line(rqd->ppa_addr)]; + pos = pblk_ppa_to_pos(geo, rqd->ppa_addr); + chunk = &line->chks[pos]; + atomic_dec(&line->left_seblks); if (rqd->error) { - struct ppa_addr *ppa; - - ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC); - if (!ppa) - return; - - *ppa = rqd->ppa_addr; - pblk_mark_bb(pblk, line, ppa); + chunk->state = NVM_CHK_ST_OFFLINE; + pblk_mark_bb(pblk, line, rqd->ppa_addr); + } else { + chunk->state = NVM_CHK_ST_FREE; } atomic_dec(&pblk->inflight_io); @@ -92,6 +105,49 @@ static void pblk_end_io_erase(struct nvm_rq *rqd) mempool_free(rqd, pblk->e_rq_pool); } +/* + * Get information for all chunks from the device. + * + * The caller is responsible for freeing the returned structure + */ +struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct nvm_chk_meta *meta; + struct ppa_addr ppa; + unsigned long len; + int ret; + + ppa.ppa = 0; + + len = geo->all_chunks * sizeof(*meta); + meta = kzalloc(len, GFP_KERNEL); + if (!meta) + return ERR_PTR(-ENOMEM); + + ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks); + if (ret) { + kfree(meta); + return ERR_PTR(-EIO); + } + + return meta; +} + +struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, + struct nvm_chk_meta *meta, + struct ppa_addr ppa) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int ch_off = ppa.m.grp * geo->num_chk * geo->num_lun; + int lun_off = ppa.m.pu * geo->num_chk; + int chk_off = ppa.m.chk; + + return meta + ch_off + lun_off + chk_off; +} + void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line, u64 paddr) { @@ -613,7 +669,7 @@ next_rq: memset(&rqd, 0, sizeof(struct nvm_rq)); rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, l_mg->emeta_alloc_type, GFP_KERNEL); @@ -722,7 +778,7 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line) if (bit >= lm->blk_per_line) return -1; - return bit * geo->sec_per_pl; + return bit * geo->ws_opt; } static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line, @@ -885,7 +941,7 @@ int pblk_line_erase(struct pblk *pblk, struct pblk_line *line) } ppa = pblk->luns[bit].bppa; /* set ch and lun */ - ppa.g.blk = line->id; + ppa.a.blk = line->id; atomic_dec(&line->left_eblks); WARN_ON(test_and_set_bit(bit, line->erase_bitmap)); @@ -975,7 +1031,8 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16); smeta_buf->header.id = cpu_to_le32(line->id); smeta_buf->header.type = cpu_to_le16(line->type); - smeta_buf->header.version = SMETA_VERSION; + smeta_buf->header.version_major = SMETA_VERSION_MAJOR; + smeta_buf->header.version_minor = SMETA_VERSION_MINOR; /* Start metadata */ smeta_buf->seq_nr = cpu_to_le64(line->seq_nr); @@ -998,6 +1055,12 @@ static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line, /* End metadata */ memcpy(&emeta_buf->header, &smeta_buf->header, sizeof(struct line_header)); + + emeta_buf->header.version_major = EMETA_VERSION_MAJOR; + emeta_buf->header.version_minor = EMETA_VERSION_MINOR; + emeta_buf->header.crc = cpu_to_le32( + pblk_calc_meta_header_crc(pblk, &emeta_buf->header)); + emeta_buf->seq_nr = cpu_to_le64(line->seq_nr); emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line); emeta_buf->nr_valid_lbas = cpu_to_le64(0); @@ -1018,28 +1081,26 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, struct nvm_geo *geo = &dev->geo; struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int nr_bb = 0; u64 off; int bit = -1; + int emeta_secs; line->sec_in_line = lm->sec_per_line; /* Capture bad block information on line mapping bitmaps */ while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line, bit + 1)) < lm->blk_per_line) { - off = bit * geo->sec_per_pl; + off = bit * geo->ws_opt; bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off, lm->sec_per_line); bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux, lm->sec_per_line); - line->sec_in_line -= geo->sec_per_chk; - if (bit >= lm->emeta_bb) - nr_bb++; + line->sec_in_line -= geo->clba; } /* Mark smeta metadata sectors as bad sectors */ bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line); - off = bit * geo->sec_per_pl; + off = bit * geo->ws_opt; bitmap_set(line->map_bitmap, off, lm->smeta_sec); line->sec_in_line -= lm->smeta_sec; line->smeta_ssec = off; @@ -1055,18 +1116,18 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, /* Mark emeta metadata sectors as bad sectors. We need to consider bad * blocks to make sure that there are enough sectors to store emeta */ - off = lm->sec_per_line - lm->emeta_sec[0]; - bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]); - while (nr_bb) { - off -= geo->sec_per_pl; + emeta_secs = lm->emeta_sec[0]; + off = lm->sec_per_line; + while (emeta_secs) { + off -= geo->ws_opt; if (!test_bit(off, line->invalid_bitmap)) { - bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl); - nr_bb--; + bitmap_set(line->invalid_bitmap, off, geo->ws_opt); + emeta_secs -= geo->ws_opt; } } - line->sec_in_line -= lm->emeta_sec[0]; line->emeta_ssec = off; + line->sec_in_line -= lm->emeta_sec[0]; line->nr_valid_lbas = 0; line->left_msecs = line->sec_in_line; *line->vsc = cpu_to_le32(line->sec_in_line); @@ -1086,10 +1147,34 @@ static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line, return 1; } +static int pblk_prepare_new_line(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int blk_to_erase = atomic_read(&line->blk_in_line); + int i; + + for (i = 0; i < lm->blk_per_line; i++) { + struct pblk_lun *rlun = &pblk->luns[i]; + int pos = pblk_ppa_to_pos(geo, rlun->bppa); + int state = line->chks[pos].state; + + /* Free chunks should not be erased */ + if (state & NVM_CHK_ST_FREE) { + set_bit(pblk_ppa_to_pos(geo, rlun->bppa), + line->erase_bitmap); + blk_to_erase--; + } + } + + return blk_to_erase; +} + static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) { struct pblk_line_meta *lm = &pblk->lm; - int blk_in_line = atomic_read(&line->blk_in_line); + int blk_to_erase; line->map_bitmap = kzalloc(lm->sec_bitmap_len, GFP_ATOMIC); if (!line->map_bitmap) @@ -1102,7 +1187,21 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) return -ENOMEM; } + /* Bad blocks do not need to be erased */ + bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); + spin_lock(&line->lock); + + /* If we have not written to this line, we need to mark up free chunks + * as already erased + */ + if (line->state == PBLK_LINESTATE_NEW) { + blk_to_erase = pblk_prepare_new_line(pblk, line); + line->state = PBLK_LINESTATE_FREE; + } else { + blk_to_erase = atomic_read(&line->blk_in_line); + } + if (line->state != PBLK_LINESTATE_FREE) { kfree(line->map_bitmap); kfree(line->invalid_bitmap); @@ -1114,15 +1213,12 @@ static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line) line->state = PBLK_LINESTATE_OPEN; - atomic_set(&line->left_eblks, blk_in_line); - atomic_set(&line->left_seblks, blk_in_line); + atomic_set(&line->left_eblks, blk_to_erase); + atomic_set(&line->left_seblks, blk_to_erase); line->meta_distance = lm->meta_distance; spin_unlock(&line->lock); - /* Bad blocks do not need to be erased */ - bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line); - kref_init(&line->ref); return 0; @@ -1399,13 +1495,6 @@ struct pblk_line *pblk_line_replace_data(struct pblk *pblk) l_mg->data_line = new; spin_lock(&l_mg->free_lock); - if (pblk->state != PBLK_STATE_RUNNING) { - l_mg->data_line = NULL; - l_mg->data_next = NULL; - spin_unlock(&l_mg->free_lock); - goto out; - } - pblk_line_setup_metadata(new, l_mg, &pblk->lm); spin_unlock(&l_mg->free_lock); @@ -1585,12 +1674,14 @@ static void pblk_line_should_sync_meta(struct pblk *pblk) void pblk_line_close(struct pblk *pblk, struct pblk_line *line) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct list_head *move_list; + int i; #ifdef CONFIG_NVM_DEBUG - struct pblk_line_meta *lm = &pblk->lm; - WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line), "pblk: corrupt closed line %d\n", line->id); #endif @@ -1612,6 +1703,15 @@ void pblk_line_close(struct pblk *pblk, struct pblk_line *line) line->smeta = NULL; line->emeta = NULL; + for (i = 0; i < lm->blk_per_line; i++) { + struct pblk_lun *rlun = &pblk->luns[i]; + int pos = pblk_ppa_to_pos(geo, rlun->bppa); + int state = line->chks[pos].state; + + if (!(state & NVM_CHK_ST_OFFLINE)) + state = NVM_CHK_ST_CLOSED; + } + spin_unlock(&line->lock); spin_unlock(&l_mg->gc_lock); } @@ -1622,11 +1722,16 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line) struct pblk_line_meta *lm = &pblk->lm; struct pblk_emeta *emeta = line->emeta; struct line_emeta *emeta_buf = emeta->buf; + struct wa_counters *wa = emeta_to_wa(lm, emeta_buf); /* No need for exact vsc value; avoid a big line lock and take aprox. */ memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len); memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len); + wa->user = cpu_to_le64(atomic64_read(&pblk->user_wa)); + wa->pad = cpu_to_le64(atomic64_read(&pblk->pad_wa)); + wa->gc = cpu_to_le64(atomic64_read(&pblk->gc_wa)); + emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas); emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf)); @@ -1680,8 +1785,8 @@ static void __pblk_down_page(struct pblk *pblk, struct ppa_addr *ppa_list, int i; for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun || - ppa_list[0].g.ch != ppa_list[i].g.ch); + WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || + ppa_list[0].a.ch != ppa_list[i].a.ch); #endif ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(30000)); @@ -1725,8 +1830,8 @@ void pblk_up_page(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas) int i; for (i = 1; i < nr_ppas; i++) - WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun || - ppa_list[0].g.ch != ppa_list[i].g.ch); + WARN_ON(ppa_list[0].a.lun != ppa_list[i].a.lun || + ppa_list[0].a.ch != ppa_list[i].a.ch); #endif rlun = &pblk->luns[pos]; @@ -1739,10 +1844,10 @@ void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas, struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int nr_luns = geo->all_luns; + int num_lun = geo->all_luns; int bit = -1; - while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) { + while ((bit = find_next_bit(lun_bitmap, num_lun, bit + 1)) < num_lun) { rlun = &pblk->luns[bit]; up(&rlun->wr_sem); } @@ -1829,6 +1934,7 @@ void pblk_update_map_dev(struct pblk *pblk, sector_t lba, #endif /* Invalidate and discard padded entries */ if (lba == ADDR_EMPTY) { + atomic64_inc(&pblk->pad_wa); #ifdef CONFIG_NVM_DEBUG atomic_long_inc(&pblk->padded_wb); #endif diff --git a/drivers/lightnvm/pblk-gc.c b/drivers/lightnvm/pblk-gc.c index 3d899383666e..6851a5c67189 100644 --- a/drivers/lightnvm/pblk-gc.c +++ b/drivers/lightnvm/pblk-gc.c @@ -88,7 +88,7 @@ static void pblk_gc_line_ws(struct work_struct *work) up(&gc->gc_sem); - gc_rq->data = vmalloc(gc_rq->nr_secs * geo->sec_size); + gc_rq->data = vmalloc(gc_rq->nr_secs * geo->csecs); if (!gc_rq->data) { pr_err("pblk: could not GC line:%d (%d/%d)\n", line->id, *line->vsc, gc_rq->nr_secs); @@ -147,10 +147,8 @@ static void pblk_gc_line_prepare_ws(struct work_struct *work) int ret; invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!invalid_bitmap) { - pr_err("pblk: could not allocate GC invalid bitmap\n"); + if (!invalid_bitmap) goto fail_free_ws; - } emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type, GFP_KERNEL); @@ -666,12 +664,10 @@ void pblk_gc_exit(struct pblk *pblk) kthread_stop(gc->gc_reader_ts); flush_workqueue(gc->gc_reader_wq); - if (gc->gc_reader_wq) - destroy_workqueue(gc->gc_reader_wq); + destroy_workqueue(gc->gc_reader_wq); flush_workqueue(gc->gc_line_reader_wq); - if (gc->gc_line_reader_wq) - destroy_workqueue(gc->gc_line_reader_wq); + destroy_workqueue(gc->gc_line_reader_wq); if (gc->gc_writer_ts) kthread_stop(gc->gc_writer_ts); diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c index 93d671ca518e..91a5bc2556a3 100644 --- a/drivers/lightnvm/pblk-init.c +++ b/drivers/lightnvm/pblk-init.c @@ -80,7 +80,7 @@ static size_t pblk_trans_map_size(struct pblk *pblk) { int entry_size = 8; - if (pblk->ppaf_bitsize < 32) + if (pblk->addrf_len < 32) entry_size = 4; return entry_size * pblk->rl.nr_secs; @@ -103,7 +103,40 @@ static void pblk_l2p_free(struct pblk *pblk) vfree(pblk->trans_map); } -static int pblk_l2p_init(struct pblk *pblk) +static int pblk_l2p_recover(struct pblk *pblk, bool factory_init) +{ + struct pblk_line *line = NULL; + + if (factory_init) { + pblk_setup_uuid(pblk); + } else { + line = pblk_recov_l2p(pblk); + if (IS_ERR(line)) { + pr_err("pblk: could not recover l2p table\n"); + return -EFAULT; + } + } + +#ifdef CONFIG_NVM_DEBUG + pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); +#endif + + /* Free full lines directly as GC has not been started yet */ + pblk_gc_free_full_lines(pblk); + + if (!line) { + /* Configure next line for user data */ + line = pblk_line_get_first_data(pblk); + if (!line) { + pr_err("pblk: line list corrupted\n"); + return -EFAULT; + } + } + + return 0; +} + +static int pblk_l2p_init(struct pblk *pblk, bool factory_init) { sector_t i; struct ppa_addr ppa; @@ -119,7 +152,7 @@ static int pblk_l2p_init(struct pblk *pblk) for (i = 0; i < pblk->rl.nr_secs; i++) pblk_trans_map_set(pblk, i, ppa); - return 0; + return pblk_l2p_recover(pblk, factory_init); } static void pblk_rwb_free(struct pblk *pblk) @@ -146,7 +179,7 @@ static int pblk_rwb_init(struct pblk *pblk) return -ENOMEM; power_size = get_count_order(nr_entries); - power_seg_sz = get_count_order(geo->sec_size); + power_seg_sz = get_count_order(geo->csecs); return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); } @@ -154,47 +187,103 @@ static int pblk_rwb_init(struct pblk *pblk) /* Minimum pages needed within a lun */ #define ADDR_POOL_SIZE 64 -static int pblk_set_ppaf(struct pblk *pblk) +static int pblk_set_addrf_12(struct nvm_geo *geo, struct nvm_addrf_12 *dst) { - struct nvm_tgt_dev *dev = pblk->dev; - struct nvm_geo *geo = &dev->geo; - struct nvm_addr_format ppaf = geo->ppaf; + struct nvm_addrf_12 *src = (struct nvm_addrf_12 *)&geo->addrf; int power_len; /* Re-calculate channel and lun format to adapt to configuration */ - power_len = get_count_order(geo->nr_chnls); - if (1 << power_len != geo->nr_chnls) { + power_len = get_count_order(geo->num_ch); + if (1 << power_len != geo->num_ch) { pr_err("pblk: supports only power-of-two channel config.\n"); return -EINVAL; } - ppaf.ch_len = power_len; + dst->ch_len = power_len; - power_len = get_count_order(geo->nr_luns); - if (1 << power_len != geo->nr_luns) { + power_len = get_count_order(geo->num_lun); + if (1 << power_len != geo->num_lun) { pr_err("pblk: supports only power-of-two LUN config.\n"); return -EINVAL; } - ppaf.lun_len = power_len; - - pblk->ppaf.sec_offset = 0; - pblk->ppaf.pln_offset = ppaf.sect_len; - pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; - pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; - pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; - pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; - pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; - pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << - pblk->ppaf.pln_offset; - pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << - pblk->ppaf.ch_offset; - pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << - pblk->ppaf.lun_offset; - pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << - pblk->ppaf.pg_offset; - pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << - pblk->ppaf.blk_offset; - - pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; + dst->lun_len = power_len; + + dst->blk_len = src->blk_len; + dst->pg_len = src->pg_len; + dst->pln_len = src->pln_len; + dst->sec_len = src->sec_len; + + dst->sec_offset = 0; + dst->pln_offset = dst->sec_len; + dst->ch_offset = dst->pln_offset + dst->pln_len; + dst->lun_offset = dst->ch_offset + dst->ch_len; + dst->pg_offset = dst->lun_offset + dst->lun_len; + dst->blk_offset = dst->pg_offset + dst->pg_len; + + dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; + dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; + dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; + dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; + dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; + dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; + + return dst->blk_offset + src->blk_len; +} + +static int pblk_set_addrf_20(struct nvm_geo *geo, struct nvm_addrf *adst, + struct pblk_addrf *udst) +{ + struct nvm_addrf *src = &geo->addrf; + + adst->ch_len = get_count_order(geo->num_ch); + adst->lun_len = get_count_order(geo->num_lun); + adst->chk_len = src->chk_len; + adst->sec_len = src->sec_len; + + adst->sec_offset = 0; + adst->ch_offset = adst->sec_len; + adst->lun_offset = adst->ch_offset + adst->ch_len; + adst->chk_offset = adst->lun_offset + adst->lun_len; + + adst->sec_mask = ((1ULL << adst->sec_len) - 1) << adst->sec_offset; + adst->chk_mask = ((1ULL << adst->chk_len) - 1) << adst->chk_offset; + adst->lun_mask = ((1ULL << adst->lun_len) - 1) << adst->lun_offset; + adst->ch_mask = ((1ULL << adst->ch_len) - 1) << adst->ch_offset; + + udst->sec_stripe = geo->ws_opt; + udst->ch_stripe = geo->num_ch; + udst->lun_stripe = geo->num_lun; + + udst->sec_lun_stripe = udst->sec_stripe * udst->ch_stripe; + udst->sec_ws_stripe = udst->sec_lun_stripe * udst->lun_stripe; + + return adst->chk_offset + adst->chk_len; +} + +static int pblk_set_addrf(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + int mod; + + switch (geo->version) { + case NVM_OCSSD_SPEC_12: + div_u64_rem(geo->clba, pblk->min_write_pgs, &mod); + if (mod) { + pr_err("pblk: bad configuration of sectors/pages\n"); + return -EINVAL; + } + + pblk->addrf_len = pblk_set_addrf_12(geo, (void *)&pblk->addrf); + break; + case NVM_OCSSD_SPEC_20: + pblk->addrf_len = pblk_set_addrf_20(geo, (void *)&pblk->addrf, + &pblk->uaddrf); + break; + default: + pr_err("pblk: OCSSD revision not supported (%d)\n", + geo->version); + return -EINVAL; + } return 0; } @@ -252,16 +341,41 @@ static int pblk_core_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; + int max_write_ppas; - pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * - geo->nr_planes * geo->all_luns; + atomic64_set(&pblk->user_wa, 0); + atomic64_set(&pblk->pad_wa, 0); + atomic64_set(&pblk->gc_wa, 0); + pblk->user_rst_wa = 0; + pblk->pad_rst_wa = 0; + pblk->gc_rst_wa = 0; - if (pblk_init_global_caches(pblk)) + atomic64_set(&pblk->nr_flush, 0); + pblk->nr_flush_rst = 0; + + pblk->pgs_in_buffer = geo->mw_cunits * geo->all_luns; + + pblk->min_write_pgs = geo->ws_opt * (geo->csecs / PAGE_SIZE); + max_write_ppas = pblk->min_write_pgs * geo->all_luns; + pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA); + pblk_set_sec_per_write(pblk, pblk->min_write_pgs); + + if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { + pr_err("pblk: vector list too big(%u > %u)\n", + pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS); + return -EINVAL; + } + + pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t), + GFP_KERNEL); + if (!pblk->pad_dist) return -ENOMEM; + if (pblk_init_global_caches(pblk)) + goto fail_free_pad_dist; + /* Internal bios can be at most the sectors signaled by the device. */ - pblk->page_bio_pool = mempool_create_page_pool(nvm_max_phys_sects(dev), - 0); + pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0); if (!pblk->page_bio_pool) goto free_global_caches; @@ -305,13 +419,11 @@ static int pblk_core_init(struct pblk *pblk) if (!pblk->r_end_wq) goto free_bb_wq; - if (pblk_set_ppaf(pblk)) - goto free_r_end_wq; - - if (pblk_rwb_init(pblk)) + if (pblk_set_addrf(pblk)) goto free_r_end_wq; INIT_LIST_HEAD(&pblk->compl_list); + return 0; free_r_end_wq: @@ -334,6 +446,8 @@ free_page_bio_pool: mempool_destroy(pblk->page_bio_pool); free_global_caches: pblk_free_global_caches(pblk); +fail_free_pad_dist: + kfree(pblk->pad_dist); return -ENOMEM; } @@ -355,20 +469,31 @@ static void pblk_core_free(struct pblk *pblk) mempool_destroy(pblk->e_rq_pool); mempool_destroy(pblk->w_rq_pool); - pblk_rwb_free(pblk); - pblk_free_global_caches(pblk); + kfree(pblk->pad_dist); } -static void pblk_luns_free(struct pblk *pblk) +static void pblk_line_mg_free(struct pblk *pblk) { - kfree(pblk->luns); + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + int i; + + kfree(l_mg->bb_template); + kfree(l_mg->bb_aux); + kfree(l_mg->vsc_list); + + for (i = 0; i < PBLK_DATA_LINES; i++) { + kfree(l_mg->sline_meta[i]); + pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); + kfree(l_mg->eline_meta[i]); + } } -static void pblk_free_line_bitmaps(struct pblk_line *line) +static void pblk_line_meta_free(struct pblk_line *line) { kfree(line->blk_bitmap); kfree(line->erase_bitmap); + kfree(line->chks); } static void pblk_lines_free(struct pblk *pblk) @@ -382,40 +507,21 @@ static void pblk_lines_free(struct pblk *pblk) line = &pblk->lines[i]; pblk_line_free(pblk, line); - pblk_free_line_bitmaps(line); + pblk_line_meta_free(line); } spin_unlock(&l_mg->free_lock); -} - -static void pblk_line_meta_free(struct pblk *pblk) -{ - struct pblk_line_mgmt *l_mg = &pblk->l_mg; - int i; - - kfree(l_mg->bb_template); - kfree(l_mg->bb_aux); - kfree(l_mg->vsc_list); - for (i = 0; i < PBLK_DATA_LINES; i++) { - kfree(l_mg->sline_meta[i]); - pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type); - kfree(l_mg->eline_meta[i]); - } + pblk_line_mg_free(pblk); + kfree(pblk->luns); kfree(pblk->lines); } -static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) +static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun, + u8 *blks, int nr_blks) { - struct nvm_geo *geo = &dev->geo; struct ppa_addr ppa; - u8 *blks; - int nr_blks, ret; - - nr_blks = geo->nr_chks * geo->plane_mode; - blks = kmalloc(nr_blks, GFP_KERNEL); - if (!blks) - return -ENOMEM; + int ret; ppa.ppa = 0; ppa.g.ch = rlun->bppa.g.ch; @@ -423,69 +529,64 @@ static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); if (ret) - goto out; + return ret; nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); - if (nr_blks < 0) { - ret = nr_blks; - goto out; - } - - rlun->bb_list = blks; + if (nr_blks < 0) + return -EIO; return 0; -out: - kfree(blks); - return ret; } -static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line, - int blk_per_line) +static void *pblk_bb_get_meta(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_lun *rlun; - int bb_cnt = 0; - int i; + u8 *meta; + int i, nr_blks, blk_per_lun; + int ret; - for (i = 0; i < blk_per_line; i++) { - rlun = &pblk->luns[i]; - if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) - continue; + blk_per_lun = geo->num_chk * geo->pln_mode; + nr_blks = blk_per_lun * geo->all_luns; + + meta = kmalloc(nr_blks, GFP_KERNEL); + if (!meta) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < geo->all_luns; i++) { + struct pblk_lun *rlun = &pblk->luns[i]; + u8 *meta_pos = meta + i * blk_per_lun; - set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap); - bb_cnt++; + ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun); + if (ret) { + kfree(meta); + return ERR_PTR(-EIO); + } } - return bb_cnt; + return meta; } -static int pblk_alloc_line_bitmaps(struct pblk *pblk, struct pblk_line *line) +static void *pblk_chunk_get_meta(struct pblk *pblk) { - struct pblk_line_meta *lm = &pblk->lm; - - line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->blk_bitmap) - return -ENOMEM; - - line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); - if (!line->erase_bitmap) { - kfree(line->blk_bitmap); - return -ENOMEM; - } + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; - return 0; + if (geo->version == NVM_OCSSD_SPEC_12) + return pblk_bb_get_meta(pblk); + else + return pblk_chunk_get_info(pblk); } -static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) +static int pblk_luns_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; struct pblk_lun *rlun; - int i, ret; + int i; /* TODO: Implement unbalanced LUN support */ - if (geo->nr_luns < 0) { + if (geo->num_lun < 0) { pr_err("pblk: unbalanced LUN config.\n"); return -EINVAL; } @@ -497,58 +598,19 @@ static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) for (i = 0; i < geo->all_luns; i++) { /* Stripe across channels */ - int ch = i % geo->nr_chnls; - int lun_raw = i / geo->nr_chnls; - int lunid = lun_raw + ch * geo->nr_luns; + int ch = i % geo->num_ch; + int lun_raw = i / geo->num_ch; + int lunid = lun_raw + ch * geo->num_lun; rlun = &pblk->luns[i]; - rlun->bppa = luns[lunid]; + rlun->bppa = dev->luns[lunid]; sema_init(&rlun->wr_sem, 1); - - ret = pblk_bb_discovery(dev, rlun); - if (ret) { - while (--i >= 0) - kfree(pblk->luns[i].bb_list); - return ret; - } } return 0; } -static int pblk_lines_configure(struct pblk *pblk, int flags) -{ - struct pblk_line *line = NULL; - int ret = 0; - - if (!(flags & NVM_TARGET_FACTORY)) { - line = pblk_recov_l2p(pblk); - if (IS_ERR(line)) { - pr_err("pblk: could not recover l2p table\n"); - ret = -EFAULT; - } - } - -#ifdef CONFIG_NVM_DEBUG - pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk)); -#endif - - /* Free full lines directly as GC has not been started yet */ - pblk_gc_free_full_lines(pblk); - - if (!line) { - /* Configure next line for user data */ - line = pblk_line_get_first_data(pblk); - if (!line) { - pr_err("pblk: line list corrupted\n"); - ret = -EFAULT; - } - } - - return ret; -} - /* See comment over struct line_emeta definition */ static unsigned int calc_emeta_len(struct pblk *pblk) { @@ -559,19 +621,19 @@ static unsigned int calc_emeta_len(struct pblk *pblk) /* Round to sector size so that lba_list starts on its own sector */ lm->emeta_sec[1] = DIV_ROUND_UP( - sizeof(struct line_emeta) + lm->blk_bitmap_len, - geo->sec_size); - lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size; + sizeof(struct line_emeta) + lm->blk_bitmap_len + + sizeof(struct wa_counters), geo->csecs); + lm->emeta_len[1] = lm->emeta_sec[1] * geo->csecs; /* Round to sector size so that vsc_list starts on its own sector */ lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0]; lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64), - geo->sec_size); - lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size; + geo->csecs); + lm->emeta_len[2] = lm->emeta_sec[2] * geo->csecs; lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32), - geo->sec_size); - lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size; + geo->csecs); + lm->emeta_len[3] = lm->emeta_sec[3] * geo->csecs; lm->vsc_list_len = l_mg->nr_lines * sizeof(u32); @@ -602,23 +664,211 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) * on user capacity consider only provisioned blocks */ pblk->rl.total_blocks = nr_free_blks; - pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk; + pblk->rl.nr_secs = nr_free_blks * geo->clba; /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; - blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); + blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); - pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk; + pblk->capacity = (provisioned - blk_meta) * geo->clba; atomic_set(&pblk->rl.free_blocks, nr_free_blks); atomic_set(&pblk->rl.free_user_blocks, nr_free_blks); } -static int pblk_lines_alloc_metadata(struct pblk *pblk) +static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line, + void *chunk_meta) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + int i, chk_per_lun, nr_bad_chks = 0; + + chk_per_lun = geo->num_chk * geo->pln_mode; + + for (i = 0; i < lm->blk_per_line; i++) { + struct pblk_lun *rlun = &pblk->luns[i]; + struct nvm_chk_meta *chunk; + int pos = pblk_ppa_to_pos(geo, rlun->bppa); + u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun; + + chunk = &line->chks[pos]; + + /* + * In 1.2 spec. chunk state is not persisted by the device. Thus + * some of the values are reset each time pblk is instantiated. + */ + if (lun_bb_meta[line->id] == NVM_BLK_T_FREE) + chunk->state = NVM_CHK_ST_FREE; + else + chunk->state = NVM_CHK_ST_OFFLINE; + + chunk->type = NVM_CHK_TP_W_SEQ; + chunk->wi = 0; + chunk->slba = -1; + chunk->cnlb = geo->clba; + chunk->wp = 0; + + if (!(chunk->state & NVM_CHK_ST_OFFLINE)) + continue; + + set_bit(pos, line->blk_bitmap); + nr_bad_chks++; + } + + return nr_bad_chks; +} + +static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line, + struct nvm_chk_meta *meta) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_meta *lm = &pblk->lm; + int i, nr_bad_chks = 0; + + for (i = 0; i < lm->blk_per_line; i++) { + struct pblk_lun *rlun = &pblk->luns[i]; + struct nvm_chk_meta *chunk; + struct nvm_chk_meta *chunk_meta; + struct ppa_addr ppa; + int pos; + + ppa = rlun->bppa; + pos = pblk_ppa_to_pos(geo, ppa); + chunk = &line->chks[pos]; + + ppa.m.chk = line->id; + chunk_meta = pblk_chunk_get_off(pblk, meta, ppa); + + chunk->state = chunk_meta->state; + chunk->type = chunk_meta->type; + chunk->wi = chunk_meta->wi; + chunk->slba = chunk_meta->slba; + chunk->cnlb = chunk_meta->cnlb; + chunk->wp = chunk_meta->wp; + + if (!(chunk->state & NVM_CHK_ST_OFFLINE)) + continue; + + if (chunk->type & NVM_CHK_TP_SZ_SPEC) { + WARN_ONCE(1, "pblk: custom-sized chunks unsupported\n"); + continue; + } + + set_bit(pos, line->blk_bitmap); + nr_bad_chks++; + } + + return nr_bad_chks; +} + +static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line, + void *chunk_meta, int line_id) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - int i; + long nr_bad_chks, chk_in_line; + + line->pblk = pblk; + line->id = line_id; + line->type = PBLK_LINETYPE_FREE; + line->state = PBLK_LINESTATE_NEW; + line->gc_group = PBLK_LINEGC_NONE; + line->vsc = &l_mg->vsc_list[line_id]; + spin_lock_init(&line->lock); + + if (geo->version == NVM_OCSSD_SPEC_12) + nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta); + else + nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta); + + chk_in_line = lm->blk_per_line - nr_bad_chks; + if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line || + chk_in_line < lm->min_blk_line) { + line->state = PBLK_LINESTATE_BAD; + list_add_tail(&line->list, &l_mg->bad_list); + return 0; + } + + atomic_set(&line->blk_in_line, chk_in_line); + list_add_tail(&line->list, &l_mg->free_list); + l_mg->nr_free_lines++; + + return chk_in_line; +} + +static int pblk_alloc_line_meta(struct pblk *pblk, struct pblk_line *line) +{ + struct pblk_line_meta *lm = &pblk->lm; + + line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->blk_bitmap) + return -ENOMEM; + + line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); + if (!line->erase_bitmap) { + kfree(line->blk_bitmap); + return -ENOMEM; + } + + line->chks = kmalloc(lm->blk_per_line * sizeof(struct nvm_chk_meta), + GFP_KERNEL); + if (!line->chks) { + kfree(line->erase_bitmap); + kfree(line->blk_bitmap); + return -ENOMEM; + } + + return 0; +} + +static int pblk_line_mg_init(struct pblk *pblk) +{ + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line_meta *lm = &pblk->lm; + int i, bb_distance; + + l_mg->nr_lines = geo->num_chk; + l_mg->log_line = l_mg->data_line = NULL; + l_mg->l_seq_nr = l_mg->d_seq_nr = 0; + l_mg->nr_free_lines = 0; + bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); + + INIT_LIST_HEAD(&l_mg->free_list); + INIT_LIST_HEAD(&l_mg->corrupt_list); + INIT_LIST_HEAD(&l_mg->bad_list); + INIT_LIST_HEAD(&l_mg->gc_full_list); + INIT_LIST_HEAD(&l_mg->gc_high_list); + INIT_LIST_HEAD(&l_mg->gc_mid_list); + INIT_LIST_HEAD(&l_mg->gc_low_list); + INIT_LIST_HEAD(&l_mg->gc_empty_list); + + INIT_LIST_HEAD(&l_mg->emeta_list); + + l_mg->gc_lists[0] = &l_mg->gc_high_list; + l_mg->gc_lists[1] = &l_mg->gc_mid_list; + l_mg->gc_lists[2] = &l_mg->gc_low_list; + + spin_lock_init(&l_mg->free_lock); + spin_lock_init(&l_mg->close_lock); + spin_lock_init(&l_mg->gc_lock); + + l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL); + if (!l_mg->vsc_list) + goto fail; + + l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_template) + goto fail_free_vsc_list; + + l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); + if (!l_mg->bb_aux) + goto fail_free_bb_template; /* smeta is always small enough to fit on a kmalloc memory allocation, * emeta depends on the number of LUNs allocated to the pblk instance @@ -664,13 +914,13 @@ static int pblk_lines_alloc_metadata(struct pblk *pblk) } } - l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL); - if (!l_mg->vsc_list) - goto fail_free_emeta; - for (i = 0; i < l_mg->nr_lines; i++) l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY); + bb_distance = (geo->all_luns) * geo->ws_opt; + for (i = 0; i < lm->sec_per_line; i += bb_distance) + bitmap_set(l_mg->bb_template, i, geo->ws_opt); + return 0; fail_free_emeta: @@ -681,50 +931,27 @@ fail_free_emeta: kfree(l_mg->eline_meta[i]->buf); kfree(l_mg->eline_meta[i]); } - fail_free_smeta: for (i = 0; i < PBLK_DATA_LINES; i++) kfree(l_mg->sline_meta[i]); - + kfree(l_mg->bb_aux); +fail_free_bb_template: + kfree(l_mg->bb_template); +fail_free_vsc_list: + kfree(l_mg->vsc_list); +fail: return -ENOMEM; } -static int pblk_lines_init(struct pblk *pblk) +static int pblk_line_meta_init(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - struct pblk_line_mgmt *l_mg = &pblk->l_mg; struct pblk_line_meta *lm = &pblk->lm; - struct pblk_line *line; unsigned int smeta_len, emeta_len; - long nr_bad_blks, nr_free_blks; - int bb_distance, max_write_ppas, mod; - int i, ret; - - pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); - max_write_ppas = pblk->min_write_pgs * geo->all_luns; - pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? - max_write_ppas : nvm_max_phys_sects(dev); - pblk_set_sec_per_write(pblk, pblk->min_write_pgs); - - if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { - pr_err("pblk: cannot support device max_phys_sect\n"); - return -EINVAL; - } - - div_u64_rem(geo->sec_per_chk, pblk->min_write_pgs, &mod); - if (mod) { - pr_err("pblk: bad configuration of sectors/pages\n"); - return -EINVAL; - } - - l_mg->nr_lines = geo->nr_chks; - l_mg->log_line = l_mg->data_line = NULL; - l_mg->l_seq_nr = l_mg->d_seq_nr = 0; - l_mg->nr_free_lines = 0; - bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); + int i; - lm->sec_per_line = geo->sec_per_chk * geo->all_luns; + lm->sec_per_line = geo->clba * geo->all_luns; lm->blk_per_line = geo->all_luns; lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long); lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); @@ -738,8 +965,8 @@ static int pblk_lines_init(struct pblk *pblk) */ i = 1; add_smeta_page: - lm->smeta_sec = i * geo->sec_per_pl; - lm->smeta_len = lm->smeta_sec * geo->sec_size; + lm->smeta_sec = i * geo->ws_opt; + lm->smeta_len = lm->smeta_sec * geo->csecs; smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len; if (smeta_len > lm->smeta_len) { @@ -752,8 +979,8 @@ add_smeta_page: */ i = 1; add_emeta_page: - lm->emeta_sec[0] = i * geo->sec_per_pl; - lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size; + lm->emeta_sec[0] = i * geo->ws_opt; + lm->emeta_len[0] = lm->emeta_sec[0] * geo->csecs; emeta_len = calc_emeta_len(pblk); if (emeta_len > lm->emeta_len[0]) { @@ -766,119 +993,75 @@ add_emeta_page: lm->min_blk_line = 1; if (geo->all_luns > 1) lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec + - lm->emeta_sec[0], geo->sec_per_chk); + lm->emeta_sec[0], geo->clba); if (lm->min_blk_line > lm->blk_per_line) { pr_err("pblk: config. not supported. Min. LUN in line:%d\n", lm->blk_per_line); - ret = -EINVAL; - goto fail; - } - - ret = pblk_lines_alloc_metadata(pblk); - if (ret) - goto fail; - - l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_template) { - ret = -ENOMEM; - goto fail_free_meta; + return -EINVAL; } - l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); - if (!l_mg->bb_aux) { - ret = -ENOMEM; - goto fail_free_bb_template; - } + return 0; +} - bb_distance = (geo->all_luns) * geo->sec_per_pl; - for (i = 0; i < lm->sec_per_line; i += bb_distance) - bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); +static int pblk_lines_init(struct pblk *pblk) +{ + struct pblk_line_mgmt *l_mg = &pblk->l_mg; + struct pblk_line *line; + void *chunk_meta; + long nr_free_chks = 0; + int i, ret; - INIT_LIST_HEAD(&l_mg->free_list); - INIT_LIST_HEAD(&l_mg->corrupt_list); - INIT_LIST_HEAD(&l_mg->bad_list); - INIT_LIST_HEAD(&l_mg->gc_full_list); - INIT_LIST_HEAD(&l_mg->gc_high_list); - INIT_LIST_HEAD(&l_mg->gc_mid_list); - INIT_LIST_HEAD(&l_mg->gc_low_list); - INIT_LIST_HEAD(&l_mg->gc_empty_list); + ret = pblk_line_meta_init(pblk); + if (ret) + return ret; - INIT_LIST_HEAD(&l_mg->emeta_list); + ret = pblk_line_mg_init(pblk); + if (ret) + return ret; - l_mg->gc_lists[0] = &l_mg->gc_high_list; - l_mg->gc_lists[1] = &l_mg->gc_mid_list; - l_mg->gc_lists[2] = &l_mg->gc_low_list; + ret = pblk_luns_init(pblk); + if (ret) + goto fail_free_meta; - spin_lock_init(&l_mg->free_lock); - spin_lock_init(&l_mg->close_lock); - spin_lock_init(&l_mg->gc_lock); + chunk_meta = pblk_chunk_get_meta(pblk); + if (IS_ERR(chunk_meta)) { + ret = PTR_ERR(chunk_meta); + goto fail_free_luns; + } pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), GFP_KERNEL); if (!pblk->lines) { ret = -ENOMEM; - goto fail_free_bb_aux; + goto fail_free_chunk_meta; } - nr_free_blks = 0; for (i = 0; i < l_mg->nr_lines; i++) { - int blk_in_line; - line = &pblk->lines[i]; - line->pblk = pblk; - line->id = i; - line->type = PBLK_LINETYPE_FREE; - line->state = PBLK_LINESTATE_FREE; - line->gc_group = PBLK_LINEGC_NONE; - line->vsc = &l_mg->vsc_list[i]; - spin_lock_init(&line->lock); - - ret = pblk_alloc_line_bitmaps(pblk, line); + ret = pblk_alloc_line_meta(pblk, line); if (ret) goto fail_free_lines; - nr_bad_blks = pblk_bb_line(pblk, line, lm->blk_per_line); - if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { - pblk_free_line_bitmaps(line); - ret = -EINVAL; - goto fail_free_lines; - } - - blk_in_line = lm->blk_per_line - nr_bad_blks; - if (blk_in_line < lm->min_blk_line) { - line->state = PBLK_LINESTATE_BAD; - list_add_tail(&line->list, &l_mg->bad_list); - continue; - } - - nr_free_blks += blk_in_line; - atomic_set(&line->blk_in_line, blk_in_line); - - l_mg->nr_free_lines++; - list_add_tail(&line->list, &l_mg->free_list); + nr_free_chks += pblk_setup_line_meta(pblk, line, chunk_meta, i); } - pblk_set_provision(pblk, nr_free_blks); - - /* Cleanup per-LUN bad block lists - managed within lines on run-time */ - for (i = 0; i < geo->all_luns; i++) - kfree(pblk->luns[i].bb_list); + pblk_set_provision(pblk, nr_free_chks); + kfree(chunk_meta); return 0; + fail_free_lines: while (--i >= 0) - pblk_free_line_bitmaps(&pblk->lines[i]); -fail_free_bb_aux: - kfree(l_mg->bb_aux); -fail_free_bb_template: - kfree(l_mg->bb_template); + pblk_line_meta_free(&pblk->lines[i]); + kfree(pblk->lines); +fail_free_chunk_meta: + kfree(chunk_meta); +fail_free_luns: + kfree(pblk->luns); fail_free_meta: - pblk_line_meta_free(pblk); -fail: - for (i = 0; i < geo->all_luns; i++) - kfree(pblk->luns[i].bb_list); + pblk_line_mg_free(pblk); return ret; } @@ -912,18 +1095,17 @@ static void pblk_writer_stop(struct pblk *pblk) WARN(pblk_rb_sync_count(&pblk->rwb), "Stopping not fully synced write buffer\n"); + del_timer_sync(&pblk->wtimer); if (pblk->writer_ts) kthread_stop(pblk->writer_ts); - del_timer(&pblk->wtimer); } static void pblk_free(struct pblk *pblk) { - pblk_luns_free(pblk); pblk_lines_free(pblk); - pblk_line_meta_free(pblk); - pblk_core_free(pblk); pblk_l2p_free(pblk); + pblk_rwb_free(pblk); + pblk_core_free(pblk); kfree(pblk); } @@ -970,9 +1152,17 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, struct pblk *pblk; int ret; - if (dev->identity.dom & NVM_RSP_L2P) { + /* pblk supports 1.2 and 2.0 versions */ + if (!(geo->version == NVM_OCSSD_SPEC_12 || + geo->version == NVM_OCSSD_SPEC_20)) { + pr_err("pblk: OCSSD version not supported (%u)\n", + geo->version); + return ERR_PTR(-EINVAL); + } + + if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) { pr_err("pblk: host-side L2P table not supported. (%x)\n", - dev->identity.dom); + geo->dom); return ERR_PTR(-EINVAL); } @@ -988,14 +1178,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, spin_lock_init(&pblk->trans_lock); spin_lock_init(&pblk->lock); - if (flags & NVM_TARGET_FACTORY) - pblk_setup_uuid(pblk); - #ifdef CONFIG_NVM_DEBUG atomic_long_set(&pblk->inflight_writes, 0); atomic_long_set(&pblk->padded_writes, 0); atomic_long_set(&pblk->padded_wb, 0); - atomic_long_set(&pblk->nr_flush, 0); atomic_long_set(&pblk->req_writes, 0); atomic_long_set(&pblk->sub_writes, 0); atomic_long_set(&pblk->sync_writes, 0); @@ -1015,41 +1201,35 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, atomic_long_set(&pblk->write_failed, 0); atomic_long_set(&pblk->erase_failed, 0); - ret = pblk_luns_init(pblk, dev->luns); + ret = pblk_core_init(pblk); if (ret) { - pr_err("pblk: could not initialize luns\n"); + pr_err("pblk: could not initialize core\n"); goto fail; } ret = pblk_lines_init(pblk); if (ret) { pr_err("pblk: could not initialize lines\n"); - goto fail_free_luns; + goto fail_free_core; } - ret = pblk_core_init(pblk); + ret = pblk_rwb_init(pblk); if (ret) { - pr_err("pblk: could not initialize core\n"); - goto fail_free_line_meta; + pr_err("pblk: could not initialize write buffer\n"); + goto fail_free_lines; } - ret = pblk_l2p_init(pblk); + ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY); if (ret) { pr_err("pblk: could not initialize maps\n"); - goto fail_free_core; - } - - ret = pblk_lines_configure(pblk, flags); - if (ret) { - pr_err("pblk: could not configure lines\n"); - goto fail_free_l2p; + goto fail_free_rwb; } ret = pblk_writer_init(pblk); if (ret) { if (ret != -EINTR) pr_err("pblk: could not initialize write thread\n"); - goto fail_free_lines; + goto fail_free_l2p; } ret = pblk_gc_init(pblk); @@ -1064,10 +1244,10 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, blk_queue_write_cache(tqueue, true, false); - tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size; + tqueue->limits.discard_granularity = geo->clba * geo->csecs; tqueue->limits.discard_alignment = 0; blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue); pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n", tdisk->disk_name, @@ -1084,16 +1264,14 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, fail_stop_writer: pblk_writer_stop(pblk); -fail_free_lines: - pblk_lines_free(pblk); fail_free_l2p: pblk_l2p_free(pblk); +fail_free_rwb: + pblk_rwb_free(pblk); +fail_free_lines: + pblk_lines_free(pblk); fail_free_core: pblk_core_free(pblk); -fail_free_line_meta: - pblk_line_meta_free(pblk); -fail_free_luns: - pblk_luns_free(pblk); fail: kfree(pblk); return ERR_PTR(ret); diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c index 7445e6430c52..20dbaa89c9df 100644 --- a/drivers/lightnvm/pblk-map.c +++ b/drivers/lightnvm/pblk-map.c @@ -65,6 +65,8 @@ static void pblk_map_page_data(struct pblk *pblk, unsigned int sentry, lba_list[paddr] = cpu_to_le64(w_ctx->lba); if (lba_list[paddr] != addr_empty) line->nr_valid_lbas++; + else + atomic64_inc(&pblk->pad_wa); } else { lba_list[paddr] = meta_list[i].lba = addr_empty; __pblk_map_invalidate(pblk, line, paddr); @@ -125,7 +127,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd, atomic_dec(&e_line->left_eblks); *erase_ppa = rqd->ppa_list[i]; - erase_ppa->g.blk = e_line->id; + erase_ppa->a.blk = e_line->id; spin_unlock(&e_line->lock); @@ -166,6 +168,6 @@ retry: set_bit(bit, e_line->erase_bitmap); atomic_dec(&e_line->left_eblks); *erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */ - erase_ppa->g.blk = e_line->id; + erase_ppa->a.blk = e_line->id; } } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index ec8fc314646b..52fdd85dbc97 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -355,10 +355,13 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, struct pblk_rb_entry *entry; unsigned int sync, flush_point; + pblk_rb_sync_init(rb, NULL); sync = READ_ONCE(rb->sync); - if (pos == sync) + if (pos == sync) { + pblk_rb_sync_end(rb, NULL); return 0; + } #ifdef CONFIG_NVM_DEBUG atomic_inc(&rb->inflight_flush_point); @@ -367,8 +370,6 @@ static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio, flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1); entry = &rb->entries[flush_point]; - pblk_rb_sync_init(rb, NULL); - /* Protect flush points */ smp_store_release(&rb->flush_point, flush_point); @@ -437,9 +438,7 @@ static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries, if (bio->bi_opf & REQ_PREFLUSH) { struct pblk *pblk = container_of(rb, struct pblk, rwb); -#ifdef CONFIG_NVM_DEBUG - atomic_long_inc(&pblk->nr_flush); -#endif + atomic64_inc(&pblk->nr_flush); if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem)) *io_ret = NVM_IO_OK; } @@ -620,11 +619,17 @@ try: pr_err("pblk: could not pad page in write bio\n"); return NVM_IO_ERR; } + + if (pad < pblk->min_write_pgs) + atomic64_inc(&pblk->pad_dist[pad - 1]); + else + pr_warn("pblk: padding more than min. sectors\n"); + + atomic64_add(pad, &pblk->pad_wa); } #ifdef CONFIG_NVM_DEBUG - atomic_long_add(pad, &((struct pblk *) - (container_of(rb, struct pblk, rwb)))->padded_writes); + atomic_long_add(pad, &pblk->padded_writes); #endif return NVM_IO_OK; diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c index 2f761283f43e..9eee10f69df0 100644 --- a/drivers/lightnvm/pblk-read.c +++ b/drivers/lightnvm/pblk-read.c @@ -563,7 +563,7 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq) if (!(gc_rq->secs_to_gc)) goto out; - data_len = (gc_rq->secs_to_gc) * geo->sec_size; + data_len = (gc_rq->secs_to_gc) * geo->csecs; bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len, PBLK_VMALLOC_META, GFP_KERNEL); if (IS_ERR(bio)) { diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c index 1d5e961bf5e0..3e079c2afa6e 100644 --- a/drivers/lightnvm/pblk-recovery.c +++ b/drivers/lightnvm/pblk-recovery.c @@ -21,17 +21,15 @@ void pblk_submit_rec(struct work_struct *work) struct pblk_rec_ctx *recovery = container_of(work, struct pblk_rec_ctx, ws_rec); struct pblk *pblk = recovery->pblk; - struct nvm_tgt_dev *dev = pblk->dev; struct nvm_rq *rqd = recovery->rqd; struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); - int max_secs = nvm_max_phys_sects(dev); struct bio *bio; unsigned int nr_rec_secs; unsigned int pgs_read; int ret; nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, - max_secs); + NVM_MAX_VLBA); bio = bio_alloc(GFP_KERNEL, nr_rec_secs); @@ -74,8 +72,6 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, struct pblk_rec_ctx *recovery, u64 *comp_bits, unsigned int comp) { - struct nvm_tgt_dev *dev = pblk->dev; - int max_secs = nvm_max_phys_sects(dev); struct nvm_rq *rec_rqd; struct pblk_c_ctx *rec_ctx; int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; @@ -86,7 +82,7 @@ int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, /* Copy completion bitmap, but exclude the first X completed entries */ bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, (unsigned long int *)comp_bits, - comp, max_secs); + comp, NVM_MAX_VLBA); /* Save the context for the entries that need to be re-written and * update current context with the completed entries. @@ -188,7 +184,7 @@ static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - - nr_bb * geo->sec_per_chk; + nr_bb * geo->clba; } struct pblk_recov_alloc { @@ -236,7 +232,7 @@ next_read_rq: rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); if (!rq_ppas) rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); if (IS_ERR(bio)) @@ -355,7 +351,7 @@ static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, if (!pad_rq) return -ENOMEM; - data = vzalloc(pblk->max_write_pgs * geo->sec_size); + data = vzalloc(pblk->max_write_pgs * geo->csecs); if (!data) { ret = -ENOMEM; goto free_rq; @@ -372,7 +368,7 @@ next_pad_rq: goto fail_free_pad; } - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); if (!meta_list) { @@ -513,7 +509,7 @@ next_rq: rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); if (!rq_ppas) rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); if (IS_ERR(bio)) @@ -644,7 +640,7 @@ next_rq: rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); if (!rq_ppas) rq_ppas = pblk->min_write_pgs; - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); if (IS_ERR(bio)) @@ -749,7 +745,7 @@ static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) ppa_list = (void *)(meta_list) + pblk_dma_meta_size; dma_ppa_list = dma_meta_list + pblk_dma_meta_size; - data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); + data = kcalloc(pblk->max_write_pgs, geo->csecs, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto free_meta_list; @@ -826,6 +822,63 @@ static u64 pblk_line_emeta_start(struct pblk *pblk, struct pblk_line *line) return emeta_start; } +static int pblk_recov_check_line_version(struct pblk *pblk, + struct line_emeta *emeta) +{ + struct line_header *header = &emeta->header; + + if (header->version_major != EMETA_VERSION_MAJOR) { + pr_err("pblk: line major version mismatch: %d, expected: %d\n", + header->version_major, EMETA_VERSION_MAJOR); + return 1; + } + +#ifdef NVM_DEBUG + if (header->version_minor > EMETA_VERSION_MINOR) + pr_info("pblk: newer line minor version found: %d\n", line_v); +#endif + + return 0; +} + +static void pblk_recov_wa_counters(struct pblk *pblk, + struct line_emeta *emeta) +{ + struct pblk_line_meta *lm = &pblk->lm; + struct line_header *header = &emeta->header; + struct wa_counters *wa = emeta_to_wa(lm, emeta); + + /* WA counters were introduced in emeta version 0.2 */ + if (header->version_major > 0 || header->version_minor >= 2) { + u64 user = le64_to_cpu(wa->user); + u64 pad = le64_to_cpu(wa->pad); + u64 gc = le64_to_cpu(wa->gc); + + atomic64_set(&pblk->user_wa, user); + atomic64_set(&pblk->pad_wa, pad); + atomic64_set(&pblk->gc_wa, gc); + + pblk->user_rst_wa = user; + pblk->pad_rst_wa = pad; + pblk->gc_rst_wa = gc; + } +} + +static int pblk_line_was_written(struct pblk_line *line, + struct pblk_line_meta *lm) +{ + + int i; + int state_mask = NVM_CHK_ST_OFFLINE | NVM_CHK_ST_FREE; + + for (i = 0; i < lm->blk_per_line; i++) { + if (!(line->chks[i].state & state_mask)) + return 1; + } + + return 0; +} + struct pblk_line *pblk_recov_l2p(struct pblk *pblk) { struct pblk_line_meta *lm = &pblk->lm; @@ -862,6 +915,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta); + if (!pblk_line_was_written(line, lm)) + continue; + /* Lines that cannot be read are assumed as not written here */ if (pblk_line_read_smeta(pblk, line)) continue; @@ -873,9 +929,9 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) continue; - if (smeta_buf->header.version != SMETA_VERSION) { + if (smeta_buf->header.version_major != SMETA_VERSION_MAJOR) { pr_err("pblk: found incompatible line version %u\n", - le16_to_cpu(smeta_buf->header.version)); + smeta_buf->header.version_major); return ERR_PTR(-EINVAL); } @@ -943,6 +999,11 @@ struct pblk_line *pblk_recov_l2p(struct pblk *pblk) goto next; } + if (pblk_recov_check_line_version(pblk, line->emeta->buf)) + return ERR_PTR(-EINVAL); + + pblk_recov_wa_counters(pblk, line->emeta->buf); + if (pblk_recov_l2p_from_emeta(pblk, line)) pblk_recov_l2p_from_oob(pblk, line); diff --git a/drivers/lightnvm/pblk-rl.c b/drivers/lightnvm/pblk-rl.c index 0d457b162f23..883a7113b19d 100644 --- a/drivers/lightnvm/pblk-rl.c +++ b/drivers/lightnvm/pblk-rl.c @@ -200,7 +200,7 @@ void pblk_rl_init(struct pblk_rl *rl, int budget) /* Consider sectors used for metadata */ sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines; - blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk); + blk_meta = DIV_ROUND_UP(sec_meta, geo->clba); rl->high = pblk->op_blks - blk_meta - lm->blk_per_line; rl->high_pw = get_count_order(rl->high); diff --git a/drivers/lightnvm/pblk-sysfs.c b/drivers/lightnvm/pblk-sysfs.c index 620bab853579..e61909af23a5 100644 --- a/drivers/lightnvm/pblk-sysfs.c +++ b/drivers/lightnvm/pblk-sysfs.c @@ -39,8 +39,8 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page) sz += snprintf(page + sz, PAGE_SIZE - sz, "pblk: pos:%d, ch:%d, lun:%d - %d\n", i, - rlun->bppa.g.ch, - rlun->bppa.g.lun, + rlun->bppa.a.ch, + rlun->bppa.a.lun, active); } @@ -115,24 +115,47 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page) struct nvm_geo *geo = &dev->geo; ssize_t sz = 0; - sz = snprintf(page, PAGE_SIZE - sz, - "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - pblk->ppaf_bitsize, - pblk->ppaf.blk_offset, geo->ppaf.blk_len, - pblk->ppaf.pg_offset, geo->ppaf.pg_len, - pblk->ppaf.lun_offset, geo->ppaf.lun_len, - pblk->ppaf.ch_offset, geo->ppaf.ch_len, - pblk->ppaf.pln_offset, geo->ppaf.pln_len, - pblk->ppaf.sec_offset, geo->ppaf.sect_len); + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; + struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf; - sz += snprintf(page + sz, PAGE_SIZE - sz, - "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", - geo->ppaf.blk_offset, geo->ppaf.blk_len, - geo->ppaf.pg_offset, geo->ppaf.pg_len, - geo->ppaf.lun_offset, geo->ppaf.lun_len, - geo->ppaf.ch_offset, geo->ppaf.ch_len, - geo->ppaf.pln_offset, geo->ppaf.pln_len, - geo->ppaf.sect_offset, geo->ppaf.sect_len); + sz = snprintf(page, PAGE_SIZE, + "g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + pblk->addrf_len, + ppaf->blk_offset, ppaf->blk_len, + ppaf->pg_offset, ppaf->pg_len, + ppaf->lun_offset, ppaf->lun_len, + ppaf->ch_offset, ppaf->ch_len, + ppaf->pln_offset, ppaf->pln_len, + ppaf->sec_offset, ppaf->sec_len); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n", + gppaf->blk_offset, gppaf->blk_len, + gppaf->pg_offset, gppaf->pg_len, + gppaf->lun_offset, gppaf->lun_len, + gppaf->ch_offset, gppaf->ch_len, + gppaf->pln_offset, gppaf->pln_len, + gppaf->sec_offset, gppaf->sec_len); + } else { + struct nvm_addrf *ppaf = &pblk->addrf; + struct nvm_addrf *gppaf = &geo->addrf; + + sz = snprintf(page, PAGE_SIZE, + "pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n", + pblk->addrf_len, + ppaf->ch_offset, ppaf->ch_len, + ppaf->lun_offset, ppaf->lun_len, + ppaf->chk_offset, ppaf->chk_len, + ppaf->sec_offset, ppaf->sec_len); + + sz += snprintf(page + sz, PAGE_SIZE - sz, + "device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n", + gppaf->ch_offset, gppaf->ch_len, + gppaf->lun_offset, gppaf->lun_len, + gppaf->chk_offset, gppaf->chk_len, + gppaf->sec_offset, gppaf->sec_len); + } return sz; } @@ -288,7 +311,7 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page) "blk_line:%d, sec_line:%d, sec_blk:%d\n", lm->blk_per_line, lm->sec_per_line, - geo->sec_per_chk); + geo->clba); return sz; } @@ -298,15 +321,104 @@ static ssize_t pblk_sysfs_get_sec_per_write(struct pblk *pblk, char *page) return snprintf(page, PAGE_SIZE, "%d\n", pblk->sec_per_write); } +static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad, + char *page) +{ + int sz; + + + sz = snprintf(page, PAGE_SIZE, + "user:%lld gc:%lld pad:%lld WA:", + user, gc, pad); + + if (!user) { + sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n"); + } else { + u64 wa_int; + u32 wa_frac; + + wa_int = (user + gc + pad) * 100000; + wa_int = div_u64(wa_int, user); + wa_int = div_u64_rem(wa_int, 100000, &wa_frac); + + sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n", + wa_int, wa_frac); + } + + return sz; +} + +static ssize_t pblk_sysfs_get_write_amp_mileage(struct pblk *pblk, char *page) +{ + return pblk_get_write_amp(atomic64_read(&pblk->user_wa), + atomic64_read(&pblk->gc_wa), atomic64_read(&pblk->pad_wa), + page); +} + +static ssize_t pblk_sysfs_get_write_amp_trip(struct pblk *pblk, char *page) +{ + return pblk_get_write_amp( + atomic64_read(&pblk->user_wa) - pblk->user_rst_wa, + atomic64_read(&pblk->gc_wa) - pblk->gc_rst_wa, + atomic64_read(&pblk->pad_wa) - pblk->pad_rst_wa, page); +} + +static long long bucket_percentage(unsigned long long bucket, + unsigned long long total) +{ + int p = bucket * 100; + + p = div_u64(p, total); + + return p; +} + +static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page) +{ + int sz = 0; + unsigned long long total; + unsigned long long total_buckets = 0; + int buckets = pblk->min_write_pgs - 1; + int i; + + total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst; + if (!total) { + for (i = 0; i < (buckets + 1); i++) + sz += snprintf(page + sz, PAGE_SIZE - sz, + "%d:0 ", i); + sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + + return sz; + } + + for (i = 0; i < buckets; i++) + total_buckets += atomic64_read(&pblk->pad_dist[i]); + + sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ", + bucket_percentage(total - total_buckets, total)); + + for (i = 0; i < buckets; i++) { + unsigned long long p; + + p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]), + total); + sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ", + i + 1, p); + } + sz += snprintf(page + sz, PAGE_SIZE - sz, "\n"); + + return sz; +} + #ifdef CONFIG_NVM_DEBUG static ssize_t pblk_sysfs_stats_debug(struct pblk *pblk, char *page) { return snprintf(page, PAGE_SIZE, - "%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", + "%lu\t%lu\t%ld\t%llu\t%ld\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\n", atomic_long_read(&pblk->inflight_writes), atomic_long_read(&pblk->inflight_reads), atomic_long_read(&pblk->req_writes), - atomic_long_read(&pblk->nr_flush), + (u64)atomic64_read(&pblk->nr_flush), atomic_long_read(&pblk->padded_writes), atomic_long_read(&pblk->padded_wb), atomic_long_read(&pblk->sub_writes), @@ -360,6 +472,56 @@ static ssize_t pblk_sysfs_set_sec_per_write(struct pblk *pblk, return len; } +static ssize_t pblk_sysfs_set_write_amp_trip(struct pblk *pblk, + const char *page, size_t len) +{ + size_t c_len; + int reset_value; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &reset_value)) + return -EINVAL; + + if (reset_value != 0) + return -EINVAL; + + pblk->user_rst_wa = atomic64_read(&pblk->user_wa); + pblk->pad_rst_wa = atomic64_read(&pblk->pad_wa); + pblk->gc_rst_wa = atomic64_read(&pblk->gc_wa); + + return len; +} + + +static ssize_t pblk_sysfs_set_padding_dist(struct pblk *pblk, + const char *page, size_t len) +{ + size_t c_len; + int reset_value; + int buckets = pblk->min_write_pgs - 1; + int i; + + c_len = strcspn(page, "\n"); + if (c_len >= len) + return -EINVAL; + + if (kstrtouint(page, 0, &reset_value)) + return -EINVAL; + + if (reset_value != 0) + return -EINVAL; + + for (i = 0; i < buckets; i++) + atomic64_set(&pblk->pad_dist[i], 0); + + pblk->nr_flush_rst = atomic64_read(&pblk->nr_flush); + + return len; +} + static struct attribute sys_write_luns = { .name = "write_luns", .mode = 0444, @@ -410,6 +572,21 @@ static struct attribute sys_max_sec_per_write = { .mode = 0644, }; +static struct attribute sys_write_amp_mileage = { + .name = "write_amp_mileage", + .mode = 0444, +}; + +static struct attribute sys_write_amp_trip = { + .name = "write_amp_trip", + .mode = 0644, +}; + +static struct attribute sys_padding_dist = { + .name = "padding_dist", + .mode = 0644, +}; + #ifdef CONFIG_NVM_DEBUG static struct attribute sys_stats_debug_attr = { .name = "stats", @@ -428,6 +605,9 @@ static struct attribute *pblk_attrs[] = { &sys_stats_ppaf_attr, &sys_lines_attr, &sys_lines_info_attr, + &sys_write_amp_mileage, + &sys_write_amp_trip, + &sys_padding_dist, #ifdef CONFIG_NVM_DEBUG &sys_stats_debug_attr, #endif @@ -457,6 +637,12 @@ static ssize_t pblk_sysfs_show(struct kobject *kobj, struct attribute *attr, return pblk_sysfs_lines_info(pblk, buf); else if (strcmp(attr->name, "max_sec_per_write") == 0) return pblk_sysfs_get_sec_per_write(pblk, buf); + else if (strcmp(attr->name, "write_amp_mileage") == 0) + return pblk_sysfs_get_write_amp_mileage(pblk, buf); + else if (strcmp(attr->name, "write_amp_trip") == 0) + return pblk_sysfs_get_write_amp_trip(pblk, buf); + else if (strcmp(attr->name, "padding_dist") == 0) + return pblk_sysfs_get_padding_dist(pblk, buf); #ifdef CONFIG_NVM_DEBUG else if (strcmp(attr->name, "stats") == 0) return pblk_sysfs_stats_debug(pblk, buf); @@ -473,7 +659,10 @@ static ssize_t pblk_sysfs_store(struct kobject *kobj, struct attribute *attr, return pblk_sysfs_gc_force(pblk, buf, len); else if (strcmp(attr->name, "max_sec_per_write") == 0) return pblk_sysfs_set_sec_per_write(pblk, buf, len); - + else if (strcmp(attr->name, "write_amp_trip") == 0) + return pblk_sysfs_set_write_amp_trip(pblk, buf, len); + else if (strcmp(attr->name, "padding_dist") == 0) + return pblk_sysfs_set_padding_dist(pblk, buf, len); return 0; } diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c index aae86ed60b98..3e6f1ebd743a 100644 --- a/drivers/lightnvm/pblk-write.c +++ b/drivers/lightnvm/pblk-write.c @@ -333,7 +333,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line) m_ctx = nvm_rq_to_pdu(rqd); m_ctx->private = meta_line; - rq_len = rq_ppas * geo->sec_size; + rq_len = rq_ppas * geo->csecs; data = ((void *)emeta->buf) + emeta->mem; bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len, diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h index 8c357fb6538e..9c682acfc5d1 100644 --- a/drivers/lightnvm/pblk.h +++ b/drivers/lightnvm/pblk.h @@ -201,12 +201,6 @@ struct pblk_rb { struct pblk_lun { struct ppa_addr bppa; - - u8 *bb_list; /* Bad block list for LUN. Only used on - * bring up. Bad blocks are managed - * within lines on run-time. - */ - struct semaphore wr_sem; }; @@ -303,6 +297,7 @@ enum { PBLK_LINETYPE_DATA = 2, /* Line state */ + PBLK_LINESTATE_NEW = 9, PBLK_LINESTATE_FREE = 10, PBLK_LINESTATE_OPEN = 11, PBLK_LINESTATE_CLOSED = 12, @@ -320,14 +315,26 @@ enum { }; #define PBLK_MAGIC 0x70626c6b /*pblk*/ -#define SMETA_VERSION cpu_to_le16(1) + +/* emeta/smeta persistent storage format versions: + * Changes in major version requires offline migration. + * Changes in minor version are handled automatically during + * recovery. + */ + +#define SMETA_VERSION_MAJOR (0) +#define SMETA_VERSION_MINOR (1) + +#define EMETA_VERSION_MAJOR (0) +#define EMETA_VERSION_MINOR (2) struct line_header { __le32 crc; __le32 identifier; /* pblk identifier */ __u8 uuid[16]; /* instance uuid */ __le16 type; /* line type */ - __le16 version; /* type version */ + __u8 version_major; /* version major */ + __u8 version_minor; /* version minor */ __le32 id; /* line id for current line */ }; @@ -349,11 +356,13 @@ struct line_smeta { __le64 lun_bitmap[]; }; + /* * Metadata layout in media: * First sector: * 1. struct line_emeta * 2. bad block bitmap (u64 * window_wr_lun) + * 3. write amplification counters * Mid sectors (start at lbas_sector): * 3. nr_lbas (u64) forming lba list * Last sectors (start at vsc_sector): @@ -377,7 +386,15 @@ struct line_emeta { __le32 next_id; /* Line id for next line */ __le64 nr_lbas; /* Number of lbas mapped in line */ __le64 nr_valid_lbas; /* Number of valid lbas mapped in line */ - __le64 bb_bitmap[]; /* Updated bad block bitmap for line */ + __le64 bb_bitmap[]; /* Updated bad block bitmap for line */ +}; + + +/* Write amplification counters stored on media */ +struct wa_counters { + __le64 user; /* Number of user written sectors */ + __le64 gc; /* Number of sectors written by GC*/ + __le64 pad; /* Number of padded sectors */ }; struct pblk_emeta { @@ -410,6 +427,8 @@ struct pblk_line { unsigned long *lun_bitmap; /* Bitmap for LUNs mapped in line */ + struct nvm_chk_meta *chks; /* Chunks forming line */ + struct pblk_smeta *smeta; /* Start metadata */ struct pblk_emeta *emeta; /* End medatada */ @@ -507,10 +526,11 @@ struct pblk_line_meta { unsigned int smeta_sec; /* Sectors needed for smeta */ unsigned int emeta_len[4]; /* Lengths for emeta: - * [0]: Total length - * [1]: struct line_emeta length - * [2]: L2P portion length - * [3]: vsc list length + * [0]: Total + * [1]: struct line_emeta + + * bb_bitmap + struct wa_counters + * [2]: L2P portion + * [3]: vsc */ unsigned int emeta_sec[4]; /* Sectors needed for emeta. Same layout * as emeta_len @@ -534,21 +554,6 @@ struct pblk_line_meta { unsigned int meta_distance; /* Distance between data and metadata */ }; -struct pblk_addr_format { - u64 ch_mask; - u64 lun_mask; - u64 pln_mask; - u64 blk_mask; - u64 pg_mask; - u64 sec_mask; - u8 ch_offset; - u8 lun_offset; - u8 pln_offset; - u8 blk_offset; - u8 pg_offset; - u8 sec_offset; -}; - enum { PBLK_STATE_RUNNING = 0, PBLK_STATE_STOPPING = 1, @@ -556,6 +561,18 @@ enum { PBLK_STATE_STOPPED = 3, }; +/* Internal format to support not power-of-2 device formats */ +struct pblk_addrf { + /* gen to dev */ + int sec_stripe; + int ch_stripe; + int lun_stripe; + + /* dev to gen */ + int sec_lun_stripe; + int sec_ws_stripe; +}; + struct pblk { struct nvm_tgt_dev *dev; struct gendisk *disk; @@ -568,8 +585,9 @@ struct pblk { struct pblk_line_mgmt l_mg; /* Line management */ struct pblk_line_meta lm; /* Line metadata */ - int ppaf_bitsize; - struct pblk_addr_format ppaf; + struct nvm_addrf addrf; /* Aligned address format */ + struct pblk_addrf uaddrf; /* Unaligned address format */ + int addrf_len; struct pblk_rb rwb; @@ -592,12 +610,27 @@ struct pblk { int sec_per_write; unsigned char instance_uuid[16]; + + /* Persistent write amplification counters, 4kb sector I/Os */ + atomic64_t user_wa; /* Sectors written by user */ + atomic64_t gc_wa; /* Sectors written by GC */ + atomic64_t pad_wa; /* Padded sectors written */ + + /* Reset values for delta write amplification measurements */ + u64 user_rst_wa; + u64 gc_rst_wa; + u64 pad_rst_wa; + + /* Counters used for calculating padding distribution */ + atomic64_t *pad_dist; /* Padding distribution buckets */ + u64 nr_flush_rst; /* Flushes reset value for pad dist.*/ + atomic64_t nr_flush; /* Number of flush/fua I/O */ + #ifdef CONFIG_NVM_DEBUG - /* All debug counters apply to 4kb sector I/Os */ + /* Non-persistent debug counters, 4kb sector I/Os */ atomic_long_t inflight_writes; /* Inflight writes (user and gc) */ atomic_long_t padded_writes; /* Sectors padded due to flush/fua */ atomic_long_t padded_wb; /* Sectors padded in write buffer */ - atomic_long_t nr_flush; /* Number of flush/fua I/O */ atomic_long_t req_writes; /* Sectors stored on write buffer */ atomic_long_t sub_writes; /* Sectors submitted from buffer */ atomic_long_t sync_writes; /* Sectors synced to media */ @@ -712,6 +745,10 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write); int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd, struct pblk_c_ctx *c_ctx); void pblk_discard(struct pblk *pblk, struct bio *bio); +struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk); +struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk, + struct nvm_chk_meta *lp, + struct ppa_addr ppa); void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd); void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd); int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd); @@ -888,6 +925,12 @@ static inline void *emeta_to_bb(struct line_emeta *emeta) return emeta->bb_bitmap; } +static inline void *emeta_to_wa(struct pblk_line_meta *lm, + struct line_emeta *emeta) +{ + return emeta->bb_bitmap + lm->blk_bitmap_len; +} + static inline void *emeta_to_lbas(struct pblk *pblk, struct line_emeta *emeta) { return ((void *)emeta + pblk->lm.emeta_len[1]); @@ -903,38 +946,60 @@ static inline int pblk_line_vsc(struct pblk_line *line) return le32_to_cpu(*line->vsc); } -#define NVM_MEM_PAGE_WRITE (8) - static inline int pblk_pad_distance(struct pblk *pblk) { struct nvm_tgt_dev *dev = pblk->dev; struct nvm_geo *geo = &dev->geo; - return NVM_MEM_PAGE_WRITE * geo->all_luns * geo->sec_per_pl; + return geo->mw_cunits * geo->all_luns * geo->ws_opt; } static inline int pblk_ppa_to_line(struct ppa_addr p) { - return p.g.blk; + return p.a.blk; } static inline int pblk_ppa_to_pos(struct nvm_geo *geo, struct ppa_addr p) { - return p.g.lun * geo->nr_chnls + p.g.ch; + return p.a.lun * geo->num_ch + p.a.ch; } static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, u64 line_id) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; struct ppa_addr ppa; - ppa.ppa = 0; - ppa.g.blk = line_id; - ppa.g.pg = (paddr & pblk->ppaf.pg_mask) >> pblk->ppaf.pg_offset; - ppa.g.lun = (paddr & pblk->ppaf.lun_mask) >> pblk->ppaf.lun_offset; - ppa.g.ch = (paddr & pblk->ppaf.ch_mask) >> pblk->ppaf.ch_offset; - ppa.g.pl = (paddr & pblk->ppaf.pln_mask) >> pblk->ppaf.pln_offset; - ppa.g.sec = (paddr & pblk->ppaf.sec_mask) >> pblk->ppaf.sec_offset; + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; + + ppa.ppa = 0; + ppa.g.blk = line_id; + ppa.g.pg = (paddr & ppaf->pg_mask) >> ppaf->pg_offset; + ppa.g.lun = (paddr & ppaf->lun_mask) >> ppaf->lun_offset; + ppa.g.ch = (paddr & ppaf->ch_mask) >> ppaf->ch_offset; + ppa.g.pl = (paddr & ppaf->pln_mask) >> ppaf->pln_offset; + ppa.g.sec = (paddr & ppaf->sec_mask) >> ppaf->sec_offset; + } else { + struct pblk_addrf *uaddrf = &pblk->uaddrf; + int secs, chnls, luns; + + ppa.ppa = 0; + + ppa.m.chk = line_id; + + paddr = div_u64_rem(paddr, uaddrf->sec_stripe, &secs); + ppa.m.sec = secs; + + paddr = div_u64_rem(paddr, uaddrf->ch_stripe, &chnls); + ppa.m.grp = chnls; + + paddr = div_u64_rem(paddr, uaddrf->lun_stripe, &luns); + ppa.m.pu = luns; + + ppa.m.sec += uaddrf->sec_stripe * paddr; + } return ppa; } @@ -942,13 +1007,30 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr, static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk, struct ppa_addr p) { + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; u64 paddr; - paddr = (u64)p.g.pg << pblk->ppaf.pg_offset; - paddr |= (u64)p.g.lun << pblk->ppaf.lun_offset; - paddr |= (u64)p.g.ch << pblk->ppaf.ch_offset; - paddr |= (u64)p.g.pl << pblk->ppaf.pln_offset; - paddr |= (u64)p.g.sec << pblk->ppaf.sec_offset; + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf; + + paddr = (u64)p.g.ch << ppaf->ch_offset; + paddr |= (u64)p.g.lun << ppaf->lun_offset; + paddr |= (u64)p.g.pg << ppaf->pg_offset; + paddr |= (u64)p.g.pl << ppaf->pln_offset; + paddr |= (u64)p.g.sec << ppaf->sec_offset; + } else { + struct pblk_addrf *uaddrf = &pblk->uaddrf; + u64 secs = p.m.sec; + int sec_stripe; + + paddr = (u64)p.m.grp * uaddrf->sec_stripe; + paddr += (u64)p.m.pu * uaddrf->sec_lun_stripe; + + secs = div_u64_rem(secs, uaddrf->sec_stripe, &sec_stripe); + paddr += secs * uaddrf->sec_ws_stripe; + paddr += sec_stripe; + } return paddr; } @@ -965,18 +1047,37 @@ static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32) ppa64.c.line = ppa32 & ((~0U) >> 1); ppa64.c.is_cached = 1; } else { - ppa64.g.blk = (ppa32 & pblk->ppaf.blk_mask) >> - pblk->ppaf.blk_offset; - ppa64.g.pg = (ppa32 & pblk->ppaf.pg_mask) >> - pblk->ppaf.pg_offset; - ppa64.g.lun = (ppa32 & pblk->ppaf.lun_mask) >> - pblk->ppaf.lun_offset; - ppa64.g.ch = (ppa32 & pblk->ppaf.ch_mask) >> - pblk->ppaf.ch_offset; - ppa64.g.pl = (ppa32 & pblk->ppaf.pln_mask) >> - pblk->ppaf.pln_offset; - ppa64.g.sec = (ppa32 & pblk->ppaf.sec_mask) >> - pblk->ppaf.sec_offset; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = + (struct nvm_addrf_12 *)&pblk->addrf; + + ppa64.g.ch = (ppa32 & ppaf->ch_mask) >> + ppaf->ch_offset; + ppa64.g.lun = (ppa32 & ppaf->lun_mask) >> + ppaf->lun_offset; + ppa64.g.blk = (ppa32 & ppaf->blk_mask) >> + ppaf->blk_offset; + ppa64.g.pg = (ppa32 & ppaf->pg_mask) >> + ppaf->pg_offset; + ppa64.g.pl = (ppa32 & ppaf->pln_mask) >> + ppaf->pln_offset; + ppa64.g.sec = (ppa32 & ppaf->sec_mask) >> + ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = &pblk->addrf; + + ppa64.m.grp = (ppa32 & lbaf->ch_mask) >> + lbaf->ch_offset; + ppa64.m.pu = (ppa32 & lbaf->lun_mask) >> + lbaf->lun_offset; + ppa64.m.chk = (ppa32 & lbaf->chk_mask) >> + lbaf->chk_offset; + ppa64.m.sec = (ppa32 & lbaf->sec_mask) >> + lbaf->sec_offset; + } } return ppa64; @@ -992,12 +1093,27 @@ static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64) ppa32 |= ppa64.c.line; ppa32 |= 1U << 31; } else { - ppa32 |= ppa64.g.blk << pblk->ppaf.blk_offset; - ppa32 |= ppa64.g.pg << pblk->ppaf.pg_offset; - ppa32 |= ppa64.g.lun << pblk->ppaf.lun_offset; - ppa32 |= ppa64.g.ch << pblk->ppaf.ch_offset; - ppa32 |= ppa64.g.pl << pblk->ppaf.pln_offset; - ppa32 |= ppa64.g.sec << pblk->ppaf.sec_offset; + struct nvm_tgt_dev *dev = pblk->dev; + struct nvm_geo *geo = &dev->geo; + + if (geo->version == NVM_OCSSD_SPEC_12) { + struct nvm_addrf_12 *ppaf = + (struct nvm_addrf_12 *)&pblk->addrf; + + ppa32 |= ppa64.g.ch << ppaf->ch_offset; + ppa32 |= ppa64.g.lun << ppaf->lun_offset; + ppa32 |= ppa64.g.blk << ppaf->blk_offset; + ppa32 |= ppa64.g.pg << ppaf->pg_offset; + ppa32 |= ppa64.g.pl << ppaf->pln_offset; + ppa32 |= ppa64.g.sec << ppaf->sec_offset; + } else { + struct nvm_addrf *lbaf = &pblk->addrf; + + ppa32 |= ppa64.m.grp << lbaf->ch_offset; + ppa32 |= ppa64.m.pu << lbaf->lun_offset; + ppa32 |= ppa64.m.chk << lbaf->chk_offset; + ppa32 |= ppa64.m.sec << lbaf->sec_offset; + } } return ppa32; @@ -1008,7 +1124,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, { struct ppa_addr ppa; - if (pblk->ppaf_bitsize < 32) { + if (pblk->addrf_len < 32) { u32 *map = (u32 *)pblk->trans_map; ppa = pblk_ppa32_to_ppa64(pblk, map[lba]); @@ -1024,7 +1140,7 @@ static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk, static inline void pblk_trans_map_set(struct pblk *pblk, sector_t lba, struct ppa_addr ppa) { - if (pblk->ppaf_bitsize < 32) { + if (pblk->addrf_len < 32) { u32 *map = (u32 *)pblk->trans_map; map[lba] = pblk_ppa64_to_ppa32(pblk, ppa); @@ -1115,7 +1231,10 @@ static inline int pblk_set_progr_mode(struct pblk *pblk, int type) struct nvm_geo *geo = &dev->geo; int flags; - flags = geo->plane_mode >> 1; + if (geo->version == NVM_OCSSD_SPEC_20) + return 0; + + flags = geo->pln_mode >> 1; if (type == PBLK_WRITE) flags |= NVM_IO_SCRAMBLE_ENABLE; @@ -1134,9 +1253,12 @@ static inline int pblk_set_read_mode(struct pblk *pblk, int type) struct nvm_geo *geo = &dev->geo; int flags; + if (geo->version == NVM_OCSSD_SPEC_20) + return 0; + flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE; if (type == PBLK_READ_SEQUENTIAL) - flags |= geo->plane_mode >> 1; + flags |= geo->pln_mode >> 1; return flags; } @@ -1147,16 +1269,21 @@ static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs) } #ifdef CONFIG_NVM_DEBUG -static inline void print_ppa(struct ppa_addr *p, char *msg, int error) +static inline void print_ppa(struct nvm_geo *geo, struct ppa_addr *p, + char *msg, int error) { if (p->c.is_cached) { pr_err("ppa: (%s: %x) cache line: %llu\n", msg, error, (u64)p->c.line); - } else { + } else if (geo->version == NVM_OCSSD_SPEC_12) { pr_err("ppa: (%s: %x):ch:%d,lun:%d,blk:%d,pg:%d,pl:%d,sec:%d\n", msg, error, p->g.ch, p->g.lun, p->g.blk, p->g.pg, p->g.pl, p->g.sec); + } else { + pr_err("ppa: (%s: %x):ch:%d,lun:%d,chk:%d,sec:%d\n", + msg, error, + p->m.grp, p->m.pu, p->m.chk, p->m.sec); } } @@ -1166,13 +1293,13 @@ static inline void pblk_print_failed_rqd(struct pblk *pblk, struct nvm_rq *rqd, int bit = -1; if (rqd->nr_ppas == 1) { - print_ppa(&rqd->ppa_addr, "rqd", error); + print_ppa(&pblk->dev->geo, &rqd->ppa_addr, "rqd", error); return; } while ((bit = find_next_bit((void *)&rqd->ppa_status, rqd->nr_ppas, bit + 1)) < rqd->nr_ppas) { - print_ppa(&rqd->ppa_list[bit], "rqd", error); + print_ppa(&pblk->dev->geo, &rqd->ppa_list[bit], "rqd", error); } pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status); @@ -1188,16 +1315,25 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev, for (i = 0; i < nr_ppas; i++) { ppa = &ppas[i]; - if (!ppa->c.is_cached && - ppa->g.ch < geo->nr_chnls && - ppa->g.lun < geo->nr_luns && - ppa->g.pl < geo->nr_planes && - ppa->g.blk < geo->nr_chks && - ppa->g.pg < geo->ws_per_chk && - ppa->g.sec < geo->sec_per_pg) - continue; + if (geo->version == NVM_OCSSD_SPEC_12) { + if (!ppa->c.is_cached && + ppa->g.ch < geo->num_ch && + ppa->g.lun < geo->num_lun && + ppa->g.pl < geo->num_pln && + ppa->g.blk < geo->num_chk && + ppa->g.pg < geo->num_pg && + ppa->g.sec < geo->ws_min) + continue; + } else { + if (!ppa->c.is_cached && + ppa->m.grp < geo->num_ch && + ppa->m.pu < geo->num_lun && + ppa->m.chk < geo->num_chk && + ppa->m.sec < geo->clba) + continue; + } - print_ppa(ppa, "boundary", i); + print_ppa(geo, ppa, "boundary", i); return 1; } diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 458e1d38577d..004cc3cc6123 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -287,7 +287,8 @@ do { \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ - if (kthread_should_stop()) { \ + if (kthread_should_stop() || \ + test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \ set_current_state(TASK_RUNNING); \ return 0; \ } \ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 12e5197f186c..d338b7086013 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -188,6 +188,7 @@ #include <linux/refcount.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <linux/kthread.h> #include "bset.h" #include "util.h" @@ -258,10 +259,11 @@ struct bcache_device { struct gendisk *disk; unsigned long flags; -#define BCACHE_DEV_CLOSING 0 -#define BCACHE_DEV_DETACHING 1 -#define BCACHE_DEV_UNLINK_DONE 2 - +#define BCACHE_DEV_CLOSING 0 +#define BCACHE_DEV_DETACHING 1 +#define BCACHE_DEV_UNLINK_DONE 2 +#define BCACHE_DEV_WB_RUNNING 3 +#define BCACHE_DEV_RATE_DW_RUNNING 4 unsigned nr_stripes; unsigned stripe_size; atomic_t *stripe_sectors_dirty; @@ -286,6 +288,12 @@ struct io { sector_t last; }; +enum stop_on_failure { + BCH_CACHED_DEV_STOP_AUTO = 0, + BCH_CACHED_DEV_STOP_ALWAYS, + BCH_CACHED_DEV_STOP_MODE_MAX, +}; + struct cached_dev { struct list_head list; struct bcache_device disk; @@ -359,6 +367,7 @@ struct cached_dev { unsigned sequential_cutoff; unsigned readahead; + unsigned io_disable:1; unsigned verify:1; unsigned bypass_torture_test:1; @@ -378,6 +387,11 @@ struct cached_dev { unsigned writeback_rate_i_term_inverse; unsigned writeback_rate_p_term_inverse; unsigned writeback_rate_minimum; + + enum stop_on_failure stop_when_cache_set_failed; +#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64 + atomic_t io_errors; + unsigned error_limit; }; enum alloc_reserve { @@ -474,10 +488,15 @@ struct gc_stat { * * CACHE_SET_RUNNING means all cache devices have been registered and journal * replay is complete. + * + * CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all + * external and internal I/O should be denied when this flag is set. + * */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 #define CACHE_SET_RUNNING 2 +#define CACHE_SET_IO_DISABLE 3 struct cache_set { struct closure cl; @@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c) wake_up_process(ca->alloc_thread); } +static inline void closure_bio_submit(struct cache_set *c, + struct bio *bio, + struct closure *cl) +{ + closure_get(cl); + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return; + } + generic_make_request(bio); +} + +/* + * Prevent the kthread exits directly, and make sure when kthread_stop() + * is called to stop a kthread, it is still alive. If a kthread might be + * stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is + * necessary before the kthread returns. + */ +static inline void wait_for_kthread_stop(void) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } +} + /* Forward declarations */ +void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio); void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, blk_status_t, const char *); @@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned, struct bkey *, int, bool); bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned, unsigned, unsigned, bool); +bool bch_cached_dev_error(struct cached_dev *dc); __printf(2, 3) bool bch_cache_set_error(struct cache_set *, const char *, ...); @@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *); extern struct workqueue_struct *bcache_wq; extern const char * const bch_cache_modes[]; +extern const char * const bch_stop_on_failure_modes[]; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index e56d3ecdbfcb..579c696a5fe0 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init); static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, btree_iter_cmp_fn *cmp) { - struct btree_iter_set unused; + struct btree_iter_set b __maybe_unused; struct bkey *ret = NULL; if (!btree_iter_end(iter)) { @@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter, } if (iter->data->k == iter->data->end) - heap_pop(iter, unused, cmp); + heap_pop(iter, b, cmp); else heap_sift(iter, 0, cmp); } diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index fa506c1aa524..0c24280f3b98 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned); #ifdef CONFIG_BCACHE_DEBUG int __bch_count_data(struct btree_keys *); -void __bch_check_keys(struct btree_keys *, const char *, ...); +void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...); void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); void bch_dump_bucket(struct btree_keys *); #else static inline int __bch_count_data(struct btree_keys *b) { return -1; } -static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {} +static inline void __printf(2, 3) + __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {} static inline void bch_dump_bucket(struct btree_keys *b) {} void bch_dump_bset(struct btree_keys *, struct bset *, unsigned); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index fad9fe8817eb..17936b2dc7d6 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, struct btree *b, *t; unsigned long i, nr = sc->nr_to_scan; unsigned long freed = 0; + unsigned int btree_cache_used; if (c->shrinker_disabled) return SHRINK_STOP; @@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, nr = min_t(unsigned long, nr, mca_can_free(c)); i = 0; + btree_cache_used = c->btree_cache_used; list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) { - if (freed >= nr) - break; + if (nr <= 0) + goto out; if (++i > 3 && !mca_reap(b, 0, false)) { @@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, rw_unlock(true, b); freed++; } + nr--; } - for (i = 0; (nr--) && i < c->btree_cache_used; i++) { + for (; (nr--) && i < btree_cache_used; i++) { if (list_empty(&c->btree_cache)) goto out; @@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, } out: mutex_unlock(&c->bucket_lock); - return freed; + return freed * c->btree_pages; } static unsigned long bch_mca_count(struct shrinker *shrink, @@ -959,7 +962,7 @@ err: return b; } -/** +/* * bch_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * @@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c) btree_gc_start(c); + /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */ do { ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); @@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c) if (ret && ret != -EAGAIN) pr_warn("gc failed!"); - } while (ret); + } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); bch_btree_gc_finish(c); wake_up_allocators(c); @@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg) while (1) { wait_event_interruptible(c->gc_wait, - kthread_should_stop() || gc_should_run(c)); + kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags) || + gc_should_run(c)); - if (kthread_should_stop()) + if (kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) break; set_gc_sectors(c); bch_btree_gc(c); } + wait_for_kthread_stop(); return 0; } @@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, if (b->key.ptr[0] != btree_ptr || b->seq != seq + 1) { - op->lock = b->level; + op->lock = b->level; goto out; } } diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index 7f12920c14f7..0e14969182c6 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v) } EXPORT_SYMBOL(closure_sub); -/** +/* * closure_put - decrement a closure's refcount */ void closure_put(struct closure *cl) @@ -55,7 +55,7 @@ void closure_put(struct closure *cl) } EXPORT_SYMBOL(closure_put); -/** +/* * closure_wake_up - wake up all closures on a wait list, without memory barrier */ void __closure_wake_up(struct closure_waitlist *wait_list) @@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up); /** * closure_wait - add a closure to a waitlist - * - * @waitlist will own a ref on @cl, which will be released when + * @waitlist: will own a ref on @cl, which will be released when * closure_wake_up() is called on @waitlist. + * @cl: closure pointer. * */ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl) @@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl) } EXPORT_SYMBOL(closure_debug_destroy); -static struct dentry *debug; +static struct dentry *closure_debug; static int debug_seq_show(struct seq_file *f, void *data) { @@ -199,11 +199,12 @@ static const struct file_operations debug_ops = { .release = single_release }; -void __init closure_debug_init(void) +int __init closure_debug_init(void) { - debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops); + closure_debug = debugfs_create_file("closures", + 0400, bcache_debug, NULL, &debug_ops); + return IS_ERR_OR_NULL(closure_debug); } - #endif MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>"); diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 3b9dfc9962ad..71427eb5fdae 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -105,6 +105,7 @@ struct closure; struct closure_syncer; typedef void (closure_fn) (struct closure *); +extern struct dentry *bcache_debug; struct closure_waitlist { struct llist_head list; @@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl) #ifdef CONFIG_BCACHE_CLOSURES_DEBUG -void closure_debug_init(void); +int closure_debug_init(void); void closure_debug_create(struct closure *cl); void closure_debug_destroy(struct closure *cl); #else -static inline void closure_debug_init(void) {} +static inline int closure_debug_init(void) { return 0; } static inline void closure_debug_create(struct closure *cl) {} static inline void closure_debug_destroy(struct closure *cl) {} diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index af89408befe8..028f7b386e01 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -17,7 +17,7 @@ #include <linux/random.h> #include <linux/seq_file.h> -static struct dentry *debug; +struct dentry *bcache_debug; #ifdef CONFIG_BCACHE_DEBUG @@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = { void bch_debug_init_cache_set(struct cache_set *c) { - if (!IS_ERR_OR_NULL(debug)) { + if (!IS_ERR_OR_NULL(bcache_debug)) { char name[50]; snprintf(name, 50, "bcache-%pU", c->sb.set_uuid); - c->debug = debugfs_create_file(name, 0400, debug, c, + c->debug = debugfs_create_file(name, 0400, bcache_debug, c, &cache_set_debug_ops); } } @@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c) void bch_debug_exit(void) { - if (!IS_ERR_OR_NULL(debug)) - debugfs_remove_recursive(debug); + if (!IS_ERR_OR_NULL(bcache_debug)) + debugfs_remove_recursive(bcache_debug); } int __init bch_debug_init(struct kobject *kobj) { - debug = debugfs_create_dir("bcache", NULL); + bcache_debug = debugfs_create_dir("bcache", NULL); - return IS_ERR_OR_NULL(debug); + return IS_ERR_OR_NULL(bcache_debug); } diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index f9d391711595..c334e6666461 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -534,7 +534,6 @@ err: static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) { struct btree *b = container_of(bk, struct btree, keys); - struct bucket *g; unsigned i, stale; if (!KEY_PTRS(k) || @@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) return false; for (i = 0; i < KEY_PTRS(k); i++) { - g = PTR_BUCKET(b->c, k, i); stale = ptr_stale(b->c, k, i); btree_bug_on(stale > 96, b, diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index a783c5a41ff1..7fac97ae036e 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); b->submit_time_us = local_clock_us(); - closure_bio_submit(bio, bio->bi_private); + closure_bio_submit(c, bio, bio->bi_private); } void bch_submit_bbio(struct bio *bio, struct cache_set *c, @@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c, } /* IO errors */ +void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) +{ + char buf[BDEVNAME_SIZE]; + unsigned errors; + + WARN_ONCE(!dc, "NULL pointer of struct cached_dev"); + + errors = atomic_add_return(1, &dc->io_errors); + if (errors < dc->error_limit) + pr_err("%s: IO error on backing device, unrecoverable", + bio_devname(bio, buf)); + else + bch_cached_dev_error(dc); +} void bch_count_io_errors(struct cache *ca, blk_status_t error, diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 1b736b860739..18f1b5239620 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset; bio_set_op_attrs(bio, REQ_OP_READ, 0); bch_bio_map(bio, data); - closure_bio_submit(bio, &cl); + closure_bio_submit(ca->set, bio, &cl); closure_sync(&cl); /* This function could be simpler now since we no longer write @@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c) struct cache *ca; uint64_t last_seq; unsigned iter, n = 0; - atomic_t p; + atomic_t p __maybe_unused; atomic_long_inc(&c->reclaim); @@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl) } static void journal_write_unlock(struct closure *cl) + __releases(&c->journal.lock) { struct cache_set *c = container_of(cl, struct cache_set, journal.io); @@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl) spin_unlock(&c->journal.lock); while ((bio = bio_list_pop(&list))) - closure_bio_submit(bio, cl); + closure_bio_submit(c, bio, cl); continue_at(cl, journal_write_done, NULL); } @@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c) static struct journal_write *journal_wait_for_write(struct cache_set *c, unsigned nkeys) + __acquires(&c->journal.lock) { size_t sectors; struct closure cl; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 6422846b546e..a65e3365eeb9 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl) } op->insert_data_done = true; + /* get in bch_data_insert() */ bio_put(bio); out: continue_at(cl, bch_data_insert_keys, op->wq); @@ -295,6 +296,7 @@ err: /** * bch_data_insert - stick some data in the cache + * @cl: closure pointer. * * This is the starting point for any data to end up in a cache device; it could * be from a normal write, or a writeback write, or a write to a flash only @@ -630,6 +632,41 @@ static void request_endio(struct bio *bio) closure_put(cl); } +static void backing_request_endio(struct bio *bio) +{ + struct closure *cl = bio->bi_private; + + if (bio->bi_status) { + struct search *s = container_of(cl, struct search, cl); + struct cached_dev *dc = container_of(s->d, + struct cached_dev, disk); + /* + * If a bio has REQ_PREFLUSH for writeback mode, it is + * speically assembled in cached_dev_write() for a non-zero + * write request which has REQ_PREFLUSH. we don't set + * s->iop.status by this failure, the status will be decided + * by result of bch_data_insert() operation. + */ + if (unlikely(s->iop.writeback && + bio->bi_opf & REQ_PREFLUSH)) { + char buf[BDEVNAME_SIZE]; + + bio_devname(bio, buf); + pr_err("Can't flush %s: returned bi_status %i", + buf, bio->bi_status); + } else { + /* set to orig_bio->bi_status in bio_complete() */ + s->iop.status = bio->bi_status; + } + s->recoverable = false; + /* should count I/O error for backing device here */ + bch_count_backing_io_errors(dc, bio); + } + + bio_put(bio); + closure_put(cl); +} + static void bio_complete(struct search *s) { if (s->orig_bio) { @@ -644,13 +681,21 @@ static void bio_complete(struct search *s) } } -static void do_bio_hook(struct search *s, struct bio *orig_bio) +static void do_bio_hook(struct search *s, + struct bio *orig_bio, + bio_end_io_t *end_io_fn) { struct bio *bio = &s->bio.bio; bio_init(bio, NULL, 0); __bio_clone_fast(bio, orig_bio); - bio->bi_end_io = request_endio; + /* + * bi_end_io can be set separately somewhere else, e.g. the + * variants in, + * - cache_bio->bi_end_io from cached_dev_cache_miss() + * - n->bi_end_io from cache_lookup_fn() + */ + bio->bi_end_io = end_io_fn; bio->bi_private = &s->cl; bio_cnt_set(bio, 3); @@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio, s = mempool_alloc(d->c->search, GFP_NOIO); closure_init(&s->cl, NULL); - do_bio_hook(s, bio); + do_bio_hook(s, bio, request_endio); s->orig_bio = bio; s->cache_miss = NULL; @@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl) trace_bcache_read_retry(s->orig_bio); s->iop.status = 0; - do_bio_hook(s, s->orig_bio); + do_bio_hook(s, s->orig_bio, backing_request_endio); /* XXX: invalidate cache */ - closure_bio_submit(bio, cl); + /* I/O request sent to backing device */ + closure_bio_submit(s->iop.c, bio, cl); } continue_at(cl, cached_dev_cache_miss_done, NULL); @@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, bio_copy_dev(cache_bio, miss); cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; - cache_bio->bi_end_io = request_endio; + cache_bio->bi_end_io = backing_request_endio; cache_bio->bi_private = &s->cl; bch_bio_map(cache_bio, NULL); @@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_miss = miss; s->iop.bio = cache_bio; bio_get(cache_bio); - closure_bio_submit(cache_bio, &s->cl); + /* I/O request sent to backing device */ + closure_bio_submit(s->iop.c, cache_bio, &s->cl); return ret; out_put: bio_put(cache_bio); out_submit: - miss->bi_end_io = request_endio; + miss->bi_end_io = backing_request_endio; miss->bi_private = &s->cl; - closure_bio_submit(miss, &s->cl); + /* I/O request sent to backing device */ + closure_bio_submit(s->iop.c, miss, &s->cl); return ret; } @@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) s->iop.bio = s->orig_bio; bio_get(s->iop.bio); - if ((bio_op(bio) != REQ_OP_DISCARD) || - blk_queue_discard(bdev_get_queue(dc->bdev))) - closure_bio_submit(bio, cl); + if (bio_op(bio) == REQ_OP_DISCARD && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + goto insert_data; + + /* I/O request sent to backing device */ + bio->bi_end_io = backing_request_endio; + closure_bio_submit(s->iop.c, bio, cl); + } else if (s->iop.writeback) { bch_writeback_add(dc); s->iop.bio = bio; if (bio->bi_opf & REQ_PREFLUSH) { - /* Also need to send a flush to the backing device */ - struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, - dc->disk.bio_split); - + /* + * Also need to send a flush to the backing + * device. + */ + struct bio *flush; + + flush = bio_alloc_bioset(GFP_NOIO, 0, + dc->disk.bio_split); + if (!flush) { + s->iop.status = BLK_STS_RESOURCE; + goto insert_data; + } bio_copy_dev(flush, bio); - flush->bi_end_io = request_endio; + flush->bi_end_io = backing_request_endio; flush->bi_private = cl; flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - - closure_bio_submit(flush, cl); + /* I/O request sent to backing device */ + closure_bio_submit(s->iop.c, flush, cl); } } else { s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); - - closure_bio_submit(bio, cl); + /* I/O request sent to backing device */ + bio->bi_end_io = backing_request_endio; + closure_bio_submit(s->iop.c, bio, cl); } +insert_data: closure_call(&s->iop.cl, bch_data_insert, NULL, cl); continue_at(cl, cached_dev_write_complete, NULL); } @@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl) bch_journal_meta(s->iop.c, cl); /* If it's a flush, we send the flush to the backing device too */ - closure_bio_submit(bio, cl); + bio->bi_end_io = backing_request_endio; + closure_bio_submit(s->iop.c, bio, cl); continue_at(cl, cached_dev_bio_complete, NULL); } +struct detached_dev_io_private { + struct bcache_device *d; + unsigned long start_time; + bio_end_io_t *bi_end_io; + void *bi_private; +}; + +static void detached_dev_end_io(struct bio *bio) +{ + struct detached_dev_io_private *ddip; + + ddip = bio->bi_private; + bio->bi_end_io = ddip->bi_end_io; + bio->bi_private = ddip->bi_private; + + generic_end_io_acct(ddip->d->disk->queue, + bio_data_dir(bio), + &ddip->d->disk->part0, ddip->start_time); + + if (bio->bi_status) { + struct cached_dev *dc = container_of(ddip->d, + struct cached_dev, disk); + /* should count I/O error for backing device here */ + bch_count_backing_io_errors(dc, bio); + } + + kfree(ddip); + bio->bi_end_io(bio); +} + +static void detached_dev_do_request(struct bcache_device *d, struct bio *bio) +{ + struct detached_dev_io_private *ddip; + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + + /* + * no need to call closure_get(&dc->disk.cl), + * because upper layer had already opened bcache device, + * which would call closure_get(&dc->disk.cl) + */ + ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO); + ddip->d = d; + ddip->start_time = jiffies; + ddip->bi_end_io = bio->bi_end_io; + ddip->bi_private = bio->bi_private; + bio->bi_end_io = detached_dev_end_io; + bio->bi_private = ddip; + + if ((bio_op(bio) == REQ_OP_DISCARD) && + !blk_queue_discard(bdev_get_queue(dc->bdev))) + bio->bi_end_io(bio); + else + generic_make_request(bio); +} + /* Cached devices - read & write stuff */ static blk_qc_t cached_dev_make_request(struct request_queue *q, @@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); + if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) || + dc->io_disable)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; + } + atomic_set(&dc->backing_idle, 0); generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); @@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, else cached_dev_read(dc, s); } - } else { - if ((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(dc->bdev))) - bio_endio(bio); - else - generic_make_request(bio); - } + } else + /* I/O request sent to backing device */ + detached_dev_do_request(d, bio); return BLK_QC_T_NONE; } @@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, struct bcache_device *d = bio->bi_disk->private_data; int rw = bio_data_dir(bio); + if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; + } + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f2273143b3cb..d90d9e59ca00 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = { NULL }; +/* Default is -1; we skip past it for stop_when_cache_set_failed */ +const char * const bch_stop_on_failure_modes[] = { + "default", + "auto", + "always", + NULL +}; + static struct kobject *bcache_kobj; struct mutex bch_register_lock; LIST_HEAD(bch_cache_sets); @@ -265,6 +273,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) bio->bi_private = dc; closure_get(cl); + /* I/O request sent to backing device */ __write_super(&dc->sb, bio); closure_return_with_destructor(cl, bch_write_bdev_super_unlock); @@ -521,7 +530,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, ca->disk_buckets); - closure_bio_submit(bio, &ca->prio); + closure_bio_submit(ca->set, bio, &ca->prio); closure_sync(cl); } @@ -769,6 +778,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, sector_t sectors) { struct request_queue *q; + const size_t max_stripes = min_t(size_t, INT_MAX, + SIZE_MAX / sizeof(atomic_t)); size_t n; int idx; @@ -777,9 +788,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); - if (!d->nr_stripes || - d->nr_stripes > INT_MAX || - d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { + if (!d->nr_stripes || d->nr_stripes > max_stripes) { pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", (unsigned)d->nr_stripes); return -ENOMEM; @@ -833,9 +842,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, q->limits.io_min = block_size; q->limits.logical_block_size = block_size; q->limits.physical_block_size = block_size; - set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); - clear_bit(QUEUE_FLAG_ADD_RANDOM, &d->disk->queue->queue_flags); - set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, d->disk->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, d->disk->queue); blk_queue_write_cache(q, true, true); @@ -899,6 +908,31 @@ void bch_cached_dev_run(struct cached_dev *dc) pr_debug("error creating sysfs link"); } +/* + * If BCACHE_DEV_RATE_DW_RUNNING is set, it means routine of the delayed + * work dc->writeback_rate_update is running. Wait until the routine + * quits (BCACHE_DEV_RATE_DW_RUNNING is clear), then continue to + * cancel it. If BCACHE_DEV_RATE_DW_RUNNING is not clear after time_out + * seconds, give up waiting here and continue to cancel it too. + */ +static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) +{ + int time_out = WRITEBACK_RATE_UPDATE_SECS_MAX * HZ; + + do { + if (!test_bit(BCACHE_DEV_RATE_DW_RUNNING, + &dc->disk.flags)) + break; + time_out--; + schedule_timeout_interruptible(1); + } while (time_out > 0); + + if (time_out == 0) + pr_warn("give up waiting for dc->writeback_write_update to quit"); + + cancel_delayed_work_sync(&dc->writeback_rate_update); +} + static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); @@ -911,7 +945,9 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_lock(&bch_register_lock); - cancel_delayed_work_sync(&dc->writeback_rate_update); + if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + cancel_writeback_rate_update_dwork(dc); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) { kthread_stop(dc->writeback_thread); dc->writeback_thread = NULL; @@ -954,6 +990,7 @@ void bch_cached_dev_detach(struct cached_dev *dc) closure_get(&dc->disk.cl); bch_writeback_queue(dc); + cached_dev_put(dc); } @@ -1065,7 +1102,6 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(&dc->disk); atomic_set(&dc->has_dirty, 1); - refcount_inc(&dc->count); bch_writeback_queue(dc); } @@ -1093,14 +1129,16 @@ static void cached_dev_free(struct closure *cl) { struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); - cancel_delayed_work_sync(&dc->writeback_rate_update); + mutex_lock(&bch_register_lock); + + if (test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + cancel_writeback_rate_update_dwork(dc); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) kthread_stop(dc->writeback_thread); if (dc->writeback_write_wq) destroy_workqueue(dc->writeback_write_wq); - mutex_lock(&bch_register_lock); - if (atomic_read(&dc->running)) bd_unlink_disk_holder(dc->bdev, dc->disk.disk); bcache_device_free(&dc->disk); @@ -1170,6 +1208,12 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) max(dc->disk.disk->queue->backing_dev_info->ra_pages, q->backing_dev_info->ra_pages); + atomic_set(&dc->io_errors, 0); + dc->io_disable = false; + dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT; + /* default to auto */ + dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO; + bch_cached_dev_request_init(dc); bch_cached_dev_writeback_init(dc); return 0; @@ -1321,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) return flash_dev_run(c, u); } +bool bch_cached_dev_error(struct cached_dev *dc) +{ + char name[BDEVNAME_SIZE]; + + if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags)) + return false; + + dc->io_disable = true; + /* make others know io_disable is true earlier */ + smp_mb(); + + pr_err("stop %s: too many IO errors on backing device %s\n", + dc->disk.disk->disk_name, bdevname(dc->bdev, name)); + + bcache_device_stop(&dc->disk); + return true; +} + /* Cache set */ __printf(2, 3) @@ -1332,6 +1394,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) test_bit(CACHE_SET_STOPPING, &c->flags)) return false; + if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already set"); + /* XXX: we can be called from atomic context acquire_console_sem(); */ @@ -1443,25 +1508,72 @@ static void cache_set_flush(struct closure *cl) closure_return(cl); } +/* + * This function is only called when CACHE_SET_IO_DISABLE is set, which means + * cache set is unregistering due to too many I/O errors. In this condition, + * the bcache device might be stopped, it depends on stop_when_cache_set_failed + * value and whether the broken cache has dirty data: + * + * dc->stop_when_cache_set_failed dc->has_dirty stop bcache device + * BCH_CACHED_STOP_AUTO 0 NO + * BCH_CACHED_STOP_AUTO 1 YES + * BCH_CACHED_DEV_STOP_ALWAYS 0 YES + * BCH_CACHED_DEV_STOP_ALWAYS 1 YES + * + * The expected behavior is, if stop_when_cache_set_failed is configured to + * "auto" via sysfs interface, the bcache device will not be stopped if the + * backing device is clean on the broken cache device. + */ +static void conditional_stop_bcache_device(struct cache_set *c, + struct bcache_device *d, + struct cached_dev *dc) +{ + if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { + pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.", + d->disk->disk_name, c->sb.set_uuid); + bcache_device_stop(d); + } else if (atomic_read(&dc->has_dirty)) { + /* + * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO + * and dc->has_dirty == 1 + */ + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", + d->disk->disk_name); + bcache_device_stop(d); + } else { + /* + * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO + * and dc->has_dirty == 0 + */ + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.", + d->disk->disk_name); + } +} + static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); struct cached_dev *dc; + struct bcache_device *d; size_t i; mutex_lock(&bch_register_lock); - for (i = 0; i < c->devices_max_used; i++) - if (c->devices[i]) { - if (!UUID_FLASH_ONLY(&c->uuids[i]) && - test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { - dc = container_of(c->devices[i], - struct cached_dev, disk); - bch_cached_dev_detach(dc); - } else { - bcache_device_stop(c->devices[i]); - } + for (i = 0; i < c->devices_max_used; i++) { + d = c->devices[i]; + if (!d) + continue; + + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(d, struct cached_dev, disk); + bch_cached_dev_detach(dc); + if (test_bit(CACHE_SET_IO_DISABLE, &c->flags)) + conditional_stop_bcache_device(c, d, dc); + } else { + bcache_device_stop(d); } + } mutex_unlock(&bch_register_lock); @@ -1567,6 +1679,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; c->error_limit = DEFAULT_IO_ERROR_LIMIT; + WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)); return c; err: @@ -2148,7 +2261,6 @@ static int __init bcache_init(void) mutex_init(&bch_register_lock); init_waitqueue_head(&unregister_wait); register_reboot_notifier(&reboot); - closure_debug_init(); bcache_major = register_blkdev(0, "bcache"); if (bcache_major < 0) { @@ -2160,7 +2272,7 @@ static int __init bcache_init(void) if (!(bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0)) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || bch_request_init() || - bch_debug_init(bcache_kobj) || + bch_debug_init(bcache_kobj) || closure_debug_init() || sysfs_create_files(bcache_kobj, files)) goto err; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 78cd7bd50fdd..dfeef583ee50 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -78,6 +78,7 @@ rw_attribute(congested_write_threshold_us); rw_attribute(sequential_cutoff); rw_attribute(data_csum); rw_attribute(cache_mode); +rw_attribute(stop_when_cache_set_failed); rw_attribute(writeback_metadata); rw_attribute(writeback_running); rw_attribute(writeback_percent); @@ -95,6 +96,7 @@ read_attribute(partial_stripes_expensive); rw_attribute(synchronous); rw_attribute(journal_delay_ms); +rw_attribute(io_disable); rw_attribute(discard); rw_attribute(running); rw_attribute(label); @@ -125,6 +127,12 @@ SHOW(__bch_cached_dev) bch_cache_modes + 1, BDEV_CACHE_MODE(&dc->sb)); + if (attr == &sysfs_stop_when_cache_set_failed) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_stop_on_failure_modes + 1, + dc->stop_when_cache_set_failed); + + sysfs_printf(data_csum, "%i", dc->disk.data_csum); var_printf(verify, "%i"); var_printf(bypass_torture_test, "%i"); @@ -133,7 +141,9 @@ SHOW(__bch_cached_dev) var_print(writeback_delay); var_print(writeback_percent); sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9); - + sysfs_hprint(io_errors, atomic_read(&dc->io_errors)); + sysfs_printf(io_error_limit, "%i", dc->error_limit); + sysfs_printf(io_disable, "%i", dc->io_disable); var_print(writeback_rate_update_seconds); var_print(writeback_rate_i_term_inverse); var_print(writeback_rate_p_term_inverse); @@ -173,7 +183,7 @@ SHOW(__bch_cached_dev) sysfs_hprint(dirty_data, bcache_dev_sectors_dirty(&dc->disk) << 9); - sysfs_hprint(stripe_size, dc->disk.stripe_size << 9); + sysfs_hprint(stripe_size, ((uint64_t)dc->disk.stripe_size) << 9); var_printf(partial_stripes_expensive, "%u"); var_hprint(sequential_cutoff); @@ -224,6 +234,14 @@ STORE(__cached_dev) d_strtoul(writeback_rate_i_term_inverse); d_strtoul_nonzero(writeback_rate_p_term_inverse); + sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX); + + if (attr == &sysfs_io_disable) { + int v = strtoul_or_return(buf); + + dc->io_disable = v ? 1 : 0; + } + d_strtoi_h(sequential_cutoff); d_strtoi_h(readahead); @@ -246,6 +264,15 @@ STORE(__cached_dev) } } + if (attr == &sysfs_stop_when_cache_set_failed) { + v = bch_read_string_list(buf, bch_stop_on_failure_modes + 1); + + if (v < 0) + return v; + + dc->stop_when_cache_set_failed = v; + } + if (attr == &sysfs_label) { if (size > SB_LABEL_SIZE) return -EINVAL; @@ -309,7 +336,8 @@ STORE(bch_cached_dev) bch_writeback_queue(dc); if (attr == &sysfs_writeback_percent) - schedule_delayed_work(&dc->writeback_rate_update, + if (!test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) + schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); mutex_unlock(&bch_register_lock); @@ -324,6 +352,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_data_csum, #endif &sysfs_cache_mode, + &sysfs_stop_when_cache_set_failed, &sysfs_writeback_metadata, &sysfs_writeback_running, &sysfs_writeback_delay, @@ -333,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_writeback_rate_i_term_inverse, &sysfs_writeback_rate_p_term_inverse, &sysfs_writeback_rate_debug, + &sysfs_errors, + &sysfs_io_error_limit, + &sysfs_io_disable, &sysfs_dirty_data, &sysfs_stripe_size, &sysfs_partial_stripes_expensive, @@ -590,6 +622,8 @@ SHOW(__bch_cache_set) sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + sysfs_printf(io_disable, "%i", + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); if (attr == &sysfs_bset_tree_stats) return bch_bset_print_stats(c, buf); @@ -679,6 +713,20 @@ STORE(__bch_cache_set) if (attr == &sysfs_io_error_halflife) c->error_decay = strtoul_or_return(buf) / 88; + if (attr == &sysfs_io_disable) { + int v = strtoul_or_return(buf); + + if (v) { + if (test_and_set_bit(CACHE_SET_IO_DISABLE, + &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already set"); + } else { + if (!test_and_clear_bit(CACHE_SET_IO_DISABLE, + &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already cleared"); + } + } + sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); sysfs_strtoul(verify, c->verify); sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); @@ -764,6 +812,7 @@ static struct attribute *bch_cache_set_internal_files[] = { &sysfs_gc_always_rewrite, &sysfs_btree_shrinker_disabled, &sysfs_copy_gc_enabled, + &sysfs_io_disable, NULL }; KTYPE(bch_cache_set_internal); diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index a23cd6a14b74..74febd5230df 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -32,20 +32,27 @@ int bch_ ## name ## _h(const char *cp, type *res) \ case 'y': \ case 'z': \ u++; \ + /* fall through */ \ case 'e': \ u++; \ + /* fall through */ \ case 'p': \ u++; \ + /* fall through */ \ case 't': \ u++; \ + /* fall through */ \ case 'g': \ u++; \ + /* fall through */ \ case 'm': \ u++; \ + /* fall through */ \ case 'k': \ u++; \ if (e++ == cp) \ return -EINVAL; \ + /* fall through */ \ case '\n': \ case '\0': \ if (*e == '\n') \ @@ -75,10 +82,9 @@ STRTO_H(strtoll, long long) STRTO_H(strtoull, unsigned long long) /** - * bch_hprint() - formats @v to human readable string for sysfs. - * - * @v - signed 64 bit integer - * @buf - the (at least 8 byte) buffer to format the result into. + * bch_hprint - formats @v to human readable string for sysfs. + * @buf: the (at least 8 byte) buffer to format the result into. + * @v: signed 64 bit integer * * Returns the number of bytes used by format. */ @@ -218,13 +224,12 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time) } /** - * bch_next_delay() - increment @d by the amount of work done, and return how - * long to delay until the next time to do some work. - * - * @d - the struct bch_ratelimit to update - * @done - the amount of work done, in arbitrary units + * bch_next_delay() - update ratelimiting statistics and calculate next delay + * @d: the struct bch_ratelimit to update + * @done: the amount of work done, in arbitrary units * - * Returns the amount of time to delay by, in jiffies + * Increment @d by the amount of work done, and return how long to delay in + * jiffies until the next time to do some work. */ uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done) { diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index a6763db7f061..268024529edd 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev) return bdev->bd_inode->i_size >> 9; } -#define closure_bio_submit(bio, cl) \ -do { \ - closure_get(cl); \ - generic_make_request(bio); \ -} while (0) - uint64_t bch_crc64_update(uint64_t, const void *, size_t); uint64_t bch_crc64(const void *, size_t); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f1d2fc15abcc..4a9547cdcdc5 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -114,6 +114,27 @@ static void update_writeback_rate(struct work_struct *work) struct cached_dev *dc = container_of(to_delayed_work(work), struct cached_dev, writeback_rate_update); + struct cache_set *c = dc->disk.c; + + /* + * should check BCACHE_DEV_RATE_DW_RUNNING before calling + * cancel_delayed_work_sync(). + */ + set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); + /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ + smp_mb(); + + /* + * CACHE_SET_IO_DISABLE might be set via sysfs interface, + * check it here too. + */ + if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { + clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); + /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ + smp_mb(); + return; + } down_read(&dc->writeback_lock); @@ -123,8 +144,23 @@ static void update_writeback_rate(struct work_struct *work) up_read(&dc->writeback_lock); - schedule_delayed_work(&dc->writeback_rate_update, + /* + * CACHE_SET_IO_DISABLE might be set via sysfs interface, + * check it here too. + */ + if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { + schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); + } + + /* + * should check BCACHE_DEV_RATE_DW_RUNNING before calling + * cancel_delayed_work_sync(). + */ + clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); + /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ + smp_mb(); } static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) @@ -253,7 +289,8 @@ static void write_dirty(struct closure *cl) bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; - closure_bio_submit(&io->bio, cl); + /* I/O request sent to backing device */ + closure_bio_submit(io->dc->disk.c, &io->bio, cl); } atomic_set(&dc->writeback_sequence_next, next_sequence); @@ -279,7 +316,7 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - closure_bio_submit(&io->bio, cl); + closure_bio_submit(io->dc->disk.c, &io->bio, cl); continue_at(cl, write_dirty, io->dc->writeback_write_wq); } @@ -305,7 +342,9 @@ static void read_dirty(struct cached_dev *dc) next = bch_keybuf_next(&dc->writeback_keys); - while (!kthread_should_stop() && next) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && + next) { size = 0; nk = 0; @@ -402,7 +441,9 @@ static void read_dirty(struct cached_dev *dc) } } - while (!kthread_should_stop() && delay) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && + delay) { schedule_timeout_interruptible(delay); delay = writeback_delay(dc, 0); } @@ -558,21 +599,30 @@ static bool refill_dirty(struct cached_dev *dc) static int bch_writeback_thread(void *arg) { struct cached_dev *dc = arg; + struct cache_set *c = dc->disk.c; bool searched_full_index; bch_ratelimit_reset(&dc->writeback_rate); - while (!kthread_should_stop()) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { down_write(&dc->writeback_lock); set_current_state(TASK_INTERRUPTIBLE); - if (!atomic_read(&dc->has_dirty) || - (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) && - !dc->writeback_running)) { + /* + * If the bache device is detaching, skip here and continue + * to perform writeback. Otherwise, if no dirty data on cache, + * or there is dirty data on cache but writeback is disabled, + * the writeback thread should sleep here and wait for others + * to wake up it. + */ + if (!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) && + (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) { up_write(&dc->writeback_lock); - if (kthread_should_stop()) { + if (kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { set_current_state(TASK_RUNNING); - return 0; + break; } schedule(); @@ -585,9 +635,16 @@ static int bch_writeback_thread(void *arg) if (searched_full_index && RB_EMPTY_ROOT(&dc->writeback_keys.keys)) { atomic_set(&dc->has_dirty, 0); - cached_dev_put(dc); SET_BDEV_STATE(&dc->sb, BDEV_STATE_CLEAN); bch_write_bdev_super(dc, NULL); + /* + * If bcache device is detaching via sysfs interface, + * writeback thread should stop after there is no dirty + * data on cache. BCACHE_DEV_DETACHING flag is set in + * bch_cached_dev_detach(). + */ + if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) + break; } up_write(&dc->writeback_lock); @@ -599,6 +656,7 @@ static int bch_writeback_thread(void *arg) while (delay && !kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags) && !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) delay = schedule_timeout_interruptible(delay); @@ -606,6 +664,9 @@ static int bch_writeback_thread(void *arg) } } + cached_dev_put(dc); + wait_for_kthread_stop(); + return 0; } @@ -659,6 +720,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_rate_p_term_inverse = 40; dc->writeback_rate_i_term_inverse = 10000; + WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)); INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); } @@ -669,11 +731,15 @@ int bch_cached_dev_writeback_start(struct cached_dev *dc) if (!dc->writeback_write_wq) return -ENOMEM; + cached_dev_get(dc); dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); - if (IS_ERR(dc->writeback_thread)) + if (IS_ERR(dc->writeback_thread)) { + cached_dev_put(dc); return PTR_ERR(dc->writeback_thread); + } + WARN_ON(test_and_set_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)); schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 587b25599856..610fb01de629 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -39,7 +39,7 @@ static inline uint64_t bcache_flash_devs_sectors_dirty(struct cache_set *c) if (!d || !UUID_FLASH_ONLY(&c->uuids[i])) continue; - ret += bcache_dev_sectors_dirty(d); + ret += bcache_dev_sectors_dirty(d); } mutex_unlock(&bch_register_lock); @@ -105,8 +105,6 @@ static inline void bch_writeback_add(struct cached_dev *dc) { if (!atomic_read(&dc->has_dirty) && !atomic_xchg(&dc->has_dirty, 1)) { - refcount_inc(&dc->count); - if (BDEV_STATE(&dc->sb) != BDEV_STATE_DIRTY) { SET_BDEV_STATE(&dc->sb, BDEV_STATE_DIRTY); /* XXX: should do this synchronously */ diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7eb3e2a3c07d..954f4e3b68ac 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1857,7 +1857,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits = *limits; if (!dm_table_supports_discards(t)) { - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); /* Must also clear discard limits... */ q->limits.max_discard_sectors = 0; q->limits.max_hw_discard_sectors = 0; @@ -1865,7 +1865,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits.discard_alignment = 0; q->limits.discard_misaligned = 0; } else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; @@ -1875,15 +1875,15 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, blk_queue_write_cache(q, wc, fua); if (dm_table_supports_dax(t)) - queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); + blk_queue_flag_set(QUEUE_FLAG_DAX, q); if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); /* Ensure that all underlying devices are non-rotational. */ if (dm_table_all_devices_attribute(t, device_is_nonrot)) - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); else - queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; @@ -1891,9 +1891,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, q->limits.max_write_zeroes_sectors = 0; if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) - queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q); else - queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q); dm_table_verify_integrity(t); @@ -1904,7 +1904,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * have it set. */ if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 353ea0ede091..ded74e1eb0d1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1848,7 +1848,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_LIST_HEAD(&md->table_devices); spin_lock_init(&md->uevent_lock); - md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id); + md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id, NULL); if (!md->queue) goto bad; md->queue->queuedata = md; diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 773fc70dced7..4964323d936b 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -138,9 +138,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) } if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); /* * Here we calculate the device offsets. diff --git a/drivers/md/md.c b/drivers/md/md.c index 254e44e44668..3bea45e8ccff 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5206,12 +5206,12 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_state) sysfs_put(mddev->sysfs_state); + if (mddev->gendisk) + del_gendisk(mddev->gendisk); if (mddev->queue) blk_cleanup_queue(mddev->queue); - if (mddev->gendisk) { - del_gendisk(mddev->gendisk); + if (mddev->gendisk) put_disk(mddev->gendisk); - } percpu_ref_exit(&mddev->writes_pending); kfree(mddev); @@ -5619,9 +5619,9 @@ int md_run(struct mddev *mddev) if (mddev->degraded) nonrot = false; if (nonrot) - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); mddev->queue->backing_dev_info->congested_data = mddev; mddev->queue->backing_dev_info->congested_fn = md_congested; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 5ecba9eef441..584c10347267 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,9 +399,9 @@ static int raid0_run(struct mddev *mddev) discard_supported = true; } if (!discard_supported) - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); } /* calculate array device size */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fe872dc6712e..e2943fb74056 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1760,7 +1760,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) } } if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; } @@ -3110,10 +3110,10 @@ static int raid1_run(struct mddev *mddev) if (mddev->queue) { if (discard_supported) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c5e6c60fc0d4..3c60774c8430 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1845,7 +1845,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) break; } if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); print_conf(conf); return err; @@ -3846,10 +3846,10 @@ static int raid10_run(struct mddev *mddev) if (mddev->queue) { if (discard_supported) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); } /* need to check that every block has at least one working mirror */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b5d2601483e3..be117d0a65a8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7443,10 +7443,10 @@ static int raid5_run(struct mddev *mddev) if (devices_handle_discard_safely && mddev->queue->limits.max_discard_sectors >= (stripe >> 9) && mddev->queue->limits.discard_granularity >= stripe) - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_set(QUEUE_FLAG_DISCARD, mddev->queue); else - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, mddev->queue); blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index fd09b0960097..e8f1d4bb806a 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -444,12 +444,12 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr, { u64 *ptr = (u64 *)(pcr->host_sg_tbl_ptr) + pcr->sgi; u64 val; - u8 option = SG_VALID | SG_TRANS_DATA; + u8 option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA; pcr_dbg(pcr, "DMA addr: 0x%x, Len: 0x%x\n", (unsigned int)addr, len); if (end) - option |= SG_END; + option |= RTSX_SG_END; val = ((u64)addr << 32) | ((u64)len << 12) | option; put_unaligned_le64(val, ptr); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index a2b9c2500c4c..02485e310c81 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2659,7 +2659,6 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) * from being accepted. */ card = md->queue.card; - mmc_cleanup_queue(&md->queue); if (md->disk->flags & GENHD_FL_UP) { device_remove_file(disk_to_dev(md->disk), &md->force_ro); if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) && @@ -2669,6 +2668,7 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) del_gendisk(md->disk); } + mmc_cleanup_queue(&md->queue); mmc_blk_put(md); } } diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index 421fab7250ac..56e9a803db21 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -185,14 +185,14 @@ static void mmc_queue_setup_discard(struct request_queue *q, if (!max_discard) return; - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); blk_queue_max_discard_sectors(q, max_discard); q->limits.discard_granularity = card->pref_erase << 9; /* granularity must not be greater than max. discard */ if (card->pref_erase > max_discard) q->limits.discard_granularity = 0; if (mmc_can_secure_erase_trim(card)) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, q); + blk_queue_flag_set(QUEUE_FLAG_SECERASE, q); } /** @@ -356,8 +356,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, mq->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue); if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 9ec8f033ac5f..16ae4ae8e8f9 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -419,11 +419,11 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) blk_queue_logical_block_size(new->rq, tr->blksize); blk_queue_bounce_limit(new->rq, BLK_BOUNCE_HIGH); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, new->rq); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, new->rq); + blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq); if (tr->discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq); blk_queue_max_discard_sectors(new->rq, UINT_MAX); } diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 1bd7b3734751..62e9cb167aad 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -266,7 +266,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); q->queuedata = nsblk; disk = alloc_disk(0); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 4b95ac513de2..85de8053aa34 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1542,7 +1542,7 @@ static int btt_blk_init(struct btt *btt) blk_queue_make_request(btt->btt_queue, btt_make_request); blk_queue_logical_block_size(btt->btt_queue, btt->sector_size); blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue); btt->btt_queue->queuedata = btt; if (btt_meta_size(btt)) { diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8d6375ee0fda..184e070d50a2 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -29,7 +29,6 @@ enum { * BTT instance */ ND_MAX_LANES = 256, - SECTOR_SHIFT = 9, INT_LBASIZE_ALIGNMENT = 64, NVDIMM_IO_ATOMIC = 1, }; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 06f8dcc52ca6..5a96d30c294a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -343,7 +343,7 @@ static int pmem_attach_disk(struct device *dev, return -EBUSY; } - q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); + q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev), NULL); if (!q) return -ENOMEM; @@ -387,8 +387,8 @@ static int pmem_attach_disk(struct device *dev, blk_queue_physical_block_size(q, PAGE_SIZE); blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_set_unlocked(QUEUE_FLAG_DAX, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_DAX, q); q->queuedata = pmem; disk = alloc_disk_node(0, nid); diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index 441e67e3a9d7..aea459c65ae1 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -12,6 +12,7 @@ nvme-core-y := core.o nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o +nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o nvme-y += pci.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7aeca5db7916..197a6ba9700f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -100,11 +100,6 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); -static __le32 nvme_get_log_dw10(u8 lid, size_t size) -{ - return cpu_to_le32((((size / 4) - 1) << 16) | lid); -} - int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -135,6 +130,9 @@ static void nvme_delete_ctrl_work(struct work_struct *work) struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, delete_work); + dev_info(ctrl->device, + "Removing ctrl: NQN \"%s\"\n", ctrl->opts->subsysnqn); + flush_work(&ctrl->reset_work); nvme_stop_ctrl(ctrl); nvme_remove_namespaces(ctrl); @@ -948,7 +946,8 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n c.identify.opcode = nvme_admin_identify; c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST; c.identify.nsid = cpu_to_le32(nsid); - return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); + return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, + NVME_IDENTIFY_DATA_SIZE); } static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, @@ -1124,13 +1123,13 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl) struct nvme_ns *ns, *next; LIST_HEAD(rm_list); - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->disk && nvme_revalidate_disk(ns->disk)) { list_move_tail(&ns->list, &rm_list); } } - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &rm_list, list) nvme_ns_remove(ns); @@ -1358,7 +1357,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl, blk_queue_max_discard_sectors(queue, UINT_MAX); blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, queue); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); @@ -1449,6 +1448,8 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->noiob) nvme_set_chunk_size(ns); nvme_update_disk_info(disk, ns, id); + if (ns->ndev) + nvme_nvm_update_nvm_info(ns); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) nvme_update_disk_info(ns->head->disk, ns, id); @@ -2217,18 +2218,35 @@ out_unlock: return ret; } -static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, - size_t size) +int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 log_page, void *log, + size_t size, size_t offset) { struct nvme_command c = { }; + unsigned long dwlen = size / 4 - 1; + + c.get_log_page.opcode = nvme_admin_get_log_page; + + if (ns) + c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id); + else + c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL); - c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(NVME_NSID_ALL); - c.common.cdw10[0] = nvme_get_log_dw10(log_page, size); + c.get_log_page.lid = log_page; + c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1)); + c.get_log_page.numdu = cpu_to_le16(dwlen >> 16); + c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1)); + c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL); return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size); } +static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log, + size_t size) +{ + return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0); +} + static int nvme_get_effects_log(struct nvme_ctrl *ctrl) { int ret; @@ -2440,7 +2458,7 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) struct nvme_ns *ns; int ret; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); if (list_empty(&ctrl->namespaces)) { ret = -ENOTTY; goto out_unlock; @@ -2457,14 +2475,14 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp) dev_warn(ctrl->device, "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); kref_get(&ns->kref); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); ret = nvme_user_cmd(ctrl, ns, argp); nvme_put_ns(ns); return ret; out_unlock: - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); return ret; } @@ -2793,6 +2811,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && + !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -2893,7 +2912,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->head->ns_id == nsid) { if (!kref_get_unless_zero(&ns->kref)) @@ -2904,7 +2923,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) if (ns->head->ns_id > nsid) break; } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); return ret; } @@ -2949,7 +2968,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->queue = blk_mq_init_queue(ctrl->tagset); if (IS_ERR(ns->queue)) goto out_free_ns; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); + blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue); ns->queue->queuedata = ns; ns->ctrl = ctrl; @@ -3015,9 +3034,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) __nvme_revalidate_disk(disk, id); - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_add_tail(&ns->list, &ctrl->namespaces); - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); nvme_get_ctrl(ctrl); @@ -3033,6 +3052,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk->disk_name); nvme_mpath_add_disk(ns->head); + nvme_fault_inject_init(ns); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -3051,6 +3071,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; + nvme_fault_inject_fini(ns); if (ns->disk && ns->disk->flags & GENHD_FL_UP) { sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); @@ -3067,9 +3088,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_rcu(&ns->siblings); mutex_unlock(&ns->ctrl->subsys->lock); - mutex_lock(&ns->ctrl->namespaces_mutex); + down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); - mutex_unlock(&ns->ctrl->namespaces_mutex); + up_write(&ns->ctrl->namespaces_rwsem); synchronize_srcu(&ns->head->srcu); nvme_mpath_check_last_path(ns); @@ -3093,11 +3114,18 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *next; + LIST_HEAD(rm_list); + down_write(&ctrl->namespaces_rwsem); list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) { if (ns->head->ns_id > nsid) - nvme_ns_remove(ns); + list_move_tail(&ns->list, &rm_list); } + up_write(&ctrl->namespaces_rwsem); + + list_for_each_entry_safe(ns, next, &rm_list, list) + nvme_ns_remove(ns); + } static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) @@ -3107,7 +3135,7 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); int ret = 0; - ns_list = kzalloc(0x1000, GFP_KERNEL); + ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; @@ -3173,9 +3201,9 @@ static void nvme_scan_work(struct work_struct *work) } nvme_scan_ns_sequential(ctrl, nn); done: - mutex_lock(&ctrl->namespaces_mutex); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); - mutex_unlock(&ctrl->namespaces_mutex); + up_write(&ctrl->namespaces_rwsem); kfree(id); } @@ -3197,6 +3225,7 @@ EXPORT_SYMBOL_GPL(nvme_queue_scan); void nvme_remove_namespaces(struct nvme_ctrl *ctrl) { struct nvme_ns *ns, *next; + LIST_HEAD(ns_list); /* * The dead states indicates the controller was not gracefully @@ -3207,7 +3236,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) if (ctrl->state == NVME_CTRL_DEAD) nvme_kill_queues(ctrl); - list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) + down_write(&ctrl->namespaces_rwsem); + list_splice_init(&ctrl->namespaces, &ns_list); + up_write(&ctrl->namespaces_rwsem); + + list_for_each_entry_safe(ns, next, &ns_list, list) nvme_ns_remove(ns); } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); @@ -3337,6 +3370,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -3394,7 +3429,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->state = NVME_CTRL_NEW; spin_lock_init(&ctrl->lock); INIT_LIST_HEAD(&ctrl->namespaces); - mutex_init(&ctrl->namespaces_mutex); + init_rwsem(&ctrl->namespaces_rwsem); ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; @@ -3455,7 +3490,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); /* Forcibly unquiesce queues to avoid blocking dispatch */ if (ctrl->admin_q) @@ -3474,7 +3509,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) /* Forcibly unquiesce queues to avoid blocking dispatch */ blk_mq_unquiesce_queue(ns->queue); } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_kill_queues); @@ -3482,10 +3517,10 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unfreeze_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_unfreeze); @@ -3493,13 +3528,13 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); if (timeout <= 0) break; } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout); @@ -3507,10 +3542,10 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_freeze_queue_wait(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_wait_freeze); @@ -3518,10 +3553,10 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_freeze_queue_start(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_freeze); @@ -3529,10 +3564,10 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_quiesce_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_stop_queues); @@ -3540,10 +3575,10 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) blk_mq_unquiesce_queue(ns->queue); - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } EXPORT_SYMBOL_GPL(nvme_start_queues); diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c new file mode 100644 index 000000000000..02632266ac06 --- /dev/null +++ b/drivers/nvme/host/fault_inject.c @@ -0,0 +1,79 @@ +/* + * fault injection support for nvme. + * + * Copyright (c) 2018, Oracle and/or its affiliates + * + */ + +#include <linux/moduleparam.h> +#include "nvme.h" + +static DECLARE_FAULT_ATTR(fail_default_attr); +/* optional fault injection attributes boot time option: + * nvme_core.fail_request=<interval>,<probability>,<space>,<times> + */ +static char *fail_request; +module_param(fail_request, charp, 0000); + +void nvme_fault_inject_init(struct nvme_ns *ns) +{ + struct dentry *dir, *parent; + char *name = ns->disk->disk_name; + struct nvme_fault_inject *fault_inj = &ns->fault_inject; + struct fault_attr *attr = &fault_inj->attr; + + /* set default fault injection attribute */ + if (fail_request) + setup_fault_attr(&fail_default_attr, fail_request); + + /* create debugfs directory and attribute */ + parent = debugfs_create_dir(name, NULL); + if (!parent) { + pr_warn("%s: failed to create debugfs directory\n", name); + return; + } + + *attr = fail_default_attr; + dir = fault_create_debugfs_attr("fault_inject", parent, attr); + if (IS_ERR(dir)) { + pr_warn("%s: failed to create debugfs attr\n", name); + debugfs_remove_recursive(parent); + return; + } + ns->fault_inject.parent = parent; + + /* create debugfs for status code and dont_retry */ + fault_inj->status = NVME_SC_INVALID_OPCODE; + fault_inj->dont_retry = true; + debugfs_create_x16("status", 0600, dir, &fault_inj->status); + debugfs_create_bool("dont_retry", 0600, dir, &fault_inj->dont_retry); +} + +void nvme_fault_inject_fini(struct nvme_ns *ns) +{ + /* remove debugfs directories */ + debugfs_remove_recursive(ns->fault_inject.parent); +} + +void nvme_should_fail(struct request *req) +{ + struct gendisk *disk = req->rq_disk; + struct nvme_ns *ns = NULL; + u16 status; + + /* + * make sure this request is coming from a valid namespace + */ + if (!disk) + return; + + ns = disk->private_data; + if (ns && should_fail(&ns->fault_inject.attr, 1)) { + /* inject status code and DNR bit */ + status = ns->fault_inject.status; + if (ns->fault_inject.dont_retry) + status |= NVME_SC_DNR; + nvme_req(req)->status = status; + } +} +EXPORT_SYMBOL_GPL(nvme_should_fail); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1dc1387b7134..c6e719b2f3ca 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -588,6 +588,8 @@ nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, return ERR_PTR(-ESTALE); } + rport->remoteport.port_role = pinfo->port_role; + rport->remoteport.port_id = pinfo->port_id; rport->remoteport.port_state = FC_OBJSTATE_ONLINE; rport->dev_loss_end = 0; @@ -768,8 +770,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) */ if (nvme_reset_ctrl(&ctrl->ctrl)) { dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Couldn't schedule reset. " - "Deleting controller.\n", + "NVME-FC{%d}: Couldn't schedule reset.\n", ctrl->cnum); nvme_delete_ctrl(&ctrl->ctrl); } @@ -836,8 +837,7 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) /* if dev_loss_tmo==0, dev loss is immediate */ if (!portptr->dev_loss_tmo) { dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: controller connectivity lost. " - "Deleting controller.\n", + "NVME-FC{%d}: controller connectivity lost.\n", ctrl->cnum); nvme_delete_ctrl(&ctrl->ctrl); } else @@ -2076,20 +2076,10 @@ nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int ret; - - if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || - atomic_read(&op->state) == FCPOP_STATE_ABORTED) - return BLK_EH_RESET_TIMER; - - ret = __nvme_fc_abort_op(ctrl, op); - if (ret) - /* io wasn't active to abort */ - return BLK_EH_NOT_HANDLED; /* * we can't individually ABTS an io without affecting the queue, - * thus killing the queue, adn thus the association. + * thus killing the queue, and thus the association. * So resolve by performing a controller reset, which will stop * the host/io stack, terminate the association on the link, * and recreate an association on the link. @@ -2191,7 +2181,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; struct nvme_command *sqe = &cmdiu->sqe; u32 csn; - int ret; + int ret, opstate; /* * before attempting to send the io, check to see if we believe @@ -2269,6 +2259,9 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { + opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); + __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); + if (!(op->flags & FCOP_FLAGS_AEN)) nvme_fc_unmap_data(ctrl, op->rq, op); @@ -2889,14 +2882,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) if (portptr->port_state == FC_OBJSTATE_ONLINE) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: Max reconnect attempts (%d) " - "reached. Removing controller\n", + "reached.\n", ctrl->cnum, ctrl->ctrl.nr_reconnects); else dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " - "while waiting for remoteport connectivity. " - "Removing controller\n", ctrl->cnum, - portptr->dev_loss_tmo); + "while waiting for remoteport connectivity.\n", + ctrl->cnum, portptr->dev_loss_tmo); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -3133,6 +3125,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, } if (ret) { + nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); + cancel_work_sync(&ctrl->ctrl.reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + /* couldn't schedule retry - fail out */ dev_err(ctrl->ctrl.device, "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 50ef71ee3d86..41279da799ed 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -35,6 +35,10 @@ enum nvme_nvm_admin_opcode { nvme_nvm_admin_set_bb_tbl = 0xf1, }; +enum nvme_nvm_log_page { + NVME_NVM_LOG_REPORT_CHUNK = 0xca, +}; + struct nvme_nvm_ph_rw { __u8 opcode; __u8 flags; @@ -51,6 +55,21 @@ struct nvme_nvm_ph_rw { __le64 resv; }; +struct nvme_nvm_erase_blk { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd[2]; + __le64 prp1; + __le64 prp2; + __le64 spba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le64 resv; +}; + struct nvme_nvm_identity { __u8 opcode; __u8 flags; @@ -59,8 +78,7 @@ struct nvme_nvm_identity { __u64 rsvd[2]; __le64 prp1; __le64 prp2; - __le32 chnl_off; - __u32 rsvd11[5]; + __u32 rsvd11[6]; }; struct nvme_nvm_getbbtbl { @@ -90,44 +108,18 @@ struct nvme_nvm_setbbtbl { __u32 rsvd4[3]; }; -struct nvme_nvm_erase_blk { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd[2]; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 resv; -}; - struct nvme_nvm_command { union { struct nvme_common_command common; - struct nvme_nvm_identity identity; struct nvme_nvm_ph_rw ph_rw; + struct nvme_nvm_erase_blk erase; + struct nvme_nvm_identity identity; struct nvme_nvm_getbbtbl get_bb; struct nvme_nvm_setbbtbl set_bb; - struct nvme_nvm_erase_blk erase; }; }; -#define NVME_NVM_LP_MLC_PAIRS 886 -struct nvme_nvm_lp_mlc { - __le16 num_pairs; - __u8 pairs[NVME_NVM_LP_MLC_PAIRS]; -}; - -struct nvme_nvm_lp_tbl { - __u8 id[8]; - struct nvme_nvm_lp_mlc mlc; -}; - -struct nvme_nvm_id_group { +struct nvme_nvm_id12_grp { __u8 mtype; __u8 fmtype; __le16 res16; @@ -150,11 +142,10 @@ struct nvme_nvm_id_group { __le32 mpos; __le32 mccap; __le16 cpar; - __u8 reserved[10]; - struct nvme_nvm_lp_tbl lptbl; + __u8 reserved[906]; } __packed; -struct nvme_nvm_addr_format { +struct nvme_nvm_id12_addrf { __u8 ch_offset; __u8 ch_len; __u8 lun_offset; @@ -165,21 +156,22 @@ struct nvme_nvm_addr_format { __u8 blk_len; __u8 pg_offset; __u8 pg_len; - __u8 sect_offset; - __u8 sect_len; + __u8 sec_offset; + __u8 sec_len; __u8 res[4]; } __packed; -struct nvme_nvm_id { +struct nvme_nvm_id12 { __u8 ver_id; __u8 vmnt; __u8 cgrps; __u8 res; __le32 cap; __le32 dom; - struct nvme_nvm_addr_format ppaf; + struct nvme_nvm_id12_addrf ppaf; __u8 resv[228]; - struct nvme_nvm_id_group groups[4]; + struct nvme_nvm_id12_grp grp; + __u8 resv2[2880]; } __packed; struct nvme_nvm_bb_tbl { @@ -196,6 +188,68 @@ struct nvme_nvm_bb_tbl { __u8 blk[0]; }; +struct nvme_nvm_id20_addrf { + __u8 grp_len; + __u8 pu_len; + __u8 chk_len; + __u8 lba_len; + __u8 resv[4]; +}; + +struct nvme_nvm_id20 { + __u8 mjr; + __u8 mnr; + __u8 resv[6]; + + struct nvme_nvm_id20_addrf lbaf; + + __le32 mccap; + __u8 resv2[12]; + + __u8 wit; + __u8 resv3[31]; + + /* Geometry */ + __le16 num_grp; + __le16 num_pu; + __le32 num_chk; + __le32 clba; + __u8 resv4[52]; + + /* Write data requirements */ + __le32 ws_min; + __le32 ws_opt; + __le32 mw_cunits; + __le32 maxoc; + __le32 maxocpu; + __u8 resv5[44]; + + /* Performance related metrics */ + __le32 trdt; + __le32 trdm; + __le32 twrt; + __le32 twrm; + __le32 tcrst; + __le32 tcrsm; + __u8 resv6[40]; + + /* Reserved area */ + __u8 resv7[2816]; + + /* Vendor specific */ + __u8 vs[1024]; +}; + +struct nvme_nvm_chk_meta { + __u8 state; + __u8 type; + __u8 wi; + __u8 rsvd[5]; + __le64 slba; + __le64 cnlb; + __le64 wp; +}; + /* * Check we didn't inadvertently grow the command struct */ @@ -203,105 +257,238 @@ static inline void _nvme_nvm_check_size(void) { BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); - BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); - BUILD_BUG_ON(sizeof(struct nvme_nvm_id) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_grp) != 960); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12_addrf) != 16); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id12) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_nvm_bb_tbl) != 64); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id20_addrf) != 8); + BUILD_BUG_ON(sizeof(struct nvme_nvm_id20) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != 32); + BUILD_BUG_ON(sizeof(struct nvme_nvm_chk_meta) != + sizeof(struct nvm_chk_meta)); +} + +static void nvme_nvm_set_addr_12(struct nvm_addrf_12 *dst, + struct nvme_nvm_id12_addrf *src) +{ + dst->ch_len = src->ch_len; + dst->lun_len = src->lun_len; + dst->blk_len = src->blk_len; + dst->pg_len = src->pg_len; + dst->pln_len = src->pln_len; + dst->sec_len = src->sec_len; + + dst->ch_offset = src->ch_offset; + dst->lun_offset = src->lun_offset; + dst->blk_offset = src->blk_offset; + dst->pg_offset = src->pg_offset; + dst->pln_offset = src->pln_offset; + dst->sec_offset = src->sec_offset; + + dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; + dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; + dst->blk_mask = ((1ULL << dst->blk_len) - 1) << dst->blk_offset; + dst->pg_mask = ((1ULL << dst->pg_len) - 1) << dst->pg_offset; + dst->pln_mask = ((1ULL << dst->pln_len) - 1) << dst->pln_offset; + dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; } -static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) +static int nvme_nvm_setup_12(struct nvme_nvm_id12 *id, + struct nvm_geo *geo) { - struct nvme_nvm_id_group *src; - struct nvm_id_group *grp; + struct nvme_nvm_id12_grp *src; int sec_per_pg, sec_per_pl, pg_per_blk; - if (nvme_nvm_id->cgrps != 1) + if (id->cgrps != 1) + return -EINVAL; + + src = &id->grp; + + if (src->mtype != 0) { + pr_err("nvm: memory type not supported\n"); return -EINVAL; + } + + /* 1.2 spec. only reports a single version id - unfold */ + geo->major_ver_id = id->ver_id; + geo->minor_ver_id = 2; - src = &nvme_nvm_id->groups[0]; - grp = &nvm_id->grp; + /* Set compacted version for upper layers */ + geo->version = NVM_OCSSD_SPEC_12; - grp->mtype = src->mtype; - grp->fmtype = src->fmtype; + geo->num_ch = src->num_ch; + geo->num_lun = src->num_lun; + geo->all_luns = geo->num_ch * geo->num_lun; - grp->num_ch = src->num_ch; - grp->num_lun = src->num_lun; + geo->num_chk = le16_to_cpu(src->num_chk); - grp->num_chk = le16_to_cpu(src->num_chk); - grp->csecs = le16_to_cpu(src->csecs); - grp->sos = le16_to_cpu(src->sos); + geo->csecs = le16_to_cpu(src->csecs); + geo->sos = le16_to_cpu(src->sos); pg_per_blk = le16_to_cpu(src->num_pg); - sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs; + sec_per_pg = le16_to_cpu(src->fpg_sz) / geo->csecs; sec_per_pl = sec_per_pg * src->num_pln; - grp->clba = sec_per_pl * pg_per_blk; - grp->ws_per_chk = pg_per_blk; - - grp->mpos = le32_to_cpu(src->mpos); - grp->cpar = le16_to_cpu(src->cpar); - grp->mccap = le32_to_cpu(src->mccap); - - grp->ws_opt = grp->ws_min = sec_per_pg; - grp->ws_seq = NVM_IO_SNGL_ACCESS; - - if (grp->mpos & 0x020202) { - grp->ws_seq = NVM_IO_DUAL_ACCESS; - grp->ws_opt <<= 1; - } else if (grp->mpos & 0x040404) { - grp->ws_seq = NVM_IO_QUAD_ACCESS; - grp->ws_opt <<= 2; - } + geo->clba = sec_per_pl * pg_per_blk; + + geo->all_chunks = geo->all_luns * geo->num_chk; + geo->total_secs = geo->clba * geo->all_chunks; + + geo->ws_min = sec_per_pg; + geo->ws_opt = sec_per_pg; + geo->mw_cunits = geo->ws_opt << 3; /* default to MLC safe values */ - grp->trdt = le32_to_cpu(src->trdt); - grp->trdm = le32_to_cpu(src->trdm); - grp->tprt = le32_to_cpu(src->tprt); - grp->tprm = le32_to_cpu(src->tprm); - grp->tbet = le32_to_cpu(src->tbet); - grp->tbem = le32_to_cpu(src->tbem); + /* Do not impose values for maximum number of open blocks as it is + * unspecified in 1.2. Users of 1.2 must be aware of this and eventually + * specify these values through a quirk if restrictions apply. + */ + geo->maxoc = geo->all_luns * geo->num_chk; + geo->maxocpu = geo->num_chk; + + geo->mccap = le32_to_cpu(src->mccap); + + geo->trdt = le32_to_cpu(src->trdt); + geo->trdm = le32_to_cpu(src->trdm); + geo->tprt = le32_to_cpu(src->tprt); + geo->tprm = le32_to_cpu(src->tprm); + geo->tbet = le32_to_cpu(src->tbet); + geo->tbem = le32_to_cpu(src->tbem); /* 1.2 compatibility */ - grp->num_pln = src->num_pln; - grp->num_pg = le16_to_cpu(src->num_pg); - grp->fpg_sz = le16_to_cpu(src->fpg_sz); + geo->vmnt = id->vmnt; + geo->cap = le32_to_cpu(id->cap); + geo->dom = le32_to_cpu(id->dom); + + geo->mtype = src->mtype; + geo->fmtype = src->fmtype; + + geo->cpar = le16_to_cpu(src->cpar); + geo->mpos = le32_to_cpu(src->mpos); + + geo->pln_mode = NVM_PLANE_SINGLE; + + if (geo->mpos & 0x020202) { + geo->pln_mode = NVM_PLANE_DOUBLE; + geo->ws_opt <<= 1; + } else if (geo->mpos & 0x040404) { + geo->pln_mode = NVM_PLANE_QUAD; + geo->ws_opt <<= 2; + } + + geo->num_pln = src->num_pln; + geo->num_pg = le16_to_cpu(src->num_pg); + geo->fpg_sz = le16_to_cpu(src->fpg_sz); + + nvme_nvm_set_addr_12((struct nvm_addrf_12 *)&geo->addrf, &id->ppaf); return 0; } -static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id) +static void nvme_nvm_set_addr_20(struct nvm_addrf *dst, + struct nvme_nvm_id20_addrf *src) +{ + dst->ch_len = src->grp_len; + dst->lun_len = src->pu_len; + dst->chk_len = src->chk_len; + dst->sec_len = src->lba_len; + + dst->sec_offset = 0; + dst->chk_offset = dst->sec_len; + dst->lun_offset = dst->chk_offset + dst->chk_len; + dst->ch_offset = dst->lun_offset + dst->lun_len; + + dst->ch_mask = ((1ULL << dst->ch_len) - 1) << dst->ch_offset; + dst->lun_mask = ((1ULL << dst->lun_len) - 1) << dst->lun_offset; + dst->chk_mask = ((1ULL << dst->chk_len) - 1) << dst->chk_offset; + dst->sec_mask = ((1ULL << dst->sec_len) - 1) << dst->sec_offset; +} + +static int nvme_nvm_setup_20(struct nvme_nvm_id20 *id, + struct nvm_geo *geo) +{ + geo->major_ver_id = id->mjr; + geo->minor_ver_id = id->mnr; + + /* Set compacted version for upper layers */ + geo->version = NVM_OCSSD_SPEC_20; + + if (!(geo->major_ver_id == 2 && geo->minor_ver_id == 0)) { + pr_err("nvm: OCSSD version not supported (v%d.%d)\n", + geo->major_ver_id, geo->minor_ver_id); + return -EINVAL; + } + + geo->num_ch = le16_to_cpu(id->num_grp); + geo->num_lun = le16_to_cpu(id->num_pu); + geo->all_luns = geo->num_ch * geo->num_lun; + + geo->num_chk = le32_to_cpu(id->num_chk); + geo->clba = le32_to_cpu(id->clba); + + geo->all_chunks = geo->all_luns * geo->num_chk; + geo->total_secs = geo->clba * geo->all_chunks; + + geo->ws_min = le32_to_cpu(id->ws_min); + geo->ws_opt = le32_to_cpu(id->ws_opt); + geo->mw_cunits = le32_to_cpu(id->mw_cunits); + geo->maxoc = le32_to_cpu(id->maxoc); + geo->maxocpu = le32_to_cpu(id->maxocpu); + + geo->trdt = le32_to_cpu(id->trdt); + geo->trdm = le32_to_cpu(id->trdm); + geo->tprt = le32_to_cpu(id->twrt); + geo->tprm = le32_to_cpu(id->twrm); + geo->tbet = le32_to_cpu(id->tcrst); + geo->tbem = le32_to_cpu(id->tcrsm); + + nvme_nvm_set_addr_20(&geo->addrf, &id->lbaf); + + return 0; +} + +static int nvme_nvm_identity(struct nvm_dev *nvmdev) { struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_id *nvme_nvm_id; + struct nvme_nvm_id12 *id; struct nvme_nvm_command c = {}; int ret; c.identity.opcode = nvme_nvm_admin_identity; c.identity.nsid = cpu_to_le32(ns->head->ns_id); - c.identity.chnl_off = 0; - nvme_nvm_id = kmalloc(sizeof(struct nvme_nvm_id), GFP_KERNEL); - if (!nvme_nvm_id) + id = kmalloc(sizeof(struct nvme_nvm_id12), GFP_KERNEL); + if (!id) return -ENOMEM; ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c, - nvme_nvm_id, sizeof(struct nvme_nvm_id)); + id, sizeof(struct nvme_nvm_id12)); if (ret) { ret = -EIO; goto out; } - nvm_id->ver_id = nvme_nvm_id->ver_id; - nvm_id->vmnt = nvme_nvm_id->vmnt; - nvm_id->cap = le32_to_cpu(nvme_nvm_id->cap); - nvm_id->dom = le32_to_cpu(nvme_nvm_id->dom); - memcpy(&nvm_id->ppaf, &nvme_nvm_id->ppaf, - sizeof(struct nvm_addr_format)); + /* + * The 1.2 and 2.0 specifications share the first byte in their geometry + * command to make it possible to know what version a device implements. + */ + switch (id->ver_id) { + case 1: + ret = nvme_nvm_setup_12(id, &nvmdev->geo); + break; + case 2: + ret = nvme_nvm_setup_20((struct nvme_nvm_id20 *)id, + &nvmdev->geo); + break; + default: + dev_err(ns->ctrl->device, "OCSSD revision not supported (%d)\n", + id->ver_id); + ret = -EINVAL; + } - ret = init_grps(nvm_id, nvme_nvm_id); out: - kfree(nvme_nvm_id); + kfree(id); return ret; } @@ -314,7 +501,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->nr_chks * geo->plane_mode; + int nr_blks = geo->num_chk * geo->num_pln; int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; @@ -355,7 +542,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode); + memcpy(blks, bb_tbl->blk, geo->num_chk * geo->num_pln); out: kfree(bb_tbl); return ret; @@ -382,6 +569,61 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas, return ret; } +/* + * Expect the lba in device format + */ +static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev, + struct nvm_chk_meta *meta, + sector_t slba, int nchks) +{ + struct nvm_geo *geo = &ndev->geo; + struct nvme_ns *ns = ndev->q->queuedata; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_nvm_chk_meta *dev_meta = (struct nvme_nvm_chk_meta *)meta; + struct ppa_addr ppa; + size_t left = nchks * sizeof(struct nvme_nvm_chk_meta); + size_t log_pos, offset, len; + int ret, i; + + /* Normalize lba address space to obtain log offset */ + ppa.ppa = slba; + ppa = dev_to_generic_addr(ndev, ppa); + + log_pos = ppa.m.chk; + log_pos += ppa.m.pu * geo->num_chk; + log_pos += ppa.m.grp * geo->num_lun * geo->num_chk; + + offset = log_pos * sizeof(struct nvme_nvm_chk_meta); + + while (left) { + len = min_t(unsigned int, left, ctrl->max_hw_sectors << 9); + + ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK, + dev_meta, len, offset); + if (ret) { + dev_err(ctrl->device, "Get REPORT CHUNK log error\n"); + break; + } + + for (i = 0; i < len; i += sizeof(struct nvme_nvm_chk_meta)) { + meta->state = dev_meta->state; + meta->type = dev_meta->type; + meta->wi = dev_meta->wi; + meta->slba = le64_to_cpu(dev_meta->slba); + meta->cnlb = le64_to_cpu(dev_meta->cnlb); + meta->wp = le64_to_cpu(dev_meta->wp); + + meta++; + dev_meta++; + } + + offset += len; + left -= len; + } + + return ret; +} + static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, struct nvme_nvm_command *c) { @@ -513,6 +755,8 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .get_bb_tbl = nvme_nvm_get_bb_tbl, .set_bb_tbl = nvme_nvm_set_bb_tbl, + .get_chk_meta = nvme_nvm_get_chk_meta, + .submit_io = nvme_nvm_submit_io, .submit_io_sync = nvme_nvm_submit_io_sync, @@ -520,8 +764,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = { .destroy_dma_pool = nvme_nvm_destroy_dma_pool, .dev_dma_alloc = nvme_nvm_dev_dma_alloc, .dev_dma_free = nvme_nvm_dev_dma_free, - - .max_phys_sect = 64, }; static int nvme_nvm_submit_user_cmd(struct request_queue *q, @@ -722,6 +964,15 @@ int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) } } +void nvme_nvm_update_nvm_info(struct nvme_ns *ns) +{ + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + geo->csecs = 1 << ns->lba_shift; + geo->sos = ns->ms; +} + int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { struct request_queue *q = ns->queue; @@ -748,125 +999,205 @@ void nvme_nvm_unregister(struct nvme_ns *ns) } static ssize_t nvm_dev_attr_show(struct device *dev, - struct device_attribute *dattr, char *page) + struct device_attribute *dattr, char *page) { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); struct nvm_dev *ndev = ns->ndev; - struct nvm_id *id; - struct nvm_id_group *grp; + struct nvm_geo *geo = &ndev->geo; struct attribute *attr; if (!ndev) return 0; - id = &ndev->identity; - grp = &id->grp; attr = &dattr->attr; if (strcmp(attr->name, "version") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->ver_id); - } else if (strcmp(attr->name, "vendor_opcode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->vmnt); + if (geo->major_ver_id == 1) + return scnprintf(page, PAGE_SIZE, "%u\n", + geo->major_ver_id); + else + return scnprintf(page, PAGE_SIZE, "%u.%u\n", + geo->major_ver_id, + geo->minor_ver_id); } else if (strcmp(attr->name, "capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->cap); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->cap); + } else if (strcmp(attr->name, "read_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdt); + } else if (strcmp(attr->name, "read_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->trdm); + } else { + return scnprintf(page, + PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); + } +} + +static ssize_t nvm_dev_attr_show_ppaf(struct nvm_addrf_12 *ppaf, char *page) +{ + return scnprintf(page, PAGE_SIZE, + "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + ppaf->ch_offset, ppaf->ch_len, + ppaf->lun_offset, ppaf->lun_len, + ppaf->pln_offset, ppaf->pln_len, + ppaf->blk_offset, ppaf->blk_len, + ppaf->pg_offset, ppaf->pg_len, + ppaf->sec_offset, ppaf->sec_len); +} + +static ssize_t nvm_dev_attr_show_12(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + struct attribute *attr; + + if (!ndev) + return 0; + + attr = &dattr->attr; + + if (strcmp(attr->name, "vendor_opcode") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->vmnt); } else if (strcmp(attr->name, "device_mode") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", id->dom); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->dom); /* kept for compatibility */ } else if (strcmp(attr->name, "media_manager") == 0) { return scnprintf(page, PAGE_SIZE, "%s\n", "gennvm"); } else if (strcmp(attr->name, "ppa_format") == 0) { - return scnprintf(page, PAGE_SIZE, - "0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", - id->ppaf.ch_offset, id->ppaf.ch_len, - id->ppaf.lun_offset, id->ppaf.lun_len, - id->ppaf.pln_offset, id->ppaf.pln_len, - id->ppaf.blk_offset, id->ppaf.blk_len, - id->ppaf.pg_offset, id->ppaf.pg_len, - id->ppaf.sect_offset, id->ppaf.sect_len); + return nvm_dev_attr_show_ppaf((void *)&geo->addrf, page); } else if (strcmp(attr->name, "media_type") == 0) { /* u8 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->mtype); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->mtype); } else if (strcmp(attr->name, "flash_media_type") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->fmtype); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->fmtype); } else if (strcmp(attr->name, "num_channels") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_ch); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); } else if (strcmp(attr->name, "num_luns") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_lun); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); } else if (strcmp(attr->name, "num_planes") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pln); } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); } else if (strcmp(attr->name, "num_pages") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_pg); } else if (strcmp(attr->name, "page_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->fpg_sz); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->fpg_sz); } else if (strcmp(attr->name, "hw_sector_size") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->csecs); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->csecs); } else if (strcmp(attr->name, "oob_sector_size") == 0) {/* u32 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->sos); - } else if (strcmp(attr->name, "read_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdt); - } else if (strcmp(attr->name, "read_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->trdm); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->sos); } else if (strcmp(attr->name, "prog_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprt); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); } else if (strcmp(attr->name, "prog_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tprm); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); } else if (strcmp(attr->name, "erase_typ") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbet); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); } else if (strcmp(attr->name, "erase_max") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", grp->tbem); + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); } else if (strcmp(attr->name, "multiplane_modes") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mpos); + return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mpos); } else if (strcmp(attr->name, "media_capabilities") == 0) { - return scnprintf(page, PAGE_SIZE, "0x%08x\n", grp->mccap); + return scnprintf(page, PAGE_SIZE, "0x%08x\n", geo->mccap); } else if (strcmp(attr->name, "max_phys_secs") == 0) { - return scnprintf(page, PAGE_SIZE, "%u\n", - ndev->ops->max_phys_sect); + return scnprintf(page, PAGE_SIZE, "%u\n", NVM_MAX_VLBA); } else { - return scnprintf(page, - PAGE_SIZE, - "Unhandled attr(%s) in `nvm_dev_attr_show`\n", - attr->name); + return scnprintf(page, PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); + } +} + +static ssize_t nvm_dev_attr_show_20(struct device *dev, + struct device_attribute *dattr, char *page) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + struct attribute *attr; + + if (!ndev) + return 0; + + attr = &dattr->attr; + + if (strcmp(attr->name, "groups") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_ch); + } else if (strcmp(attr->name, "punits") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_lun); + } else if (strcmp(attr->name, "chunks") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->num_chk); + } else if (strcmp(attr->name, "clba") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->clba); + } else if (strcmp(attr->name, "ws_min") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_min); + } else if (strcmp(attr->name, "ws_opt") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->ws_opt); + } else if (strcmp(attr->name, "maxoc") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxoc); + } else if (strcmp(attr->name, "maxocpu") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->maxocpu); + } else if (strcmp(attr->name, "mw_cunits") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->mw_cunits); + } else if (strcmp(attr->name, "write_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprt); + } else if (strcmp(attr->name, "write_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tprm); + } else if (strcmp(attr->name, "reset_typ") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbet); + } else if (strcmp(attr->name, "reset_max") == 0) { + return scnprintf(page, PAGE_SIZE, "%u\n", geo->tbem); + } else { + return scnprintf(page, PAGE_SIZE, + "Unhandled attr(%s) in `%s`\n", + attr->name, __func__); } } -#define NVM_DEV_ATTR_RO(_name) \ +#define NVM_DEV_ATTR_RO(_name) \ DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show, NULL) +#define NVM_DEV_ATTR_12_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_12, NULL) +#define NVM_DEV_ATTR_20_RO(_name) \ + DEVICE_ATTR(_name, S_IRUGO, nvm_dev_attr_show_20, NULL) +/* general attributes */ static NVM_DEV_ATTR_RO(version); -static NVM_DEV_ATTR_RO(vendor_opcode); static NVM_DEV_ATTR_RO(capabilities); -static NVM_DEV_ATTR_RO(device_mode); -static NVM_DEV_ATTR_RO(ppa_format); -static NVM_DEV_ATTR_RO(media_manager); - -static NVM_DEV_ATTR_RO(media_type); -static NVM_DEV_ATTR_RO(flash_media_type); -static NVM_DEV_ATTR_RO(num_channels); -static NVM_DEV_ATTR_RO(num_luns); -static NVM_DEV_ATTR_RO(num_planes); -static NVM_DEV_ATTR_RO(num_blocks); -static NVM_DEV_ATTR_RO(num_pages); -static NVM_DEV_ATTR_RO(page_size); -static NVM_DEV_ATTR_RO(hw_sector_size); -static NVM_DEV_ATTR_RO(oob_sector_size); + static NVM_DEV_ATTR_RO(read_typ); static NVM_DEV_ATTR_RO(read_max); -static NVM_DEV_ATTR_RO(prog_typ); -static NVM_DEV_ATTR_RO(prog_max); -static NVM_DEV_ATTR_RO(erase_typ); -static NVM_DEV_ATTR_RO(erase_max); -static NVM_DEV_ATTR_RO(multiplane_modes); -static NVM_DEV_ATTR_RO(media_capabilities); -static NVM_DEV_ATTR_RO(max_phys_secs); - -static struct attribute *nvm_dev_attrs[] = { + +/* 1.2 values */ +static NVM_DEV_ATTR_12_RO(vendor_opcode); +static NVM_DEV_ATTR_12_RO(device_mode); +static NVM_DEV_ATTR_12_RO(ppa_format); +static NVM_DEV_ATTR_12_RO(media_manager); +static NVM_DEV_ATTR_12_RO(media_type); +static NVM_DEV_ATTR_12_RO(flash_media_type); +static NVM_DEV_ATTR_12_RO(num_channels); +static NVM_DEV_ATTR_12_RO(num_luns); +static NVM_DEV_ATTR_12_RO(num_planes); +static NVM_DEV_ATTR_12_RO(num_blocks); +static NVM_DEV_ATTR_12_RO(num_pages); +static NVM_DEV_ATTR_12_RO(page_size); +static NVM_DEV_ATTR_12_RO(hw_sector_size); +static NVM_DEV_ATTR_12_RO(oob_sector_size); +static NVM_DEV_ATTR_12_RO(prog_typ); +static NVM_DEV_ATTR_12_RO(prog_max); +static NVM_DEV_ATTR_12_RO(erase_typ); +static NVM_DEV_ATTR_12_RO(erase_max); +static NVM_DEV_ATTR_12_RO(multiplane_modes); +static NVM_DEV_ATTR_12_RO(media_capabilities); +static NVM_DEV_ATTR_12_RO(max_phys_secs); + +static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_version.attr, - &dev_attr_vendor_opcode.attr, &dev_attr_capabilities.attr, + + &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, - &dev_attr_ppa_format.attr, &dev_attr_media_type.attr, &dev_attr_flash_media_type.attr, @@ -887,22 +1218,92 @@ static struct attribute *nvm_dev_attrs[] = { &dev_attr_multiplane_modes.attr, &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr, + NULL, }; -static const struct attribute_group nvm_dev_attr_group = { +static const struct attribute_group nvm_dev_attr_group_12 = { .name = "lightnvm", - .attrs = nvm_dev_attrs, + .attrs = nvm_dev_attrs_12, +}; + +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs_20[] = { + &dev_attr_version.attr, + &dev_attr_capabilities.attr, + + &dev_attr_groups.attr, + &dev_attr_punits.attr, + &dev_attr_chunks.attr, + &dev_attr_clba.attr, + &dev_attr_ws_min.attr, + &dev_attr_ws_opt.attr, + &dev_attr_maxoc.attr, + &dev_attr_maxocpu.attr, + &dev_attr_mw_cunits.attr, + + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr, + &dev_attr_write_typ.attr, + &dev_attr_write_max.attr, + &dev_attr_reset_typ.attr, + &dev_attr_reset_max.attr, + + NULL, +}; + +static const struct attribute_group nvm_dev_attr_group_20 = { + .name = "lightnvm", + .attrs = nvm_dev_attrs_20, }; int nvme_nvm_register_sysfs(struct nvme_ns *ns) { - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + if (!ndev) + return -EINVAL; + + switch (geo->major_ver_id) { + case 1: + return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_12); + case 2: + return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_20); + } + + return -EINVAL; } void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group); + struct nvm_dev *ndev = ns->ndev; + struct nvm_geo *geo = &ndev->geo; + + switch (geo->major_ver_id) { + case 1: + sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_12); + break; + case 2: + sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, + &nvm_dev_attr_group_20); + break; + } } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 060f69e03427..956e0b8e9c4d 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -44,12 +44,12 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->namespaces_mutex); + down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { if (ns->head->disk) kblockd_schedule_work(&ns->head->requeue_work); } - mutex_unlock(&ctrl->namespaces_mutex); + up_read(&ctrl->namespaces_rwsem); } static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) @@ -162,13 +162,13 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) return 0; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) goto out; q->queuedata = head; blk_queue_make_request(q, nvme_ns_head_make_request); q->poll_fn = nvme_ns_head_poll; - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index d733b14ede9d..cf93690b3ffc 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -21,6 +21,7 @@ #include <linux/blk-mq.h> #include <linux/lightnvm.h> #include <linux/sed-opal.h> +#include <linux/fault-inject.h> extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) @@ -140,7 +141,7 @@ struct nvme_ctrl { struct blk_mq_tag_set *tagset; struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; - struct mutex namespaces_mutex; + struct rw_semaphore namespaces_rwsem; struct device ctrl_device; struct device *device; /* char device */ struct cdev cdev; @@ -261,6 +262,15 @@ struct nvme_ns_head { int instance; }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +struct nvme_fault_inject { + struct fault_attr attr; + struct dentry *parent; + bool dont_retry; /* DNR, do not retry */ + u16 status; /* status code */ +}; +#endif + struct nvme_ns { struct list_head list; @@ -282,6 +292,11 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 u16 noiob; + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + struct nvme_fault_inject fault_inject; +#endif + }; struct nvme_ctrl_ops { @@ -298,8 +313,19 @@ struct nvme_ctrl_ops { void (*delete_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); int (*reinit_request)(void *data, struct request *rq); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); }; +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +void nvme_fault_inject_init(struct nvme_ns *ns); +void nvme_fault_inject_fini(struct nvme_ns *ns); +void nvme_should_fail(struct request *req); +#else +static inline void nvme_fault_inject_init(struct nvme_ns *ns) {} +static inline void nvme_fault_inject_fini(struct nvme_ns *ns) {} +static inline void nvme_should_fail(struct request *req) {} +#endif + static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl) { u32 val = 0; @@ -336,6 +362,8 @@ static inline void nvme_end_request(struct request *req, __le16 status, rq->status = le16_to_cpu(status) >> 1; rq->result = result; + /* inject error when permitted by fault injection framework */ + nvme_should_fail(req); blk_mq_complete_request(req); } @@ -401,6 +429,9 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); +int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 log_page, void *log, size_t size, size_t offset); + extern const struct attribute_group nvme_ns_id_attr_group; extern const struct block_device_operations nvme_ns_head_ops; @@ -461,12 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) #endif /* CONFIG_NVME_MULTIPATH */ #ifdef CONFIG_NVM +void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); int nvme_nvm_register_sysfs(struct nvme_ns *ns); void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else +static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) { diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index b6f43b738f03..295fbec1e5f2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -414,7 +414,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) { struct nvme_dev *dev = set->driver_data; - return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0); } /** @@ -2197,7 +2197,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (!dead) { if (shutdown) nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT); + } + + nvme_stop_queues(&dev->ctrl); + if (!dead) { /* * If the controller is still alive tell it to stop using the * host memory buffer. In theory the shutdown / reset should @@ -2206,11 +2210,6 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) */ if (dev->host_mem_descs) nvme_set_host_mem(dev, 0); - - } - nvme_stop_queues(&dev->ctrl); - - if (!dead) { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } @@ -2416,6 +2415,13 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) return 0; } +static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) +{ + struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); + + return snprintf(buf, size, "%s", dev_name(&pdev->dev)); +} + static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .name = "pcie", .module = THIS_MODULE, @@ -2425,6 +2431,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read64 = nvme_pci_reg_read64, .free_ctrl = nvme_pci_free_ctrl, .submit_async_event = nvme_pci_submit_async_event, + .get_address = nvme_pci_get_address, }; static int nvme_dev_map(struct nvme_dev *dev) @@ -2461,10 +2468,13 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { /* * Samsung SSD 960 EVO drops off the PCIe bus after system - * suspend on a Ryzen board, ASUS PRIME B350M-A. + * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as + * within few minutes after bootup on a Coffee Lake board - + * ASUS PRIME Z370-A */ if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && - dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) + (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") || + dmi_match(DMI_BOARD_NAME, "PRIME Z370-A"))) return NVME_QUIRK_NO_APST; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4d84a73ee12d..758537e9ba07 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -867,6 +867,14 @@ out_free_io_queues: return ret; } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -899,7 +907,6 @@ static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl) queue_delayed_work(nvme_wq, &ctrl->reconnect_work, ctrl->ctrl.opts->reconnect_delay * HZ); } else { - dev_info(ctrl->ctrl.device, "Removing controller...\n"); nvme_delete_ctrl(&ctrl->ctrl); } } @@ -974,8 +981,8 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_start_queues(&ctrl->ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - /* state change failure should never happen */ - WARN_ON_ONCE(1); + /* state change failure is ok if we're in DELETING state */ + WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); return; } @@ -1719,9 +1726,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, @@ -1799,6 +1803,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; static inline bool @@ -2025,15 +2030,26 @@ static struct nvmf_transport_ops nvme_rdma_transport = { static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) { struct nvme_rdma_ctrl *ctrl; + struct nvme_rdma_device *ndev; + bool found = false; + + mutex_lock(&device_list_mutex); + list_for_each_entry(ndev, &device_list, entry) { + if (ndev->dev == ib_device) { + found = true; + break; + } + } + mutex_unlock(&device_list_mutex); + + if (!found) + return; /* Delete all controllers using this device */ mutex_lock(&nvme_rdma_ctrl_mutex); list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { if (ctrl->device->dev != ib_device) continue; - dev_info(ctrl->ctrl.device, - "Removing ctrl: NQN \"%s\", addr %pISp\n", - ctrl->ctrl.opts->subsysnqn, &ctrl->addr); nvme_delete_ctrl(&ctrl->ctrl); } mutex_unlock(&nvme_rdma_ctrl_mutex); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e6b2d2af81b6..ad9ff27234b5 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -23,6 +23,15 @@ static const struct config_item_type nvmet_host_type; static const struct config_item_type nvmet_subsys_type; +static const struct nvmet_transport_name { + u8 type; + const char *name; +} nvmet_transport_names[] = { + { NVMF_TRTYPE_RDMA, "rdma" }, + { NVMF_TRTYPE_FC, "fc" }, + { NVMF_TRTYPE_LOOP, "loop" }, +}; + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. @@ -208,43 +217,30 @@ CONFIGFS_ATTR(nvmet_, addr_trsvcid); static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.trtype) { - case NVMF_TRTYPE_RDMA: - return sprintf(page, "rdma\n"); - case NVMF_TRTYPE_LOOP: - return sprintf(page, "loop\n"); - case NVMF_TRTYPE_FC: - return sprintf(page, "fc\n"); - default: - return sprintf(page, "\n"); + struct nvmet_port *port = to_nvmet_port(item); + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { + if (port->disc_addr.trtype != nvmet_transport_names[i].type) + continue; + return sprintf(page, "%s\n", nvmet_transport_names[i].name); } + + return sprintf(page, "\n"); } static void nvmet_port_init_tsas_rdma(struct nvmet_port *port) { - port->disc_addr.trtype = NVMF_TRTYPE_RDMA; - memset(&port->disc_addr.tsas.rdma, 0, NVMF_TSAS_SIZE); port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED; port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED; port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM; } -static void nvmet_port_init_tsas_loop(struct nvmet_port *port) -{ - port->disc_addr.trtype = NVMF_TRTYPE_LOOP; - memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); -} - -static void nvmet_port_init_tsas_fc(struct nvmet_port *port) -{ - port->disc_addr.trtype = NVMF_TRTYPE_FC; - memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); -} - static ssize_t nvmet_addr_trtype_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + int i; if (port->enabled) { pr_err("Cannot modify address while enabled\n"); @@ -252,17 +248,18 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, return -EACCES; } - if (sysfs_streq(page, "rdma")) { - nvmet_port_init_tsas_rdma(port); - } else if (sysfs_streq(page, "loop")) { - nvmet_port_init_tsas_loop(port); - } else if (sysfs_streq(page, "fc")) { - nvmet_port_init_tsas_fc(port); - } else { - pr_err("Invalid value '%s' for trtype\n", page); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { + if (sysfs_streq(page, nvmet_transport_names[i].name)) + goto found; } + pr_err("Invalid value '%s' for trtype\n", page); + return -EINVAL; +found: + memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); + port->disc_addr.trtype = nvmet_transport_names[i].type; + if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) + nvmet_port_init_tsas_rdma(port); return count; } @@ -333,13 +330,13 @@ out_unlock: return ret ? ret : count; } +CONFIGFS_ATTR(nvmet_ns_, device_uuid); + static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page) { return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid); } -CONFIGFS_ATTR(nvmet_ns_, device_uuid); - static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, const char *page, size_t count) { diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a78029e4e5f4..e95424f172fd 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -18,7 +18,7 @@ #include "nvmet.h" -static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; +static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); /* @@ -137,7 +137,7 @@ static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, schedule_work(&ctrl->async_event_work); } -int nvmet_register_transport(struct nvmet_fabrics_ops *ops) +int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) { int ret = 0; @@ -152,7 +152,7 @@ int nvmet_register_transport(struct nvmet_fabrics_ops *ops) } EXPORT_SYMBOL_GPL(nvmet_register_transport); -void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops) +void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops) { down_write(&nvmet_config_sem); nvmet_transports[ops->type] = NULL; @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nvmet_unregister_transport); int nvmet_enable_port(struct nvmet_port *port) { - struct nvmet_fabrics_ops *ops; + const struct nvmet_fabrics_ops *ops; int ret; lockdep_assert_held(&nvmet_config_sem); @@ -195,7 +195,7 @@ int nvmet_enable_port(struct nvmet_port *port) void nvmet_disable_port(struct nvmet_port *port) { - struct nvmet_fabrics_ops *ops; + const struct nvmet_fabrics_ops *ops; lockdep_assert_held(&nvmet_config_sem); @@ -500,7 +500,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) EXPORT_SYMBOL_GPL(nvmet_sq_init); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, - struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops) + struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops) { u8 flags = req->cmd->common.flags; u16 status; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 8f3b57b4c97b..a72425d8bce0 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -43,7 +43,8 @@ void nvmet_referral_disable(struct nvmet_port *port) } static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, - struct nvmet_port *port, char *subsys_nqn, u8 type, u32 numrec) + struct nvmet_port *port, char *subsys_nqn, char *traddr, + u8 type, u32 numrec) { struct nvmf_disc_rsp_page_entry *e = &hdr->entries[numrec]; @@ -56,11 +57,30 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, e->asqsz = cpu_to_le16(NVME_AQ_DEPTH); e->subtype = type; memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); - memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); + memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE); memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE); } +/* + * nvmet_set_disc_traddr - set a correct discovery log entry traddr + * + * IP based transports (e.g RDMA) can listen on "any" ipv4/ipv6 addresses + * (INADDR_ANY or IN6ADDR_ANY_INIT). The discovery log page traddr reply + * must not contain that "any" IP address. If the transport implements + * .disc_traddr, use it. this callback will set the discovery traddr + * from the req->port address in case the port in question listens + * "any" IP address. + */ +static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port, + char *traddr) +{ + if (req->ops->disc_traddr) + req->ops->disc_traddr(req, port, traddr); + else + memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); +} + static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) { const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); @@ -90,8 +110,11 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) if (!nvmet_host_allowed(req, p->subsys, ctrl->hostnqn)) continue; if (residual_len >= entry_size) { + char traddr[NVMF_TRADDR_SIZE]; + + nvmet_set_disc_traddr(req, req->port, traddr); nvmet_format_discovery_entry(hdr, req->port, - p->subsys->subsysnqn, + p->subsys->subsysnqn, traddr, NVME_NQN_NVME, numrec); residual_len -= entry_size; } @@ -102,6 +125,7 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) if (residual_len >= entry_size) { nvmet_format_discovery_entry(hdr, r, NVME_DISC_SUBSYS_NAME, + r->disc_addr.traddr, NVME_NQN_DISC, numrec); residual_len -= entry_size; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 9b39a6cb1935..33ee8d3145f8 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -87,6 +87,7 @@ struct nvmet_fc_fcp_iod { struct nvmet_req req; struct work_struct work; struct work_struct done_work; + struct work_struct defer_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -224,6 +225,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -429,6 +431,7 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); + INIT_WORK(&fod->defer_work, nvmet_fc_fcp_rqst_op_defer_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; @@ -512,6 +515,17 @@ nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, } static void +nvmet_fc_fcp_rqst_op_defer_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, defer_work); + + /* Submit deferred IO for processing */ + nvmet_fc_queue_fcp_req(fod->tgtport, fod->queue, fod->fcpreq); + +} + +static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { @@ -568,13 +582,12 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, /* inform LLDD IO is now being processed */ tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq); - /* Submit deferred IO for processing */ - nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); - /* * Leave the queue lookup get reference taken when * fod was originally allocated. */ + + queue_work(queue->work_q, &fod->defer_work); } static int @@ -1550,7 +1563,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); -static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; +static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; static void nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) @@ -2505,7 +2518,7 @@ nvmet_fc_remove_port(struct nvmet_port *port) /* nothing to do */ } -static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { +static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_FC, .msdbd = 1, diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 861d1509b22b..a350765d2d5c 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -71,7 +71,7 @@ static DEFINE_MUTEX(nvme_loop_ctrl_mutex); static void nvme_loop_queue_response(struct nvmet_req *nvme_req); static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl); -static struct nvmet_fabrics_ops nvme_loop_ops; +static const struct nvmet_fabrics_ops nvme_loop_ops; static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue) { @@ -675,7 +675,7 @@ static void nvme_loop_remove_port(struct nvmet_port *port) nvmet_loop_port = NULL; } -static struct nvmet_fabrics_ops nvme_loop_ops = { +static const struct nvmet_fabrics_ops nvme_loop_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_LOOP, .add_port = nvme_loop_add_port, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 417f6c0331cc..15fd84ab21f8 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -130,7 +130,7 @@ struct nvmet_ctrl { struct delayed_work ka_work; struct work_struct fatal_err_work; - struct nvmet_fabrics_ops *ops; + const struct nvmet_fabrics_ops *ops; char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; @@ -209,6 +209,8 @@ struct nvmet_fabrics_ops { int (*add_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port); void (*delete_ctrl)(struct nvmet_ctrl *ctrl); + void (*disc_traddr)(struct nvmet_req *req, + struct nvmet_port *port, char *traddr); }; #define NVMET_MAX_INLINE_BIOVEC 8 @@ -231,7 +233,7 @@ struct nvmet_req { struct nvmet_port *port; void (*execute)(struct nvmet_req *req); - struct nvmet_fabrics_ops *ops; + const struct nvmet_fabrics_ops *ops; }; static inline void nvmet_set_status(struct nvmet_req *req, u16 status) @@ -267,7 +269,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req); u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, - struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); + struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); void nvmet_req_execute(struct nvmet_req *req); void nvmet_req_complete(struct nvmet_req *req, u16 status); @@ -301,8 +303,8 @@ void nvmet_ns_disable(struct nvmet_ns *ns); struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid); void nvmet_ns_free(struct nvmet_ns *ns); -int nvmet_register_transport(struct nvmet_fabrics_ops *ops); -void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops); +int nvmet_register_transport(const struct nvmet_fabrics_ops *ops); +void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops); int nvmet_enable_port(struct nvmet_port *port); void nvmet_disable_port(struct nvmet_port *port); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 978e169c11bf..52e0c5d579a7 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -77,7 +77,6 @@ enum nvmet_rdma_queue_state { NVMET_RDMA_Q_CONNECTING, NVMET_RDMA_Q_LIVE, NVMET_RDMA_Q_DISCONNECTING, - NVMET_RDMA_IN_DEVICE_REMOVAL, }; struct nvmet_rdma_queue { @@ -137,7 +136,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); -static struct nvmet_fabrics_ops nvmet_rdma_ops; +static const struct nvmet_fabrics_ops nvmet_rdma_ops; /* XXX: really should move to a generic header sooner or later.. */ static inline u32 get_unaligned_le24(const u8 *p) @@ -914,8 +913,11 @@ err_destroy_cq: static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { - ib_drain_qp(queue->cm_id->qp); - rdma_destroy_qp(queue->cm_id); + struct ib_qp *qp = queue->cm_id->qp; + + ib_drain_qp(qp); + rdma_destroy_id(queue->cm_id); + ib_destroy_qp(qp); ib_free_cq(queue->cq); } @@ -940,15 +942,10 @@ static void nvmet_rdma_release_queue_work(struct work_struct *w) { struct nvmet_rdma_queue *queue = container_of(w, struct nvmet_rdma_queue, release_work); - struct rdma_cm_id *cm_id = queue->cm_id; struct nvmet_rdma_device *dev = queue->dev; - enum nvmet_rdma_queue_state state = queue->state; nvmet_rdma_free_queue(queue); - if (state != NVMET_RDMA_IN_DEVICE_REMOVAL) - rdma_destroy_id(cm_id); - kref_put(&dev->ref, nvmet_rdma_free_dev); } @@ -1153,8 +1150,11 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); - if (ret) - goto release_queue; + if (ret) { + schedule_work(&queue->release_work); + /* Destroying rdma_cm id is not needed here */ + return 0; + } mutex_lock(&nvmet_rdma_queue_mutex); list_add_tail(&queue->queue_list, &nvmet_rdma_queue_list); @@ -1162,8 +1162,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, return 0; -release_queue: - nvmet_rdma_free_queue(queue); put_device: kref_put(&ndev->ref, nvmet_rdma_free_dev); @@ -1209,7 +1207,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) case NVMET_RDMA_Q_CONNECTING: case NVMET_RDMA_Q_LIVE: queue->state = NVMET_RDMA_Q_DISCONNECTING; - case NVMET_RDMA_IN_DEVICE_REMOVAL: disconnect = true; break; case NVMET_RDMA_Q_DISCONNECTING: @@ -1322,13 +1319,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - /* - * We might end up here when we already freed the qp - * which means queue release sequence is in progress, - * so don't get in the way... - */ - if (queue) - nvmet_rdma_queue_disconnect(queue); + nvmet_rdma_queue_disconnect(queue); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: ret = nvmet_rdma_device_removal(cm_id, queue); @@ -1445,7 +1436,24 @@ static void nvmet_rdma_remove_port(struct nvmet_port *port) rdma_destroy_id(cm_id); } -static struct nvmet_fabrics_ops nvmet_rdma_ops = { +static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, + struct nvmet_port *port, char *traddr) +{ + struct rdma_cm_id *cm_id = port->priv; + + if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { + struct nvmet_rdma_rsp *rsp = + container_of(req, struct nvmet_rdma_rsp, req); + struct rdma_cm_id *req_cm_id = rsp->queue->cm_id; + struct sockaddr *addr = (void *)&req_cm_id->route.addr.src_addr; + + sprintf(traddr, "%pISc", addr); + } else { + memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); + } +} + +static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, .sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE, @@ -1455,13 +1463,31 @@ static struct nvmet_fabrics_ops nvmet_rdma_ops = { .remove_port = nvmet_rdma_remove_port, .queue_response = nvmet_rdma_queue_response, .delete_ctrl = nvmet_rdma_delete_ctrl, + .disc_traddr = nvmet_rdma_disc_port_addr, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) { struct nvmet_rdma_queue *queue, *tmp; + struct nvmet_rdma_device *ndev; + bool found = false; - /* Device is being removed, delete all queues using this device */ + mutex_lock(&device_list_mutex); + list_for_each_entry(ndev, &device_list, entry) { + if (ndev->device == ib_device) { + found = true; + break; + } + } + mutex_unlock(&device_list_mutex); + + if (!found) + return; + + /* + * IB Device that is used by nvmet controllers is being removed, + * delete all queues using this device. + */ mutex_lock(&nvmet_rdma_queue_mutex); list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, queue_list) { diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ecef8e73d40b..b5692a284bd8 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3208,7 +3208,7 @@ static void dasd_setup_queue(struct dasd_block *block) } else { max = block->base->discipline->max_blocks << block->s2b_shift; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); q->limits.max_dev_sectors = max; blk_queue_logical_block_size(q, logical_block_size); blk_queue_max_hw_sectors(q, max); @@ -3231,7 +3231,7 @@ static void dasd_setup_queue(struct dasd_block *block) blk_queue_max_discard_sectors(q, max_discard_sectors); blk_queue_max_write_zeroes_sectors(q, max_discard_sectors); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 9cae08b36b80..0a312e450207 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -633,7 +633,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->gd->private_data = dev_info; blk_queue_make_request(dev_info->dcssblk_queue, dcssblk_make_request); blk_queue_logical_block_size(dev_info->dcssblk_queue, 4096); - queue_flag_set_unlocked(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); + blk_queue_flag_set(QUEUE_FLAG_DAX, dev_info->dcssblk_queue); seg_byte_size = (dev_info->end - dev_info->start + 1); set_capacity(dev_info->gd, seg_byte_size >> 9); // size in sectors diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index b4130c7880d8..b1fcb76dd272 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -472,8 +472,8 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) blk_queue_logical_block_size(rq, 1 << 12); blk_queue_max_hw_sectors(rq, nr_max_blk << 3); /* 8 * 512 = blk_size */ blk_queue_max_segments(rq, nr_max_blk); - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rq); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, rq); + blk_queue_flag_set(QUEUE_FLAG_NONROT, rq); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, rq); bdev->gendisk = alloc_disk(SCM_NR_PARTS); if (!bdev->gendisk) { diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 2a6334ca750e..3df5d68d09f0 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -348,8 +348,8 @@ static int __init xpram_setup_blkdev(void) put_disk(xpram_disks[i]); goto out; } - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, xpram_queues[i]); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); + blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_queues[i]); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, xpram_queues[i]); blk_queue_make_request(xpram_queues[i], xpram_make_request); blk_queue_logical_block_size(xpram_queues[i], 4096); } diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index ca218c82321f..6162cf57a20a 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -961,7 +961,7 @@ static int zfcp_fc_exec_els_job(struct bsg_job *job, d_id = ntoh24(bsg_request->rqst_data.h_els.port_id); els->handler = zfcp_fc_ct_els_job_handler; - return zfcp_fsf_send_els(adapter, d_id, els, job->req->timeout / HZ); + return zfcp_fsf_send_els(adapter, d_id, els, job->timeout / HZ); } static int zfcp_fc_exec_ct_job(struct bsg_job *job, @@ -980,7 +980,7 @@ static int zfcp_fc_exec_ct_job(struct bsg_job *job, return ret; ct->handler = zfcp_fc_ct_job_handler; - ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->req->timeout / HZ); + ret = zfcp_fsf_send_ct(wka_port, ct, NULL, job->timeout / HZ); if (ret) zfcp_fc_wka_port_put(wka_port); diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h index 95fc720c1b30..e6e5ccb1e0f3 100644 --- a/drivers/scsi/gdth.h +++ b/drivers/scsi/gdth.h @@ -178,9 +178,6 @@ #define MSG_SIZE 34 /* size of message structure */ #define MSG_REQUEST 0 /* async. event: message */ -/* cacheservice defines */ -#define SECTOR_SIZE 0x200 /* always 512 bytes per sec. */ - /* DPMEM constants */ #define DPMEM_MAGIC 0xC0FFEE11 #define IC_HEADER_BYTES 48 diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 6d886b13dbe9..2ba4b68fdb73 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -949,7 +949,7 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param) static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev) { - set_bit(QUEUE_FLAG_BIDI, &sdev->request_queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_BIDI, sdev->request_queue); return 0; } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index dde0798b8a91..7a37ace4239b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1864,7 +1864,7 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev, u32 max_io_size) blk_queue_max_hw_sectors(sdev->request_queue, (max_io_size / 512)); - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue); + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue); blk_queue_virt_boundary(sdev->request_queue, mr_nvme_pg_size - 1); } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 5ec3b74e8aed..ce97cde3b41c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1894,7 +1894,7 @@ megasas_is_prp_possible(struct megasas_instance *instance, * then sending IOs with holes. * * Though driver can request block layer to disable IO merging by calling- - * queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, sdev->request_queue) but + * blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue) but * user may tune sysfs parameter- nomerges again to 0 or 1. * * If in future IO scheduling is enabled with SCSI BLK MQ, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index a1cb0236c550..aee1a0e1c600 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2352,7 +2352,7 @@ scsih_slave_configure(struct scsi_device *sdev) ** merged and can eliminate holes created during merging ** operation. **/ - queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue); blk_queue_virt_boundary(sdev->request_queue, ioc->page_size - 1); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 5c5dcca4d1da..822d22336e15 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -6830,7 +6830,7 @@ static int qla2xxx_map_queues(struct Scsi_Host *shost) if (USER_CTRL_IRQ(vha->hw)) rc = blk_mq_map_queues(&shost->tag_set); else - rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev); + rc = blk_mq_pci_map_queues(&shost->tag_set, vha->hw->pdev, 0); return rc; } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index a5986dae9020..1cb353f18d08 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -3897,7 +3897,7 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp) if (sdebug_verbose) pr_info("slave_alloc <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); - queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue); + blk_queue_flag_set(QUEUE_FLAG_BIDI, sdp->request_queue); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c84f931388f2..ed79d3925860 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2144,8 +2144,6 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q) { struct device *dev = shost->dma_dev; - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); - /* * this limit is imposed by hardware restrictions */ @@ -2227,7 +2225,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) struct Scsi_Host *shost = sdev->host; struct request_queue *q; - q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); + q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL); if (!q) return NULL; q->cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size; @@ -2243,6 +2241,7 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) } __scsi_init_queue(shost, q); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q); blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_unprep_rq(q, scsi_unprep_fn); blk_queue_softirq_done(q, scsi_softirq_done); @@ -2274,6 +2273,7 @@ struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev) sdev->request_queue->queuedata = sdev; __scsi_init_queue(sdev->host, sdev->request_queue); + blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, sdev->request_queue); return sdev->request_queue; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 91b90f672d23..7142c8be1099 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1292,8 +1292,7 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev) transport_add_device(&sdev->sdev_gendev); sdev->is_visible = 1; - error = bsg_register_queue(rq, &sdev->sdev_gendev, NULL, NULL); - + error = bsg_scsi_register_queue(rq, &sdev->sdev_gendev); if (error) /* we're treating error on bsg register as non-fatal, * so pretend nothing went wrong */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 736a1f4f9676..08acbabfae07 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -227,8 +227,7 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy) * by default assume old behaviour and bounce for any highmem page */ blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); - queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); - queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); + blk_queue_flag_set(QUEUE_FLAG_BIDI, q); return 0; } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 1fa84d6a0f8b..a6201e696ab9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -714,7 +714,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) case SD_LBP_FULL: case SD_LBP_DISABLE: blk_queue_max_discard_sectors(q, 0); - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q); return; case SD_LBP_UNMAP: @@ -747,7 +747,7 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode) } blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9)); - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); + blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd) @@ -2955,8 +2955,8 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) rot = get_unaligned_be16(&buffer[4]); if (rot == 1) { - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q); + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); + blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); } if (sdkp->device->type == TYPE_ZBC) { diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index b2880c7709e6..10c94011c8a8 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5348,7 +5348,7 @@ static int pqi_map_queues(struct Scsi_Host *shost) { struct pqi_ctrl_info *ctrl_info = shost_to_hba(shost); - return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev); + return blk_mq_pci_map_queues(&shost->tag_set, ctrl_info->pci_dev, 0); } static int pqi_getpciinfo_ioctl(struct pqi_ctrl_info *ctrl_info, diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 9be34d37c356..0cf25d789d05 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -525,6 +525,8 @@ static int sr_block_open(struct block_device *bdev, fmode_t mode) struct scsi_cd *cd; int ret = -ENXIO; + check_disk_change(bdev); + mutex_lock(&sr_mutex); cd = scsi_cd_get(bdev->bd_disk); if (cd) { diff --git a/drivers/staging/rts5208/rtsx_chip.h b/drivers/staging/rts5208/rtsx_chip.h index 4f6e3c1c4621..8a8cd5d3cf7e 100644 --- a/drivers/staging/rts5208/rtsx_chip.h +++ b/drivers/staging/rts5208/rtsx_chip.h @@ -339,13 +339,13 @@ struct sense_data_t { #define CHK_BIT(data, idx) ((data) & (1 << (idx))) /* SG descriptor */ -#define SG_INT 0x04 -#define SG_END 0x02 -#define SG_VALID 0x01 +#define RTSX_SG_INT 0x04 +#define RTSX_SG_END 0x02 +#define RTSX_SG_VALID 0x01 -#define SG_NO_OP 0x00 -#define SG_TRANS_DATA (0x02 << 4) -#define SG_LINK_DESC (0x03 << 4) +#define RTSX_SG_NO_OP 0x00 +#define RTSX_SG_TRANS_DATA (0x02 << 4) +#define RTSX_SG_LINK_DESC (0x03 << 4) struct rtsx_chip; diff --git a/drivers/staging/rts5208/rtsx_transport.c b/drivers/staging/rts5208/rtsx_transport.c index 8b57e17ee6d3..716cce2bd7f0 100644 --- a/drivers/staging/rts5208/rtsx_transport.c +++ b/drivers/staging/rts5208/rtsx_transport.c @@ -308,7 +308,7 @@ static inline void rtsx_add_sg_tbl( do { if (len > 0x80000) { temp_len = 0x80000; - temp_opt = option & (~SG_END); + temp_opt = option & (~RTSX_SG_END); } else { temp_len = len; temp_opt = option; @@ -407,9 +407,9 @@ static int rtsx_transfer_sglist_adma_partial(struct rtsx_chip *chip, u8 card, *index = *index + 1; } if ((i == (sg_cnt - 1)) || !resid) - option = SG_VALID | SG_END | SG_TRANS_DATA; + option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA; else - option = SG_VALID | SG_TRANS_DATA; + option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA; rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option); @@ -555,9 +555,9 @@ static int rtsx_transfer_sglist_adma(struct rtsx_chip *chip, u8 card, (unsigned int)addr, len); if (j == (sg_cnt - 1)) - option = SG_VALID | SG_END | SG_TRANS_DATA; + option = RTSX_SG_VALID | RTSX_SG_END | RTSX_SG_TRANS_DATA; else - option = SG_VALID | SG_TRANS_DATA; + option = RTSX_SG_VALID | RTSX_SG_TRANS_DATA; rtsx_add_sg_tbl(chip, (u32)addr, (u32)len, option); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 9eb10d34682c..8e223799347a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/sched/signal.h> #include <asm/unaligned.h> +#include <linux/inet.h> #include <net/ipv6.h> #include <scsi/scsi_proto.h> #include <scsi/iscsi_proto.h> @@ -3291,30 +3292,6 @@ iscsit_send_task_mgt_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn) return conn->conn_transport->iscsit_xmit_pdu(conn, cmd, NULL, NULL, 0); } -static bool iscsit_check_inaddr_any(struct iscsi_np *np) -{ - bool ret = false; - - if (np->np_sockaddr.ss_family == AF_INET6) { - const struct sockaddr_in6 sin6 = { - .sin6_addr = IN6ADDR_ANY_INIT }; - struct sockaddr_in6 *sock_in6 = - (struct sockaddr_in6 *)&np->np_sockaddr; - - if (!memcmp(sock_in6->sin6_addr.s6_addr, - sin6.sin6_addr.s6_addr, 16)) - ret = true; - } else { - struct sockaddr_in * sock_in = - (struct sockaddr_in *)&np->np_sockaddr; - - if (sock_in->sin_addr.s_addr == htonl(INADDR_ANY)) - ret = true; - } - - return ret; -} - #define SENDTARGETS_BUF_LIMIT 32768U static int @@ -3393,7 +3370,6 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { struct iscsi_np *np = tpg_np->tpg_np; - bool inaddr_any = iscsit_check_inaddr_any(np); struct sockaddr_storage *sockaddr; if (np->np_network_transport != network_transport) @@ -3422,7 +3398,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd, } } - if (inaddr_any) + if (inet_addr_is_any((struct sockaddr *)&np->np_sockaddr)) sockaddr = &conn->local_sockaddr; else sockaddr = &np->np_sockaddr; diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 9cd4ffe76c07..60d5b918c4ac 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -309,7 +309,7 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc) static int tcm_loop_slave_alloc(struct scsi_device *sd) { - set_bit(QUEUE_FLAG_BIDI, &sd->request_queue->queue_flags); + blk_queue_flag_set(QUEUE_FLAG_BIDI, sd->request_queue); return 0; } |