From 63f6e6fd05624a1c915ae95dfb81264f659914d8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 5 Dec 2017 14:10:33 -0500 Subject: dm mpath: remove unused param from multipath_init_per_bio_data() 'struct dm_bio_details *' isn't ever needed. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f7810cc869ac..8ed72275026d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -273,8 +273,7 @@ static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio) return bio_details; } -static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p, - struct dm_bio_details **bio_details_p) +static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p) { struct dm_mpath_io *mpio = get_mpio_from_bio(bio); struct dm_bio_details *bio_details = get_bio_details_from_bio(bio); @@ -285,8 +284,6 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp if (mpio_p) *mpio_p = mpio; - if (bio_details_p) - *bio_details_p = bio_details; } /*----------------------------------------------- @@ -610,8 +607,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio) struct multipath *m = ti->private; struct dm_mpath_io *mpio = NULL; - multipath_init_per_bio_data(bio, &mpio, NULL); - + multipath_init_per_bio_data(bio, &mpio); return __multipath_map_bio(m, bio, mpio); } -- cgit v1.2.3 From d0442f8039ee54716dd3f3100cfd8e11d9a2486c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 Dec 2017 15:58:41 -0500 Subject: dm mpath: remove unnecessary memset() calls for per-io-data All underlying members are initialized directly so the memset() calls are not needed. Also, initialize mpio->nr_bytes from the start since it never changes. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8ed72275026d..a3acbfb638be 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -278,12 +278,11 @@ static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mp struct dm_mpath_io *mpio = get_mpio_from_bio(bio); struct dm_bio_details *bio_details = get_bio_details_from_bio(bio); - memset(mpio, 0, sizeof(*mpio)); - memset(bio_details, 0, sizeof(*bio_details)); - dm_bio_record(bio_details, bio); + mpio->nr_bytes = bio->bi_iter.bi_size; + mpio->pgpath = NULL; + *mpio_p = mpio; - if (mpio_p) - *mpio_p = mpio; + dm_bio_record(bio_details, bio); } /*----------------------------------------------- @@ -518,7 +517,6 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return DM_MAPIO_REQUEUE; } - memset(mpio, 0, sizeof(*mpio)); mpio->pgpath = pgpath; mpio->nr_bytes = nr_bytes; @@ -556,7 +554,6 @@ static void multipath_release_clone(struct request *clone) */ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio) { - size_t nr_bytes = bio->bi_iter.bi_size; struct pgpath *pgpath; unsigned long flags; bool queue_io; @@ -565,7 +562,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m pgpath = READ_ONCE(m->current_pgpath); queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); if (!pgpath || !queue_io) - pgpath = choose_pgpath(m, nr_bytes); + pgpath = choose_pgpath(m, mpio->nr_bytes); if ((pgpath && queue_io) || (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { @@ -589,7 +586,6 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m } mpio->pgpath = pgpath; - mpio->nr_bytes = nr_bytes; bio->bi_status = 0; bio_set_dev(bio, pgpath->path.dev->bdev); @@ -598,7 +594,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path, - nr_bytes); + mpio->nr_bytes); return DM_MAPIO_REMAPPED; } -- cgit v1.2.3 From d07a241d4f9aaab61b4a2ce7ec8053ad429c4232 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 Dec 2017 16:08:54 -0500 Subject: dm mpath: optimize retrieval of bio_details from per-bio-data Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a3acbfb638be..40d721df11d3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -264,19 +264,17 @@ static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio) return dm_per_bio_data(bio, multipath_per_bio_data_size()); } -static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio) +static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio) { /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */ - struct dm_mpath_io *mpio = get_mpio_from_bio(bio); void *bio_details = mpio + 1; - return bio_details; } static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p) { struct dm_mpath_io *mpio = get_mpio_from_bio(bio); - struct dm_bio_details *bio_details = get_bio_details_from_bio(bio); + struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio); mpio->nr_bytes = bio->bi_iter.bi_size; mpio->pgpath = NULL; @@ -1554,7 +1552,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, } /* Queue for the daemon to resubmit */ - dm_bio_restore(get_bio_details_from_bio(clone), clone); + dm_bio_restore(get_bio_details_from_mpio(mpio), clone); spin_lock_irqsave(&m->lock, flags); bio_list_add(&m->queued_bios, clone); -- cgit v1.2.3 From 1836df0891423dcf2b68771e04b28e2208ee95f2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 6 Dec 2017 16:08:14 -0500 Subject: dm mpath: move dm_bio_restore out of endio method Moving the dm_bio_restore() to process_queued_bios() avoids doing that work in multipath_end_io_bio(). Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 40d721df11d3..45e9044d3bb5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -639,7 +639,9 @@ static void process_queued_bios(struct work_struct *work) blk_start_plug(&plug); while ((bio = bio_list_pop(&bios))) { - r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio)); + struct dm_mpath_io *mpio = get_mpio_from_bio(bio); + dm_bio_restore(get_bio_details_from_mpio(mpio), bio); + r = __multipath_map_bio(m, bio, mpio); switch (r) { case DM_MAPIO_KILL: bio->bi_status = BLK_STS_IOERR; @@ -1551,9 +1553,6 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, goto done; } - /* Queue for the daemon to resubmit */ - dm_bio_restore(get_bio_details_from_mpio(mpio), clone); - spin_lock_irqsave(&m->lock, flags); bio_list_add(&m->queued_bios, clone); spin_unlock_irqrestore(&m->lock, flags); -- cgit v1.2.3 From cd025384455715525a296e54999349e540850301 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 5 Dec 2017 16:02:21 -0500 Subject: dm mpath: implement NVMe bio-based support This DM multipath NVMe bio-based support requires CONFIG_NVME_MULTIPATH to not be set. In the future hopefully NVMe multipath and DM multipath can co-exist more seemlessly. But as is, if CONFIG_NVME_MULTIPATH=Y then all the individal NVMe paths will remain hidden to upper layers and as such DM multipath will not be able to manage them. Though NVMe's native multipathing doesn't multipath namespaces across subsystems; so technically a user _could_ use CONFIG_NVME_MULTIPATH=Y and also use DM multipath to multipath across subsystems. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 45e9044d3bb5..d3813b1e74e2 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -221,13 +221,18 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; else m->queue_mode = DM_TYPE_REQUEST_BASED; - } else if (m->queue_mode == DM_TYPE_BIO_BASED) { + + } else if (m->queue_mode == DM_TYPE_BIO_BASED || + m->queue_mode == DM_TYPE_NVME_BIO_BASED) { INIT_WORK(&m->process_queued_bios, process_queued_bios); - /* - * bio-based doesn't support any direct scsi_dh management; - * it just discovers if a scsi_dh is attached. - */ - set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); + + if (m->queue_mode == DM_TYPE_BIO_BASED) { + /* + * bio-based doesn't support any direct scsi_dh management; + * it just discovers if a scsi_dh is attached. + */ + set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); + } } dm_table_set_type(ti->table, m->queue_mode); @@ -609,7 +614,8 @@ static void process_queued_io_list(struct multipath *m) { if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table)); - else if (m->queue_mode == DM_TYPE_BIO_BASED) + else if (m->queue_mode == DM_TYPE_BIO_BASED || + m->queue_mode == DM_TYPE_NVME_BIO_BASED) queue_work(kmultipathd, &m->process_queued_bios); } @@ -925,7 +931,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) if (!hw_argc) return 0; - if (m->queue_mode == DM_TYPE_BIO_BASED) { + if (m->queue_mode == DM_TYPE_BIO_BASED || + m->queue_mode == DM_TYPE_NVME_BIO_BASED) { dm_consume_args(as, hw_argc); DMERR("bio-based multipath doesn't allow hardware handler args"); return 0; @@ -1014,6 +1021,8 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) if (!strcasecmp(queue_mode_name, "bio")) m->queue_mode = DM_TYPE_BIO_BASED; + else if (!strcasecmp(queue_mode_name, "nvme")) + m->queue_mode = DM_TYPE_NVME_BIO_BASED; else if (!strcasecmp(queue_mode_name, "rq")) m->queue_mode = DM_TYPE_REQUEST_BASED; else if (!strcasecmp(queue_mode_name, "mq")) @@ -1114,7 +1123,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_discard_bios = 1; ti->num_write_same_bios = 1; ti->num_write_zeroes_bios = 1; - if (m->queue_mode == DM_TYPE_BIO_BASED) + if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED) ti->per_io_data_size = multipath_per_bio_data_size(); else ti->per_io_data_size = sizeof(struct dm_mpath_io); @@ -1660,6 +1669,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type, case DM_TYPE_BIO_BASED: DMEMIT("queue_mode bio "); break; + case DM_TYPE_NVME_BIO_BASED: + DMEMIT("queue_mode nvme "); + break; case DM_TYPE_MQ_REQUEST_BASED: DMEMIT("queue_mode mq "); break; -- cgit v1.2.3 From 848b8aefd44df99b3e38a872acb8d54d3530bebf Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 10 Dec 2017 15:37:21 -0500 Subject: dm mpath: optimize NVMe bio-based support All code that deals with pg_init is not used with bio-based NVMe mode. This includes skipping initialization of pg_init related variables. Also, pg_init related members on 'struct multipath' have been grouped together. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 171 ++++++++++++++++++++++++++++---------------------- 1 file changed, 95 insertions(+), 76 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d3813b1e74e2..fa5ee78c69c9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -64,36 +64,30 @@ struct priority_group { /* Multipath context */ struct multipath { - struct list_head list; - struct dm_target *ti; - - const char *hw_handler_name; - char *hw_handler_params; + unsigned long flags; /* Multipath state flags */ spinlock_t lock; - - unsigned nr_priority_groups; - struct list_head priority_groups; - - wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ + enum dm_queue_mode queue_mode; struct pgpath *current_pgpath; struct priority_group *current_pg; struct priority_group *next_pg; /* Switch to this PG if set */ - unsigned long flags; /* Multipath state flags */ + atomic_t nr_valid_paths; /* Total number of usable paths */ + unsigned nr_priority_groups; + struct list_head priority_groups; + const char *hw_handler_name; + char *hw_handler_params; + wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ - - atomic_t nr_valid_paths; /* Total number of usable paths */ atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ atomic_t pg_init_count; /* Number of times pg_init called */ - enum dm_queue_mode queue_mode; - struct mutex work_mutex; struct work_struct trigger_event; + struct dm_target *ti; struct work_struct process_queued_bios; struct bio_list queued_bios; @@ -135,10 +129,10 @@ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - if (pgpath) { - pgpath->is_active = true; - INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work); - } + if (!pgpath) + return NULL; + + pgpath->is_active = true; return pgpath; } @@ -193,13 +187,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti) if (m) { INIT_LIST_HEAD(&m->priority_groups); spin_lock_init(&m->lock); - set_bit(MPATHF_QUEUE_IO, &m->flags); atomic_set(&m->nr_valid_paths, 0); - atomic_set(&m->pg_init_in_progress, 0); - atomic_set(&m->pg_init_count, 0); - m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; INIT_WORK(&m->trigger_event, trigger_event); - init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); m->queue_mode = DM_TYPE_NONE; @@ -235,6 +224,14 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) } } + if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) { + set_bit(MPATHF_QUEUE_IO, &m->flags); + atomic_set(&m->pg_init_in_progress, 0); + atomic_set(&m->pg_init_count, 0); + m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; + init_waitqueue_head(&m->pg_init_wait); + } + dm_table_set_type(ti->table, m->queue_mode); return 0; @@ -339,6 +336,9 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg) { m->current_pg = pg; + if (m->queue_mode == DM_TYPE_NVME_BIO_BASED) + return; + /* Must we initialise the PG first, and queue I/O till it's ready? */ if (m->hw_handler_name) { set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); @@ -384,7 +384,8 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) unsigned bypassed = 1; if (!atomic_read(&m->nr_valid_paths)) { - clear_bit(MPATHF_QUEUE_IO, &m->flags); + if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) + clear_bit(MPATHF_QUEUE_IO, &m->flags); goto failed; } @@ -528,8 +529,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC); if (IS_ERR(clone)) { /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ - bool queue_dying = blk_queue_dying(q); - if (queue_dying) { + if (blk_queue_dying(q)) { atomic_inc(&m->pg_init_in_progress); activate_or_offline_path(pgpath); } @@ -563,21 +563,28 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); + /* MPATHF_QUEUE_IO will never be set for NVMe */ queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); if (!pgpath || !queue_io) pgpath = choose_pgpath(m, mpio->nr_bytes); - if ((pgpath && queue_io) || - (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { + if ((!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || + (pgpath && queue_io)) { /* Queue for the daemon to resubmit */ spin_lock_irqsave(&m->lock, flags); bio_list_add(&m->queued_bios, bio); spin_unlock_irqrestore(&m->lock, flags); - /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ - if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); - else if (!queue_io) + + if (m->queue_mode == DM_TYPE_NVME_BIO_BASED) { queue_work(kmultipathd, &m->process_queued_bios); + } else { + /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ + if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) + pg_init_all_paths(m); + else if (!queue_io) + queue_work(kmultipathd, &m->process_queued_bios); + } + return DM_MAPIO_SUBMITTED; } @@ -750,34 +757,11 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, return 0; } -static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, - struct dm_target *ti) +static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error) { - int r; - struct pgpath *p; - struct multipath *m = ti->private; - struct request_queue *q = NULL; + struct request_queue *q = bdev_get_queue(bdev); const char *attached_handler_name; - - /* we need at least a path arg */ - if (as->argc < 1) { - ti->error = "no device given"; - return ERR_PTR(-EINVAL); - } - - p = alloc_pgpath(); - if (!p) - return ERR_PTR(-ENOMEM); - - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), - &p->path.dev); - if (r) { - ti->error = "error getting device"; - goto bad; - } - - if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name) - q = bdev_get_queue(p->path.dev->bdev); + int r; if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { retain: @@ -809,26 +793,59 @@ retain: char b[BDEVNAME_SIZE]; printk(KERN_INFO "dm-mpath: retaining handler on device %s\n", - bdevname(p->path.dev->bdev, b)); + bdevname(bdev, b)); goto retain; } if (r < 0) { - ti->error = "error attaching hardware handler"; - dm_put_device(ti, p->path.dev); - goto bad; + *error = "error attaching hardware handler"; + return r; } if (m->hw_handler_params) { r = scsi_dh_set_params(q, m->hw_handler_params); if (r < 0) { - ti->error = "unable to set hardware " - "handler parameters"; - dm_put_device(ti, p->path.dev); - goto bad; + *error = "unable to set hardware handler parameters"; + return r; } } } + return 0; +} + +static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, + struct dm_target *ti) +{ + int r; + struct pgpath *p; + struct multipath *m = ti->private; + + /* we need at least a path arg */ + if (as->argc < 1) { + ti->error = "no device given"; + return ERR_PTR(-EINVAL); + } + + p = alloc_pgpath(); + if (!p) + return ERR_PTR(-ENOMEM); + + r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + &p->path.dev); + if (r) { + ti->error = "error getting device"; + goto bad; + } + + if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) { + INIT_DELAYED_WORK(&p->activate_path, activate_path_work); + r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error); + if (r) { + dm_put_device(ti, p->path.dev); + goto bad; + } + } + r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); if (r) { dm_put_device(ti, p->path.dev); @@ -836,7 +853,6 @@ retain: } return p; - bad: free_pgpath(p); return ERR_PTR(r); @@ -1152,16 +1168,19 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m) static void flush_multipath_work(struct multipath *m) { - set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); - smp_mb__after_atomic(); + if (m->hw_handler_name) { + set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); + smp_mb__after_atomic(); + + flush_workqueue(kmpath_handlerd); + multipath_wait_for_pg_init_completion(m); + + clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); + smp_mb__after_atomic(); + } - flush_workqueue(kmpath_handlerd); - multipath_wait_for_pg_init_completion(m); flush_workqueue(kmultipathd); flush_work(&m->trigger_event); - - clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); - smp_mb__after_atomic(); } static void multipath_dtr(struct dm_target *ti) @@ -1537,7 +1556,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, } static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, - blk_status_t *error) + blk_status_t *error) { struct multipath *m = ti->private; struct dm_mpath_io *mpio = get_mpio_from_bio(clone); -- cgit v1.2.3 From 0001ec565db7ed01a6cc9575453900801bd0841b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 11 Dec 2017 11:02:29 -0500 Subject: dm mpath: factor out SCSI vs NVMe path selection Trying to do both SCSI and NVMe bio-based handling with branching in the same common code has proven too tedious on a code maintenance level. In addition it slightly hurts IO performance. Fix this by factoring out __map_bio() and __map_bio_nvme(). Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 68 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 13 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index fa5ee78c69c9..6d1a9906c582 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -555,7 +555,8 @@ static void multipath_release_clone(struct request *clone) /* * Map cloned bios (bio-based multipath) */ -static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio) + +static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) { struct pgpath *pgpath; unsigned long flags; @@ -563,30 +564,71 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - /* MPATHF_QUEUE_IO will never be set for NVMe */ queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); if (!pgpath || !queue_io) - pgpath = choose_pgpath(m, mpio->nr_bytes); + pgpath = choose_pgpath(m, bio->bi_iter.bi_size); - if ((!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || - (pgpath && queue_io)) { + if ((pgpath && queue_io) || + (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { /* Queue for the daemon to resubmit */ spin_lock_irqsave(&m->lock, flags); bio_list_add(&m->queued_bios, bio); spin_unlock_irqrestore(&m->lock, flags); - if (m->queue_mode == DM_TYPE_NVME_BIO_BASED) { + /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ + if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) + pg_init_all_paths(m); + else if (!queue_io) queue_work(kmultipathd, &m->process_queued_bios); - } else { - /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ - if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) - pg_init_all_paths(m); - else if (!queue_io) - queue_work(kmultipathd, &m->process_queued_bios); + + return ERR_PTR(-EAGAIN); + } + + return pgpath; +} + +static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio) +{ + struct pgpath *pgpath; + unsigned long flags; + + /* Do we need to select a new pgpath? */ + /* + * FIXME: currently only switching path if no path (due to failure, etc) + * - which negates the point of using a path selector + */ + pgpath = READ_ONCE(m->current_pgpath); + if (!pgpath) + pgpath = choose_pgpath(m, bio->bi_iter.bi_size); + + if (!pgpath) { + if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + /* Queue for the daemon to resubmit */ + spin_lock_irqsave(&m->lock, flags); + bio_list_add(&m->queued_bios, bio); + spin_unlock_irqrestore(&m->lock, flags); + queue_work(kmultipathd, &m->process_queued_bios); + + return ERR_PTR(-EAGAIN); } + return NULL; + } + return pgpath; +} + +static int __multipath_map_bio(struct multipath *m, struct bio *bio, + struct dm_mpath_io *mpio) +{ + struct pgpath *pgpath; + + if (m->queue_mode == DM_TYPE_NVME_BIO_BASED) + pgpath = __map_bio_nvme(m, bio); + else + pgpath = __map_bio(m, bio); + + if (IS_ERR(pgpath)) return DM_MAPIO_SUBMITTED; - } if (!pgpath) { if (must_push_back_bio(m)) -- cgit v1.2.3 From d5ffebdd797a7c1c89576267640f671db2a668fc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Jan 2018 21:17:20 -0500 Subject: dm: backfill missing calls to mutex_destroy() Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 6d1a9906c582..be581765edd1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -248,6 +248,7 @@ static void free_multipath(struct multipath *m) kfree(m->hw_handler_name); kfree(m->hw_handler_params); + mutex_destroy(&m->work_mutex); kfree(m); } -- cgit v1.2.3 From 050af08ffb1b62af69196d61c22a0755f9a3cdbd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 11 Jan 2018 14:01:56 +0800 Subject: dm mpath: return DM_MAPIO_REQUEUE on blk-mq rq allocation failure blk-mq will rerun queue via RESTART or dispatch wake after one request is completed, so not necessary to wait random time for requeuing, we should trust blk-mq to do it. More importantly, we need to return BLK_STS_RESOURCE to blk-mq so that dequeuing from the I/O scheduler can be stopped, this results in improved I/O merging. Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index be581765edd1..e0187798ccae 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -533,8 +533,20 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, if (blk_queue_dying(q)) { atomic_inc(&m->pg_init_in_progress); activate_or_offline_path(pgpath); + return DM_MAPIO_DELAY_REQUEUE; } - return DM_MAPIO_DELAY_REQUEUE; + + /* + * blk-mq's SCHED_RESTART can cover this requeue, so we + * needn't deal with it by DELAY_REQUEUE. More importantly, + * we have to return DM_MAPIO_REQUEUE so that blk-mq can + * get the queue busy feedback (via BLK_STS_RESOURCE), + * otherwise I/O merging can suffer. + */ + if (q->mq_ops) + return DM_MAPIO_REQUEUE; + else + return DM_MAPIO_DELAY_REQUEUE; } clone->bio = clone->biotail = NULL; clone->rq_disk = bdev->bd_disk; -- cgit v1.2.3 From 459b54019cfeb7330ed4863ad40f78489e0ff23d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 11 Jan 2018 14:01:55 +0800 Subject: dm mpath: return DM_MAPIO_DELAY_REQUEUE if QUEUE_IO or PG_INIT_REQUIRED Avoid using DM_MAPIO_REQUEUE unless absolutely necessary because it results in dm-rq.c:dm_mq_queue_rq() returning BLK_STS_RESOURCE to blk-mq -- doing so should only ever be done if the underlying queue is out of resources. So switch to returning DM_MAPIO_DELAY_REQUEUE from multipath_clone_and_map() if either MPATHF_QUEUE_IO or MPATHF_PG_INIT_REQUIRED are set. Signed-off-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e0187798ccae..815de2b091a5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -517,9 +517,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, return DM_MAPIO_KILL; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { - if (pg_init_all_paths(m)) - return DM_MAPIO_DELAY_REQUEUE; - return DM_MAPIO_REQUEUE; + pg_init_all_paths(m); + return DM_MAPIO_DELAY_REQUEUE; } mpio->pgpath = pgpath; -- cgit v1.2.3 From ac514ffc968bf14649dd0e048447dc966ee49555 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 12 Jan 2018 19:53:40 -0500 Subject: dm mpath: delay the retry of a request if the target responded as busy Add DM_ENDIO_DELAY_REQUEUE to allow request-based multipath's multipath_end_io() to instruct dm-rq.c:dm_done() to delay a requeue. This is beneficial to do if BLK_STS_RESOURCE is returned from the target (because target is busy). Relative to blk-mq: kick the hw queues via blk_mq_requeue_work(), indirectly from dm-rq.c:__dm_mq_kick_requeue_list(), after a delay. For old .request_fn: use blk_delay_queue(). bio-based multipath doesn't have feature parity with request-based for retryable error requeues; that is something that'll need fixing in the future. Suggested-by: Bart Van Assche Signed-off-by: Mike Snitzer Acked-by: Bart Van Assche [as interpreted from Bart's "... patch looks fine to me."] --- drivers/md/dm-mpath.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 815de2b091a5..a8b1ffc0cb3d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1585,7 +1585,10 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (error && !noretry_error(error)) { struct multipath *m = ti->private; - r = DM_ENDIO_REQUEUE; + if (error == BLK_STS_RESOURCE) + r = DM_ENDIO_DELAY_REQUEUE; + else + r = DM_ENDIO_REQUEUE; if (pgpath) fail_path(pgpath); -- cgit v1.2.3