summaryrefslogtreecommitdiffstats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-24 14:13:34 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-24 14:13:34 -0800
commit1802979ab1ee8ec5a72987ad518f5a91bf41cd89 (patch)
tree4b1bfc68a180040d713e8b1f106ca12f2461b8ec /block/blk-mq.c
parentf1ef09fde17f9b77ca1435a5b53a28b203afb81c (diff)
parent61febef40bfe8ab68259d8545257686e8a0d91d1 (diff)
downloadlinux-stable-1802979ab1ee8ec5a72987ad518f5a91bf41cd89.tar.gz
linux-stable-1802979ab1ee8ec5a72987ad518f5a91bf41cd89.tar.bz2
linux-stable-1802979ab1ee8ec5a72987ad518f5a91bf41cd89.zip
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block updates and fixes from Jens Axboe: - NVMe updates and fixes that missed the first pull request. This includes bug fixes, and support for autonomous power management. - Fix from Christoph for missing clear of the request payload, causing a problem with (at least) the storvsc driver. - Further fixes for the queue/bdi life time issues from Jan. - The Kconfig mq scheduler update from me. - Fixing a use-after-free in dm-rq, spotted by Bart, introduced in this merge window. - Three fixes for nbd from Josef. - Bug fix from Omar, fixing a bug in sas transport code that oopses when bsg ioctls were used. From Omar. - Improvements to the queue restart and tag wait from from Omar. - Set of fixes for the sed/opal code from Scott. - Three trivial patches to cciss from Tobin * 'for-linus' of git://git.kernel.dk/linux-block: (41 commits) dm-rq: don't dereference request payload after ending request blk-mq-sched: separate mark hctx and queue restart operations blk-mq: use sbq wait queues instead of restart for driver tags block/sed-opal: Propagate original error message to userland. nvme/pci: re-check security protocol support after reset block/sed-opal: Introduce free_opal_dev to free the structure and clean up state nvme: detect NVMe controller in recent MacBooks nvme-rdma: add support for host_traddr nvmet-rdma: Fix error handling nvmet-rdma: use nvme cm status helper nvme-rdma: move nvme cm status helper to .h file nvme-fc: don't bother to validate ioccsz and iorcsz nvme/pci: No special case for queue busy on IO nvme/core: Fix race kicking freed request_queue nvme/pci: Disable on removal when disconnected nvme: Enable autonomous power state transitions nvme: Add a quirk mechanism that uses identify_ctrl nvme: make nvmf_register_transport require a create_ctrl callback nvme: Use CNS as 8-bit field and avoid endianness conversion nvme: add semicolon in nvme_command setting ...
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--block/blk-mq.c64
1 files changed, 55 insertions, 9 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index b29e7dc7b309..9e6b064e5339 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list)
return first != NULL;
}
+static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+ void *key)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
+
+ list_del(&wait->task_list);
+ clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+ blk_mq_run_hw_queue(hctx, true);
+ return 1;
+}
+
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+{
+ struct sbq_wait_state *ws;
+
+ /*
+ * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
+ * The thread which wins the race to grab this bit adds the hardware
+ * queue to the wait queue.
+ */
+ if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
+ test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+ return false;
+
+ init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+ ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+
+ /*
+ * As soon as this returns, it's no longer safe to fiddle with
+ * hctx->dispatch_wait, since a completion can wake up the wait queue
+ * and unlock the bit.
+ */
+ add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+ return true;
+}
+
bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
{
struct request_queue *q = hctx->queue;
@@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
continue;
/*
- * We failed getting a driver tag. Mark the queue(s)
- * as needing a restart. Retry getting a tag again,
- * in case the needed IO completed right before we
- * marked the queue as needing a restart.
+ * The initial allocation attempt failed, so we need to
+ * rerun the hardware queue when a tag is freed.
*/
- blk_mq_sched_mark_restart(hctx);
- if (!blk_mq_get_driver_tag(rq, &hctx, false))
+ if (blk_mq_dispatch_wait_add(hctx)) {
+ /*
+ * It's possible that a tag was freed in the
+ * window between the allocation failure and
+ * adding the hardware queue to the wait queue.
+ */
+ if (!blk_mq_get_driver_tag(rq, &hctx, false))
+ break;
+ } else {
break;
+ }
}
+
list_del_init(&rq->queuelist);
bd.rq = rq;
@@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
*
* blk_mq_run_hw_queue() already checks the STOPPED bit
*
- * If RESTART is set, then let completion restart the queue
- * instead of potentially looping here.
+ * If RESTART or TAG_WAITING is set, then let completion restart
+ * the queue instead of potentially looping here.
*/
- if (!blk_mq_sched_needs_restart(hctx))
+ if (!blk_mq_sched_needs_restart(hctx) &&
+ !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
blk_mq_run_hw_queue(hctx, true);
}