summary | refs | log | tree | commit | diff | stats
path: root/block/blk-mq.c
diff options
context:
space:
mode:
author: Ming Lei <ming.lei@redhat.com> 2017-10-14 17:22:29 +0800
committer: Jens Axboe <axboe@kernel.dk> 2017-11-01 08:20:02 -0600
commit: de1482974080ec9ef414bf048b2646b246b63f6e (patch)
tree: 3ecdc2b581a83848c43205c2fd9b6e97a6808f6a /block/blk-mq.c
parent: 63ba8e31c3ac6393b07c6e18538814a730478766 (diff)
download: linux-de1482974080ec9ef414bf048b2646b246b63f6e.tar.gz
linux-de1482974080ec9ef414bf048b2646b246b63f6e.tar.bz2
linux-de1482974080ec9ef414bf048b2646b246b63f6e.zip
blk-mq: introduce .get_budget and .put_budget in blk_mq_ops
For SCSI devices, there is often a per-request-queue depth, which needs to be respected before queuing one request.

Currently blk-mq always dequeues the request first, then calls .queue_rq() to dispatch the request to the LLD. One obvious issue with this approach is that I/O merging may not be successful, because when the per-request-queue depth can't be respected, .queue_rq() has to return BLK_STS_RESOURCE, and then this request has to stay in the hctx->dispatch list. This means it never gets a chance to be merged with other I/O.

This patch introduces the .get_budget and .put_budget callbacks in blk_mq_ops, so that we can try to get reserved budget first before dequeuing a request. If the budget for queueing I/O can't be satisfied, we don't need to dequeue the request at all. Hence the request can be left in the I/O scheduler queue, for more merging opportunities.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r-- block/blk-mq.c 43
1 files changed, 38 insertions, 5 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 40cba1b1978f..dcb467369999 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1048,7 +1048,8 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
return true;
}
-bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
+bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
+ bool got_budget)
{
struct blk_mq_hw_ctx *hctx;
struct request *rq;
@@ -1057,6 +1058,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
if (list_empty(list))
return false;
+ WARN_ON(!list_is_singular(list) && got_budget);
+
/*
* Now process all the entries, sending them to the driver.
*/
@@ -1074,16 +1077,30 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
* The initial allocation attempt failed, so we need to
* rerun the hardware queue when a tag is freed.
*/
- if (!blk_mq_dispatch_wait_add(hctx))
+ if (!blk_mq_dispatch_wait_add(hctx)) {
+ if (got_budget)
+ blk_mq_put_dispatch_budget(hctx);
break;
+ }
/*
* It's possible that a tag was freed in the window
* between the allocation failure and adding the
* hardware queue to the wait queue.
*/
- if (!blk_mq_get_driver_tag(rq, &hctx, false))
+ if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+ if (got_budget)
+ blk_mq_put_dispatch_budget(hctx);
+ break;
+ }
+ }
+
+ if (!got_budget) {
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE)
break;
+ if (ret != BLK_STS_OK)
+ goto fail_rq;
}
list_del_init(&rq->queuelist);
@@ -1111,6 +1128,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
break;
}
+ fail_rq:
if (unlikely(ret != BLK_STS_OK)) {
errors++;
blk_mq_end_request(rq, BLK_STS_IOERR);
@@ -1169,6 +1187,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
{
int srcu_idx;
+ bool run_queue;
/*
* We should be running this queue from one of the CPUs that
@@ -1185,15 +1204,18 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
- blk_mq_sched_dispatch_requests(hctx);
+ run_queue = blk_mq_sched_dispatch_requests(hctx);
rcu_read_unlock();
} else {
might_sleep();
srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
- blk_mq_sched_dispatch_requests(hctx);
+ run_queue = blk_mq_sched_dispatch_requests(hctx);
srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
}
+
+ if (run_queue)
+ blk_mq_run_hw_queue(hctx, true);
}
/*
@@ -1582,6 +1604,13 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
if (!blk_mq_get_driver_tag(rq, NULL, false))
goto insert;
+ ret = blk_mq_get_dispatch_budget(hctx);
+ if (ret == BLK_STS_RESOURCE) {
+ blk_mq_put_driver_tag(rq);
+ goto insert;
+ } else if (ret != BLK_STS_OK)
+ goto fail_rq;
+
new_cookie = request_to_qc_t(hctx, rq);
/*
@@ -1598,6 +1627,7 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
__blk_mq_requeue_request(rq);
goto insert;
default:
+ fail_rq:
*cookie = BLK_QC_T_NONE;
blk_mq_end_request(rq, ret);
return;
@@ -2582,6 +2612,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (!set->ops->queue_rq)
return -EINVAL;
+ if (!set->ops->get_budget ^ !set->ops->put_budget)
+ return -EINVAL;
+
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
pr_info("blk-mq: reduced tag depth to %u\n",
BLK_MQ_MAX_DEPTH);