diff options
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 290 |
1 files changed, 148 insertions, 142 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 03252af8c82c..d9d632639bd1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -51,9 +51,7 @@ #include "blk-pm.h" #include "blk-rq-qos.h" -#ifdef CONFIG_DEBUG_FS struct dentry *blk_debugfs_root; -#endif EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -285,7 +283,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); * A block device may call blk_sync_queue to ensure that any * such activity is cancelled, thus allowing it to release resources * that the callbacks might use. The caller must already have made sure - * that its ->make_request_fn will not re-add plugging prior to calling + * that its ->submit_bio will not re-add plugging prior to calling * this function. * * This function does not cancel any asynchronous activity arising @@ -321,6 +319,16 @@ void blk_clear_pm_only(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_clear_pm_only); +/** + * blk_put_queue - decrement the request_queue refcount + * @q: the request_queue structure to decrement the refcount for + * + * Decrements the refcount of the request_queue kobject. When this reaches 0 + * we'll have blk_release_queue() called. + * + * Context: Any context, but the last reference must not be dropped from + * atomic context. + */ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); @@ -352,9 +360,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying); * * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and * put it. All future requests will be failed immediately with -ENODEV. + * + * Context: can sleep */ void blk_cleanup_queue(struct request_queue *q) { + /* cannot be called from atomic context */ + might_sleep(); + WARN_ON_ONCE(blk_queue_registered(q)); /* mark @q DYING, no new request or merges will be allowed afterwards */ @@ -497,7 +510,7 @@ static void blk_timeout_work(struct work_struct *work) { } -struct request_queue *__blk_alloc_queue(int node_id) +struct request_queue *blk_alloc_queue(int node_id) { struct request_queue *q; int ret; @@ -540,9 +553,7 @@ struct request_queue *__blk_alloc_queue(int node_id) kobject_init(&q->kobj, &blk_queue_ktype); -#ifdef CONFIG_BLK_DEV_IO_TRACE - mutex_init(&q->blk_trace_mutex); -#endif + mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); mutex_init(&q->sysfs_dir_lock); spin_lock_init(&q->queue_lock); @@ -564,6 +575,7 @@ struct request_queue *__blk_alloc_queue(int node_id) blk_queue_dma_alignment(q, 511); blk_set_default_limits(&q->limits); + q->nr_requests = BLKDEV_MAX_RQ; return q; @@ -581,23 +593,16 @@ fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; } - -struct request_queue *blk_alloc_queue(make_request_fn make_request, int node_id) -{ - struct request_queue *q; - - if (WARN_ON_ONCE(!make_request)) - return NULL; - - q = __blk_alloc_queue(node_id); - if (!q) - return NULL; - q->make_request_fn = make_request; - q->nr_requests = BLKDEV_MAX_RQ; - return q; -} EXPORT_SYMBOL(blk_alloc_queue); +/** + * blk_get_queue - increment the request_queue refcount + * @q: the request_queue structure to increment the refcount for + * + * Increment the refcount of the request_queue kobject. + * + * Context: Any context. + */ bool blk_get_queue(struct request_queue *q) { if (likely(!blk_queue_dying(q))) { @@ -850,8 +855,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) return false; WARN_ONCE(1, - "generic_make_request: Trying to write " - "to read-only block-device %s (partno %d)\n", + "Trying to write to read-only block-device %s (partno %d)\n", bio_devname(bio, b), part->partno); /* Older lvm-tools actually trigger this */ return false; @@ -952,24 +956,17 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_OK; } -static noinline_for_stack bool -generic_make_request_checks(struct bio *bio) +static noinline_for_stack bool submit_bio_checks(struct bio *bio) { - struct request_queue *q; - int nr_sectors = bio_sectors(bio); + struct request_queue *q = bio->bi_disk->queue; blk_status_t status = BLK_STS_IOERR; - char b[BDEVNAME_SIZE]; + struct blk_plug *plug; might_sleep(); - q = bio->bi_disk->queue; - if (unlikely(!q)) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%Lu)\n", - bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); - goto end_io; - } + plug = blk_mq_plug(q, bio); + if (plug && plug->nowait) + bio->bi_opf |= REQ_NOWAIT; /* * For a REQ_NOWAIT based request, return -EOPNOTSUPP @@ -992,14 +989,13 @@ generic_make_request_checks(struct bio *bio) } /* - * Filter flush bio's early so that make_request based - * drivers without flush support don't have to worry - * about them. + * Filter flush bio's early so that bio based drivers without flush + * support don't have to worry about them. */ if (op_is_flush(bio->bi_opf) && !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) { bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA); - if (!nr_sectors) { + if (!bio_sectors(bio)) { status = BLK_STS_OK; goto end_io; } @@ -1054,8 +1050,13 @@ generic_make_request_checks(struct bio *bio) if (unlikely(!current->io_context)) create_task_io_context(current, GFP_ATOMIC, q->node); - if (!blkcg_bio_issue_check(q, bio)) + if (blk_throtl_bio(bio)) { + blkcg_bio_issue_init(bio); return false; + } + + blk_cgroup_bio_start(bio); + blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { trace_block_bio_queue(q, bio); @@ -1074,138 +1075,144 @@ end_io: return false; } -static blk_qc_t do_make_request(struct bio *bio) +static blk_qc_t __submit_bio(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; + struct gendisk *disk = bio->bi_disk; blk_qc_t ret = BLK_QC_T_NONE; if (blk_crypto_bio_prep(&bio)) { - if (!q->make_request_fn) - return blk_mq_make_request(q, bio); - ret = q->make_request_fn(q, bio); + if (!disk->fops->submit_bio) + return blk_mq_submit_bio(bio); + ret = disk->fops->submit_bio(bio); } - blk_queue_exit(q); + blk_queue_exit(disk->queue); return ret; } -/** - * generic_make_request - re-submit a bio to the block device layer for I/O - * @bio: The bio describing the location in memory and on the device. +/* + * The loop in this function may be a bit non-obvious, and so deserves some + * explanation: * - * This is a version of submit_bio() that shall only be used for I/O that is - * resubmitted to lower level drivers by stacking block drivers. All file - * systems and other upper level users of the block layer should use - * submit_bio() instead. + * - Before entering the loop, bio->bi_next is NULL (as all callers ensure + * that), so we have a list with a single bio. + * - We pretend that we have just taken it off a longer list, so we assign + * bio_list to a pointer to the bio_list_on_stack, thus initialising the + * bio_list of new bios to be added. ->submit_bio() may indeed add some more + * bios through a recursive call to submit_bio_noacct. If it did, we find a + * non-NULL value in bio_list and re-enter the loop from the top. + * - In this case we really did just take the bio of the top of the list (no + * pretending) and so remove it from bio_list, and call into ->submit_bio() + * again. + * + * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio. + * bio_list_on_stack[1] contains bios that were submitted before the current + * ->submit_bio_bio, but that haven't been processed yet. */ -blk_qc_t generic_make_request(struct bio *bio) +static blk_qc_t __submit_bio_noacct(struct bio *bio) { - /* - * bio_list_on_stack[0] contains bios submitted by the current - * make_request_fn. - * bio_list_on_stack[1] contains bios that were submitted before - * the current make_request_fn, but that haven't been processed - * yet. - */ struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; - if (!generic_make_request_checks(bio)) - goto out; - - /* - * We only want one ->make_request_fn to be active at a time, else - * stack usage with stacked devices could be a problem. So use - * current->bio_list to keep a list of requests submited by a - * make_request_fn function. current->bio_list is also used as a - * flag to say if generic_make_request is currently active in this - * task or not. If it is NULL, then no make_request is active. If - * it is non-NULL, then a make_request is active, and new requests - * should be added at the tail - */ - if (current->bio_list) { - bio_list_add(¤t->bio_list[0], bio); - goto out; - } - - /* following loop may be a bit non-obvious, and so deserves some - * explanation. - * Before entering the loop, bio->bi_next is NULL (as all callers - * ensure that) so we have a list with a single bio. - * We pretend that we have just taken it off a longer list, so - * we assign bio_list to a pointer to the bio_list_on_stack, - * thus initialising the bio_list of new bios to be - * added. ->make_request() may indeed add some more bios - * through a recursive call to generic_make_request. If it - * did, we find a non-NULL value in bio_list and re-enter the loop - * from the top. In this case we really did just take the bio - * of the top of the list (no pretending) and so remove it from - * bio_list, and call into ->make_request() again. - */ BUG_ON(bio->bi_next); + bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; + do { struct request_queue *q = bio->bi_disk->queue; + struct bio_list lower, same; + + if (unlikely(bio_queue_enter(bio) != 0)) + continue; - if (likely(bio_queue_enter(bio) == 0)) { - struct bio_list lower, same; + /* + * Create a fresh bio_list for all subordinate requests. + */ + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); - /* Create a fresh bio_list for all subordinate requests */ - bio_list_on_stack[1] = bio_list_on_stack[0]; - bio_list_init(&bio_list_on_stack[0]); - ret = do_make_request(bio); + ret = __submit_bio(bio); - /* sort new bios into those for a lower level - * and those for the same level - */ - bio_list_init(&lower); - bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) - if (q == bio->bi_disk->queue) - bio_list_add(&same, bio); - else - bio_list_add(&lower, bio); - /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack[0], &lower); - bio_list_merge(&bio_list_on_stack[0], &same); - bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); + /* + * Sort new bios into those for a lower level and those for the + * same level. + */ + bio_list_init(&lower); + bio_list_init(&same); + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) + if (q == bio->bi_disk->queue) + bio_list_add(&same, bio); + else + bio_list_add(&lower, bio); + + /* + * Now assemble so we handle the lowest level first. + */ + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); + } while ((bio = bio_list_pop(&bio_list_on_stack[0]))); + + current->bio_list = NULL; + return ret; +} + +static blk_qc_t __submit_bio_noacct_mq(struct bio *bio) +{ + struct bio_list bio_list[2] = { }; + blk_qc_t ret = BLK_QC_T_NONE; + + current->bio_list = bio_list; + + do { + struct gendisk *disk = bio->bi_disk; + + if (unlikely(bio_queue_enter(bio) != 0)) + continue; + + if (!blk_crypto_bio_prep(&bio)) { + blk_queue_exit(disk->queue); + ret = BLK_QC_T_NONE; + continue; } - bio = bio_list_pop(&bio_list_on_stack[0]); - } while (bio); - current->bio_list = NULL; /* deactivate */ -out: + ret = blk_mq_submit_bio(bio); + } while ((bio = bio_list_pop(&bio_list[0]))); + + current->bio_list = NULL; return ret; } -EXPORT_SYMBOL(generic_make_request); /** - * direct_make_request - hand a buffer directly to its device driver for I/O + * submit_bio_noacct - re-submit a bio to the block device layer for I/O * @bio: The bio describing the location in memory and on the device. * - * This function behaves like generic_make_request(), but does not protect - * against recursion. Must only be used if the called driver is known - * to be blk-mq based. + * This is a version of submit_bio() that shall only be used for I/O that is + * resubmitted to lower level drivers by stacking block drivers. All file + * systems and other upper level users of the block layer should use + * submit_bio() instead. */ -blk_qc_t direct_make_request(struct bio *bio) +blk_qc_t submit_bio_noacct(struct bio *bio) { - struct request_queue *q = bio->bi_disk->queue; - - if (WARN_ON_ONCE(q->make_request_fn)) { - bio_io_error(bio); - return BLK_QC_T_NONE; - } - if (!generic_make_request_checks(bio)) - return BLK_QC_T_NONE; - if (unlikely(bio_queue_enter(bio))) + if (!submit_bio_checks(bio)) return BLK_QC_T_NONE; - if (!blk_crypto_bio_prep(&bio)) { - blk_queue_exit(q); + + /* + * We only want one ->submit_bio to be active at a time, else stack + * usage with stacked devices could be a problem. Use current->bio_list + * to collect a list of requests submited by a ->submit_bio method while + * it is active, and then process them after it returned. + */ + if (current->bio_list) { + bio_list_add(¤t->bio_list[0], bio); return BLK_QC_T_NONE; } - return blk_mq_make_request(q, bio); + + if (!bio->bi_disk->fops->submit_bio) + return __submit_bio_noacct_mq(bio); + return __submit_bio_noacct(bio); } -EXPORT_SYMBOL_GPL(direct_make_request); +EXPORT_SYMBOL(submit_bio_noacct); /** * submit_bio - submit a bio to the block device layer for I/O @@ -1266,13 +1273,13 @@ blk_qc_t submit_bio(struct bio *bio) blk_qc_t ret; psi_memstall_enter(&pflags); - ret = generic_make_request(bio); + ret = submit_bio_noacct(bio); psi_memstall_leave(&pflags); return ret; } - return generic_make_request(bio); + return submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); @@ -1800,6 +1807,7 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->cb_list); plug->rq_count = 0; plug->multiple_queues = false; + plug->nowait = false; /* * Store ordering should not be needed here, since a potential @@ -1908,9 +1916,7 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("request_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); -#ifdef CONFIG_DEBUG_FS blk_debugfs_root = debugfs_create_dir("block", NULL); -#endif return 0; } |