author     Linus Torvalds <torvalds@linux-foundation.org>   2025-01-20 19:38:46 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2025-01-20 19:38:46 -0800
commit     1cbfb828e05171ca2dd77b5988d068e6872480fe (patch)
tree       bfb33c9ad8840908058649ba2e261bdb7e5f7ee9 /block
parent     3d3a9c8b89d4f8a3785e06ffd15405c670696f02 (diff)
parent     554b22864cc79e28cd65e3a6e1d0d1dfa8581c68 (diff)
Merge tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:
- NVMe pull requests via Keith:
    - Target support for PCI-Endpoint transport (Damien)
    - TCP IO queue spreading fixes (Sagi, Chaitanya)
    - Target handling for "limited retry" flags (Guixen)
    - Poll type fix (Yongsoo)
    - Xarray storage error handling (Keisuke)
    - Host memory buffer free size fix on error (Francis)
- MD pull requests via Song:
    - Reintroduce md-linear (Yu Kuai)
    - md-bitmap refactor and fix (Yu Kuai)
    - Replace kmap_atomic with kmap_local_page (David Reaver)
- Quite a few queue freeze and debugfs deadlock fixes
  Ming introduced lockdep support for this in the 6.13 kernel, and it
  has (unsurprisingly) uncovered quite a few issues
- Use const attributes for IO schedulers
- Remove bio ioprio wrappers
- Fixes for stacked device atomic write support
- Refactor queue affinity helpers, in preparation for better supporting
isolated CPUs
- Cleanups of loop O_DIRECT handling
- Cleanup of BLK_MQ_F_* flags
- Add rotational support for null_blk
- Various fixes and cleanups
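
The "Cleanup of BLK_MQ_F_* flags" item refers to dropping BLK_MQ_F_SHOULD_MERGE and BLK_MQ_F_NO_SCHED and folding the round-robin tag allocation policy into a plain BLK_MQ_F_TAG_RR flag (see the bsg-lib.c, blk-mq-debugfs.c and blk-mq-tag.c hunks below). A minimal sketch of what a driver's tag-set setup looks like afterwards; all "foo_*" names are illustrative and not part of this series:

	#include <linux/blk-mq.h>
	#include <linux/string.h>
	#include <linux/numa.h>

	static const struct blk_mq_ops foo_mq_ops = {
		/* .queue_rq and friends elided for brevity */
	};

	static int foo_init_tag_set(struct blk_mq_tag_set *set)
	{
		memset(set, 0, sizeof(*set));
		set->ops = &foo_mq_ops;
		set->nr_hw_queues = 1;
		set->queue_depth = 128;
		set->numa_node = NUMA_NO_NODE;
		/* was: BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; merging is now implicit */
		set->flags = BLK_MQ_F_BLOCKING;
		/* round-robin tag allocation, if wanted, is now just another flag:
		 * set->flags |= BLK_MQ_F_TAG_RR;
		 */
		return blk_mq_alloc_tag_set(set);
	}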
* tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux: (106 commits)
block: Don't trim an atomic write
block: Add common atomic writes enable flag
md/md-linear: Fix a NULL vs IS_ERR() bug in linear_add()
block: limit disk max sectors to (LLONG_MAX >> 9)
block: Change blk_stack_atomic_writes_limits() unit_min check
block: Ensure start sector is aligned for stacking atomic writes
blk-mq: Move more error handling into blk_mq_submit_bio()
block: Reorder the request allocation code in blk_mq_submit_bio()
nvme: fix bogus kzalloc() return check in nvme_init_effects_log()
md/md-bitmap: move bitmap_{start, end}write to md upper layer
md/raid5: implement pers->bitmap_sector()
md: add a new callback pers->bitmap_sector()
md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
md: Replace deprecated kmap_atomic() with kmap_local_page()
md: reintroduce md-linear
partitions: ldm: remove the initial kernel-doc notation
blk-cgroup: rwstat: fix kernel-doc warnings in header file
blk-cgroup: fix kernel-doc warnings in header file
nbd: fix partial sending
...
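
The queue affinity helper refactor mentioned above replaces the transport-specific blk_mq_pci_map_queues()/blk_mq_virtio_map_queues() with a single bus-based blk_mq_map_hw_queues() (see the blk-mq-cpumap.c hunk and the two deleted files below). A rough sketch of the driver-side conversion, with hypothetical "foo_*" names and the assumption that the PCI device was stashed in the tag set's driver_data at probe time:

	#include <linux/blk-mq.h>
	#include <linux/pci.h>

	static void foo_map_queues(struct blk_mq_tag_set *set)
	{
		struct pci_dev *pdev = set->driver_data;	/* assumed: set at probe */

		/* was: blk_mq_pci_map_queues(&set->map[HCTX_TYPE_DEFAULT], pdev, 0); */
		blk_mq_map_hw_queues(&set->map[HCTX_TYPE_DEFAULT], &pdev->dev, 0);
	}

As the new blk-mq-cpumap.c code below shows, the helper queries the bus_type irq_get_affinity callback for each vector and, if the bus cannot provide an affinity mask, warns and clears the map.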
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile | 2
-rw-r--r-- | block/bfq-iosched.c | 2
-rw-r--r-- | block/bio.c | 111
-rw-r--r-- | block/blk-cgroup-rwstat.h | 5
-rw-r--r-- | block/blk-cgroup.h | 10
-rw-r--r-- | block/blk-core.c | 21
-rw-r--r-- | block/blk-integrity.c | 4
-rw-r--r-- | block/blk-map.c | 128
-rw-r--r-- | block/blk-merge.c | 177
-rw-r--r-- | block/blk-mq-cpumap.c | 37
-rw-r--r-- | block/blk-mq-debugfs.c | 27
-rw-r--r-- | block/blk-mq-pci.c | 46
-rw-r--r-- | block/blk-mq-sched.c | 3
-rw-r--r-- | block/blk-mq-tag.c | 41
-rw-r--r-- | block/blk-mq-virtio.c | 46
-rw-r--r-- | block/blk-mq.c | 71
-rw-r--r-- | block/blk-mq.h | 11
-rw-r--r-- | block/blk-settings.c | 42
-rw-r--r-- | block/blk-sysfs.c | 140
-rw-r--r-- | block/blk-zoned.c | 65
-rw-r--r-- | block/blk.h | 33
-rw-r--r-- | block/bsg-lib.c | 2
-rw-r--r-- | block/elevator.c | 35
-rw-r--r-- | block/elevator.h | 2
-rw-r--r-- | block/genhd.c | 63
-rw-r--r-- | block/kyber-iosched.c | 2
-rw-r--r-- | block/mq-deadline.c | 2
-rw-r--r-- | block/partitions/ldm.h | 2
28 files changed, 442 insertions, 688 deletions
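
Several hunks below (blk-integrity.c, blk-zoned.c, blk-sysfs.c) replace open-coded blk_mq_freeze_queue()/queue_limits_commit_update()/blk_mq_unfreeze_queue() sequences with the new queue_limits_commit_update_frozen() added in blk-settings.c. A minimal sketch of the pattern from a caller's point of view; foo_set_max_user_sectors() is a hypothetical example, not a function from this series:

	#include <linux/blkdev.h>

	static int foo_set_max_user_sectors(struct request_queue *q,
			unsigned int sectors)
	{
		struct queue_limits lim = queue_limits_start_update(q);

		lim.max_user_sectors = sectors;
		/* freezes the queue, commits the update, unfreezes; returns 0 or -errno */
		return queue_limits_commit_update_frozen(q, &lim);
	}

The point of the helper is that callers no longer have to remember the freeze/unfreeze bracketing themselves, which is exactly the class of lockdep-reported ordering bug the queue-freeze fixes in this series address.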
diff --git a/block/Makefile b/block/Makefile index ddfd21c1a9ff..33748123710b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -27,8 +27,6 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o -obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o -obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index cad16c163611..167542201603 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -7622,7 +7622,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e, #define BFQ_ATTR(name) \ __ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store) -static struct elv_fs_entry bfq_attrs[] = { +static const struct elv_fs_entry bfq_attrs[] = { BFQ_ATTR(fifo_expire_sync), BFQ_ATTR(fifo_expire_async), BFQ_ATTR(back_seek_max), diff --git a/block/bio.c b/block/bio.c index d5bdc31d88d3..f0c416e5931d 100644 --- a/block/bio.c +++ b/block/bio.c @@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page, /* * Try to merge a page into a segment, while obeying the hardware segment - * size limit. This is not for normal read/write bios, but for passthrough - * or Zone Append operations that we can't split. + * size limit. + * + * This is kept around for the integrity metadata, which is still tries + * to build the initial bio to the hardware limit and doesn't have proper + * helpers to split. Hopefully this will go away soon. */ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, struct page *page, unsigned len, unsigned offset, @@ -965,106 +968,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, } /** - * bio_add_hw_page - attempt to add a page to a bio with hw constraints - * @q: the target queue - * @bio: destination bio - * @page: page to add - * @len: vec entry length - * @offset: vec entry offset - * @max_sectors: maximum number of sectors that can be added - * @same_page: return if the segment has been merged inside the same page - * - * Add a page to a bio while respecting the hardware max_sectors, max_segment - * and gap limitations. - */ -int bio_add_hw_page(struct request_queue *q, struct bio *bio, - struct page *page, unsigned int len, unsigned int offset, - unsigned int max_sectors, bool *same_page) -{ - unsigned int max_size = max_sectors << SECTOR_SHIFT; - - if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED))) - return 0; - - len = min3(len, max_size, queue_max_segment_size(q)); - if (len > max_size - bio->bi_iter.bi_size) - return 0; - - if (bio->bi_vcnt > 0) { - struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1]; - - if (bvec_try_merge_hw_page(q, bv, page, len, offset, - same_page)) { - bio->bi_iter.bi_size += len; - return len; - } - - if (bio->bi_vcnt >= - min(bio->bi_max_vecs, queue_max_segments(q))) - return 0; - - /* - * If the queue doesn't support SG gaps and adding this segment - * would create a gap, disallow it. 
- */ - if (bvec_gap_to_prev(&q->limits, bv, offset)) - return 0; - } - - bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset); - bio->bi_vcnt++; - bio->bi_iter.bi_size += len; - return len; -} - -/** - * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints - * @q: the target queue - * @bio: destination bio - * @folio: folio to add - * @len: vec entry length - * @offset: vec entry offset in the folio - * @max_sectors: maximum number of sectors that can be added - * @same_page: return if the segment has been merged inside the same folio - * - * Add a folio to a bio while respecting the hardware max_sectors, max_segment - * and gap limitations. - */ -int bio_add_hw_folio(struct request_queue *q, struct bio *bio, - struct folio *folio, size_t len, size_t offset, - unsigned int max_sectors, bool *same_page) -{ - if (len > UINT_MAX || offset > UINT_MAX) - return 0; - return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset, - max_sectors, same_page); -} - -/** - * bio_add_pc_page - attempt to add page to passthrough bio - * @q: the target queue - * @bio: destination bio - * @page: page to add - * @len: vec entry length - * @offset: vec entry offset - * - * Attempt to add a page to the bio_vec maplist. This can fail for a - * number of reasons, such as the bio being full or target block device - * limitations. The target block device must allow bio's up to PAGE_SIZE, - * so it is always possible to add a single page to an empty bio. - * - * This should only be used by passthrough bios. - */ -int bio_add_pc_page(struct request_queue *q, struct bio *bio, - struct page *page, unsigned int len, unsigned int offset) -{ - bool same_page = false; - return bio_add_hw_page(q, bio, page, len, offset, - queue_max_hw_sectors(q), &same_page); -} -EXPORT_SYMBOL(bio_add_pc_page); - -/** * __bio_add_page - add page(s) to a bio in a new segment * @bio: destination bio * @page: start page to add @@ -1707,6 +1610,10 @@ EXPORT_SYMBOL(bio_split); */ void bio_trim(struct bio *bio, sector_t offset, sector_t size) { + /* We should never trim an atomic write */ + if (WARN_ON_ONCE(bio->bi_opf & REQ_ATOMIC && size)) + return; + if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS || offset + size > bio_sectors(bio))) return; diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h index 022527b0b043..703a16fe1404 100644 --- a/block/blk-cgroup-rwstat.h +++ b/block/blk-cgroup-rwstat.h @@ -52,7 +52,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat - * @op: REQ_OP and flags + * @opf: REQ_OP and flags * @val: value to add * * Add @val to @rwstat. The counters are chosen according to @rw. The @@ -83,8 +83,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, /** * blkg_rwstat_read - read the current values of a blkg_rwstat * @rwstat: blkg_rwstat to read + * @result: where to put the current values * - * Read the current snapshot of @rwstat and return it in the aux counts. + * Read the current snapshot of @rwstat and return it in the @result counts. 
*/ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, struct blkg_rwstat_sample *result) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index b9e3265c1eb3..2c4663bd993a 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -225,7 +225,9 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg - * @return: true if this bio needs to be submitted with the root blkg context. + * @bio: the target &bio + * + * Return: true if this bio needs to be submitted with the root blkg context. * * In order to avoid priority inversions we sometimes need to issue a bio as if * it were attached to the root blkg, and then backcharge to the actual owning @@ -245,7 +247,7 @@ static inline bool bio_issue_as_root_blkg(struct bio *bio) * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. - + * * Must be called in a RCU critical section. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, @@ -268,7 +270,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, } /** - * blkg_to_pdata - get policy private data + * blkg_to_pd - get policy private data * @blkg: blkg of interest * @pol: policy of interest * @@ -287,7 +289,7 @@ static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, } /** - * pdata_to_blkg - get blkg associated with policy private data + * pd_to_blkg - get blkg associated with policy private data * @pd: policy private data of interest * * @pd is policy private data. Determine the blkg it's associated with. diff --git a/block/blk-core.c b/block/blk-core.c index 666efe8fa202..32fb28a6372c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -629,8 +629,14 @@ static void __submit_bio(struct bio *bio) blk_mq_submit_bio(bio); } else if (likely(bio_queue_enter(bio) == 0)) { struct gendisk *disk = bio->bi_bdev->bd_disk; - - disk->fops->submit_bio(bio); + + if ((bio->bi_opf & REQ_POLLED) && + !(disk->queue->limits.features & BLK_FEAT_POLL)) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + } else { + disk->fops->submit_bio(bio); + } blk_queue_exit(disk->queue); } @@ -805,12 +811,6 @@ void submit_bio_noacct(struct bio *bio) } } - if (!(q->limits.features & BLK_FEAT_POLL) && - (bio->bi_opf & REQ_POLLED)) { - bio_clear_polled(bio); - goto not_supported; - } - switch (bio_op(bio)) { case REQ_OP_READ: break; @@ -935,7 +935,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) return 0; q = bdev_get_queue(bdev); - if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL)) + if (cookie == BLK_QC_T_NONE) return 0; blk_flush_plug(current->plug, false); @@ -956,7 +956,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) } else { struct gendisk *disk = q->disk; - if (disk && disk->fops->poll_bio) + if ((q->limits.features & BLK_FEAT_POLL) && disk && + disk->fops->poll_bio) ret = disk->fops->poll_bio(bio, iob, flags); } blk_queue_exit(q); diff --git a/block/blk-integrity.c b/block/blk-integrity.c index b180cac61a9d..013469faa5e7 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count, else lim.integrity.flags |= flag; - blk_mq_freeze_queue(q); - err = queue_limits_commit_update(q, &lim); - blk_mq_unfreeze_queue(q); + err = queue_limits_commit_update_frozen(q, &lim); if (err) return err; return count; diff --git a/block/blk-map.c b/block/blk-map.c index 894009b2d881..d2f22744b3d1 
100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, } } - if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) { + if (bio_add_page(bio, page, bytes, offset) < bytes) { if (!map_data) __free_page(page); break; @@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq, static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, gfp_t gfp_mask) { - iov_iter_extraction_t extraction_flags = 0; - unsigned int max_sectors = queue_max_hw_sectors(rq->q); unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); struct bio *bio; int ret; - int j; if (!iov_iter_count(iter)) return -EINVAL; bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); - if (bio == NULL) + if (!bio) return -ENOMEM; - - if (blk_queue_pci_p2pdma(rq->q)) - extraction_flags |= ITER_ALLOW_P2PDMA; - if (iov_iter_extract_will_pin(iter)) - bio_set_flag(bio, BIO_PAGE_PINNED); - - while (iov_iter_count(iter)) { - struct page *stack_pages[UIO_FASTIOV]; - struct page **pages = stack_pages; - ssize_t bytes; - size_t offs; - int npages; - - if (nr_vecs > ARRAY_SIZE(stack_pages)) - pages = NULL; - - bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX, - nr_vecs, extraction_flags, &offs); - if (unlikely(bytes <= 0)) { - ret = bytes ? bytes : -EFAULT; - goto out_unmap; - } - - npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - - if (unlikely(offs & queue_dma_alignment(rq->q))) - j = 0; - else { - for (j = 0; j < npages; j++) { - struct page *page = pages[j]; - unsigned int n = PAGE_SIZE - offs; - bool same_page = false; - - if (n > bytes) - n = bytes; - - if (!bio_add_hw_page(rq->q, bio, page, n, offs, - max_sectors, &same_page)) - break; - - if (same_page) - bio_release_page(bio, page); - bytes -= n; - offs = 0; - } - } - /* - * release the pages we didn't map into the bio, if any - */ - while (j < npages) - bio_release_page(bio, pages[j++]); - if (pages != stack_pages) - kvfree(pages); - /* couldn't stuff something into bio? 
*/ - if (bytes) { - iov_iter_revert(iter, bytes); - break; - } - } - + ret = bio_iov_iter_get_pages(bio, iter); + if (ret) + goto out_put; ret = blk_rq_append_bio(rq, bio); if (ret) - goto out_unmap; + goto out_release; return 0; - out_unmap: +out_release: bio_release_pages(bio, false); +out_put: blk_mq_map_bio_put(bio); return ret; } @@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data, page = virt_to_page(data); else page = vmalloc_to_page(data); - if (bio_add_pc_page(q, bio, page, bytes, - offset) < bytes) { + if (bio_add_page(bio, page, bytes, offset) < bytes) { /* we don't support partial mappings */ bio_uninit(bio); kfree(bio); @@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data, if (!reading) memcpy(page_address(page), p, bytes); - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) + if (bio_add_page(bio, page, bytes, 0) < bytes) break; len -= bytes; @@ -536,24 +476,33 @@ cleanup: */ int blk_rq_append_bio(struct request *rq, struct bio *bio) { - struct bvec_iter iter; - struct bio_vec bv; + const struct queue_limits *lim = &rq->q->limits; + unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT; unsigned int nr_segs = 0; + int ret; - bio_for_each_bvec(bv, bio, iter) - nr_segs++; + /* check that the data layout matches the hardware restrictions */ + ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes); + if (ret) { + /* if we would have to split the bio, copy instead */ + if (ret > 0) + ret = -EREMOTEIO; + return ret; + } - if (!rq->bio) { - blk_rq_bio_prep(rq, bio, nr_segs); - } else { + if (rq->bio) { if (!ll_back_merge_fn(rq, bio, nr_segs)) return -EINVAL; rq->biotail->bi_next = bio; rq->biotail = bio; - rq->__data_len += (bio)->bi_iter.bi_size; + rq->__data_len += bio->bi_iter.bi_size; bio_crypt_free_ctx(bio); + return 0; } + rq->nr_phys_segments = nr_segs; + rq->bio = rq->biotail = bio; + rq->__data_len = bio->bi_iter.bi_size; return 0; } EXPORT_SYMBOL(blk_rq_append_bio); @@ -561,9 +510,7 @@ EXPORT_SYMBOL(blk_rq_append_bio); /* Prepare bio for passthrough IO given ITER_BVEC iter */ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) { - const struct queue_limits *lim = &rq->q->limits; - unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT; - unsigned int nsegs; + unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT; struct bio *bio; int ret; @@ -576,18 +523,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter) return -ENOMEM; bio_iov_bvec_set(bio, iter); - /* check that the data layout matches the hardware restrictions */ - ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes); - if (ret) { - /* if we would have to split the bio, copy instead */ - if (ret > 0) - ret = -EREMOTEIO; + ret = blk_rq_append_bio(rq, bio); + if (ret) blk_mq_map_bio_put(bio); - return ret; - } - - blk_rq_bio_prep(rq, bio, nsegs); - return 0; + return ret; } /** @@ -644,8 +583,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask); else ret = bio_map_user_iov(rq, &i, gfp_mask); - if (ret) + if (ret) { + if (ret == -EREMOTEIO) + ret = -EINVAL; goto unmap_rq; + } if (!bio) bio = rq->bio; } while (iov_iter_count(&i)); diff --git a/block/blk-merge.c b/block/blk-merge.c index e01383c6e534..15cd231d560c 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -473,137 +473,100 @@ unsigned int blk_recalc_rq_segments(struct request *rq) return nr_phys_segs; } -static inline struct 
scatterlist *blk_next_sg(struct scatterlist **sg, - struct scatterlist *sglist) -{ - if (!*sg) - return sglist; +struct phys_vec { + phys_addr_t paddr; + u32 len; +}; - /* - * If the driver previously mapped a shorter list, we could see a - * termination bit prematurely unless it fully inits the sg table - * on each mapping. We KNOW that there must be more entries here - * or the driver would be buggy, so force clear the termination bit - * to avoid doing a full sg_init_table() in drivers for each command. - */ - sg_unmark_end(*sg); - return sg_next(*sg); -} - -static unsigned blk_bvec_map_sg(struct request_queue *q, - struct bio_vec *bvec, struct scatterlist *sglist, - struct scatterlist **sg) +static bool blk_map_iter_next(struct request *req, + struct req_iterator *iter, struct phys_vec *vec) { - unsigned nbytes = bvec->bv_len; - unsigned nsegs = 0, total = 0; - - while (nbytes > 0) { - unsigned offset = bvec->bv_offset + total; - unsigned len = get_max_segment_size(&q->limits, - bvec_phys(bvec) + total, nbytes); - struct page *page = bvec->bv_page; - - /* - * Unfortunately a fair number of drivers barf on scatterlists - * that have an offset larger than PAGE_SIZE, despite other - * subsystems dealing with that invariant just fine. For now - * stick to the legacy format where we never present those from - * the block layer, but the code below should be removed once - * these offenders (mostly MMC/SD drivers) are fixed. - */ - page += (offset >> PAGE_SHIFT); - offset &= ~PAGE_MASK; - - *sg = blk_next_sg(sg, sglist); - sg_set_page(*sg, page, len, offset); + unsigned int max_size; + struct bio_vec bv; - total += len; - nbytes -= len; - nsegs++; + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { + if (!iter->bio) + return false; + vec->paddr = bvec_phys(&req->special_vec); + vec->len = req->special_vec.bv_len; + iter->bio = NULL; + return true; } - return nsegs; -} - -static inline int __blk_bvec_map_sg(struct bio_vec bv, - struct scatterlist *sglist, struct scatterlist **sg) -{ - *sg = blk_next_sg(sg, sglist); - sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset); - return 1; -} - -/* only try to merge bvecs into one sg if they are from two bios */ -static inline bool -__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec, - struct bio_vec *bvprv, struct scatterlist **sg) -{ - - int nbytes = bvec->bv_len; - - if (!*sg) + if (!iter->iter.bi_size) return false; - if ((*sg)->length + nbytes > queue_max_segment_size(q)) - return false; + bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); + vec->paddr = bvec_phys(&bv); + max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX); + bv.bv_len = min(bv.bv_len, max_size); + bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len); - if (!biovec_phys_mergeable(q, bvprv, bvec)) - return false; + /* + * If we are entirely done with this bi_io_vec entry, check if the next + * one could be merged into it. This typically happens when moving to + * the next bio, but some callers also don't pack bvecs tight. 
+ */ + while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) { + struct bio_vec next; + + if (!iter->iter.bi_size) { + if (!iter->bio->bi_next) + break; + iter->bio = iter->bio->bi_next; + iter->iter = iter->bio->bi_iter; + } - (*sg)->length += nbytes; + next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter); + if (bv.bv_len + next.bv_len > max_size || + !biovec_phys_mergeable(req->q, &bv, &next)) + break; + + bv.bv_len += next.bv_len; + bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len); + } + vec->len = bv.bv_len; return true; } -static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, - struct scatterlist *sglist, - struct scatterlist **sg) +static inline struct scatterlist *blk_next_sg(struct scatterlist **sg, + struct scatterlist *sglist) { - struct bio_vec bvec, bvprv = { NULL }; - struct bvec_iter iter; - int nsegs = 0; - bool new_bio = false; - - for_each_bio(bio) { - bio_for_each_bvec(bvec, bio, iter) { - /* - * Only try to merge bvecs from two bios given we - * have done bio internal merge when adding pages - * to bio - */ - if (new_bio && - __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg)) - goto next_bvec; - - if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE) - nsegs += __blk_bvec_map_sg(bvec, sglist, sg); - else - nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg); - next_bvec: - new_bio = false; - } - if (likely(bio->bi_iter.bi_size)) { - bvprv = bvec; - new_bio = true; - } - } + if (!*sg) + return sglist; - return nsegs; + /* + * If the driver previously mapped a shorter list, we could see a + * termination bit prematurely unless it fully inits the sg table + * on each mapping. We KNOW that there must be more entries here + * or the driver would be buggy, so force clear the termination bit + * to avoid doing a full sg_init_table() in drivers for each command. + */ + sg_unmark_end(*sg); + return sg_next(*sg); } /* - * map a request to scatterlist, return number of sg entries setup. Caller - * must make sure sg can hold rq->nr_phys_segments entries + * Map a request to scatterlist, return number of sg entries setup. Caller + * must make sure sg can hold rq->nr_phys_segments entries. */ int __blk_rq_map_sg(struct request_queue *q, struct request *rq, struct scatterlist *sglist, struct scatterlist **last_sg) { + struct req_iterator iter = { + .bio = rq->bio, + .iter = rq->bio->bi_iter, + }; + struct phys_vec vec; int nsegs = 0; - if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) - nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg); - else if (rq->bio) - nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg); + while (blk_map_iter_next(rq, &iter, &vec)) { + *last_sg = blk_next_sg(last_sg, sglist); + sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len, + offset_in_page(vec.paddr)); + nsegs++; + } if (*last_sg) sg_mark_end(*last_sg); diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 9638b25fd521..ad8d6a363f24 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <linux/cpu.h> #include <linux/group_cpus.h> +#include <linux/device/bus.h> #include "blk.h" #include "blk-mq.h" @@ -54,3 +55,39 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index) return NUMA_NO_NODE; } + +/** + * blk_mq_map_hw_queues - Create CPU to hardware queue mapping + * @qmap: CPU to hardware queue map + * @dev: The device to map queues + * @offset: Queue offset to use for the device + * + * Create a CPU to hardware queue mapping in @qmap. 
The struct bus_type + * irq_get_affinity callback will be used to retrieve the affinity. + */ +void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap, + struct device *dev, unsigned int offset) + +{ + const struct cpumask *mask; + unsigned int queue, cpu; + + if (!dev->bus->irq_get_affinity) + goto fallback; + + for (queue = 0; queue < qmap->nr_queues; queue++) { + mask = dev->bus->irq_get_affinity(dev, queue + offset); + if (!mask) + goto fallback; + + for_each_cpu(cpu, mask) + qmap->mq_map[cpu] = qmap->queue_offset + queue; + } + + return; + +fallback: + WARN_ON_ONCE(qmap->nr_queues > 1); + blk_mq_clear_mq_map(qmap); +} +EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5463697a8442..adf5f0697b6b 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -172,21 +172,13 @@ static int hctx_state_show(void *data, struct seq_file *m) return 0; } -#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name -static const char *const alloc_policy_name[] = { - BLK_TAG_ALLOC_NAME(FIFO), - BLK_TAG_ALLOC_NAME(RR), -}; -#undef BLK_TAG_ALLOC_NAME - #define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name static const char *const hctx_flag_name[] = { - HCTX_FLAG_NAME(SHOULD_MERGE), HCTX_FLAG_NAME(TAG_QUEUE_SHARED), HCTX_FLAG_NAME(STACKING), HCTX_FLAG_NAME(TAG_HCTX_SHARED), HCTX_FLAG_NAME(BLOCKING), - HCTX_FLAG_NAME(NO_SCHED), + HCTX_FLAG_NAME(TAG_RR), HCTX_FLAG_NAME(NO_SCHED_BY_DEFAULT), }; #undef HCTX_FLAG_NAME @@ -194,22 +186,11 @@ static const char *const hctx_flag_name[] = { static int hctx_flags_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; - const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags); - BUILD_BUG_ON(ARRAY_SIZE(hctx_flag_name) != - BLK_MQ_F_ALLOC_POLICY_START_BIT); - BUILD_BUG_ON(ARRAY_SIZE(alloc_policy_name) != BLK_TAG_ALLOC_MAX); + BUILD_BUG_ON(ARRAY_SIZE(hctx_flag_name) != ilog2(BLK_MQ_F_MAX)); - seq_puts(m, "alloc_policy="); - if (alloc_policy < ARRAY_SIZE(alloc_policy_name) && - alloc_policy_name[alloc_policy]) - seq_puts(m, alloc_policy_name[alloc_policy]); - else - seq_printf(m, "%d", alloc_policy); - seq_puts(m, " "); - blk_flags_show(m, - hctx->flags ^ BLK_ALLOC_POLICY_TO_MQ_FLAG(alloc_policy), - hctx_flag_name, ARRAY_SIZE(hctx_flag_name)); + blk_flags_show(m, hctx->flags, hctx_flag_name, + ARRAY_SIZE(hctx_flag_name)); seq_puts(m, "\n"); return 0; } diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c deleted file mode 100644 index d47b5c73c9eb..000000000000 --- a/block/blk-mq-pci.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2016 Christoph Hellwig. - */ -#include <linux/kobject.h> -#include <linux/blkdev.h> -#include <linux/blk-mq-pci.h> -#include <linux/pci.h> -#include <linux/module.h> - -#include "blk-mq.h" - -/** - * blk_mq_pci_map_queues - provide a default queue mapping for PCI device - * @qmap: CPU to hardware queue map. - * @pdev: PCI device associated with @set. - * @offset: Offset to use for the pci irq vector - * - * This function assumes the PCI device @pdev has at least as many available - * interrupt vectors as @set has queues. It will then query the vector - * corresponding to each queue for it's affinity mask and built queue mapping - * that maps a queue to the CPUs that have irq affinity for the corresponding - * vector. 
- */ -void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, - int offset) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < qmap->nr_queues; queue++) { - mask = pci_irq_get_affinity(pdev, queue + offset); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = qmap->queue_offset + queue; - } - - return; - -fallback: - WARN_ON_ONCE(qmap->nr_queues > 1); - blk_mq_clear_mq_map(qmap); -} -EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 451a2c1f1f32..7442ca27c2bf 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -351,8 +351,7 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, ctx = blk_mq_get_ctx(q); hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; - if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || - list_empty_careful(&ctx->rq_lists[type])) + if (list_empty_careful(&ctx->rq_lists[type])) goto out_put; /* default per sw-queue merge */ diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 2cafcf11ee8b..b9f417d980b4 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -544,30 +544,11 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth, node); } -int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags, - struct sbitmap_queue *breserved_tags, - unsigned int queue_depth, unsigned int reserved, - int node, int alloc_policy) -{ - unsigned int depth = queue_depth - reserved; - bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR; - - if (bt_alloc(bitmap_tags, depth, round_robin, node)) - return -ENOMEM; - if (bt_alloc(breserved_tags, reserved, round_robin, node)) - goto free_bitmap_tags; - - return 0; - -free_bitmap_tags: - sbitmap_queue_free(bitmap_tags); - return -ENOMEM; -} - struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, - unsigned int reserved_tags, - int node, int alloc_policy) + unsigned int reserved_tags, unsigned int flags, int node) { + unsigned int depth = total_tags - reserved_tags; + bool round_robin = flags & BLK_MQ_F_TAG_RR; struct blk_mq_tags *tags; if (total_tags > BLK_MQ_TAG_MAX) { @@ -582,14 +563,18 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags; spin_lock_init(&tags->lock); + if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node)) + goto out_free_tags; + if (bt_alloc(&tags->breserved_tags, reserved_tags, round_robin, node)) + goto out_free_bitmap_tags; - if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags, - total_tags, reserved_tags, node, - alloc_policy) < 0) { - kfree(tags); - return NULL; - } return tags; + +out_free_bitmap_tags: + sbitmap_queue_free(&tags->bitmap_tags); +out_free_tags: + kfree(tags); + return NULL; } void blk_mq_free_tags(struct blk_mq_tags *tags) diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c deleted file mode 100644 index 68d0945c0b08..000000000000 --- a/block/blk-mq-virtio.c +++ /dev/null @@ -1,46 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2016 Christoph Hellwig. - */ -#include <linux/device.h> -#include <linux/blk-mq-virtio.h> -#include <linux/virtio_config.h> -#include <linux/module.h> -#include "blk-mq.h" - -/** - * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device - * @qmap: CPU to hardware queue map. - * @vdev: virtio device to provide a mapping for. 
- * @first_vec: first interrupt vectors to use for queues (usually 0) - * - * This function assumes the virtio device @vdev has at least as many available - * interrupt vectors as @set has queues. It will then query the vector - * corresponding to each queue for it's affinity mask and built queue mapping - * that maps a queue to the CPUs that have irq affinity for the corresponding - * vector. - */ -void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, - struct virtio_device *vdev, int first_vec) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - if (!vdev->config->get_vq_affinity) - goto fallback; - - for (queue = 0; queue < qmap->nr_queues; queue++) { - mask = vdev->config->get_vq_affinity(vdev, first_vec + queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - qmap->mq_map[cpu] = qmap->queue_offset + queue; - } - - return; - -fallback: - blk_mq_map_queues(qmap); -} -EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues); diff --git a/block/blk-mq.c b/block/blk-mq.c index 8ac19d4ae3c0..da39a1cac702 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -131,6 +131,10 @@ static bool blk_freeze_set_owner(struct request_queue *q, if (!q->mq_freeze_depth) { q->mq_freeze_owner = owner; q->mq_freeze_owner_depth = 1; + q->mq_freeze_disk_dead = !q->disk || + test_bit(GD_DEAD, &q->disk->state) || + !blk_queue_registered(q); + q->mq_freeze_queue_dying = blk_queue_dying(q); return true; } @@ -142,8 +146,6 @@ static bool blk_freeze_set_owner(struct request_queue *q, /* verify the last unfreeze in owner context */ static bool blk_unfreeze_check_owner(struct request_queue *q) { - if (!q->mq_freeze_owner) - return false; if (q->mq_freeze_owner != current) return false; if (--q->mq_freeze_owner_depth == 0) { @@ -189,7 +191,7 @@ bool __blk_freeze_queue_start(struct request_queue *q, void blk_freeze_queue_start(struct request_queue *q) { if (__blk_freeze_queue_start(q, current)) - blk_freeze_acquire_lock(q, false, false); + blk_freeze_acquire_lock(q); } EXPORT_SYMBOL_GPL(blk_freeze_queue_start); @@ -237,7 +239,7 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic) void blk_mq_unfreeze_queue(struct request_queue *q) { if (__blk_mq_unfreeze_queue(q, false)) - blk_unfreeze_release_lock(q, false, false); + blk_unfreeze_release_lock(q); } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); @@ -2656,8 +2658,10 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio, if (bio->bi_opf & REQ_RAHEAD) rq->cmd_flags |= REQ_FAILFAST_MASK; + rq->bio = rq->biotail = bio; rq->__sector = bio->bi_iter.bi_sector; - blk_rq_bio_prep(rq, bio, nr_segs); + rq->__data_len = bio->bi_iter.bi_size; + rq->nr_phys_segments = nr_segs; if (bio_integrity(bio)) rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, bio); @@ -2980,12 +2984,9 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q, } rq = __blk_mq_alloc_requests(&data); - if (rq) - return rq; - rq_qos_cleanup(q, bio); - if (bio->bi_opf & REQ_NOWAIT) - bio_wouldblock_error(bio); - return NULL; + if (unlikely(!rq)) + rq_qos_cleanup(q, bio); + return rq; } /* @@ -3092,14 +3093,21 @@ void blk_mq_submit_bio(struct bio *bio) } /* - * Device reconfiguration may change logical block size, so alignment - * check has to be done with queue usage counter held + * Device reconfiguration may change logical block size or reduce the + * number of poll queues, so the checks for alignment and poll support + * have to be done with queue usage counter held. 
*/ if (unlikely(bio_unaligned(bio, q))) { bio_io_error(bio); goto queue_exit; } + if ((bio->bi_opf & REQ_POLLED) && !blk_mq_can_poll(q)) { + bio->bi_status = BLK_STS_NOTSUPP; + bio_endio(bio); + goto queue_exit; + } + bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); if (!bio) goto queue_exit; @@ -3114,12 +3122,15 @@ void blk_mq_submit_bio(struct bio *bio) goto queue_exit; new_request: - if (!rq) { + if (rq) { + blk_mq_use_cached_rq(rq, plug, bio); + } else { rq = blk_mq_get_new_requests(q, plug, bio, nr_segs); - if (unlikely(!rq)) + if (unlikely(!rq)) { + if (bio->bi_opf & REQ_NOWAIT) + bio_wouldblock_error(bio); goto queue_exit; - } else { - blk_mq_use_cached_rq(rq, plug, bio); + } } trace_block_getrq(bio); @@ -3472,8 +3483,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set, if (node == NUMA_NO_NODE) node = set->numa_node; - tags = blk_mq_init_tags(nr_tags, reserved_tags, node, - BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags)); + tags = blk_mq_init_tags(nr_tags, reserved_tags, set->flags, node); if (!tags) return NULL; @@ -4317,12 +4327,6 @@ void blk_mq_release(struct request_queue *q) blk_mq_sysfs_deinit(q); } -static bool blk_mq_can_poll(struct blk_mq_tag_set *set) -{ - return set->nr_maps > HCTX_TYPE_POLL && - set->map[HCTX_TYPE_POLL].nr_queues; -} - struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata) { @@ -4333,7 +4337,7 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set, if (!lim) lim = &default_lim; lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT; - if (blk_mq_can_poll(set)) + if (set->nr_maps > HCTX_TYPE_POLL) lim->features |= BLK_FEAT_POLL; q = blk_alloc_queue(lim, set->numa_node); @@ -5021,8 +5025,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, fallback: blk_mq_update_queue_map(set); list_for_each_entry(q, &set->tag_list, tag_set_list) { - struct queue_limits lim; - blk_mq_realloc_hw_ctxs(set, q); if (q->nr_hw_queues != set->nr_hw_queues) { @@ -5036,13 +5038,6 @@ fallback: set->nr_hw_queues = prev_nr_hw_queues; goto fallback; } - lim = queue_limits_start_update(q); - if (blk_mq_can_poll(set)) - lim.features |= BLK_FEAT_POLL; - else - lim.features &= ~BLK_FEAT_POLL; - if (queue_limits_commit_update(q, &lim) < 0) - pr_warn("updating the poll flag failed\n"); blk_mq_map_swqueue(q); } @@ -5102,9 +5097,9 @@ static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx, int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob, unsigned int flags) { - struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie); - - return blk_hctx_poll(q, hctx, iob, flags); + if (!blk_mq_can_poll(q)) + return 0; + return blk_hctx_poll(q, xa_load(&q->hctx_table, cookie), iob, flags); } int blk_rq_poll(struct request *rq, struct io_comp_batch *iob, diff --git a/block/blk-mq.h b/block/blk-mq.h index 89a20fffa4b1..44979e92b79f 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -163,11 +163,8 @@ struct blk_mq_alloc_data { }; struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, - unsigned int reserved_tags, int node, int alloc_policy); + unsigned int reserved_tags, unsigned int flags, int node); void blk_mq_free_tags(struct blk_mq_tags *tags); -int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags, - struct sbitmap_queue *breserved_tags, unsigned int queue_depth, - unsigned int reserved, int node, int alloc_policy); unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data); unsigned long blk_mq_get_tags(struct 
blk_mq_alloc_data *data, int nr_tags, @@ -451,4 +448,10 @@ do { \ #define blk_mq_run_dispatch_ops(q, dispatch_ops) \ __blk_mq_run_dispatch_ops(q, true, dispatch_ops) \ +static inline bool blk_mq_can_poll(struct request_queue *q) +{ + return (q->limits.features & BLK_FEAT_POLL) && + q->tag_set->map[HCTX_TYPE_POLL].nr_queues; +} + #endif diff --git a/block/blk-settings.c b/block/blk-settings.c index 8f09e33f41f6..db12396ff5c7 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -175,6 +175,9 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim) { unsigned int boundary_sectors; + if (!(lim->features & BLK_FEAT_ATOMIC_WRITES)) + goto unsupported; + if (!lim->atomic_write_hw_max) goto unsupported; @@ -413,7 +416,8 @@ int blk_set_default_limits(struct queue_limits *lim) * @lim: limits to apply * * Apply the limits in @lim that were obtained from queue_limits_start_update() - * and updated by the caller to @q. + * and updated by the caller to @q. The caller must have frozen the queue or + * ensure that there are no outstanding I/Os by other means. * * Returns 0 if successful, else a negative error code. */ @@ -444,6 +448,30 @@ out_unlock: EXPORT_SYMBOL_GPL(queue_limits_commit_update); /** + * queue_limits_commit_update_frozen - commit an atomic update of queue limits + * @q: queue to update + * @lim: limits to apply + * + * Apply the limits in @lim that were obtained from queue_limits_start_update() + * and updated with the new values by the caller to @q. Freezes the queue + * before the update and unfreezes it after. + * + * Returns 0 if successful, else a negative error code. + */ +int queue_limits_commit_update_frozen(struct request_queue *q, + struct queue_limits *lim) +{ + int ret; + + blk_mq_freeze_queue(q); + ret = queue_limits_commit_update(q, lim); + blk_mq_unfreeze_queue(q); + + return ret; +} +EXPORT_SYMBOL_GPL(queue_limits_commit_update_frozen); + +/** * queue_limits_set - apply queue limits to queue * @q: queue to update * @lim: limits to apply @@ -584,12 +612,15 @@ static bool blk_stack_atomic_writes_head(struct queue_limits *t, } static void blk_stack_atomic_writes_limits(struct queue_limits *t, - struct queue_limits *b) + struct queue_limits *b, sector_t start) { - if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED)) + if (!(b->features & BLK_FEAT_ATOMIC_WRITES)) + goto unsupported; + + if (!b->atomic_write_hw_unit_min) goto unsupported; - if (!b->atomic_write_unit_min) + if (!blk_atomic_write_start_sect_aligned(start, b)) goto unsupported; /* @@ -611,7 +642,6 @@ unsupported: t->atomic_write_hw_unit_max = 0; t->atomic_write_hw_unit_min = 0; t->atomic_write_hw_boundary = 0; - t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED; } /** @@ -774,7 +804,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->zone_write_granularity = 0; t->max_zone_append_sectors = 0; } - blk_stack_atomic_writes_limits(t, b); + blk_stack_atomic_writes_limits(t, b, start); return ret; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 767598e719ab..e09b455874bf 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -24,6 +24,8 @@ struct queue_sysfs_entry { struct attribute attr; ssize_t (*show)(struct gendisk *disk, char *page); ssize_t (*store)(struct gendisk *disk, const char *page, size_t count); + int (*store_limit)(struct gendisk *disk, const char *page, + size_t count, struct queue_limits *lim); void (*load_module)(struct gendisk *disk, const char *page, size_t count); }; @@ -153,13 +155,11 @@ QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0) 
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0) QUEUE_SYSFS_SHOW_CONST(poll_delay, -1) -static ssize_t queue_max_discard_sectors_store(struct gendisk *disk, - const char *page, size_t count) +static int queue_max_discard_sectors_store(struct gendisk *disk, + const char *page, size_t count, struct queue_limits *lim) { unsigned long max_discard_bytes; - struct queue_limits lim; ssize_t ret; - int err; ret = queue_var_store(&max_discard_bytes, page, count); if (ret < 0) @@ -171,38 +171,28 @@ static ssize_t queue_max_discard_sectors_store(struct gendisk *disk, if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX) return -EINVAL; - lim = queue_limits_start_update(disk->queue); - lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; - err = queue_limits_commit_update(disk->queue, &lim); - if (err) - return err; - return ret; + lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT; + return 0; } -static ssize_t -queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count) +static int +queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count, + struct queue_limits *lim) { unsigned long max_sectors_kb; - struct queue_limits lim; ssize_t ret; - int err; ret = queue_var_store(&max_sectors_kb, page, count); if (ret < 0) return ret; - lim = queue_limits_start_update(disk->queue); - lim.max_user_sectors = max_sectors_kb << 1; - err = queue_limits_commit_update(disk->queue, &lim); - if (err) - return err; - return ret; + lim->max_user_sectors = max_sectors_kb << 1; + return 0; } static ssize_t queue_feature_store(struct gendisk *disk, const char *page, - size_t count, blk_features_t feature) + size_t count, struct queue_limits *lim, blk_features_t feature) { - struct queue_limits lim; unsigned long val; ssize_t ret; @@ -210,15 +200,11 @@ static ssize_t queue_feature_store(struct gendisk *disk, const char *page, if (ret < 0) return ret; - lim = queue_limits_start_update(disk->queue); if (val) - lim.features |= feature; + lim->features |= feature; else - lim.features &= ~feature; - ret = queue_limits_commit_update(disk->queue, &lim); - if (ret) - return ret; - return count; + lim->features &= ~feature; + return 0; } #define QUEUE_SYSFS_FEATURE(_name, _feature) \ @@ -227,10 +213,10 @@ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ return sysfs_emit(page, "%u\n", \ !!(disk->queue->limits.features & _feature)); \ } \ -static ssize_t queue_##_name##_store(struct gendisk *disk, \ - const char *page, size_t count) \ +static int queue_##_name##_store(struct gendisk *disk, \ + const char *page, size_t count, struct queue_limits *lim) \ { \ - return queue_feature_store(disk, page, count, _feature); \ + return queue_feature_store(disk, page, count, lim, _feature); \ } QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL) @@ -245,10 +231,17 @@ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \ !!(disk->queue->limits.features & _feature)); \ } -QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL); QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA); QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX); +static ssize_t queue_poll_show(struct gendisk *disk, char *page) +{ + if (queue_is_mq(disk->queue)) + return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue)); + return sysfs_emit(page, "%u\n", + !!(disk->queue->limits.features & BLK_FEAT_POLL)); +} + static ssize_t queue_zoned_show(struct gendisk *disk, char *page) { if (blk_queue_is_zoned(disk->queue)) @@ -266,10 +259,9 @@ static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, 
char *page) return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page); } -static ssize_t queue_iostats_passthrough_store(struct gendisk *disk, - const char *page, size_t count) +static int queue_iostats_passthrough_store(struct gendisk *disk, + const char *page, size_t count, struct queue_limits *lim) { - struct queue_limits lim; unsigned long ios; ssize_t ret; @@ -277,18 +269,13 @@ static ssize_t queue_iostats_passthrough_store(struct gendisk *disk, if (ret < 0) return ret; - lim = queue_limits_start_update(disk->queue); if (ios) - lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; + lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH; else - lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; - - ret = queue_limits_commit_update(disk->queue, &lim); - if (ret) - return ret; - - return count; + lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH; + return 0; } + static ssize_t queue_nomerges_show(struct gendisk *disk, char *page) { return queue_var_show((blk_queue_nomerges(disk->queue) << 1) | @@ -391,12 +378,10 @@ static ssize_t queue_wc_show(struct gendisk *disk, char *page) return sysfs_emit(page, "write through\n"); } -static ssize_t queue_wc_store(struct gendisk *disk, const char *page, - size_t count) +static int queue_wc_store(struct gendisk *disk, const char *page, + size_t count, struct queue_limits *lim) { - struct queue_limits lim; bool disable; - int err; if (!strncmp(page, "write back", 10)) { disable = false; @@ -407,15 +392,11 @@ static ssize_t queue_wc_store(struct gendisk *disk, const char *page, return -EINVAL; } - lim = queue_limits_start_update(disk->queue); if (disable) - lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED; + lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED; else - lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; - err = queue_limits_commit_update(disk->queue, &lim); - if (err) - return err; - return count; + lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED; + return 0; } #define QUEUE_RO_ENTRY(_prefix, _name) \ @@ -431,6 +412,13 @@ static struct queue_sysfs_entry _prefix##_entry = { \ .store = _prefix##_store, \ }; +#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \ +static struct queue_sysfs_entry _prefix##_entry = { \ + .attr = { .name = _name, .mode = 0644 }, \ + .show = _prefix##_show, \ + .store_limit = _prefix##_store, \ +} + #define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \ static struct queue_sysfs_entry _prefix##_entry = { \ .attr = { .name = _name, .mode = 0644 }, \ @@ -441,7 +429,7 @@ static struct queue_sysfs_entry _prefix##_entry = { \ QUEUE_RW_ENTRY(queue_requests, "nr_requests"); QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb"); -QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); +QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb"); QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb"); QUEUE_RO_ENTRY(queue_max_segments, "max_segments"); QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments"); @@ -457,7 +445,7 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size"); QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments"); QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity"); QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes"); -QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); +QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes"); QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data"); QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes"); @@ -477,11 +465,11 @@ QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); QUEUE_RO_ENTRY(queue_max_active_zones, 
"max_active_zones"); QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); -QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); +QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough"); QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); QUEUE_RW_ENTRY(queue_poll, "io_poll"); QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay"); -QUEUE_RW_ENTRY(queue_wc, "write_cache"); +QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache"); QUEUE_RO_ENTRY(queue_fua, "fua"); QUEUE_RO_ENTRY(queue_dax, "dax"); QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout"); @@ -494,10 +482,10 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = { .show = queue_logical_block_size_show, }; -QUEUE_RW_ENTRY(queue_rotational, "rotational"); -QUEUE_RW_ENTRY(queue_iostats, "iostats"); -QUEUE_RW_ENTRY(queue_add_random, "add_random"); -QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes"); +QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational"); +QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats"); +QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random"); +QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes"); #ifdef CONFIG_BLK_WBT static ssize_t queue_var_store64(s64 *var, const char *page) @@ -693,9 +681,10 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, struct queue_sysfs_entry *entry = to_queue(attr); struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj); struct request_queue *q = disk->queue; + unsigned int noio_flag; ssize_t res; - if (!entry->store) + if (!entry->store_limit && !entry->store) return -EIO; /* @@ -706,11 +695,28 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, if (entry->load_module) entry->load_module(disk, page, length); - blk_mq_freeze_queue(q); + if (entry->store_limit) { + struct queue_limits lim = queue_limits_start_update(q); + + res = entry->store_limit(disk, page, length, &lim); + if (res < 0) { + queue_limits_cancel_update(q); + return res; + } + + res = queue_limits_commit_update_frozen(q, &lim); + if (res) + return res; + return length; + } + mutex_lock(&q->sysfs_lock); + blk_mq_freeze_queue(q); + noio_flag = memalloc_noio_save(); res = entry->store(disk, page, length); - mutex_unlock(&q->sysfs_lock); + memalloc_noio_restore(noio_flag); blk_mq_unfreeze_queue(q); + mutex_unlock(&q->sysfs_lock); return res; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 84da1eadff64..9d08a54c201e 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -11,12 +11,8 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/sched/mm.h> #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/mempool.h> @@ -463,6 +459,8 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug) static inline bool disk_should_remove_zone_wplug(struct gendisk *disk, struct blk_zone_wplug *zwplug) { + lockdep_assert_held(&zwplug->lock); + /* If the zone write plug was already removed, we are done. 
*/ if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED) return false; @@ -584,6 +582,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug, bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING); bio_io_error(bio); disk_put_zone_wplug(zwplug); + /* Drop the reference taken by disk_zone_wplug_add_bio(() */ blk_queue_exit(q); } @@ -895,10 +894,7 @@ void blk_zone_write_plug_init_request(struct request *req) break; } - /* - * Drop the extra reference on the queue usage we got when - * plugging the BIO and advance the write pointer offset. - */ + /* Drop the reference taken by disk_zone_wplug_add_bio(). */ blk_queue_exit(q); zwplug->wp_offset += bio_sectors(bio); @@ -917,6 +913,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug, { struct gendisk *disk = bio->bi_bdev->bd_disk; + lockdep_assert_held(&zwplug->lock); + /* * If we lost track of the zone write pointer due to a write error, * the user must either execute a report zones, reset the zone or finish @@ -1446,7 +1444,6 @@ static int disk_update_zone_resources(struct gendisk *disk, unsigned int nr_seq_zones, nr_conv_zones; unsigned int pool_size; struct queue_limits lim; - int ret; disk->nr_zones = args->nr_zones; disk->zone_capacity = args->zone_capacity; @@ -1497,11 +1494,7 @@ static int disk_update_zone_resources(struct gendisk *disk, } commit: - blk_mq_freeze_queue(q); - ret = queue_limits_commit_update(q, &lim); - blk_mq_unfreeze_queue(q); - - return ret; + return queue_limits_commit_update_frozen(q, &lim); } static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx, @@ -1776,37 +1769,41 @@ int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector, EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout); #ifdef CONFIG_BLK_DEBUG_FS +static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug, + struct seq_file *m) +{ + unsigned int zwp_wp_offset, zwp_flags; + unsigned int zwp_zone_no, zwp_ref; + unsigned int zwp_bio_list_size; + unsigned long flags; + + spin_lock_irqsave(&zwplug->lock, flags); + zwp_zone_no = zwplug->zone_no; + zwp_flags = zwplug->flags; + zwp_ref = refcount_read(&zwplug->ref); + zwp_wp_offset = zwplug->wp_offset; + zwp_bio_list_size = bio_list_size(&zwplug->bio_list); + spin_unlock_irqrestore(&zwplug->lock, flags); + + seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref, + zwp_wp_offset, zwp_bio_list_size); +} int queue_zone_wplugs_show(void *data, struct seq_file *m) { struct request_queue *q = data; struct gendisk *disk = q->disk; struct blk_zone_wplug *zwplug; - unsigned int zwp_wp_offset, zwp_flags; - unsigned int zwp_zone_no, zwp_ref; - unsigned int zwp_bio_list_size, i; - unsigned long flags; + unsigned int i; if (!disk->zone_wplugs_hash) return 0; rcu_read_lock(); - for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) { - hlist_for_each_entry_rcu(zwplug, - &disk->zone_wplugs_hash[i], node) { - spin_lock_irqsave(&zwplug->lock, flags); - zwp_zone_no = zwplug->zone_no; - zwp_flags = zwplug->flags; - zwp_ref = refcount_read(&zwplug->ref); - zwp_wp_offset = zwplug->wp_offset; - zwp_bio_list_size = bio_list_size(&zwplug->bio_list); - spin_unlock_irqrestore(&zwplug->lock, flags); - - seq_printf(m, "%u 0x%x %u %u %u\n", - zwp_zone_no, zwp_flags, zwp_ref, - zwp_wp_offset, zwp_bio_list_size); - } - } + for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) + hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i], + node) + queue_zone_wplug_show(zwplug, m); rcu_read_unlock(); return 0; diff --git a/block/blk.h b/block/blk.h index 
2c26abf505b8..90fa5f28ccab 100644 --- a/block/blk.h +++ b/block/blk.h @@ -13,6 +13,8 @@ struct elevator_type; +#define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9) + /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -556,14 +558,6 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); -int bio_add_hw_page(struct request_queue *q, struct bio *bio, - struct page *page, unsigned int len, unsigned int offset, - unsigned int max_sectors, bool *same_page); - -int bio_add_hw_folio(struct request_queue *q, struct bio *bio, - struct folio *folio, size_t len, size_t offset, - unsigned int max_sectors, bool *same_page); - /* * Clean up a page appropriately, where the page may be pinned, may have a * ref taken on it or neither. @@ -720,22 +714,29 @@ void blk_integrity_verify(struct bio *bio); void blk_integrity_prepare(struct request *rq); void blk_integrity_complete(struct request *rq, unsigned int nr_bytes); -static inline void blk_freeze_acquire_lock(struct request_queue *q, bool - disk_dead, bool queue_dying) +#ifdef CONFIG_LOCKDEP +static inline void blk_freeze_acquire_lock(struct request_queue *q) { - if (!disk_dead) + if (!q->mq_freeze_disk_dead) rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_); - if (!queue_dying) + if (!q->mq_freeze_queue_dying) rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_); } -static inline void blk_unfreeze_release_lock(struct request_queue *q, bool - disk_dead, bool queue_dying) +static inline void blk_unfreeze_release_lock(struct request_queue *q) { - if (!queue_dying) + if (!q->mq_freeze_queue_dying) rwsem_release(&q->q_lockdep_map, _RET_IP_); - if (!disk_dead) + if (!q->mq_freeze_disk_dead) rwsem_release(&q->io_lockdep_map, _RET_IP_); } +#else +static inline void blk_freeze_acquire_lock(struct request_queue *q) +{ +} +static inline void blk_unfreeze_release_lock(struct request_queue *q) +{ +} +#endif #endif /* BLK_INTERNAL_H */ diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 32da4a4429ce..93523d8f8195 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -381,7 +381,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, set->queue_depth = 128; set->numa_node = NUMA_NO_NODE; set->cmd_size = sizeof(struct bsg_job) + dd_job_size; - set->flags = BLK_MQ_F_NO_SCHED | BLK_MQ_F_BLOCKING; + set->flags = BLK_MQ_F_BLOCKING; if (blk_mq_alloc_tag_set(set)) goto out_tag_set; diff --git a/block/elevator.c b/block/elevator.c index 7c3ba80e5ff4..b81216c48b6b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -405,12 +405,12 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq) return NULL; } -#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) +#define to_elv(atr) container_of_const((atr), struct elv_fs_entry, attr) static ssize_t elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page) { - struct elv_fs_entry *entry = to_elv(attr); + const struct elv_fs_entry *entry = to_elv(attr); struct elevator_queue *e; ssize_t error; @@ -428,7 +428,7 @@ static ssize_t elv_attr_store(struct kobject *kobj, struct attribute *attr, const char *page, size_t length) { - struct elv_fs_entry *entry = to_elv(attr); + const struct elv_fs_entry *entry = to_elv(attr); struct elevator_queue *e; ssize_t error; @@ -461,7 +461,7 @@ int elv_register_queue(struct request_queue *q, bool uevent) error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched"); if (!error) { - 
struct elv_fs_entry *attr = e->type->elevator_attrs; + const struct elv_fs_entry *attr = e->type->elevator_attrs; if (attr) { while (attr->attr.name) { if (sysfs_create_file(&e->kobj, &attr->attr)) @@ -547,14 +547,6 @@ void elv_unregister(struct elevator_type *e) } EXPORT_SYMBOL_GPL(elv_unregister); -static inline bool elv_support_iosched(struct request_queue *q) -{ - if (!queue_is_mq(q) || - (q->tag_set->flags & BLK_MQ_F_NO_SCHED)) - return false; - return true; -} - /* * For single queue devices, default to using mq-deadline. If we have multiple * queues or mq-deadline is not available, default to "none". @@ -580,9 +572,6 @@ void elevator_init_mq(struct request_queue *q) struct elevator_type *e; int err; - if (!elv_support_iosched(q)) - return; - WARN_ON_ONCE(blk_queue_registered(q)); if (unlikely(q->elevator)) @@ -601,16 +590,13 @@ void elevator_init_mq(struct request_queue *q) * * Disk isn't added yet, so verifying queue lock only manually. */ - blk_freeze_queue_start_non_owner(q); - blk_freeze_acquire_lock(q, true, false); - blk_mq_freeze_queue_wait(q); + blk_mq_freeze_queue(q); blk_mq_cancel_work_sync(q); err = blk_mq_init_sched(q, e); - blk_unfreeze_release_lock(q, true, false); - blk_mq_unfreeze_queue_non_owner(q); + blk_mq_unfreeze_queue(q); if (err) { pr_warn("\"%s\" elevator initialization failed, " @@ -717,9 +703,6 @@ void elv_iosched_load_module(struct gendisk *disk, const char *buf, struct elevator_type *found; const char *name; - if (!elv_support_iosched(disk->queue)) - return; - strscpy(elevator_name, buf, sizeof(elevator_name)); name = strstrip(elevator_name); @@ -737,9 +720,6 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf, char elevator_name[ELV_NAME_MAX]; int ret; - if (!elv_support_iosched(disk->queue)) - return count; - strscpy(elevator_name, buf, sizeof(elevator_name)); ret = elevator_change(disk->queue, strstrip(elevator_name)); if (!ret) @@ -754,9 +734,6 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name) struct elevator_type *cur = NULL, *e; int len = 0; - if (!elv_support_iosched(q)) - return sprintf(name, "none\n"); - if (!q->elevator) { len += sprintf(name+len, "[none] "); } else { diff --git a/block/elevator.h b/block/elevator.h index dbf357ef4fab..e526662c5dbb 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -71,7 +71,7 @@ struct elevator_type size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ - struct elv_fs_entry *elevator_attrs; + const struct elv_fs_entry *elevator_attrs; const char *elevator_name; const char *elevator_alias; struct module *elevator_owner; diff --git a/block/genhd.c b/block/genhd.c index 79230c109fca..e9375e20d866 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -58,6 +58,13 @@ static DEFINE_IDA(ext_devt_ida); void set_capacity(struct gendisk *disk, sector_t sectors) { + if (sectors > BLK_DEV_MAX_SECTORS) { + pr_warn_once("%s: truncate capacity from %lld to %lld\n", + disk->disk_name, sectors, + BLK_DEV_MAX_SECTORS); + sectors = BLK_DEV_MAX_SECTORS; + } + bdev_set_nr_sectors(disk->part0, sectors); } EXPORT_SYMBOL(set_capacity); @@ -400,21 +407,26 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, struct device *ddev = disk_to_dev(disk); int ret; - /* Only makes sense for bio-based to set ->poll_bio */ - if (queue_is_mq(disk->queue) && disk->fops->poll_bio) + if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS)) return -EINVAL; - /* - * The disk queue should now be all set with enough information about - * the device for the elevator code 
to pick an adequate default - * elevator if one is needed, that is, for devices requesting queue - * registration. - */ - elevator_init_mq(disk->queue); + if (queue_is_mq(disk->queue)) { + /* + * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers. + */ + if (disk->fops->submit_bio || disk->fops->poll_bio) + return -EINVAL; - /* Mark bdev as having a submit_bio, if needed */ - if (disk->fops->submit_bio) + /* + * Initialize the I/O scheduler code and pick a default one if + * needed. + */ + elevator_init_mq(disk->queue); + } else { + if (!disk->fops->submit_bio) + return -EINVAL; bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO); + } /* * If the driver provides an explicit major number it also must provide @@ -661,7 +673,7 @@ void del_gendisk(struct gendisk *disk) struct request_queue *q = disk->queue; struct block_device *part; unsigned long idx; - bool start_drain, queue_dying; + bool start_drain; might_sleep(); @@ -690,9 +702,8 @@ void del_gendisk(struct gendisk *disk) */ mutex_lock(&disk->open_mutex); start_drain = __blk_mark_disk_dead(disk); - queue_dying = blk_queue_dying(q); if (start_drain) - blk_freeze_acquire_lock(q, true, queue_dying); + blk_freeze_acquire_lock(q); xa_for_each_start(&disk->part_tbl, idx, part, 1) drop_partition(part); mutex_unlock(&disk->open_mutex); @@ -748,7 +759,7 @@ void del_gendisk(struct gendisk *disk) blk_mq_exit_queue(q); if (start_drain) - blk_unfreeze_release_lock(q, true, queue_dying); + blk_unfreeze_release_lock(q); } EXPORT_SYMBOL(del_gendisk); @@ -798,7 +809,7 @@ static ssize_t disk_badblocks_store(struct device *dev, } #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD -void blk_request_module(dev_t devt) +static bool blk_probe_dev(dev_t devt) { unsigned int major = MAJOR(devt); struct blk_major_name **n; @@ -808,14 +819,26 @@ void blk_request_module(dev_t devt) if ((*n)->major == major && (*n)->probe) { (*n)->probe(devt); mutex_unlock(&major_names_lock); - return; + return true; } } mutex_unlock(&major_names_lock); + return false; +} + +void blk_request_module(dev_t devt) +{ + int error; + + if (blk_probe_dev(devt)) + return; - if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) - /* Make old-style 2.4 aliases work */ - request_module("block-major-%d", MAJOR(devt)); + error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)); + /* Make old-style 2.4 aliases work */ + if (error > 0) + error = request_module("block-major-%d", MAJOR(devt)); + if (!error) + blk_probe_dev(devt); } #endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */ diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 4155594aefc6..dc31f2dfa414 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -889,7 +889,7 @@ KYBER_LAT_SHOW_STORE(KYBER_WRITE, write); #undef KYBER_LAT_SHOW_STORE #define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store) -static struct elv_fs_entry kyber_sched_attrs[] = { +static const struct elv_fs_entry kyber_sched_attrs[] = { KYBER_LAT_ATTR(read), KYBER_LAT_ATTR(write), __ATTR_NULL diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 5528347b5fcf..754f6b7415cd 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -834,7 +834,7 @@ STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); #define DD_ATTR(name) \ __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) -static struct elv_fs_entry deadline_attrs[] = { +static const struct elv_fs_entry deadline_attrs[] = { DD_ATTR(read_expire), DD_ATTR(write_expire), DD_ATTR(writes_starved), diff --git 
a/block/partitions/ldm.h b/block/partitions/ldm.h
index e259180c8914..aa3bd050d8cd 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * ldm - Part of the Linux-NTFS project.
  *
  * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
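
The sysfs conversions above (the QUEUE_LIM_RW_ENTRY attributes and the store_limit branch in queue_attr_store()), together with disk_update_zone_resources(), all funnel limit changes through the same flow: take a snapshot with queue_limits_start_update(), modify the copy, and commit it with the new queue_limits_commit_update_frozen() helper, which handles the queue freeze internally. Below is a minimal driver-side sketch of that flow; example_set_write_cache() is a hypothetical helper, not part of this series, while the queue_limits_* calls and the BLK_FEAT_WRITE_CACHE flag are existing kernel API.

#include <linux/blkdev.h>

/*
 * Hypothetical helper: toggle the write-cache feature using the
 * snapshot/modify/commit pattern the patches above switch to. The
 * *_frozen() variant freezes and unfreezes the queue around the commit,
 * so the caller does not pair blk_mq_freeze_queue() and
 * blk_mq_unfreeze_queue() by hand.
 */
static int example_set_write_cache(struct gendisk *disk, bool enable)
{
	struct queue_limits lim = queue_limits_start_update(disk->queue);

	if (enable)
		lim.features |= BLK_FEAT_WRITE_CACHE;
	else
		lim.features &= ~BLK_FEAT_WRITE_CACHE;

	/* Releases the limits lock taken by queue_limits_start_update(). */
	return queue_limits_commit_update_frozen(disk->queue, &lim);
}

A caller that bails out before committing must drop the update with queue_limits_cancel_update() instead, as the new store_limit error path in queue_attr_store() does.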
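
The new BLK_DEV_MAX_SECTORS clamp applied in set_capacity() is (LLONG_MAX >> 9): the largest sector count whose byte size, sectors << SECTOR_SHIFT, still fits in a signed 64-bit offset. A compile-time restatement of that property follows; it is not from the patch, and the EXAMPLE_ prefix is illustrative only.

#include <linux/blkdev.h>	/* SECTOR_SHIFT */
#include <linux/build_bug.h>	/* static_assert */
#include <linux/limits.h>	/* LLONG_MAX */

/* Illustrative copy of the limit this series adds to block/blk.h. */
#define EXAMPLE_BLK_DEV_MAX_SECTORS	(LLONG_MAX >> 9)

/* A clamped capacity, converted to bytes, cannot overflow a signed 64-bit offset. */
static_assert((EXAMPLE_BLK_DEV_MAX_SECTORS << SECTOR_SHIFT) <= LLONG_MAX);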
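
The elevator changes make the attribute tables const end to end (struct elevator_type, elv_attr_show()/elv_attr_store(), kyber and mq-deadline). For a scheduler this only changes the table definition; here is a minimal, hypothetical sketch with illustrative example_* names, assuming it is built in-tree next to block/elevator.h the way kyber and mq-deadline are.

#include <linux/kernel.h>
#include <linux/sysfs.h>

#include "elevator.h"

static ssize_t example_depth_show(struct elevator_queue *e, char *page)
{
	/* Illustrative only: report a fixed value. */
	return sprintf(page, "%d\n", 64);
}

static ssize_t example_depth_store(struct elevator_queue *e, const char *page,
				   size_t count)
{
	/* Illustrative only: accept and ignore the written value. */
	return count;
}

/* Declared const, matching the new 'const struct elv_fs_entry *elevator_attrs'. */
static const struct elv_fs_entry example_sched_attrs[] = {
	__ATTR(depth, 0644, example_depth_show, example_depth_store),
	__ATTR_NULL
};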