summary refs log tree commit diff stats
path: root/block
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-01-20 19:38:46 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-01-20 19:38:46 -0800
commit 1cbfb828e05171ca2dd77b5988d068e6872480fe (patch)
tree bfb33c9ad8840908058649ba2e261bdb7e5f7ee9 /block
parent 3d3a9c8b89d4f8a3785e06ffd15405c670696f02 (diff)
parent 554b22864cc79e28cd65e3a6e1d0d1dfa8581c68 (diff)
download linux-stable-1cbfb828e05171ca2dd77b5988d068e6872480fe.tar.gz
linux-stable-1cbfb828e05171ca2dd77b5988d068e6872480fe.tar.bz2
linux-stable-1cbfb828e05171ca2dd77b5988d068e6872480fe.zip
Merge tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe:

 - NVMe pull requests via Keith:
      - Target support for PCI-Endpoint transport (Damien)
      - TCP IO queue spreading fixes (Sagi, Chaitanya)
      - Target handling for "limited retry" flags (Guixen)
      - Poll type fix (Yongsoo)
      - Xarray storage error handling (Keisuke)
      - Host memory buffer free size fix on error (Francis)

 - MD pull requests via Song:
      - Reintroduce md-linear (Yu Kuai)
      - md-bitmap refactor and fix (Yu Kuai)
      - Replace kmap_atomic with kmap_local_page (David Reaver)

 - Quite a few queue freeze and debugfs deadlock fixes

   Ming introduced lockdep support for this in the 6.13 kernel, and it
   has (unsurprisingly) uncovered quite a few issues

 - Use const attributes for IO schedulers

 - Remove bio ioprio wrappers

 - Fixes for stacked device atomic write support

 - Refactor queue affinity helpers, in preparation for better supporting
   isolated CPUs

 - Cleanups of loop O_DIRECT handling

 - Cleanup of BLK_MQ_F_* flags

 - Add rotational support for null_blk

 - Various fixes and cleanups

* tag 'for-6.14/block-20250118' of git://git.kernel.dk/linux: (106 commits)
  block: Don't trim an atomic write
  block: Add common atomic writes enable flag
  md/md-linear: Fix a NULL vs IS_ERR() bug in linear_add()
  block: limit disk max sectors to (LLONG_MAX >> 9)
  block: Change blk_stack_atomic_writes_limits() unit_min check
  block: Ensure start sector is aligned for stacking atomic writes
  blk-mq: Move more error handling into blk_mq_submit_bio()
  block: Reorder the request allocation code in blk_mq_submit_bio()
  nvme: fix bogus kzalloc() return check in nvme_init_effects_log()
  md/md-bitmap: move bitmap_{start, end}write to md upper layer
  md/raid5: implement pers->bitmap_sector()
  md: add a new callback pers->bitmap_sector()
  md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
  md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
  md: Replace deprecated kmap_atomic() with kmap_local_page()
  md: reintroduce md-linear
  partitions: ldm: remove the initial kernel-doc notation
  blk-cgroup: rwstat: fix kernel-doc warnings in header file
  blk-cgroup: fix kernel-doc warnings in header file
  nbd: fix partial sending
  ...
Diffstat (limited to 'block')
-rw-r--r-- block/Makefile 2
-rw-r--r-- block/bfq-iosched.c 2
-rw-r--r-- block/bio.c 111
-rw-r--r-- block/blk-cgroup-rwstat.h 5
-rw-r--r-- block/blk-cgroup.h 10
-rw-r--r-- block/blk-core.c 21
-rw-r--r-- block/blk-integrity.c 4
-rw-r--r-- block/blk-map.c 128
-rw-r--r-- block/blk-merge.c 177
-rw-r--r-- block/blk-mq-cpumap.c 37
-rw-r--r-- block/blk-mq-debugfs.c 27
-rw-r--r-- block/blk-mq-pci.c 46
-rw-r--r-- block/blk-mq-sched.c 3
-rw-r--r-- block/blk-mq-tag.c 41
-rw-r--r-- block/blk-mq-virtio.c 46
-rw-r--r-- block/blk-mq.c 71
-rw-r--r-- block/blk-mq.h 11
-rw-r--r-- block/blk-settings.c 42
-rw-r--r-- block/blk-sysfs.c 140
-rw-r--r-- block/blk-zoned.c 65
-rw-r--r-- block/blk.h 33
-rw-r--r-- block/bsg-lib.c 2
-rw-r--r-- block/elevator.c 35
-rw-r--r-- block/elevator.h 2
-rw-r--r-- block/genhd.c 63
-rw-r--r-- block/kyber-iosched.c 2
-rw-r--r-- block/mq-deadline.c 2
-rw-r--r-- block/partitions/ldm.h 2
28 files changed, 442 insertions, 688 deletions
diff --git a/block/Makefile b/block/Makefile
index ddfd21c1a9ff..33748123710b 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -27,8 +27,6 @@ bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
-obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
-obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index cad16c163611..167542201603 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -7622,7 +7622,7 @@ static ssize_t bfq_low_latency_store(struct elevator_queue *e,
#define BFQ_ATTR(name) \
__ATTR(name, 0644, bfq_##name##_show, bfq_##name##_store)
-static struct elv_fs_entry bfq_attrs[] = {
+static const struct elv_fs_entry bfq_attrs[] = {
BFQ_ATTR(fifo_expire_sync),
BFQ_ATTR(fifo_expire_async),
BFQ_ATTR(back_seek_max),
diff --git a/block/bio.c b/block/bio.c
index d5bdc31d88d3..f0c416e5931d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -946,8 +946,11 @@ static bool bvec_try_merge_page(struct bio_vec *bv, struct page *page,
/*
* Try to merge a page into a segment, while obeying the hardware segment
- * size limit. This is not for normal read/write bios, but for passthrough
- * or Zone Append operations that we can't split.
+ * size limit.
+ *
+ * This is kept around for the integrity metadata, which still tries
+ * to build the initial bio to the hardware limit and doesn't have proper
+ * helpers to split. Hopefully this will go away soon.
*/
bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
struct page *page, unsigned len, unsigned offset,
@@ -965,106 +968,6 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
}
/**
- * bio_add_hw_page - attempt to add a page to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same page
- *
- * Add a page to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned int len, unsigned int offset,
- unsigned int max_sectors, bool *same_page)
-{
- unsigned int max_size = max_sectors << SECTOR_SHIFT;
-
- if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
- return 0;
-
- len = min3(len, max_size, queue_max_segment_size(q));
- if (len > max_size - bio->bi_iter.bi_size)
- return 0;
-
- if (bio->bi_vcnt > 0) {
- struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
- if (bvec_try_merge_hw_page(q, bv, page, len, offset,
- same_page)) {
- bio->bi_iter.bi_size += len;
- return len;
- }
-
- if (bio->bi_vcnt >=
- min(bio->bi_max_vecs, queue_max_segments(q)))
- return 0;
-
- /*
- * If the queue doesn't support SG gaps and adding this segment
- * would create a gap, disallow it.
- */
- if (bvec_gap_to_prev(&q->limits, bv, offset))
- return 0;
- }
-
- bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
- bio->bi_vcnt++;
- bio->bi_iter.bi_size += len;
- return len;
-}
-
-/**
- * bio_add_hw_folio - attempt to add a folio to a bio with hw constraints
- * @q: the target queue
- * @bio: destination bio
- * @folio: folio to add
- * @len: vec entry length
- * @offset: vec entry offset in the folio
- * @max_sectors: maximum number of sectors that can be added
- * @same_page: return if the segment has been merged inside the same folio
- *
- * Add a folio to a bio while respecting the hardware max_sectors, max_segment
- * and gap limitations.
- */
-int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
- struct folio *folio, size_t len, size_t offset,
- unsigned int max_sectors, bool *same_page)
-{
- if (len > UINT_MAX || offset > UINT_MAX)
- return 0;
- return bio_add_hw_page(q, bio, folio_page(folio, 0), len, offset,
- max_sectors, same_page);
-}
-
-/**
- * bio_add_pc_page - attempt to add page to passthrough bio
- * @q: the target queue
- * @bio: destination bio
- * @page: page to add
- * @len: vec entry length
- * @offset: vec entry offset
- *
- * Attempt to add a page to the bio_vec maplist. This can fail for a
- * number of reasons, such as the bio being full or target block device
- * limitations. The target block device must allow bio's up to PAGE_SIZE,
- * so it is always possible to add a single page to an empty bio.
- *
- * This should only be used by passthrough bios.
- */
-int bio_add_pc_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned int len, unsigned int offset)
-{
- bool same_page = false;
- return bio_add_hw_page(q, bio, page, len, offset,
- queue_max_hw_sectors(q), &same_page);
-}
-EXPORT_SYMBOL(bio_add_pc_page);
-
-/**
* __bio_add_page - add page(s) to a bio in a new segment
* @bio: destination bio
* @page: start page to add
@@ -1707,6 +1610,10 @@ EXPORT_SYMBOL(bio_split);
*/
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
+ /* We should never trim an atomic write */
+ if (WARN_ON_ONCE(bio->bi_opf & REQ_ATOMIC && size))
+ return;
+
if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
offset + size > bio_sectors(bio)))
return;
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
index 022527b0b043..703a16fe1404 100644
--- a/block/blk-cgroup-rwstat.h
+++ b/block/blk-cgroup-rwstat.h
@@ -52,7 +52,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
/**
* blkg_rwstat_add - add a value to a blkg_rwstat
* @rwstat: target blkg_rwstat
- * @op: REQ_OP and flags
+ * @opf: REQ_OP and flags
* @val: value to add
*
* Add @val to @rwstat. The counters are chosen according to @rw. The
@@ -83,8 +83,9 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
/**
* blkg_rwstat_read - read the current values of a blkg_rwstat
* @rwstat: blkg_rwstat to read
+ * @result: where to put the current values
*
- * Read the current snapshot of @rwstat and return it in the aux counts.
+ * Read the current snapshot of @rwstat and return it in the @result counts.
*/
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
struct blkg_rwstat_sample *result)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index b9e3265c1eb3..2c4663bd993a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -225,7 +225,9 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
- * @return: true if this bio needs to be submitted with the root blkg context.
+ * @bio: the target &bio
+ *
+ * Return: true if this bio needs to be submitted with the root blkg context.
*
* In order to avoid priority inversions we sometimes need to issue a bio as if
* it were attached to the root blkg, and then backcharge to the actual owning
@@ -245,7 +247,7 @@ static inline bool bio_issue_as_root_blkg(struct bio *bio)
* @q: request_queue of interest
*
* Lookup blkg for the @blkcg - @q pair.
-
+ *
* Must be called in a RCU critical section.
*/
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
@@ -268,7 +270,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
}
/**
- * blkg_to_pdata - get policy private data
+ * blkg_to_pd - get policy private data
* @blkg: blkg of interest
* @pol: policy of interest
*
@@ -287,7 +289,7 @@ static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
}
/**
- * pdata_to_blkg - get blkg associated with policy private data
+ * pd_to_blkg - get blkg associated with policy private data
* @pd: policy private data of interest
*
* @pd is policy private data. Determine the blkg it's associated with.
diff --git a/block/blk-core.c b/block/blk-core.c
index 666efe8fa202..32fb28a6372c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -629,8 +629,14 @@ static void __submit_bio(struct bio *bio)
blk_mq_submit_bio(bio);
} else if (likely(bio_queue_enter(bio) == 0)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
-
- disk->fops->submit_bio(bio);
+
+ if ((bio->bi_opf & REQ_POLLED) &&
+ !(disk->queue->limits.features & BLK_FEAT_POLL)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ } else {
+ disk->fops->submit_bio(bio);
+ }
blk_queue_exit(disk->queue);
}
@@ -805,12 +811,6 @@ void submit_bio_noacct(struct bio *bio)
}
}
- if (!(q->limits.features & BLK_FEAT_POLL) &&
- (bio->bi_opf & REQ_POLLED)) {
- bio_clear_polled(bio);
- goto not_supported;
- }
-
switch (bio_op(bio)) {
case REQ_OP_READ:
break;
@@ -935,7 +935,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
return 0;
q = bdev_get_queue(bdev);
- if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
+ if (cookie == BLK_QC_T_NONE)
return 0;
blk_flush_plug(current->plug, false);
@@ -956,7 +956,8 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
} else {
struct gendisk *disk = q->disk;
- if (disk && disk->fops->poll_bio)
+ if ((q->limits.features & BLK_FEAT_POLL) && disk &&
+ disk->fops->poll_bio)
ret = disk->fops->poll_bio(bio, iob, flags);
}
blk_queue_exit(q);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index b180cac61a9d..013469faa5e7 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -218,9 +218,7 @@ static ssize_t flag_store(struct device *dev, const char *page, size_t count,
else
lim.integrity.flags |= flag;
- blk_mq_freeze_queue(q);
- err = queue_limits_commit_update(q, &lim);
- blk_mq_unfreeze_queue(q);
+ err = queue_limits_commit_update_frozen(q, &lim);
if (err)
return err;
return count;
diff --git a/block/blk-map.c b/block/blk-map.c
index 894009b2d881..d2f22744b3d1 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -189,7 +189,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
}
}
- if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
+ if (bio_add_page(bio, page, bytes, offset) < bytes) {
if (!map_data)
__free_page(page);
break;
@@ -272,86 +272,27 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
- iov_iter_extraction_t extraction_flags = 0;
- unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
- int j;
if (!iov_iter_count(iter))
return -EINVAL;
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
- if (bio == NULL)
+ if (!bio)
return -ENOMEM;
-
- if (blk_queue_pci_p2pdma(rq->q))
- extraction_flags |= ITER_ALLOW_P2PDMA;
- if (iov_iter_extract_will_pin(iter))
- bio_set_flag(bio, BIO_PAGE_PINNED);
-
- while (iov_iter_count(iter)) {
- struct page *stack_pages[UIO_FASTIOV];
- struct page **pages = stack_pages;
- ssize_t bytes;
- size_t offs;
- int npages;
-
- if (nr_vecs > ARRAY_SIZE(stack_pages))
- pages = NULL;
-
- bytes = iov_iter_extract_pages(iter, &pages, LONG_MAX,
- nr_vecs, extraction_flags, &offs);
- if (unlikely(bytes <= 0)) {
- ret = bytes ? bytes : -EFAULT;
- goto out_unmap;
- }
-
- npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
-
- if (unlikely(offs & queue_dma_alignment(rq->q)))
- j = 0;
- else {
- for (j = 0; j < npages; j++) {
- struct page *page = pages[j];
- unsigned int n = PAGE_SIZE - offs;
- bool same_page = false;
-
- if (n > bytes)
- n = bytes;
-
- if (!bio_add_hw_page(rq->q, bio, page, n, offs,
- max_sectors, &same_page))
- break;
-
- if (same_page)
- bio_release_page(bio, page);
- bytes -= n;
- offs = 0;
- }
- }
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (j < npages)
- bio_release_page(bio, pages[j++]);
- if (pages != stack_pages)
- kvfree(pages);
- /* couldn't stuff something into bio? */
- if (bytes) {
- iov_iter_revert(iter, bytes);
- break;
- }
- }
-
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (ret)
+ goto out_put;
ret = blk_rq_append_bio(rq, bio);
if (ret)
- goto out_unmap;
+ goto out_release;
return 0;
- out_unmap:
+out_release:
bio_release_pages(bio, false);
+out_put:
blk_mq_map_bio_put(bio);
return ret;
}
@@ -422,8 +363,7 @@ static struct bio *bio_map_kern(struct request_queue *q, void *data,
page = virt_to_page(data);
else
page = vmalloc_to_page(data);
- if (bio_add_pc_page(q, bio, page, bytes,
- offset) < bytes) {
+ if (bio_add_page(bio, page, bytes, offset) < bytes) {
/* we don't support partial mappings */
bio_uninit(bio);
kfree(bio);
@@ -507,7 +447,7 @@ static struct bio *bio_copy_kern(struct request_queue *q, void *data,
if (!reading)
memcpy(page_address(page), p, bytes);
- if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
+ if (bio_add_page(bio, page, bytes, 0) < bytes)
break;
len -= bytes;
@@ -536,24 +476,33 @@ cleanup:
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
{
- struct bvec_iter iter;
- struct bio_vec bv;
+ const struct queue_limits *lim = &rq->q->limits;
+ unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
unsigned int nr_segs = 0;
+ int ret;
- bio_for_each_bvec(bv, bio, iter)
- nr_segs++;
+ /* check that the data layout matches the hardware restrictions */
+ ret = bio_split_rw_at(bio, lim, &nr_segs, max_bytes);
+ if (ret) {
+ /* if we would have to split the bio, copy instead */
+ if (ret > 0)
+ ret = -EREMOTEIO;
+ return ret;
+ }
- if (!rq->bio) {
- blk_rq_bio_prep(rq, bio, nr_segs);
- } else {
+ if (rq->bio) {
if (!ll_back_merge_fn(rq, bio, nr_segs))
return -EINVAL;
rq->biotail->bi_next = bio;
rq->biotail = bio;
- rq->__data_len += (bio)->bi_iter.bi_size;
+ rq->__data_len += bio->bi_iter.bi_size;
bio_crypt_free_ctx(bio);
+ return 0;
}
+ rq->nr_phys_segments = nr_segs;
+ rq->bio = rq->biotail = bio;
+ rq->__data_len = bio->bi_iter.bi_size;
return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
@@ -561,9 +510,7 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
- const struct queue_limits *lim = &rq->q->limits;
- unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
- unsigned int nsegs;
+ unsigned int max_bytes = rq->q->limits.max_hw_sectors << SECTOR_SHIFT;
struct bio *bio;
int ret;
@@ -576,18 +523,10 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
return -ENOMEM;
bio_iov_bvec_set(bio, iter);
- /* check that the data layout matches the hardware restrictions */
- ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
- if (ret) {
- /* if we would have to split the bio, copy instead */
- if (ret > 0)
- ret = -EREMOTEIO;
+ ret = blk_rq_append_bio(rq, bio);
+ if (ret)
blk_mq_map_bio_put(bio);
- return ret;
- }
-
- blk_rq_bio_prep(rq, bio, nsegs);
- return 0;
+ return ret;
}
/**
@@ -644,8 +583,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
else
ret = bio_map_user_iov(rq, &i, gfp_mask);
- if (ret)
+ if (ret) {
+ if (ret == -EREMOTEIO)
+ ret = -EINVAL;
goto unmap_rq;
+ }
if (!bio)
bio = rq->bio;
} while (iov_iter_count(&i));
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e01383c6e534..15cd231d560c 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -473,137 +473,100 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
return nr_phys_segs;
}
-static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
- struct scatterlist *sglist)
-{
- if (!*sg)
- return sglist;
+struct phys_vec {
+ phys_addr_t paddr;
+ u32 len;
+};
- /*
- * If the driver previously mapped a shorter list, we could see a
- * termination bit prematurely unless it fully inits the sg table
- * on each mapping. We KNOW that there must be more entries here
- * or the driver would be buggy, so force clear the termination bit
- * to avoid doing a full sg_init_table() in drivers for each command.
- */
- sg_unmark_end(*sg);
- return sg_next(*sg);
-}
-
-static unsigned blk_bvec_map_sg(struct request_queue *q,
- struct bio_vec *bvec, struct scatterlist *sglist,
- struct scatterlist **sg)
+static bool blk_map_iter_next(struct request *req,
+ struct req_iterator *iter, struct phys_vec *vec)
{
- unsigned nbytes = bvec->bv_len;
- unsigned nsegs = 0, total = 0;
-
- while (nbytes > 0) {
- unsigned offset = bvec->bv_offset + total;
- unsigned len = get_max_segment_size(&q->limits,
- bvec_phys(bvec) + total, nbytes);
- struct page *page = bvec->bv_page;
-
- /*
- * Unfortunately a fair number of drivers barf on scatterlists
- * that have an offset larger than PAGE_SIZE, despite other
- * subsystems dealing with that invariant just fine. For now
- * stick to the legacy format where we never present those from
- * the block layer, but the code below should be removed once
- * these offenders (mostly MMC/SD drivers) are fixed.
- */
- page += (offset >> PAGE_SHIFT);
- offset &= ~PAGE_MASK;
-
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, page, len, offset);
+ unsigned int max_size;
+ struct bio_vec bv;
- total += len;
- nbytes -= len;
- nsegs++;
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ if (!iter->bio)
+ return false;
+ vec->paddr = bvec_phys(&req->special_vec);
+ vec->len = req->special_vec.bv_len;
+ iter->bio = NULL;
+ return true;
}
- return nsegs;
-}
-
-static inline int __blk_bvec_map_sg(struct bio_vec bv,
- struct scatterlist *sglist, struct scatterlist **sg)
-{
- *sg = blk_next_sg(sg, sglist);
- sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
- return 1;
-}
-
-/* only try to merge bvecs into one sg if they are from two bios */
-static inline bool
-__blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
- struct bio_vec *bvprv, struct scatterlist **sg)
-{
-
- int nbytes = bvec->bv_len;
-
- if (!*sg)
+ if (!iter->iter.bi_size)
return false;
- if ((*sg)->length + nbytes > queue_max_segment_size(q))
- return false;
+ bv = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ vec->paddr = bvec_phys(&bv);
+ max_size = get_max_segment_size(&req->q->limits, vec->paddr, UINT_MAX);
+ bv.bv_len = min(bv.bv_len, max_size);
+ bio_advance_iter_single(iter->bio, &iter->iter, bv.bv_len);
- if (!biovec_phys_mergeable(q, bvprv, bvec))
- return false;
+ /*
+ * If we are entirely done with this bi_io_vec entry, check if the next
+ * one could be merged into it. This typically happens when moving to
+ * the next bio, but some callers also don't pack bvecs tight.
+ */
+ while (!iter->iter.bi_size || !iter->iter.bi_bvec_done) {
+ struct bio_vec next;
+
+ if (!iter->iter.bi_size) {
+ if (!iter->bio->bi_next)
+ break;
+ iter->bio = iter->bio->bi_next;
+ iter->iter = iter->bio->bi_iter;
+ }
- (*sg)->length += nbytes;
+ next = mp_bvec_iter_bvec(iter->bio->bi_io_vec, iter->iter);
+ if (bv.bv_len + next.bv_len > max_size ||
+ !biovec_phys_mergeable(req->q, &bv, &next))
+ break;
+
+ bv.bv_len += next.bv_len;
+ bio_advance_iter_single(iter->bio, &iter->iter, next.bv_len);
+ }
+ vec->len = bv.bv_len;
return true;
}
-static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
- struct scatterlist *sglist,
- struct scatterlist **sg)
+static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
+ struct scatterlist *sglist)
{
- struct bio_vec bvec, bvprv = { NULL };
- struct bvec_iter iter;
- int nsegs = 0;
- bool new_bio = false;
-
- for_each_bio(bio) {
- bio_for_each_bvec(bvec, bio, iter) {
- /*
- * Only try to merge bvecs from two bios given we
- * have done bio internal merge when adding pages
- * to bio
- */
- if (new_bio &&
- __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
- goto next_bvec;
-
- if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
- nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
- else
- nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
- next_bvec:
- new_bio = false;
- }
- if (likely(bio->bi_iter.bi_size)) {
- bvprv = bvec;
- new_bio = true;
- }
- }
+ if (!*sg)
+ return sglist;
- return nsegs;
+ /*
+ * If the driver previously mapped a shorter list, we could see a
+ * termination bit prematurely unless it fully inits the sg table
+ * on each mapping. We KNOW that there must be more entries here
+ * or the driver would be buggy, so force clear the termination bit
+ * to avoid doing a full sg_init_table() in drivers for each command.
+ */
+ sg_unmark_end(*sg);
+ return sg_next(*sg);
}
/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
+ * Map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries.
*/
int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sglist, struct scatterlist **last_sg)
{
+ struct req_iterator iter = {
+ .bio = rq->bio,
+ .iter = rq->bio->bi_iter,
+ };
+ struct phys_vec vec;
int nsegs = 0;
- if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
- nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
- else if (rq->bio)
- nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
+ while (blk_map_iter_next(rq, &iter, &vec)) {
+ *last_sg = blk_next_sg(last_sg, sglist);
+ sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,
+ offset_in_page(vec.paddr));
+ nsegs++;
+ }
if (*last_sg)
sg_mark_end(*last_sg);
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9638b25fd521..ad8d6a363f24 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -11,6 +11,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/group_cpus.h>
+#include <linux/device/bus.h>
#include "blk.h"
#include "blk-mq.h"
@@ -54,3 +55,39 @@ int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int index)
return NUMA_NO_NODE;
}
+
+/**
+ * blk_mq_map_hw_queues - Create CPU to hardware queue mapping
+ * @qmap: CPU to hardware queue map
+ * @dev: The device to map queues
+ * @offset: Queue offset to use for the device
+ *
+ * Create a CPU to hardware queue mapping in @qmap. The struct bus_type
+ * irq_get_affinity callback will be used to retrieve the affinity.
+ */
+void blk_mq_map_hw_queues(struct blk_mq_queue_map *qmap,
+ struct device *dev, unsigned int offset)
+
+{
+ const struct cpumask *mask;
+ unsigned int queue, cpu;
+
+ if (!dev->bus->irq_get_affinity)
+ goto fallback;
+
+ for (queue = 0; queue < qmap->nr_queues; queue++) {
+ mask = dev->bus->irq_get_affinity(dev, queue + offset);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask)
+ qmap->mq_map[cpu] = qmap->queue_offset + queue;
+ }
+
+ return;
+
+fallback:
+ WARN_ON_ONCE(qmap->nr_queues > 1);
+ blk_mq_clear_mq_map(qmap);
+}
+EXPORT_SYMBOL_GPL(blk_mq_map_hw_queues);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5463697a8442..adf5f0697b6b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -172,21 +172,13 @@ static int hctx_state_show(void *data, struct seq_file *m)
return 0;
}
-#define BLK_TAG_ALLOC_NAME(name) [BLK_TAG_ALLOC_##name] = #name
-static const char *const alloc_policy_name[] = {
- BLK_TAG_ALLOC_NAME(FIFO),
- BLK_TAG_ALLOC_NAME(RR),
-};
-#undef BLK_TAG_ALLOC_NAME
-
#define HCTX_FLAG_NAME(name) [ilog2(BLK_MQ_F_##name)] = #name
static const char *const hctx_flag_name[] = {
- HCTX_FLAG_NAME(SHOULD_MERGE),
HCTX_FLAG_NAME(TAG_QUEUE_SHARED),
HCTX_FLAG_NAME(STACKING),
HCTX_FLAG_NAME(TAG_HCTX_SHARED),
HCTX_FLAG_NAME(BLOCKING),
- HCTX_FLAG_NAME(NO_SCHED),
+ HCTX_FLAG_NAME(TAG_RR),
HCTX_FLAG_NAME(NO_SCHED_BY_DEFAULT),
};
#undef HCTX_FLAG_NAME
@@ -194,22 +186,11 @@ static const char *const hctx_flag_name[] = {
static int hctx_flags_show(void *data, struct seq_file *m)
{
struct blk_mq_hw_ctx *hctx = data;
- const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags);
- BUILD_BUG_ON(ARRAY_SIZE(hctx_flag_name) !=
- BLK_MQ_F_ALLOC_POLICY_START_BIT);
- BUILD_BUG_ON(ARRAY_SIZE(alloc_policy_name) != BLK_TAG_ALLOC_MAX);
+ BUILD_BUG_ON(ARRAY_SIZE(hctx_flag_name) != ilog2(BLK_MQ_F_MAX));
- seq_puts(m, "alloc_policy=");
- if (alloc_policy < ARRAY_SIZE(alloc_policy_name) &&
- alloc_policy_name[alloc_policy])
- seq_puts(m, alloc_policy_name[alloc_policy]);
- else
- seq_printf(m, "%d", alloc_policy);
- seq_puts(m, " ");
- blk_flags_show(m,
- hctx->flags ^ BLK_ALLOC_POLICY_TO_MQ_FLAG(alloc_policy),
- hctx_flag_name, ARRAY_SIZE(hctx_flag_name));
+ blk_flags_show(m, hctx->flags, hctx_flag_name,
+ ARRAY_SIZE(hctx_flag_name));
seq_puts(m, "\n");
return 0;
}
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
deleted file mode 100644
index d47b5c73c9eb..000000000000
--- a/block/blk-mq-pci.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2016 Christoph Hellwig.
- */
-#include <linux/kobject.h>
-#include <linux/blkdev.h>
-#include <linux/blk-mq-pci.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-
-#include "blk-mq.h"
-
-/**
- * blk_mq_pci_map_queues - provide a default queue mapping for PCI device
- * @qmap: CPU to hardware queue map.
- * @pdev: PCI device associated with @set.
- * @offset: Offset to use for the pci irq vector
- *
- * This function assumes the PCI device @pdev has at least as many available
- * interrupt vectors as @set has queues. It will then query the vector
- * corresponding to each queue for it's affinity mask and built queue mapping
- * that maps a queue to the CPUs that have irq affinity for the corresponding
- * vector.
- */
-void blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev,
- int offset)
-{
- const struct cpumask *mask;
- unsigned int queue, cpu;
-
- for (queue = 0; queue < qmap->nr_queues; queue++) {
- mask = pci_irq_get_affinity(pdev, queue + offset);
- if (!mask)
- goto fallback;
-
- for_each_cpu(cpu, mask)
- qmap->mq_map[cpu] = qmap->queue_offset + queue;
- }
-
- return;
-
-fallback:
- WARN_ON_ONCE(qmap->nr_queues > 1);
- blk_mq_clear_mq_map(qmap);
-}
-EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 451a2c1f1f32..7442ca27c2bf 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -351,8 +351,7 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
- if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
- list_empty_careful(&ctx->rq_lists[type]))
+ if (list_empty_careful(&ctx->rq_lists[type]))
goto out_put;
/* default per sw-queue merge */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 2cafcf11ee8b..b9f417d980b4 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -544,30 +544,11 @@ static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
node);
}
-int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
- struct sbitmap_queue *breserved_tags,
- unsigned int queue_depth, unsigned int reserved,
- int node, int alloc_policy)
-{
- unsigned int depth = queue_depth - reserved;
- bool round_robin = alloc_policy == BLK_TAG_ALLOC_RR;
-
- if (bt_alloc(bitmap_tags, depth, round_robin, node))
- return -ENOMEM;
- if (bt_alloc(breserved_tags, reserved, round_robin, node))
- goto free_bitmap_tags;
-
- return 0;
-
-free_bitmap_tags:
- sbitmap_queue_free(bitmap_tags);
- return -ENOMEM;
-}
-
struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
- unsigned int reserved_tags,
- int node, int alloc_policy)
+ unsigned int reserved_tags, unsigned int flags, int node)
{
+ unsigned int depth = total_tags - reserved_tags;
+ bool round_robin = flags & BLK_MQ_F_TAG_RR;
struct blk_mq_tags *tags;
if (total_tags > BLK_MQ_TAG_MAX) {
@@ -582,14 +563,18 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags;
spin_lock_init(&tags->lock);
+ if (bt_alloc(&tags->bitmap_tags, depth, round_robin, node))
+ goto out_free_tags;
+ if (bt_alloc(&tags->breserved_tags, reserved_tags, round_robin, node))
+ goto out_free_bitmap_tags;
- if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags,
- total_tags, reserved_tags, node,
- alloc_policy) < 0) {
- kfree(tags);
- return NULL;
- }
return tags;
+
+out_free_bitmap_tags:
+ sbitmap_queue_free(&tags->bitmap_tags);
+out_free_tags:
+ kfree(tags);
+ return NULL;
}
void blk_mq_free_tags(struct blk_mq_tags *tags)
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c
deleted file mode 100644
index 68d0945c0b08..000000000000
--- a/block/blk-mq-virtio.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2016 Christoph Hellwig.
- */
-#include <linux/device.h>
-#include <linux/blk-mq-virtio.h>
-#include <linux/virtio_config.h>
-#include <linux/module.h>
-#include "blk-mq.h"
-
-/**
- * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device
- * @qmap: CPU to hardware queue map.
- * @vdev: virtio device to provide a mapping for.
- * @first_vec: first interrupt vectors to use for queues (usually 0)
- *
- * This function assumes the virtio device @vdev has at least as many available
- * interrupt vectors as @set has queues. It will then query the vector
- * corresponding to each queue for it's affinity mask and built queue mapping
- * that maps a queue to the CPUs that have irq affinity for the corresponding
- * vector.
- */
-void blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap,
- struct virtio_device *vdev, int first_vec)
-{
- const struct cpumask *mask;
- unsigned int queue, cpu;
-
- if (!vdev->config->get_vq_affinity)
- goto fallback;
-
- for (queue = 0; queue < qmap->nr_queues; queue++) {
- mask = vdev->config->get_vq_affinity(vdev, first_vec + queue);
- if (!mask)
- goto fallback;
-
- for_each_cpu(cpu, mask)
- qmap->mq_map[cpu] = qmap->queue_offset + queue;
- }
-
- return;
-
-fallback:
- blk_mq_map_queues(qmap);
-}
-EXPORT_SYMBOL_GPL(blk_mq_virtio_map_queues);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8ac19d4ae3c0..da39a1cac702 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -131,6 +131,10 @@ static bool blk_freeze_set_owner(struct request_queue *q,
if (!q->mq_freeze_depth) {
q->mq_freeze_owner = owner;
q->mq_freeze_owner_depth = 1;
+ q->mq_freeze_disk_dead = !q->disk ||
+ test_bit(GD_DEAD, &q->disk->state) ||
+ !blk_queue_registered(q);
+ q->mq_freeze_queue_dying = blk_queue_dying(q);
return true;
}
@@ -142,8 +146,6 @@ static bool blk_freeze_set_owner(struct request_queue *q,
/* verify the last unfreeze in owner context */
static bool blk_unfreeze_check_owner(struct request_queue *q)
{
- if (!q->mq_freeze_owner)
- return false;
if (q->mq_freeze_owner != current)
return false;
if (--q->mq_freeze_owner_depth == 0) {
@@ -189,7 +191,7 @@ bool __blk_freeze_queue_start(struct request_queue *q,
void blk_freeze_queue_start(struct request_queue *q)
{
if (__blk_freeze_queue_start(q, current))
- blk_freeze_acquire_lock(q, false, false);
+ blk_freeze_acquire_lock(q);
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -237,7 +239,7 @@ bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
void blk_mq_unfreeze_queue(struct request_queue *q)
{
if (__blk_mq_unfreeze_queue(q, false))
- blk_unfreeze_release_lock(q, false, false);
+ blk_unfreeze_release_lock(q);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -2656,8 +2658,10 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK;
+ rq->bio = rq->biotail = bio;
rq->__sector = bio->bi_iter.bi_sector;
- blk_rq_bio_prep(rq, bio, nr_segs);
+ rq->__data_len = bio->bi_iter.bi_size;
+ rq->nr_phys_segments = nr_segs;
if (bio_integrity(bio))
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q,
bio);
@@ -2980,12 +2984,9 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
}
rq = __blk_mq_alloc_requests(&data);
- if (rq)
- return rq;
- rq_qos_cleanup(q, bio);
- if (bio->bi_opf & REQ_NOWAIT)
- bio_wouldblock_error(bio);
- return NULL;
+ if (unlikely(!rq))
+ rq_qos_cleanup(q, bio);
+ return rq;
}
/*
@@ -3092,14 +3093,21 @@ void blk_mq_submit_bio(struct bio *bio)
}
/*
- * Device reconfiguration may change logical block size, so alignment
- * check has to be done with queue usage counter held
+ * Device reconfiguration may change logical block size or reduce the
+ * number of poll queues, so the checks for alignment and poll support
+ * have to be done with queue usage counter held.
*/
if (unlikely(bio_unaligned(bio, q))) {
bio_io_error(bio);
goto queue_exit;
}
+ if ((bio->bi_opf & REQ_POLLED) && !blk_mq_can_poll(q)) {
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ goto queue_exit;
+ }
+
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
goto queue_exit;
@@ -3114,12 +3122,15 @@ void blk_mq_submit_bio(struct bio *bio)
goto queue_exit;
new_request:
- if (!rq) {
+ if (rq) {
+ blk_mq_use_cached_rq(rq, plug, bio);
+ } else {
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
- if (unlikely(!rq))
+ if (unlikely(!rq)) {
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
goto queue_exit;
- } else {
- blk_mq_use_cached_rq(rq, plug, bio);
+ }
}
trace_block_getrq(bio);
@@ -3472,8 +3483,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
if (node == NUMA_NO_NODE)
node = set->numa_node;
- tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
- BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
+ tags = blk_mq_init_tags(nr_tags, reserved_tags, set->flags, node);
if (!tags)
return NULL;
@@ -4317,12 +4327,6 @@ void blk_mq_release(struct request_queue *q)
blk_mq_sysfs_deinit(q);
}
-static bool blk_mq_can_poll(struct blk_mq_tag_set *set)
-{
- return set->nr_maps > HCTX_TYPE_POLL &&
- set->map[HCTX_TYPE_POLL].nr_queues;
-}
-
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
struct queue_limits *lim, void *queuedata)
{
@@ -4333,7 +4337,7 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
if (!lim)
lim = &default_lim;
lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
- if (blk_mq_can_poll(set))
+ if (set->nr_maps > HCTX_TYPE_POLL)
lim->features |= BLK_FEAT_POLL;
q = blk_alloc_queue(lim, set->numa_node);
@@ -5021,8 +5025,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
- struct queue_limits lim;
-
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
@@ -5036,13 +5038,6 @@ fallback:
set->nr_hw_queues = prev_nr_hw_queues;
goto fallback;
}
- lim = queue_limits_start_update(q);
- if (blk_mq_can_poll(set))
- lim.features |= BLK_FEAT_POLL;
- else
- lim.features &= ~BLK_FEAT_POLL;
- if (queue_limits_commit_update(q, &lim) < 0)
- pr_warn("updating the poll flag failed\n");
blk_mq_map_swqueue(q);
}
@@ -5102,9 +5097,9 @@ static int blk_hctx_poll(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
int blk_mq_poll(struct request_queue *q, blk_qc_t cookie,
struct io_comp_batch *iob, unsigned int flags)
{
- struct blk_mq_hw_ctx *hctx = xa_load(&q->hctx_table, cookie);
-
- return blk_hctx_poll(q, hctx, iob, flags);
+ if (!blk_mq_can_poll(q))
+ return 0;
+ return blk_hctx_poll(q, xa_load(&q->hctx_table, cookie), iob, flags);
}
int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 89a20fffa4b1..44979e92b79f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -163,11 +163,8 @@ struct blk_mq_alloc_data {
};
struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
- unsigned int reserved_tags, int node, int alloc_policy);
+ unsigned int reserved_tags, unsigned int flags, int node);
void blk_mq_free_tags(struct blk_mq_tags *tags);
-int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
- struct sbitmap_queue *breserved_tags, unsigned int queue_depth,
- unsigned int reserved, int node, int alloc_policy);
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
@@ -451,4 +448,10 @@ do { \
#define blk_mq_run_dispatch_ops(q, dispatch_ops) \
__blk_mq_run_dispatch_ops(q, true, dispatch_ops) \
+static inline bool blk_mq_can_poll(struct request_queue *q)
+{
+ return (q->limits.features & BLK_FEAT_POLL) &&
+ q->tag_set->map[HCTX_TYPE_POLL].nr_queues;
+}
+
#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8f09e33f41f6..db12396ff5c7 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -175,6 +175,9 @@ static void blk_validate_atomic_write_limits(struct queue_limits *lim)
{
unsigned int boundary_sectors;
+ if (!(lim->features & BLK_FEAT_ATOMIC_WRITES))
+ goto unsupported;
+
if (!lim->atomic_write_hw_max)
goto unsupported;
@@ -413,7 +416,8 @@ int blk_set_default_limits(struct queue_limits *lim)
* @lim: limits to apply
*
* Apply the limits in @lim that were obtained from queue_limits_start_update()
- * and updated by the caller to @q.
+ * and updated by the caller to @q. The caller must have frozen the queue or
+ * ensure that there are no outstanding I/Os by other means.
*
* Returns 0 if successful, else a negative error code.
*/
@@ -444,6 +448,30 @@ out_unlock:
EXPORT_SYMBOL_GPL(queue_limits_commit_update);
/**
+ * queue_limits_commit_update_frozen - commit an atomic update of queue limits
+ * @q: queue to update
+ * @lim: limits to apply
+ *
+ * Apply the limits in @lim that were obtained from queue_limits_start_update()
+ * and updated with the new values by the caller to @q. Freezes the queue
+ * before the update and unfreezes it after.
+ *
+ * Returns 0 if successful, else a negative error code.
+ */
+int queue_limits_commit_update_frozen(struct request_queue *q,
+ struct queue_limits *lim)
+{
+ int ret;
+
+ blk_mq_freeze_queue(q);
+ ret = queue_limits_commit_update(q, lim);
+ blk_mq_unfreeze_queue(q);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_limits_commit_update_frozen);
+
+/**
* queue_limits_set - apply queue limits to queue
* @q: queue to update
* @lim: limits to apply
@@ -584,12 +612,15 @@ static bool blk_stack_atomic_writes_head(struct queue_limits *t,
}
static void blk_stack_atomic_writes_limits(struct queue_limits *t,
- struct queue_limits *b)
+ struct queue_limits *b, sector_t start)
{
- if (!(t->features & BLK_FEAT_ATOMIC_WRITES_STACKED))
+ if (!(b->features & BLK_FEAT_ATOMIC_WRITES))
+ goto unsupported;
+
+ if (!b->atomic_write_hw_unit_min)
goto unsupported;
- if (!b->atomic_write_unit_min)
+ if (!blk_atomic_write_start_sect_aligned(start, b))
goto unsupported;
/*
@@ -611,7 +642,6 @@ unsupported:
t->atomic_write_hw_unit_max = 0;
t->atomic_write_hw_unit_min = 0;
t->atomic_write_hw_boundary = 0;
- t->features &= ~BLK_FEAT_ATOMIC_WRITES_STACKED;
}
/**
@@ -774,7 +804,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->zone_write_granularity = 0;
t->max_zone_append_sectors = 0;
}
- blk_stack_atomic_writes_limits(t, b);
+ blk_stack_atomic_writes_limits(t, b, start);
return ret;
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 767598e719ab..e09b455874bf 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -24,6 +24,8 @@ struct queue_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct gendisk *disk, char *page);
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
+ int (*store_limit)(struct gendisk *disk, const char *page,
+ size_t count, struct queue_limits *lim);
void (*load_module)(struct gendisk *disk, const char *page, size_t count);
};
@@ -153,13 +155,11 @@ QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
-static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
- const char *page, size_t count)
+static int queue_max_discard_sectors_store(struct gendisk *disk,
+ const char *page, size_t count, struct queue_limits *lim)
{
unsigned long max_discard_bytes;
- struct queue_limits lim;
ssize_t ret;
- int err;
ret = queue_var_store(&max_discard_bytes, page, count);
if (ret < 0)
@@ -171,38 +171,28 @@ static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
return -EINVAL;
- lim = queue_limits_start_update(disk->queue);
- lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
- err = queue_limits_commit_update(disk->queue, &lim);
- if (err)
- return err;
- return ret;
+ lim->max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
+ return 0;
}
-static ssize_t
-queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
+static int
+queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
+ struct queue_limits *lim)
{
unsigned long max_sectors_kb;
- struct queue_limits lim;
ssize_t ret;
- int err;
ret = queue_var_store(&max_sectors_kb, page, count);
if (ret < 0)
return ret;
- lim = queue_limits_start_update(disk->queue);
- lim.max_user_sectors = max_sectors_kb << 1;
- err = queue_limits_commit_update(disk->queue, &lim);
- if (err)
- return err;
- return ret;
+ lim->max_user_sectors = max_sectors_kb << 1;
+ return 0;
}
static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
- size_t count, blk_features_t feature)
+ size_t count, struct queue_limits *lim, blk_features_t feature)
{
- struct queue_limits lim;
unsigned long val;
ssize_t ret;
@@ -210,15 +200,11 @@ static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
if (ret < 0)
return ret;
- lim = queue_limits_start_update(disk->queue);
if (val)
- lim.features |= feature;
+ lim->features |= feature;
else
- lim.features &= ~feature;
- ret = queue_limits_commit_update(disk->queue, &lim);
- if (ret)
- return ret;
- return count;
+ lim->features &= ~feature;
+ return 0;
}
#define QUEUE_SYSFS_FEATURE(_name, _feature) \
@@ -227,10 +213,10 @@ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
return sysfs_emit(page, "%u\n", \
!!(disk->queue->limits.features & _feature)); \
} \
-static ssize_t queue_##_name##_store(struct gendisk *disk, \
- const char *page, size_t count) \
+static int queue_##_name##_store(struct gendisk *disk, \
+ const char *page, size_t count, struct queue_limits *lim) \
{ \
- return queue_feature_store(disk, page, count, _feature); \
+ return queue_feature_store(disk, page, count, lim, _feature); \
}
QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
@@ -245,10 +231,17 @@ static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
!!(disk->queue->limits.features & _feature)); \
}
-QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL);
QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
+static ssize_t queue_poll_show(struct gendisk *disk, char *page)
+{
+ if (queue_is_mq(disk->queue))
+ return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
+ return sysfs_emit(page, "%u\n",
+ !!(disk->queue->limits.features & BLK_FEAT_POLL));
+}
+
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
if (blk_queue_is_zoned(disk->queue))
@@ -266,10 +259,9 @@ static ssize_t queue_iostats_passthrough_show(struct gendisk *disk, char *page)
return queue_var_show(!!blk_queue_passthrough_stat(disk->queue), page);
}
-static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
- const char *page, size_t count)
+static int queue_iostats_passthrough_store(struct gendisk *disk,
+ const char *page, size_t count, struct queue_limits *lim)
{
- struct queue_limits lim;
unsigned long ios;
ssize_t ret;
@@ -277,18 +269,13 @@ static ssize_t queue_iostats_passthrough_store(struct gendisk *disk,
if (ret < 0)
return ret;
- lim = queue_limits_start_update(disk->queue);
if (ios)
- lim.flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
+ lim->flags |= BLK_FLAG_IOSTATS_PASSTHROUGH;
else
- lim.flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
-
- ret = queue_limits_commit_update(disk->queue, &lim);
- if (ret)
- return ret;
-
- return count;
+ lim->flags &= ~BLK_FLAG_IOSTATS_PASSTHROUGH;
+ return 0;
}
+
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
@@ -391,12 +378,10 @@ static ssize_t queue_wc_show(struct gendisk *disk, char *page)
return sysfs_emit(page, "write through\n");
}
-static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
- size_t count)
+static int queue_wc_store(struct gendisk *disk, const char *page,
+ size_t count, struct queue_limits *lim)
{
- struct queue_limits lim;
bool disable;
- int err;
if (!strncmp(page, "write back", 10)) {
disable = false;
@@ -407,15 +392,11 @@ static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
return -EINVAL;
}
- lim = queue_limits_start_update(disk->queue);
if (disable)
- lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
+ lim->flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
else
- lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
- err = queue_limits_commit_update(disk->queue, &lim);
- if (err)
- return err;
- return count;
+ lim->flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
+ return 0;
}
#define QUEUE_RO_ENTRY(_prefix, _name) \
@@ -431,6 +412,13 @@ static struct queue_sysfs_entry _prefix##_entry = { \
.store = _prefix##_store, \
};
+#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
+static struct queue_sysfs_entry _prefix##_entry = { \
+ .attr = { .name = _name, .mode = 0644 }, \
+ .show = _prefix##_show, \
+ .store_limit = _prefix##_store, \
+}
+
#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0644 }, \
@@ -441,7 +429,7 @@ static struct queue_sysfs_entry _prefix##_entry = { \
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
-QUEUE_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
+QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
@@ -457,7 +445,7 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
-QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
+QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
@@ -477,11 +465,11 @@ QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
-QUEUE_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
+QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
-QUEUE_RW_ENTRY(queue_wc, "write_cache");
+QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
QUEUE_RO_ENTRY(queue_fua, "fua");
QUEUE_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
@@ -494,10 +482,10 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.show = queue_logical_block_size_show,
};
-QUEUE_RW_ENTRY(queue_rotational, "rotational");
-QUEUE_RW_ENTRY(queue_iostats, "iostats");
-QUEUE_RW_ENTRY(queue_add_random, "add_random");
-QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
+QUEUE_LIM_RW_ENTRY(queue_iostats, "iostats");
+QUEUE_LIM_RW_ENTRY(queue_add_random, "add_random");
+QUEUE_LIM_RW_ENTRY(queue_stable_writes, "stable_writes");
#ifdef CONFIG_BLK_WBT
static ssize_t queue_var_store64(s64 *var, const char *page)
@@ -693,9 +681,10 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
+ unsigned int noio_flag;
ssize_t res;
- if (!entry->store)
+ if (!entry->store_limit && !entry->store)
return -EIO;
/*
@@ -706,11 +695,28 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (entry->load_module)
entry->load_module(disk, page, length);
- blk_mq_freeze_queue(q);
+ if (entry->store_limit) {
+ struct queue_limits lim = queue_limits_start_update(q);
+
+ res = entry->store_limit(disk, page, length, &lim);
+ if (res < 0) {
+ queue_limits_cancel_update(q);
+ return res;
+ }
+
+ res = queue_limits_commit_update_frozen(q, &lim);
+ if (res)
+ return res;
+ return length;
+ }
+
mutex_lock(&q->sysfs_lock);
+ blk_mq_freeze_queue(q);
+ noio_flag = memalloc_noio_save();
res = entry->store(disk, page, length);
- mutex_unlock(&q->sysfs_lock);
+ memalloc_noio_restore(noio_flag);
blk_mq_unfreeze_queue(q);
+ mutex_unlock(&q->sysfs_lock);
return res;
}
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 84da1eadff64..9d08a54c201e 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -11,12 +11,8 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/refcount.h>
#include <linux/mempool.h>
@@ -463,6 +459,8 @@ static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
struct blk_zone_wplug *zwplug)
{
+ lockdep_assert_held(&zwplug->lock);
+
/* If the zone write plug was already removed, we are done. */
if (zwplug->flags & BLK_ZONE_WPLUG_UNHASHED)
return false;
@@ -584,6 +582,7 @@ static inline void blk_zone_wplug_bio_io_error(struct blk_zone_wplug *zwplug,
bio_clear_flag(bio, BIO_ZONE_WRITE_PLUGGING);
bio_io_error(bio);
disk_put_zone_wplug(zwplug);
+ /* Drop the reference taken by disk_zone_wplug_add_bio(). */
blk_queue_exit(q);
}
@@ -895,10 +894,7 @@ void blk_zone_write_plug_init_request(struct request *req)
break;
}
- /*
- * Drop the extra reference on the queue usage we got when
- * plugging the BIO and advance the write pointer offset.
- */
+ /* Drop the reference taken by disk_zone_wplug_add_bio(). */
blk_queue_exit(q);
zwplug->wp_offset += bio_sectors(bio);
@@ -917,6 +913,8 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
{
struct gendisk *disk = bio->bi_bdev->bd_disk;
+ lockdep_assert_held(&zwplug->lock);
+
/*
* If we lost track of the zone write pointer due to a write error,
* the user must either execute a report zones, reset the zone or finish
@@ -1446,7 +1444,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
unsigned int nr_seq_zones, nr_conv_zones;
unsigned int pool_size;
struct queue_limits lim;
- int ret;
disk->nr_zones = args->nr_zones;
disk->zone_capacity = args->zone_capacity;
@@ -1497,11 +1494,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
}
commit:
- blk_mq_freeze_queue(q);
- ret = queue_limits_commit_update(q, &lim);
- blk_mq_unfreeze_queue(q);
-
- return ret;
+ return queue_limits_commit_update_frozen(q, &lim);
}
static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
@@ -1776,37 +1769,41 @@ int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL_GPL(blk_zone_issue_zeroout);
#ifdef CONFIG_BLK_DEBUG_FS
+static void queue_zone_wplug_show(struct blk_zone_wplug *zwplug,
+ struct seq_file *m)
+{
+ unsigned int zwp_wp_offset, zwp_flags;
+ unsigned int zwp_zone_no, zwp_ref;
+ unsigned int zwp_bio_list_size;
+ unsigned long flags;
+
+ spin_lock_irqsave(&zwplug->lock, flags);
+ zwp_zone_no = zwplug->zone_no;
+ zwp_flags = zwplug->flags;
+ zwp_ref = refcount_read(&zwplug->ref);
+ zwp_wp_offset = zwplug->wp_offset;
+ zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
+ spin_unlock_irqrestore(&zwplug->lock, flags);
+
+ seq_printf(m, "%u 0x%x %u %u %u\n", zwp_zone_no, zwp_flags, zwp_ref,
+ zwp_wp_offset, zwp_bio_list_size);
+}
int queue_zone_wplugs_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
struct gendisk *disk = q->disk;
struct blk_zone_wplug *zwplug;
- unsigned int zwp_wp_offset, zwp_flags;
- unsigned int zwp_zone_no, zwp_ref;
- unsigned int zwp_bio_list_size, i;
- unsigned long flags;
+ unsigned int i;
if (!disk->zone_wplugs_hash)
return 0;
rcu_read_lock();
- for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++) {
- hlist_for_each_entry_rcu(zwplug,
- &disk->zone_wplugs_hash[i], node) {
- spin_lock_irqsave(&zwplug->lock, flags);
- zwp_zone_no = zwplug->zone_no;
- zwp_flags = zwplug->flags;
- zwp_ref = refcount_read(&zwplug->ref);
- zwp_wp_offset = zwplug->wp_offset;
- zwp_bio_list_size = bio_list_size(&zwplug->bio_list);
- spin_unlock_irqrestore(&zwplug->lock, flags);
-
- seq_printf(m, "%u 0x%x %u %u %u\n",
- zwp_zone_no, zwp_flags, zwp_ref,
- zwp_wp_offset, zwp_bio_list_size);
- }
- }
+ for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
+ hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[i],
+ node)
+ queue_zone_wplug_show(zwplug, m);
rcu_read_unlock();
return 0;
diff --git a/block/blk.h b/block/blk.h
index 2c26abf505b8..90fa5f28ccab 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -13,6 +13,8 @@
struct elevator_type;
+#define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9)
+
/* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ)
@@ -556,14 +558,6 @@ void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors);
struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
struct lock_class_key *lkclass);
-int bio_add_hw_page(struct request_queue *q, struct bio *bio,
- struct page *page, unsigned int len, unsigned int offset,
- unsigned int max_sectors, bool *same_page);
-
-int bio_add_hw_folio(struct request_queue *q, struct bio *bio,
- struct folio *folio, size_t len, size_t offset,
- unsigned int max_sectors, bool *same_page);
-
/*
* Clean up a page appropriately, where the page may be pinned, may have a
* ref taken on it or neither.
@@ -720,22 +714,29 @@ void blk_integrity_verify(struct bio *bio);
void blk_integrity_prepare(struct request *rq);
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
-static inline void blk_freeze_acquire_lock(struct request_queue *q, bool
- disk_dead, bool queue_dying)
+#ifdef CONFIG_LOCKDEP
+static inline void blk_freeze_acquire_lock(struct request_queue *q)
{
- if (!disk_dead)
+ if (!q->mq_freeze_disk_dead)
rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
- if (!queue_dying)
+ if (!q->mq_freeze_queue_dying)
rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
}
-static inline void blk_unfreeze_release_lock(struct request_queue *q, bool
- disk_dead, bool queue_dying)
+static inline void blk_unfreeze_release_lock(struct request_queue *q)
{
- if (!queue_dying)
+ if (!q->mq_freeze_queue_dying)
rwsem_release(&q->q_lockdep_map, _RET_IP_);
- if (!disk_dead)
+ if (!q->mq_freeze_disk_dead)
rwsem_release(&q->io_lockdep_map, _RET_IP_);
}
+#else
+static inline void blk_freeze_acquire_lock(struct request_queue *q)
+{
+}
+static inline void blk_unfreeze_release_lock(struct request_queue *q)
+{
+}
+#endif
#endif /* BLK_INTERNAL_H */
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 32da4a4429ce..93523d8f8195 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -381,7 +381,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
set->queue_depth = 128;
set->numa_node = NUMA_NO_NODE;
set->cmd_size = sizeof(struct bsg_job) + dd_job_size;
- set->flags = BLK_MQ_F_NO_SCHED | BLK_MQ_F_BLOCKING;
+ set->flags = BLK_MQ_F_BLOCKING;
if (blk_mq_alloc_tag_set(set))
goto out_tag_set;
diff --git a/block/elevator.c b/block/elevator.c
index 7c3ba80e5ff4..b81216c48b6b 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -405,12 +405,12 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
return NULL;
}
-#define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
+#define to_elv(atr) container_of_const((atr), struct elv_fs_entry, attr)
static ssize_t
elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
- struct elv_fs_entry *entry = to_elv(attr);
+ const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
ssize_t error;
@@ -428,7 +428,7 @@ static ssize_t
elv_attr_store(struct kobject *kobj, struct attribute *attr,
const char *page, size_t length)
{
- struct elv_fs_entry *entry = to_elv(attr);
+ const struct elv_fs_entry *entry = to_elv(attr);
struct elevator_queue *e;
ssize_t error;
@@ -461,7 +461,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
- struct elv_fs_entry *attr = e->type->elevator_attrs;
+ const struct elv_fs_entry *attr = e->type->elevator_attrs;
if (attr) {
while (attr->attr.name) {
if (sysfs_create_file(&e->kobj, &attr->attr))
@@ -547,14 +547,6 @@ void elv_unregister(struct elevator_type *e)
}
EXPORT_SYMBOL_GPL(elv_unregister);
-static inline bool elv_support_iosched(struct request_queue *q)
-{
- if (!queue_is_mq(q) ||
- (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
- return false;
- return true;
-}
-
/*
* For single queue devices, default to using mq-deadline. If we have multiple
* queues or mq-deadline is not available, default to "none".
@@ -580,9 +572,6 @@ void elevator_init_mq(struct request_queue *q)
struct elevator_type *e;
int err;
- if (!elv_support_iosched(q))
- return;
-
WARN_ON_ONCE(blk_queue_registered(q));
if (unlikely(q->elevator))
@@ -601,16 +590,13 @@ void elevator_init_mq(struct request_queue *q)
*
* Disk isn't added yet, so verifying queue lock only manually.
*/
- blk_freeze_queue_start_non_owner(q);
- blk_freeze_acquire_lock(q, true, false);
- blk_mq_freeze_queue_wait(q);
+ blk_mq_freeze_queue(q);
blk_mq_cancel_work_sync(q);
err = blk_mq_init_sched(q, e);
- blk_unfreeze_release_lock(q, true, false);
- blk_mq_unfreeze_queue_non_owner(q);
+ blk_mq_unfreeze_queue(q);
if (err) {
pr_warn("\"%s\" elevator initialization failed, "
@@ -717,9 +703,6 @@ void elv_iosched_load_module(struct gendisk *disk, const char *buf,
struct elevator_type *found;
const char *name;
- if (!elv_support_iosched(disk->queue))
- return;
-
strscpy(elevator_name, buf, sizeof(elevator_name));
name = strstrip(elevator_name);
@@ -737,9 +720,6 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
char elevator_name[ELV_NAME_MAX];
int ret;
- if (!elv_support_iosched(disk->queue))
- return count;
-
strscpy(elevator_name, buf, sizeof(elevator_name));
ret = elevator_change(disk->queue, strstrip(elevator_name));
if (!ret)
@@ -754,9 +734,6 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
struct elevator_type *cur = NULL, *e;
int len = 0;
- if (!elv_support_iosched(q))
- return sprintf(name, "none\n");
-
if (!q->elevator) {
len += sprintf(name+len, "[none] ");
} else {
diff --git a/block/elevator.h b/block/elevator.h
index dbf357ef4fab..e526662c5dbb 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -71,7 +71,7 @@ struct elevator_type
size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */
- struct elv_fs_entry *elevator_attrs;
+ const struct elv_fs_entry *elevator_attrs;
const char *elevator_name;
const char *elevator_alias;
struct module *elevator_owner;
diff --git a/block/genhd.c b/block/genhd.c
index 79230c109fca..e9375e20d866 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -58,6 +58,13 @@ static DEFINE_IDA(ext_devt_ida);
void set_capacity(struct gendisk *disk, sector_t sectors)
{
+ if (sectors > BLK_DEV_MAX_SECTORS) {
+ pr_warn_once("%s: truncate capacity from %lld to %lld\n",
+ disk->disk_name, sectors,
+ BLK_DEV_MAX_SECTORS);
+ sectors = BLK_DEV_MAX_SECTORS;
+ }
+
bdev_set_nr_sectors(disk->part0, sectors);
}
EXPORT_SYMBOL(set_capacity);
@@ -400,21 +407,26 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
struct device *ddev = disk_to_dev(disk);
int ret;
- /* Only makes sense for bio-based to set ->poll_bio */
- if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
+ if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS))
return -EINVAL;
- /*
- * The disk queue should now be all set with enough information about
- * the device for the elevator code to pick an adequate default
- * elevator if one is needed, that is, for devices requesting queue
- * registration.
- */
- elevator_init_mq(disk->queue);
+ if (queue_is_mq(disk->queue)) {
+ /*
+ * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers.
+ */
+ if (disk->fops->submit_bio || disk->fops->poll_bio)
+ return -EINVAL;
- /* Mark bdev as having a submit_bio, if needed */
- if (disk->fops->submit_bio)
+ /*
+ * Initialize the I/O scheduler code and pick a default one if
+ * needed.
+ */
+ elevator_init_mq(disk->queue);
+ } else {
+ if (!disk->fops->submit_bio)
+ return -EINVAL;
bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO);
+ }
/*
* If the driver provides an explicit major number it also must provide
@@ -661,7 +673,7 @@ void del_gendisk(struct gendisk *disk)
struct request_queue *q = disk->queue;
struct block_device *part;
unsigned long idx;
- bool start_drain, queue_dying;
+ bool start_drain;
might_sleep();
@@ -690,9 +702,8 @@ void del_gendisk(struct gendisk *disk)
*/
mutex_lock(&disk->open_mutex);
start_drain = __blk_mark_disk_dead(disk);
- queue_dying = blk_queue_dying(q);
if (start_drain)
- blk_freeze_acquire_lock(q, true, queue_dying);
+ blk_freeze_acquire_lock(q);
xa_for_each_start(&disk->part_tbl, idx, part, 1)
drop_partition(part);
mutex_unlock(&disk->open_mutex);
@@ -748,7 +759,7 @@ void del_gendisk(struct gendisk *disk)
blk_mq_exit_queue(q);
if (start_drain)
- blk_unfreeze_release_lock(q, true, queue_dying);
+ blk_unfreeze_release_lock(q);
}
EXPORT_SYMBOL(del_gendisk);
@@ -798,7 +809,7 @@ static ssize_t disk_badblocks_store(struct device *dev,
}
#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
-void blk_request_module(dev_t devt)
+static bool blk_probe_dev(dev_t devt)
{
unsigned int major = MAJOR(devt);
struct blk_major_name **n;
@@ -808,14 +819,26 @@ void blk_request_module(dev_t devt)
if ((*n)->major == major && (*n)->probe) {
(*n)->probe(devt);
mutex_unlock(&major_names_lock);
- return;
+ return true;
}
}
mutex_unlock(&major_names_lock);
+ return false;
+}
+
+void blk_request_module(dev_t devt)
+{
+ int error;
+
+ if (blk_probe_dev(devt))
+ return;
- if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
- /* Make old-style 2.4 aliases work */
- request_module("block-major-%d", MAJOR(devt));
+ error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt));
+ /* Make old-style 2.4 aliases work */
+ if (error > 0)
+ error = request_module("block-major-%d", MAJOR(devt));
+ if (!error)
+ blk_probe_dev(devt);
}
#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 4155594aefc6..dc31f2dfa414 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -889,7 +889,7 @@ KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
#undef KYBER_LAT_SHOW_STORE
#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
-static struct elv_fs_entry kyber_sched_attrs[] = {
+static const struct elv_fs_entry kyber_sched_attrs[] = {
KYBER_LAT_ATTR(read),
KYBER_LAT_ATTR(write),
__ATTR_NULL
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 5528347b5fcf..754f6b7415cd 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -834,7 +834,7 @@ STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#define DD_ATTR(name) \
__ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
-static struct elv_fs_entry deadline_attrs[] = {
+static const struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
DD_ATTR(write_expire),
DD_ATTR(writes_starved),
diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h
index e259180c8914..aa3bd050d8cd 100644
--- a/block/partitions/ldm.h
+++ b/block/partitions/ldm.h
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* ldm - Part of the Linux-NTFS project.
*
* Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>