author     Chunguang Xu <brookxu@tencent.com>    2021-08-02 11:51:56 +0800
committer  Jens Axboe <axboe@kernel.dk>          2021-08-14 19:14:56 -0600
commit     4f1e9630afe6332de7286820fedd019f19eac057
tree       470518b99f9dcee9e8c73f5772c755860bfdb279 /block/blk-throttle.c
parent     3d2e79894bd7adc7d14638a0c72ceb8b722d1fa3
blk-throtl: optimize IOPS throttle for large IO scenarios
After commit 54efd50 ("block: make generic_make_request handle
arbitrarily sized bios"), the IOs passing through io-throttle may
be larger, and these IOs may later be split into many smaller IOs.
However, the IOPS throttle is not aware of this change: a large IO
is charged as a single IO, so the IOPS accounting for large IOs is
incomplete, and the IOPS seen on the disk side can far exceed the
configured limit. We should fix this problem.
We can reproduce it by setting max_sectors_kb of the disk to 128,
setting blkio.write_iops_throttle to 100, running a dd instance
inside the blkio cgroup and using iostat to watch the IOPS:
dd if=/dev/zero of=/dev/sdb bs=1M count=1000 oflag=direct
As a result, without this change the average IOPS is 1995; with
this change it is 98.
Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/65869aaad05475797d63b4c3fed4f529febe3c26.1627876014.git.brookxu@tencent.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-throttle.c')
 block/blk-throttle.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+), 0 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index b1b22d863bdf..55c49015e533 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -178,6 +178,9 @@ struct throtl_grp {
 	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
 	unsigned long bio_cnt_reset_time;
 
+	atomic_t io_split_cnt[2];
+	atomic_t last_io_split_cnt[2];
+
 	struct blkg_rwstat stat_bytes;
 	struct blkg_rwstat stat_ios;
 };
@@ -777,6 +780,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
 	tg->bytes_disp[rw] = 0;
 	tg->io_disp[rw] = 0;
 
+	atomic_set(&tg->io_split_cnt[rw], 0);
+
 	/*
 	 * Previous slice has expired. We must have trimmed it after last
 	 * bio dispatch. That means since start of last slice, we never used
@@ -799,6 +804,9 @@ static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
 	tg->io_disp[rw] = 0;
 	tg->slice_start[rw] = jiffies;
 	tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+
+	atomic_set(&tg->io_split_cnt[rw], 0);
+
 	throtl_log(&tg->service_queue,
 		   "[%c] new slice start=%lu end=%lu jiffies=%lu",
 		   rw == READ ? 'R' : 'W', tg->slice_start[rw],
@@ -1031,6 +1039,9 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 				jiffies + tg->td->throtl_slice);
 	}
 
+	if (iops_limit != UINT_MAX)
+		tg->io_disp[rw] += atomic_xchg(&tg->io_split_cnt[rw], 0);
+
 	if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
 	    tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
 		if (wait)
@@ -2052,12 +2063,14 @@ static void throtl_downgrade_check(struct throtl_grp *tg)
 	}
 
 	if (tg->iops[READ][LIMIT_LOW]) {
+		tg->last_io_disp[READ] += atomic_xchg(&tg->last_io_split_cnt[READ], 0);
 		iops = tg->last_io_disp[READ] * HZ / elapsed_time;
 		if (iops >= tg->iops[READ][LIMIT_LOW])
 			tg->last_low_overflow_time[READ] = now;
 	}
 
 	if (tg->iops[WRITE][LIMIT_LOW]) {
+		tg->last_io_disp[WRITE] += atomic_xchg(&tg->last_io_split_cnt[WRITE], 0);
 		iops = tg->last_io_disp[WRITE] * HZ / elapsed_time;
 		if (iops >= tg->iops[WRITE][LIMIT_LOW])
 			tg->last_low_overflow_time[WRITE] = now;
@@ -2176,6 +2189,25 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
+void blk_throtl_charge_bio_split(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+	struct throtl_grp *parent = blkg_to_tg(blkg);
+	struct throtl_service_queue *parent_sq;
+	bool rw = bio_data_dir(bio);
+
+	do {
+		if (!parent->has_rules[rw])
+			break;
+
+		atomic_inc(&parent->io_split_cnt[rw]);
+		atomic_inc(&parent->last_io_split_cnt[rw]);
+
+		parent_sq = parent->service_queue.parent_sq;
+		parent = sq_to_tg(parent_sq);
+	} while (parent);
+}
+
 bool blk_throtl_bio(struct bio *bio)
 {
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
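The diffstat above is limited to block/blk-throttle.c, so the call site that feeds the new io_split_cnt counters is not shown here. As a rough sketch only (the file names and exact placement are assumptions based on the bio split path of this kernel era, not part of the hunks above), block/blk.h would export the helper with a no-op stub when CONFIG_BLK_DEV_THROTTLING is off, and the split path in block/blk-merge.c would charge each produced split bio so that tg_may_dispatch() can later fold the count into io_disp[]:

/* block/blk.h (sketch): declare the helper, stubbed out when throttling
 * is not built in, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_BLK_DEV_THROTTLING
extern void blk_throtl_charge_bio_split(struct bio *bio);
#else
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
#endif

/* block/blk-merge.c, __blk_queue_split() (sketch): once a split bio has
 * been produced and the remainder resubmitted, charge the split to the
 * submitting cgroup; tg_may_dispatch() picks the count up via
 * atomic_xchg() on the next dispatch check.
 */
	if (split) {
		/* split bios must not be merged back together */
		split->bi_opf |= REQ_NOMERGE;

		bio_chain(split, *bio);
		trace_block_split(split, (*bio)->bi_iter.bi_sector);
		submit_bio_noacct(*bio);
		*bio = split;

		blk_throtl_charge_bio_split(*bio);
	}

Charging at split time rather than per submitted bio means every fragment of a large bio counts against the IOPS budget, which is what brings the observed IOPS from roughly 1995 down to 98 in the reproduction above.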