summaryrefslogtreecommitdiffstats
path: root/mm/backing-dev.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2021-09-02 14:53:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-03 09:58:10 -0700
commit45a2966fd64147518dc5bca25f447bd0fb5359ac (patch)
tree8f4657fb497f54eb7d38479bb9d5cdb29f76eebe /mm/backing-dev.c
parentfee468fdf41cdf36ba6b5a780e2474d0a3e066ac (diff)
downloadlinux-45a2966fd64147518dc5bca25f447bd0fb5359ac.tar.gz
linux-45a2966fd64147518dc5bca25f447bd0fb5359ac.tar.bz2
linux-45a2966fd64147518dc5bca25f447bd0fb5359ac.zip
writeback: fix bandwidth estimate for spiky workload
Michael Stapelberg has reported that for workload with short big spikes of writes (GCC linker seem to trigger this frequently) the write throughput is heavily underestimated and tends to steadily sink until it reaches zero. This has rather bad impact on writeback throttling (causing stalls). The problem is that writeback throughput estimate gets updated at most once per 200 ms. One update happens early after we submit pages for writeback (at that point writeout of only small fraction of pages is completed and thus observed throughput is tiny). Next update happens only during the next write spike (updates happen only from inode writeback and dirty throttling code) and if that is more than 1s after previous spike, we decide system was idle and just ignore whatever was written until this moment. Fix the problem by making sure writeback throughput estimate is also updated shortly after writeback completes to get reasonable estimate of throughput for spiky workloads. [jack@suse.cz: avoid division by 0 in wb_update_dirty_ratelimit()] Link: https://lore.kernel.org/lkml/20210617095309.3542373-1-stapelberg+linux@google.com Link: https://lkml.kernel.org/r/20210713104716.22868-3-jack@suse.cz Signed-off-by: Jan Kara <jack@suse.cz> Reported-by: Michael Stapelberg <stapelberg+linux@google.com> Tested-by: Michael Stapelberg <stapelberg+linux@google.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r--mm/backing-dev.c10
1 files changed, 10 insertions, 0 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index b4c707ddedb1..6122c78ce914 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -271,6 +271,14 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
spin_unlock_bh(&wb->work_lock);
}
+static void wb_update_bandwidth_workfn(struct work_struct *work)
+{
+ struct bdi_writeback *wb = container_of(to_delayed_work(work),
+ struct bdi_writeback, bw_dwork);
+
+ wb_update_bandwidth(wb);
+}
+
/*
* Initial write bandwidth: 100 MB/s
*/
@@ -303,6 +311,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
spin_lock_init(&wb->work_lock);
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+ INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
wb->dirty_sleep = jiffies;
err = fprop_local_init_percpu(&wb->completions, gfp);
@@ -351,6 +360,7 @@ static void wb_shutdown(struct bdi_writeback *wb)
mod_delayed_work(bdi_wq, &wb->dwork, 0);
flush_delayed_work(&wb->dwork);
WARN_ON(!list_empty(&wb->work_list));
+ flush_delayed_work(&wb->bw_dwork);
}
static void wb_exit(struct bdi_writeback *wb)