diff options
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 81 |
1 files changed, 32 insertions, 49 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2f76c4a081a2..7d9d06ba184b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -68,7 +68,7 @@ int nr_pdflush_threads; */ int writeback_in_progress(struct backing_dev_info *bdi) { - return !list_empty(&bdi->work_list); + return test_bit(BDI_writeback_running, &bdi->state); } static void bdi_queue_work(struct backing_dev_info *bdi, @@ -249,10 +249,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, /* * Queue all expired dirty inodes for io, eldest first. + * Before + * newly dirtied b_dirty b_io b_more_io + * =============> gf edc BA + * After + * newly dirtied b_dirty b_io b_more_io + * =============> g fBAedc + * | + * +--> dequeue for IO */ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) { - list_splice_init(&wb->b_more_io, wb->b_io.prev); + list_splice_init(&wb->b_more_io, &wb->b_io); move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); } @@ -363,62 +371,35 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { - if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { - /* - * More pages get dirtied by a fast dirtier. - */ - goto select_queue; - } else if (inode->i_state & I_DIRTY) { - /* - * At least XFS will redirty the inode during the - * writeback (delalloc) and on io completion (isize). - */ - redirty_tail(inode); - } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() - * sometimes bales out without doing anything. Redirty - * the inode; Move it from b_io onto b_more_io/b_dirty. + * sometimes bales out without doing anything. */ - /* - * akpm: if the caller was the kupdate function we put - * this inode at the head of b_dirty so it gets first - * consideration. Otherwise, move it to the tail, for - * the reasons described there. I'm not really sure - * how much sense this makes. Presumably I had a good - * reasons for doing it this way, and I'd rather not - * muck with it at present. - */ - if (wbc->for_kupdate) { + inode->i_state |= I_DIRTY_PAGES; + if (wbc->nr_to_write <= 0) { /* - * For the kupdate function we move the inode - * to b_more_io so it will get more writeout as - * soon as the queue becomes uncongested. + * slice used up: queue for next turn */ - inode->i_state |= I_DIRTY_PAGES; -select_queue: - if (wbc->nr_to_write <= 0) { - /* - * slice used up: queue for next turn - */ - requeue_io(inode); - } else { - /* - * somehow blocked: retry later - */ - redirty_tail(inode); - } + requeue_io(inode); } else { /* - * Otherwise fully redirty the inode so that - * other inodes on this superblock will get some - * writeout. Otherwise heavy writing to one - * file would indefinitely suspend writeout of - * all the other files. + * Writeback blocked by something other than + * congestion. Delay the inode for some time to + * avoid spinning on the CPU (100% iowait) + * retrying writeback of the dirty page/inode + * that cannot be performed immediately. */ - inode->i_state |= I_DIRTY_PAGES; redirty_tail(inode); } + } else if (inode->i_state & I_DIRTY) { + /* + * Filesystems can dirty the inode during writeback + * operations, such as delayed allocation during + * submission or metadata updates after data IO + * completion. + */ + redirty_tail(inode); } else if (atomic_read(&inode->i_count)) { /* * The inode is clean, inuse @@ -590,7 +571,7 @@ static inline bool over_bground_thresh(void) { unsigned long background_thresh, dirty_thresh; - get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); + global_dirty_limits(&background_thresh, &dirty_thresh); return (global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) >= background_thresh); @@ -759,6 +740,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) struct wb_writeback_work *work; long wrote = 0; + set_bit(BDI_writeback_running, &wb->bdi->state); while ((work = get_next_work_item(bdi)) != NULL) { /* * Override sync mode, in case we must wait for completion @@ -785,6 +767,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) * Check for periodic writeback, kupdated() style */ wrote += wb_check_old_data_flush(wb); + clear_bit(BDI_writeback_running, &wb->bdi->state); return wrote; } |