From 6d840a18773f36baaecc2d2f7fb18ec5862349e6 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:02 +0000
Subject: buffer: return bool from grow_dev_folio()

Patch series "More buffer_head cleanups", v2.

The first patch is a left-over from last cycle.  The rest fix "obvious"
block size > PAGE_SIZE problems.  I haven't tested with a large block
size setup (but I have done an ext4 xfstests run).

This patch (of 7):

Rename grow_dev_page() to grow_dev_folio() and make it return a bool.
Document what that bool means; it's more subtle than it first appears.
Also rename the 'failed' label to 'unlock' because it's not exactly
'failed'.  It just hasn't succeeded.

Link: https://lkml.kernel.org/r/20231109210608.2252323-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Pankaj Raghav
Cc: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 50 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..8dad6c691e14 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1024,40 +1024,43 @@ static sector_t folio_init_buffers(struct folio *folio,
 }
 
 /*
- * Create the page-cache page that contains the requested block.
+ * Create the page-cache folio that contains the requested block.
  *
  * This is used purely for blockdev mappings.
+ *
+ * Returns false if we have a 'permanent' failure.  Returns true if
+ * we succeeded, or the caller should retry.
  */
-static int
-grow_dev_page(struct block_device *bdev, sector_t block,
-                pgoff_t index, int size, int sizebits, gfp_t gfp)
+static bool grow_dev_folio(struct block_device *bdev, sector_t block,
+                pgoff_t index, unsigned size, int sizebits, gfp_t gfp)
 {
         struct inode *inode = bdev->bd_inode;
         struct folio *folio;
         struct buffer_head *bh;
-        sector_t end_block;
-        int ret = 0;
+        sector_t end_block = 0;
 
         folio = __filemap_get_folio(inode->i_mapping, index,
                         FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
         if (IS_ERR(folio))
-                return PTR_ERR(folio);
+                return false;
 
         bh = folio_buffers(folio);
         if (bh) {
                 if (bh->b_size == size) {
                         end_block = folio_init_buffers(folio, bdev,
                                         (sector_t)index << sizebits, size);
-                        goto done;
+                        goto unlock;
                 }
+
+                /* Caller should retry if this call fails */
+                end_block = ~0ULL;
                 if (!try_to_free_buffers(folio))
-                        goto failed;
+                        goto unlock;
         }
 
-        ret = -ENOMEM;
         bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT);
         if (!bh)
-                goto failed;
+                goto unlock;
 
         /*
          * Link the folio to the buffers and initialise them.  Take the
@@ -1069,20 +1072,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
         end_block = folio_init_buffers(folio, bdev,
                         (sector_t)index << sizebits, size);
         spin_unlock(&inode->i_mapping->private_lock);
-done:
-        ret = (block < end_block) ? 1 : -ENXIO;
-failed:
+unlock:
         folio_unlock(folio);
         folio_put(folio);
-        return ret;
+        return block < end_block;
 }
 
 /*
- * Create buffers for the specified block device block's page.  If
- * that page was dirty, the buffers are set dirty also.
+ * Create buffers for the specified block device block's folio.  If
+ * that folio was dirty, the buffers are set dirty also.  Returns false
+ * if we've hit a permanent error.
  */
-static int
-grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
+static bool grow_buffers(struct block_device *bdev, sector_t block,
+                unsigned size, gfp_t gfp)
 {
         pgoff_t index;
         int sizebits;
@@ -1099,11 +1101,11 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
                         "device %pg\n",
                         __func__, (unsigned long long)block, bdev);
-                return -EIO;
+                return false;
         }
 
-        /* Create a page with the proper size buffers.. */
-        return grow_dev_page(bdev, block, index, size, sizebits, gfp);
+        /* Create a folio with the proper size buffers */
+        return grow_dev_folio(bdev, block, index, size, sizebits, gfp);
 }
 
 static struct buffer_head *
@@ -1124,14 +1126,12 @@ __getblk_slow(struct block_device *bdev, sector_t block,
         for (;;) {
                 struct buffer_head *bh;
-                int ret;
 
                 bh = __find_get_block(bdev, block, size);
                 if (bh)
                         return bh;
 
-                ret = grow_buffers(bdev, block, size, gfp);
-                if (ret < 0)
+                if (!grow_buffers(bdev, block, size, gfp))
                         return NULL;
         }
 }
-- cgit v1.2.3

From 382497ada051a6fc79612aba5e30cdfa26364374 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:03 +0000
Subject: buffer: calculate block number inside folio_init_buffers()

The calculation of block from index doesn't work for devices with a
block size larger than PAGE_SIZE as we end up shifting by a negative
number.  Instead, calculate the number of the first block from the
folio's position in the block device.  We no longer need to pass
sizebits to grow_dev_folio().

Link: https://lkml.kernel.org/r/20231109210608.2252323-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Pankaj Raghav
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 8dad6c691e14..44e0c0b7f71f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -995,11 +995,12 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
  * Initialise the state of a blockdev folio's buffers.
  */
 static sector_t folio_init_buffers(struct folio *folio,
-                struct block_device *bdev, sector_t block, int size)
+                struct block_device *bdev, unsigned size)
 {
         struct buffer_head *head = folio_buffers(folio);
         struct buffer_head *bh = head;
         bool uptodate = folio_test_uptodate(folio);
+        sector_t block = div_u64(folio_pos(folio), size);
         sector_t end_block = blkdev_max_block(bdev, size);
 
         do {
@@ -1032,7 +1033,7 @@ static sector_t folio_init_buffers(struct folio *folio,
  * we succeeded, or the caller should retry.
  */
 static bool grow_dev_folio(struct block_device *bdev, sector_t block,
-                pgoff_t index, unsigned size, int sizebits, gfp_t gfp)
+                pgoff_t index, unsigned size, gfp_t gfp)
 {
         struct inode *inode = bdev->bd_inode;
         struct folio *folio;
@@ -1047,8 +1048,7 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block,
         bh = folio_buffers(folio);
         if (bh) {
                 if (bh->b_size == size) {
-                        end_block = folio_init_buffers(folio, bdev,
-                                        (sector_t)index << sizebits, size);
+                        end_block = folio_init_buffers(folio, bdev, size);
                         goto unlock;
                 }
 
@@ -1069,8 +1069,7 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block,
          */
         spin_lock(&inode->i_mapping->private_lock);
         link_dev_buffers(folio, bh);
-        end_block = folio_init_buffers(folio, bdev,
-                        (sector_t)index << sizebits, size);
+        end_block = folio_init_buffers(folio, bdev, size);
         spin_unlock(&inode->i_mapping->private_lock);
 unlock:
         folio_unlock(folio);
@@ -1105,7 +1104,7 @@ static bool grow_buffers(struct block_device *bdev, sector_t block,
         }
 
         /* Create a folio with the proper size buffers */
-        return grow_dev_folio(bdev, block, index, size, sizebits, gfp);
+        return grow_dev_folio(bdev, block, index, size, gfp);
 }
 
 static struct buffer_head *
-- cgit v1.2.3

From 5f3bd90d9b98855c2e811aa3b4823d583b0020df Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:04 +0000
Subject: buffer: fix grow_buffers() for block size > PAGE_SIZE

We must not shift by a negative number so work in terms of a byte
offset to avoid the awkward shift left-or-right-depending-on-sign
option.  This means we need to use check_mul_overflow() to ensure that
a large block number does not result in a wrap.

Link: https://lkml.kernel.org/r/20231109210608.2252323-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Signed-off-by: Nathan Chancellor
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Pankaj Raghav
Cc: Ryusuke Konishi
[nathan@kernel.org: add cast in grow_buffers() to avoid a multiplication libcall]
Link: https://lkml.kernel.org/r/20231128-avoid-muloti4-grow_buffers-v1-1-bc3d0f0ec483@kernel.org
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 44e0c0b7f71f..f765a95034cf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1085,26 +1085,21 @@ unlock:
 static bool grow_buffers(struct block_device *bdev, sector_t block,
                 unsigned size, gfp_t gfp)
 {
-        pgoff_t index;
-        int sizebits;
-
-        sizebits = PAGE_SHIFT - __ffs(size);
-        index = block >> sizebits;
+        loff_t pos;
 
         /*
-         * Check for a block which wants to lie outside our maximum possible
-         * pagecache index.  (this comparison is done using sector_t types).
+         * Check for a block which lies outside our maximum possible
+         * pagecache index.
          */
-        if (unlikely(index != block >> sizebits)) {
-                printk(KERN_ERR "%s: requested out-of-range block %llu for "
-                        "device %pg\n",
+        if (check_mul_overflow(block, (sector_t)size, &pos) || pos > MAX_LFS_FILESIZE) {
+                printk(KERN_ERR "%s: requested out-of-range block %llu for device %pg\n",
                         __func__, (unsigned long long)block, bdev);
                 return false;
         }
 
         /* Create a folio with the proper size buffers */
-        return grow_dev_folio(bdev, block, index, size, gfp);
+        return grow_dev_folio(bdev, block, pos / PAGE_SIZE, size, gfp);
 }
 
 static struct buffer_head *
-- cgit v1.2.3

From 808441943f6b817f4836752c6e0d1c07507f375e Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:05 +0000
Subject: buffer: cast block to loff_t before shifting it

While sector_t is always defined as a u64 today, that hasn't always
been the case and it might not always be the same size as loff_t in
the future.

Link: https://lkml.kernel.org/r/20231109210608.2252323-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Pankaj Raghav
Cc: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index f765a95034cf..1662ddddfa27 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2008,7 +2008,7 @@ static int
 iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
                 const struct iomap *iomap)
 {
-        loff_t offset = block << inode->i_blkbits;
+        loff_t offset = (loff_t)block << inode->i_blkbits;
 
         bh->b_bdev = iomap->bdev;
 
-- cgit v1.2.3

From 4b04646caed5449ca97b909bbadca0a7a2762159 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:06 +0000
Subject: buffer: fix various functions for block size > PAGE_SIZE

If i_blkbits is larger than PAGE_SHIFT, we shift by a negative number,
which is undefined.  It is safe to shift the block left as a block
device must be smaller than MAX_LFS_FILESIZE, which is guaranteed to
fit in loff_t.
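
A minimal userspace sketch of the arithmetic (illustrative only; the
PAGE_SHIFT and i_blkbits values below are hypothetical stand-ins, not
kernel code).  With 4KiB pages and 16KiB blocks, the old calculation
shifts by PAGE_SHIFT - i_blkbits = 12 - 14 = -2, which is undefined;
shifting the block left to a byte offset first is always well defined:

    #include <stdio.h>

    #define PAGE_SHIFT 12                    /* 4KiB pages */
    #define PAGE_SIZE  (1L << PAGE_SHIFT)

    int main(void)
    {
            unsigned int i_blkbits = 14;     /* 16KiB blocks: larger than a page */
            long long block = 3;             /* fourth block on the device */

            /* Broken form: block >> (PAGE_SHIFT - i_blkbits) shifts by -2. */
            /* Safe form: compute the byte offset, then divide by page size. */
            long long index = ((long long)block << i_blkbits) / PAGE_SIZE;

            printf("block %lld starts at page index %lld\n", block, index);
            /* prints: block 3 starts at page index 12 */
            return 0;
    }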
Link: https://lkml.kernel.org/r/20231109210608.2252323-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Pankaj Raghav
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 1662ddddfa27..d75264326643 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
         int all_mapped = 1;
         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
 
-        index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+        index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
         folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
         if (IS_ERR(folio))
                 goto out;
@@ -1693,13 +1693,13 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
         struct inode *bd_inode = bdev->bd_inode;
         struct address_space *bd_mapping = bd_inode->i_mapping;
         struct folio_batch fbatch;
-        pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+        pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
         pgoff_t end;
         int i, count;
         struct buffer_head *bh;
         struct buffer_head *head;
 
-        end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+        end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE;
         folio_batch_init(&fbatch);
         while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
                 count = folio_batch_count(&fbatch);
@@ -2660,8 +2660,8 @@ int block_truncate_page(struct address_space *mapping,
                 return 0;
 
         length = blocksize - length;
-        iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
-
+        iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits;
+
         folio = filemap_grab_folio(mapping, index);
         if (IS_ERR(folio))
                 return PTR_ERR(folio);
-- cgit v1.2.3

From b0619401b8cdafcf32ad352a8e9a225ab0b4b10d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:07 +0000
Subject: buffer: handle large folios in __block_write_begin_int()

When __block_write_begin_int() was converted to support folios, we did
not expect large folios to be passed to it.  With the current work to
support large block size storage devices, this will no longer be true
so change the checks on 'from' and 'to' to be related to the size of
the folio instead of PAGE_SIZE.  Also remove an assumption that the
block size is smaller than PAGE_SIZE.
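
A worked example of the relaxed check (hypothetical numbers, not taken
from the patch): for a 64KiB folio whose first byte sits at file offset
128KiB, a write of 8KiB starting 2KiB into the folio gives from = 2048
and to = 10240.  The old BUG_ON(to > PAGE_SIZE) would fire even though
the range lies entirely within the folio; checking against the folio
size does the right thing:

    #include <assert.h>
    #include <stdio.h>

    int main(void)
    {
            long long folio_position = 128 * 1024;  /* stand-in for folio_pos() */
            size_t fsize = 64 * 1024;               /* stand-in for folio_size() */
            long long pos = 130 * 1024;             /* write begins 2KiB into folio */
            size_t len = 8 * 1024;

            size_t from = pos - folio_position;     /* offset_in_folio(): 2048 */
            size_t to = from + len;                 /* 10240 */

            assert(to <= fsize);                    /* new check passes */
            /* old check: to > 4096, so BUG_ON(to > PAGE_SIZE) would trip */
            printf("from=%zu to=%zu within a %zu-byte folio\n", from, to, fsize);
            return 0;
    }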
Link: https://lkml.kernel.org/r/20231109210608.2252323-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reported-by: Ryusuke Konishi
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Pankaj Raghav
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index d75264326643..9f4784150194 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2075,27 +2075,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
 int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
                 get_block_t *get_block, const struct iomap *iomap)
 {
-        unsigned from = pos & (PAGE_SIZE - 1);
-        unsigned to = from + len;
+        size_t from = offset_in_folio(folio, pos);
+        size_t to = from + len;
         struct inode *inode = folio->mapping->host;
-        unsigned block_start, block_end;
+        size_t block_start, block_end;
         sector_t block;
         int err = 0;
-        unsigned blocksize, bbits;
+        size_t blocksize;
         struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
 
         BUG_ON(!folio_test_locked(folio));
-        BUG_ON(from > PAGE_SIZE);
-        BUG_ON(to > PAGE_SIZE);
+        BUG_ON(to > folio_size(folio));
         BUG_ON(from > to);
 
         head = folio_create_buffers(folio, inode, 0);
         blocksize = head->b_size;
-        bbits = block_size_bits(blocksize);
-
-        block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
+        block = div_u64(folio_pos(folio), blocksize);
 
-        for(bh = head, block_start = 0; bh != head || !block_start;
+        for (bh = head, block_start = 0; bh != head || !block_start;
             block++, block_start=block_end, bh = bh->b_this_page) {
                 block_end = block_start + blocksize;
                 if (block_end <= from || block_start >= to) {
-- cgit v1.2.3

From fa399c3112344fa420944e99cd529d679411ebe6 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 9 Nov 2023 21:06:08 +0000
Subject: buffer: fix more functions for block size > PAGE_SIZE

Both __block_write_full_folio() and block_read_full_folio() assumed
that block size <= PAGE_SIZE.  Replace the shift with a divide, which
is probably cheaper than first calculating the shift.  That lets us
remove block_size_bits() as these were the last callers.

Link: https://lkml.kernel.org/r/20231109210608.2252323-8-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Cc: Pankaj Raghav
Cc: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 9f4784150194..3a8c8322ed28 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1742,19 +1742,6 @@ unlock_page:
 }
 EXPORT_SYMBOL(clean_bdev_aliases);
 
-/*
- * Size is a power-of-two in the range 512..PAGE_SIZE,
- * and the case we care about most is PAGE_SIZE.
- *
- * So this *could* possibly be written with those
- * constraints in mind (relevant mostly if some
- * architecture has a slow bit-scan instruction)
- */
-static inline int block_size_bits(unsigned int blocksize)
-{
-        return ilog2(blocksize);
-}
-
 static struct buffer_head *folio_create_buffers(struct folio *folio,
                                                 struct inode *inode,
                                                 unsigned int b_state)
@@ -1807,7 +1794,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
         sector_t block;
         sector_t last_block;
         struct buffer_head *bh, *head;
-        unsigned int blocksize, bbits;
+        size_t blocksize;
         int nr_underway = 0;
         blk_opf_t write_flags = wbc_to_write_flags(wbc);
 
@@ -1826,10 +1813,9 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
 
         bh = head;
         blocksize = bh->b_size;
-        bbits = block_size_bits(blocksize);
 
-        block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
-        last_block = (i_size_read(inode) - 1) >> bbits;
+        block = div_u64(folio_pos(folio), blocksize);
+        last_block = div_u64(i_size_read(inode) - 1, blocksize);
 
         /*
          * Get all the dirty buffers mapped to disk addresses and
@@ -2355,7 +2341,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
         struct inode *inode = folio->mapping->host;
         sector_t iblock, lblock;
         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-        unsigned int blocksize, bbits;
+        size_t blocksize;
         int nr, i;
         int fully_mapped = 1;
         bool page_error = false;
@@ -2369,10 +2355,9 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block)
 
         head = folio_create_buffers(folio, inode, 0);
         blocksize = head->b_size;
-        bbits = block_size_bits(blocksize);
 
-        iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits);
-        lblock = (limit+blocksize-1) >> bbits;
+        iblock = div_u64(folio_pos(folio), blocksize);
+        lblock = div_u64(limit + blocksize - 1, blocksize);
         bh = head;
         nr = 0;
         i = 0;
-- cgit v1.2.3

From 17bf23a981be9c6629198a76940c777eb5c8c521 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Fri, 15 Dec 2023 20:02:44 +0000
Subject: fs: convert block_write_full_page to block_write_full_folio

Convert the function to be compatible with writepage_t so that it can
be passed to write_cache_pages() by blkdev.  This removes a call to
compound_head().  We can also remove the function export as both
callers are built-in.

Link: https://lkml.kernel.org/r/20231215200245.748418-14-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Jens Axboe
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3a8c8322ed28..c838b4a31009 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -372,7 +372,7 @@ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * Completion handler for block_write_full_page() - pages which are unlocked
+ * Completion handler for block_write_full_folio() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
  */
 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
@@ -1771,18 +1771,18 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
  */
 
 /*
- * While block_write_full_page is writing back the dirty buffers under
+ * While block_write_full_folio is writing back the dirty buffers under
  * the page lock, whoever dirtied the buffers may decide to clean them
  * again at any time.  We handle that by only looking at the buffer
  * state inside lock_buffer().
 *
- * If block_write_full_page() is called for regular writeback
+ * If block_write_full_folio() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.   This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
- * If block_write_full_page() is called with wbc->sync_mode ==
+ * If block_write_full_folio() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
 * causes the writes to be flagged as synchronous writes.
 */
@@ -1829,7 +1829,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
                          * truncate in progress.
                          */
                         /*
-                         * The buffer was zeroed by block_write_full_page()
+                         * The buffer was zeroed by block_write_full_folio()
                          */
                         clear_buffer_dirty(bh);
                         set_buffer_uptodate(bh);
@@ -2696,10 +2696,9 @@ EXPORT_SYMBOL(block_truncate_page);
 /*
  * The generic ->writepage function for buffer-backed address_spaces
  */
-int block_write_full_page(struct page *page, get_block_t *get_block,
-                struct writeback_control *wbc)
+int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
+                void *get_block)
 {
-        struct folio *folio = page_folio(page);
         struct inode * const inode = folio->mapping->host;
         loff_t i_size = i_size_read(inode);
 
@@ -2726,7 +2725,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
         return __block_write_full_folio(inode, folio, get_block, wbc,
                         end_buffer_async_write);
 }
-EXPORT_SYMBOL(block_write_full_page);
 
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
                             get_block_t *get_block)
-- cgit v1.2.3

From 14059f66a959c760467ea2041e165f412845bcb8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Fri, 15 Dec 2023 20:02:45 +0000
Subject: fs: remove the bh_end_io argument from __block_write_full_folio

All callers are passing end_buffer_async_write as this argument, so we
can hardcode references to it within __block_write_full_folio().  That
lets us make end_buffer_async_write() static.

Link: https://lkml.kernel.org/r/20231215200245.748418-15-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Jens Axboe
Reviewed-by: Christoph Hellwig
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index c838b4a31009..19548369bc6c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -372,10 +372,10 @@ static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * Completion handler for block_write_full_folio() - pages which are unlocked
- * during I/O, and which have PageWriteback cleared upon I/O completion.
+ * Completion handler for block_write_full_folio() - folios which are unlocked
+ * during I/O, and which have the writeback flag cleared upon I/O completion.
  */
-void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
         unsigned long flags;
         struct buffer_head *first;
@@ -415,7 +415,6 @@ still_busy:
         spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
         return;
 }
-EXPORT_SYMBOL(end_buffer_async_write);
 
 /*
  * If a page's buffers are under async readin (end_buffer_async_read
@@ -1787,8 +1786,7 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
  * causes the writes to be flagged as synchronous writes.
  */
 int __block_write_full_folio(struct inode *inode, struct folio *folio,
-                get_block_t *get_block, struct writeback_control *wbc,
-                bh_end_io_t *handler)
+                get_block_t *get_block, struct writeback_control *wbc)
 {
         int err;
         sector_t block;
@@ -1867,7 +1865,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
                         continue;
                 }
                 if (test_clear_buffer_dirty(bh)) {
-                        mark_buffer_async_write_endio(bh, handler);
+                        mark_buffer_async_write_endio(bh,
+                                        end_buffer_async_write);
                 } else {
                         unlock_buffer(bh);
                 }
@@ -1920,7 +1919,8 @@ recover:
                 if (buffer_mapped(bh) && buffer_dirty(bh) &&
                     !buffer_delay(bh)) {
                         lock_buffer(bh);
-                        mark_buffer_async_write_endio(bh, handler);
+                        mark_buffer_async_write_endio(bh,
+                                        end_buffer_async_write);
                 } else {
                         /*
                          * The buffer may have been set dirty during
@@ -2704,8 +2704,7 @@ int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
 
         /* Is the folio fully inside i_size? */
         if (folio_pos(folio) + folio_size(folio) <= i_size)
-                return __block_write_full_folio(inode, folio, get_block, wbc,
-                                end_buffer_async_write);
+                return __block_write_full_folio(inode, folio, get_block, wbc);
 
         /* Is the folio fully outside i_size? (truncate in progress) */
         if (folio_pos(folio) >= i_size) {
@@ -2722,8 +2721,7 @@ int block_write_full_folio(struct folio *folio, struct writeback_control *wbc,
          */
         folio_zero_segment(folio, offset_in_folio(folio, i_size),
                         folio_size(folio));
-        return __block_write_full_folio(inode, folio, get_block, wbc,
-                        end_buffer_async_write);
+        return __block_write_full_folio(inode, folio, get_block, wbc);
 }
 
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
-- cgit v1.2.3

From bcd30d4cd937e8e15c3986358c5e601135475ce1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Mon, 1 Jan 2024 09:38:48 +0000
Subject: buffer: fix unintended successful return

If try_to_free_buffers() succeeded and then folio_alloc_buffers()
failed, grow_dev_folio() would return success.  This would be
incorrect; memory allocation failure is supposed to result in a
failure.  It's a harmless bug; the caller will simply go around the
loop one more time and grow_dev_folio() will correctly return a
failure that time.  But it was an unintended change and looks like a
more serious bug than it is.

While I'm in here, improve the commentary about why we return success
even though we failed.

Link: https://lkml.kernel.org/r/20240101093848.2017115-1-willy@infradead.org
Fixes: 6d840a18773f ("buffer: return bool from grow_dev_folio()")
Signed-off-by: Matthew Wilcox (Oracle)
Reported-by: Ryusuke Konishi
Signed-off-by: Andrew Morton
---
 fs/buffer.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 19548369bc6c..5c29850e4781 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1028,8 +1028,8 @@ static sector_t folio_init_buffers(struct folio *folio,
  *
  * This is used purely for blockdev mappings.
  *
- * Returns false if we have a 'permanent' failure.  Returns true if
- * we succeeded, or the caller should retry.
+ * Returns false if we have a failure which cannot be cured by retrying
+ * without sleeping.  Returns true if we succeeded, or the caller should retry.
  */
 static bool grow_dev_folio(struct block_device *bdev, sector_t block,
                 pgoff_t index, unsigned size, gfp_t gfp)
@@ -1051,10 +1051,17 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block,
                         goto unlock;
                 }
 
-                /* Caller should retry if this call fails */
-                end_block = ~0ULL;
-                if (!try_to_free_buffers(folio))
+                /*
+                 * Retrying may succeed; for example the folio may finish
+                 * writeback, or buffers may be cleaned.  This should not
+                 * happen very often; maybe we have old buffers attached to
+                 * this blockdev's page cache and we're trying to change
+                 * the block size?
+                 */
+                if (!try_to_free_buffers(folio)) {
+                        end_block = ~0ULL;
                         goto unlock;
+                }
         }
 
         bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT);
-- cgit v1.2.3
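
To make the end_block convention at the heart of this fix concrete,
here is a small userspace sketch (hypothetical numbers; result() is a
stand-in for the final "return block < end_block" in grow_dev_folio(),
not kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    typedef unsigned long long sector_t;

    /* grow_dev_folio() reduces to this comparison on exit. */
    static bool result(sector_t block, sector_t end_block)
    {
            return block < end_block;
    }

    int main(void)
    {
            sector_t block = 100;                   /* block being grown */

            /* end_block left at 0: allocation failed, a real failure. */
            printf("%d\n", result(block, 0));       /* 0 -> caller gives up */

            /* end_block = ~0ULL: try_to_free_buffers() failed, transient. */
            printf("%d\n", result(block, ~0ULL));   /* 1 -> caller loops again */

            /* Buffers initialised; device holds blocks 0..199. */
            printf("%d\n", result(block, 200));     /* 1 -> genuine success */
            printf("%d\n", result(250, 200));       /* 0 -> block past end of device */
            return 0;
    }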