summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/write.c16
-rw-r--r--fs/btrfs/file.c6
-rw-r--r--fs/buffer.c259
-rw-r--r--fs/ceph/file.c6
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/exec.c2
-rw-r--r--fs/ext4/file.c11
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/f2fs/file.c3
-rw-r--r--fs/fs-writeback.c16
-rw-r--r--fs/fuse/file.c45
-rw-r--r--fs/gfs2/aops.c69
-rw-r--r--fs/gfs2/aops.h2
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/hugetlbfs/inode.c13
-rw-r--r--fs/iomap/buffered-io.c9
-rw-r--r--fs/iomap/direct-io.c88
-rw-r--r--fs/libfs.c41
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs3/file.c3
-rw-r--r--fs/proc/task_mmu.c77
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/userfaultfd.c62
-rw-r--r--fs/xfs/xfs_file.c6
-rw-r--r--fs/zonefs/file.c4
29 files changed, 364 insertions, 421 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 8750b99c3f56..9c7fd6fd8095 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -465,7 +465,7 @@ static void afs_extend_writeback(struct address_space *mapping,
bool caching,
unsigned int *_len)
{
- struct pagevec pvec;
+ struct folio_batch fbatch;
struct folio *folio;
unsigned long priv;
unsigned int psize, filler = 0;
@@ -476,7 +476,7 @@ static void afs_extend_writeback(struct address_space *mapping,
unsigned int i;
XA_STATE(xas, &mapping->i_pages, index);
- pagevec_init(&pvec);
+ folio_batch_init(&fbatch);
do {
/* Firstly, we gather up a batch of contiguous dirty pages
@@ -535,7 +535,7 @@ static void afs_extend_writeback(struct address_space *mapping,
stop = false;
index += folio_nr_pages(folio);
- if (!pagevec_add(&pvec, &folio->page))
+ if (!folio_batch_add(&fbatch, folio))
break;
if (stop)
break;
@@ -545,14 +545,14 @@ static void afs_extend_writeback(struct address_space *mapping,
xas_pause(&xas);
rcu_read_unlock();
- /* Now, if we obtained any pages, we can shift them to being
+ /* Now, if we obtained any folios, we can shift them to being
* writable and mark them for caching.
*/
- if (!pagevec_count(&pvec))
+ if (!folio_batch_count(&fbatch))
break;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- folio = page_folio(pvec.pages[i]);
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ folio = fbatch.folios[i];
trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
if (!folio_clear_dirty_for_io(folio))
@@ -565,7 +565,7 @@ static void afs_extend_writeback(struct address_space *mapping,
folio_unlock(folio);
}
- pagevec_release(&pvec);
+ folio_batch_release(&fbatch);
cond_resched();
} while (!stop);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ba5b0c9f2bbd..fd03e689a6be 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1145,7 +1145,6 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return -EAGAIN;
- current->backing_dev_info = inode_to_bdi(inode);
ret = file_remove_privs(file);
if (ret)
return ret;
@@ -1165,10 +1164,8 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
- if (ret) {
- current->backing_dev_info = NULL;
+ if (ret)
return ret;
- }
}
return 0;
@@ -1682,7 +1679,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
num_written = num_sync;
}
- current->backing_dev_info = NULL;
return num_written;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index 93c7446d9221..cdd100273450 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -194,19 +194,19 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
pgoff_t index;
struct buffer_head *bh;
struct buffer_head *head;
- struct page *page;
+ struct folio *folio;
int all_mapped = 1;
static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
- page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
- if (!page)
+ folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
+ if (IS_ERR(folio))
goto out;
spin_lock(&bd_mapping->private_lock);
- if (!page_has_buffers(page))
+ head = folio_buffers(folio);
+ if (!head)
goto out_unlock;
- head = page_buffers(page);
bh = head;
do {
if (!buffer_mapped(bh))
@@ -236,7 +236,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
- put_page(page);
+ folio_put(folio);
out:
return ret;
}
@@ -906,8 +906,8 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
}
EXPORT_SYMBOL_GPL(alloc_page_buffers);
-static inline void
-link_dev_buffers(struct page *page, struct buffer_head *head)
+static inline void link_dev_buffers(struct folio *folio,
+ struct buffer_head *head)
{
struct buffer_head *bh, *tail;
@@ -917,7 +917,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
bh = bh->b_this_page;
} while (bh);
tail->b_this_page = head;
- attach_page_private(page, head);
+ folio_attach_private(folio, head);
}
static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
@@ -933,15 +933,14 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
}
/*
- * Initialise the state of a blockdev page's buffers.
+ * Initialise the state of a blockdev folio's buffers.
*/
-static sector_t
-init_page_buffers(struct page *page, struct block_device *bdev,
- sector_t block, int size)
+static sector_t folio_init_buffers(struct folio *folio,
+ struct block_device *bdev, sector_t block, int size)
{
- struct buffer_head *head = page_buffers(page);
+ struct buffer_head *head = folio_buffers(folio);
struct buffer_head *bh = head;
- int uptodate = PageUptodate(page);
+ bool uptodate = folio_test_uptodate(folio);
sector_t end_block = blkdev_max_block(bdev, size);
do {
@@ -975,7 +974,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
pgoff_t index, int size, int sizebits, gfp_t gfp)
{
struct inode *inode = bdev->bd_inode;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
sector_t end_block;
int ret = 0;
@@ -991,42 +990,37 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
gfp_mask |= __GFP_NOFAIL;
- page = find_or_create_page(inode->i_mapping, index, gfp_mask);
-
- BUG_ON(!PageLocked(page));
+ folio = __filemap_get_folio(inode->i_mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
- if (page_has_buffers(page)) {
- bh = page_buffers(page);
+ bh = folio_buffers(folio);
+ if (bh) {
if (bh->b_size == size) {
- end_block = init_page_buffers(page, bdev,
- (sector_t)index << sizebits,
- size);
+ end_block = folio_init_buffers(folio, bdev,
+ (sector_t)index << sizebits, size);
goto done;
}
- if (!try_to_free_buffers(page_folio(page)))
+ if (!try_to_free_buffers(folio))
goto failed;
}
- /*
- * Allocate some buffers for this page
- */
- bh = alloc_page_buffers(page, size, true);
+ bh = folio_alloc_buffers(folio, size, true);
/*
- * Link the page to the buffers and initialise them. Take the
+ * Link the folio to the buffers and initialise them. Take the
* lock to be atomic wrt __find_get_block(), which does not
- * run under the page lock.
+ * run under the folio lock.
*/
spin_lock(&inode->i_mapping->private_lock);
- link_dev_buffers(page, bh);
- end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
- size);
+ link_dev_buffers(folio, bh);
+ end_block = folio_init_buffers(folio, bdev,
+ (sector_t)index << sizebits, size);
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
failed:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return ret;
}
@@ -1763,7 +1757,7 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
* WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
* causes the writes to be flagged as synchronous writes.
*/
-int __block_write_full_page(struct inode *inode, struct page *page,
+int __block_write_full_folio(struct inode *inode, struct folio *folio,
get_block_t *get_block, struct writeback_control *wbc,
bh_end_io_t *handler)
{
@@ -1775,14 +1769,14 @@ int __block_write_full_page(struct inode *inode, struct page *page,
int nr_underway = 0;
blk_opf_t write_flags = wbc_to_write_flags(wbc);
- head = folio_create_buffers(page_folio(page), inode,
+ head = folio_create_buffers(folio, inode,
(1 << BH_Dirty) | (1 << BH_Uptodate));
/*
* Be very careful. We have no exclusion from block_dirty_folio
* here, and the (potentially unmapped) buffers may become dirty at
* any time. If a buffer becomes dirty here after we've inspected it
- * then we just miss that fact, and the page stays dirty.
+ * then we just miss that fact, and the folio stays dirty.
*
* Buffers outside i_size may be dirtied by block_dirty_folio;
* handle that here by just cleaning them.
@@ -1792,7 +1786,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
blocksize = bh->b_size;
bbits = block_size_bits(blocksize);
- block = (sector_t)page->index << (PAGE_SHIFT - bbits);
+ block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
last_block = (i_size_read(inode) - 1) >> bbits;
/*
@@ -1803,7 +1797,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
if (block > last_block) {
/*
* mapped buffers outside i_size will occur, because
- * this page can be outside i_size when there is a
+ * this folio can be outside i_size when there is a
* truncate in progress.
*/
/*
@@ -1833,7 +1827,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
continue;
/*
* If it's a fully non-blocking write attempt and we cannot
- * lock the buffer then redirty the page. Note that this can
+ * lock the buffer then redirty the folio. Note that this can
* potentially cause a busy-wait loop from writeback threads
* and kswapd activity, but those code paths have their own
* higher-level throttling.
@@ -1841,7 +1835,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
if (wbc->sync_mode != WB_SYNC_NONE) {
lock_buffer(bh);
} else if (!trylock_buffer(bh)) {
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
continue;
}
if (test_clear_buffer_dirty(bh)) {
@@ -1852,11 +1846,11 @@ int __block_write_full_page(struct inode *inode, struct page *page,
} while ((bh = bh->b_this_page) != head);
/*
- * The page and its buffers are protected by PageWriteback(), so we can
- * drop the bh refcounts early.
+ * The folio and its buffers are protected by the writeback flag,
+ * so we can drop the bh refcounts early.
*/
- BUG_ON(PageWriteback(page));
- set_page_writeback(page);
+ BUG_ON(folio_test_writeback(folio));
+ folio_start_writeback(folio);
do {
struct buffer_head *next = bh->b_this_page;
@@ -1866,20 +1860,20 @@ int __block_write_full_page(struct inode *inode, struct page *page,
}
bh = next;
} while (bh != head);
- unlock_page(page);
+ folio_unlock(folio);
err = 0;
done:
if (nr_underway == 0) {
/*
- * The page was marked dirty, but the buffers were
+ * The folio was marked dirty, but the buffers were
* clean. Someone wrote them back by hand with
* write_dirty_buffer/submit_bh. A rare case.
*/
- end_page_writeback(page);
+ folio_end_writeback(folio);
/*
- * The page and buffer_heads can be released at any time from
+ * The folio and buffer_heads can be released at any time from
* here on.
*/
}
@@ -1890,7 +1884,7 @@ recover:
* ENOSPC, or some other error. We may already have added some
* blocks to the file, so we need to write these out to avoid
* exposing stale data.
- * The page is currently locked and not marked for writeback
+ * The folio is currently locked and not marked for writeback
*/
bh = head;
/* Recovery: lock and submit the mapped buffers */
@@ -1902,15 +1896,15 @@ recover:
} else {
/*
* The buffer may have been set dirty during
- * attachment to a dirty page.
+ * attachment to a dirty folio.
*/
clear_buffer_dirty(bh);
}
} while ((bh = bh->b_this_page) != head);
- SetPageError(page);
- BUG_ON(PageWriteback(page));
- mapping_set_error(page->mapping, err);
- set_page_writeback(page);
+ folio_set_error(folio);
+ BUG_ON(folio_test_writeback(folio));
+ mapping_set_error(folio->mapping, err);
+ folio_start_writeback(folio);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
@@ -1920,39 +1914,40 @@ recover:
}
bh = next;
} while (bh != head);
- unlock_page(page);
+ folio_unlock(folio);
goto done;
}
-EXPORT_SYMBOL(__block_write_full_page);
+EXPORT_SYMBOL(__block_write_full_folio);
/*
- * If a page has any new buffers, zero them out here, and mark them uptodate
+ * If a folio has any new buffers, zero them out here, and mark them uptodate
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
-void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
{
- unsigned int block_start, block_end;
+ size_t block_start, block_end;
struct buffer_head *head, *bh;
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
+ BUG_ON(!folio_test_locked(folio));
+ head = folio_buffers(folio);
+ if (!head)
return;
- bh = head = page_buffers(page);
+ bh = head;
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
- if (!PageUptodate(page)) {
- unsigned start, size;
+ if (!folio_test_uptodate(folio)) {
+ size_t start, xend;
start = max(from, block_start);
- size = min(to, block_end) - start;
+ xend = min(to, block_end);
- zero_user(page, start, size);
+ folio_zero_segment(folio, start, xend);
set_buffer_uptodate(bh);
}
@@ -1965,7 +1960,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
bh = bh->b_this_page;
} while (bh != head);
}
-EXPORT_SYMBOL(page_zero_new_buffers);
+EXPORT_SYMBOL(folio_zero_new_buffers);
static void
iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
@@ -2103,7 +2098,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err))
- page_zero_new_buffers(&folio->page, from, to);
+ folio_zero_new_buffers(folio, from, to);
return err;
}
@@ -2115,15 +2110,15 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(__block_write_begin);
-static int __block_commit_write(struct inode *inode, struct page *page,
- unsigned from, unsigned to)
+static int __block_commit_write(struct inode *inode, struct folio *folio,
+ size_t from, size_t to)
{
- unsigned block_start, block_end;
- int partial = 0;
+ size_t block_start, block_end;
+ bool partial = false;
unsigned blocksize;
struct buffer_head *bh, *head;
- bh = head = page_buffers(page);
+ bh = head = folio_buffers(folio);
blocksize = bh->b_size;
block_start = 0;
@@ -2131,7 +2126,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
- partial = 1;
+ partial = true;
} else {
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
@@ -2146,11 +2141,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
/*
* If this is a partial write which happened to make all buffers
* uptodate then we can optimize away a bogus read_folio() for
- * the next read(). Here we 'discover' whether the page went
+ * the next read(). Here we 'discover' whether the folio went
* uptodate as a result of this (potentially partial) write.
*/
if (!partial)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
return 0;
}
@@ -2187,10 +2182,9 @@ int block_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = mapping->host;
- unsigned start;
-
- start = pos & (PAGE_SIZE - 1);
+ size_t start = pos - folio_pos(folio);
if (unlikely(copied < len)) {
/*
@@ -2202,18 +2196,18 @@ int block_write_end(struct file *file, struct address_space *mapping,
* read_folio might come in and destroy our partial write.
*
* Do the simplest thing, and just treat any short write to a
- * non uptodate page as a zero-length write, and force the
+ * non uptodate folio as a zero-length write, and force the
* caller to redo the whole thing.
*/
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
copied = 0;
- page_zero_new_buffers(page, start+copied, start+len);
+ folio_zero_new_buffers(folio, start+copied, start+len);
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
/* This could be a short (even 0-length) commit */
- __block_commit_write(inode, page, start, start+copied);
+ __block_commit_write(inode, folio, start, start + copied);
return copied;
}
@@ -2536,8 +2530,9 @@ EXPORT_SYMBOL(cont_write_begin);
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
- struct inode *inode = page->mapping->host;
- __block_commit_write(inode,page,from,to);
+ struct folio *folio = page_folio(page);
+ struct inode *inode = folio->mapping->host;
+ __block_commit_write(inode, folio, from, to);
return 0;
}
EXPORT_SYMBOL(block_commit_write);
@@ -2563,38 +2558,37 @@ EXPORT_SYMBOL(block_commit_write);
int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vma->vm_file);
unsigned long end;
loff_t size;
int ret;
- lock_page(page);
+ folio_lock(folio);
size = i_size_read(inode);
- if ((page->mapping != inode->i_mapping) ||
- (page_offset(page) > size)) {
+ if ((folio->mapping != inode->i_mapping) ||
+ (folio_pos(folio) >= size)) {
/* We overload EFAULT to mean page got truncated */
ret = -EFAULT;
goto out_unlock;
}
- /* page is wholly or partially inside EOF */
- if (((page->index + 1) << PAGE_SHIFT) > size)
- end = size & ~PAGE_MASK;
- else
- end = PAGE_SIZE;
+ end = folio_size(folio);
+ /* folio is wholly or partially inside EOF */
+ if (folio_pos(folio) + end > size)
+ end = size - folio_pos(folio);
- ret = __block_write_begin(page, 0, end, get_block);
+ ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
if (!ret)
- ret = block_commit_write(page, 0, end);
+ ret = __block_commit_write(inode, folio, 0, end);
if (unlikely(ret < 0))
goto out_unlock;
- set_page_dirty(page);
- wait_for_stable_page(page);
+ folio_mark_dirty(folio);
+ folio_wait_stable(folio);
return 0;
out_unlock:
- unlock_page(page);
+ folio_unlock(folio);
return ret;
}
EXPORT_SYMBOL(block_page_mkwrite);
@@ -2603,17 +2597,16 @@ int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
pgoff_t index = from >> PAGE_SHIFT;
- unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
- unsigned length, pos;
+ size_t offset, length, pos;
struct inode *inode = mapping->host;
- struct page *page;
+ struct folio *folio;
struct buffer_head *bh;
int err = 0;
blocksize = i_blocksize(inode);
- length = offset & (blocksize - 1);
+ length = from & (blocksize - 1);
/* Block boundary? Nothing to do */
if (!length)
@@ -2622,15 +2615,18 @@ int block_truncate_page(struct address_space *mapping,
length = blocksize - length;
iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
- page = grab_cache_page(mapping, index);
- if (!page)
- return -ENOMEM;
+ folio = filemap_grab_folio(mapping, index);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- if (!page_has_buffers(page))
- create_empty_buffers(page, blocksize, 0);
+ bh = folio_buffers(folio);
+ if (!bh) {
+ folio_create_empty_buffers(folio, blocksize, 0);
+ bh = folio_buffers(folio);
+ }
/* Find the buffer that contains "offset" */
- bh = page_buffers(page);
+ offset = offset_in_folio(folio, from);
pos = blocksize;
while (offset >= pos) {
bh = bh->b_this_page;
@@ -2649,7 +2645,7 @@ int block_truncate_page(struct address_space *mapping,
}
/* Ok, it's mapped. Make sure it's up-to-date */
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
@@ -2659,12 +2655,12 @@ int block_truncate_page(struct address_space *mapping,
goto unlock;
}
- zero_user(page, offset, length);
+ folio_zero_range(folio, offset, length);
mark_buffer_dirty(bh);
unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return err;
}
@@ -2676,33 +2672,32 @@ EXPORT_SYMBOL(block_truncate_page);
int block_write_full_page(struct page *page, get_block_t *get_block,
struct writeback_control *wbc)
{
- struct inode * const inode = page->mapping->host;
+ struct folio *folio = page_folio(page);
+ struct inode * const inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
- /* Is the page fully inside i_size? */
- if (page->index < end_index)
- return __block_write_full_page(inode, page, get_block, wbc,
+ /* Is the folio fully inside i_size? */
+ if (folio_pos(folio) + folio_size(folio) <= i_size)
+ return __block_write_full_folio(inode, folio, get_block, wbc,
end_buffer_async_write);
- /* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_SIZE-1);
- if (page->index >= end_index+1 || !offset) {
- unlock_page(page);
+ /* Is the folio fully outside i_size? (truncate in progress) */
+ if (folio_pos(folio) >= i_size) {
+ folio_unlock(folio);
return 0; /* don't care */
}
/*
- * The page straddles i_size. It must be zeroed out on each and every
+ * The folio straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when mapped, and
+ * the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_segment(page, offset, PAGE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc,
- end_buffer_async_write);
+ folio_zero_segment(folio, offset_in_folio(folio, i_size),
+ folio_size(folio));
+ return __block_write_full_folio(inode, folio, get_block, wbc,
+ end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4285f6cb5d3b..b1925232dc08 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1854,9 +1854,6 @@ retry_snap:
else
ceph_start_io_write(inode);
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
if (iocb->ki_flags & IOCB_APPEND) {
err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
if (err < 0)
@@ -1957,8 +1954,6 @@ retry_snap:
* can not run at the same time
*/
written = generic_perform_write(iocb, from);
- if (likely(written >= 0))
- iocb->ki_pos = pos + written;
ceph_end_io_write(inode);
}
@@ -2003,7 +1998,6 @@ out:
ceph_end_io_write(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
- current->backing_dev_info = NULL;
return written ? written : err;
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 2ceb378b93c0..7bc494ee56b9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -297,14 +297,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
* zeros from unwritten extents.
*/
if (flags & DIO_COMPLETE_INVALIDATE &&
- ret > 0 && dio_op == REQ_OP_WRITE &&
- dio->inode->i_mapping->nrpages) {
- err = invalidate_inode_pages2_range(dio->inode->i_mapping,
- offset >> PAGE_SHIFT,
- (offset + ret - 1) >> PAGE_SHIFT);
- if (err)
- dio_warn_stale_pagecache(dio->iocb->ki_filp);
- }
+ ret > 0 && dio_op == REQ_OP_WRITE)
+ kiocb_invalidate_post_direct_write(dio->iocb, ret);
inode_dio_end(dio->inode);
diff --git a/fs/exec.c b/fs/exec.c
index a466e797c8e2..25c65b64544b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -220,7 +220,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
*/
mmap_read_lock(bprm->mm);
ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
- &page, NULL, NULL);
+ &page, NULL);
mmap_read_unlock(bprm->mm);
if (ret <= 0)
return NULL;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index e8261900f4f3..6a16d07965f9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -296,18 +296,13 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
if (ret <= 0)
goto out;
- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
out:
inode_unlock(inode);
- if (likely(ret > 0)) {
- iocb->ki_pos += ret;
- ret = generic_write_sync(iocb, ret);
- }
-
- return ret;
+ if (unlikely(ret <= 0))
+ return ret;
+ return generic_write_sync(iocb, ret);
}
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 02de439bf1f0..9ca583360166 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1093,7 +1093,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
err = -EIO;
}
if (unlikely(err)) {
- page_zero_new_buffers(&folio->page, from, to);
+ folio_zero_new_buffers(folio, from, to);
} else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
for (i = 0; i < nr_wait; i++) {
int err2;
@@ -1339,7 +1339,7 @@ static int ext4_write_end(struct file *file,
}
/*
- * This is a private version of page_zero_new_buffers() which doesn't
+ * This is a private version of folio_zero_new_buffers() which doesn't
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_dirty_journalled_data() instead.
*/
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3fce122997ca..2435111a8532 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4543,12 +4543,9 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
if (iocb->ki_flags & IOCB_NOWAIT)
return -EOPNOTSUPP;
- current->backing_dev_info = inode_to_bdi(inode);
ret = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
if (ret > 0) {
- iocb->ki_pos += ret;
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_BUFFERED_IO, ret);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ae4e51e91ee3..aca4b4811394 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2024,7 +2024,6 @@ static long wb_writeback(struct bdi_writeback *wb,
struct blk_plug plug;
blk_start_plug(&plug);
- spin_lock(&wb->list_lock);
for (;;) {
/*
* Stop writeback when nr_pages has been consumed
@@ -2049,6 +2048,9 @@ static long wb_writeback(struct bdi_writeback *wb,
if (work->for_background && !wb_over_bg_thresh(wb))
break;
+
+ spin_lock(&wb->list_lock);
+
/*
* Kupdate and background works are special and we want to
* include all inodes that need writing. Livelock avoidance is
@@ -2078,13 +2080,19 @@ static long wb_writeback(struct bdi_writeback *wb,
* mean the overall work is done. So we keep looping as long
* as made some progress on cleaning pages or inodes.
*/
- if (progress)
+ if (progress) {
+ spin_unlock(&wb->list_lock);
continue;
+ }
+
/*
* No more inodes for IO, bail
*/
- if (list_empty(&wb->b_more_io))
+ if (list_empty(&wb->b_more_io)) {
+ spin_unlock(&wb->list_lock);
break;
+ }
+
/*
* Nothing written. Wait for some inode to
* become available for writeback. Otherwise
@@ -2096,9 +2104,7 @@ static long wb_writeback(struct bdi_writeback *wb,
spin_unlock(&wb->list_lock);
/* This function drops i_lock... */
inode_sleep_on_writeback(inode);
- spin_lock(&wb->list_lock);
}
- spin_unlock(&wb->list_lock);
blk_finish_plug(&plug);
return nr_pages - work->nr_pages;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4553124f5406..bc4115288eec 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1280,13 +1280,13 @@ static inline unsigned int fuse_wr_pages(loff_t pos, size_t len,
max_pages);
}
-static ssize_t fuse_perform_write(struct kiocb *iocb,
- struct address_space *mapping,
- struct iov_iter *ii, loff_t pos)
+static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
+ loff_t pos = iocb->ki_pos;
int err = 0;
ssize_t res = 0;
@@ -1329,7 +1329,10 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
fuse_write_update_attr(inode, pos, res);
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
- return res > 0 ? res : err;
+ if (!res)
+ return err;
+ iocb->ki_pos += res;
+ return res;
}
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1337,11 +1340,9 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
- ssize_t written_buffered = 0;
struct inode *inode = mapping->host;
ssize_t err;
struct fuse_conn *fc = get_fuse_conn(inode);
- loff_t endbyte = 0;
if (fc->writeback_cache) {
/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1362,9 +1363,6 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
writethrough:
inode_lock(inode);
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
err = generic_write_checks(iocb, from);
if (err <= 0)
goto out;
@@ -1378,38 +1376,15 @@ writethrough:
goto out;
if (iocb->ki_flags & IOCB_DIRECT) {
- loff_t pos = iocb->ki_pos;
written = generic_file_direct_write(iocb, from);
if (written < 0 || !iov_iter_count(from))
goto out;
-
- pos += written;
-
- written_buffered = fuse_perform_write(iocb, mapping, from, pos);
- if (written_buffered < 0) {
- err = written_buffered;
- goto out;
- }
- endbyte = pos + written_buffered - 1;
-
- err = filemap_write_and_wait_range(file->f_mapping, pos,
- endbyte);
- if (err)
- goto out;
-
- invalidate_mapping_pages(file->f_mapping,
- pos >> PAGE_SHIFT,
- endbyte >> PAGE_SHIFT);
-
- written += written_buffered;
- iocb->ki_pos = pos + written_buffered;
+ written = direct_write_fallback(iocb, from, written,
+ fuse_perform_write(iocb, from));
} else {
- written = fuse_perform_write(iocb, mapping, from, iocb->ki_pos);
- if (written >= 0)
- iocb->ki_pos += written;
+ written = fuse_perform_write(iocb, from);
}
out:
- current->backing_dev_info = NULL;
inode_unlock(inode);
if (written > 0)
written = generic_write_sync(iocb, written);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index a5f4be6b9213..1c407eba1e30 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -38,13 +38,13 @@
void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- unsigned int from, unsigned int len)
+ size_t from, size_t len)
{
struct buffer_head *head = folio_buffers(folio);
unsigned int bsize = head->b_size;
struct buffer_head *bh;
- unsigned int to = from + len;
- unsigned int start, end;
+ size_t to = from + len;
+ size_t start, end;
for (bh = head, start = 0; bh != head || !start;
bh = bh->b_this_page, start = end) {
@@ -82,61 +82,61 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
}
/**
- * gfs2_write_jdata_page - gfs2 jdata-specific version of block_write_full_page
- * @page: The page to write
+ * gfs2_write_jdata_folio - gfs2 jdata-specific version of block_write_full_page
+ * @folio: The folio to write
* @wbc: The writeback control
*
* This is the same as calling block_write_full_page, but it also
* writes pages outside of i_size
*/
-static int gfs2_write_jdata_page(struct page *page,
+static int gfs2_write_jdata_folio(struct folio *folio,
struct writeback_control *wbc)
{
- struct inode * const inode = page->mapping->host;
+ struct inode * const inode = folio->mapping->host;
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned offset;
/*
- * The page straddles i_size. It must be zeroed out on each and every
+ * The folio straddles i_size. It must be zeroed out on each and every
* writepage invocation because it may be mmapped. "A file is mapped
* in multiples of the page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when mapped, and
+ * the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- offset = i_size & (PAGE_SIZE - 1);
- if (page->index == end_index && offset)
- zero_user_segment(page, offset, PAGE_SIZE);
+ if (folio_pos(folio) < i_size &&
+ i_size < folio_pos(folio) + folio_size(folio))
+ folio_zero_segment(folio, offset_in_folio(folio, i_size),
+ folio_size(folio));
- return __block_write_full_page(inode, page, gfs2_get_block_noalloc, wbc,
- end_buffer_async_write);
+ return __block_write_full_folio(inode, folio, gfs2_get_block_noalloc,
+ wbc, end_buffer_async_write);
}
/**
- * __gfs2_jdata_writepage - The core of jdata writepage
- * @page: The page to write
+ * __gfs2_jdata_write_folio - The core of jdata writepage
+ * @folio: The folio to write
* @wbc: The writeback control
*
* This is shared between writepage and writepages and implements the
* core of the writepage operation. If a transaction is required then
- * PageChecked will have been set and the transaction will have
+ * the checked flag will have been set and the transaction will have
* already been started before this is called.
*/
-
-static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
+static int __gfs2_jdata_write_folio(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- if (PageChecked(page)) {
- ClearPageChecked(page);
- if (!page_has_buffers(page)) {
- create_empty_buffers(page, inode->i_sb->s_blocksize,
- BIT(BH_Dirty)|BIT(BH_Uptodate));
+ if (folio_test_checked(folio)) {
+ folio_clear_checked(folio);
+ if (!folio_buffers(folio)) {
+ folio_create_empty_buffers(folio,
+ inode->i_sb->s_blocksize,
+ BIT(BH_Dirty)|BIT(BH_Uptodate));
}
- gfs2_trans_add_databufs(ip, page_folio(page), 0, PAGE_SIZE);
+ gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio));
}
- return gfs2_write_jdata_page(page, wbc);
+ return gfs2_write_jdata_folio(folio, wbc);
}
/**
@@ -150,20 +150,21 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w
static int gfs2_jdata_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
goto out;
- if (PageChecked(page) || current->journal_info)
+ if (folio_test_checked(folio) || current->journal_info)
goto out_ignore;
- return __gfs2_jdata_writepage(page, wbc);
+ return __gfs2_jdata_write_folio(folio, wbc);
out_ignore:
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
out:
- unlock_page(page);
+ folio_unlock(folio);
return 0;
}
@@ -255,7 +256,7 @@ continue_unlock:
trace_wbc_writepage(wbc, inode_to_bdi(inode));
- ret = __gfs2_jdata_writepage(&folio->page, wbc);
+ ret = __gfs2_jdata_write_folio(folio, wbc);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
folio_unlock(folio);
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index 09db1914425e..f08322ef41cf 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -10,6 +10,6 @@
extern void adjust_fs_space(struct inode *inode);
extern void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- unsigned int from, unsigned int len);
+ size_t from, size_t len);
#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 1d679a3178ff..f146447eac63 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1052,15 +1052,11 @@ retry:
goto out_unlock;
}
- current->backing_dev_info = inode_to_bdi(inode);
pagefault_disable();
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
pagefault_enable();
- current->backing_dev_info = NULL;
- if (ret > 0) {
- iocb->ki_pos += ret;
+ if (ret > 0)
written += ret;
- }
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ecfdfb2529a3..7b17ccfa039d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -821,7 +821,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
*/
struct folio *folio;
unsigned long addr;
- bool present;
cond_resched();
@@ -834,9 +833,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
break;
}
- /* Set numa allocation policy based on index */
- hugetlb_set_vma_policy(&pseudo_vma, inode, index);
-
/* addr is the offset within the file (zero based) */
addr = index * hpage_size;
@@ -845,12 +841,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- rcu_read_lock();
- present = page_cache_next_miss(mapping, index, 1) != index;
- rcu_read_unlock();
- if (present) {
+ folio = filemap_get_folio(mapping, index);
+ if (!IS_ERR(folio)) {
+ folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- hugetlb_drop_vma_policy(&pseudo_vma);
continue;
}
@@ -862,6 +856,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
+ hugetlb_set_vma_policy(&pseudo_vma, inode, index);
folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(folio)) {
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0edab9deae2a..a4fa81af60d9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -864,16 +864,19 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
.len = iov_iter_count(i),
.flags = IOMAP_WRITE,
};
- int ret;
+ ssize_t ret;
if (iocb->ki_flags & IOCB_NOWAIT)
iter.flags |= IOMAP_NOWAIT;
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i);
- if (iter.pos == iocb->ki_pos)
+
+ if (unlikely(ret < 0))
return ret;
- return iter.pos - iocb->ki_pos;
+ ret = iter.pos - iocb->ki_pos;
+ iocb->ki_pos += ret;
+ return ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 08873f0627dd..ea3b868c8355 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
- struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
ssize_t ret = dio->error;
@@ -94,7 +93,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - offset;
- iocb->ki_pos += ret;
}
/*
@@ -109,30 +107,25 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
* ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents.
*/
- if (!dio->error && dio->size &&
- (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
- int err;
- err = invalidate_inode_pages2_range(inode->i_mapping,
- offset >> PAGE_SHIFT,
- (offset + dio->size - 1) >> PAGE_SHIFT);
- if (err)
- dio_warn_stale_pagecache(iocb->ki_filp);
- }
+ if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
+ kiocb_invalidate_post_direct_write(iocb, dio->size);
inode_dio_end(file_inode(iocb->ki_filp));
- /*
- * If this is a DSYNC write, make sure we push it to stable storage now
- * that we've written data.
- */
- if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
- ret = generic_write_sync(iocb, ret);
- if (ret > 0)
- ret += dio->done_before;
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ /*
+ * If this is a DSYNC write, make sure we push it to stable
+ * storage now that we've written data.
+ */
+ if (dio->flags & IOMAP_DIO_NEED_SYNC)
+ ret = generic_write_sync(iocb, ret);
+ if (ret > 0)
+ ret += dio->done_before;
+ }
trace_iomap_dio_complete(iocb, dio->error, ret);
kfree(dio);
-
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
@@ -478,7 +471,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
struct iomap_iter iomi = {
.inode = inode,
@@ -487,11 +479,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
.flags = IOMAP_DIRECT,
.private = private,
};
- loff_t end = iomi.pos + iomi.len - 1, ret = 0;
bool wait_for_completion =
is_sync_kiocb(iocb) || (dio_flags & IOMAP_DIO_FORCE_WAIT);
struct blk_plug plug;
struct iomap_dio *dio;
+ loff_t ret = 0;
trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);
@@ -515,31 +507,29 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->submit.waiter = current;
dio->submit.poll_bio = NULL;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ iomi.flags |= IOMAP_NOWAIT;
+
if (iov_iter_rw(iter) == READ) {
if (iomi.pos >= dio->i_size)
goto out_free_dio;
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, iomi.pos,
- end)) {
- ret = -EAGAIN;
- goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
- }
-
if (user_backed_iter(iter))
dio->flags |= IOMAP_DIO_DIRTY;
+
+ ret = kiocb_write_and_wait(iocb, iomi.len);
+ if (ret)
+ goto out_free_dio;
} else {
iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_has_page(mapping, iomi.pos, end)) {
- ret = -EAGAIN;
+ if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
+ ret = -EAGAIN;
+ if (iomi.pos >= dio->i_size ||
+ iomi.pos + iomi.len > dio->i_size)
goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_OVERWRITE_ONLY;
}
/* for data sync or sync, we need sync completion processing */
@@ -555,31 +545,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!(iocb->ki_flags & IOCB_SYNC))
dio->flags |= IOMAP_DIO_WRITE_FUA;
}
- }
- if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
- ret = -EAGAIN;
- if (iomi.pos >= dio->i_size ||
- iomi.pos + iomi.len > dio->i_size)
- goto out_free_dio;
- iomi.flags |= IOMAP_OVERWRITE_ONLY;
- }
-
- ret = filemap_write_and_wait_range(mapping, iomi.pos, end);
- if (ret)
- goto out_free_dio;
-
- if (iov_iter_rw(iter) == WRITE) {
/*
* Try to invalidate cache pages for the range we are writing.
* If this invalidation fails, let the caller fall back to
* buffered I/O.
*/
- if (invalidate_inode_pages2_range(mapping,
- iomi.pos >> PAGE_SHIFT, end >> PAGE_SHIFT)) {
- trace_iomap_dio_invalidate_fail(inode, iomi.pos,
- iomi.len);
- ret = -ENOTBLK;
+ ret = kiocb_invalidate_pages(iocb, iomi.len);
+ if (ret) {
+ if (ret != -EAGAIN) {
+ trace_iomap_dio_invalidate_fail(inode, iomi.pos,
+ iomi.len);
+ ret = -ENOTBLK;
+ }
goto out_free_dio;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 89cf614a3271..5b851315eeed 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1613,3 +1613,44 @@ u64 inode_query_iversion(struct inode *inode)
return cur >> I_VERSION_QUERIED_SHIFT;
}
EXPORT_SYMBOL(inode_query_iversion);
+
+ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
+ ssize_t direct_written, ssize_t buffered_written)
+{
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
+ loff_t pos = iocb->ki_pos - buffered_written;
+ loff_t end = iocb->ki_pos - 1;
+ int err;
+
+ /*
+ * If the buffered write fallback returned an error, we want to return
+ * the number of bytes which were written by direct I/O, or the error
+ * code if that was zero.
+ *
+ * Note that this differs from normal direct-io semantics, which will
+ * return -EFOO even if some bytes were written.
+ */
+ if (unlikely(buffered_written < 0)) {
+ if (direct_written)
+ return direct_written;
+ return buffered_written;
+ }
+
+ /*
+ * We need to ensure that the page cache pages are written to disk and
+ * invalidated to preserve the expected O_DIRECT semantics.
+ */
+ err = filemap_write_and_wait_range(mapping, pos, end);
+ if (err < 0) {
+ /*
+ * We don't know how much we wrote, so just return the number of
+ * bytes which were direct-written
+ */
+ if (direct_written)
+ return direct_written;
+ return err;
+ }
+ invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+ return direct_written + buffered_written;
+}
+EXPORT_SYMBOL_GPL(direct_write_fallback);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3855f3ce8d2d..79b1b3fcd3fc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -669,17 +669,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
since = filemap_sample_wb_err(file->f_mapping);
nfs_start_io_write(inode);
result = generic_write_checks(iocb, from);
- if (result > 0) {
- current->backing_dev_info = inode_to_bdi(inode);
+ if (result > 0)
result = generic_perform_write(iocb, from);
- current->backing_dev_info = NULL;
- }
nfs_end_io_write(inode);
if (result <= 0)
goto out;
written = result;
- iocb->ki_pos += written;
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index e8aeba124a95..4e158bce4192 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -526,7 +526,7 @@ err_out:
*
* Return 0 on success and -errno on error.
*
- * Based on ntfs_read_block() and __block_write_full_page().
+ * Based on ntfs_read_block() and __block_write_full_folio().
*/
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index e5e0ed58670b..cbc545999cfe 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1911,11 +1911,9 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(vi);
/* We can write back this queue in page reclaim. */
- current->backing_dev_info = inode_to_bdi(vi);
err = ntfs_prepare_file_for_write(iocb, from);
if (iov_iter_count(from) && !err)
written = ntfs_perform_write(file, from, iocb->ki_pos);
- current->backing_dev_info = NULL;
inode_unlock(vi);
iocb->ki_pos += written;
if (likely(written > 0))
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 036efd85f60c..9be3e8edf4f3 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -849,7 +849,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
if (!pages)
return -ENOMEM;
- current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err)
goto out;
@@ -1022,8 +1021,6 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
out:
kfree(pages);
- current->backing_dev_info = NULL;
-
if (err < 0)
return err;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 420510f6a545..507cd4e59d07 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -538,13 +538,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
bool migration = false, young = false, dirty = false;
+ pte_t ptent = ptep_get(pte);
- if (pte_present(*pte)) {
- page = vm_normal_page(vma, addr, *pte);
- young = pte_young(*pte);
- dirty = pte_dirty(*pte);
- } else if (is_swap_pte(*pte)) {
- swp_entry_t swpent = pte_to_swp_entry(*pte);
+ if (pte_present(ptent)) {
+ page = vm_normal_page(vma, addr, ptent);
+ young = pte_young(ptent);
+ dirty = pte_dirty(ptent);
+ } else if (is_swap_pte(ptent)) {
+ swp_entry_t swpent = pte_to_swp_entry(ptent);
if (!non_swap_entry(swpent)) {
int mapcount;
@@ -631,14 +632,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
goto out;
}
- if (pmd_trans_unstable(pmd))
- goto out;
- /*
- * The mmap_lock held all the way back in m_start() is what
- * keeps khugepaged out of here and from collapsing things
- * in here.
- */
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
for (; addr != end; pte++, addr += PAGE_SIZE)
smaps_pte_entry(pte, addr, walk);
pte_unmap_unlock(pte - 1, ptl);
@@ -735,11 +733,12 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
struct mem_size_stats *mss = walk->private;
struct vm_area_struct *vma = walk->vma;
struct page *page = NULL;
+ pte_t ptent = ptep_get(pte);
- if (pte_present(*pte)) {
- page = vm_normal_page(vma, addr, *pte);
- } else if (is_swap_pte(*pte)) {
- swp_entry_t swpent = pte_to_swp_entry(*pte);
+ if (pte_present(ptent)) {
+ page = vm_normal_page(vma, addr, ptent);
+ } else if (is_swap_pte(ptent)) {
+ swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
page = pfn_swap_entry_to_page(swpent);
@@ -1108,7 +1107,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
* Documentation/admin-guide/mm/soft-dirty.rst for full description
* of how soft-dirty works.
*/
- pte_t ptent = *pte;
+ pte_t ptent = ptep_get(pte);
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1191,12 +1190,13 @@ out:
return 0;
}
- if (pmd_trans_unstable(pmd))
- return 0;
-
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
for (; addr != end; pte++, addr += PAGE_SIZE) {
- ptent = *pte;
+ ptent = ptep_get(pte);
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
@@ -1538,9 +1538,6 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
spin_unlock(ptl);
return err;
}
-
- if (pmd_trans_unstable(pmdp))
- return 0;
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
@@ -1548,10 +1545,14 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
* goes beyond vma->vm_end.
*/
orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return err;
+ }
for (; addr < end; pte++, addr += PAGE_SIZE) {
pagemap_entry_t pme;
- pme = pte_to_pagemap_entry(pm, vma, addr, *pte);
+ pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
@@ -1689,23 +1690,23 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* watch out for wraparound */
start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
+ unsigned long end;
+
ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
mmap_read_unlock(mm);
+
+ end = start_vaddr + ((count / PM_ENTRY_BYTES) << PAGE_SHIFT);
+ if (end >= start_vaddr && end < mm->task_size)
+ end_vaddr = end;
}
/* Ensure the address is inside the task */
if (start_vaddr > mm->task_size)
start_vaddr = end_vaddr;
- /*
- * The odds are that this will stop walking way
- * before end_vaddr, because the length of the
- * user buffer is tracked in "pm", and the walk
- * will stop when we hit the end of the buffer.
- */
ret = 0;
while (count && (start_vaddr < end_vaddr)) {
int len;
@@ -1887,16 +1888,18 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
spin_unlock(ptl);
return 0;
}
-
- if (pmd_trans_unstable(pmd))
- return 0;
#endif
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
do {
- struct page *page = can_gather_numa_stats(*pte, vma, addr);
+ pte_t ptent = ptep_get(pte);
+ struct page *page = can_gather_numa_stats(ptent, vma, addr);
if (!page)
continue;
- gather_stats(page, md, pte_dirty(*pte), 1);
+ gather_stats(page, md, pte_dirty(ptent), 1);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(orig_pte, ptl);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 0ec35072a8e5..2c8b62265981 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -51,7 +51,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
sbytes += kobjsize(mm);
else
bytes += kobjsize(mm);
-
+
if (current->fs && current->fs->users > 1)
sbytes += kobjsize(current->fs);
else
@@ -69,13 +69,13 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
bytes += kobjsize(current); /* includes kernel stack */
+ mmap_read_unlock(mm);
+
seq_printf(m,
"Mem:\t%8lu bytes\n"
"Slack:\t%8lu bytes\n"
"Shared:\t%8lu bytes\n",
bytes, slack, sbytes);
-
- mmap_read_unlock(mm);
}
unsigned long task_vsize(struct mm_struct *mm)
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 5ba580c78835..fef477c78107 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -278,7 +278,7 @@ int ramfs_init_fs_context(struct fs_context *fc)
return 0;
}
-static void ramfs_kill_sb(struct super_block *sb)
+void ramfs_kill_sb(struct super_block *sb)
{
kfree(sb->s_fs_info);
kill_litter_super(sb);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d8debbb6105f..77bd3b27059f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2506,7 +2506,7 @@ out:
/*
* mason@suse.com: updated in 2.5.54 to follow the same general io
- * start/recovery path as __block_write_full_page, along with special
+ * start/recovery path as __block_write_full_folio, along with special
* code to handle reiserfs tails.
*/
static int reiserfs_write_full_page(struct page *page,
@@ -2872,6 +2872,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
+ struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
int ret = 0;
int update_sd = 0;
@@ -2887,12 +2888,12 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
start = pos & (PAGE_SIZE - 1);
if (unlikely(copied < len)) {
- if (!PageUptodate(page))
+ if (!folio_test_uptodate(folio))
copied = 0;
- page_zero_new_buffers(page, start + copied, start + len);
+ folio_zero_new_buffers(folio, start + copied, start + len);
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
reiserfs_commit_page(inode, page, start, start + copied);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 4e800bb7d2ab..7cecd49e078b 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -335,6 +335,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pud_t *pud;
pmd_t *pmd, _pmd;
pte_t *pte;
+ pte_t ptent;
bool ret = true;
mmap_assert_locked(mm);
@@ -349,20 +350,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
if (!pud_present(*pud))
goto out;
pmd = pmd_offset(pud, address);
- /*
- * READ_ONCE must function as a barrier with narrower scope
- * and it must be equivalent to:
- * _pmd = *pmd; barrier();
- *
- * This is to deal with the instability (as in
- * pmd_trans_unstable) of the pmd.
- */
- _pmd = READ_ONCE(*pmd);
+again:
+ _pmd = pmdp_get_lockless(pmd);
if (pmd_none(_pmd))
goto out;
ret = false;
- if (!pmd_present(_pmd))
+ if (!pmd_present(_pmd) || pmd_devmap(_pmd))
goto out;
if (pmd_trans_huge(_pmd)) {
@@ -371,19 +365,20 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
goto out;
}
- /*
- * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
- * and use the standard pte_offset_map() instead of parsing _pmd.
- */
pte = pte_offset_map(pmd, address);
+ if (!pte) {
+ ret = true;
+ goto again;
+ }
/*
* Lockless access: we're in a wait_event so it's ok if it
* changes under us. PTE markers should be handled the same as none
* ptes here.
*/
- if (pte_none_mostly(*pte))
+ ptent = ptep_get(pte);
+ if (pte_none_mostly(ptent))
ret = true;
- if (!pte_write(*pte) && (reason & VM_UFFD_WP))
+ if (!pte_write(ptent) && (reason & VM_UFFD_WP))
ret = true;
pte_unmap(pte);
@@ -857,31 +852,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
return false;
}
-int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct list_head *unmaps)
{
- VMA_ITERATOR(vmi, mm, start);
- struct vm_area_struct *vma;
-
- for_each_vma_range(vmi, vma, end) {
- struct userfaultfd_unmap_ctx *unmap_ctx;
- struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
- if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
- has_unmap_ctx(ctx, unmaps, start, end))
- continue;
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+ has_unmap_ctx(ctx, unmaps, start, end))
+ return 0;
- unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
- if (!unmap_ctx)
- return -ENOMEM;
+ unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+ if (!unmap_ctx)
+ return -ENOMEM;
- userfaultfd_ctx_get(ctx);
- atomic_inc(&ctx->mmap_changing);
- unmap_ctx->ctx = ctx;
- unmap_ctx->start = start;
- unmap_ctx->end = end;
- list_add_tail(&unmap_ctx->list, unmaps);
- }
+ userfaultfd_ctx_get(ctx);
+ atomic_inc(&ctx->mmap_changing);
+ unmap_ctx->ctx = ctx;
+ unmap_ctx->start = start;
+ unmap_ctx->end = end;
+ list_add_tail(&unmap_ctx->list, unmaps);
return 0;
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 08d632668e94..0c6671eb11d6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -745,14 +745,9 @@ write_retry:
if (ret)
goto out;
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = inode_to_bdi(inode);
-
trace_xfs_file_buffered_write(iocb, from);
ret = iomap_file_buffered_write(iocb, from,
&xfs_buffered_write_iomap_ops);
- if (likely(ret >= 0))
- iocb->ki_pos += ret;
/*
* If we hit a space limit, try to free up some lingering preallocated
@@ -781,7 +776,6 @@ write_retry:
goto write_retry;
}
- current->backing_dev_info = NULL;
out:
if (iolock)
xfs_iunlock(ip, iolock);
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 1451e7b48669..92c9aaae3663 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -644,9 +644,7 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
goto inode_unlock;
ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
- if (ret > 0)
- iocb->ki_pos += ret;
- else if (ret == -EIO)
+ if (ret == -EIO)
zonefs_io_error(inode, true);
inode_unlock: