summaryrefslogtreecommitdiffstats
path: root/fs/iomap/direct-io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
commit6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch)
tree2c425707f78642625dbe2c824c7fded2021e3dc7 /fs/iomap/direct-io.c
parent6aeadf7896bff4ca230702daba8788455e6b866e (diff)
parentacc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff)
downloadlinux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.gz
linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.bz2
linux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.zip
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton: - Yosry Ahmed brought back some cgroup v1 stats in OOM logs - Yosry has also eliminated cgroup's atomic rstat flushing - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning - Lorenzo Stoakes has done some maintanance work on the get_user_pages() interface - Liam Howlett continues with cleanups and maintenance work to the maple tree code. Peng Zhang also does some work on maple tree - Johannes Weiner has done some cleanup work on the compaction code - David Hildenbrand has contributed additional selftests for get_user_pages() - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code - Baolin Wang has provided some compaction cleanups, - SeongJae Park continues maintenance work on the DAMON code - Huang Ying has done some maintenance on the swap code's usage of device refcounting - Christoph Hellwig has some cleanups for the filemap/directio code - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings - John Hubbard has a series of fixes to the MM selftesting code - ZhangPeng continues the folio conversion campaign - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8 - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code - Vishal Moola also has done some folio conversion work - Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch * tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits) mm/hugetlb: remove hugetlb_set_page_subpool() mm: nommu: correct the range of mmap_sem_read_lock in task_mem() hugetlb: revert use of page_cache_next_miss() Revert "page cache: fix page_cache_next/prev_miss off by one" mm/vmscan: fix root proactive reclaim unthrottling unbalanced node mm: memcg: rename and document global_reclaim() mm: kill [add|del]_page_to_lru_list() mm: compaction: convert to use a folio in isolate_migratepages_block() mm: zswap: fix double invalidate with exclusive loads mm: remove unnecessary pagevec includes mm: remove references to pagevec mm: rename invalidate_mapping_pagevec to mapping_try_invalidate mm: remove struct pagevec net: convert sunrpc from pagevec to folio_batch i915: convert i915_gpu_error to use a folio_batch pagevec: rename fbatch_count() mm: remove check_move_unevictable_pages() drm: convert drm_gem_put_pages() to use a folio_batch i915: convert shmem_sg_free_table() to use a folio_batch scatterlist: add sg_set_folio() ...
Diffstat (limited to 'fs/iomap/direct-io.c')
-rw-r--r--fs/iomap/direct-io.c88
1 files changed, 33 insertions, 55 deletions
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 08873f0627dd..ea3b868c8355 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -81,7 +81,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
const struct iomap_dio_ops *dops = dio->dops;
struct kiocb *iocb = dio->iocb;
- struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
ssize_t ret = dio->error;
@@ -94,7 +93,6 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
ret = dio->i_size - offset;
- iocb->ki_pos += ret;
}
/*
@@ -109,30 +107,25 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
* ->end_io() when necessary, otherwise a racing buffer read would cache
* zeros from unwritten extents.
*/
- if (!dio->error && dio->size &&
- (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
- int err;
- err = invalidate_inode_pages2_range(inode->i_mapping,
- offset >> PAGE_SHIFT,
- (offset + dio->size - 1) >> PAGE_SHIFT);
- if (err)
- dio_warn_stale_pagecache(iocb->ki_filp);
- }
+ if (!dio->error && dio->size && (dio->flags & IOMAP_DIO_WRITE))
+ kiocb_invalidate_post_direct_write(iocb, dio->size);
inode_dio_end(file_inode(iocb->ki_filp));
- /*
- * If this is a DSYNC write, make sure we push it to stable storage now
- * that we've written data.
- */
- if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
- ret = generic_write_sync(iocb, ret);
- if (ret > 0)
- ret += dio->done_before;
+ if (ret > 0) {
+ iocb->ki_pos += ret;
+ /*
+ * If this is a DSYNC write, make sure we push it to stable
+ * storage now that we've written data.
+ */
+ if (dio->flags & IOMAP_DIO_NEED_SYNC)
+ ret = generic_write_sync(iocb, ret);
+ if (ret > 0)
+ ret += dio->done_before;
+ }
trace_iomap_dio_complete(iocb, dio->error, ret);
kfree(dio);
-
return ret;
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
@@ -478,7 +471,6 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before)
{
- struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = file_inode(iocb->ki_filp);
struct iomap_iter iomi = {
.inode = inode,
@@ -487,11 +479,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
.flags = IOMAP_DIRECT,
.private = private,
};
- loff_t end = iomi.pos + iomi.len - 1, ret = 0;
bool wait_for_completion =
is_sync_kiocb(iocb) || (dio_flags & IOMAP_DIO_FORCE_WAIT);
struct blk_plug plug;
struct iomap_dio *dio;
+ loff_t ret = 0;
trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);
@@ -515,31 +507,29 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->submit.waiter = current;
dio->submit.poll_bio = NULL;
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ iomi.flags |= IOMAP_NOWAIT;
+
if (iov_iter_rw(iter) == READ) {
if (iomi.pos >= dio->i_size)
goto out_free_dio;
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_needs_writeback(mapping, iomi.pos,
- end)) {
- ret = -EAGAIN;
- goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
- }
-
if (user_backed_iter(iter))
dio->flags |= IOMAP_DIO_DIRTY;
+
+ ret = kiocb_write_and_wait(iocb, iomi.len);
+ if (ret)
+ goto out_free_dio;
} else {
iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (filemap_range_has_page(mapping, iomi.pos, end)) {
- ret = -EAGAIN;
+ if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
+ ret = -EAGAIN;
+ if (iomi.pos >= dio->i_size ||
+ iomi.pos + iomi.len > dio->i_size)
goto out_free_dio;
- }
- iomi.flags |= IOMAP_NOWAIT;
+ iomi.flags |= IOMAP_OVERWRITE_ONLY;
}
/* for data sync or sync, we need sync completion processing */
@@ -555,31 +545,19 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (!(iocb->ki_flags & IOCB_SYNC))
dio->flags |= IOMAP_DIO_WRITE_FUA;
}
- }
- if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
- ret = -EAGAIN;
- if (iomi.pos >= dio->i_size ||
- iomi.pos + iomi.len > dio->i_size)
- goto out_free_dio;
- iomi.flags |= IOMAP_OVERWRITE_ONLY;
- }
-
- ret = filemap_write_and_wait_range(mapping, iomi.pos, end);
- if (ret)
- goto out_free_dio;
-
- if (iov_iter_rw(iter) == WRITE) {
/*
* Try to invalidate cache pages for the range we are writing.
* If this invalidation fails, let the caller fall back to
* buffered I/O.
*/
- if (invalidate_inode_pages2_range(mapping,
- iomi.pos >> PAGE_SHIFT, end >> PAGE_SHIFT)) {
- trace_iomap_dio_invalidate_fail(inode, iomi.pos,
- iomi.len);
- ret = -ENOTBLK;
+ ret = kiocb_invalidate_pages(iocb, iomi.len);
+ if (ret) {
+ if (ret != -EAGAIN) {
+ trace_iomap_dio_invalidate_fail(inode, iomi.pos,
+ iomi.len);
+ ret = -ENOTBLK;
+ }
goto out_free_dio;
}