From ee70daaba82d70766d0723b743d9fdeb3b06102a Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 21 Sep 2017 11:26:18 -0700 Subject: xfs: update i_size after unwritten conversion in dio completion Since commit d531d91d6990 ("xfs: always use unwritten extents for direct I/O writes"), we start allocating unwritten extents for all direct writes to allow appending aio in XFS. But for dio writes that could extend file size we update the in-core inode size first, then convert the unwritten extents to real allocations at dio completion time in xfs_dio_write_end_io(). Thus a racing direct read could see the new i_size and find the unwritten extents first and read zeros instead of actual data, if the direct writer also takes a shared iolock. Fix it by updating the in-core inode size after the unwritten extent conversion. To do this, introduce a new boolean argument to xfs_iomap_write_unwritten() to tell if we want to update in-core i_size or not. Suggested-by: Brian Foster Reviewed-by: Brian Foster Signed-off-by: Eryu Guan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_iomap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a1909bc064e9..f179bdf1644d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -829,7 +829,8 @@ int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool update_isize) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; @@ -840,6 +841,7 @@ xfs_iomap_write_unwritten( xfs_trans_t *tp; xfs_bmbt_irec_t imap; struct xfs_defer_ops dfops; + struct inode *inode = VFS_I(ip); xfs_fsize_t i_size; uint resblks; int error; @@ -899,7 +901,8 @@ xfs_iomap_write_unwritten( i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); if (i_size > offset + count) i_size = offset + count; - + if (update_isize && i_size > i_size_read(inode)) + i_size_write(inode, i_size); i_size = xfs_new_eof(ip, i_size); if (i_size) { ip->i_d.di_size = i_size; -- cgit v1.2.3 From a39e596baa07cb1dc19c2ead14c9fd2a30f22352 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 1 Nov 2017 16:36:47 +0100 Subject: xfs: support for synchronous DAX faults Return IOMAP_F_DIRTY from xfs_file_iomap_begin() when asked to prepare blocks for writing and the inode is pinned, and has dirty fields other than the timestamps. In __xfs_filemap_fault() we then detect this case and call dax_finish_sync_fault() to make sure all metadata is committed, and to insert the page table entry. Note that this will also dirty corresponding radix tree entry which is what we want - fsync(2) will still provide data integrity guarantees for applications not using userspace flushing. And applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead. [JK: Added VM_SYNC flag handling] Reviewed-by: Ross Zwisler Signed-off-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- fs/xfs/xfs_iomap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f179bdf1644d..b43be199fbdf 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -33,6 +33,7 @@ #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_space.h" +#include "xfs_inode_item.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" @@ -1086,6 +1087,10 @@ xfs_file_iomap_begin( trace_xfs_iomap_found(ip, offset, length, 0, &imap); } + if ((flags & IOMAP_WRITE) && xfs_ipincount(ip) && + (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) + iomap->flags |= IOMAP_F_DIRTY; + xfs_bmbt_to_iomap(ip, iomap, &imap); if (shared) -- cgit v1.2.3 From aaa422c4c3f6ee958ea9d6c9260ac40f90a3f4e9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Nov 2017 16:38:44 -0800 Subject: fs, dax: unify IOMAP_F_DIRTY read vs write handling policy in the dax core While reviewing whether MAP_SYNC should strengthen its current guarantee of syncing writes from the initiating process to also include third-party readers observing dirty metadata, Dave pointed out that the check of IOMAP_WRITE is misplaced. The policy of what to with IOMAP_F_DIRTY should be separated from the generic filesystem mechanism of reporting dirty metadata. Move this policy to the fs-dax core to simplify the per-filesystem iomap handlers, and further centralize code that implements the MAP_SYNC policy. This otherwise should not change behavior, it just makes it easier to change behavior in the future. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Cc: Ross Zwisler Reported-by: Dave Chinner Signed-off-by: Dan Williams --- fs/xfs/xfs_iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_iomap.c') diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index b43be199fbdf..3c0e4cf72d2b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1087,8 +1087,8 @@ xfs_file_iomap_begin( trace_xfs_iomap_found(ip, offset, length, 0, &imap); } - if ((flags & IOMAP_WRITE) && xfs_ipincount(ip) && - (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) + if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields + & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; xfs_bmbt_to_iomap(ip, iomap, &imap); -- cgit v1.2.3