summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/direct-io.c20
-rw-r--r--fs/iomap.c41
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c11
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h1
-rw-r--r--fs/xfs/xfs_aops.c47
-rw-r--r--fs/xfs/xfs_fsmap.c48
6 files changed, 107 insertions, 61 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 563254869e2f..b53e66d9abd7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -265,12 +265,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
if (ret == 0)
ret = transferred;
+ if (dio->end_io) {
+ // XXX: ki_pos??
+ err = dio->end_io(dio->iocb, offset, ret, dio->private);
+ if (err)
+ ret = err;
+ }
+
/*
* Try again to invalidate clean pages which might have been cached by
* non-direct readahead, or faulted in by get_user_pages() if the source
* of the write was an mmap'ed region of the file we're writing. Either
* one is a pretty crazy thing to do, so we don't support it 100%. If
* this invalidation fails, tough, the write still worked...
+ *
+ * And this page cache invalidation has to be after dio->end_io(), as
+ * some filesystems convert unwritten extents to real allocations in
+ * end_io() when necessary, otherwise a racing buffer read would cache
+ * zeros from unwritten extents.
*/
if (flags & DIO_COMPLETE_INVALIDATE &&
ret > 0 && dio->op == REQ_OP_WRITE &&
@@ -281,14 +293,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
WARN_ON_ONCE(err);
}
- if (dio->end_io) {
-
- // XXX: ki_pos??
- err = dio->end_io(dio->iocb, offset, ret, dio->private);
- if (err)
- ret = err;
- }
-
if (!(dio->flags & DIO_SKIP_DIO_COUNT))
inode_dio_end(dio->inode);
diff --git a/fs/iomap.c b/fs/iomap.c
index be61cf742b5e..d4801f8dd4fd 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
{
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
+ loff_t offset = iocb->ki_pos;
ssize_t ret;
- /*
- * Try again to invalidate clean pages which might have been cached by
- * non-direct readahead, or faulted in by get_user_pages() if the source
- * of the write was an mmap'ed region of the file we're writing. Either
- * one is a pretty crazy thing to do, so we don't support it 100%. If
- * this invalidation fails, tough, the write still worked...
- */
- if (!dio->error &&
- (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- iocb->ki_pos >> PAGE_SHIFT,
- (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
- WARN_ON_ONCE(ret);
- }
-
if (dio->end_io) {
ret = dio->end_io(iocb,
dio->error ? dio->error : dio->size,
@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
if (likely(!ret)) {
ret = dio->size;
/* check for short read */
- if (iocb->ki_pos + ret > dio->i_size &&
+ if (offset + ret > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
- ret = dio->i_size - iocb->ki_pos;
+ ret = dio->i_size - offset;
iocb->ki_pos += ret;
}
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ *
+ * And this page cache invalidation has to be after dio->end_io(), as
+ * some filesystems convert unwritten extents to real allocations in
+ * end_io() when necessary, otherwise a racing buffer read would cache
+ * zeros from unwritten extents.
+ */
+ if (!dio->error &&
+ (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+ int err;
+ err = invalidate_inode_pages2_range(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ (offset + dio->size - 1) >> PAGE_SHIFT);
+ WARN_ON_ONCE(err);
+ }
+
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index def32fa1c225..89263797cf32 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3852,6 +3852,17 @@ xfs_trim_extent(
}
}
+/* trim extent to within eof */
+void
+xfs_trim_extent_eof(
+ struct xfs_bmbt_irec *irec,
+ struct xfs_inode *ip)
+
+{
+ xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
+ i_size_read(VFS_I(ip))));
+}
+
/*
* Trim the returned map to the required bounds
*/
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 851982a5dfbc..502e0d8fb4ff 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
+void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..a3eeaba156c5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -446,6 +446,19 @@ xfs_imap_valid(
{
offset >>= inode->i_blkbits;
+ /*
+ * We have to make sure the cached mapping is within EOF to protect
+ * against eofblocks trimming on file release leaving us with a stale
+ * mapping. Otherwise, a page for a subsequent file extending buffered
+ * write could get picked up by this writeback cycle and written to the
+ * wrong blocks.
+ *
+ * Note that what we really want here is a generic mapping invalidation
+ * mechanism to protect us from arbitrary extent modifying contexts, not
+ * just eofblocks.
+ */
+ xfs_trim_extent_eof(imap, XFS_I(inode));
+
return offset >= imap->br_startoff &&
offset < imap->br_startoff + imap->br_blockcount;
}
@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
{
trace_xfs_invalidatepage(page->mapping->host, page, offset,
length);
+
+ /*
+ * If we are invalidating the entire page, clear the dirty state from it
+ * so that we can check for attempts to release dirty cached pages in
+ * xfs_vm_releasepage().
+ */
+ if (offset == 0 && length >= PAGE_SIZE)
+ cancel_dirty_page(page);
block_invalidatepage(page, offset, length);
}
@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
* ->releasepage() via shrink_active_list(). Conversely,
- * block_invalidatepage() can send pages that are still marked dirty
- * but otherwise have invalidated buffers.
+ * block_invalidatepage() can send pages that are still marked dirty but
+ * otherwise have invalidated buffers.
*
* We want to release the latter to avoid unnecessary buildup of the
- * LRU, skip the former and warn if we've left any lingering
- * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
- * or unwritten buffers and warn if the page is not dirty. Otherwise
- * try to release the buffers.
+ * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+ * that are entirely invalidated and need to be released. Hence the
+ * only time we should get dirty pages here is through
+ * shrink_active_list() and so we can simply skip those now.
+ *
+ * warn if we've left any lingering delalloc/unwritten buffers on clean
+ * or invalidated pages we are about to release.
*/
+ if (PageDirty(page))
+ return 0;
+
xfs_count_page_state(page, &delalloc, &unwritten);
- if (delalloc) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(delalloc))
return 0;
- }
- if (unwritten) {
- WARN_ON_ONCE(!PageDirty(page));
+ if (WARN_ON_ONCE(unwritten))
return 0;
- }
return try_to_free_buffers(page);
}
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 560e0b40ac1b..43cfc07996a4 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
}
-/* Transform a rtbitmap "record" into a fsmap */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_helper(
- struct xfs_trans *tp,
- struct xfs_rtalloc_rec *rec,
- void *priv)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_getfsmap_info *info = priv;
- struct xfs_rmap_irec irec;
- xfs_daddr_t rec_daddr;
-
- rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-
- irec.rm_startblock = rec->ar_startblock;
- irec.rm_blockcount = rec->ar_blockcount;
- irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
- irec.rm_offset = 0;
- irec.rm_flags = 0;
-
- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
-}
-
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
return xfs_getfsmap_helper(tp, info, &rmap, 0);
}
+#ifdef CONFIG_XFS_RT
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+ struct xfs_trans *tp,
+ struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_getfsmap_info *info = priv;
+ struct xfs_rmap_irec irec;
+ xfs_daddr_t rec_daddr;
+
+ rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+
+ irec.rm_startblock = rec->ar_startblock;
+ irec.rm_blockcount = rec->ar_blockcount;
+ irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
+ irec.rm_offset = 0;
+ irec.rm_flags = 0;
+
+ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
/* Execute a getfsmap query against the realtime device. */
STATIC int
__xfs_getfsmap_rtdev(
@@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev(
return query_fn(tp, info);
}
-#ifdef CONFIG_XFS_RT
/* Actually query the realtime bitmap. */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_query(