summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Wilcox <matthew.r.wilcox@intel.com>2015-02-16 15:58:53 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-16 17:56:03 -0800
commitfbbbad4bc2101e452b24e6e65d3d5e11314a0b5f (patch)
tree7d12515701e867b88856b0c7f78c9abd7a61785b
parent2e4cdab0584fa884e0a81c4f45b93ce875c9fcaa (diff)
downloadlinux-fbbbad4bc2101e452b24e6e65d3d5e11314a0b5f.tar.gz
linux-fbbbad4bc2101e452b24e6e65d3d5e11314a0b5f.tar.bz2
linux-fbbbad4bc2101e452b24e6e65d3d5e11314a0b5f.zip
vfs,ext2: introduce IS_DAX(inode)
Use an inode flag to tag inodes which should avoid using the page cache. Convert ext2 to use it instead of mapping_is_xip(). Prevent I/Os to files tagged with the DAX flag from falling back to buffered I/O. Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com> Reviewed-by: Jan Kara <jack@suse.cz> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Andreas Dilger <andreas.dilger@intel.com> Cc: Boaz Harrosh <boaz@plexistor.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Dave Chinner <david@fromorbit.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/ext2/inode.c9
-rw-r--r--fs/ext2/xip.h2
-rw-r--r--include/linux/fs.h6
-rw-r--r--mm/filemap.c19
4 files changed, 24 insertions, 12 deletions
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 36d35c36311d..0cb04486577d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -731,7 +731,7 @@ static int ext2_get_blocks(struct inode *inode,
goto cleanup;
}
- if (ext2_use_xip(inode->i_sb)) {
+ if (IS_DAX(inode)) {
/*
* we need to clear the block
*/
@@ -1201,7 +1201,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
inode_dio_wait(inode);
- if (mapping_is_xip(inode->i_mapping))
+ if (IS_DAX(inode))
error = xip_truncate_page(inode->i_mapping, newsize);
else if (test_opt(inode->i_sb, NOBH))
error = nobh_truncate_page(inode->i_mapping,
@@ -1273,7 +1273,8 @@ void ext2_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT2_I(inode)->i_flags;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
+ S_DIRSYNC | S_DAX);
if (flags & EXT2_SYNC_FL)
inode->i_flags |= S_SYNC;
if (flags & EXT2_APPEND_FL)
@@ -1284,6 +1285,8 @@ void ext2_set_inode_flags(struct inode *inode)
inode->i_flags |= S_NOATIME;
if (flags & EXT2_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
+ if (test_opt(inode->i_sb, XIP))
+ inode->i_flags |= S_DAX;
}
/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
index 18b34d2f31b3..29be73781419 100644
--- a/fs/ext2/xip.h
+++ b/fs/ext2/xip.h
@@ -16,9 +16,7 @@ static inline int ext2_use_xip (struct super_block *sb)
}
int ext2_get_xip_mem(struct address_space *, pgoff_t, int,
void **, unsigned long *);
-#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem)
#else
-#define mapping_is_xip(map) 0
#define ext2_xip_verify_sb(sb) do { } while (0)
#define ext2_use_xip(sb) 0
#define ext2_clear_xip_target(inode, chain) 0
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e49f10cc8a73..fb373bb5cf03 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1677,6 +1677,11 @@ struct super_operations {
#define S_IMA 1024 /* Inode has an associated IMA struct */
#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
#define S_NOSEC 4096 /* no suid or xattr security attributes */
+#ifdef CONFIG_FS_XIP
+#define S_DAX 8192 /* Direct Access, avoiding the page cache */
+#else
+#define S_DAX 0 /* Make all the DAX code disappear */
+#endif
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
@@ -1714,6 +1719,7 @@ struct super_operations {
#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
+#define IS_DAX(inode) ((inode)->i_flags & S_DAX)
#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \
(inode)->i_rdev == WHITEOUT_DEV)
diff --git a/mm/filemap.c b/mm/filemap.c
index d9f5336552d7..1578c224285e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1723,9 +1723,11 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
* we've already read everything we wanted to, or if
* there was a short read because we hit EOF, go ahead
* and return. Otherwise fallthrough to buffered io for
- * the rest of the read.
+ * the rest of the read. Buffered reads will not work for
+ * DAX files, so don't bother trying.
*/
- if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+ if (retval < 0 || !iov_iter_count(iter) || *ppos >= size ||
+ IS_DAX(inode)) {
file_accessed(file);
goto out;
}
@@ -2587,13 +2589,16 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t endbyte;
written = generic_file_direct_write(iocb, from, pos);
- if (written < 0 || written == count)
- goto out;
-
/*
- * direct-io write to a hole: fall through to buffered I/O
- * for completing the rest of the request.
+ * If the write stopped short of completing, fall back to
+ * buffered writes. Some filesystems do this for writes to
+ * holes, for example. For DAX files, a buffered write will
+ * not succeed (even if it did, DAX does not handle dirty
+ * page-cache pages correctly).
*/
+ if (written < 0 || written == count || IS_DAX(inode))
+ goto out;
+
pos += written;
count -= written;