summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/ext4/ext4.h 11
-rw-r--r-- fs/ext4/file.c 4
-rw-r--r-- fs/ext4/inode.c 43
3 files changed, 44 insertions, 14 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 89e1bcb21341..b84aa1ca480a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2527,8 +2527,8 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
@@ -3334,6 +3334,13 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
}
}
+/* Return true iff @off and @len are both multiples of 1 << inode->i_blkbits. */
+static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len)
+{
+ /* The block size is a power of two, so one mask test covers both values. */
+ return IS_ALIGNED(off | len, 1 << inode->i_blkbits);
+}
+
#endif /* __KERNEL__ */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3e850b988923..37e28082885a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,7 +207,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
+ result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
@@ -243,7 +243,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
result = VM_FAULT_SIGBUS;
else
result = __dax_pmd_fault(vma, addr, pmd, flags,
- ext4_dax_mmap_get_block, NULL);
+ ext4_dax_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4879e93c91d3..f9ab1e8cc416 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3229,13 +3229,17 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
}
#ifdef CONFIG_FS_DAX
-int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+/*
+ * Get block function for DAX IO and mmap faults. It takes care of converting
+ * unwritten extents to written ones and initializes new / converted blocks
+ * to zeros.
+ */
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
int ret;
- ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
- inode->i_ino, create);
+ ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create);
if (!create)
return _ext4_get_block(inode, iblock, bh_result, 0);
@@ -3247,9 +3251,9 @@ int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
if (buffer_unwritten(bh_result)) {
/*
- * We are protected by i_mmap_sem so we know block cannot go
- * away from under us even though we dropped i_data_sem.
- * Convert extent to written and write zeros there.
+ * We are protected by i_mmap_sem or i_mutex so we know block
+ * cannot go away from under us even though we dropped
+ * i_data_sem. Convert extent to written and write zeros there.
*/
ret = ext4_get_block_trans(inode, iblock, bh_result,
EXT4_GET_BLOCKS_CONVERT |
@@ -3264,6 +3268,14 @@ int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
clear_buffer_new(bh_result);
return 0;
}
+#else
+/* Stub for !CONFIG_FS_DAX builds: must never be called, so BUG() if it is. */
+int ext4_dax_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ BUG();
+ return 0;
+}
#endif
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
@@ -3385,8 +3397,20 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter,
iocb->private = NULL;
if (overwrite)
get_block_func = ext4_dio_get_block_overwrite;
- else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
- round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
+ else if (IS_DAX(inode)) {
+ /*
+ * We can avoid zeroing for aligned DAX writes beyond EOF. Other
+ * writes need zeroing either because they can race with page
+ * faults or because they use partial blocks.
+ */
+ if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size &&
+ ext4_aligned_io(inode, offset, count))
+ get_block_func = ext4_dio_get_block;
+ else
+ get_block_func = ext4_dax_get_block;
+ dio_flags = DIO_LOCKING;
+ } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||
+ round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {
get_block_func = ext4_dio_get_block;
dio_flags = DIO_LOCKING | DIO_SKIP_HOLES;
} else if (is_sync_kiocb(iocb)) {
@@ -3400,7 +3424,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter,
BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
#endif
if (IS_DAX(inode)) {
- dio_flags &= ~DIO_SKIP_HOLES;
ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
ext4_end_io_dio, dio_flags);
} else