From 13ef954445df4fd1d7c003a500ec5ce49573e14b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Oct 2019 08:43:42 -0700 Subject: iomap: Allow forcing of waiting for running DIO in iomap_dio_rw() Filesystems do not support doing IO as asynchronous in some cases. For example in case of unaligned writes or in case file size needs to be extended (e.g. for ext4). Instead of forcing filesystem to wait for AIO in such cases, add argument to iomap_dio_rw() which makes the function wait for IO completion. This also results in executing iomap_dio_complete() inline in iomap_dio_rw() providing its return value to the caller as for ordinary sync IO. Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7aa5d6117936..76b14cb729dc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -195,7 +195,8 @@ struct iomap_dio_ops { }; ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, const struct iomap_dio_ops *dops); + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion); int iomap_dio_iopoll(struct kiocb *kiocb, bool spin); #ifdef CONFIG_SWAP -- cgit v1.2.3 From 7684e2c4384d5d1f884b01ab8bff2369e4db0bff Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 17 Oct 2019 13:12:01 -0700 Subject: iomap: iomap that extends beyond EOF should be marked dirty When doing a direct IO that spans the current EOF, and there are written blocks beyond EOF that extend beyond the current write, the only metadata update that needs to be done is a file size extension. However, we don't mark such iomaps as IOMAP_F_DIRTY to indicate that there is IO completion metadata updates required, and hence we may fail to correctly sync file size extensions made in IO completion when O_DSYNC writes are being used and the hardware supports FUA. Hence when setting IOMAP_F_DIRTY, we need to also take into account whether the iomap spans the current EOF. If it does, then we need to mark it dirty so that IO completion will call generic_write_sync() to flush the inode size update to stable storage correctly. Fixes: 3460cac1ca76 ("iomap: Use FUA for pure data O_DSYNC DIO writes") Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong [darrick: removed the ext4 part; they'll handle it separately] Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 76b14cb729dc..4b25ad6b5edd 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -32,6 +32,8 @@ struct vm_fault; * * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * written data and requires fdatasync to commit them to persistent storage. + * This needs to take into account metadata changes that *may* be made at IO + * completion, such as file size updates from direct IO. */ #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ #define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ -- cgit v1.2.3 From 598ecfbaa742aca0dcdbbea25681406f95cc0b63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 13:12:15 -0700 Subject: iomap: lift the xfs writeback code to iomap Take the xfs writeback code and move it to fs/iomap. A new structure with three methods is added as the abstraction from the generic writeback code to the file system. These methods are used to map blocks, submit an ioend, and cancel a page that encountered an error before it was added to an ioend. Signed-off-by: Christoph Hellwig [darrick: rename ->submit_ioend to ->prepare_ioend to clarify what it does] Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner --- include/linux/iomap.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 4b25ad6b5edd..6e00bb843c7f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -12,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_writepage_ctx; struct iov_iter; struct kiocb; struct page; @@ -185,6 +187,63 @@ loff_t iomap_seek_data(struct inode *inode, loff_t offset, sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); +/* + * Structure for writeback I/O completions. + */ +struct iomap_ioend { + struct list_head io_list; /* next ioend in chain */ + u16 io_type; + u16 io_flags; /* IOMAP_F_* */ + struct inode *io_inode; /* file being written to */ + size_t io_size; /* size of the extent */ + loff_t io_offset; /* offset in the file */ + void *io_private; /* file system private data */ + struct bio *io_bio; /* bio being built */ + struct bio io_inline_bio; /* MUST BE LAST! */ +}; + +struct iomap_writeback_ops { + /* + * Required, maps the blocks so that writeback can be performed on + * the range starting at offset. + */ + int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, + loff_t offset); + + /* + * Optional, allows the file systems to perform actions just before + * submitting the bio and/or override the bio end_io handler for complex + * operations like copy on write extent manipulation or unwritten extent + * conversions. + */ + int (*prepare_ioend)(struct iomap_ioend *ioend, int status); + + /* + * Optional, allows the file system to discard state on a page where + * we failed to submit any I/O. + */ + void (*discard_page)(struct page *page); +}; + +struct iomap_writepage_ctx { + struct iomap iomap; + struct iomap_ioend *ioend; + const struct iomap_writeback_ops *ops; +}; + +void iomap_finish_ioends(struct iomap_ioend *ioend, int error); +void iomap_ioend_try_merge(struct iomap_ioend *ioend, + struct list_head *more_ioends, + void (*merge_private)(struct iomap_ioend *ioend, + struct iomap_ioend *next)); +void iomap_sort_ioends(struct list_head *ioend_list); +int iomap_writepage(struct page *page, struct writeback_control *wbc, + struct iomap_writepage_ctx *wpc, + const struct iomap_writeback_ops *ops); +int iomap_writepages(struct address_space *mapping, + struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, + const struct iomap_writeback_ops *ops); + /* * Flags for direct I/O ->end_io: */ -- cgit v1.2.3 From ab08b01ec0a205d9c98e712eb504c850a51e6fdb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Oct 2019 13:12:19 -0700 Subject: iomap: move struct iomap_page out of iomap.h Now that all the writepage code is in the iomap code there is no need to keep this structure public. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 6e00bb843c7f..0f741eebf3a7 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -136,23 +136,6 @@ loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, const struct iomap_ops *ops, void *data, iomap_actor_t actor); -/* - * Structure allocate for each page when block size < PAGE_SIZE to track - * sub-page uptodate status and I/O completions. - */ -struct iomap_page { - atomic_t read_count; - atomic_t write_count; - DECLARE_BITMAP(uptodate, PAGE_SIZE / 512); -}; - -static inline struct iomap_page *to_iomap_page(struct page *page) -{ - if (page_has_private(page)) - return (struct iomap_page *)page_private(page); - return NULL; -} - ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); int iomap_readpage(struct page *page, const struct iomap_ops *ops); -- cgit v1.2.3 From 65a60e8687c1c8f69aae3e77eafbf4a54b9f99e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:40:17 -0700 Subject: iomap: better document the IOMAP_F_* flags The documentation for IOMAP_F_* is a bit disorganized, and doesn't mention the fact that most flags are set by the file system and consumed by the iomap core, while IOMAP_F_SIZE_CHANGED is set by the core and consumed by the file system. Signed-off-by: Christoph Hellwig Reviewed-by: Allison Collins Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0f741eebf3a7..8016700e6121 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -30,23 +30,38 @@ struct vm_fault; #define IOMAP_INLINE 0x05 /* data inline in the inode */ /* - * Flags for all iomap mappings: + * Flags reported by the file system from iomap_begin: + * + * IOMAP_F_NEW indicates that the blocks have been newly allocated and need + * zeroing for areas that no data is copied to. * * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * written data and requires fdatasync to commit them to persistent storage. * This needs to take into account metadata changes that *may* be made at IO * completion, such as file size updates from direct IO. + * + * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be + * unshared as part a write. + * + * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block + * mappings. + * + * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of + * buffer heads for this mapping. */ -#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ -#define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ -#define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */ -#define IOMAP_F_SIZE_CHANGED 0x08 /* file size has changed */ +#define IOMAP_F_NEW 0x01 +#define IOMAP_F_DIRTY 0x02 +#define IOMAP_F_SHARED 0x04 +#define IOMAP_F_MERGED 0x08 +#define IOMAP_F_BUFFER_HEAD 0x10 /* - * Flags that only need to be reported for IOMAP_REPORT requests: + * Flags set by the core iomap code during operations: + * + * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size + * has changed as the result of this write operation. */ -#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ -#define IOMAP_F_SHARED 0x20 /* block shared with another file */ +#define IOMAP_F_SIZE_CHANGED 0x100 /* * Flags from 0x1000 up are for file system specific usage: -- cgit v1.2.3 From 3590c4d8979bcc364e2ded95ab3966b4e436b7bf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:41:34 -0700 Subject: iomap: ignore non-shared or non-data blocks in xfs_file_dirty xfs_file_dirty is used to unshare reflink blocks. Rename the function to xfs_file_unshare to better document that purpose, and skip iomaps that are not shared and don't need zeroing. This will allow to simplify the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 8016700e6121..1623851ade90 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -168,7 +168,7 @@ int iomap_migrate_page(struct address_space *mapping, struct page *newpage, #else #define iomap_migrate_page NULL #endif -int iomap_file_dirty(struct inode *inode, loff_t pos, loff_t len, +int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); -- cgit v1.2.3 From eb81cf9d0e18d438e27339e0d1a49d3ac8644674 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Oct 2019 16:43:08 -0700 Subject: iomap: renumber IOMAP_HOLE to 0 Instead of keeping a separate unnamed state for uninitialized iomaps, renumber IOMAP_HOLE to zero so that an uninitialized iomap is treated as a hole. Suggested-by: Darrick J. Wong Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 1623851ade90..53e6e2275d3d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -23,11 +23,11 @@ struct vm_fault; /* * Types of block ranges for iomap mappings: */ -#define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ -#define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ -#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */ -#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */ -#define IOMAP_INLINE 0x05 /* data inline in the inode */ +#define IOMAP_HOLE 0 /* no blocks allocated, need allocation */ +#define IOMAP_DELALLOC 1 /* delayed allocation blocks */ +#define IOMAP_MAPPED 2 /* blocks allocated at @addr */ +#define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */ +#define IOMAP_INLINE 4 /* data inline in the inode */ /* * Flags reported by the file system from iomap_begin: -- cgit v1.2.3 From c039b99792726346ad46ff17c5a5bcb77a5edac4 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 18 Oct 2019 16:44:10 -0700 Subject: iomap: use a srcmap for a read-modify-write I/O The srcmap is used to identify where the read is to be performed from. It is passed to ->iomap_begin, which can fill it in if we need to read data for partially written blocks from a different location than the write target. The srcmap is only supported for buffered writes so far. Signed-off-by: Goldwyn Rodrigues [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as srcmap if not set, adjust length down to srcmap end as well] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Acked-by: Goldwyn Rodrigues --- include/linux/iomap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 53e6e2275d3d..8b09463dae0d 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -129,7 +129,8 @@ struct iomap_ops { * The actual length is returned in iomap->length. */ int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, - unsigned flags, struct iomap *iomap); + unsigned flags, struct iomap *iomap, + struct iomap *srcmap); /* * Commit and/or unreserve space previous allocated using iomap_begin. @@ -145,7 +146,7 @@ struct iomap_ops { * Main iomap iterator function. */ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len, - void *data, struct iomap *iomap); + void *data, struct iomap *iomap, struct iomap *srcmap); loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, const struct iomap_ops *ops, void *data, -- cgit v1.2.3