From c63a8eae63d3859c9c7067aa239a4cfd7423a665 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 12 Mar 2018 14:12:29 -0700 Subject: xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL In preparation for adding coordination between extent unmap operations and busy dax-pages, update xfs_break_layouts() to permit it to be called with the mmap lock held. This lock scheme will be required for coordinating the break of 'dax layouts' (non-idle dax (ZONE_DEVICE) pages mapped into the file's address space). Breaking dax layouts will be added to xfs_break_layouts() in a future patch, for now this preps the unmap call sites to take and hold XFS_MMAPLOCK_EXCL over the call to xfs_break_layouts(). Cc: "Darrick J. Wong" Cc: Ross Zwisler Cc: Dave Chinner Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 299aee4b7b0b..35309bd046be 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -734,7 +734,7 @@ xfs_file_fallocate( struct xfs_inode *ip = XFS_I(inode); long error; enum xfs_prealloc_flags flags = 0; - uint iolock = XFS_IOLOCK_EXCL; + uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; loff_t new_size = 0; bool do_file_insert = false; @@ -748,9 +748,6 @@ xfs_file_fallocate( if (error) goto out_unlock; - xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - iolock |= XFS_MMAPLOCK_EXCL; - if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) -- cgit v1.2.3 From 69eb5fa10eb283e9fcae3ce6f8aaf103b8f0c28d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 20 Mar 2018 14:42:38 -0700 Subject: xfs: prepare xfs_break_layouts() for another layout type When xfs is operating as the back-end of a pNFS block server, it prevents collisions between local and remote operations by requiring a lease to be held for remotely accessed blocks. Local filesystem operations break those leases before writing or mutating the extent map of the file. A similar mechanism is needed to prevent operations on pinned dax mappings, like device-DMA, from colliding with extent unmap operations. BREAK_WRITE and BREAK_UNMAP are introduced as two distinct levels of layout breaking. Layouts are broken in the BREAK_WRITE case to ensure that layout-holders do not collide with local writes. Additionally, layouts are broken in the BREAK_UNMAP case to make sure the layout-holder has a consistent view of the file's extent map. While BREAK_WRITE breaks can be satisfied be recalling FL_LAYOUT leases, BREAK_UNMAP breaks additionally require waiting for busy dax-pages to go idle while holding XFS_MMAPLOCK_EXCL. After this refactoring xfs_break_layouts() becomes the entry point for coordinating both types of breaks. Finally, xfs_break_leased_layouts() becomes just the BREAK_WRITE handler. Note that the unlock tracking is needed in a follow on change. That will coordinate retrying either break handler until both successfully test for a lease break while maintaining the lock state. Cc: Ross Zwisler Cc: "Darrick J. Wong" Reported-by: Dave Chinner Reported-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 35309bd046be..4774c7172ef4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -312,7 +312,7 @@ restart: if (error <= 0) return error; - error = xfs_break_layouts(inode, iolock); + error = xfs_break_layouts(inode, iolock, BREAK_WRITE); if (error) return error; @@ -718,6 +718,28 @@ buffered: return ret; } +int +xfs_break_layouts( + struct inode *inode, + uint *iolock, + enum layout_break_reason reason) +{ + bool retry; + + ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); + + switch (reason) { + case BREAK_UNMAP: + ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); + /* fall through */ + case BREAK_WRITE: + return xfs_break_leased_layouts(inode, iolock, &retry); + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} + #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ @@ -744,7 +766,7 @@ xfs_file_fallocate( return -EOPNOTSUPP; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); if (error) goto out_unlock; -- cgit v1.2.3 From d6dc57e251a43c428a9ee3adb7665543a1a584f0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 9 May 2018 15:47:49 -0700 Subject: xfs, dax: introduce xfs_break_dax_layouts() xfs_break_dax_layouts(), similar to xfs_break_leased_layouts(), scans for busy / pinned dax pages and waits for those pages to go idle before any potential extent unmap operation. dax_layout_busy_page() handles synchronizing against new page-busy events (get_user_pages). It invalidates all mappings to trigger the get_user_pages slow path which will eventually block on the xfs inode lock held in XFS_MMAPLOCK_EXCL mode. If dax_layout_busy_page() finds a busy page it returns it for xfs to wait for the page-idle event that will fire when the page reference count reaches 1 (recall ZONE_DEVICE pages are idle at count 1, see generic_dax_pagefree()). While waiting, the XFS_MMAPLOCK_EXCL lock is dropped in order to not deadlock the process that might be trying to elevate the page count of more pages before arranging for any of them to go idle. I.e. the typical case of submitting I/O is that iov_iter_get_pages() elevates the reference count of all pages in the I/O before starting I/O on the first page. The process of elevating the reference count of all pages involved in an I/O may cause faults that need to take XFS_MMAPLOCK_EXCL. Although XFS_MMAPLOCK_EXCL is dropped while waiting, XFS_IOLOCK_EXCL is held while sleeping. We need this to prevent starvation of the truncate path as continuous submission of direct-I/O could starve the truncate path indefinitely if the lock is dropped. Cc: Dave Chinner Cc: Ross Zwisler Reported-by: Jan Kara Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Acked-by: Darrick J. Wong Signed-off-by: Dan Williams --- fs/xfs/xfs_file.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 10 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4774c7172ef4..f5695dc314f1 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -718,6 +718,38 @@ buffered: return ret; } +static void +xfs_wait_dax_page( + struct inode *inode, + bool *did_unlock) +{ + struct xfs_inode *ip = XFS_I(inode); + + *did_unlock = true; + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + schedule(); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); +} + +static int +xfs_break_dax_layouts( + struct inode *inode, + uint iolock, + bool *did_unlock) +{ + struct page *page; + + ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); + + page = dax_layout_busy_page(inode->i_mapping); + if (!page) + return 0; + + return ___wait_var_event(&page->_refcount, + atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, + 0, 0, xfs_wait_dax_page(inode, did_unlock)); +} + int xfs_break_layouts( struct inode *inode, @@ -725,19 +757,28 @@ xfs_break_layouts( enum layout_break_reason reason) { bool retry; + int error; ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); - switch (reason) { - case BREAK_UNMAP: - ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); - /* fall through */ - case BREAK_WRITE: - return xfs_break_leased_layouts(inode, iolock, &retry); - default: - WARN_ON_ONCE(1); - return -EINVAL; - } + do { + retry = false; + switch (reason) { + case BREAK_UNMAP: + error = xfs_break_dax_layouts(inode, *iolock, &retry); + if (error || retry) + break; + /* fall through */ + case BREAK_WRITE: + error = xfs_break_leased_layouts(inode, iolock, &retry); + break; + default: + WARN_ON_ONCE(1); + error = -EINVAL; + } + } while (error == 0 && retry); + + return error; } #define XFS_FALLOC_FL_SUPPORTED \ -- cgit v1.2.3